AUSAM/source/S/sort.c

Compare this file to the similar file:
Show the results in this format:

#define	L	1024
#define	N	7
#define	MEM	(28*2048)
#define NF	10

int ibuf[259];
int obuf[259];
char *file;
char *filep;
int nfiles;
int nlines;
int ntext;
int *lspace;
char *tspace;
int cmp(), cmpa();
int(*compare)() cmpa;
int term();
int mflg;
int uflg;
char *outfil;
char tabchar;
int eargc;
char **eargv;

char zero[256];

char fold[256]
{
	0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
	0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
	0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
	0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
	0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
	0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
	0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
	0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
	0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
	0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
	0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
	0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
	0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
	0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
	0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
	0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377,
	0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
	0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
	0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
	0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
	0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
	0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
	0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
	0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
	0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
	0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
	0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
	0130, 0131, 0132, 0133, 0134, 0134, 0136, 0137,
	0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
	0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
	0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
	0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177
};
char nofold[256]
{
	0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
	0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
	0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
	0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
	0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
	0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
	0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
	0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
	0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
	0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
	0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
	0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
	0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
	0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
	0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
	0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377,
	0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
	0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
	0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
	0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
	0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
	0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
	0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
	0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
	0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
	0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
	0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
	0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
	0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
	0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
	0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
	0170, 0171, 0172, 0173, 0174, 0175, 0176, 0177
};

char nonprint[256]
{
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
};

char dict[256]
{
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1
};

struct field
{
	char *code;
	char *ignore;
	int nflg;
	int rflg;
	int bflg;
	char *m[2];
	char *n[2];
} fields[NF];
int proto[9]
{
	nofold+128,
	zero+128,
	0,
	1,
	0,
	0, -1,
	0, 0
};
int nfields;
int error 1;
char *setfil();

main(argc, argv)
char **argv;
{
	register a, i;
	char *arg;
	register int *p;
	int *q;

	copyproto();
	eargv = argv;
	while(--argc > 0)
	{
		if(**++argv == '-')
			for(arg = *argv; ; )
			{
				switch(*++arg)
				{
				case '\0':
					if(arg[-1] == '-')
						eargv[eargc++] = "-";
					break;

				case 'o':
					if(--argc > 0)
						outfil = *++argv;
					continue;

				default:
					field(++*argv, 1);
					break;
				}
				break;
			}
		else if(**argv == '+')
		{
			if(++nfields >= NF)
			{
				prints(2,"Too many keys\n");
				exit(1);
			}
			copyproto();
			field(++*argv, 0);
		}
		else
			eargv[eargc++] = *argv;
	}
	q = &fields[0];
	for(a = 1; a <= nfields; a++)
	{
		p = &fields[a];
		for(i = 0; i < 5; i++)	/*sensitive to sizeof(proto)*/ 
			if(p[i] != proto[i])
				goto next;
		for(i = 0; i < 5; i++)
			p[i] = q[i];
	next:;
	}
	if(eargc == 0)
		eargv[eargc++] = "-";

	a = MEM;
	i = lspace = sbrk(0);
	while(brk(a) == -1)
		a =- 512;
	brk(a =- 512);	/* for recursion */ 
	a =- i;
	nlines = ((a-L)>>1)&077777;
	nlines =/ 5;
	ntext = nlines*8;
	tspace = lspace+nlines;
	file = "/tmp/stmXaa";
	filep = file;
	while(*filep != 'X')
		filep++;
	for(*filep = 'a'; ; (*filep)++)
	{
		if(stat(file, lspace) < 0)
		{
			a = creat(file, 0600);
			if(a >= 0)
				break;
		}
		if(*filep == 'z')
		{
			/*
			 if(file[1] != 't') {
			 file = "/tmp/stmXaa";
			 goto loop;
			 }
			 */ 
			prints(2,"Cannot locate temp\n");
			exit(1);
		}
	}
	close(a);
	filep++;
	if((signal(2, 1)&01) == 0)
		signal(2, term);
	if((signal(13, 1)&01) == 0)
		signal(13, term);
	nfiles = eargc;
	if(!mflg)
	{
		ibuf[0] = -1;
		sort();
		close(0);
	}
	for(a = mflg?0:eargc; a+N < nfiles; a =+ N)
	{
		newfile();
		merge(a, a+N);
	}
	if(a != nfiles)
	{
		oldfile();
		merge(a, nfiles);
	}
	error = 0;
	term();
}

sort()
{
	register char *cp;
	register *lp, c;
	int done;
	int i;
	int f;


	done = 0;
	i = 0;
	do
	{
		cp = tspace;
		lp = lspace;
		while(lp < lspace+nlines && cp < tspace+ntext)
		{
			*lp++ = cp;
			while((*cp++ = c = getc(ibuf)) != '\n')
			{
				if(c >= 0)
					continue;
				cp--;
				close(ibuf[0]);
				if(i < eargc)
				{
					if((f = setfil(i++)) == 0)
						ibuf[0] = 0;
					else if(fopen(f, ibuf) < 0)
						cant(f);
				}
				else
					break;
			}
			if(c < 0)
			{
				done++;
				lp--;
				break;
			}
		}
		qsort(lspace, lp-lspace, 2, compare);
		if(done == 0 || nfiles != eargc)
			newfile();
		else
			oldfile();
		while(lp > lspace)
		{
			cp = *--lp;
			if(*cp)
				do
					putc(*cp, obuf);
				while(*cp++ != '\n');
		}
		fflush(obuf);
		close(obuf[0]);
	}
	while(done == 0);
}

struct merg
{
	char l[L];
	int b[259];
};

merge(a, b)
{
	struct merg *p;
	register char *cp, *dp;
	register i;
	struct
	{
		int *ip;
	};
	int f;
	int j;
	int k, l, c;
	int muflg;


	p = lspace;
	j = 0;
	for(i = a; i < b; i++)
	{
		f = setfil(i);
		if(f == 0)
			p->b[0] = dup(0);
		else if(fopen(f, p->b) < 0)
			cant(f);
		ibuf[j] = p;
		if(!rline(p))
			j++;
		p++;
	}

	do
	{
		i = j;
		qsort(ibuf, i, 2, compare);
		l = 0;
		while(i--)
		{
			cp = ibuf[i];
			if(*cp == '\0')
			{
				l = 1;
				if(rline(ibuf[i]))
				{
					k = i;
					while(++k < j)
						ibuf[k-1] = ibuf[k];
					j--;
				}
			}
		}
	}
	while(l);

	muflg = mflg&uflg;
	i = j;
	while(i > 0)
	{
		cp = ibuf[i-1];
		if(uflg == 0 || muflg || (*compare)(&ibuf[i-1], &ibuf[i-2]))
			do
				putc(*cp, obuf);
			while(*cp++ != '\n');
		if(muflg)
		{
			cp = ibuf[i-1];
			dp = p;
			do
			{
			}
			while((*dp++ = *cp++) != '\n');
		}
		do
		{
			if(rline(ibuf[i-1]))
			{
				i--;
				if(i == 0)
					break;
				if(i == 1)
					muflg = uflg;
			}
			cp = &ibuf[i];
			while(--cp.ip > ibuf && (*compare)(cp.ip, cp.ip-1) < 0)
			{
				dp = *cp.ip;
				*cp.ip = *(cp.ip-1);
				*(cp.ip-1) = dp;
			}
		}
		while(muflg && (*compare)(&ibuf[i-1], &p) == 0);
	}
	p = lspace;
	for(i = a; i < b; i++)
	{
		close(p->b[0]);
		p++;
		if(i >= eargc)
			close(creat(setfil(i)));
	}
	fflush(obuf);
	close(obuf[0]);
}

rline(mp)
struct merg *mp;
{
	register char *cp;
	register *bp, c;


	bp = mp->b;
	cp = mp->l;
	do
	{
		c = getc(bp);
		if(c < 0)
			return(1);
		*cp++ = c;
	}
	while(c != '\n');
	return(0);
}

newfile()
{

	if(fcreat(setfil(nfiles), obuf) < 0)
	{
		prints(2,"Can't create temp\n");
		term();
	}
	nfiles++;
}

char *setfil(i)
{

	if(i < eargc)
		if(eargv[i][0] == '-' && eargv[i][1] == '\0')
			return(0);
		else
			return(eargv[i]);
	i =- eargc;
	filep[0] = i/26+'a';
	filep[1] = i%26+'a';
	return(file);
}

oldfile()
{

	if(outfil)
	{
		if(fcreat(outfil, obuf) < 0)
		{
			prints(2,"Can't create output\n");
			term();
		}
	}
	else
		obuf[0] = 1;
}

cant(f)
{
	prints(2,"Can't open ");
	prints(2,f);
	prints(2,"\n");
	term();
}

term()
{
	register i;


	if(nfiles == eargc)
		nfiles++;
	for(i = eargc; i < nfiles; i++)
	{
		unlink(setfil(i));
	}
	exit(error);
}

cmp(i, j)
int *i, *j;
{
	register char *pa, *pb;
	char *code, *ignore;
	int a, b;
	int k;
	char *la, *lb;
	register int sa;
	int sb;
	char *ipa, *ipb, *jpa, *jpb;
	struct field *fp;


	for(k = nfields > 0; k <= nfields; k++)
	{
		fp = &fields[k];
		pa = *i;
		pb = *j;
		if(k)
		{
			la = skip(pa, fp, 1);
			pa = skip(pa, fp, 0);
			lb = skip(pb, fp, 1);
			pb = skip(pb, fp, 0);
		}
		else
		{
			la = -1;
			lb = -1;
		}
		if(fp->nflg)
		{
			while(blank(*pa))
				pa++;
			while(blank(*pb))
				pb++;
			sa = sb = fp->rflg;
			if(*pa == '-')
			{
				pa++;
				sa = -sa;
			}
			if(*pb == '-')
			{
				pb++;
				sb = -sb;
			}
			if(sa != sb)
				sa = 0;
			for(ipa = pa; ipa < la && digit(*ipa); ipa++);
			for(ipb = pb; ipb < lb && digit(*ipb); ipb++);
			jpa = ipa;
			jpb = ipb;
			a = 0;
			if(sa)
				while(ipa > pa && ipb > pb)
					if(b = *--ipb-*--ipa)
						a = b;
			while(ipa > pa)
				if(*--ipa != '0')
					return(sa?-sa:sb);
			while(ipb > pb)
				if(*--ipb != '0')
					return(sa?sa:sb);
			if(a)
				return(a*sa);
			if(*(pa = jpa) == '.')
				pa++;
			if(*(pb = jpb) == '.')
				pb++;
			while(pa < la && digit(*pa))
				if(pb < lb && digit(*pb))
				{
					if(a = *pb++ -*pa++)
						return(sa?a*sa:sb);
				}
				else if(*pa++ != '0')
					return(sa?-sa:sb);
			while(pb < lb && digit(*pb))
				if(*pb++ != '0')
					return(sa?sa:sb);
			continue;
		}
		code = fp->code;
		ignore = fp->ignore;
	loop:
		while(ignore[*pa])
			pa++;
		while(ignore[*pb])
			pb++;
		if(pa >= la || *pa == '\n')
			if(pb < lb && *pb != '\n')
				return(fp->rflg);
			else
				continue;
		if(pb >= lb || *pb == '\n')
			return(-fp->rflg);
		if((sa = code[*pb++]-code[*pa++]) == 0)
			goto loop;
		return(sa*fp->rflg);
	}
	if(uflg)
		return(0);
	return(cmpa(i, j));
}

cmpa(i, j)
int *i, *j;
{
	register char *pa, *pb;

	pa = *i;
	pb = *j;
	while(*pa == *pb)
	{
		if(*pa++ == '\n')
			return(0);
		pb++;
	}
	return(*pa == '\n'?fields[0].rflg:*pb == '\n'?-fields[0].rflg:*pb > *pa?fields[0].rflg:-fields[0].rflg);
}

skip(pp, fp, j)
struct field *fp;
char *pp;
{
	register i;
	register char *p;


	p = pp;
	if((i = fp->m[j]) < 0)
		return(-1);
	while(i-- > 0)
	{
		if(tabchar != 0)
		{
			while(*p != tabchar)
				if(*p != '\n')
					p++;
				else
					goto ret;
			p++;
		}
		else
		{
			while(blank(*p))
				p++;
			while(!blank(*p))
				if(*p != '\n')
					p++;
				else
					goto ret;
		}
	}
	if(fp->bflg)
		while(blank(*p))
			p++;
	i = fp->n[j];
	while(i-- > 0)
	{
		if(*p != '\n')
			p++;
		else
			goto ret;
	}
ret:
	return(p);
}

digit(c)
{

	return(c <= '9' && c >= '0');
}

copyproto()
{
	register int i, *p, *q;


	p = proto;
	q = &fields[nfields];
	for(i = 0; i < sizeof(proto)/2; i++)
		*q++ = *p++;
}

field(s, k)
char *s;
{
	register struct field *p;
	register d;

	p = &fields[nfields];
	d = 0;
	for(; *s != 0; s++)
	{
		switch(*s)
		{
		case '\0':
			return;

		case 'b':
			p->bflg++;
			break;

		case 'd':
			p->ignore = dict+128;
			break;

		case 'f':
			p->code = fold+128;
			break;
		case 'i':
			p->ignore = nonprint+128;
			break;

		case 'm':
			mflg++;
			continue;

		case 'n':
			p->nflg++;
			break;
		case 't':
			tabchar = *++s;
			if(tabchar == 0)
				s--;
			continue;

		case 'r':
			p->rflg = -1;
			continue;
		case 'u':
			uflg++;
			break;

		case '.':
			if(p->m[k] == -1)	/* -m.n with m missing */ 
				p->m[k] = 0;
			d = &fields[0].n[0]-&fields[0].m[0];

		default:
			p->m[k+d] = number(&s);
		}
		compare = cmp;
	}
}

number(ppa)
char **ppa;
{
	int n;
	register char *pa;

	pa = *ppa;
	n = 0;
	while(digit(*pa))
	{
		n = n*10+*pa-'0';
		*ppa = pa++;
	}
	return(n);
}

blank(c)
{
	if(c == ' ' || c == '\t')
		return(1);
	return(0);
}

int(*qscmp)();
int qses;

qsort(a, n, es, fc)
char *a;
int n, es;
int(*fc)();
{
	qscmp = fc;
	qses = es;
	qs1(a, a+n*es);
}

qs1(a, l)
char *a, *l;
{
	register char *i, *j, *es;
	char **k;
	char *lp, *hp;
	int n, c;



	es = qses;

start:
	if((n = l-a) <= es)
		return;


	n = ((n/(2*es))*es)&077777;
	hp = lp = a+n;
	i = a;
	j = l-es;


	for(; ; )
	{
		if(i < lp)
		{
			if((c = (*qscmp)(i, lp)) == 0)
			{
				qsexc(i, lp =- es);
				continue;
			}
			if(c < 0)
			{
				i =+ es;
				continue;
			}
		}

	loop:
		if(j > hp)
		{
			if((c = (*qscmp)(hp, j)) == 0)
			{
				qsexc(hp =+ es, j);
				goto loop;
			}
			if(c > 0)
			{
				if(i == lp)
				{
					qstexc(i, hp =+ es, j);
					i = lp =+ es;
					goto loop;
				}
				qsexc(i, j);
				j =- es;
				i =+ es;
				continue;
			}
			j =- es;
			goto loop;
		}


		if(i == lp)
		{
			if(uflg)
				for(k = lp+2; k <= hp; )
					*(*k++) = '\0';
			if(lp-a >= l-hp)
			{
				qs1(hp+es, l);
				l = lp;
			}
			else
			{
				qs1(a, lp);
				a = hp+es;
			}
			goto start;
		}


		qstexc(j, lp =- es, i);
		j = hp =- es;
	}
}

qsexc(i, j)
char *i, *j;
{
	register char *ri, *rj, c;
	int n;


	n = qses;
	ri = i;
	rj = j;
	do
	{
		c = *ri;
		*ri++ = *rj;
		*rj++ = c;
	}
	while(--n);
}

qstexc(i, j, k)
char *i, *j, *k;
{
	register char *ri, *rj, *rk;
	char c;
	int n;


	n = qses;
	ri = i;
	rj = j;
	rk = k;
	do
	{
		c = *ri;
		*ri++ = *rk;
		*rk++ = *rj;
		*rj++ = c;
	}
	while(--n);
}