V10/cmd/join/join.c

Compare this file to the similar file:
Show the results in this format:

/*	join F1 F2 on stuff */

#include	<ctype.h>
#include	<limits.h>
#include	<stdio.h>
#include	<stdlib.h>
#include	<string.h>

/*
 * F1 and F2 index the per-file arrays (f, buf, ppi) and tag -o entries
 * by source file.  F0 tags a -o entry that stands for the join field.
 * FX is the "not a valid file digit" marker used while parsing -o.
 */
enum { F1, F2, FX, F0 };
#define	NFLD	100	/* max field per line */
/* compare the join fields of the current file 1 and file 2 lines */
#define comp() strcmp(ppi[F1][j1],ppi[F2][j2])

FILE *f[2];		/* the two input streams, indexed by F1/F2 */
char buf[2][BUFSIZ];	/* input lines; split in place by input() */
char *ppi[2][NFLD+1];	/* pointers to fields in lines */
/* j1, j2 and olist[] hold 1-origin numbers until main() makes them 0-origin */
int	j1	= 1;	/* join on this field in file 1 */
int	j2	= 1;	/* join on this field in file 2 */
int	olist[2*NFLD];	/* output these fields */
int	olistf[2*NFLD];	/* from these files (F1, F2, or F0 for the join field) */
int	no;		/* number of entries in olist */
int	sep1	= ' ';	/* default field separator; also used on output */
int	sep2	= '\t';	/* second input separator; -t sets sep1 and sep2 alike */
int	discard;	/* count of truncated lines */
char*	null	= "";	/* printed in place of a missing field (-e) */
int	a1;		/* nonzero: also print unpairable lines of file 1 (-a 1, -v 1) */
int 	a2;		/* nonzero: also print unpairable lines of file 2 (-a 2, -v 2) */
int	vflg;		/* nonzero: do not print paired lines (-v) */

/* diagnostics for archaic option forms, and the usage message */
char*	jcom	= "join: -j is archaic, replaced by -1 and -2\n";
char*	ocom	= "join: archaic -o; commas are preferred\n";
char*	usage	= "usage: join [-1 n] [-2 n] [-o m.n,m.n,...]"
		  " [-t c] [-e s] [-a m] [-v m] file1 file2";

/* getopt and its state are declared here rather than taken from a header */
int getopt(int, char**, char*);
extern char *optarg;
extern int optind;

/* forward declarations of the functions defined below */
int onelet(char*);
int xatoi(char*);
void optiono(int, char**);
void output(int, int);
int input(int);
void oparse(char*);
void error(char*, char*);
void seek1(void), seek2(void);

int
main(int argc, char **argv)
{
	int i;

	for(;;) {
		switch(getopt(argc, argv, "1:2:a:e:j:o:t:v:")) {
		case -1:
			break;
		case '?':
			error(usage, "");
		case 'v':
			vflg++;
		case 'a':
			switch(onelet(optarg)) {
			case '1':
				a1++;
				continue;
			case '2':
				a2++;
				continue;
			}
			error(usage, "");
		case 'e':
			null = optarg;
			continue;
		case 't':
			sep1 = sep2 = onelet(optarg);
			continue;
		case '1':
			j1 = xatoi(optarg);
			continue;
		case '2':
			j2 = xatoi(optarg);
			continue;
		case 'j':
			fprintf(stderr, jcom);
			if(optarg[-1] != 'j') {
				j1 = j2 = xatoi(optarg);
				continue;
			}
			switch(onelet(optarg)) {
			case '1':
				j1 = xatoi(argv[optind++]);
				continue;
			case '2':
				j2 = xatoi(argv[optind++]);
				continue;
			}
			error("improper -j", "");
		case 'o':
			optiono(argc, argv);
			continue;
		}
		break;
	}
proceed:
	if (argc-optind != 2)
		error(usage,"");
	for (i = 0; i < no; i++)
		if (--olist[i] >= NFLD)	/* 0 origin */
			error("field number too big in -o","");
	if(--j1<0 || --j2<0)
		error("join field number not positive", "");
	if(j1>=NFLD || j2>=NFLD)
		error("join field number too big", "");
	if (strcmp(argv[optind], "-") == 0)
		f[F1] = stdin;
	else if ((f[F1] = fopen(argv[optind], "r")) == 0)
		error("can't open %s", argv[optind]);
	if(strcmp(argv[optind+1], "-") == 0) {
		f[F2] = stdin;
	} else if ((f[F2] = fopen(argv[optind+1], "r")) == 0)
		error("can't open %s", argv[optind+1]);

	if(ftell(f[F2]) >= 0)
		seek2();
	else if(ftell(f[F1]) >= 0)
		seek1();
	else
		error("sorry, need one random-access file","");
	if (discard)
		error("some input line was truncated", "");
	return 0;
}

/* lazy.  there ought to be a clean way to combine seek1 & seek2 */
/*
 * get1/get2 read and split the next line of file 1 / file 2 and store
 * the field count in the caller's local n1 / n2 (0 means no more input).
 */
#define get1() n1=input(F1)
#define get2() n2=input(F2)
/*
 * Join the two inputs by repositioning file 2.
 *
 * bot2 is the offset in file 2 of the first record of the current run
 * of records; top2 is the offset of the first file 2 record that did not
 * match the key of that run.  For every further file 1 line carrying the
 * same key, file 2 is rewound to bot2 and the run is read again.
 * Unpairable lines are printed when a1/a2 are set; paired lines are
 * printed unless vflg is set.
 */
void
seek2()
{
	int n1, n2;	/* number of fields in each record */
	long top2;
	long bot2 = ftell(f[F2]);
	get1();
	get2();
	/* go on while both files have lines, or one has and -a/-v wants them */
	while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
		if(n1>0 && n2>0 && comp()>0 || n1==0) {
			/* file 2 line has the smaller key, or file 1 is exhausted */
			if(a2) output(0, n2);
			bot2 = ftell(f[F2]);
			get2();
		} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
			/* file 1 line has the smaller key, or file 2 is exhausted */
			if(a1) output(n1, 0);
			get1();
		} else /*(n1>0 && n2>0 && comp()==0)*/ {
			/* pair the current file 1 line with the whole run in file 2 */
			while(n2>0 && comp()==0) {
				if(!vflg) output(n1, n2);
				top2 = ftell(f[F2]);
				get2();
			}
			/* back to the start of the run, then take the next file 1 line */
			fseek(f[F2], bot2, 0);
			get2();
			get1();
			for(;;) {
				if(n1>0 && n2>0 && comp()==0) {
					if(!vflg) output(n1, n2);
					get2();
				} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
					/* ran past the run in file 2: rewind it, advance file 1 */
					fseek(f[F2], bot2, 0);
					get2();
					get1();
				} else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
					/* file 1 has left this key: resume file 2 after the run */
					fseek(f[F2], top2, 0);
					bot2 = top2;
					get2();
					break;
				}
			}
		}
	}
}
/*
 * Join the two inputs by repositioning file 1 (the mirror image of
 * seek2, used when only file 1 reports a position).
 *
 * bot1 is the offset in file 1 of the first record of the current run
 * of records; top1 is the offset of the first file 1 record that did not
 * match the key of that run.  For every further file 2 line carrying the
 * same key, file 1 is rewound to bot1 and the run is read again.
 * Unpairable lines are printed when a1/a2 are set; paired lines are
 * printed unless vflg is set.
 */
void
seek1(void)
{
	int n1, n2;	/* number of fields in each record */
	long top1;
	long bot1 = ftell(f[F1]);
	get1();
	get2();
	/* go on while both files have lines, or one has and -a/-v wants them */
	while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
		if(n1>0 && n2>0 && comp()>0 || n1==0) {
			/* file 2 line has the smaller key, or file 1 is exhausted */
			if(a2) output(0, n2);
			get2();
		} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
			/* file 1 line has the smaller key, or file 2 is exhausted */
			if(a1) output(n1, 0);
			bot1 = ftell(f[F1]);
			get1();
		} else /*(n1>0 && n2>0 && comp()==0)*/ {
			/*
			 * Pair the current file 2 line with the whole run in file 1.
			 * The loop advances file 1, so it must stop on n1: at end
			 * of file input() leaves an empty join field, which would
			 * compare equal to an empty key in file 2 forever.
			 */
			while(n1>0 && comp()==0) {
				if(!vflg) output(n1, n2);
				top1 = ftell(f[F1]);
				get1();
			}
			/* back to the start of the run, then take the next file 2 line */
			fseek(f[F1], bot1, 0);
			get2();
			get1();
			for(;;) {
				if(n1>0 && n2>0 && comp()==0) {
					if(!vflg) output(n1, n2);
					get1();
				} else if(n1>0 && n2>0 && comp()>0 || n1==0) {
					/* ran past the run in file 1: rewind it, advance file 2 */
					fseek(f[F1], bot1, 0);
					get2();
					get1();
				} else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{
					/* file 2 has left this key: resume file 1 after the run */
					fseek(f[F1], top1, 0);
					bot1 = top1;
					get1();
					break;
				}
			}
		}
	}
}

int
input(int n)		/* get line, split, return field count */
{
	register int i, c;
	char *bp;
	char **pp;

	bp = buf[n];
	pp = ppi[n];
	pp[j1] = pp[j2] = "";	/* for absent join field */
	if (fgets(bp, BUFSIZ, f[n]) == 0)
		return(0);
	i = 0;
	do {
		i++;
		if (sep1 == ' ')	/* strip multiples */
			while ((c = *bp) == sep1 || c == sep2)
				bp++;	/* skip blanks */
		else
			c = *bp;
		*pp++ = bp;	/* record beginning */
		while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
			bp++;
		*bp++ = '\0';	/* mark end by overwriting blank */
	} while (c != '\n' && c != '\0' && i < NFLD-1);
	if (c != '\n')
		discard++;
	return(i);
}

/*
 * Print one output line built from the current file 1 / file 2 records.
 *
 * on1 and on2 are the field counts of the two records; 0 means that side
 * takes no part (an unpaired line).  Without a -o list the join field is
 * printed first, followed by the remaining fields of file 1 and then of
 * file 2, each preceded by sep1.  With a -o list every entry is printed
 * in order, separated by sep1; an entry whose field does not exist is
 * replaced by the string in null.
 */
void
output(int on1, int on2)
{
	int k;
	char *field;

	if (no > 0) {
		/* explicit -o list */
		for (k = 0; k < no; k++) {
			field = null;
			if (olistf[k] == F0) {
				/* the join field, from whichever side is present */
				if (j1 < on1)
					field = ppi[F1][j1];
				else if (j2 < on2)
					field = ppi[F2][j2];
			} else if (olistf[k] == F1) {
				if (olist[k] < on1)
					field = ppi[F1][olist[k]];
			} else if (olistf[k] == F2) {
				if (olist[k] < on2)
					field = ppi[F2][olist[k]];
			}
			printf("%s", field);
			if (k != no - 1)
				printf("%c", sep1);
			else
				printf("\n");
		}
		return;
	}

	/* default layout: join field, rest of file 1, rest of file 2 */
	field = null;
	if (j1 < on1)
		field = ppi[F1][j1];
	else if (j2 < on2)
		field = ppi[F2][j2];
	printf("%s", field);
	for (k = 0; k < on1; k++) {
		if (k == j1)
			continue;
		printf("%c%s", sep1, ppi[F1][k]);
	}
	for (k = 0; k < on2; k++) {
		if (k == j2)
			continue;
		printf("%c%s", sep1, ppi[F2][k]);
	}
	printf("\n");
}

/*
 * Report a fatal error and exit with status 1.
 *
 * s1 is used as a printf format and s2 as its single string argument, so
 * s1 may contain at most one %s.  The message goes to standard error,
 * prefixed with "join: " and followed by a newline.  Does not return.
 */
void
error(char *s1, char *s2)
{
	FILE *err = stderr;

	fputs("join: ", err);
	fprintf(err, s1, s2);
	fputc('\n', err);
	exit(1);
}

/*
 * Fetch the argument of the option held in (*argvp)[1].
 *
 * If text follows the two-character option in the same word, that text
 * is the argument and the argument vector is left alone.  Otherwise the
 * next word is the argument: *argcp is decremented and *argvp advanced
 * by one so that the caller sees the word as consumed.  A missing next
 * word, or one beginning with '-', is a fatal "incomplete option" error.
 */
char *
getoptarg(int *argcp, char ***argvp)
{
	char **av = *argvp;
	char *opt = av[1];

	/* argument glued to the option letter */
	if(opt[2] != 0)
		return opt + 2;
	if(*argcp<=2 || av[2][0]=='-')
		error("incomplete option %s", opt);
	/* argument is the following word; step over it */
	*argcp = *argcp - 1;
	*argvp = av + 1;
	return av[2];
}

/*
 * Parse a -o list of the form "m.n,m.n,..." (or "0" for the join field)
 * from s into olistf[] (file tags), olist[] (field numbers as written)
 * and no (entry count).  Entries may be separated by a single comma.
 * Anything malformed is a fatal "invalid -o list" error; a list with
 * more than 2*NFLD entries is a fatal "too many output fields" error.
 */
void
oparse(char *s)
{
	for (no = 0; no<2*NFLD && *s; no++, s++) {
		switch(*s) {
		case '0':
			olistf[no] = F0;
			break;
		case '1':
		case '2':
			if(s[1] == '.' && isdigit((unsigned char)s[2])) {
				olistf[no] = *s=='1'? F1: F2;
				olist[no] = xatoi(s += 2);
				/* leave s on the last digit of a multi-digit number */
				while(isdigit((unsigned char)s[1]))
					s++;
				break;
			} /* fall thru */
		default:
			error("invalid -o list", "");
		}
		if(s[1] == ',')
			s++;
	}
	/* the loop also stops when the tables are full; do not drop the rest */
	if(*s)
		error("too many output fields", "");
}

/*
 * Convert the leading decimal digits of s to an int.
 *
 * s must begin with a digit, otherwise the program exits with
 * "numeric argument expected".  Characters after the digits are ignored,
 * which lets callers parse a number in the middle of a longer string.
 * A value too large for an int is clamped to INT_MAX, so the callers'
 * "too big" range checks still reject it.
 */
int
xatoi(char *s)
{
	long v;

	/* <ctype.h> functions need an unsigned char value */
	if(!isdigit((unsigned char)*s))
		error("numeric argument expected", "");
	/* strtol, unlike atoi, has defined behaviour on overflow */
	v = strtol(s, 0, 10);
	if(v > INT_MAX)
		v = INT_MAX;
	return (int)v;
}

/*
 * Return the only character of the one-character string s.
 * A string of any other length is a usage error, which is fatal.
 */
int
onelet(char *s)
{
	if(strlen(s) != 1)
		error(usage, "");
	return s[0];
}

/*
 * Parse the argument of -o, starting at optarg, into olistf[] (file
 * tags), olist[] (field numbers as written) and no (entry count).
 *
 * Each entry is "0" (the join field) or "m.n" with m being 1 or 2.
 * Entries are separated by a comma or a blank.  For the archaic form in
 * which the entries are separate command-line words, following words
 * that start with a digit are consumed as well, as long as at least two
 * words remain after them for the file operands; ocom is printed once
 * when that happens.  argc and argv are main's, and optind is advanced
 * past any extra words taken.  Malformed input is a fatal error.
 */
void
optiono(int argc, char **argv)
{
	static int multi;	/* nonzero once extra -o words have been consumed */
	int file;		/* file tag of the entry being parsed */

	for (no=0; *optarg; no++) {
		if(no >= 2*NFLD)
			error("too many output fields", "");
		file = *optarg++;
		file = file=='0'? F0: file=='1'? F1: file=='2'? F2: FX;
		if(file == FX)
			error("improper -o", "");
		olistf[no] = file;
		if(file != F0) {
			if(*optarg++ != '.')
				error("improper -o", "");
			olist[no] = xatoi(optarg);
			/* step over the digits xatoi just converted */
			while(isdigit((unsigned char)*optarg))
				optarg++;
		}
		if(*optarg==',' || *optarg==' ')
			optarg++;
		else if(*optarg==0 &&
			(multi || no==0) &&
			optind<argc-2 &&
			isdigit((unsigned char)*argv[optind])) {
				/* archaic list: continue with the next word */
				optarg = argv[optind++];
				if(multi++ == 0)
					fputs(ocom, stderr);
			}
	}
}