V4/usr/source/s1/diff1.c

Compare this file to the similar file:
Show the results in this format:

#define BUFSIZ 518

int *xx;
struct buf { int fdes; char junk[BUFSIZ-2]; } *buf1, *buf2;
int *oldnew[2];
int ms[2] {0,0};	/*maximum serial seen en each file*/
int debug 0;
int *ixold;
int *ixnew;
int *a0;
int *b0;
int *L, *R;
int *J;	/*index of lines in file b matched to each in file a*/

main(argc,argv)
char **argv;
{
	int i;
	if(argc>1&&*argv[1]=='-') {
		argc--;
		argv++;
	}
	if(argc!=3) {
		write(2,"arg count\n",10);
		exit();
	}
	buf1 = sbrk(BUFSIZ);
	input(argv);
	a0 = oldnew[0];
	b0 = oldnew[1];
	i = 2*ms[1]+4;
	L = sbrk(i);
	R = buf2 = sbrk(i<BUFSIZ?BUFSIZ:i);
	J = sbrk(2*ms[0]+4);
	for(i=0;i<=ms[0];i++)
		J[i]=0;
	part(a0,ms[0],b0,ms[1]);
	ixold = xx;
	ixnew = ixold+ms[0]+1;
	if(check(argv))
		write(2,"jackpot\n",8);
	output(argv);
}

input(argv)
char **argv;
{
	int i;
	register int h,k;
	k = 0;
	xx = sbrk(516);
	for(i=0;i<=1;i++) {
		oldnew[i] = &xx[k];
		if(fopen(argv[i+1],buf1)<0) {
			write(2,"can't open input\n",17);
			exit(1);
		}
		while((h=readhash(buf1))!=-1) {
			if(++k%256==0) sbrk(512);
			xx[k] = h;
		}
		if(debug) vprint("xx",xx,xx,k,1);
		close(buf1->fdes);
		ms[i] = &xx[k]-oldnew[i];
	}
}


/* check does double duty:
1.  ferret out any fortuitous correspondences due
to counfounding by hashing (which result in "jackpot")
2.  collect random access indexes to the two files */

check(argv)
char **argv;
{
	register int i, j;
	int ctold, ctnew;
	int jackpot;
	char c,d;
	fopen(argv[1],buf1);
	fopen(argv[2],buf2);
	j = 1;
	ctold = ctnew = 0;
	ixold[0] = ixnew[0] = 0;
	jackpot = 0;
	for(i=1;i<=ms[0];i++) {
		if(J[i]==0) {
			while(getc(buf1)!='\n') ctold++;
			ixold[i] = ++ctold;
			continue;
		}
		while(j<J[i]) {
			while(getc(buf2)!='\n') ctnew++;
			ixnew[j] = ++ctnew;
			j++;
		}
		while((c=getc(buf1))==(d=getc(buf2))) {
			if(c=='\n') break;
			ctold++;
			ctnew++;
		}
		while(c!='\n') {
			jackpot++;
			J[i] = 0;
			c = getc(buf1);
			ctold++;
		}
		ixold[i] = ++ctold;
		while(d!='\n') {
			jackpot++;
			J[i] = 0;
			d = getc(buf2);
			ctnew++;
		}
		ixnew[j] = ++ctnew;
		j++;
	}
	for(;j<=ms[1];j++) {
		while(getc(buf2)!='\n') ctnew++;
		ixnew[j] = ++ctnew;
	}
	close(buf1->fdes);
	close(buf2->fdes);
	return(jackpot);
}

/*
part asks inner to find the x[m][j] vector
left-to-right and right-to-left, and then partitions the problem
to give a maximum string.
This scheme, due to hirshberg, allows one
to due the dynamic programming in linear space. */

part(a,m,b,n)
int a[],b[];
{
	int h,i1,i2,j1,j2,k,k0;
	int t,u;
	if(debug) {
		vprint("part a",a,a0,m,1);
		vprint("part b",b,b0,n,1);
	}
	while(m>0&&n>0&&a[1]==b[1]) {
		J[++a-a0] = ++b-b0;
		m--;
		n--;
	}
	while(m>0&&n>0&&a[m]==b[n])
		J[a-a0+m--] = b-b0+n--;
	if(m<=0||n<=0) return;
	L[0] = R[0] = 0;
	h = m/2;
	inner(a,h,b,n,L,1);
	inner(a+m+1,m-h,b+n+1,n,R,-1);
	t = 0;
	k0 = 0;
	for(k=0;k<=n;k++)
		if((u=L[k]+R[n-k])>t) {
			t = u;
			k0 = k;
		}
	strip(a,h,b,k0,L,1,&i1,&j1);
	strip(a+m+1,m-h,b+n+1,n-k0,R,-1,&i2,&j2);
	part(a,i1,b,j1);
	part(a+m-i2,i2,b+n-j2,j2);
}
/* inner generates the last row of the matrix x[i,j]
where x[i,j] shows the length of the longest common subsequence
between the first i elements of vector a and the first
j elements of vector b.
x[i,j] is computed by the recurrence
x[0,j] = x[i,0] = 0
x[i,j] = max {x[i-1,j], x[i,j-1], x[i-1,j-1]+c[i,j]
where c[i,j] = 0 iff a[i]=b[j] */


inner(a,m,b,n,x,d)
int a[],b[],x[];
int d;			/* +1 if a and b stored forward, -1 backward */
{
	int aid;	/* a[i*d] */
	int *lbjd;	/* &b[j*d] */
	int i;
	int *y;
	register int xi_j_;	/* x[i-1][j-1] */
	register int j,t;

	if(debug) {
		vprint("inn a",a,a0,m,d);
		vprint("inn b",b,b0,n,d);
	}
	for(j=0;j<=n;j++)
		x[j] = 0;
	y = x;
	for(i=1;i<=m;i++) {
		x = y;
		xi_j_ = t = 0;
		aid = a[i*d];
		lbjd = b;
		for(j=1;j<=n;j++) {
			if(t<*++x) t = *x;	/*t=x[i][j-1] *++x=x[i-1][j]*/
			if(aid== *(lbjd=+d) && xi_j_>=t) t++;
			xi_j_ = *x;
			*x = t;		/* x[i][j] = t */
		}
	}
}
/* strip finds the last mated pair of elements in the maximum
matching of vector a and vector b, given the appropriate
x vector computed by inner.
it strips all elements after this mated pair, */



strip(a,m,b,n,x,d,li,lj)
int a[],b[],x[];
int *li,*lj;
{
	register int i,j,*laid;
	int bjd;
	if(debug) {
		vprint("strip a",a,a0,m,d);
		vprint("strip b",b,b0,n,d);
		vprint("strip x",x,x,n,1);
	}
	for(j=n;j>0&&x[j]==x[j-1];j--);
	bjd = b[j*d];
	laid = &a[(m+1)*d];
	for(i=m;i>0&&*(laid=-d)!=bjd;i--);
	*li = i;
	*lj = j;
}

output(argv)
char **argv;
{
	int dir;
	int m;
	int i0,i1,j0,j1;
	extern fout;
	dir = **argv=='-';
	fout = dup(1);
	buf1->fdes = open(argv[1],0);
	buf2->fdes = open(argv[2],0);
	m = ms[0];
	J[0] = 0;
	J[m+1] = ms[1]+1;
	if(dir==0) for(i0=1;i0<=m;i0=i1+1) {
		while(i0<=m&&J[i0]==J[i0-1]+1) i0++;
		j0 = J[i0-1]+1;
		i1 = i0-1;
		while(i1<m&&J[i1+1]==0) i1++;
		j1 = J[i1+1]-1;
		J[i1] = j1;
		change(i0,i1,j0,j1,dir);
	} else for(i0=m;i0>=1;i0=i1-1) {
		while(i0>=1&&J[i0]==J[i0+1]-1&&J[i0]!=0) i0--;
		j0 = J[i0+1]-1;
		i1 = i0+1;
		while(i1>1&&J[i1-1]==0) i1--;
		j1 = J[i1-1]+1;
		J[i1] = j1;
		change(i1,i0,j1,j0,dir);
	}
	flush();
}

change(a,b,c,d,dir)
{
	if(a>b&&c>d) return;
	range(a,b);
	putchar(a>b?'a':c>d?'d':'c');
	if(dir==0) range(c,d);
	putchar('\n');
	if(dir==0) {
		fetch(ixold,a,b,buf1,"* ");
		if(a<=b&&c<=d) printf("---\n");
	}
	fetch(ixnew,c,d,buf2,dir==0?". ":"");
	if(dir!=0&&c<=d) printf(".\n");
}

range(a,b)
{
	if(a>b) printf("%d",b);
	if(a<=b) printf("%d",a);
	if(a<b) printf(",%d",b);
}

fetch(f,a,b,lb,pref)
int *f;
struct buf *lb;
char *pref;
{
	int i, j;
	for(i=a;i<=b;i++) {
		seek(lb->fdes,f[i-1],0);
		read(lb->fdes,lb->junk,f[i]-f[i-1]);
		printf(pref);
		for(j=0;j<f[i]-f[i-1];j++)
			putchar(lb->junk[j]);
	}
}

vprint(s,v,v0,n,d)
int *v,*v0;
char *s;
{
	int i;
	printf("%s\n",s);
	for(i=1;i<=n;i++)
		printf("%7o",v-v0+i*d);
	printf("\n");
	for(i=1;i<=n;i++)
		printf("%7o",v[i*d]);
	printf("\n");
}