V10/games/word_clout/small.c

Compare this file to the similar file:
Show the results in this format:

#define AMBIGUOUS 1
#define STDERR stdout
#include <stdio.h>
#include <ctype.h>
#include "./regexp.h"
regexp *regcomp(),*rs;
int nwanted,wantps;
int accept();
char *malloc();
#define freecol(x) (col[x].next=colfree,colfree=x)
#define getcol(x) ((x=colfree)?(colfree=col[colfree].next):(x=morecol()))
#define NHS 114723	/* number of headwords and senses */
#define N 400	/* max line length */
#define INITTEXT (516673+1)	/* bytes of headword text */
#define INITSQ 1980	/* bytes of sense qualifier text */
#define INITSQTAB 1063	/*  #qual sense addr with text addr */
#define NWINDEX 514104	/* number of 17 bit references */
#define X 1	/* extra bit for ref qualified */
#define WADDR 17	/* bits in headword or sense addr */
#define WMASK ((1<<WADDR)-1)
#define WQ (1<<WADDR)
#define WBITS (WADDR+X)
#define WQTEXT 2518	/* bytes of ref qualifier text */
#define NWQ 11582	/* #qual ref addr with text addr */
#define WQT 207	/* #tree entries for ref qual sort */
#define NCOL 1024	/* intermediate senses in path search */
struct col {
	unsigned int entry:17;
	unsigned int next:15;
} *col;
int colfree;
FILE *f;
FILE *wpackfile;
union x {
	struct headword {
		unsigned int type:1;
		unsigned int tt:19;
		unsigned int v:12;
	} h;
	struct sense {
		unsigned int type:1;
		unsigned int st:19;
		unsigned int p:4;
		unsigned int q:1;
		unsigned int v:7;
	} s;
} p[NHS+2];
char tbase[INITTEXT];
int text;
int z;
char in[N];
struct squal {
	unsigned int saddr:17;
	unsigned int qaddr:11;
	unsigned int v:4;
} sqtab[INITSQTAB];
int nsq;
char sqbase[INITSQ];
int sqtext;
int windex;
unsigned char arena[(NWINDEX*WBITS)/8+1];
struct wqual {
	unsigned int waddr:19;
	unsigned int qaddr:12;
} wqtab[NWQ];
int nwq;
char wqbase[WQTEXT];
int wqtext;
struct wqt {
	unsigned left:8,right:8,qaddr:12;
} wqtree[WQT];
int wqt;
#define NSET 32
int set[NSET];
int ncol;
char *parts[] = {
	"","n","v","adj","adv","pron","conj","prep","interj"
};
#define NP 9
inset(t)
char *t;
{
	char y[128];
	int i,j,k;
	if(*t!='&') {
		i=headw(t);
		if(i==0)return 0;
		for(j=0; j<NSET; j++)if(p[i+j+1].s.type==0||i+j+1>=z)break;
			else set[j]=i+j+1;
		return j;
	}
	if(isdigit(*++t)) {
		for(k=0; isdigit(*t); t++)k=k*10+*t-'0';
		i=headw(t);
		if(i==0)return 0;
		for(j=0; j<NSET; j++)if(p[i+j+1].s.type==0||i+j+1>=z)break;
			else if(j+1==k) {
				set[0]=i+j+1;
				return 1;
			}
		return 0;
	}
	if(strlen(t)>126)err("string to long in inset()");
	strcpy(y,t);
	for(j=0; y[j]&&y[j]!='.'; j++);
	if(y[j]==0)return 0;
	y[j]=0;
	for(k=1; k<NP; k++)if(strcmp(y,parts[k])==0)break;
	if(k>=NP)return 0;
	i=headw(y+j+1);
	if(i++==0)return 0;
	for(j=0; j<NSET&&i<=z&&p[i].s.type==1; i++) {
		if(p[i].s.p==k)set[j++]=i;
	}
	return j;
}
readnum()
{
	int j,k;
	for(k=0; isdigit(j=getc(f)); k=k*10+j-'0');
	if(j!='\n')err("expect number");
	fprintf(STDERR,"readnum()=%d\n",k);
	return k;
}
sensequal(zz,t)
char *t;
{
	int i;
	if(p[zz].h.type!=1)err("squal on headword");
	if(*t==0)return;
	p[zz].s.q=1;
	sqtab[nsq].saddr=zz;
	for(i=0; i<nsq; i++)if(strcmp(t,sqbase+sqtab[i].qaddr)==0)break;
	if(i<nsq) {
		sqtab[nsq].qaddr=sqtab[i].qaddr;
	}
	else {
		sqtab[nsq].qaddr=sqtext;
		strcpy(sqbase+sqtext,t);
		sqtext+=strlen(t)+1;
		if(sqtext>=INITSQ)err("sqtext overflow");
	}
	if(++nsq>INITSQTAB)err("nsq overflow");
}
err(s)
char *s;
{
	int i;
	fprintf(STDERR,"err: %s\n",s);
	fprintf(STDERR,"type return:"); getchar();
	fprintf(STDERR,"input buffer segments:\n");
	for(i=0; i<N; i++) {
		if(in[i]==0)continue;
		fprintf(STDERR,"'%s'\n",in+i);
		while(i<N&&in[i])i++;
	}
	exit(1);
}
char *squal(zz)
{
	int il,im,iu;
	if(p[zz].h.type==0) {
		err("ask for sensqual on headword");
	}
	if(p[zz].s.q==0)return "";
	il=0; iu=nsq;
	while(il<iu-1) {
		im=(il+iu)/2;
		if(sqtab[im].saddr>zz) {
			iu=im;
		}
		else il=im;
	}
	if(sqtab[il].saddr!=zz)err("binary botch in squal()");
	return sqbase+sqtab[il].qaddr;
}
char *wqual(wi)
{
	int il,im,iu;
	if((unpackw(wi)&WQ)==0)return "";
	il=0; iu=nwq;
	while(il<iu-1) {
		im=(il+iu)/2;
		if(wqtab[im].waddr>wi) {
			iu=im;
		}
		else il=im;
	}
	if(wqtab[il].waddr!=wi)err("binary botch wqual");
	return wqbase+wqtab[il].qaddr;
}
opackw(index,value)
register value;
{
	int i,j;
	register int mask=(1<<WBITS)-1;
	i=index*WBITS;
	value<<=i&7;
	mask<<=i&7;
	j=i>>3;
	while(mask&0xff) {
		arena[j]&=~mask;
		arena[j]|=value;
		value>>=8;
		mask>>=8;
		j++;
	}
}
packw(index,value)
register value;
{
	register i;
	register unsigned char *p;
	i=index*18;
	value<<=i&7;
	p=arena+(i>>3);
	*p++|=value; value>>=8; *p++|=value; value>>=8;
	*p++|=value;
}
unpackw(index)
{
	register i,k;
	register unsigned char *p;
	i=index*18;
	p=arena+(i>>3)+2;
	k=*p; k=(k<<8)|*--p; k=(k<<8)|*--p; 
	return (k>>(i&7))&0x3ffff;
}
ounpackw(index)
{
	int i,j,k;
	int x;
	register int mask=(1<<WBITS)-1;
	i=index*WBITS;
	j=i>>3;
	mask<<=i&7;
	x=k=0;
	while(mask&0xff) {
		k|=(arena[j]&mask)<<x;
		mask>>=8;
		j++;
		x+=8;
	}
	return k>>(i&7);
}
dumps(x)
{
	int i,j,k,l,m,n;
	if(x==0) {
		fprintf(STDERR,"not found\n");
		return;
	}
	for(i=x; i>0&&p[i].h.type!=0; i--);
	if(i==0) {
		fprintf(STDERR,"dump of %d no headword\n",x);
		return;
	}
	fprintf(STDERR,"\n&%d%s ",x-i,p[i].h.tt+tbase);
	if(p[x].h.type==0) {
		fprintf(STDERR,"\n");
		while(++x<z&&p[x].h.type!=0)dumps(x);
		return;
	}
	fprintf(STDERR," (%s.)",parts[p[x].s.p]);
	if(*squal(x))fprintf(STDERR," [%s]",squal(x));
	fprintf(STDERR,":\n");
	for(i=p[x].s.st; l=unpackw(i++); ) {
		for(j=l&WMASK; j>0&&p[j].h.type!=0; j--);
		fprintf(STDERR," &%d%s",(l&WMASK)-j,p[j].h.tt+tbase);
		if(l&WQ)fprintf(STDERR," [%s]",wqual(i-1));
		fprintf(STDERR,",");
	}
	fprintf(STDERR,"\n");
}
char *ma(x)
{
	char *p;
	p=(char*)calloc(x,1);
	if(p==0) {
		fprintf(STDERR,"can't calloc %d bytes\n",x);
		err("quit");
	}
	return p;
}
wsym(q,node)
char *q;
{
	int i;
	if(wqt==0) {
		strcpy(wqbase,q);
		wqtext=strlen(q)+1;
		wqt++;
		return 0;
	}
	i=strcmp(q,wqbase+wqtree[node].qaddr);
	if(i==0)return wqtree[node].qaddr;
	if(i<0) {
		if(wqtree[node].left)return wsym(q,wqtree[node].left);
		wqtree[node].left=wqt;
	}
	else {
		if(wqtree[node].right)return wsym(q,wqtree[node].right);
		wqtree[node].right=wqt;
	}
	strcpy(wqbase+wqtext,q);
	i=wqtext;
	wqtext+=strlen(q)+1;
	if(wqtext>=WQTEXT)err("wqtext ovfl");
	wqtree[wqt++].qaddr=i;
	if(wqt>=WQT)err("wqt ovfl");
	return i;
}
headw(text)
char *text;
{
	int il,iu,im;
	il=1; iu=z;
	while(il<iu+1) {
		im=(il+iu)/2;
		while(im>il&&p[im].h.type!=0)im--;
		if(im<=il) {
			while(++im<iu&&p[im].h.type!=0);
			if(im>=iu)break;
		}
		if(strcmp(p[im].h.tt+tbase,text)>0) {
			iu=im;
		}
		else {
			il=im;
		}
	}
	if(strcmp(p[il].h.tt+tbase,text))return 0;
	return il;
}
morecol()
{
	int i;
	if(col==0)col=(struct col*)ma(NCOL*sizeof*col);
	else col=(struct col*)realloc(col,(ncol+NCOL)*sizeof*col);
	for(i=NCOL+ncol-1; i>ncol; i--)col[i].next=i-1;
	col[ncol].next=0;
	ncol+=NCOL;
	colfree=ncol-2;
	return ncol-1;
}
path(a,b)
char *a,*b;
{
	int i,j,k;
	int old,new;
	int e;
	unsigned char psa[NP],psb[NP];
	int it;
	int pf;
	for(i=0; b[i]; i++)if(b[i]=='/') {
		nwanted=10;
		rsparse(b);
		qpath(accept,a);
		fprintf(STDERR,"\n");
		return;
	}
#ifdef AMBIGUOUS
	if(strncmp(a,"ambiguous ",10)==0) {
		ambpath(a+10,b);
		fprintf(STDERR,"\n");
		return;
	}
#endif
	old=new=0;
	it=1;
	pf=0;
	e=inset(a);
	if(e==0) {
		fprintf(STDERR,"can't find %s\n",a);
		return;
	}
	for(i=0; i<NP; i++)psa[i]=psb[i]=0;
	for(i=0; i<e; i++)psa[p[set[i]].s.p]++;
	k=inset(b);
	if(k==0) {
		fprintf(STDERR,"%s not found\n",b);
		return;
	}
	for(i=0; i<z; i++)if(p[i].s.type)p[i].s.v=0;
	for(i=0; i<k; i++) {
		if(psa[p[set[i]].s.p]==0)continue;
		psb[p[set[i]].s.p]++;
		getcol(j);
		col[j].entry=set[i];
		col[j].next=old;
		old=j;
		p[set[i]].s.v=it;
	}
	if(old==0) {
		fprintf(STDERR,"%s and %s: no part of speech in common\n",
			a,b);
		return;
	}
	e=inset(a);
	for(i=j=0; i<e; i++) {
		set[j]=set[i];
		if(psb[p[set[i]].s.p])j++;
	}
	e=j;
	if(e==0)err("path 1");
loop:
	if(old==0) {
		if(pf)fprintf(STDERR,"\n");
		else fprintf(STDERR," no path from %s to %s\n",a,b);
		goto pathx;
	}
	it++;
	while(old) {
		i=p[col[old].entry].s.st;
		j=old;
		old=col[old].next;
		freecol(j);
		while(j=(unpackw(i++)&WMASK)) {
			if(p[j].s.type&&p[j].s.v==0) {
				getcol(k);
				col[k].next=new;
				col[k].entry=j;
				new=k;
				p[j].s.v=it;
			}
		}
	}
	old=new;
	new=0;
	for(i=j=0; i<e; i++) {
		set[j]=set[i];
		if(p[set[i]].s.v) {
			pfound(set[i]);
			pf++;
		}
		else j++;
	}
	if(e=j)goto loop;
	fprintf(STDERR,"\n");
pathx:
	freeall(old);
	freeall(new);
}
freeall(x)
{
	int i;
	while(x) {
		i=col[x].next;
		freecol(x);
		x=i;
	}
}
pfound(x)
{
	int i,j;
	do {
		for(i=x; i>0&&p[i].h.type; i--);
		if(i==0)err("no hw");
		fprintf(STDERR," &%d%s",x-i,p[i].h.tt+tbase);
		i=p[x].s.st;
		while(j=(WMASK&unpackw(i++))) {
			if(p[j].s.type&&p[j].s.v==p[x].s.v-1)break;
		}
	} while(x=j);
	fprintf(STDERR,"\n");
}
char *avdummy[2];
main(ac,av)
char *av[];
{
	int i,j,k,l,m,n;
	int nhw,nhs;
	int backref;
	char *q;
	int zz;
	int w;
	int nw;
	char y[128];
	if(ac<2) {
		fprintf(stderr,"word_clout: need data file");
		return 1;
	}
	f=fopen(av[1],"r");
	for(i=0; (j=getc(f))!=EOF&&j!='\n'&&i<N-1; i++)in[i]=j;
	if(j==EOF)err("immediate eof");
	in[i]=0;
	if(strcmp(in,"-huffman")==0) {
		goto unpackh;
	}
	else if(strcmp(in,"-pack")) {
		err("expect -huffman or -pack");
	}
	nhw=readnum();
	nhs=readnum();
/* now preallocated:
*	p=(struct x*)ma((nhs+2)*sizeof*p);
*	tbase=(char*)ma(INITTEXT);
*	sqbase=(char*)ma(INITSQ);
*	sqtab=(struct squal*)ma(INITSQTAB*sizeof*sqtab);
*	arena=(unsigned char*)ma(i=(NWINDEX*sizeof*arena*WBITS)/8+1);
*	wqtab=(struct wqual*)ma(NWQ*sizeof*wqtab);
*	wqbase=(char*)ma(WQTEXT*sizeof*wqbase);
*	wqtree=(struct wqt*)ma(WQT*sizeof*wqtree);
*/
readhead:
	for(i=0; i<N&&(j=getc(f))!=EOF&&j!='\n'; i++)in[i]=j;
	if(i>=N)err("line too long");
	if(j==EOF)err("eof in headwords");
	in[i]=0;
	if(i==0)goto hwdone;
	for(j=i-1; j>0&&in[j]!=' '; j--);
	in[j++]=0;
	p[++z].h.tt=text;
	zz=z;
	strcpy(text+tbase,in);
	text+=strlen(in)+1;
	if(text>=INITTEXT)err("inittext too small");
	p[z].h.type=0;
	for( ; in[j]; j++) {
		p[++z].s.type=1;
		p[z].s.q=0;
		p[z].s.st=0;
		k=in[j];
		if(k<='z'&&k>='a')p[z].s.p=k-'a';
		else p[z].s.p=k-'A';
	}
	if((j=getc(f))==':') {
		for(i=0; i<N&&(j=getc(f))!=EOF&&j!='\n'; i++)in[i]=j;
		if(j==EOF)err("eof in sense qual");
		if(i>=N)err("squal too long");
		in[i]=0;
		for(i=0; in[i]; i++)if(in[i]==':'&&in[i+1]==' ')in[i]='-';
		for(i=0; in[i]; i++) {
			if(++zz>z)err("too many squal fields");
			for(j=i; in[j]&&in[j]!=':'; j++);
			if(in[j]==0)err("squal not ending :");
			in[j]=0;
			sensequal(zz,in+i);
			i=j;
		}
		if(zz<z)err("missing squal field");
		goto readhead;
	}
	ungetc(j,f);
	goto readhead;
hwdone:
	fprintf(STDERR,"text=%d\n",text);
	z++;
	p[z].h.type=0; /* extra slot on end */
	for(i=1,j=0; i<z; i++)if(p[i].h.type==1&&p[i].s.q)j++;
	fprintf(STDERR,"%d entries %d qual senses\n",i,j);
	fprintf(STDERR,"sqtext=%d, nsq=%d\n",sqtext,nsq);
	zz=1;
readwords:
	for(i=0; i<N&&(j=getc(f))!=EOF&&j!='\n'; i++)in[i]=j;
	if(j==EOF)goto done;
	if(i>=N)err("line too long");
	in[i]=0;
	while(zz<z&&p[zz].h.type==0)zz++;
	for(i=nw=0; in[i]<='z'&&in[i]>='a'; i++)nw=nw*26+in[i]-'a';
	w=windex;
	p[zz].s.st=w;
	windex+=nw+1;
	if(windex>NWINDEX)err("windex too big");
	while(in[i]==':') {
		q=0;
		if(in[++i]=='{') {
			for(j=i+1; in[j]&&in[j]!='}'; j++);
			if(in[j]==0)err("missing }");
			in[j]=0;
			q=in+i+1;
			i=j+1;
		}
		for(k=0; in[i]>='a'&&in[i]<='z'; i++)k=k*26+in[i]-'a';
		++k; /* offset 1 so preserve 0=0 */
		if(k!=(k&WMASK))err("exceeds wmask");
		if(p[k].h.type==1&&p[k].s.p!=p[zz].s.p) {
			dumps(k);
			dumps(zz);
			err("part bad");
		}
		if(q) {
			k|=WQ;
			wqtab[nwq].waddr=w;
			wqtab[nwq].qaddr=wsym(q,0);
			nwq++;
			if(nwq>=NWQ)err("nwq ovfl");
		}
		packw(w++,k);
	}
	zz++;
	goto readwords;
done:
	fprintf(STDERR,"wqtext=%d nwq=%d windex=%d\n",wqtext,nwq,windex);
	fprintf(STDERR,"wqt=%d\n",wqt);
	fprintf(STDERR,"zz=%d z=%d\n",zz,z);
	m=0;
	if(ac>2&&*av[2]=='c') {
		for(w=0; w<windex; w++)if(unpackw(w))m++;
		fprintf(STDERR,"%d words set\n",m);
	}
	m=0;
	for(zz=1; zz<z; zz++) {
		if(p[zz].h.type==0)continue;
		i=p[zz].s.st;
		while(j=unpackw(i++)) {
			j&=WMASK;
			if(p[j].h.type==0)continue;
			k=p[j].s.st;
			while(l=unpackw(k++)) {
				l&=WMASK;
				if(l==zz)goto backed;
			}
			if(unpackw(k))err("oversubscribed");
			packw(k-1,zz);
			m++;
		backed:;
		}
	}
	fprintf(STDERR,"backrefs added %d\n",m);
	if(ac>2&&*av[2]=='c')for(zz=1; zz<z; zz++) {
		if(p[zz].h.type==0) {
			j=zz;
			continue;
		}
		for(k=p[zz].s.st; unpackw(k)&WMASK; k++);
		if(k+1<z&&(unpackw(k+1)&WMASK)==0) {
			fprintf(STDERR,"%s is missing words\n",
				tbase+p[j].h.tt);
		}
	}
	if(ac>3&&(wpackfile=fopen(av[3],"w")))packit();
	goto ahem;
unpackh:
	unpackit();
ahem:	fprintf(STDERR,"ready.\n");
loop:
	for(i=0; i<N&&(j=getchar())!=EOF&&j!='\n'; i++)in[i]=j;
	if(i<N)in[i]=0;
	if(j==EOF||strcmp(in,"q")==0) {
		exit(0);
	}
	for(i=0; in[i]&&in[i]!='/'; i++)if(in[i]=='^') {
		in[i]=0;
		path(in,in+i+1);
		goto loop;
	}
	if(in[i]=='/') {
		strcpy(y,in+i+1);
		in[i]=0;
		if(i>0&&in[i-1]=='.')in[i-1]=0;
		for(i=0; y[i]&&y[i]!='/'; i++);
		y[i]=0;
		k=0;
		if(*in=='&')for(k=0; k<NP&&strcmp(in+1,parts[k]); k++);
		if(rs)free(rs);
		rs=regcomp(y);
		for(zz=1; zz<z; zz++) {
			if(p[zz].h.type)continue;
			if(k) {
				for(i=1; p[zz+i].s.type; i++)
					if(p[zz+i].s.p==k)goto ok;
				continue;
			}
		ok:
			if(regexec(rs,p[zz].h.tt+tbase,0,0)) {
				fprintf(stderr," %s,",p[zz].h.tt+tbase);
			}
		}
		fprintf(stderr,"\n");
		goto loop;
	}
	i=inset(in);
	for(j=0; j<i; j++)dumps(set[j]);
	if(i==0)fprintf(STDERR,"%s not found\n",in);
	goto loop;
}
/* funct == 0 reject
   funct == -1 stop
   funct == otherwise, continue
*/
qpath(funct,b)
char *b;
int (*funct)();
{
	int i,j,k;
	int old,new;
	int it;
	old=new=0;
	it=1;
	k=inset(b);
	if(k==0) {
		fprintf(STDERR,"%s not found\n",b);
		return;
	}
	for(i=0; i<z; i++)if(p[i].s.type)p[i].s.v=0;
	for(i=0; i<k; i++) {
		getcol(j);
		col[j].entry=set[i];
		col[j].next=old;
		old=j;
		p[set[i]].s.v=it;
	}
loop:
	if(old==0) {
		goto pathx;
	}
	it++;
	while(old) {
		i=p[col[old].entry].s.st;
		j=old;
		old=col[old].next;
		freecol(j);
		while(j=(unpackw(i++)&WMASK)) {
			if(p[j].s.type&&p[j].s.v==0) {
				p[j].s.v=it;
				switch((*funct)(j)) {
				case 0:
					p[j].s.v=0;
					continue;
				case -1: goto pathx;
				}
				getcol(k);
				col[k].next=new;
				col[k].entry=j;
				new=k;
			}
		}
	}
	old=new;
	new=0;
	if(old)goto loop;
pathx:
	freeall(old);
	freeall(new);
}
rsparse(b)
char *b;
{
	int i,j;
	wantps=0;
	for(i=0; b[i]&&b[i]!='/'; i++);
	if(i&&b[i-1]=='.') {
		b[i-1]=0;
		for(j=0; j<NP; j++)if(strcmp(parts[j],b+1)==0)wantps=j;
	}
	if(b[i])i++;
	for(j=i; b[j]; j++);
	while(j&&b[j-1]!='/')j--;
	if(j)b[j-1]=0;
	if(atoi(b+j))nwanted=atoi(b+j);
	if(rs) {
		free(rs);
	}
	rs=regcomp(b+i);
}
accept(zz)
{
	int i;
	for(i=zz; i&&p[i].h.type; i--);
	if(regexec(rs,p[i].h.tt+tbase,0,0)) {
		fprintf(stderr," &%d%s,",zz-i,p[i].h.tt+tbase);
		if(--nwanted<=0)return -1;
		return 1;
	}
	return 1;
}
#define NCOMM 100
#define NHASCII 128
#define NNSEN 50
#define NNWDS 100
#define NSQR 256
#define NWQR 256
#define ALLOC(base,num) (base.freq=(long*)ma(num*sizeof(long)),\
			base.code=(long*)ma(num*sizeof(long)),\
			base.tree=(short*)ma(2*num*sizeof(short)))
#define FREE(base) (free(base.freq),free(base.code),free(base.tree))
#define GEN(base,n) (gencodes(base.freq,n,base.code),flipcodes(n,base.code),\
			entree(base.code,n,base.tree))
#define INBIT() (ibuffc?(ibuff>>--ibuffc)&1:(ibuffc=7,((ibuff=getc(f))>>7)&1))
struct huff {
	long *freq,*code;
	short *tree;
};
struct alpha {
	unsigned int k,wqtab;
};
alphacmp(a,b)
struct alpha *a,*b;
{
	if(p[a->k&WMASK].h.type) {
		if(p[b->k&WMASK].h.type)return (a->k&WMASK)-(b->k&WMASK);
		return 1;
	}
	if(p[b->k&WMASK].h.type==0)return (a->k&WMASK)-(b->k&WMASK);
	return -1;
}
wqcmp(a,b)
struct wqual *a,*b;
{
	return a->waddr-b->waddr;
}
int ibuffc,ibuff;
packit()
{
	struct huff comm,qascii,hascii,nsen,parsp,nwds,nwdsback;
	int i,j,k,l,m,n;
	int ii,jj,il,iu,im;
	int zz;
	int sqref[NSQR],wqref[NWQR];
	int medians,medianw;
	struct alpha wtemp[NNWDS];
	/* s & w qualifier text addr from s & w qualifier index */
	for(i=j=0; i<sqtext; i++) {
		if(i==0||sqbase[i-1]==0) {
			sqref[j++]=i;
			if(j>=NSQR)err("NSQR too small");
		}
	}
	for(i=j=0; i<wqtext; i++) {
		if(i==0||wqbase[i-1]==0) {
			wqref[j++]=i;
			if(j>=NWQR)err("NWQR too small");
		}
	}
	ALLOC(comm,NCOMM);
	ALLOC(qascii,NHASCII);
	ALLOC(hascii,NHASCII);
	ALLOC(nsen,NNSEN);
	ALLOC(parsp,NP);
	ALLOC(nwds,NNWDS);
	ALLOC(nwdsback,NNWDS);
	/* common substring, number of senses, parts of speech */
	for(zz=1; zz<z; zz++) {
		if(p[zz].h.type)continue;
		j=k,k=p[zz].h.tt;
		if(k==0)l=0;
		else for(l=0; tbase[k+l]==tbase[j+l]; l++);
		if(l>=NCOMM)err("NCOMM too small");
		comm.freq[l]++;
		for(i=l; tbase[i+k]; i++)hascii.freq[tbase[i+k]]++;
		hascii.freq[0]++;
		for(n=zz+1; p[n].s.type; n++);
		if(n-zz>=NNSEN)err("NNSEN too small");
		nsen.freq[n-zz]++;
		for(n=zz+1; p[n].s.type; n++)parsp.freq[p[n].s.p]++;
	}
	/* number of words */
	for(zz=1; zz<z; zz++) {
		if(p[zz].h.type) {
			for(i=p[zz].s.st,j=0; k=unpackw(i); i++)if((k&WMASK)<zz)j++;
			i-=p[zz].s.st;
			if(i>=NNWDS)err("NNWDS too small");
			nwds.freq[i]++;
			nwdsback.freq[j]++;
		}
	}
	/* ascii in qualifiers */
	for(i=0; i<sqtext; i++)qascii.freq[sqbase[i]]++;
	for(i=0; i<wqtext; i++)qascii.freq[wqbase[i]]++;

	GEN(qascii,NHASCII);
	GEN(hascii,NHASCII);
	GEN(comm,NCOMM);
	GEN(nsen,NNSEN);
	GEN(parsp,NP);
	GEN(nwds,NNWDS);
	GEN(nwdsback,NNWDS);

	/* median sense qualifier */
	for(i=j=0,zz=1; zz<z; zz++) {
		if(p[zz].s.type) {
			i++;
			if(p[zz].s.q)j++;
		}
	}
	medians=(7*i)/(10*j);
	/* median word qualifier runs */
	for(i=j=0,zz=1; zz<z; zz++) {
		if(p[zz].h.type) {
			for(ii=p[zz].s.st; jj=unpackw(ii); ii++) {
				i++;
				if(jj&WQ)j++;
			}
		}
	}
	medianw=(7*i)/(10*j);

	/* Start packed output */
	fprintf(wpackfile,"-huffman\n");

	/* OUTPUT trees */
	outtree(qascii);
	outtree(hascii);
	outtree(comm);
	outtree(nsen);
	outtree(parsp);
	outtree(nwds);
	outtree(nwdsback);

	/* OUTPUT headers */
	writesim(500000,text);
	writesim(100000,z);
	writesim(1000,nsq);
	writesim(500000,windex);
	writesim(10000,nwq);
	writesim(2000,wqtext);
	writesim(2000,sqtext);
	writesim(100,medians);
	writesim(100,medianw);

	/* OUTPUT sense qualifier text */
	for(i=0; i<sqtext; i++)outbit(qascii.code[sqbase[i]]);
	/* OUTPUT word qualifier text */
	for(i=0; i<wqtext; i++)outbit(qascii.code[wqbase[i]]);
	/* OUTPUT hw ncommon, text \0, number of senses, part of speech */
	for(zz=1; zz<z; zz++) {
		if(p[zz].h.type)continue;
		j=k,k=p[zz].h.tt;
		if(k==0)l=0;
		else for(l=0; tbase[k+l]==tbase[j+l]; l++);
		outbit(comm.code[l]);
		for(i=l; tbase[i+k]; i++)outbit(hascii.code[tbase[i+k]]);
		outbit(hascii.code[0]);
		for(n=zz+1; p[n].s.type; n++);
		outbit(nsen.code[n-zz]);
		for(n=zz+1; p[n].s.type; n++)outbit(parsp.code[p[n].s.p]);
	}
	/* OUTPUT qualified sense run, qualifier reference number */
	m=n=0;
	for(zz=1; zz<z; zz++) {
		if(p[zz].h.type) {
			n++;
			if(p[zz].s.q) {
				writesim(medians,n-m-1);
				m=n;
				j=squal(zz)-sqbase;
				for(i=0; i<NSQR; i++)if(sqref[i]==j)break;
				if(sqref[i]!=j)err("NSQR search botch");
				outbitf(NSQR+i);
			}
		}
	}
	writesim(medians,n-m);
	/* sort words into alphabetical order */
	for(zz=1; zz<z; zz++) {
		if(p[zz].h.type==0)continue;
		for(i=p[zz].s.st,j=0; k=unpackw(i); i++) {
			wtemp[j].wqtab=0;
			if(k&WQ) {
				il=0; iu=nwq;
				while(il<iu-1) {
					im=(il+iu)/2;
					if(wqtab[im].waddr>i)iu=im;
					else il=im;
				}
				if(wqtab[il].waddr!=i)err("bin botch");
				wtemp[j].wqtab=il;
			}
			wtemp[j++].k=k;
		}
		qsort(wtemp,j,sizeof*wtemp,alphacmp);
		for(i=p[zz].s.st,j=0; unpackw(i); i++) {
			if(wtemp[j].k&WQ) {
				wqtab[wtemp[j].wqtab].waddr=i;
			}
			opackw(i,wtemp[j++].k);
		}
	}
	qsort(wqtab,nwq,sizeof*wqtab,wqcmp);
	/* OUTPUT # words, # words back, back references */
	for(zz=1; zz<z; zz++) {
		if(p[zz].h.type==0)continue;
		for(i=p[zz].s.st,j=0; k=unpackw(i)&WMASK; i++) {
				if(k<zz||p[k].s.type==0)j++;
		}
		outbit(nwds.code[i-p[zz].s.st]);
		outbit(nwdsback.code[j]);
		for(i=p[zz].s.st; k=unpackw(i)&WMASK; i++) {
			if(k<zz||p[k].s.type==0)outbitf(WQ|k);
		}
	}
	/* OUTPUT word run, word qualifier index */
	m=n=0;
	for(zz=1; zz<z; zz++) {
		if(p[zz].h.type) {
			for(ii=p[zz].s.st; jj=unpackw(ii); ii++) {
				n++;
				if(jj&WQ) {
					writesim(medianw,n-m-1);
					m=n;
					j=wqual(ii)-wqbase;
					for(i=0; i<NWQR; i++)if(wqref[i]==j)break;
					if(wqref[i]!=j)err("NWQR search botch");
					outbitf(NWQR+i);
				}
			}
		}
	}
	writesim(medianw,n-m);
	FREE(qascii);
	FREE(hascii);
	FREE(comm);
	FREE(nsen);
	FREE(parsp);
	FREE(nwds);
	FREE(nwdsback);
	outbitf(0xff);
	fclose(wpackfile);
}
outbit(x)
register x;
{
	static obuff,b;
	while(x>1) {
		obuff=(obuff<<1)+(x&1);
		x>>=1;
		if(++b>=8) {
			b=0;
			putc(obuff,wpackfile);
		}
	}
}
outbitf(x)
register x;
{
	register y;
	for(y=1; x>1; x>>=1)y=(y<<1)+(x&1);
	outbit(y);
}
inbit() { return INBIT(); }
char *rhalloc(x) { return malloc(x); }
char *rhfree(x) char *x; { free(x); }
wbytes(n) {
	putc(n>>8,wpackfile);
	putc(n,wpackfile);
}
rbytes() {
	int i;
	i=getc(f);
	return (i<<8)+getc(f);
}
decode(tree)
register short *tree;
{
	register int i;
	i=0;
	if(*tree++<2)return *tree&077777;
	do {
		if(INBIT())i=tree[i+1];
		else i=tree[i];
	} while((i&0100000)==0);
	return i&077777;
}
outtree(x)
struct huff x;
{
	int i;
	for(i=0; i<=x.tree[0]; i++)wbytes(x.tree[i]);
}
struct huff intree()
{
	struct huff x;
	int i;
	x.code=0;
	x.freq=0;
	i=rbytes();
	x.tree=(short*)ma((i+1)*sizeof(short));
	x.tree[0]=i;
	for(i=1; i<=x.tree[0]; i++)x.tree[i]=rbytes();
	return x;
}
rfixlen(code)
register code;
{
	register i=0;
	if(code<=1)return 0;
	do {
		i=(i<<1)+INBIT();
	} while((code>>=1)>1);
	return i;
}
unpackit()
{
	int i,j,k,l,m,n;
	int sqref[NSQR],wqref[NWQR];
	struct huff qascii,hascii,comm,nsen,parsp,nwds,nwdsback;
	int medians,medianw;
	int zz;
	int ii,jj;
	fprintf(STDERR,"wait for 'ready.'\n");
	fflush(STDERR);
	qascii=intree();
	hascii=intree();
	comm=intree();
	nsen=intree();
	parsp=intree();
	nwds=intree();
	nwdsback=intree();
	text=readsim(500000);
	z=readsim(100000);
	nsq=readsim(1000);
	windex=readsim(500000);
	nwq=readsim(10000);
	wqtext=readsim(2000);
	sqtext=readsim(2000);
	medians=readsim(100);
	medianw=readsim(100);

	for(i=0; i<sqtext; i++)sqbase[i]=decode(qascii.tree);
	for(i=0; i<wqtext; i++)wqbase[i]=decode(qascii.tree);

	for(i=j=0; i<sqtext; i++) {
		if(i==0||sqbase[i-1]==0) {
			sqref[j++]=i;
		}
	}
	for(i=j=0; i<wqtext; i++) {
		if(i==0||wqbase[i-1]==0) {
			wqref[j++]=i;
		}
	}
	for(k=0,zz=1; zz<z; zz=n) {
		l=decode(comm.tree);
		for(i=0; i<l; i++)tbase[i+k]=tbase[i+j];
		while(tbase[k+l++]=decode(hascii.tree));
		p[zz].h.tt=k;
		j=k;
		k+=l;
		l=decode(nsen.tree);
		for(n=zz+1; n<zz+l; n++) {
			p[n].s.p=decode(parsp.tree);
			p[n].s.type=1;
		}
	}
	if(zz!=z)fprintf(STDERR,"botch zz!=z %d %d\n",zz,z);
	if(k!=text)fprintf(STDERR,"botch k!=text %d %d\n",k,text);
	p[z].h.type=0;
	m=n=0;
	l=readsim(medians);
	k=0;
	for(zz=1; zz<z; zz++) {
		if(p[zz].h.type) {
			n++;
			if(l==n-m-1) {
				p[zz].s.q=1;
				m=n;
				sqtab[k].saddr=zz;
				sqtab[k++].qaddr=sqref[rfixlen(NSQR)];
				l=readsim(medians);
			}
		}
	}
	if(k!=nsq)fprintf(STDERR,"k!=nsq %d %d\n",k,nsq);
	j=0;
	for(zz=1; zz<z; zz++) {
		if(p[zz].h.type==0)continue;
		l=decode(nwds.tree);
		p[zz].s.st=j;
		m=decode(nwdsback.tree);
		for(i=0; i<m; i++) {
			packw(j+i,n=rfixlen(WQ));
			if(p[n].s.type) {
				packw(p[n].s.st+p[n].s.v++,zz);
			}
		}
		p[zz].s.v=m;
		j+=l+1;
	}
	if(j!=windex)fprintf(STDERR,"j!=windex %d %d\n",j,windex);
	m=n=0;
	l=readsim(medianw);
	j=0;
	for(zz=1; zz<z; zz++) {
		if(p[zz].h.type) {
			for(ii=p[zz].s.st; jj=unpackw(ii); ii++) {
				n++;
				if(n-m-1==l) {
					m=n;
					opackw(ii,WQ+jj);
					i=wqref[rfixlen(NWQR)];
					wqtab[j].waddr=ii;
					wqtab[j++].qaddr=i;
					l=readsim(medianw);
				}
			}
		}
	}
	if(j!=nwq)fprintf(STDERR,"j!=nwq %d %d\n",j,nwq);
	free(qascii.tree);
	free(hascii.tree);
	free(comm.tree);
	free(nsen.tree);
	free(parsp.tree);
	free(nwds.tree);
	free(nwdsback.tree);
}
char *rhalloc();
typedef long huffreq;
typedef long huffcode;
typedef unsigned short hufftree;
typedef int huffnum;

/* gencodes((huffreq*)freqtab, (huffnum)n, (huffcode*)output);
   flipcodes((huffnum)n,(huffcode*)output);
   entree((huffcodes*)output.flipped,(huffnum)n,(hufftree*)treearray)
*/
#define ENDFLAG 0100000
/*	decoding tree is (hufftree) array; start at 0, add next bit to index;
	if ENDFLAG is set in entry, everything else is the wanted code;
	if not, take the next offset from the entry;

	first entry in tree is number of entries in rest of tree;
	offset 0 corresponds to tr[1].
*/
entree(cd,ncd,tr)
huffcode *cd;
huffnum ncd;
hufftree *tr;
{
	long i,j,k;
	long x;
	for(i=0; i<2*ncd; i++)tr[i]=0;
	tr++;
	k=2;

	for(i=0; i<ncd; i++) {
		j=0;
		for(x=cd[i]; x>1; x>>=1) {
			j+=x&1;
			if(tr[j]==0) {
				if(x>3) {
					tr[j]=k;
					k+=2;
				}
				else {
					tr[j]=i|ENDFLAG;
				}
			}
			j=tr[j];
			if(x>3&&(j&ENDFLAG)) {
				err("botched huffman code");
			}
		}
	}
	*--tr=k;
}
/* Huffman codes from counts */

/* gencodes(huffreq freq table, huffnum length, huffcode output code table)
	output code words have leading 1-bit set, rest is huffman code.
	program aborts if a code is more than 30 bits -- try increasing
	the minimum counts.
	zero frequency counts produce zero code-words (are ignored).

	flipcodes(huffnum length, huffcode code table) flips the bit order of
		the huffman code bits.
*/

struct code {
	struct code *left,*right;
	huffreq count;
	huffnum val;
};

struct code *getcode()
{
	struct code *h;
	h=(struct code*)rhalloc(sizeof*h);
	if(h==0)err("getcode: can't rhalloc code");
	h->left=h->right=0;
	h->val=h->count=0;
	return h;
}

tcompare(a,b)
register struct code **a,**b;
{
	if(a[0]->count<b[0]->count)return -1;
	if(a[0]->count>b[0]->count)return 1;
	if(a[0]->val<b[0]->val)return -1;
	if(a[0]->val>b[0]->val)return 1;
	return 0;
}

gencodes(infreq,nel,outcodes)
huffreq *infreq;
huffnum nel;
huffcode *outcodes;
{
	struct code **tp,**a,*p,*s;
	long i,j,k;
	long na;
	tp=(struct code**)rhalloc(nel*sizeof*tp);
	if(tp==0)err("gencodes: can't rhalloc tp");
	for(i=0; i<nel; i++) {
		outcodes[i]=0;
		tp[i]=getcode();
		tp[i]->count=infreq[i];
		tp[i]->val=i;
	}
	qsort(tp,nel,sizeof*tp,tcompare);
	for(k=0; k<nel-1; k++) {
		if(tp[k]->count==0)rhfree(tp[k]);
		else break;
	}
	a=&tp[k];
	na=nel-k;
	/* add smallest two and replace in heap till heap gone */
	for(k=na; k>2; ) {
		j=a[1]->count<a[2]->count?1:2;
		p=getcode();
		p->count=a[0]->count+a[j]->count;
		p->left=a[0];
		p->right=a[j];
		a[j]=p;
		for(i=j; (i+=i+1)<k; j=i) {
			if(i+1<k&&a[i]->count>a[i+1]->count)i++;
			if(a[j]->count<=a[i]->count)break;
			p=a[j];
			a[j]=a[i];
			a[i]=p;
		}
		a[0]=a[--k];
		for(i=j=0; (i+=i+1)<k; j=i) {
			if(i+1<k&&a[i]->count>a[i+1]->count)i++;
			if(a[j]->count<=a[i]->count)break;
			p=a[j];
			a[j]=a[i];
			a[i]=p;
		}
	}
	if(k==2) {
		p=getcode();
		p->count=a[0]->count+a[1]->count;
		p->left=a[0];
		p->right=a[1];
	}
	else p=a[0];
	rhfree(tp);
	ascodes(p,1L,outcodes);
}

ascodes(p,hcode,outcodes)
struct code *p;
long *outcodes;
long hcode;
{
	if(hcode<=0)err("ascodes: bitcode overflow");
	if(p->left==0) {
		if(p->right!=0)err("ascodes: tree ends wrong");
		outcodes[p->val]=hcode;
	}
	else {
		if(p->right==0)err("ascodes: tree ends oddly");
		ascodes(p->right,(hcode<<1)|1,outcodes);
		ascodes(p->left,(hcode<<1)|0,outcodes);
	}
	rhfree(p);
}

flipcodes(nel,codes)
huffnum nel;
huffcode *codes;
{
	long i,j,k;
	long x;
	for(i=0; i<nel; i++) {
		if(codes[i]==0)continue;
		x=codes[i];
		codes[i]=1;
		while(x>1) {
			codes[i]=codes[i]*2|(x&1);
			x>>=1;
		}
	}
}
/* self-similar Huffman codes for neg exp distr.
	calls outbitf(1+forward code)
	calls inbit()
*/
writesim(median,intv)
{
	int i;
	static nc,dc,ec,omedian;
	if(median!=omedian) {
		omedian=median;
		for(i=1; (1L<<i)<=median; i++);
		nc=1L<<--i;
		dc=2*nc-median;
		ec=4*nc-median;
	}
	while(intv>=median) {
		outbitf(03L);
		intv-=median;
	}
	outbitf(02L);
	if(intv<dc)outbitf(intv+nc);
	else outbitf(ec+intv);
}
readsim(median) {
	int i,j,k;
	static nc,oi,dc,omedian;
	if(median!=omedian) {
		omedian=median;
		for(i=1; (1L<<i)<=median; i++);
		nc=1L<<--i;
		oi=i;
		dc=2*nc-median;
	}
	k=0; while(INBIT())k+=median;
	for(i=j=0; i<oi; i++)j+=j+INBIT();
	if(j<dc)return k+j;
	return k-dc+j+j+INBIT();
}
#ifdef AMBIGUOUS
prsense(i)
{
	fprintf(STDERR," &%d%s",i-hwof(i),p[hwof(i)].h.tt+tbase);
}
ambpath(a,b)
char *a,*b;
{
	int i,j,k;
	int froma,fromb,nfroma,nfromb;
	int e;
	int it;
	int pf;
	pf=0;
	it=1;
	e=inset(a);
	if(e==0) {
		fprintf(STDERR,"can't find %s\n",a);
		return;
	}
	k=inset(b);
	if(k==0) {
		fprintf(STDERR,"%s not found\n",b);
		return;
	}
	for(i=0; i<z; i++)if(p[i].s.type)p[i].s.v=0; else p[i].h.v=0;
	fromb=0;
	for(i=0; i<k; i++) {
		getcol(j);
		col[j].entry=set[i];
		col[j].next=fromb;
		fromb=j;
		p[set[i]].s.v=it;
	}
	e=inset(a);
	froma=0;
	for(i=0; i<e; i++) {
		getcol(j);
		col[j].entry=set[i];
		col[j].next=froma;
		froma=j;
		p[set[i]].s.v=it;
	}
loop:
	pf=0;
	for(k=froma; k; k=col[k].next) {
		j=hwof(col[k].entry);
		p[j].h.v|=1;
		if(p[j].h.v==3)pf++;
	}
	for(k=fromb; k; k=col[k].next) {
		j=hwof(col[k].entry);
		p[j].h.v|=2;
		if(p[j].h.v==3)pf++;
	}
	if(pf>0) {
		ambiguous(froma,fromb);
		goto pathx;
	}
	if(froma==0&&fromb==0) {
		fprintf(STDERR,"no link found\n");
		return;
	}
	it++;
	nfroma=0;
	while(froma) {
		i=p[col[froma].entry].s.st;
		j=froma;
		froma=col[j].next;
		freecol(j);
		while(j=(unpackw(i++)&WMASK)) {
			if(p[j].s.type&&p[j].s.v==0) {
				getcol(k);
				col[k].next=nfroma;
				col[k].entry=j;
				nfroma=k;
				p[j].s.v=it;
			}
		}
	}
	froma=nfroma;
	nfromb=0;
	while(fromb) {
		i=p[col[fromb].entry].s.st;
		j=fromb;
		fromb=col[j].next;
		freecol(j);
		while(j=(unpackw(i++)&WMASK)) {
			if(p[j].s.type&&p[j].s.v==0) {
				getcol(k);
				col[k].next=nfromb;
				col[k].entry=j;
				nfromb=k;
				p[j].s.v=it;
			}
		}
	}
	fromb=nfromb;
	goto loop;
pathx:
	freeall(froma);
	freeall(fromb);
}
hwof(i)
{
	while(i>0&&p[i].h.type!=0)i--;
	if(i==0)err("hwof()");
	return i;
}
ambiguous(fa,fb)
{
	int f,g,i,j,k;
	int dbest;
	int enda,endb;
	int la,lb;
	int length;
	int best,a;
	dbest=9999;
	for(best=0; best<2; best++)
	for(f=fa; f; f=(f==fb)?0:fb)for(g=f; g; g=col[g].next) {
		enda=endb=0;
		la=lb=9999;
		i=hwof(col[g].entry);
		if(p[i].h.v!=3)continue;
		for(j=1; p[i+j].s.type; j++) {
			if(p[i+j].s.v==0)continue;
			a=pathtoend(i+j);
			if(enda==0)enda=a,la=p[i+j].s.v;
			else
			if(hwof(col[a].entry)==hwof(col[enda].entry)) {
				if(p[i+j].s.v<la) {
					freeall(enda);
					enda=a;
					la=p[i+j].s.v;
				}
				else freeall(a);
			}
			else if(endb==0)endb=a,lb=p[i+j].s.v;
			else
			if(hwof(col[a].entry)==hwof(col[endb].entry)) {
				if(p[i+j].s.v<lb) {
					freeall(endb);
					endb=a;
					lb=p[i+j].s.v;
				}
				else freeall(a);
			}
		}
		length=la+lb;
		if(best==0&&length<dbest)dbest=length;
		else if(best==1&&length==dbest) {
			prrev(enda);
			fprintf(STDERR,"\n");
			prrev(endb);
			fprintf(STDERR,"\n\n");
			p[i].h.v=0;
		}
		freeall(enda);
		freeall(endb);
	}
}
pathtoend(x)
{
	int i,j,k;
	for(k=0;;) {
		getcol(j);
		col[j].entry=x;
		col[j].next=k;
		k=j;
		if(p[x].s.v<=1)return k;
		i=p[x].s.st;
		while(j=(WMASK&unpackw(i++))) {
			if(p[j].s.type&&p[j].s.v==p[x].s.v-1)break;
		}
		x=j;
	}
}
prrev(i)
{
	if(i==0)return;
	prrev(col[i].next);
	prsense(col[i].entry);
}
#endif