V10/cmd/spell.old/huff.c

/*	@(#)huff.c	1.3	*/
#include <stdio.h>
#include "huff.h"
/* huff.h should be local and hidden; it was made
 * accessible outside for dirty reasons: 20% faster spell
 */
struct huff huffcode;

/* Infinite Huffman code
 *
 * Let the messages be exponentially distributed with ratio r:
 * 	P{message k} = r^k*(1-r),	k=0,1,...
 * Let the messages be coded in base q, and suppose
 * 	r^n = 1/q
 * If each decade (base q) contains n codes, then
 * the messages assigned to each decade will be q times
 * as probable as the next. Moreover the code for the tail of
 * the distribution after truncating one decade should look
 * just like the original, but longer by one leading digit q-1.
 * 	q(z+n) = z + (q-1)q^w
 * where z is first code of decade, w is width of code (in shortest
 * full decade). Examples, base 2:
 * 	r^1 = 1/2	r^5 = 1/2
 * 	0		0110
 * 	10		0111
 * 	110		1000
 * 	1110		1001
 * 	...		1010
 * 			10110
 * 	w=1,z=0		w=4,z=0110
 * Rewriting slightly
 * 	(q-1)z + q*n = (q-1)q^w
 * whence z is a multiple of q and n is a multiple of q-1. Let
 * 	z = cq, n = d(q-1)
 * We pick w to be the least integer such that
 * 	d = n/(q-1) <= q^(w-1)
 * Then solve for c
 * 	c = q^(w-1) - d
 * If c is not zero, the first decade may be preceded by
 * even shorter (w-1)-digit codes 0,1,...,c-1. Thus
 * the example code with r^5 = 1/2 becomes
 * 	000
 * 	001
 * 	010
 * 	0110
 * 	0111
 * 	1000
 * 	1001
 * 	1010
 * 	10110
 * 	...
 * 	110110
 * 	...
 * The expected number of base-q digits in a codeword is then
 *	w - 1 + r^c/(1-r^n)
 * The present routines require q to be a power of 2
 */
/* There is a lot of hanky-panky with left justification against
 * sign instead of simple left justification because
 * unsigned long is not available
 */
#define L (BYTE*(sizeof(long))-1)	/* length of signless long */
#define MASK (~(1L<<L))		/* mask out sign */

/* decode the prefix of word y (which is left justified against sign)
 * place mesage number into place pointed to by kp
 * return length (in bits) of decoded prefix or 0 if code is out of
 * range
 */
decode(y,pk)
long y;
long *pk;
{
	register l;
	long v;
	if(y < cs) {
		*pk = y >> (L+QW-w);
		return(w-QW);
	}
	for(l=w,v=v0; y>=qcs; y=(y<<QW)&MASK,v+=n)
		if((l+=QW) > L)
			return(0);
	*pk = v + (y>>(L-w));
	return(l);
}

/* encode message k and put result (right justified) into
 * place pointed to by py.
 * return length (in bits) of result,
 * or 0 if code is too long
 */
encode(k,py)
long k;
long *py;
{
	register l;
	long y;
	if(k < c) {
		*py = k;
		return(w-QW);
	}
	for(k-=c,y=1,l=w; k>=n; k-=n,y<<=QW)
		if((l+=QW) > L)
			return(0);
	*py = ((y-1)<<w) + cq + k;
	return(l);
}
/* Initialization code, given expected value of k
 * E(k) = r/(1-r) = a
 * and given base width b
 * return expected length of coded messages
 */
struct qlog {
	long p;
	double u;
};
double
huff(a)
float a;
{
	struct qlog qlog();
	struct qlog z;
	register i, q;
	long d, j;
	double r = a/(1.0 + a);
	double rc, rq;
	for(i=0,q=1,rq=r; i<QW; i++,q*=2,rq*=rq)
		continue;
	rq /= r;	/* rq = r^(q-1) */
	z = qlog(rq, 1./q, 1L, rq);
	d = z.p;
	n = d*(q-1);
	if(n!=d*(q-1))
		abort();	/*time to make n long*/
	for(w=QW,j=1; j<d; w+=QW,j*=q)
		continue;
	c = j - d;
	cq = c*q;
	cs = cq<<(L-w);
	qcs = (((long)(q-1)<<w) + cq) << (L-QW-w);
	v0 = c - cq;
	for(i=0,rc=1; i<c; i++,rc*=r)	/* rc = r^c */
		continue;
	return(w + QW*(rc/(1-z.u) - 1));
}

struct qlog
qlog(x, y, p, u)	/*find smallest p so x^p<=y */
double x,y,u;
long p;
{
	struct qlog z;
	if(u/x <= y) {
		z.p = 0;
		z.u = 1;
	} else {
		z = qlog(x, y, p+p, u*u);
		if(u*z.u/x > y) {
			z.p += p;
			z.u *= u;
		}
	}
	return(z);
}

whuff()
{
	fwrite((char*)&huffcode,sizeof(huffcode),1,stdout);
}

rhuff(f)
int f;
{
	register size, i;

	size = sizeof(huffcode);
	return(((i = read(f,(char*)&huffcode,size)) == size)?1:0);
}