# #include <local-system> /* Huffman code input to output with key * Output file format: * PACKED flag defined below (integer) * Number of chars in expanded file (float) * Number of words in expanded tree (integer) * Tree in 'compressed' form: * If 0<=byte<=0376, expand by zero padding to left * If byte=0377, next two bytes for one word * Terminal nodes: First word is zero; second is character * Non-terminal nodes: Incremental 0/1 pointers * Code string for number of characters in expanded file */ #define SUF0 '.' #define SUF1 'z' #define NNODES 512 #define LNAME 80 #define PACKED 017437 /* <US><US> - Unlikely value */ struct node { struct node *zlink; /* 0 link */ struct node *olink; /* 1 link */ #ifndef FLOATING_PACK long freq; #endif FLOATING_PACK #ifdef FLOATING_PACK float freq; #endif FLOATING_PACK struct node *sortl; /* Pointer to next lower frequency node */ struct node *sorth; /* Pointer to next higher frequency node */ } nodes[NNODES], *root, *leaves[256], sortstart, max; struct { char minor; char major; int inumber; int flags; #ifdef AUSAM unsigned uid; #endif AUSAM char nlinks; #ifndef AUSAM char uid; char gid; #endif AUSAM char size0; int size1; int addr[8]; long actime; long modtime; } status; int used, depth, freqflag ; int tree[1024]; /* Stores tree in puttree; codes in gcode and encoding */ struct {int integ;}; char code[5]; extern errno; struct iobuf { int fildes; int nleft; char *nextp; char buf[512]; } buf, obuf; main(argc, argv) int argc; char *argv[]; #ifndef FLOATING_PACK { long nchars; /* Bits, then chars in output file */ #endif FLOATING_PACK #ifdef FLOATING_PACK { double nchars; /* Bits, then chars in output file */ #endif FLOATING_PACK register struct node *n; register int i; register char *cp; char filename[LNAME]; struct node *order[256]; int j, k, sep, treesize, ncodes; struct {int hi, lo;} ; struct {char byte;} ; for (k=1; k<argc; k++) { if (argv[k][0] == '-' && argv[k][1] == '\0') { freqflag = 1 - freqflag; continue; } sep = -1; cp = filename; for (i=0; i < (LNAME-3) && (*cp = argv[k][i]); i++) if (*cp++ == '/') sep = i; #ifdef AGSM if (cp[-1]==SUF1 && cp[-2]==SUF0) { printf ("%s: Already packed\n", filename); continue; } #endif AGSM if (i >= (LNAME-3) || (i-sep) > 13) { printf ("%s: File name too long\n",argv[k]); continue; } if (fopen(filename,&buf) < 0) { printf ("%s: Unable to open\n", argv[k]); continue; } #ifdef AUSAM newfstat(buf.fildes,&status); #endif AUSAM #ifndef AUSAM fstat(buf.fildes,&status); #endif AUSAM if( (status.flags&060000) != 0 ) { printf ("%s: Not a plain file\n",filename); goto closein; } if( status.nlinks != 1 ) { printf("'%s' has links\n", filename); goto closein; } *cp++ = SUF0; *cp++ = SUF1; *cp = '\0'; if( stat(filename, obuf.buf) != -1) { printf("%s: Already exists\n", filename); goto closein; } if (fcreat(filename,&obuf) == -1) { printf ("%s: Unable to create\n", argv[k]); goto closein; } chmod(filename,status.flags&07777); #ifdef AUSAM chown(filename, status.uid); #endif AUSAM #ifndef AUSAM chown(filename,(status.gid<<8)|status.uid); /* IAN J Jan '77 */ #endif AUSAM errno = used = 0; for (i = 256; i--; ) leaves[i] = 0; sortcount(); if (used < 2) { printf ("%s: Trivial file\n", argv[k]); goto forgetit; } n = &max; for (i = ncodes = used; i--; ) order[i] = n = n->sortl; formtree(); putw(PACKED,&obuf); putw(root->freq.hi, &obuf); putw(root->freq.lo, &obuf); treesize = puttree(); depth = 0; /* Reset for reuse by gcode */ gcode(0,root); /* leaves[i] now points to code for i */ if (freqflag) /* Output stats */ #ifndef FLOATING_PACK { printf ("\n%s: %D Bytes\n",argv[k],root->freq); #endif FLOATING_PACK #ifdef FLOATING_PACK { printf ("\n%s: %.0f Bytes\n",argv[k],root->freq); #endif FLOATING_PACK for (i=ncodes; i--; ) { n = order[i]; #ifndef FLOATING_PACK printf ("%10D%8D%% <%3o> = <", n->freq, 100*n->freq/root->freq, #endif FLOATING_PACK #ifdef FLOATING_PACK printf ("%10.0f%8.3f%% <%3o> = <", n->freq, 100.*n->freq/root->freq, #endif FLOATING_PACK n->olink.integ&0377); if (n->olink.integ<040 || n->olink.integ > 0177) printf ("> "); else printf ("%c> ",n->olink.integ); cp = leaves[n->olink.integ]; for (j=0; j < *cp; j++) putchar('0' + ((cp[1+(j>>3)] >> (7-(j&07)))&01)); putchar('\n'); } } nchars = 0; for (i=ncodes; i--; ) nchars =+ (nodes[i].freq * ((leaves[nodes[i].olink.integ]->byte)&0377)); nchars = (nchars + 7)/8 + treesize + 8; #ifndef FLOATING_PACK if (freqflag) printf ("%s: Packed size: %D bytes\n", #endif FLOATING_PACK #ifdef FLOATING_PACK if (freqflag) printf ("%s: Packed size: %.0f bytes\n", #endif FLOATING_PACK argv[k], nchars); /* If compression won't save a block, forget it */ if ((i = (nchars+511)/512) >= (j = (root->freq+511)/512)) { printf ("%s: Not packed (no blocks saved)\n", argv[k]); goto forgetit; } seek(buf.fildes,0,0); buf.nleft = 0; if (compress() == 0) { unlink(argv[k]); #ifndef FLOATING_PACK printf ("%s: %D%% Compression\n", argv[k], 100*(root->freq - nchars)/root->freq); #endif FLOATING_PACK #ifdef FLOATING_PACK printf ("%s: %.0f%% Compression\n", argv[k], 100.*(root->freq - nchars)/root->freq); #endif FLOATING_PACK } else { perror( filename ); printf ("%s: I/O Error - File unchanged\n", argv[k]); forgetit: unlink(filename); } closeboth: close (obuf.fildes); closein: close (buf.fildes); smdate( filename , status.modtime ); /* preserve modified time */ } } sortcount() { register struct node *p, *q; register int c; max.sorth = sortstart.sorth = &max; sortstart.sortl = max.sortl = &sortstart; #ifndef FLOATING_PACK max.freq = 2000000000; #endif FLOATING_PACK #ifdef FLOATING_PACK max.freq = 1.0e+30; #endif FLOATING_PACK while ((c = getc(&buf)) >= 0) { if ((p = leaves[c]) == 0) { p = leaves[c] = &nodes[used++]; p->zlink = 0; p->olink.integ = c; p->freq = 1; q = p->sorth = sortstart.sorth; p->sortl = &sortstart; q->sortl = sortstart.sorth = p; } else { if ((p->freq =+ 1) > (q = p->sorth)->freq) { do { q = q->sorth; } while (q->freq < p->freq); /* Move node p in front of node q */ p->sortl->sorth = p->sorth; p->sorth->sortl = p->sortl; p->sortl = q->sortl; p->sorth = q; q->sortl->sorth = p; q->sortl = p; } } } } formtree() /* Form Huffman code tree */ { register struct node *p, *q, *r; p = sortstart.sorth; while ((q = p->sorth) != &max) { /* Create a new node by combining * the two lowest frequency nodes */ r = &nodes[used++]; r->freq = p->freq + q->freq; r->zlink = p; r->olink = q; p = q->sorth; while (r->freq > p->freq) p = p->sorth; r->sortl = p->sortl; r->sorth = p; p->sortl->sorth = r; p->sortl = r; p = q->sorth; } return (root = p); } puttree() /* Returns tree size (bytes) */ { register int i,j; register struct iobuf *b; struct {char *charp;} ; int extra; /* full words in tree */ extra = depth = 0; maketree(root); b = &obuf; putw(depth, b); /* Size of tree */ for (i = 0; i<depth; i++) { j = tree[i]; if (j.charp < 0377) putc(j,b); else { putc(0377,b); putw(j,b); extra++; } } return (depth + extra*2); } maketree(no) struct node *no; { register int d; register struct node *n; n = no; d = depth; depth =+ 2; if (n->zlink == 0) /* Terminal node */ { tree[d] = 0; tree[d+1] = n->olink.integ; } else { tree[d] = maketree(n->zlink) - d; tree[d+1] = maketree(n->olink) - d; } return(d); } gcode(len,nod) /* Recursive routine to compute code table */ int len; /* code length at time of call */ struct node *nod; { register struct node *n; register int l, bit; int i; char *t, *u; struct {char ch[];}; n = nod; l = len; if (n->zlink == 0) { /* Terminal. Copy #bits and code, set pointer in leaves */ /* Treat tree as char array */ leaves[n->olink.integ] = t = &tree->ch[depth]; *t++ = l; depth++; u = code; do { *t++ = *u++; depth++; } while ((l =- 8) > 0); } else { bit = 0200 >> (l&07); code[i = l>>3] =& ~bit; gcode(++l, n->zlink); code[i] =| bit; gcode(l,n->olink); } } compress() /* Here's the time consumer */ { register int i, word, bits; int c; char *p; bits = 0; while ((i = getc(&buf)) >= 0) { p = leaves[i]; c = *p++ & 0377; for (i=0; i<c; i++) /* Output c bits */ { word =<< 1; if ((p[i>>3] << (i&07)) & 0200) word++; /* Logical Or 1 */ ++bits; if ((bits =& 017) == 0) { putw(word, &obuf); if ( errno ) return( 1 ); } } } if (bits) putw(word << (16-bits), &obuf); fflush(&obuf); return( errno ); }