2.11BSD/src/local/decompr16.c
/* decompr16: decompress 16bit compressed files on a 16bit Intel processor
* running minix.
* This kludge was hacked together by John N. White on 6/30/91 and
* is Public Domain.
* Use chmem =500 decompr16.
* decompr16 needs about 280k to run in pipe mode. (56k per copy)
*
* This program acts as a filter:
* decompr16 < compressed_file > decompressed_file
*
* The arguments -0 to -4 causes only the corresponding pass to be done. Thus:
* decompr16 -0 < compressed_file | decompr16 -1 | decompr16 -2 |
* decompr16 -3 | decompr16 -4 > decompressed_file
* will also work.
* If RAM is scarce these passes can be done sequentially using tmp files.
*
* Compress uses a modified LZW compression algorithm. A compressed file
* is a set of indices into a dictionary of strings. The number of bits
* used to store each index depends on the number of entries currently
* in the dictionary. If there are between 257 and 512 entries, 9 bits
* are used. With 513 entries, 10 bits are used, etc. The initial dictionary
* consists of 0-255 (which are the corresponding chars) and 256 (which
* is a special CLEAR code). As each index in the compressed file is read,
* a new entry is added to the dictionary consisting of the current string
* with the first char of the next string appended. When the dictionary
* is full, no further entries are added. If a CLEAR code is received,
* the dictionary will be completely reset. The first two bytes of the
* compressed file are a magic number, and the third byte indicates the
* maximum number of bits, and whether the CLEAR code is used (older versions
* of compress didn't have CLEAR).
*
* This program works by forking four more copies of itself. The five
* programs form a pipeline. Copy 0 reads from stdin and writes to
* copy 1, which writes to copy 2, then to copy 3, then to copy 4 (the
* original) which writes to stdout. If given a switch -#, where # is a
* digit from 0 to 4 (example: -2), the program will run as that copy,
* reading from stdin and writing to stdout. This allows decompressing
* with very limited RAM because only one of the five passes is in
* memory at a time.
*
* The compressed data is a series of string indices (and a header at
* the beginning and an occasional CLEAR code). As these indices flow
* through the pipes, each program decodes the ones it can. The result
* of each decoding will be indices that the following programs can handle.
*
* Each of the 65536 strings in the dictionary is an earlier string with
* some character added to the end (except for the the 256 predefined
* single char strings). When new entries are made to the dictionary,
* the string index part will just be the last index to pass through.
* But the char part is the first char of the next string, which isn't
* known yet. So the string can be stored as a pair of indices. When
* this string is specified, it is converted to this pair of indices,
* which are flagged so that the first will be decoded in full while
* the second will be decoded to its first char. The dictionary takes
* 256k to store (64k strings of 2 indices of 2 bytes each). This is
* too big for a 64k data segment, so it is divided into 5 equal parts.
* Copy 0 of the program maintains the high part and copy 4 holds the
* low part.
*/
#define BUFSZ 256 /* size of i/o buffers */
#define BUFSZ_2 (BUFSZ/2) /* # of unsigned shorts in i/o bufs */
#define DICTSZ (unsigned)13056 /* # of local dictionary entries */
#define EOF_INDEX (unsigned short)0xFFFF /* EOF flag for pipeline */
int pipein=0, pipeout=1; /* file nums for pipeline input and output */
int fildes[2]; /* for use with pipe() */
int ibufp, obufp, ibufe; /* pointers to bufs, (inp, output, end) */
int ipbufp=BUFSZ_2, opbufp; /* pointers to pipe bufs */
int pnum= -1; /* ID of this copy */
unsigned short ipbuf[BUFSZ_2]; /* for buffering input */
unsigned short opbuf[BUFSZ_2]; /* for buffering output */
unsigned char *ibuf=(unsigned char*)ipbuf, *obuf=(unsigned char*)opbuf;
unsigned short dindex[DICTSZ]; /* dictionary: index to substring */
unsigned short dchar[DICTSZ]; /* dictionary: last char of string */
unsigned iindex, tindex, tindex2;/* holds index being processed */
unsigned base; /* where in global dict local dict starts */
unsigned tbase;
unsigned locend; /* where in global dict local dict ends */
unsigned curend=256; /* current end of global dict */
unsigned maxend; /* max end of global dict */
int dcharp; /* ptr to dchar that needs next index entry */
int curbits; /* number of bits for getbits() to read */
int maxbits; /* limit on number of bits */
int clearflg; /* if set, allow CLEAR */
int inmod; /* mod 8 for getbits() */
main(argc, argv) char **argv; {
int i;
/* check for arguments */
if(argc>=2){
if(**++argv != '-' || (pnum = (*argv)[1]-'0') < 0 || pnum > 4)
die("bad args\n");
}
if(pnum<=0){ /* if this is pass 0 */
/* check header of compressed file */
if(mygetc() != 0x1F || mygetc() != 0x9D)/* check magic number */
die("not a compressed file\n");
iindex=mygetc(); /* get compression style */
} else getpipe(); /* get compression style */
maxbits = iindex&0x1F;
clearflg = (iindex&0x80) != 0;
if(maxbits<9 || maxbits>16) /* check for valid maxbits */
die("can't decompress\n");
if((pnum & ~3) == 0) putpipe(iindex, 0); /* pass style to next copy */
if(pnum<0){ /* if decompr should set up pipeline */
/* fork copies and setup pipeline */
for(pnum=0; pnum<4; pnum++){ /* do four forks */
if(pipe(fildes)) die("pipe() error\n");
if((i=fork()) == -1) die("fork() error\n");
if(i){ /* if this is the copy */
pipeout = fildes[1];
break;
}
/* if this is the original */
pipein = fildes[0];
close(fildes[1]); /* don't accumulate these */
}
}
/* Preliminary inits. Note: end/maxend/curend are highest, not highest+1 */
base = DICTSZ*(4-pnum) + 256, locend = base+DICTSZ-1;
maxend = (1<<maxbits)-1;
if(maxend>locend) maxend=locend;
for(;;){
curend = 255+clearflg; /* init dictionary */
dcharp = DICTSZ; /* flag for none needed */
curbits = 9; /* init curbits (for copy 0) */
for(;;){ /* for each index in input */
if(pnum) getpipe(); /* get next index */
else{ /* get index using getbits() */
if(curbits<maxbits && (1<<curbits)<=curend){
/* curbits needs to be increased */
/* due to uglyness in compress, these indices
* in the compressed file are wasted */
while(inmod) getbits();
curbits++;
}
getbits();
}
if(iindex==256 && clearflg){
if(pnum<4) putpipe(iindex, 0);
/* due to uglyness in compress, these indices
* in the compressed file are wasted */
while(inmod) getbits();
break;
}
tindex=iindex;
/* convert the index part, ignoring spawned chars */
while(tindex>=base) tindex = dindex[tindex-base];
putpipe(tindex, 0); /* pass on the index */
/* save the char of the last added entry, if any */
if(dcharp<DICTSZ) dchar[dcharp++] = tindex;
if(curend<maxend) /* if still building dictionary */
if(++curend >= base)
dindex[dcharp=(curend-base)] = iindex;
/* Do spawned chars. They are naturally produced in the wrong
* order. To get them in the right order without using memory,
* a series of passes, progressively less deep, are used */
tbase = base;
while((tindex=iindex) >= tbase){/* for each char to spawn */
while((tindex2=dindex[tindex-base]) >= tbase)
tindex=tindex2; /* scan to desired char */
putpipe(dchar[tindex-base], 1);/* put it to the pipe */
tbase=tindex+1;
}
}
}
}
/* If s, write to stderr. Flush buffers if needed. Then exit. */
die(s) char *s; {
static int recurseflag=0;
if(recurseflag++) exit(1);
if(s) while(*s) write(2, s++, 1);
if(obufp) write(1, obuf, obufp); /* flush stdout buf if needed */
do putpipe(EOF_INDEX, 0); while(opbufp); /* flush pipe if needed */
exit(s ? 1 : 0);
}
/* Put a char to stdout. */
myputc(c){
obuf[obufp++] = c;
if(obufp >= BUFSZ){ /* if stdout buf full */
write(1, obuf, BUFSZ); /* flush to stdout */
obufp=0;
}
}
/* Get a char from stdin. If EOF, then die() and exit. */
mygetc(){
unsigned u;
if(ibufp >= ibufe){ /* if stdin buf empty */
if((ibufe = read(0, ibuf, BUFSZ)) <= 0)
die(0); /* if EOF, do normal exit */
ibufp=0;
}
return(ibuf[ibufp++]);
}
/* Put curbits bits into index from stdin. Note: only copy 0 uses this.
* The bits within a byte are in the correct order. But when the bits
* cross a byte boundry, the lowest bits will be in the higher part of
* the current byte, and the higher bits will be in the lower part of
* the next byte. */
getbits(){
int have;
static unsigned curbyte; /* byte having bits extracted from it */
static int left; /* how many bits are left in curbyte */
inmod = (inmod+1) & 7; /* count input mod 8 */
iindex=curbyte, have=left;
if(curbits-have > 8) iindex |= (unsigned)mygetc() << have, have+=8;
iindex |= ((curbyte=mygetc()) << have) & ~((unsigned)0xFFFF << curbits);
curbyte >>= curbits-have, left = 8 - (curbits-have);
}
/* Get an index from the pipeline. If flagged firstonly, handle it here. */
getpipe(){
static short flags;
static int n=0; /* number of flags in flags */
for(;;){ /* while index with firstonly flag set */
if(n<=0){
if(ipbufp >= BUFSZ_2){ /* if pipe input buf empty */
if(read(pipein, (char*)ipbuf, BUFSZ) != BUFSZ)
die("bad pipe read\n");
ipbufp=0;
}
flags = ipbuf[ipbufp++];
n = 15;
}
iindex = ipbuf[ipbufp++];
if(iindex>curend)
die(iindex == EOF_INDEX ? 0 : "invalid data\n");
flags<<=1, n--;
/* assume flags<0 if highest remaining flag is set */
if(flags>=0) /* if firstonly flag for index is not set */
return; /* return with valid non-firstonly index */
/* handle firstonly case here */
while(iindex>=base) iindex = dindex[iindex-base];
putpipe(iindex, 1);
}
}
/* put an index to the pipeline */
putpipe(u, flag) unsigned u; {
static unsigned short flags, *flagp;
static int n=0; /* number of flags in flags */
if(pnum==4){ /* if we should write to stdout */
myputc(u); /* index will BE the char value */
return;
}
if(n==0) /* if we need to reserve a flag entry */
flags=0, flagp = opbuf + opbufp++;
opbuf[opbufp++] = u; /* add index to buffer */
flags = (flags<<1) | flag; /* add firstonly flag */
if(++n >= 15){ /* if block of 15 indicies */
n=0, *flagp=flags; /* insert flags entry */
if(opbufp >= BUFSZ_2){ /* if pipe out buf full */
opbufp=0;
if(write(pipeout, (char*)opbuf, BUFSZ) != BUFSZ)
die("bad pipe write\n");
}
}
}