/* V10/cmd/deroff.c */

char *xxxvers = "@(#)deroff.c	1.7";


#include <stdio.h>

/* Deroff command -- strip troff, eqn, and tbl sequences from
a file.  Flags:
-w causes output of one word per line
rather than in the original format.
-mm (or -ms) causes the corresponding macros to be interpreted
so that just sentences are output.
-ml also gets rid of lists.
-i causes deroff to ignore .so and .nx commands.
Otherwise deroff follows .so and .nx commands, and removes the contents
of macro definitions, equations (both .EQ ... .EN and $...$),
tbl command sequences, and troff backslash constructions.

All input is through the C macro; the most recently read character is in c.
*/

/*
 * C reads the next character from infile into the global c and yields it.
 * At EOF it yields whatever eof() returns.  When the character equals
 * ldelim and no .so file is active (filesp==files) it yields whatever
 * skeqn() returns.  Reading a newline also increments linect.
 */
#define C ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() :( c == '\n'?(linect++,c):c) ))
/* C1 is C without the ldelim/skeqn() test. */
#define C1 ( (c=getc(infile)) == EOF ? eof() : (c == '\n' ? (linect++,c): c))
/* SKIP and SKIP1 read (via C and C1 respectively) up to and including the next newline. */
#define SKIP while(C != '\n') 
#define SKIP1 while(C1 != '\n')
/*
 * SKIP_TO_COM expands to several statements, so it is only safe inside braces.
 * It skips through two newlines, then keeps reading until it has seen a '.'
 * whose preceding character (kept in pc) was a newline and whose following
 * character is <= 'Z'.  That following character is left in c.
 */
#define SKIP_TO_COM SKIP; SKIP; pc=c; while(C != '.' || pc != '\n' || C > 'Z')pc=c

#define YES 1
#define NO 0
/* values of mac: which macro package -m selected */
#define MS 0
#define MM 1
/* values passed as the second argument of regline()/putmac() */
#define ONE 1
#define TWO 2

/* NOCHAR marks ldelim/rdelim as unset; the remaining values are the classes stored in chars[] */
#define NOCHAR -2
#define SPECIAL 0
#define APOS 1
#define PUNCT 2
#define DIGIT 3
#define LETTER 4

int linect = 0;		/* bumped by C/C1 on each newline; reset to 0 by opn() */
int numflag = 0;	/* set by -n */
int wordflag = NO;	/* set by -w */
int msflag = NO;	/* set by any -m option */
int iflag = NO;		/* set by -i */
int mac = MM;		/* MM or MS, chosen by -mm / -ms */
int disp = 0;		/* set by -ml */
int parag = 0;		/* set by -p */
int inmacro = NO;	/* set by macro(); also raised around regline() calls in comline() */
int intable = NO;	/* set by tbl(), cleared when comline() sees .TE */
int eqnflag = 0;	/* set by skeqn() when msflag is on; consumed by regline() */

char chars[128];  /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */

char line[512];		/* text of the line being assembled */
char *lp;		/* current position in line[] */

int c;			/* most recently read character */
int pc;			/* previous character, as tracked by SKIP_TO_COM and stbl() */
int ldelim	= NOCHAR;	/* eqn delimiters, set by a "delim" line inside eqn() */
int rdelim	= NOCHAR;


/* copies of main's arguments, used by eof() to open the next file operand */
int argc;
char **argv;

extern int optind;
extern char*optarg;
char fname[50];		/* file name filled in by getfname() */
FILE *files[15];	/* stack of input files; files[0] is the current file operand */
FILE **filesp;		/* points at the entry in files[] for the file being read */
FILE *infile;		/* stream that C and C1 read from */

/*
 * main -- parse the options, open the first input (stdin when there is no
 * file operand), fill in the character-class table chars[], and hand
 * control to work(), which loops until eof() exits.
 */
main(ac, av)
int ac;
char **av;
{
	FILE *opn();
	register int ch;
	register int opt;
	register char *s;
	int bad = 0;

	argc = ac;
	argv = av;

	for (;;) {
		opt = getopt(argc, argv, "winpm:");
		if (opt == EOF)
			break;
		if (opt == 'n')
			numflag = 1;
		else if (opt == 'w')
			wordflag = YES;
		else if (opt == 'm') {
			/* the option argument picks the macro package or list stripping */
			msflag = YES;
			switch (*optarg) {
			case 'm':
				mac = MM;
				break;
			case 's':
				mac = MS;
				break;
			case 'l':
				disp = 1;
				break;
			default:
				bad++;
				break;
			}
		}
		else if (opt == 'p')
			parag = YES;
		else if (opt == 'i')
			iflag = YES;
		else if (opt == '?')
			bad++;
	}

	if (bad)
		fatal("usage: deroff [ -w ] [ -m (m s l) ] [ -i ] [ file ] ... \n", (char *) NULL);

	if (optind != argc)
		infile = opn(argv[optind++]);
	else
		infile = stdin;
	filesp = &files[0];
	files[0] = infile;

	/* everything not set below stays SPECIAL (0) */
	ch = 'a';
	while (ch <= 'z')
		chars[ch++] = LETTER;
	ch = 'A';
	while (ch <= 'Z')
		chars[ch++] = LETTER;
	ch = '0';
	while (ch <= '9')
		chars[ch++] = DIGIT;
	for (s = "'&\b"; *s != '\0'; s++)
		chars[(int)*s] = APOS;
	for (s = ".,;?:"; *s != '\0'; s++)
		chars[(int)*s] = PUNCT;

	work();
}
char *calloc();






/*
 * skeqn -- discard an in-line equation.  Invoked from the C macro after it
 * has read ldelim; reads up to the matching rdelim, stepping over backslash
 * escapes and double-quoted strings and keeping linect up to date.
 * Sets eqnflag when msflag is on.  Leaves a blank in c and returns it.
 */
skeqn()
{
	for (;;) {
		c = getc(infile);
		if (c == rdelim)
			break;

		switch (c) {
		case '\n':
			linect++;
			break;

		case '\\':
			/* swallow the escaped character, whatever it is */
			c = getc(infile);
			break;

		case EOF:
			c = eof();
			break;

		case '"':
			/* quoted string: rdelim inside it does not end the equation */
			for (;;) {
				c = getc(infile);
				if (c == '"')
					break;
				if (c == '\n')
					linect++;
				else if (c == EOF)
					c = eof();
				else if (c == '\\') {
					c = getc(infile);
					if (c == EOF)
						c = eof();
					else if (c == '\n')
						linect++;
				}
			}
			break;
		}
	}

	if (msflag)
		eqnflag = 1;
	c = ' ';
	return(c);
}


char *devnull = "/dev/null";

/*
 * opn -- open file p for reading and return the stream.
 * If the open fails and msflag is set (or the file being tried is already
 * devnull) the program quits through fatal(); otherwise a warning goes to
 * stderr and devnull is opened instead.  On success linect is reset and,
 * with numflag, a ".F name" line is printed.
 */
FILE *opn(p)
register char *p;
{
	FILE *fd;

	while ((fd = fopen(p, "r")) == NULL) {
		if (msflag || p == devnull)
			fatal("Cannot open file %s - quitting\n", p);
		else {
			fprintf(stderr, "Deroff: Cannot open file %s - continuing\n", p);
			p = devnull;	/* retry with the fallback */
		}
	}

	linect = 0;
	if (numflag)
		printf(".F %s\n", p);

	return(fd);
}



/*
 * eof -- called by C/C1 when the current input is exhausted.
 * Closes the stream (unless it is stdin), then resumes the enclosing file
 * on the files[] stack, or opens the next file operand, or exits with
 * status 0 when nothing is left.  Returns the next character read with C.
 */
eof()
{
	if (infile != stdin)
		fclose(infile);

	if (filesp <= files) {
		/* not inside a .so file: move on to the next operand */
		if (optind >= argc)
			exit(0);
		infile = opn(argv[optind++]);
	} else {
		--filesp;
		infile = *filesp;
	}

	return(C);
}



getfname()
{
	register char *p;
	struct chain { 
		struct chain *nextp; 
		char *datap; 
	} *chainblock;
	register struct chain *q;
	static struct chain *namechain	= NULL;
	char *csskip = "/usr/lib/tmac/tmac.cs";
	char *sskip = "/usr/lib/tmac/tmac.s";
	char *copys();

	while(C == ' ') ;

	for(p = fname ; (*p=c)!= '\n' && c!=' ' && c!='\t' && c!='\\' ; ++p)
		C;
	*p = '\0';
	while(c != '\n')
		C;
	if(!strcmp(fname, csskip) || !strcmp(fname, sskip)){
		fname[0] = '\0';
		return;
	}
	/* see if this name has already been used */

	for(q = namechain ; q; q = q->nextp)
		if( ! strcmp(fname, q->datap))
		{
			fname[0] = '\0';
			return;
		}

	q = (struct chain *) calloc(1, sizeof(*chainblock));
	q->nextp = namechain;
	q->datap = copys(fname);
	namechain = q;
}




/*
 * fatal -- write "Deroff: " followed by the message to stderr and exit
 * with status 1.  s is used as a printf format with the single string
 * argument p (pass a null pointer when s has no conversion).
 */
fatal(s,p)
char *s, *p;
{
	fputs("Deroff: ", stderr);
	fprintf(stderr, s, p);
	exit(1);
}

/*
 * work -- main loop: look at the first character of each line and send
 * request lines (starting with '.' or '\'') to comline() and everything
 * else to regline().  Never returns; the program ends inside eof().
 */
work()
{
	while (1) {
		eqnflag = 0;
		if (C != '.' && c != '\'')
			regline(NO, TWO);
		else
			comline();
	}
}




regline(macline,const)
int macline;
int const;
{
	line[0] = c;
	lp = line;
	for( ; ; )
	{
		if(c == '\\')
		{
			*lp = ' ';
			backsl();
			if ( c == '%')	/* no blank for hyphenation char */
				lp--;
		}
		if(c == '\n') break;
		if(intable && c=='T')
		{
			*++lp = C;
			if(c=='{' || c=='}')
			{
				lp[-1] = ' ';
				*lp = C;
			}
		}
		else {
			if((msflag == 1) && (eqnflag == 1)){
				eqnflag = 0;
				*++lp = 'x';
			}
			*++lp = C;
		}
	}

	*lp = '\0';

	if(line[0] != '\0'){
		if(wordflag)
			putwords();
		else if(macline)
			putmac(line,const);
		else
			puts(line);
		if(numflag &&(linect%10 == 0))printf(".%d\n",linect);
	}
}




putmac(s,const)
register char *s;
int const;
{
	register char *t;
	register found;
	int last;
	found = 0;

	while(*s)
	{
		while(*s==' ' || *s=='\t')
			putchar(*s++);
		for(t = s ; *t!=' ' && *t!='\t' && *t!='\0' ; ++t)
			;
		if(*s == '\"')s++;
		if(t>s+const && chars[ s[0] ]==LETTER && chars[ s[1] ]==LETTER){
			while(s < t)
				if(*s == '\"')s++;
				else
					putchar(*s++);
			last = *(t-1);
			found++;
		}
		else if(found && chars[ s[0] ] == PUNCT && s[1] == '\0')
			putchar(*s++);
		else{
			last = *(t-1);
			s = t;
		}
	}
	putchar('\n');
	if(msflag && chars[last] == PUNCT){
		printf(" %c\n",last);
	}
}



/*
 * pwclass -- class of character ch from chars[], treating anything
 * outside the table (negative or >= its size) as SPECIAL.
 */
static int pwclass(ch)
int ch;
{
	if(ch < 0 || ch >= (int)sizeof(chars))
		return(SPECIAL);
	return(chars[ch]);
}

/*
 * putwords -- break line[] into words for the -w option, one per output line.
 *
 * A candidate starts at the first DIGIT or LETTER and runs to the next
 * SPECIAL character.  It is printed only if it contains at least two
 * letters; trailing apostrophes, ampersands and punctuation are stripped
 * first.  Characters are classified through pwclass() so that bytes
 * outside the 128-entry chars[] table cannot index out of bounds.
 */
putwords()	/* break into words for -w option */
{
	register char *p, *p1;
	int i, nlet;


	for(p1 = line ; ;)
	{
		/* skip initial specials ampersands and apostrophes */
		while( pwclass(*p1) < DIGIT)
			if(*p1++ == '\0') return;
		nlet = 0;
		for(p = p1 ; (i=pwclass(*p)) != SPECIAL ; ++p)
			if(i == LETTER) ++nlet;

		if(nlet>1)   /* MDM definition of word */
		{
			/* delete trailing ampersands and apostrophes */
			while(p[-1]=='\'' || p[-1]=='&'|| pwclass(p[-1]) == PUNCT)
				--p;
			while(p1 < p) putchar(*p1++);
			putchar('\n');
		}
		else
			p1 = p;
	}
}


/*
 * comline -- handle a request line (one that began with '.' or '\'').
 *
 * Skips blanks and tabs after the control character, reads the two
 * characters of the request name into c1/c2 and dispatches on them.
 * Requests that are not recognised have their arguments printed through
 * regline().  Several branches use SKIP_TO_COM, which leaves the first
 * character of the next request in c and re-enters at comx.
 *
 * .so pushes the new file on files[]; the push is refused with a warning
 * once the stack is full, instead of writing past the end of the array.
 */
comline()
{
	register int c1, c2;

com:
	while(C==' ' || c=='\t')
		;
comx:
	if( (c1=c) == '\n')
		return;
	c2 = C;
	if(c1=='.' && c2!='.')
		inmacro = NO;
	if(msflag && c1 == '['){
		refer(c2);
		return;
	}
	if(parag && mac==MM && c1 == 'P' && c2 == '\n'){
		printf(".P\n");
		return;
	}
	if(c2 == '\n')
		return;

	if(c1 == '\\' && c2 == '\"')
		SKIP;		/* comment line */
	else if(c1=='E' && c2=='Q' && filesp==files)
		eqn();
	else if(c1=='T' && (c2=='S' || c2=='C' || c2=='&') && filesp==files){
		if(msflag){ 
			stbl(); 
		}
		else tbl(); 
	}
	else if(c1=='T' && c2=='E')
		intable = NO;
	else if(!inmacro && c1=='d' && c2=='e')
		macro();
	else if(!inmacro && c1=='i' && c2=='g')
		macro();
	else if(!inmacro && c1=='a' && c2 == 'm')
		macro();
	else if(c1=='s' && c2=='o')
	{
		if(iflag)
			SKIP;
		else
		{
			getfname();
			if( fname[0] ){
				if(filesp >= &files[sizeof(files)/sizeof(files[0]) - 1])
					/* no free slot left on the include stack */
					fprintf(stderr, "Deroff: .so nesting too deep - skipping %s\n", fname);
				else {
					infile = *++filesp = opn( fname );
					if(!infile)
						infile = *--filesp;
				}
			}
		}
	}
	else if(c1=='n' && c2=='x')
	{
		if(iflag)
			SKIP;
		else
		{
			/* replace the current file rather than stacking a new one */
			getfname();
			if(fname[0] == '\0') exit(0);
			if(infile != stdin)
				fclose(infile);
			infile = *filesp = opn(fname);
		}
	}
	else if(c1 == 't' && c2 == 'm')
		SKIP;
	else if(c1=='h' && c2=='w')
		SKIP; 
	else if(msflag && c1 == 'T' && c2 == 'L'){
		SKIP_TO_COM;
		goto comx; 
	}
	else if(msflag && c1=='N' && c2 == 'R')SKIP;
	else if(parag && msflag && (c1 == 'P' || c1 == 'I' || c1 == 'L') && c2 == 'P'){
		/* with -p, paragraph requests are echoed */
		printf(".%c%c",c1,c2);
		while(C != '\n')putchar(c);
		putchar('\n');
	}
	else if(parag && mac==MM && c1 == 'P' && c2 == ' '){
		printf(".%c%c",c1,c2);
		while(C != '\n')putchar(c);
		putchar('\n');
	}
	else if(msflag && c1 == 'A' && (c2 == 'U' || c2 == 'I')){
		if(mac==MM)SKIP;
		else {
			SKIP_TO_COM;
			goto comx; 
		}
	}
	else if(msflag && c1 == 'F' && c2 == 'S'){
		SKIP_TO_COM;
		goto comx; 
	}
	else if(msflag && (c1 == 'S' || c1 == 'N') && c2 == 'H'){
		if(parag){
			/* echo the request, then each following text line prefixed by '!' */
			printf(".%c%c",c1,c2);
			while(C != '\n')putchar(c);
			putchar(c);
			putchar('!');
			while(1){
				while(C != '\n')putchar(c);
				putchar('\n');
				if(C == '.')goto com;
				putchar('!');
				putchar(c);
			}
		}
		else {
			SKIP_TO_COM;
			goto comx; 
		}
	}
	else if(c1 == 'U' && c2 == 'X'){
		if(wordflag)printf("UNIX\n");
		else printf("UNIX ");
	}
	else if(msflag && c1 == 'O' && c2 == 'K'){
		SKIP_TO_COM;
		goto comx; 
	}
	else if(msflag && c1 == 'N' && c2 == 'D')
		SKIP;
	else if(msflag && mac==MM && c1=='H' && (c2==' '||c2=='U')){
		if(parag){
			printf(".%c%c",c1,c2);
			while(C != '\n')putchar(c);
			putchar('\n');
		}
		else {
			SKIP;
		}
	}
	else if(msflag && mac==MM && c2=='L'){
		if(disp || c1 == 'R')sdis('L','E');
		else{
			SKIP;
			printf(" .");
		}
	}
	else if(!msflag &&c1 == 'P' && c2 == 'S'){
		inpic();
	}
	else if(msflag && (c1 == 'D' || c1 == 'N' || c1 == 'K'|| c1=='P') && c2 == 'S')
	{ 
		sdis(c1,'E'); 
	}		/* removed RS-RE */
	else if(msflag && (c1 == 'K' && c2 == 'F'))
	{ 
		sdis(c1,'E'); 
	}
	else if(msflag && c1 == 'n' && c2 == 'f')
		sdis('f','i');
	else if(msflag && c1 == 'c' && c2 == 'e')
		sce();
	else
	{
		/* any other request: print its arguments */
		if(c1=='.' && c2=='.'){
			if(msflag){
				SKIP;
				return;
			}
			while(C == '.')
				;
		}
		++inmacro;
		if(c1 <= 'Z' && msflag)regline(YES,ONE);
		else {
			if(wordflag)c1=C;
			regline(YES,TWO);
		}
		--inmacro;
	}
}



/*
 * macro -- handle the start of a macro definition (.de, .ig, .am).
 * Without msflag the rest of the request line is skipped and inmacro is
 * set.  With msflag whole lines are discarded until the loop below finds
 * the terminating ".." line, and the remainder of that line is skipped.
 */
macro()
{
	if(!msflag){
		SKIP;
		inmacro = YES;
		return;
	}

	do {
		SKIP1;
	} while(C1!='.' || C1!='.' || C1=='.');	/* look for  .. */

	if(c != '\n')
		SKIP;
}




/*
 * sdis -- discard input up to the request whose two-character name is
 * a1 followed by a2 (for example 'D','E' for .DE, or 'f','i' for .fi).
 * All reading is done with C1/SKIP1, so in-line equation delimiters are
 * not interpreted here.
 *
 * When the closing request is found, " ." is printed unless a .EQ was
 * met inside a 'D' region, followed by a newline.
 */
sdis(a1,a2)
char a1,a2;
{
	register int c1,c2;
	register int eqnf;	/* nonzero: print " ." when the end is reached */
	int lct;		/* depth of nested openers seen when a1 == 'L' */
	if(a1 == 'P'){
		/* after skipping blanks, an argument starting with '<' means: skip this line only */
		if(C1 == ' ')
			while(C1 == ' ');
		if(c == '<'){
			SKIP1;
			return;
		}
	}
	lct = 0;
	eqnf=1;
	if(c != '\n')
		SKIP1;
	while(1){
		/* advance to a line whose first character is '.' */
		while(C1 != '.')
			if(c == '\n')continue;
			else SKIP1;
		if((c1=C1) == '\n')continue;
		if((c2=C1) == '\n'){
			/* one-letter request: .P or .H also ends a region opened with a1 == 'f' */
			if(a1 == 'f' && (c1 == 'P' || c1 == 'H'))
				return;
			continue;
		}
		if(c1==a1 && c2 == a2){
			SKIP1;
			if(lct != 0){
				/* this closes a nested region, not the outer one */
				lct--;
				continue;
			}
			if(eqnf)printf(" .");
			putchar('\n');
			return;
		}
		else if(a1 == 'L' && c2 == 'L'){
			/* any request ending in 'L' counts as a nested opener */
			lct++;
			SKIP1;
		}
		else if(a1 == 'D' && c1 == 'E' && c2 == 'Q'){
			eqn(); 
			eqnf=0;
		}
		else if(a1 == 'f'){
			/* a request ending in 'P' (MS) or .HU (MM) also ends the region */
			if((mac == MS && c2 == 'P') ||
				(mac==MM && c1 == 'H' && c2 == 'U')){
				SKIP1;
				return;
			}
			SKIP1;
		}
		else SKIP1;
	}
}
/*
 * tbl -- start of a table when msflag is off: read up to the first '.',
 * skip the rest of that line, and mark that table text follows.
 */
tbl()
{
	for (;;) {
		if (C == '.')
			break;
	}
	SKIP;
	intable = YES;
}
/*
 * stbl -- start of a table when msflag is on: read up to the first '.',
 * run SKIP_TO_COM, and unless that stopped on "TE" keep reading until a
 * '.' that follows a newline and is followed by "TE".
 */
stbl()
{
	for (;;) {
		if (C == '.')
			break;
	}
	SKIP_TO_COM;

	/* SKIP_TO_COM left the character after the '.' in c */
	if (c == 'T' && C == 'E')
		return;

	SKIP;
	pc = c;
	while (C != '.' || pc != '\n' || C != 'T' || C != 'E')
		pc = c;
}

/*
 * eqn -- discard a displayed equation, from the rest of the .EQ line up to
 * and including the .EN line.
 *
 * A line inside the equation starting with "delim" sets ldelim/rdelim
 * from the two characters that follow (or clears them for "delim off" or
 * a missing argument).  With msflag, "x " is printed in place of the
 * equation, followed by the trailing punctuation character (if any) of
 * the last equation line; this is suppressed once any line starting with
 * 'd' has been seen (dflg).
 *
 * The chars[] lookup is guarded because c can be any byte value while the
 * table has only 128 entries.
 */
eqn()
{
	register int c1, c2;
	register int dflg;
	char last;

	last=0;
	dflg = 1;
	SKIP;

	for( ;;)
	{
		if(C1 == '.'  || c == '\'')
		{
			while(C1==' ' || c=='\t')
				;
			if(c=='E' && C1=='N')
			{
				SKIP;
				if(msflag && dflg){
					putchar('x');
					putchar(' ');
					if(last){
						putchar(last); 
						putchar('\n'); 
					}
				}
				return;
			}
		}
		else if(c == 'd')	/* look for delim */
		{
			if(C1=='e' && C1=='l')
				if( C1=='i' && C1=='m')
				{
					while(C1 == ' ');
					if((c1=c)=='\n' || (c2=C1)=='\n'
					    || (c1=='o' && c2=='f' && C1=='f') )
					{
						ldelim = NOCHAR;
						rdelim = NOCHAR;
					}
					else	{
						ldelim = c1;
						rdelim = c2;
					}
				}
			dflg = 0;
		}

		/* read the rest of the line, remembering a trailing punctuation mark */
		if(c != '\n') while(C1 != '\n'){ 
			if(c >= 0 && c < (int)sizeof(chars) && chars[c] == PUNCT)last = c;
			else if(c != ' ')last = 0;
		}
	}
}


/*
 * backsl -- called with c == '\\'; reads (with C1, except where noted) the
 * characters that make up the escape sequence.  The caller has already put
 * a blank at *lp; some cases adjust lp or *lp.
 */
backsl()	/* skip over a complete backslash construction */
{
	int bdelim;

sw:  
	switch(C1)
	{
	case '"':
		/* \" : skip the rest of the line */
		SKIP1;
		return;
	case 's':
		/* \s : skip a nested escape or a run of digits */
		if(C1 == '\\') backsl();
		else	{
			while(C1>='0' && c<='9') ;
			/* push back the first non-digit and leave a harmless '0' in c */
			ungetc(c,infile);
			c = '0';
		}
		--lp;
		return;

	case 'f':
	case 'n':
	case '*':
		/* one-character name unless it is introduced by '(' */
		if(C1 != '(')
			return;
		/* fall through for a two-character name */

	case '(':
		if(msflag){
			/* note: the first character is read with C, not C1 */
			if(C == 'e'){
				if(C1 == 'm'){
					/* \(em becomes '-' */
					*lp = '-';
					return;
				}
			}
			else if(c != '\n')C1;
			return;
		}
		if(C1 != '\n') C1;
		return;

	case '$':
		C1;	/* discard argument number */
		return;

	case 'b':
	case 'x':
	case 'v':
	case 'h':
	case 'w':
	case 'o':
	case 'l':
	case 'L':
		/* argument enclosed in a delimiter: skip to the matching delimiter or end of line */
		if( (bdelim=C1) == '\n')
			return;
		while(C1!='\n' && c!=bdelim)
			if(c == '\\') backsl();
		return;

	case '\\':
		/* doubled backslash: reinterpret what follows when inmacro is set */
		if(inmacro)
			goto sw;
		/* otherwise fall through */
	default:
		return;
	}
}




/*
 * copys -- return a freshly allocated copy of the string s.
 * Quits through fatal() when no memory is available.
 */
char *copys(s)
register char *s;
{
	register char *dst, *start;

	start = calloc( (unsigned)(strlen(s)+1), sizeof(*start) );
	if(start == NULL)
		fatal("Cannot allocate memory", (char *) NULL);

	/* copy up to and including the terminating NUL */
	for(dst = start; (*dst = *s) != '\0'; dst++, s++)
		;
	return(start);
}
/*
 * sce -- handle ".ce [n]" when msflag is on: discard the next n text lines
 * (1 when no usable argument is given).
 *
 * The argument is collected in a[]; it is NUL-terminated before atoi() is
 * applied, so the conversion never reads uninitialised bytes.  An argument
 * that fills the buffer is ignored and n falls back to 1.
 * While counting, request lines are skipped without being counted;
 * ".ce 0" and a request starting with 'P' or having 'P' as its second
 * character end the skipping early.
 */
sce(){
	register char *ap;
	register int n, i;
	char a[10];
	for(ap=a;C != '\n';ap++){
		*ap = c;
		if(ap == &a[9]){
			SKIP;
			ap=a;
			break;
		}
	}
	if(ap != a){
		*ap = '\0';	/* ap is at most &a[9] here */
		n = atoi(a);
	}
	else n = 1;
	for(i=0;i<n;){
		if(C == '.'){
			if(C == 'c'){
				if(C == 'e'){
					while(C == ' ');
					if(c == '0'){
						SKIP;
						break;
					}
					else SKIP;
				}
				else SKIP;
			}
			else if(c == 'P' || C == 'P'){
				if(c != '\n')SKIP;
				break;
			}
			else if(c != '\n')SKIP;
		}
		else {
			SKIP;
			i++;
		}
	}
}
/*
 * refer -- discard a block that comline() entered on a request starting
 * with '[' (msflag only), up to and including the line starting ".]".
 *
 * c1 is the character that followed the '['; when it is not a newline the
 * rest of that line is skipped first.  If the last character on the ".]"
 * line is punctuation it is printed after a blank.
 *
 * Differences from the historical code, all guarding against misbehaviour:
 *   - c2 has a defined value when ".]" is followed directly by a newline;
 *   - the chars[] lookup is range-checked;
 *   - an empty line, or a line consisting of "." alone, no longer makes
 *     SKIP swallow the line after it (which could be the ".]" line).
 */
refer(c1)
int c1;
{
	register int c2;

	c2 = 0;		/* class SPECIAL: nothing to print */
	if(c1 != '\n')
		SKIP;
	while(1){
		if(C != '.'){
			if(c != '\n')
				SKIP;
		}
		else if(C != ']'){
			if(c != '\n')
				SKIP;
		}
		else {
			while(C != '\n')
				c2=c;
			if(c2 >= 0 && c2 < (int)sizeof(chars) && chars[c2] == PUNCT)
				printf(" %c",c2);
			return;
		}
	}
}
/*
 * inpic -- handle a .PS ... .PE region when msflag is off.
 *
 * If the rest of the .PS line contains '<' only that line is skipped.
 * Otherwise input is read up to a line starting ".PE"; the text of
 * double-quoted strings is collected in line[] (each string followed by a
 * blank) and printed at the end of every input line that produced some,
 * through putwords() with -w or puts() plus an extra newline otherwise.
 *
 * Collected text that does not fit in line[] is dropped instead of being
 * written past the end of the buffer.
 */
inpic(){
	register int c1;
	register char *p1;
	register char *pend = &line[sizeof(line) - 1];	/* reserved for the NUL */
/*	SKIP1;*/
	while(C1 != '\n')
		if(c == '<'){
			SKIP1;
			return;
		}
	p1 = line;
	c = '\n';
	while(1){
		c1 = c;		/* previous character, to detect start of line */
		if(C1 == '.' && c1 == '\n'){
			if(C1 != 'P'){
				if(c == '\n')continue;
				else { SKIP1; c='\n'; continue;}
			}
			if(C1 != 'E'){
				if(c == '\n')continue;
				else { SKIP1; c='\n';continue; }
			}
			SKIP1;
			return;
		}
		else if(c == '\"'){
			while(C1 != '\"'){
				if(c == '\\'){
					/* an escaped quote does not end the string */
					if(C1 == '\"')continue;
					ungetc(c,infile);
					backsl();
				}
				else if(p1 < pend)
					*p1++ = c;
			}
			if(p1 < pend)
				*p1++ = ' ';
		}
		else if(c == '\n' && p1 != line){
			*p1 = '\0';
			if(wordflag)putwords(NO);
			else {
				puts(line);
				putchar('\n');
			}
			p1 = line;
		}
	}
}