/* V10/cmd/deroff.c */
/* Version identification string embedded in the binary; the "@(#)" prefix
   looks like an SCCS what(1) marker -- confirm. */
char *xxxvers = "@(#)deroff.c 1.7";
#include <stdio.h>
/* Deroff command -- strip troff, eqn, and Tbl sequences from
a file. It takes the following flag arguments:
-w causes output of one word per line
rather than in the original format.
-mm (or -ms) causes the corresponding macros to be interpreted
so that just sentences are output.
-ml also gets rid of lists.
-i causes deroff to ignore .so and .nx commands.
Without -i, deroff follows .so and .nx commands. It removes the contents
of macro definitions, equations (both .EQ ... .EN and $...$),
Tbl command sequences, and Troff backslash constructions.
All input is through the C macro; the most recently read character is in c.
*/
/*
 * C -- read the next character from infile into the global c and yield it.
 * At end of file it yields eof(); when the character equals ldelim and no
 * .so file is active (filesp==files) it yields skeqn(); a newline also
 * increments linect.
 */
#define C ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() :( c == '\n'?(linect++,c):c) ))
/* C1 -- like C, but without the ldelim/skeqn() check. */
#define C1 ( (c=getc(infile)) == EOF ? eof() : (c == '\n' ? (linect++,c): c))
/*
 * SKIP / SKIP1 -- loop header that reads (with C / C1) up to and including
 * the next newline.  The ';' written after the macro is the empty loop body.
 */
#define SKIP while(C != '\n')
#define SKIP1 while(C1 != '\n')
/*
 * SKIP_TO_COM -- NOTE: expands to SEVERAL statements, so it must only be
 * used inside braces.  It performs two SKIPs and then reads on, keeping the
 * previous character in pc, until it has read a '.' that directly follows a
 * newline and is itself followed by a character <= 'Z'.  That last
 * character is left in c.
 */
#define SKIP_TO_COM SKIP; SKIP; pc=c; while(C != '.' || pc != '\n' || C > 'Z')pc=c
/* Boolean values used for the flag variables. */
#define YES 1
#define NO 0
/* Values of mac: which macro package was selected with -m. */
#define MS 0
#define MM 1
/* Values passed as the second argument of regline()/putmac(). */
#define ONE 1
#define TWO 2
/* Value of ldelim/rdelim when no eqn delimiter is set. */
#define NOCHAR -2
/* Character classes stored in chars[]; putwords() relies on their order. */
#define SPECIAL 0
#define APOS 1
#define PUNCT 2
#define DIGIT 3
#define LETTER 4
/* Count of newlines read by C/C1; reset to 0 by opn(). */
int linect = 0;
/* Set by -n: opn() prints ".F name" and regline() prints ".N" line marks. */
int numflag = 0;
/* Set by -w: output goes through putwords(). */
int wordflag = NO;
/* Set by any -m option. */
int msflag = NO;
/* Set by -i: .so and .nx lines are skipped instead of followed. */
int iflag = NO;
/* Macro package selected by -mm / -ms (MM or MS). */
int mac = MM;
/* Set by -ml; tested in comline() for ?L macros. */
int disp = 0;
/* Set by -p; makes comline() echo paragraph/heading macros. */
int parag = 0;
/* Nonzero while inside a macro definition or a macro-call line. */
int inmacro = NO;
/* Set by tbl(), cleared by comline() on .TE. */
int intable = NO;
/* Set by skeqn() when msflag is on; regline() then emits an 'x'. */
int eqnflag = 0;
char chars[128]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */
/* Buffer in which regline() and inpic() assemble an output line. */
char line[512];
/* Current store position within line[]. */
char *lp;
/* Most recently read input character (set by C and C1). */
int c;
/* Previous character, maintained by SKIP_TO_COM and stbl(). */
int pc;
/* In-line eqn delimiters set by eqn(); NOCHAR when none are defined. */
int ldelim = NOCHAR;
int rdelim = NOCHAR;
/* Copies of main()'s arguments, used by eof() to open the next file. */
int argc;
char **argv;
/* getopt() state. */
extern int optind;
extern char*optarg;
/* File name collected by getfname(); empty string means "ignore". */
char fname[50];
/* Stack of open input files: files[0] is the command-line file, entries
   above it are pushed by .so; filesp points at the current top. */
FILE *files[15];
FILE **filesp;
/* Stream currently being read by C/C1. */
FILE *infile;
/*
 * main -- parse the options (-w -i -n -p -m<x>), open the first input file
 * (or use stdin when no file is named), fill in the character class table
 * chars[] and hand control to work().
 */
main(ac, av)
int ac;
char **av;
{
	register int k;
	register int opt;
	register char *cp;
	int bad = 0;
	FILE *opn();

	argc = ac;
	argv = av;

	while ((opt = getopt(argc, argv, "winpm:")) != EOF) {
		switch (opt) {
		case 'w':
			wordflag = YES;
			break;
		case 'i':
			iflag = YES;
			break;
		case 'n':
			numflag = 1;
			break;
		case 'p':
			parag = YES;
			break;
		case 'm':
			msflag = YES;
			/* only the first letter of the argument is examined */
			switch (*optarg) {
			case 'm':
				mac = MM;
				break;
			case 's':
				mac = MS;
				break;
			case 'l':
				disp = 1;
				break;
			default:
				bad++;
				break;
			}
			break;
		case '?':
			bad++;
			break;
		}
	}
	if (bad)
		fatal("usage: deroff [ -w ] [ -m (m s l) ] [ -i ] [ file ] ... \n", (char *) NULL);

	infile = (optind == argc) ? stdin : opn(argv[optind++]);
	files[0] = infile;
	filesp = &files[0];

	/* letters and digits */
	for (k = 0; k < 26; k++) {
		chars['a' + k] = LETTER;
		chars['A' + k] = LETTER;
	}
	for (k = 0; k < 10; k++)
		chars['0' + k] = DIGIT;
	/* characters allowed inside a word */
	for (cp = "'&\b"; *cp != '\0'; cp++)
		chars[(int) *cp] = APOS;
	/* sentence punctuation */
	for (cp = ".,;?:"; *cp != '\0'; cp++)
		chars[(int) *cp] = PUNCT;

	work();
}
/* Old-style declaration of the allocator used by getfname() and copys(). */
char *calloc();
/*
 * skeqn -- skip an in-line equation.  Invoked through the C macro after the
 * left delimiter has been read; consumes input up to and including the
 * right delimiter rdelim.  A backslash hides the following character and a
 * double-quoted string is skipped as a unit (with its own backslash
 * handling).  Newlines are still counted in linect.  When msflag is set,
 * eqnflag is raised.  Always leaves a blank in c and returns it.
 */
skeqn()
{
	for (;;) {
		c = getc(infile);
		if (c == rdelim)
			break;

		if (c == '\n') {
			linect++;
		} else if (c == '\\') {
			/* escaped character: read and ignore it */
			c = getc(infile);
		} else if (c == EOF) {
			c = eof();
		} else if (c == '"') {
			/* quoted string: run to the closing quote */
			for (;;) {
				c = getc(infile);
				if (c == '"')
					break;

				if (c == '\n') {
					linect++;
				} else if (c == EOF) {
					c = eof();
				} else if (c == '\\') {
					c = getc(infile);
					if (c == EOF)
						c = eof();
					else if (c == '\n')
						linect++;
				}
			}
		}
	}

	if (msflag)
		eqnflag = 1;
	c = ' ';
	return (c);
}
/* Fallback input used when a named file cannot be opened. */
char *devnull = "/dev/null";

/*
 * opn -- open the file named p for reading and return the stream.
 * If the open fails and msflag is set (or the file is already the
 * /dev/null fallback) the program quits via fatal(); otherwise a warning
 * is printed and /dev/null is opened instead.  On success linect is reset
 * and, with numflag, a ".F name" line is written to stdout.
 */
FILE *opn(p)
register char *p;
{
	FILE *stream;

	while ((stream = fopen(p, "r")) == NULL) {
		if (msflag || p == devnull)
			fatal("Cannot open file %s - quitting\n", p);
		fprintf(stderr, "Deroff: Cannot open file %s - continuing\n", p);
		p = devnull;		/* retry with the fallback */
	}

	linect = 0;
	if (numflag)
		printf(".F %s\n", p);
	return (stream);
}
/*
 * eof -- called by the input macros when the current file is exhausted.
 * Closes the file (unless it is stdin), then resumes the enclosing .so
 * file if there is one, else opens the next command-line file, else exits
 * with status 0.  Returns the next character obtained through C.
 */
eof()
{
	if (infile != stdin)
		fclose(infile);

	if (filesp <= files) {
		/* no .so file to return to: move on to the next argument */
		if (optind >= argc)
			exit(0);
		infile = opn(argv[optind++]);
	} else {
		infile = *--filesp;
	}
	return (C);
}
/*
 * getfname -- read the file-name argument of a .so/.nx request into fname.
 *
 * Leading blanks are skipped; the name ends at a blank, tab, backslash or
 * newline, and the remainder of the line is consumed.  fname is set to the
 * empty string (meaning "ignore this request") when
 *   - the name is one of the tmac.cs / tmac.s macro files,
 *   - the name has already been seen by an earlier call, or
 *   - the name does not fit in fname[] (a diagnostic is printed; the
 *     original code overran the buffer in this case).
 * Otherwise the name is remembered on a private list for later calls.
 * Quits through fatal() if memory for the list cannot be obtained.
 * The return value is not meaningful to callers.
 */
getfname()
{
	register char *p;
	struct chain {
		struct chain *nextp;
		char *datap;
	};
	register struct chain *q;
	static struct chain *namechain = NULL;
	char *csskip = "/usr/lib/tmac/tmac.cs";
	char *sskip = "/usr/lib/tmac/tmac.s";
	char *copys();
	int toolong = 0;

	while(C == ' ') ;

	/* collect the name, never writing past the end of fname[] */
	p = fname;
	while(c != '\n' && c != ' ' && c != '\t' && c != '\\'){
		if(p < &fname[sizeof(fname) - 1])
			*p++ = c;
		else
			toolong = 1;
		C;
	}
	*p = '\0';

	/* discard the rest of the request line */
	while(c != '\n')
		C;

	if(toolong){
		fprintf(stderr, "Deroff: file name too long: %s...\n", fname);
		fname[0] = '\0';
		return(0);
	}

	if(!strcmp(fname, csskip) || !strcmp(fname, sskip)){
		fname[0] = '\0';
		return(0);
	}

	/* see if this name has already been used */
	for(q = namechain ; q; q = q->nextp)
		if( ! strcmp(fname, q->datap))
		{
			fname[0] = '\0';
			return(0);
		}

	q = (struct chain *) calloc(1, sizeof(*q));
	if(q == NULL)
		fatal("Cannot allocate memory", (char *) NULL);
	q->nextp = namechain;
	q->datap = copys(fname);
	namechain = q;
	return(0);
}
/*
 * fatal -- print "Deroff: " followed by the message built from the
 * printf-style format s and its single string argument p on stderr,
 * then exit with status 1.
 */
fatal(s,p)
char *s, *p;
{
	fputs("Deroff: ", stderr);
	fprintf(stderr, s, p);
	exit(1);
}
/*
 * work -- main loop.  For every input line: clear eqnflag, read the first
 * character and pass the line to comline() when it starts with '.' or an
 * apostrophe, otherwise to regline().  Never returns; the program ends
 * from eof().
 */
work()
{
	while (1) {
		eqnflag = 0;
		switch (C) {
		case '.':
		case '\'':
			comline();
			break;
		default:
			regline(NO, TWO);
			break;
		}
	}
}
regline(macline,const)
int macline;
int const;
{
line[0] = c;
lp = line;
for( ; ; )
{
if(c == '\\')
{
*lp = ' ';
backsl();
if ( c == '%') /* no blank for hyphenation char */
lp--;
}
if(c == '\n') break;
if(intable && c=='T')
{
*++lp = C;
if(c=='{' || c=='}')
{
lp[-1] = ' ';
*lp = C;
}
}
else {
if((msflag == 1) && (eqnflag == 1)){
eqnflag = 0;
*++lp = 'x';
}
*++lp = C;
}
}
*lp = '\0';
if(line[0] != '\0'){
if(wordflag)
putwords();
else if(macline)
putmac(line,const);
else
puts(line);
if(numflag &&(linect%10 == 0))printf(".%d\n",linect);
}
}
putmac(s,const)
register char *s;
int const;
{
register char *t;
register found;
int last;
found = 0;
while(*s)
{
while(*s==' ' || *s=='\t')
putchar(*s++);
for(t = s ; *t!=' ' && *t!='\t' && *t!='\0' ; ++t)
;
if(*s == '\"')s++;
if(t>s+const && chars[ s[0] ]==LETTER && chars[ s[1] ]==LETTER){
while(s < t)
if(*s == '\"')s++;
else
putchar(*s++);
last = *(t-1);
found++;
}
else if(found && chars[ s[0] ] == PUNCT && s[1] == '\0')
putchar(*s++);
else{
last = *(t-1);
s = t;
}
}
putchar('\n');
if(msflag && chars[last] == PUNCT){
printf(" %c\n",last);
}
}
/*
 * putwords -- break line[] into words for the -w option and print one word
 * per output line.
 *
 * A candidate word starts at a DIGIT or LETTER and runs up to the next
 * SPECIAL character; it is printed only if it contains more than one
 * letter, after trailing apostrophes, ampersands and punctuation have been
 * stripped.
 *
 * Change from the original: chars[] was indexed directly with a plain
 * char, which reads outside the 128-entry table for bytes with the high
 * bit set.  Such bytes are now treated as SPECIAL.
 */
putwords()	/* break into words for -w option */
{
	register char *p, *p1;
	int i, nlet, ch;

	for(p1 = line ; ;)
	{
		/* skip initial specials ampersands and apostrophes */
		for( ; ; )
		{
			ch = (unsigned char) *p1;
			if(ch < (int) sizeof(chars) && chars[ch] >= DIGIT)
				break;
			if(*p1++ == '\0') return;
		}

		/* find the end of the candidate word and count its letters */
		nlet = 0;
		for(p = p1 ; ; ++p)
		{
			ch = (unsigned char) *p;
			i = (ch < (int) sizeof(chars)) ? chars[ch] : SPECIAL;
			if(i == SPECIAL)
				break;
			if(i == LETTER) ++nlet;
		}

		if(nlet>1)	/* MDM definition of word */
		{
			/* delete trailing ampersands and apostrophes */
			/* every character in [p1,p) is a valid chars[] index here */
			while(p[-1]=='\'' || p[-1]=='&'|| chars[ (unsigned char) p[-1] ] == PUNCT)
				--p;
			while(p1 < p) putchar(*p1++);
			putchar('\n');
		}
		else
			p1 = p;
	}
}
/*
 * comline -- process a request or macro line.  Called from work() after the
 * leading '.' or apostrophe has been read.
 * Reads the two characters of the request/macro name into c1 and c2 and
 * dispatches on them.  Most branches skip text (titles, footnotes,
 * displays, tables, equations ...); names that are not handled specially
 * reach the final else, where the rest of the line is processed by
 * regline() as a macro-call line.
 */
comline()
{
register int c1, c2;
com:
/* skip blanks and tabs between the control character and the name */
while(C==' ' || c=='\t')
;
comx:
/* entered directly after SKIP_TO_COM, which leaves the first name
   character in c */
if( (c1=c) == '\n')
return;
c2 = C;
/* a line starting with ".." (but not "...") ends a macro definition */
if(c1=='.' && c2!='.')
inmacro = NO;
/* ".[" : reference block, handled by refer() */
if(msflag && c1 == '['){
refer(c2);
return;
}
/* -p with mm: echo a bare ".P" */
if(parag && mac==MM && c1 == 'P' && c2 == '\n'){
printf(".P\n");
return;
}
/* one-character name: nothing more on the line */
if(c2 == '\n')
return;
/* .\" comment line */
if(c1 == '\\' && c2 == '\"')
SKIP;
/* .EQ, only in a file named on the command line (not inside .so) */
else if(c1=='E' && c2=='Q' && filesp==files)
eqn();
/* .TS, .TC or .T& : table, again only outside .so files */
else if(c1=='T' && (c2=='S' || c2=='C' || c2=='&') && filesp==files){
if(msflag){
stbl();
}
else tbl();
}
else if(c1=='T' && c2=='E')
intable = NO;
/* .de, .ig, .am : macro definitions */
else if(!inmacro && c1=='d' && c2=='e')
macro();
else if(!inmacro && c1=='i' && c2=='g')
macro();
else if(!inmacro && c1=='a' && c2 == 'm')
macro();
/* .so : push the named file on the files[] stack unless -i was given */
else if(c1=='s' && c2=='o')
{
if(iflag)
SKIP;
else
{
getfname();
if( fname[0] ){
infile = *++filesp = opn( fname );
if(!infile)
infile = *--filesp;
}
}
}
/* .nx : replace the current file by the named one unless -i was given;
   the next "else if" pairs with this "if(c1=='n' ...", because the inner
   if/else below is complete */
else if(c1=='n' && c2=='x')
if(iflag)
SKIP;
else
{
getfname();
if(fname[0] == '\0') exit(0);
if(infile != stdin)
fclose(infile);
infile = *filesp = opn(fname);
}
/* .tm and .hw : the arguments are not text */
else if(c1 == 't' && c2 == 'm')
SKIP;
else if(c1=='h' && c2=='w')
SKIP;
/* .TL : skip ahead to the next macro line and dispatch on it */
else if(msflag && c1 == 'T' && c2 == 'L'){
SKIP_TO_COM;
goto comx;
}
else if(msflag && c1=='N' && c2 == 'R')SKIP;
/* -p: echo .PP, .IP and .LP lines verbatim */
else if(parag && msflag && (c1 == 'P' || c1 == 'I' || c1 == 'L') && c2 == 'P'){
printf(".%c%c",c1,c2);
while(C != '\n')putchar(c);
putchar('\n');
}
/* -p with mm: echo ".P " lines verbatim */
else if(parag && mac==MM && c1 == 'P' && c2 == ' '){
printf(".%c%c",c1,c2);
while(C != '\n')putchar(c);
putchar('\n');
}
/* .AU / .AI */
else if(msflag && c1 == 'A' && (c2 == 'U' || c2 == 'I')){
if(mac==MM)SKIP;
else {
SKIP_TO_COM;
goto comx;
}
}
/* .FS */
else if(msflag && c1 == 'F' && c2 == 'S'){
SKIP_TO_COM;
goto comx;
}
/* .SH / .NH : with -p the heading is echoed, every continuation line
   prefixed with '!', up to the next line that starts with '.' */
else if(msflag && (c1 == 'S' || c1 == 'N') && c2 == 'H'){
if(parag){
printf(".%c%c",c1,c2);
while(C != '\n')putchar(c);
putchar(c);
putchar('!');
while(1){
while(C != '\n')putchar(c);
putchar('\n');
if(C == '.')goto com;
putchar('!');
putchar(c);
}
}
else {
SKIP_TO_COM;
goto comx;
}
}
/* .UX prints the word UNIX */
else if(c1 == 'U' && c2 == 'X'){
if(wordflag)printf("UNIX\n");
else printf("UNIX ");
}
/* .OK */
else if(msflag && c1 == 'O' && c2 == 'K'){
SKIP_TO_COM;
goto comx;
}
else if(msflag && c1 == 'N' && c2 == 'D')
SKIP;
/* mm ".H " / ".HU" : echoed with -p, skipped otherwise */
else if(msflag && mac==MM && c1=='H' && (c2==' '||c2=='U')){
if(parag){
printf(".%c%c",c1,c2);
while(C != '\n')putchar(c);
putchar('\n');
}
else {
SKIP;
}
}
/* mm macros whose second letter is 'L': skipped up to .LE by sdis() when
   -ml was given or the macro is .RL, otherwise only the line is dropped */
else if(msflag && mac==MM && c2=='L'){
if(disp || c1 == 'R')sdis('L','E');
else{
SKIP;
printf(" .");
}
}
/* .PS without -m : picture, handled by inpic() */
else if(!msflag &&c1 == 'P' && c2 == 'S'){
inpic();
}
/* .DS .NS .KS .PS with -m : skip to the matching .?E */
else if(msflag && (c1 == 'D' || c1 == 'N' || c1 == 'K'|| c1=='P') && c2 == 'S')
{
sdis(c1,'E');
} /* removed RS-RE */
else if(msflag && (c1 == 'K' && c2 == 'F'))
{
sdis(c1,'E');
}
/* .nf : skip to .fi */
else if(msflag && c1 == 'n' && c2 == 'f')
sdis('f','i');
/* .ce : skip centred lines */
else if(msflag && c1 == 'c' && c2 == 'e')
sce();
else
{
/* "..." or longer: with -m the line is dropped, otherwise the extra
   dots are consumed */
if(c1=='.' && c2=='.'){
if(msflag){
SKIP;
return;
}
while(C == '.')
;
}
/* treat the remainder as a macro-call line; inmacro is raised around the
   call (backsl() tests it for "\\") */
++inmacro;
if(c1 <= 'Z' && msflag)regline(YES,ONE);
else {
if(wordflag)c1=C;
regline(YES,TWO);
}
--inmacro;
}
}
/*
 * macro -- called from comline() for .de, .ig and .am.
 * With msflag the whole definition is skipped: after each line the next
 * characters are read with C1, and the loop ends once a line starting
 * with ".." followed by something other than a third '.' has been seen.
 * Each C1 in the condition consumes one character, so the tests depend on
 * short-circuit evaluation order.
 * Without msflag only the request line is skipped and inmacro is set;
 * comline() clears it again when it sees the ".." line.
 */
macro()
{
if(msflag){
do {
SKIP1;
} while(C1!='.' || C1!='.' || C1=='.'); /* look for .. */
/* drop whatever follows the ".." on that line */
if(c != '\n')SKIP;
return;
}
SKIP;
inmacro = YES;
}
/*
 * sdis -- skip a display or similar block.
 * a1, a2 are the two characters of the macro name that ends the block
 * (comline() passes 'L','E'; c1,'E'; and 'f','i').
 * All input is read with C1.  Lines are discarded until a line ".<a1><a2>"
 * is found; then " ." (unless an .EQ was met inside a block ended by .DE)
 * and a newline are printed.
 *   a1 == 'P' : if the request's first non-blank argument character is
 *               '<', only that line is skipped.
 *   a1 == 'L' : every macro whose second letter is 'L' increases the
 *               nesting count lct, so nested lists are skipped up to the
 *               matching .LE.
 *   a1 == 'D' : an embedded .EQ is handed to eqn().
 *   a1 == 'f' : the skip also ends at a one-letter .P or .H line, at any
 *               ?P macro under ms, or at .HU under mm.
 */
sdis(a1,a2)
char a1,a2;
{
register int c1,c2;
register int eqnf;
int lct;
if(a1 == 'P'){
if(C1 == ' ')
while(C1 == ' ');
if(c == '<'){
SKIP1;
return;
}
}
lct = 0;
eqnf=1;
/* finish the request line itself */
if(c != '\n')
SKIP1;
while(1){
/* discard lines until one starts with '.' */
while(C1 != '.')
if(c == '\n')continue;
else SKIP1;
/* read the two-character name; a newline means the name was too short */
if((c1=C1) == '\n')continue;
if((c2=C1) == '\n'){
if(a1 == 'f' && (c1 == 'P' || c1 == 'H'))
return;
continue;
}
if(c1==a1 && c2 == a2){
SKIP1;
/* end of a nested list: keep skipping */
if(lct != 0){
lct--;
continue;
}
if(eqnf)printf(" .");
putchar('\n');
return;
}
else if(a1 == 'L' && c2 == 'L'){
lct++;
SKIP1;
}
else if(a1 == 'D' && c1 == 'E' && c2 == 'Q'){
eqn();
eqnf=0;
}
else if(a1 == 'f'){
if((mac == MS && c2 == 'P') ||
(mac==MM && c1 == 'H' && c2 == 'U')){
SKIP1;
return;
}
SKIP1;
}
else SKIP1;
}
}
/*
 * tbl -- start of a table when -m is not in effect.  Reads up to the first
 * '.' (the one that ends the table format section), skips the rest of
 * that line and sets intable so that regline() handles text-block markers.
 */
tbl()
{
	for (;;) {
		if (C == '.')
			break;
	}
	SKIP;
	intable = YES;
}
/*
 * stbl -- skip a table when -m is in effect.
 * Reads up to the first '.', then uses SKIP_TO_COM to reach the next macro
 * line.  If that macro is not .TE, input is read until a '.' at the start
 * of a line is followed by "TE".  Each C in the conditions consumes one
 * character, so the tests depend on short-circuit evaluation order.
 */
stbl()
{
while(C != '.');
SKIP_TO_COM;
if(c != 'T' || C != 'E'){
SKIP;
pc=c;
while(C != '.' || pc != '\n' || C != 'T' || C != 'E')pc=c;
}
}
/*
 * eqn -- skip a displayed equation (.EQ ... .EN).
 *
 * The .EQ line itself is skipped, then lines are read with C1 until a
 * line whose first name is "EN".  A line beginning with "delim" sets the
 * in-line equation delimiters ldelim/rdelim (or clears them for
 * "delim off" or an incomplete argument).  While skipping, `last`
 * remembers a punctuation character that ends an equation line.  At .EN,
 * with msflag and no line starting with 'd' seen, "x " is printed,
 * followed by that punctuation and a newline if there was one.
 *
 * Change from the original: c comes from getc() and may be 128..255, so
 * chars[c] could read past the 128-entry table; the index is now checked.
 */
eqn()
{
register int c1, c2;
register int dflg;
char last;
last=0;
dflg = 1;
SKIP;
for( ;;)
{
if(C1 == '.' || c == '\'')
{
while(C1==' ' || c=='\t')
;
if(c=='E' && C1=='N')
{
SKIP;
if(msflag && dflg){
putchar('x');
putchar(' ');
if(last){
putchar(last);
putchar('\n');
}
}
return;
}
}
else if(c == 'd')	/* look for delim */
{
if(C1=='e' && C1=='l')
if( C1=='i' && C1=='m')
{
while(C1 == ' ');
if((c1=c)=='\n' || (c2=C1)=='\n'
|| (c1=='o' && c2=='f' && C1=='f') )
{
ldelim = NOCHAR;
rdelim = NOCHAR;
}
else {
ldelim = c1;
rdelim = c2;
}
}
/* cleared for every line that starts with 'd', as in the original */
dflg = 0;
}
/* consume the rest of the line, tracking trailing punctuation */
if(c != '\n') while(C1 != '\n'){
if(c >= 0 && c < (int) sizeof(chars) && chars[c] == PUNCT)last = c;
else if(c != ' ')last = 0;
}
}
}
/*
 * backsl -- called after a backslash has been read; consumes the rest of
 * the escape sequence using C1.  regline() has already put a blank at *lp;
 * some cases adjust lp or *lp.  On return c holds the last character
 * consumed.
 */
backsl() /* skip over a complete backslash construction */
{
int bdelim;
sw:
switch(C1)
{
/* \" : comment, skip to end of line */
case '"':
SKIP1;
return;
/* \s : point size.  The character after 's' is consumed unexamined
   unless it starts another escape; following digits are skipped, the
   first non-digit is pushed back and c is set to '0'.  lp is backed up. */
case 's':
if(C1 == '\\') backsl();
else {
while(C1>='0' && c<='9') ;
ungetc(c,infile);
c = '0';
}
--lp;
return;
/* \f \n \* : one-character name unless followed by '(' */
case 'f':
case 'n':
case '*':
if(C1 != '(')
return;
/* fall through: two-character name follows */
case '(':
if(msflag){
/* with -m, \(em becomes '-' */
if(C == 'e'){
if(C1 == 'm'){
*lp = '-';
return;
}
}
else if(c != '\n')C1;
return;
}
if(C1 != '\n') C1;
return;
case '$':
C1; /* discard argument number */
return;
/* escapes taking a delimited argument: skip to the closing delimiter
   (or end of line), recursing for nested escapes */
case 'b':
case 'x':
case 'v':
case 'h':
case 'w':
case 'o':
case 'l':
case 'L':
if( (bdelim=C1) == '\n')
return;
while(C1!='\n' && c!=bdelim)
if(c == '\\') backsl();
return;
/* doubled backslash: inside a macro, process what follows as an escape */
case '\\':
if(inmacro)
goto sw;
/* fall through */
default:
return;
}
}
/*
 * copys -- return a freshly allocated copy of the NUL-terminated string s.
 * Quits through fatal() when no memory is available.
 */
char *copys(s)
register char *s;
{
	register char *dup;
	register unsigned len, k;

	len = (unsigned)(strlen(s) + 1);	/* includes the terminator */
	dup = calloc(len, sizeof(*dup));
	if (dup == NULL)
		fatal("Cannot allocate memory", (char *) NULL);
	for (k = 0; k < len; k++)
		dup[k] = s[k];
	return (dup);
}
/*
 * sce -- skip the lines affected by a .ce request (used with -m).
 *
 * The argument of .ce is collected in a[] and converted with atoi(); when
 * it is missing or longer than the buffer, 1 is assumed.  Then that many
 * text lines are skipped.  Lines starting with '.' are not counted; a
 * ".ce 0" ends the skipping early, as does a macro whose first or second
 * letter is 'P'.
 *
 * Changes from the original:
 *  - a[] was handed to atoi() without a terminating NUL, so atoi() read
 *    uninitialised bytes;
 *  - an empty text line made SKIP swallow the following line as well; the
 *    empty line is now counted on its own.
 */
sce(){
register char *ap;
register int n, i;
char a[10];
for(ap=a;C != '\n';ap++){
*ap = c;
if(ap == &a[9]){
/* argument too long: ignore it */
SKIP;
ap=a;
break;
}
}
if(ap != a){
*ap = '\0';	/* ap <= &a[9] here, so this stays inside a[] */
n = atoi(a);
}
else n = 1;
for(i=0;i<n;){
if(C == '.'){
if(C == 'c'){
if(C == 'e'){
while(C == ' ');
if(c == '0'){
SKIP;
break;
}
else SKIP;
}
else SKIP;
}
else if(c == 'P' || C == 'P'){
if(c != '\n')SKIP;
break;
}
else if(c != '\n')SKIP;
}
else {
/* a text line; do not read past a newline already consumed */
if(c != '\n')SKIP;
i++;
}
}
}
/*
 * refer -- skip a reference block (.[ ... .]) when -m is in effect.
 *
 * c1 is the character that followed ".[" on the opening line; unless it
 * was the newline, the rest of that line is skipped.  Lines are then
 * discarded up to the one starting with ".]".  If the last character on
 * that closing line is punctuation, it is printed preceded by a blank.
 * The return value is not meaningful to callers.
 *
 * Changes from the original:
 *  - c2 was read uninitialised when nothing followed ".]";
 *  - chars[c2] was not range-checked;
 *  - after an empty line (or a lone '.'), SKIP swallowed the following
 *    line, so a ".]" right after it was missed.
 */
refer(c1)
int c1;
{
register int c2;

c2 = 0;
if(c1 != '\n')
SKIP;
while(1){
if(C != '.'){
if(c != '\n')
SKIP;
}
else {
if(C != ']'){
if(c != '\n')
SKIP;
}
else {
/* remember the last character before the newline */
while(C != '\n')
c2=c;
if(c2 >= 0 && c2 < (int) sizeof(chars) && chars[c2] == PUNCT)printf(" %c",c2);
return(0);
}
}
}
}
/*
 * inpic -- process a picture (.PS ... .PE) when -m is not in effect.
 *
 * If the .PS line contains '<', only that line is skipped.  Otherwise the
 * picture is read with C1 up to a line starting with ".PE".  Only the
 * contents of double-quoted strings are kept: they are gathered in line[]
 * (with backslash constructions removed by backsl()), each followed by a
 * blank, and printed at the end of every input line that produced text,
 * through putwords() with -w or puts() plus an extra newline otherwise.
 * The return value is not meaningful to callers.
 *
 * Change from the original: nothing kept p1 inside line[]; text beyond the
 * buffer size is now dropped instead of being written past the end.
 */
inpic(){
register int c1;
register char *p1;
/* SKIP1;*/
while(C1 != '\n')
if(c == '<'){
SKIP1;
return(0);
}
p1 = line;
c = '\n';
while(1){
c1 = c;
if(C1 == '.' && c1 == '\n'){
/* a request line: look for ".PE" */
if(C1 != 'P'){
if(c == '\n')continue;
else { SKIP1; c='\n'; continue;}
}
if(C1 != 'E'){
if(c == '\n')continue;
else { SKIP1; c='\n';continue; }
}
SKIP1;
return(0);
}
else if(c == '\"'){
while(C1 != '\"'){
if(c == '\\'){
if(C1 == '\"')continue;
ungetc(c,infile);
backsl();
}
/* leave room for the separating blank and the terminator */
else if(p1 < &line[sizeof(line) - 2])*p1++ = c;
}
if(p1 < &line[sizeof(line) - 1])
*p1++ = ' ';
}
else if(c == '\n' && p1 != line){
*p1 = '\0';
if(wordflag)putwords(NO);
else {
puts(line);
putchar('\n');
}
p1 = line;
}
}
}