#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * This program examines each of its arguments for C function
 * definitions, and puts them in a file "tags" for use by the editor
 * (and anyone else who wants to).
 */

/*
 * program history:
 * ken arnold wrote this program.  ask him.
 * brought over to the vax by peter b. kessler 7/79
 * who disavows any knowledge of its actions,
 * except for the stuff related to the construction
 * of the search patterns.
 * Some additional enhancements made by Mark Horton, involving
 * the options and special treatment of "main", "}" at beginning
 * of line, and a few bug fixes.
 */

#define	reg	register
#define	logical	char

#define	TRUE	(1)
#define	FALSE	(0)

/*
 * Size of each character-class table: one slot for every value an
 * unsigned char can hold (8-bit chars assumed).
 */
#define	NCHARCODES	256

/*
 * Character-class tests.  Each is a table lookup.  The argument is
 * converted to unsigned char before it is used as a subscript, so a
 * char with the high bit set (negative where plain char is signed)
 * still selects a slot inside the table.
 */
#define	iswhite(arg)	(_wht[(unsigned char)(arg)])	/* T if char is white		*/
#define	begtoken(arg)	(_btk[(unsigned char)(arg)])	/* T if char can start token	*/
#define	intoken(arg)	(_itk[(unsigned char)(arg)])	/* T if char can be in token	*/
#define	endtoken(arg)	(_etk[(unsigned char)(arg)])	/* T if char ends tokens	*/
#define	isgood(arg)	(_gd[(unsigned char)(arg)])	/* T if char can be after ')'	*/

/* larger of two values; note that each argument may be evaluated twice */
#define	max(I1,I2)	((I1) > (I2) ? (I1) : (I2))

struct	nd_st {			/* sorting structure			*/
	char	*func;			/* function name		*/
	char	*file;			/* file name			*/
	char	*pat;			/* search pattern		*/
	logical	been_warned;		/* set if noticed dup		*/
	struct	nd_st	*left,*right;	/* left and right sons		*/
};

long	ftell();
#ifdef DEBUG
char	*unctrl();
#endif
typedef	struct	nd_st	NODE;

logical	number,			/* T if on line starting with #	*/
	term	= FALSE,	/* T if print on terminal	*/
	makefile= TRUE,		/* T if to create "tags" file	*/
	gotone,			/* found a func already on line	*/
				/* boolean "func" (see init)	*/
	_wht[NCHARCODES],_etk[NCHARCODES],_itk[NCHARCODES],
	_btk[NCHARCODES],_gd[NCHARCODES];

char	searchar = '?';		/* use ?...?
searches */ #define MAXPATTERN 50 /* according to bill */ int lineno; /* line number of current line */ char line[256], /* current input line */ *curfile, /* current input file name */ *outfile= "tags", /* output file */ *white = " \f\t\n", /* white chars */ *endtk = " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?", /* token ending chars */ *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz", /* token starting chars */ *intk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz0123456789", /* valid in-token chars */ *notgd = ",;"; /* non-valid after-function chars */ int file_num; /* current file number */ int aflag; /* -a: append to tags */ int uflag; /* -u: update tags */ int wflag; /* -w: suppress warnings */ FILE *inf, /* ioptr for current input file */ *outf; /* ioptr for tags file */ long lineftell; /* ftell after getc( inf ) == '\n' */ NODE *head; /* the head of the sorted binary tree */ main(ac,av) int ac; char *av[]; { char cmd[100]; int i; while (ac > 1 && av[1][0] == '-') { for (i=1; av[1][i]; i++) { switch(av[1][i]) { case 'a': aflag++; break; case 'u': uflag++; break; case 'w': wflag++; break; default: goto usage; } } ac--; av++; } if (ac <= 1) { usage: printf("Usage: ctags [-au] file ...\n"); exit(1); } init(); /* set up boolean "functions" */ /* * loop through files finding functions */ for (file_num = 1; file_num < ac; file_num++) find_funcs(av[file_num]); if (uflag) { for (i=1; i<ac; i++) { sprintf(cmd, "mv %s OTAGS ; fgrep -v '\t%s\t' OTAGS > %s ; rm OTAGS", outfile, av[i], outfile); system(cmd); } aflag++; } if ((outf = fopen(outfile, aflag ? "a" : "w")) == NULL) { perror(outfile); exit(1); } put_funcs(head); /* put the data in "tags" */ exit(0); } /* * This routine sets up the boolean psuedo-functions which work * by seting boolean flags dependent upon the corresponding character * Every char which is NOT in that string is not a white char. 
Therefore, * all of the array "_wht" is set to FALSE, and then the elements * subscripted by the chars in "white" are set to TRUE. Thus "_wht" * of a char is TRUE if it is the string "white", else FALSE. * It also open up the "tags" output file. */ init() { reg char *sp; reg int i; for (i = 0; i < 0177; i++) { _wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE; _gd[i] = TRUE; } for (sp = white; *sp; sp++) _wht[*sp] = TRUE; for (sp = endtk; *sp; sp++) _etk[*sp] = TRUE; for (sp = intk; *sp; sp++) _itk[*sp] = TRUE; for (sp = begtk; *sp; sp++) _btk[*sp] = TRUE; for (sp = notgd; *sp; sp++) _gd[*sp] = FALSE; } /* * This program opens the specified file and calls the function * which finds the function defenitions. */ find_funcs(file) char *file; { if ((inf=fopen(file,"r")) == NULL) { perror(file); return; } curfile = (char *) calloc(strlen(file)+1,1); strcpy(curfile, file); lineno = 1; C_funcs(); /* find the C-style functions */ fclose(inf); } /* * This routine finds functions in C syntax and adds them * to the list. 
*/ C_funcs() { reg char c, /* current input char */ *token, /* start of current token */ *tp; /* end of current token */ logical incom, /* T if inside a comment */ inquote, /* T if inside a quoted string */ inchar, /* T if inside a single char ' */ midtoken; /* T if in middle of token */ char *sp; /* current input char */ char tok[100]; long insub; /* level of "{}"s deep */ /* * init boolean flags, counters, and pointers */ number = gotone = midtoken = inquote = inchar = incom = FALSE; insub = 0L; sp = tp = token = line; #ifdef DEBUG printf(" t s c m q c g n\n"); printf(" s t k u o i u h o u\n"); printf(" c p p n b m d o r t m\n"); #endif while ((*sp=c=getc(inf)) != EOF) { #ifdef DEBUG printf("%2.2s: ",unctrl(c)); printf("%2.2s ",unctrl(*sp)); printf("%2.2s ",unctrl(*tp)); printf("%2.2s ",unctrl(*token)); printf("%2ld %d %d %d %d %d %d\n",insub,incom,midtoken,inquote,inchar,gotone,number); #endif /* * action based on mixture of character type, *sp, * and logical flags */ if (c == '\\') { c = *++sp = getc(inf); /* * Handling of backslash is very naive. * We do, however, turn escaped newlines * into spaces. */ if (c = '\n') c = ' '; } else if (incom) { if (c == '*') { while ((*++sp=c=getc(inf)) == '*') { #ifdef DEBUG printf("%2.2s- ",unctrl(c)); printf("%2.2s ",unctrl(*sp)); printf("%2.2s ",unctrl(*tp)); printf("%2.2s ",unctrl(*token)); printf("%2ld %d %d %d %d %d %d\n",insub,incom,midtoken,inquote,inchar,gotone,number); #endif continue; } #ifdef DEBUG printf("%2.2s- ",unctrl(c)); printf("%2.2s ",unctrl(*sp)); printf("%2.2s ",unctrl(*tp)); printf("%2.2s ",unctrl(*token)); printf("%2ld %d %d %d %d %d %d\n",insub,incom,midtoken,inquote,inchar,gotone,number); #endif if (c == '/') incom = FALSE; } } else if (inquote) { /* * Too dumb to know about \" not being magic, but * they usually occur in pairs anyway. 
*/ if ( c == '"' ) inquote = FALSE; continue; } else if (inchar) { if ( c == '\'' ) inchar = FALSE; continue; } else if (c == '"') inquote = TRUE; else if (c == '\'') inchar = TRUE; else if (c == '/') if ((*++sp=c=getc(inf)) == '*') incom = TRUE; else ungetc(*sp,inf); else if (c == '#' && sp == line) number = TRUE; else if (c == '{') insub++; else if (c == '}') if (sp == line) /* * Kludge to get back in sync after getting confused. * We really shouldn't be looking at indenting style, * but tricking with the preprocessor can get us off, * and most people indent this way anyway. * This resets level of indenting to zero if '}' is * found at beginning of line. */ insub = 0; else insub--; else if (!insub && !inquote && !inchar && !gotone) { if (midtoken) { if (endtoken(c)) { if (start_func(&sp,token,tp)) { strncpy(tok,token,tp-token+1); tok[tp-token+1] = 0; add_func(tok); gotone = TRUE; } midtoken = FALSE; token = sp; } else if (intoken(c)) tp++; } else if (begtoken(c)) { token = tp = sp; midtoken = TRUE; } } /* * move on to next char, and set flags accordingly */ sp++; if (c == '\n') { tp = token = sp = line; lineftell = ftell( inf ); #ifdef DEBUG printf("lineftell saved as %ld\n",lineftell); #endif number = gotone = midtoken = inquote = inchar = FALSE; lineno++; } } } /* * This routine checks to see if the current token is * at the start of a function. It updates the input line * so that the '(' will be in it when it returns. */ start_func(lp,token,tp) char **lp,*token,*tp; { reg char c,*sp,*tsp; static logical found; logical firsttok; /* T if have seen first token in ()'s */ int bad; sp = *lp; c = *sp; bad = FALSE; if (!number) /* space is not allowed in macro defs */ while (iswhite(c)) { *++sp = c = getc(inf); #ifdef DEBUG printf("%2.2s:\n",unctrl(c)); #endif } /* the following tries to make it so that a #define a b(c) */ /* doesn't count as a define of b. 
*/ else { logical define; define = TRUE; for (tsp = "define"; *tsp && token < tp; tsp++) if (*tsp != *token++) { define = FALSE; break; } if (define) found = 0; else found++; if (found >= 2) { gotone = TRUE; badone: bad = TRUE; goto ret; } } if (c != '(') goto badone; firsttok = FALSE; while ((*++sp=c=getc(inf)) != ')') { /* * This line used to confuse ctags: * int (*oldhup)(); * This fixes it. A nonwhite char before the first * token, other than a / (in case of a comment in there) * makes this not a declaration. */ if (begtoken(c) || c=='/') firsttok++; else if (!iswhite(c) && !firsttok) goto badone; #ifdef DEBUG printf("%2.2s:\n",unctrl(c)); #endif } #ifdef DEBUG printf("%2.2s:\n",unctrl(c)); #endif while (iswhite(*++sp=c=getc(inf))) #ifdef DEBUG printf("%2.2s:\n",unctrl(c)) #endif ; #ifdef DEBUG printf("%2.2s:\n",unctrl(c)); #endif ret: *lp = --sp; ungetc(c,inf); return !bad && isgood(c); } /* * This routine adds a function to the list */ add_func(token) char *token; { reg char *fp,*pp; reg NODE *np; if ((np = (NODE *) calloc(1,sizeof (NODE))) == NULL) { printf("too many functions to sort\n"); put_funcs(head); free_tree(head); head = np = (NODE *) calloc(1,sizeof (NODE)); } if (strcmp(token,"main") == 0) { /* * Since there are so many directories with lots of * misc. complete programs in them, main tends to get * redefined a lot. So we change all mains to instead * refer to the name of the file, without leading * pathname components and without a trailing .c. 
*/ fp = curfile; for (pp=curfile; *pp; pp++) if (*pp == '/') fp = pp+1; *token = 'M'; strcpy(token+1, fp); pp = &token[strlen(token)-2]; if (*pp == '.') *pp = 0; } fp = np->func = (char *) calloc(strlen(token)+1,sizeof (char)); np->file = curfile; strcpy(fp, token); { /* * this change to make the whole line the pattern */ long saveftell = ftell( inf ); int patlen; char ch; patlen = 0; fseek( inf , lineftell , 0 ); #ifdef DEBUG printf("saveftell=%ld, lseek back to %ld\n",saveftell,lineftell); #endif ch = getc( inf ); while ( ch != '\n' && ch != searchar && patlen < MAXPATTERN ) { patlen ++; ch = getc( inf ); } pp = np -> pat = (char *) calloc( patlen + 2 , sizeof( char ) ); fseek( inf , lineftell , 0 ); ch = getc( inf ); while ( patlen -- ) { *pp ++ = ch; ch = getc( inf ); } if ( ch == '\n' ) *pp ++ = '$'; *pp = '\0'; fseek( inf , saveftell , 0 ); #ifdef DEBUG printf("seek back to %ld, ftell is now %ld\n",saveftell,ftell(inf)); #endif } #ifdef DEBUG printf("\"%s\"\t\"%s\"\t\"%s\"\n",np->func,np->file,np->pat); #endif if (head == NULL) head = np; else add_node(np,head); } /* * This routine cfrees the entire tree from the node down. */ free_tree(node) NODE *node; { while (node) { free_tree(node->right); cfree(node); node = node->left; } } /* * This routine finds the node where the new function node * should be added. 
*/ add_node(node,cur_node) NODE *node,*cur_node; { reg int dif; dif = strcmp(node->func,cur_node->func); #ifdef DEBUG printf("strcmp(\"%s\",\"%s\") == %d\n",node->func,cur_node->func,dif); #endif if (dif == 0) { if (node->file == cur_node->file) { if (!wflag) { fprintf(stderr,"Duplicate function in file \"%s\", line %d: %s\n",node->file,lineno,node->func); fprintf(stderr,"Second entry ignored\n"); } return; } else { if (!cur_node->been_warned) if (!wflag) fprintf(stderr,"Duplicate function name in files %s and %s: %s (Warning only)\n", node->file, cur_node->file, node->func); cur_node->been_warned = TRUE; } } if (dif < 0) if (cur_node->left != NULL) add_node(node,cur_node->left); else { #ifdef DEBUG printf("adding to left branch\n"); #endif cur_node->left = node; } else if (cur_node->right != NULL) add_node(node,cur_node->right); else { #ifdef DEBUG printf("adding to right branch\n"); #endif cur_node->right = node; } } /* * This routine puts the functions in the file. */ put_funcs(node) NODE *node; { if (node == NULL) return; put_funcs(node->left); fprintf(outf,"%s\t%s\t%c^%s%c\n",node->func,node->file ,searchar,node->pat,searchar); put_funcs(node->right); } #ifdef DEBUG char * unctrl(c) char c; { static char buf[3]; if (c>=' ' && c<='~') { buf[0] = c; buf[1] = 0; } else if (c > '~') { buf[0] = '^'; buf[1] = '?'; buf[2] = 0; } else if (c < 0) { buf[0] = buf[1] = '?'; buf[2] = 0; } else { buf[0] = '\\'; buf[2] = 0; switch(c) { case '\b': buf[1] = 'b'; break; case '\t': buf[1] = 't'; break; case '\n': buf[1] = 'n'; break; default: buf[0] = '^'; buf[1] = c + 64; } } return(buf); } #endif