Ultrix-3.1/src/cmd/ctrace/scanner.l
%{
/*
* SCCSID: @(#)scanner.l 3.0 4/21/86
* Based on: scanner.l 1.3 (System V)
*/
/**********************************************************************
* Copyright (c) Digital Equipment Corporation 1984, 1985, 1986. *
* All Rights Reserved. *
* Reference "/usr/src/COPYRIGHT" for applicable restrictions. *
**********************************************************************/
/* ctrace - C program debugging tool
*
* C statement scanner
*/
#include <ctype.h> /* isdigit macro */
#include "global.h"
#include "y.tab.h"
/* ask lex to append the next match to yytext instead of replacing it */
#define scanmore() yymore()
/* leave the enclosing function, returning X after token_fcn() has recorded it;
   this expands to a return statement, so it ends the action that uses it */
#define token(X) return(token_fcn(X))
#define LEX_EOF 0 /* EOF is 0 in lex */
char indentation[INDENTMAX]; /* leading white space for statement */
enum bool too_long = no; /* statement too long to fit in buffer */
int token_start = 0; /* buffer index for current token */
int last_yychar = LEX_EOF; /* used for parser error handling */
static int last_token = LEX_EOF; /* used for typedef name identification */
static int stmt_start = 0; /* buffer index for the start of the statement */
static char macro[TOKENMAX]; /* preprocessor macro name */
static char dummy; /* dummy target for sscanf match (receives the %1c conversion) */
static char *pp; /* preprocessor statement name (points into yytext) */
static char *s; /* string pointer (scratch, used for the #line file name) */
/* declared so that the results are treated as character pointers */
char *strchr(), *strrchr(), *strsave();
/* Pattern names defined after this C section and used by the rules:
 *   assignchars  one operator character optionally followed by < or >;
 *                combined with "=" in the ASSIGNOP rules
 *   identifier   a letter or underscore followed by letters, digits
 *                and underscores
 *   number       an optional leading ".", one digit, then any run of
 *                digits, dots, hex digits, l/L and x/X
 *   indent       a blank or a tab
 *   space        indent plus form feed and backspace
 *   nlspace      space plus newline
 */
%}
assignchars [+\-*/%&^|><][><]?
identifier [a-zA-Z_][a-zA-Z_0-9]*
number "."?[0-9][.0-9a-fA-FlLxX]*
indent [ \t]
space [ \t\f\b]
nlspace [ \t\f\b\n]
%%
\#{space}* { /* read the preprocessor statement */
/* remember where the statement name will begin: preprocessor()
appends the rest of the line to yytext after the "#" and spaces */
pp = yytext + yyleng;
preprocessor();
/* if this is a #define statement */
if (*pp == 'd') {
/* look for a constant terminated by a
semicolon. This will cause compiler errors
when used as "a=CONSTANT" even if not traced
*/
if (yytext[yyleng - 1] == ';') {
fatal("cannot handle preprocessor code, use -P option");
}
/* look for a macro that, if traced, will cause compiler errors */
/* each sscanf returns 2 only when the macro name is followed by
replacement text in which a run of other characters is followed
by one of , . = ; { } (that character lands in dummy); the first
form also requires a parenthesized, non-empty parameter list */
if (sscanf(pp, "define %[a-zA-Z0-9_](%*[^)]) %*[^,.=;{}]%1c", macro, &dummy) == 2
|| sscanf(pp, "define %[a-zA-Z0-9_] %*[^,.=;{}]%1c", macro, &dummy) == 2) {
add_symbol(macro, MACRO);
}
}
/* save the new line number from a #line statement */
if (sscanf(pp, "line %d", &yylineno) == 1) {
--yylineno; /* \n hasn't been read */
/* save any file name */
if ((s = strchr(pp, '"')) != NULL) {
/* cut the string at the last quote so that, for a properly
quoted name, s + 1 is the text between the quotes */
*(strrchr(s, '"')) = '\0';
if ((filename = strsave(s + 1)) == NULL) {
fatal("out of memory");
}
/* strlen(s) is the position of the terminator written above */
s[strlen(s)] = '"'; /* replace quote */
}
}
/* treat some preprocessor statements as tokens when in a function */
/* only the first two letters of the statement name are compared;
token() returns from the scanner, so the code below is not
reached for these statements */
if (fcn_body) {
if (strncmp(pp, "if", 2) == 0) {
token(PP_IF);
}
else if (strncmp(pp, "el", 2) == 0) {
token(PP_ELSE);
}
else if (strncmp(pp, "en", 2) == 0) {
token(PP_ENDIF);
}
}
/* echo only the preprocessor statement when in a
function because it may be within a C statement */
if (fcn_body) {
--token_start; /* back up over \n */
printf("%s", yytext + token_start);
/* drop the echoed text from the buffer and let the next
match continue after what remains */
yytext[token_start] = '\0';
yyleng = token_start;
scanmore();
}
else { /* start fresh with a new token */
token_start = 0;
}
/* add a newline so any following trace code is put on
a new line. This of course changes the line count
so a (non-zero) #line statement must be added
-- unless we are looking at preprocessor output */
putchar('\n');
if (pound_line == no) {
/* a line number of 0 is written as 1 */
printf("#line %d \"%s\"\n",
(yylineno?yylineno:1), filename);
}
}
"/*"	{
		/* Comments are written out as soon as they are seen: this
		   keeps them from overflowing the line buffer and keeps
		   any newlines inside them where they were. */
		yyleng = yyleng - 2;	/* forget the "/*" just matched */
		comment();
		/* the comment may sit in the middle of a statement, so
		   keep collecting into the same buffer */
		scanmore();
	}
\n{space}*/(\n|"/*")	|
{space}+/(\n|"/*")	{
		/* Empty lines, trailing white space and the white space
		   in front of a comment. */
		if (token_start != 0) {
			/* not at the start of the buffer: keep the white
			   space in the buffer and move past it */
			token_start = yyleng;
			scanmore();
		}
		else {
			/* at the start of the buffer: just copy it out */
			ECHO;
		}
	}
{space}+	{
		/* White space in front of a token; when it is the first
		   thing in the buffer the statement starts after it. */
		if (!token_start) {
			stmt_start = yyleng;
		}
		token_start = yyleng;
		scanmore();
	}
\n{indent}*	{
		/* A newline and the blanks/tabs that follow it: look for
		   the start of a statement. */
		if (token_start == 0) {
			stmt_start = yyleng;
			/* save the indentation of the last statement that
			   begins on a new line (yytext + 1 skips the
			   newline).  strncpy() does not add a terminator
			   when the source fills the whole count, so copy
			   one character less than the array holds and
			   terminate it explicitly; over-long indentation
			   is truncated. */
			strncpy(indentation, yytext + 1, INDENTMAX - 1);
			indentation[INDENTMAX - 1] = '\0';
		}
		token_start = yyleng;
		scanmore();
	}
{number} token(CONSTANT); /* numeric constant */
\' token(scanfor('\'')); /* allow meta-character constants */
\" token(scanfor('"')); /* string: scanfor() reads through the closing quote */
{identifier}/{nlspace}+{identifier} | /* must have intermediate blanks to prevent a match on "aa" */
{identifier}/{nlspace}*"*"[^=] {
/* an identifier followed by another identifier, or by a "*"
that is not part of "*=" */
/* check for a new typedef name */
/* it is entered as a TYPE when it is the first token of the
statement or follows a CLASS token; otherwise it is looked up */
if (token_start == stmt_start || last_token == CLASS)
token(add_symbol(yytext + token_start, TYPE));
else
token(lookup(yytext + token_start));
}
{identifier}/{nlspace}*"(" {
/* an identifier followed by "(" */
/* check for a new function name */
token(add_symbol(yytext + token_start, FUNCTION));
}
{identifier} token(lookup(yytext + token_start)); /* any other identifier */
"||" token(OROR);
"&&" token(ANDAND);
"==" |
"!=" token(EQUOP);
"<=" |
">=" |
"<" |
">" token(RELOP);
{assignchars}= | /* must be after "<=" and ">=" rules */
={assignchars} token(ASSIGNOP); /* operator on either side of the "=" */
">>" |
"<<" token(SHIFTOP);
"/" |
"%" token(DIVOP);
"!" |
"~" token(NOTCOMPL);
"++" |
"--" token(INCOP);
"." |
"->" token(DOTARROW);
. token(yytext[yyleng - 1]); /* any other character is returned as its own token value */
%%
static
preprocessor()
{
register char c, next_c; /* EOF is 0 in lex so these can be characters */
/* change "# <number>" to "#line <number>" */
if (isdigit(c = input())) {
strcat(yytext, "line ");
yyleng += 5;
}
unput(c);
/* the preprocessor statement will be output as it is read so
imbedded comments are not moved to the beginning of the line */
/* it will also be saved in yytext for later scanning */
if (!fcn_body) {
ECHO;
}
/* scan for the terminating newline */
while ((c = input()) != '\n') {
switch (c) {
case '\\': /* escape character */
if (!fcn_body)
output(c);
if (yyleng < STMTMAX)
yytext[yyleng++] = c;
c = input();
break;
case '/': /* check for a comment */
if ((next_c = input()) == '*') {
comment(); /* output the comment */
continue; /* don't save the slash */
}
else
unput(next_c); /* c may be a newline */
break;
}
if (!fcn_body)
output(c);
if (yyleng < STMTMAX)
yytext[yyleng++] = c;
}
unput(c); /* put back the newline */
yytext[yyleng] = '\0'; /* terminate the string */
}
/*
 * comment - copy a comment to the output
 *
 * Called after the opening delimiter has been read.  The delimiter is
 * written first, then every character read with input() is written up to
 * and including the closing delimiter.  Comments are output immediately
 * to prevent line buffer overflow and to preserve embedded newlines.
 *
 * Copying stops without writing anything further when input() reports end
 * of file (LEX_EOF); in particular no NUL byte is written when the input
 * ends directly after the opening delimiter.
 */
static
comment()
{
	register char c, last_c;	/* EOF is 0 in lex so these can be characters */

	printf("/*");

	/* find the trailing comment delimiter; last_c starts as a value
	   that cannot be '*', so the first character read can never be
	   mistaken for the end of the closing delimiter */
	last_c = '\0';
	while ((c = input()) != LEX_EOF) {
		putchar(c);
		if (last_c == '*' && c == '/')
			break;
		last_c = c;
	}
}
/*
 * scanfor - append a quoted constant to yytext
 *
 * Reads characters with input() and stores each one in yytext until the
 * given terminator has been stored.  The character after a backslash is
 * stored without being examined.  Whenever the buffer index reaches
 * STMTMAX the statement collected so far is flushed with dump_stmt().
 *
 * Returns CONSTANT once the terminator is found, or LEX_EOF if the input
 * ends first.
 */
static
scanfor(terminator)
register char terminator;
{
	register char ch;	/* EOF is 0 in lex so ch can be a char */

	for (;;) {
		ch = input();
		yytext[yyleng++] = ch;
		if (ch == terminator) {
			break;
		}
		if (ch == '\\') {
			/* escape character: take the next one as is */
			yytext[yyleng++] = input();
		}
		else if (ch == LEX_EOF) {
			/* end of file inside the constant */
			return(LEX_EOF);
		}
		/* an escape pair may already have pushed yyleng past STMTMAX */
		if (yyleng >= STMTMAX) {
			dump_stmt();
		}
	}
	yytext[yyleng] = '\0';
	return(CONSTANT);
}
/*
 * token_fcn - record a token's position and hand its value back
 *
 * Flushes the statement with dump_stmt() if it has outgrown STMTMAX,
 * stores the token's start and end buffer indexes in yylval.symbol,
 * advances token_start to the end of the token, asks lex to keep
 * appending to yytext, and remembers the previous and current token
 * values in last_yychar and last_token.
 *
 * Returns token_value unchanged.
 */
static
token_fcn(token_value)
register int token_value;
{
	/* check for a long statement */
	if (yyleng > STMTMAX)
		dump_stmt();

	yylval.symbol.start = token_start;
	token_start = yyleng;
	yylval.symbol.end = token_start;

	scanmore();

	last_yychar = last_token;
	last_token = token_value;
	return(token_value);
}
/*
 * dump_stmt - write out the statement buffer and start it over
 *
 * Terminates yytext at the current length, echoes it, resets yyleng to
 * zero and sets too_long to record that the statement did not fit.
 */
dump_stmt()
{
	yytext[yyleng] = '\0';
	ECHO;
	yyleng = 0;		/* the buffer is empty again */
	too_long = yes;		/* statement is too long */
}
/*
 * yywrap - local stand-in so the lex library is not needed on
 * non-UNIX systems.  Always returns 1.
 */
static
yywrap()
{
	return 1;
}