/* * Copyright (c) 1993 David I. Bell * Permission is granted to use, distribute, or modify this source, * provided that this copyright notice remains intact. * * Read input file characters into tokens */ #include "stdarg.h" #include "calc.h" #include "token.h" #include "string.h" #define isletter(ch) ((((ch) >= 'a') && ((ch) <= 'z')) || \ (((ch) >= 'A') && ((ch) <= 'Z'))) #define isdigit(ch) (((ch) >= '0') && ((ch) <= '9')) #define issymbol(ch) (isletter(ch) || isdigit(ch) || ((ch) == '_')) /* * Current token. */ static struct { short t_type; /* type of token */ char *t_str; /* string value or symbol name */ long t_numindex; /* index of numeric value */ } curtoken; static BOOL rescan; /* TRUE to reread current token */ static BOOL newlines; /* TRUE to return newlines as tokens */ static BOOL allsyms; /* TRUE if always want a symbol token */ static STRINGHEAD strings; /* list of constant strings */ static char *numbuf; /* buffer for numeric tokens */ static long numbufsize; /* current size of numeric buffer */ long errorcount; /* number of compilation errors */ /* * Table of keywords */ struct keyword { char *k_name; /* keyword name */ int k_token; /* token number */ }; static struct keyword keywords[] = { "if", T_IF, "else", T_ELSE, "for", T_FOR, "while", T_WHILE, "do", T_DO, "continue", T_CONTINUE, "break", T_BREAK, "goto", T_GOTO, "return", T_RETURN, "local", T_LOCAL, "global", T_GLOBAL, "print", T_PRINT, "switch", T_SWITCH, "case", T_CASE, "default", T_DEFAULT, "quit", T_QUIT, "exit", T_QUIT, "define", T_DEFINE, "read", T_READ, "show", T_SHOW, "help", T_HELP, "write", T_WRITE, "mat", T_MAT, "obj", T_OBJ, NULL, 0 }; static void eatcomment(), eatstring(); static int eatsymbol(), eatnumber(); /* * Initialize all token information. */ void inittokens() { initstr(&strings); newlines = FALSE; allsyms = FALSE; rescan = FALSE; setprompt(PROMPT1); } void tokenmode(flag) { newlines = FALSE; allsyms = FALSE; if (flag & TM_NEWLINES) newlines = TRUE; if (flag & TM_ALLSYMS) allsyms = TRUE; setprompt(newlines ? PROMPT1 : PROMPT2); } /* * Routine to read in the next token from the input stream. * The type of token is returned as a value. If the token is a string or * symbol name, information is saved so that the value can be retrieved. */ int gettoken() { int ch; /* current input character */ int type; /* token type */ if (rescan) { /* rescanning */ rescan = FALSE; return curtoken.t_type; } curtoken.t_str = NULL; curtoken.t_numindex = 0; type = T_NULL; while (type == T_NULL) { ch = nextchar(); if (allsyms && ((ch!=' ') && (ch!=';') && (ch!='"') && (ch!='\n'))) { reread(); type = eatsymbol(); break; } switch (ch) { case ' ': case '\t': case '\0': break; case '\n': if (newlines) type = T_NEWLINE; break; case EOF: type = T_EOF; break; case '{': type = T_LEFTBRACE; break; case '}': type = T_RIGHTBRACE; break; case '(': type = T_LEFTPAREN; break; case ')': type = T_RIGHTPAREN; break; case '[': type = T_LEFTBRACKET; break; case ']': type = T_RIGHTBRACKET; break; case ';': type = T_SEMICOLON; break; case ':': type = T_COLON; break; case ',': type = T_COMMA; break; case '?': type = T_QUESTIONMARK; break; case '"': case '\'': type = T_STRING; eatstring(ch); break; case '^': switch (nextchar()) { case '=': type = T_POWEREQUALS; break; default: type = T_POWER; reread(); } break; case '=': switch (nextchar()) { case '=': type = T_EQ; break; default: type = T_ASSIGN; reread(); } break; case '+': switch (nextchar()) { case '+': type = T_PLUSPLUS; break; case '=': type = T_PLUSEQUALS; break; default: type = T_PLUS; reread(); } break; case '-': switch (nextchar()) { case '-': type = T_MINUSMINUS; break; case '=': type = T_MINUSEQUALS; break; default: type = T_MINUS; reread(); } break; case '*': switch (nextchar()) { case '=': type = T_MULTEQUALS; break; case '*': switch (nextchar()) { case '=': type = T_POWEREQUALS; break; default: type = T_POWER; reread(); } break; default: type = T_MULT; reread(); } break; case '/': switch (nextchar()) { case '/': switch (nextchar()) { case '=': type = T_SLASHSLASHEQUALS; break; default: reread(); type = T_SLASHSLASH; break; } break; case '=': type = T_DIVEQUALS; break; case '*': eatcomment(); break; default: type = T_DIV; reread(); } break; case '%': switch (nextchar()) { case '=': type = T_MODEQUALS; break; default: type = T_MOD; reread(); } break; case '<': switch (nextchar()) { case '=': type = T_LE; break; case '<': switch (nextchar()) { case '=': type = T_LSHIFTEQUALS; break; default: reread(); type = T_LEFTSHIFT; break; } break; default: type = T_LT; reread(); } break; case '>': switch (nextchar()) { case '=': type = T_GE; break; case '>': switch (nextchar()) { case '=': type = T_RSHIFTEQUALS; break; default: reread(); type = T_RIGHTSHIFT; break; } break; default: type = T_GT; reread(); } break; case '&': switch (nextchar()) { case '&': type = T_ANDAND; break; case '=': type = T_ANDEQUALS; break; default: type = T_AND; reread(); break; } break; case '|': switch (nextchar()) { case '|': type = T_OROR; break; case '=': type = T_OREQUALS; break; default: type = T_OR; reread(); break; } break; case '!': switch (nextchar()) { case '=': type = T_NE; break; default: type = T_NOT; reread(); break; } break; case '\\': switch (nextchar()) { case '\n': setprompt(PROMPT2); break; default: scanerror(T_NULL, "Unknown token character '%c'", ch); } break; default: if (isletter(ch)) { reread(); type = eatsymbol(); break; } if (isdigit(ch) || (ch == '.')) { reread(); type = eatnumber(); break; } scanerror(T_NULL, "Unknown token character '%c'", ch); } } curtoken.t_type = (short)type; return type; } /* * Continue to eat up a comment string. * The leading slash-asterisk has just been scanned at this point. */ static void eatcomment() { int ch; for (;;) { ch = nextchar(); if (ch == '*') { ch = nextchar(); if (ch == '/') return; reread(); } if ((ch == EOF) || (ch == '\0') || (newlines && (ch == '\n') && inputisterminal())) { reread(); scanerror(T_NULL, "Unterminated comment"); return; } } } /* * Read in a string and add it to the literal string pool. * The leading single or double quote has been read in at this point. */ static void eatstring(quotechar) { register char *cp; /* current character address */ int ch; /* current character */ char buf[MAXSTRING+1]; /* buffer for string */ cp = buf; for (;;) { ch = nextchar(); switch (ch) { case '\0': case EOF: case '\n': reread(); scanerror(T_NULL, "Unterminated string constant"); *cp = '\0'; curtoken.t_str = addliteral(buf); return; case '\\': ch = nextchar(); switch (ch) { case 'n': ch = '\n'; break; case 'r': ch = '\r'; break; case 't': ch = '\t'; break; case 'b': ch = '\b'; break; case 'f': ch = '\f'; break; case '\n': setprompt(PROMPT2); continue; case EOF: reread(); continue; } *cp++ = (char)ch; break; case '"': case '\'': if (ch == quotechar) { *cp = '\0'; curtoken.t_str = addliteral(buf); return; } /* fall into default case */ default: *cp++ = (char)ch; } } } /* * Read in a symbol name which may or may not be a keyword. * If allsyms is set, keywords are not looked up and almost all chars * will be accepted for the symbol. Returns the type of symbol found. */ static int eatsymbol() { register struct keyword *kp; /* pointer to current keyword */ register char *cp; /* current character pointer */ int ch; /* current character */ int cc; /* character count */ static char buf[SYMBOLSIZE+1]; /* temporary buffer */ cp = buf; cc = SYMBOLSIZE; if (allsyms) { for (;;) { ch = nextchar(); if ((ch == ' ') || (ch == ';') || (ch == '\n')) break; if (cc-- > 0) *cp++ = (char)ch; } reread(); *cp = '\0'; if (cc < 0) scanerror(T_NULL, "Symbol too long"); curtoken.t_str = buf; return T_SYMBOL; } for (;;) { ch = nextchar(); if (!issymbol(ch)) break; if (cc-- > 0) *cp++ = (char)ch; } reread(); *cp = '\0'; if (cc < 0) scanerror(T_NULL, "Symbol too long"); for (kp = keywords; kp->k_name; kp++) if (strcmp(kp->k_name, buf) == 0) return kp->k_token; curtoken.t_str = buf; return T_SYMBOL; } /* * Read in and remember a possibly numeric constant value. * The constant is inserted into a constant table so further uses * of the same constant will not take more memory. This can also * return just a period, which is used for element accesses and for * the old numeric value. */ static int eatnumber() { register char *cp; /* current character pointer */ long len; /* parsed size of number */ long res; /* result of parsing number */ if (numbufsize == 0) { numbuf = (char *)malloc(128+1); if (numbuf == NULL) error("Cannot allocate number buffer"); numbufsize = 128; } cp = numbuf; len = 0; for (;;) { if (len >= numbufsize) { cp = (char *)realloc(numbuf, numbufsize + 1001); if (cp == NULL) error("Cannot reallocate number buffer"); numbuf = cp; numbufsize += 1000; cp = &numbuf[len]; } *cp = nextchar(); *(++cp) = '\0'; if ((numbuf[0] == '.') && isletter(numbuf[1])) { reread(); return T_PERIOD; } res = qparse(numbuf, QPF_IMAG); if (res < 0) { reread(); scanerror(T_NULL, "Badly formatted number"); curtoken.t_numindex = addnumber("0"); return T_NUMBER; } if (res != ++len) break; } cp[-1] = '\0'; reread(); if ((numbuf[0] == '.') && (numbuf[1] == '\0')) { curtoken.t_numindex = 0; return T_OLDVALUE; } cp -= 2; res = T_NUMBER; if ((*cp == 'i') || (*cp == 'I')) { *cp = '\0'; res = T_IMAGINARY; } curtoken.t_numindex = addnumber(numbuf); return res; } /* * Return the string value of the current token. */ char * tokenstring() { return curtoken.t_str; } /* * Return the constant index of a numeric token. */ long tokennumber() { return curtoken.t_numindex; } /* * Push back the token just read so that it will be seen again. */ void rescantoken() { rescan = TRUE; } /* * Describe an error message. * Then skip to the next specified token (or one more powerful). */ #ifdef VARARGS # define VA_ALIST skip, fmt, va_alist # define VA_DCL int skip; char *fmt; va_dcl #else # ifdef __STDC__ # define VA_ALIST int skip, char *fmt, ... # define VA_DCL # else # define VA_ALIST skip, fmt # define VA_DCL int skip; char *fmt; # endif #endif /*VARARGS*/ void scanerror(VA_ALIST) VA_DCL { va_list ap; char *name; /* name of file with error */ char buf[MAXERROR+1]; errorcount++; name = inputname(); if (name) fprintf(stderr, "\"%s\", line %ld: ", name, linenumber()); #ifdef VARARGS va_start(ap); #else va_start(ap, fmt); #endif vsprintf(buf, fmt, ap); va_end(ap); fprintf(stderr, "%s\n", buf); switch (skip) { case T_NULL: return; case T_COMMA: rescan = TRUE; for (;;) { switch (gettoken()) { case T_NEWLINE: case T_SEMICOLON: case T_LEFTBRACE: case T_RIGHTBRACE: case T_EOF: case T_COMMA: rescan = TRUE; return; } } default: fprintf(stderr, "Unknown skip token for scanerror\n"); /* fall into semicolon case */ /*FALLTHRU*/ case T_SEMICOLON: rescan = TRUE; for (;;) switch (gettoken()) { case T_NEWLINE: case T_SEMICOLON: case T_LEFTBRACE: case T_RIGHTBRACE: case T_EOF: rescan = TRUE; return; } } } /* END CODE */