4.4BSD/usr/src/contrib/calc-1.26.4/token.c
/*
* Copyright (c) 1993 David I. Bell
* Permission is granted to use, distribute, or modify this source,
* provided that this copyright notice remains intact.
*
* Read input file characters into tokens
*/
#include "stdarg.h"
#include "calc.h"
#include "token.h"
#include "string.h"
/*
 * Character classification used by the scanner.
 * isletter: character lies in 'a'..'z' or 'A'..'Z'.
 * isdigit:  character lies in '0'..'9'.
 * issymbol: character may appear in a symbol name (letter, digit or '_').
 * Each macro may evaluate its argument more than once.
 */
#define isletter(ch)	((('a' <= (ch)) && ((ch) <= 'z')) || \
			 (('A' <= (ch)) && ((ch) <= 'Z')))
#define isdigit(ch)	(('0' <= (ch)) && ((ch) <= '9'))
#define issymbol(ch)	(isletter(ch) || isdigit(ch) || ('_' == (ch)))
/*
* Current token.
* Filled in by gettoken() and read back through tokenstring() and
* tokennumber(); gettoken() returns t_type again when a rescan is pending.
*/
static struct {
short t_type; /* type of token (value last returned by gettoken) */
char *t_str; /* string value or symbol name, NULL if none */
long t_numindex; /* index of numeric value, as returned by addnumber */
} curtoken;
/*
* Scanner mode flags and scratch storage.
*/
static BOOL rescan; /* TRUE to reread current token */
static BOOL newlines; /* TRUE to return newlines as tokens */
static BOOL allsyms; /* TRUE if always want a symbol token */
static STRINGHEAD strings; /* list of constant strings */
static char *numbuf; /* buffer for numeric tokens (grown by eatnumber) */
static long numbufsize; /* current size of numeric buffer, 0 until allocated */
long errorcount; /* number of compilation errors (bumped by scanerror) */
/*
 * Reserved words of the language.
 * Each entry pairs the spelling of a keyword with the token code that the
 * scanner returns for it.  The table is terminated by an entry whose name
 * is NULL.  Note that "quit" and "exit" map to the same token.
 */
struct keyword {
	char *k_name;	/* keyword name */
	int k_token;	/* token number */
};

static struct keyword keywords[] = {
	{ "if",		T_IF },
	{ "else",	T_ELSE },
	{ "for",	T_FOR },
	{ "while",	T_WHILE },
	{ "do",		T_DO },
	{ "continue",	T_CONTINUE },
	{ "break",	T_BREAK },
	{ "goto",	T_GOTO },
	{ "return",	T_RETURN },
	{ "local",	T_LOCAL },
	{ "global",	T_GLOBAL },
	{ "print",	T_PRINT },
	{ "switch",	T_SWITCH },
	{ "case",	T_CASE },
	{ "default",	T_DEFAULT },
	{ "quit",	T_QUIT },
	{ "exit",	T_QUIT },
	{ "define",	T_DEFINE },
	{ "read",	T_READ },
	{ "show",	T_SHOW },
	{ "help",	T_HELP },
	{ "write",	T_WRITE },
	{ "mat",	T_MAT },
	{ "obj",	T_OBJ },
	{ NULL,		0 }
};
/*
 * Forward declarations of the file-local scanning helpers.
 */
static void eatcomment();
static void eatstring();
static int eatsymbol();
static int eatnumber();
/*
 * Put the scanner into its starting state: initialize the list of
 * constant strings, turn off newline tokens, all-symbols mode and any
 * pending rescan, and select the PROMPT1 prompt.
 */
void
inittokens()
{
	initstr(&strings);
	rescan = allsyms = newlines = FALSE;
	setprompt(PROMPT1);
}
/*
 * Select the scanning mode from a mask of TM_* bits.
 *	TM_NEWLINES	return newlines as T_NEWLINE tokens
 *	TM_ALLSYMS	always scan a symbol token
 * A mode whose bit is absent from the mask is turned off.  The prompt is
 * PROMPT1 while newlines are being returned and PROMPT2 otherwise.
 */
void
tokenmode(flag)
	int flag;		/* mask of TM_* bits */
{
	newlines = (flag & TM_NEWLINES) ? TRUE : FALSE;
	allsyms = (flag & TM_ALLSYMS) ? TRUE : FALSE;
	setprompt(newlines ? PROMPT1 : PROMPT2);
}
/*
 * Routine to read in the next token from the input stream.
 * The type of token is returned as a value.  If the token is a string or
 * symbol name, information is saved so that the value can be retrieved
 * with tokenstring() or tokennumber().
 *
 * If a rescan is pending, the previous token type is returned again
 * without reading any input.  Blanks, tabs and NUL characters are skipped,
 * as are newlines unless newline tokens were requested, and comments.
 * In all-symbols mode everything except a blank, semicolon, double quote,
 * newline or end of input is handed to eatsymbol(); end of input is
 * excluded so that T_EOF is still reported in that mode.
 */
int
gettoken()
{
	int ch;			/* current input character */
	int type;		/* token type */

	if (rescan) {		/* rescanning */
		rescan = FALSE;
		return curtoken.t_type;
	}
	curtoken.t_str = NULL;
	curtoken.t_numindex = 0;
	type = T_NULL;
	while (type == T_NULL) {
		ch = nextchar();
		if (allsyms && (ch != ' ') && (ch != ';') && (ch != '"') &&
		    (ch != '\n') && (ch != EOF)) {
			reread();
			type = eatsymbol();
			break;
		}
		switch (ch) {
		case ' ':
		case '\t':
		case '\0':
			break;
		case '\n':
			if (newlines)
				type = T_NEWLINE;
			break;
		case EOF: type = T_EOF; break;
		case '{': type = T_LEFTBRACE; break;
		case '}': type = T_RIGHTBRACE; break;
		case '(': type = T_LEFTPAREN; break;
		case ')': type = T_RIGHTPAREN; break;
		case '[': type = T_LEFTBRACKET; break;
		case ']': type = T_RIGHTBRACKET; break;
		case ';': type = T_SEMICOLON; break;
		case ':': type = T_COLON; break;
		case ',': type = T_COMMA; break;
		case '?': type = T_QUESTIONMARK; break;
		case '"':
		case '\'':
			type = T_STRING;
			eatstring(ch);
			break;
		case '^':
			switch (nextchar()) {
			case '=': type = T_POWEREQUALS; break;
			default: type = T_POWER; reread();
			}
			break;
		case '=':
			switch (nextchar()) {
			case '=': type = T_EQ; break;
			default: type = T_ASSIGN; reread();
			}
			break;
		case '+':
			switch (nextchar()) {
			case '+': type = T_PLUSPLUS; break;
			case '=': type = T_PLUSEQUALS; break;
			default: type = T_PLUS; reread();
			}
			break;
		case '-':
			switch (nextchar()) {
			case '-': type = T_MINUSMINUS; break;
			case '=': type = T_MINUSEQUALS; break;
			default: type = T_MINUS; reread();
			}
			break;
		case '*':
			switch (nextchar()) {
			case '=': type = T_MULTEQUALS; break;
			case '*':	/* "**" is another spelling of "^" */
				switch (nextchar()) {
				case '=': type = T_POWEREQUALS; break;
				default: type = T_POWER; reread();
				}
				break;
			default: type = T_MULT; reread();
			}
			break;
		case '/':
			switch (nextchar()) {
			case '/':
				switch (nextchar()) {
				case '=': type = T_SLASHSLASHEQUALS; break;
				default: reread(); type = T_SLASHSLASH; break;
				}
				break;
			case '=': type = T_DIVEQUALS; break;
			/* type stays T_NULL, so scanning resumes after it */
			case '*': eatcomment(); break;
			default: type = T_DIV; reread();
			}
			break;
		case '%':
			switch (nextchar()) {
			case '=': type = T_MODEQUALS; break;
			default: type = T_MOD; reread();
			}
			break;
		case '<':
			switch (nextchar()) {
			case '=': type = T_LE; break;
			case '<':
				switch (nextchar()) {
				case '=': type = T_LSHIFTEQUALS; break;
				default: reread(); type = T_LEFTSHIFT; break;
				}
				break;
			default: type = T_LT; reread();
			}
			break;
		case '>':
			switch (nextchar()) {
			case '=': type = T_GE; break;
			case '>':
				switch (nextchar()) {
				case '=': type = T_RSHIFTEQUALS; break;
				default: reread(); type = T_RIGHTSHIFT; break;
				}
				break;
			default: type = T_GT; reread();
			}
			break;
		case '&':
			switch (nextchar()) {
			case '&': type = T_ANDAND; break;
			case '=': type = T_ANDEQUALS; break;
			default: type = T_AND; reread(); break;
			}
			break;
		case '|':
			switch (nextchar()) {
			case '|': type = T_OROR; break;
			case '=': type = T_OREQUALS; break;
			default: type = T_OR; reread(); break;
			}
			break;
		case '!':
			switch (nextchar()) {
			case '=': type = T_NE; break;
			default: type = T_NOT; reread(); break;
			}
			break;
		case '\\':
			/* backslash-newline continues the line */
			switch (nextchar()) {
			case '\n': setprompt(PROMPT2); break;
			default: scanerror(T_NULL, "Unknown token character '%c'", ch);
			}
			break;
		default:
			if (isletter(ch)) {
				reread();
				type = eatsymbol();
				break;
			}
			if (isdigit(ch) || (ch == '.')) {
				reread();
				type = eatnumber();
				break;
			}
			scanerror(T_NULL, "Unknown token character '%c'", ch);
		}
	}
	curtoken.t_type = (short)type;
	return type;
}
/*
 * Skip over the remainder of a comment.
 * The opening slash-asterisk has already been consumed by the caller.
 * Scanning stops after the closing asterisk-slash.  Reaching end of input
 * or a NUL character first is reported as an unterminated comment, as is
 * a newline when newline tokens are enabled and inputisterminal() is true.
 */
static void
eatcomment()
{
	int c;			/* character being examined */
	int unterminated;	/* nonzero if the comment cannot continue */

	for (;;) {
		c = nextchar();
		if (c == '*') {
			c = nextchar();
			if (c == '/')
				break;
			/* not the end: look at this character again */
			reread();
		}
		unterminated = ((c == EOF) || (c == '\0'));
		if (!unterminated && newlines && (c == '\n'))
			unterminated = inputisterminal();
		if (unterminated) {
			reread();
			scanerror(T_NULL, "Unterminated comment");
			break;
		}
	}
}
/*
* Read in a string and add it to the literal string pool.
* The leading single or double quote has been read in at this point.
*/
static void
eatstring(quotechar)
{
register char *cp; /* current character address */
int ch; /* current character */
char buf[MAXSTRING+1]; /* buffer for string */
cp = buf;
for (;;) {
ch = nextchar();
switch (ch) {
case '\0':
case EOF:
case '\n':
reread();
scanerror(T_NULL, "Unterminated string constant");
*cp = '\0';
curtoken.t_str = addliteral(buf);
return;
case '\\':
ch = nextchar();
switch (ch) {
case 'n': ch = '\n'; break;
case 'r': ch = '\r'; break;
case 't': ch = '\t'; break;
case 'b': ch = '\b'; break;
case 'f': ch = '\f'; break;
case '\n':
setprompt(PROMPT2);
continue;
case EOF:
reread();
continue;
}
*cp++ = (char)ch;
break;
case '"':
case '\'':
if (ch == quotechar) {
*cp = '\0';
curtoken.t_str = addliteral(buf);
return;
}
/* fall into default case */
default:
*cp++ = (char)ch;
}
}
}
/*
 * Read in a symbol name which may or may not be a keyword.
 * If allsyms is set, keywords are not looked up and every character up to
 * the next blank, semicolon, newline or end of input is accepted for the
 * symbol.  Otherwise the symbol consists of letters, digits and
 * underscores, and is looked up in the keyword table.
 *
 * Returns the keyword's token number if the name is a keyword, and
 * T_SYMBOL otherwise.  For T_SYMBOL the name is left in curtoken.t_str,
 * which points at a static buffer that is overwritten by the next call.
 * Names longer than SYMBOLSIZE are truncated and reported as an error.
 */
static int
eatsymbol()
{
	register struct keyword *kp;	/* pointer to current keyword */
	register char *cp;		/* current character pointer */
	int ch;				/* current character */
	int cc;				/* character count */
	static char buf[SYMBOLSIZE+1];	/* temporary buffer */

	cp = buf;
	cc = SYMBOLSIZE;
	if (allsyms) {
		for (;;) {
			ch = nextchar();
			/* end of input also ends the symbol */
			if ((ch == ' ') || (ch == ';') || (ch == '\n') ||
			    (ch == EOF))
				break;
			if (cc-- > 0)
				*cp++ = (char)ch;
		}
		reread();
		*cp = '\0';
		if (cc < 0)
			scanerror(T_NULL, "Symbol too long");
		curtoken.t_str = buf;
		return T_SYMBOL;
	}
	for (;;) {
		ch = nextchar();
		if (!issymbol(ch))
			break;
		if (cc-- > 0)
			*cp++ = (char)ch;
	}
	reread();
	*cp = '\0';
	if (cc < 0)
		scanerror(T_NULL, "Symbol too long");
	for (kp = keywords; kp->k_name; kp++)
		if (strcmp(kp->k_name, buf) == 0)
			return kp->k_token;
	curtoken.t_str = buf;
	return T_SYMBOL;
}
/*
* Read in and remember a possibly numeric constant value.
* The constant is inserted into a constant table so further uses
* of the same constant will not take more memory. This can also
* return just a period, which is used for element accesses and for
* the old numeric value.
*
* Returns T_NUMBER, T_IMAGINARY, T_PERIOD or T_OLDVALUE. For the two
* numeric types the index returned by addnumber is left in
* curtoken.t_numindex.
*
* Characters are appended to numbuf one at a time and the whole buffer is
* handed to qparse after each one; scanning goes on for as long as the
* value returned by qparse equals the number of characters read so far.
* NOTE(review): qparse is not visible here -- presumably it returns how
* many leading characters form a valid number, or a negative value for a
* malformed one; confirm against its definition.
*/
static int
eatnumber()
{
register char *cp; /* current character pointer */
long len; /* parsed size of number */
long res; /* result of parsing number */
/* The buffer is allocated on first use; one extra byte holds the NUL. */
if (numbufsize == 0) {
numbuf = (char *)malloc(128+1);
if (numbuf == NULL)
error("Cannot allocate number buffer");
numbufsize = 128;
}
cp = numbuf;
len = 0;
for (;;) {
/* Grow by 1000 characters when full, then re-derive cp in the new block. */
if (len >= numbufsize) {
cp = (char *)realloc(numbuf, numbufsize + 1001);
if (cp == NULL)
error("Cannot reallocate number buffer");
numbuf = cp;
numbufsize += 1000;
cp = &numbuf[len];
}
/* Append the next character and keep the buffer NUL terminated. */
*cp = nextchar();
*(++cp) = '\0';
/* A period directly followed by a letter is a T_PERIOD token. */
if ((numbuf[0] == '.') && isletter(numbuf[1])) {
reread();
return T_PERIOD;
}
res = qparse(numbuf, QPF_IMAG);
/* On a negative result report the error and substitute the constant 0. */
if (res < 0) {
reread();
scanerror(T_NULL, "Badly formatted number");
curtoken.t_numindex = addnumber("0");
return T_NUMBER;
}
/* Stop once the character just read was not accepted as part of the number. */
if (res != ++len)
break;
}
/* Drop the last character read from the buffer and push it back. */
cp[-1] = '\0';
reread();
/* A period standing alone denotes the old value. */
if ((numbuf[0] == '.') && (numbuf[1] == '\0')) {
curtoken.t_numindex = 0;
return T_OLDVALUE;
}
/* cp now addresses the last character kept; a trailing i or I is removed. */
cp -= 2;
res = T_NUMBER;
if ((*cp == 'i') || (*cp == 'I')) {
*cp = '\0';
res = T_IMAGINARY;
}
curtoken.t_numindex = addnumber(numbuf);
return res;
}
/*
 * Fetch the string value or symbol name saved for the current token.
 * The result is NULL when the token has no string associated with it.
 */
char *
tokenstring()
{
	char *text;		/* saved string of the token */

	text = curtoken.t_str;
	return text;
}
/*
 * Fetch the constant table index saved for the current numeric token.
 */
long
tokennumber()
{
	long index;		/* saved constant index of the token */

	index = curtoken.t_numindex;
	return index;
}
/*
* Push back the token just read so that it will be seen again.
*/
void
rescantoken()
{
rescan = TRUE;
}
/*
 * Describe an error message.
 * Then skip to the next specified token (or one more powerful).
 *
 * The error count is incremented and the message, formatted printf-style
 * from fmt and the following arguments, is written to stderr.  It is
 * preceded by the input file name and line number when inputname()
 * supplies a name.  The message is formatted directly onto stderr, so
 * its length is not limited by any local buffer.
 *
 * skip selects the recovery action:
 *	T_NULL		return without skipping anything
 *	T_COMMA		skip to a newline, semicolon, brace, end of file
 *			or comma
 *	T_SEMICOLON	skip to a newline, semicolon, brace or end of file
 * Any other value is reported and then treated as T_SEMICOLON.
 * Skipping starts with the current token, and the token that ends the
 * skip is left to be read again by the caller.
 */
#ifdef VARARGS
# define VA_ALIST skip, fmt, va_alist
# define VA_DCL int skip; char *fmt; va_dcl
#else
# ifdef __STDC__
# define VA_ALIST int skip, char *fmt, ...
# define VA_DCL
# else
# define VA_ALIST skip, fmt
# define VA_DCL int skip; char *fmt;
# endif
#endif
/*VARARGS*/
void
scanerror(VA_ALIST)
	VA_DCL
{
	va_list ap;
	char *name;		/* name of file with error */

	errorcount++;
	name = inputname();
	if (name)
		fprintf(stderr, "\"%s\", line %ld: ", name, linenumber());
#ifdef VARARGS
	va_start(ap);
#else
	va_start(ap, fmt);
#endif
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	fprintf(stderr, "\n");
	switch (skip) {
	case T_NULL:
		return;
	case T_COMMA:
		rescan = TRUE;		/* examine the current token first */
		for (;;) {
			switch (gettoken()) {
			case T_NEWLINE:
			case T_SEMICOLON:
			case T_LEFTBRACE:
			case T_RIGHTBRACE:
			case T_EOF:
			case T_COMMA:
				rescan = TRUE;	/* caller sees this token */
				return;
			}
		}
	default:
		fprintf(stderr, "Unknown skip token for scanerror\n");
		/* fall into semicolon case */
		/*FALLTHRU*/
	case T_SEMICOLON:
		rescan = TRUE;		/* examine the current token first */
		for (;;) switch (gettoken()) {
		case T_NEWLINE:
		case T_SEMICOLON:
		case T_LEFTBRACE:
		case T_RIGHTBRACE:
		case T_EOF:
			rescan = TRUE;	/* caller sees this token */
			return;
		}
	}
}
/* END CODE */