OpenSolaris_b135/cmd/tic/tic_scan.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*	Copyright (c) 1988 AT&T	*/
/*	  All Rights Reserved  	*/


/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 *			COPYRIGHT NOTICE
 *
 *	This software is copyright(C) 1982 by Pavel Curtis
 *
 *	Permission is granted to reproduce and distribute
 *	this file by any means so long as no fee is charged
 *	above a nominal handling fee and so long as this
 *	notice is always included in the copies.
 *
 *	Other rights are reserved except as explicitly granted
 *	by written permission of the author.
 *		Pavel Curtis
 *		Computer Science Dept.
 *		405 Upson Hall
 *		Cornell University
 *		Ithaca, NY 14853
 *
 *		Ph- (607) 256-4934
 *
 *		Pavel.Cornell@Udel-Relay(ARPAnet)
 *		decvax!cornell!pavel(UUCPnet)
 */

/*
 *	comp_scan.c --- Lexical scanner for terminfo compiler.
 *
 *   $Log:	RCS/comp_scan.v $
 * Revision 2.1  82/10/25  14:45:55  pavel
 * Added Copyright Notice
 *
 * Revision 2.0  82/10/24  15:17:12  pavel
 * Beta-one Test Release
 *
 * Revision 1.3  82/08/23  22:30:03  pavel
 * The REAL Alpha-one Release Version
 *
 * Revision 1.2  82/08/19  19:10:06  pavel
 * Alpha Test Release One
 *
 * Revision 1.1  82/08/12  18:37:46  pavel
 * Initial revision
 *
 *
 */


#include <stdio.h>
#include <ctype.h>
#include "compiler.h"

/*
 * True when 'ch' is a blank or a tab (newline is not included).
 * The argument is evaluated twice, so it must not have side effects.
 */
#define	iswhite(ch)	((ch) == ' ' || (ch) == '\t')


static int	first_column;		/* See 'next_char()' below */

static void backspace(void);
void reset_input(void);
void panic_mode(int);



/*
 *	int
 *	get_token()
 *
 *	Scans the input for the next token, storing the specifics in the
 *	global structure 'curr_token' and returning one of the following:
 *
 *		NAMES		A line beginning in column 1.  'name'
 *				will be set to point to everything up to
 *				but not including the first comma on the line.
 *		BOOLEAN		An entry consisting of a name followed by
 *				a comma.  'name' will be set to point to the
 *				name of the capability.
 *		NUMBER		An entry of the form
 *					name#digits,
 *				'name' will be set to point to the capability
 *				name and 'valnumber' to the number given.
 *				A leading "0x"/"0X" selects hexadecimal and
 *				any other leading '0' selects octal.
 *		STRING		An entry of the form
 *					name=characters,
 *				'name' is set to the capability name and
 *				'valstring' to the string of characters, with
 *				input translations done.
 *		CANCEL		An entry of the form
 *					name@,
 *				'name' is set to the capability name
 *				('valnumber' is not modified).
 *		EOF		The end of the file has been reached.
 *
 *	'name' and 'valstring' point into a static buffer that is
 *	overwritten by the next call.
 *
 *	An entry preceded by '.' is treated as commented out: it is scanned
 *	(and shown in the debugging output) and then the following token is
 *	returned instead.  A name followed by a character other than
 *	',', '@', '#' or '=' draws a warning and is skipped in the same way,
 *	so the caller only ever sees the values listed above.
 *
 */

int
get_token()
{
	long		number;
	int		type;
	int		ch;
	static char	buffer[1024];
	/*
	 * Name characters may be stored only below this address; that
	 * leaves room for the name's terminator plus at least one more
	 * byte for the terminator of a following string value.
	 */
	char *const	name_limit = buffer + sizeof (buffer) - 2;
	char		*ptr;
	int		dot_flag = FALSE;
	int		bad_token = FALSE;	/* no valid token was scanned */
	int		truncated = FALSE;	/* name did not fit in buffer */

	/* Skip newlines, blanks and tabs in front of the token. */
	while ((ch = next_char()) == '\n' || (isascii(ch) && iswhite(ch)))
	    ;

	if (ch == EOF)
	    type = EOF;
	else {
	    if (ch == '.') {
		dot_flag = TRUE;

		while ((ch = next_char()) == ' ' || ch == '\t')
		    ;
	    }

	    if (! isascii(ch) || ! isalnum(ch)) {
		warning("Illegal character - '%c'", ch);
		panic_mode(',');
	    }

	    ptr = buffer;
	    if (ch != '\n')
		*(ptr++) = ch;

	    /*
	     * 'first_column' describes the character most recently
	     * returned by next_char().
	     */
	    if (first_column) {
		while ((ch = next_char()) != ',' && ch != '\n' && ch != EOF) {
		    if (ptr < name_limit)
			*(ptr++) = ch;
		    else
			truncated = TRUE;
		}

		if (truncated)
		    warning("Name too long - truncated");

		if (ch == EOF)
		    err_abort("Premature EOF");
		else if (ch == '\n') {
		    warning("Newline in middle of terminal name");
		    panic_mode(',');
		}

		*ptr = '\0';
		curr_token.tk_name = buffer;
		type = NAMES;
	    } else {
		ch = next_char();
		while (isascii(ch) && isalnum(ch)) {
		    if (ptr < name_limit)
			*(ptr++) = ch;
		    else
			truncated = TRUE;
		    ch = next_char();
		}

		if (truncated)
		    warning("Name too long - truncated");

		/* A string value, if any, is stored right after the name. */
		*ptr++ = '\0';
		switch (ch) {
		    case ',':
			curr_token.tk_name = buffer;
			type = BOOLEAN;
			break;

		    case '@':
			if (next_char() != ',')
			    warning("Missing comma");
			curr_token.tk_name = buffer;
			type = CANCEL;
			break;

		    case '#':
			number = 0;
			/* Peek at the first character of the value. */
			if ((ch = next_char()) == ',')
			    warning("Missing numeric value");
			backspace();
			if ((ch = next_char()) == '0') {
			    if ((ch = next_char()) == 'x' || ch == 'X') {
				/* hexadecimal */
				while (isascii(ch = next_char()) &&
				    isxdigit(ch)) {
				    number *= 16;
				    if (isdigit(ch))
					number += ch - '0';
				    else if (ch >= 'a' && ch <= 'f')
					number += 10 + ch - 'a';
				    else
					number += 10 + ch - 'A';
				}
			    } else {
				/* octal; re-read the digit after the '0' */
				backspace();
				while ((ch = next_char()) >= '0' &&
				    ch <= '7')
				    number = number * 8 + ch - '0';
			    }
			} else {
			    /* decimal; re-read the first digit */
			    backspace();
			    while (isascii(ch = next_char()) &&
				isdigit(ch))
				number = number * 10 + ch - '0';
			}
			if (ch != ',')
			    warning("Missing comma");
			curr_token.tk_name = buffer;
			curr_token.tk_valnumber = number;
			type = NUMBER;
			break;

		    case '=':
			/* trans_string() returns 0 for an empty value. */
			ch = trans_string(ptr);
			if (ch != 0 && ch != ',')
			    warning("Missing comma");
			if (ch == 0)
			    warning("NULL string value");
			curr_token.tk_name = buffer;
			curr_token.tk_valstring = ptr;
			type = STRING;
			break;

		    default:
			warning("Illegal character - '%c'", ch);
			/*
			 * No token type applies, so 'type' is left unset
			 * here; it is assigned from the next token below.
			 */
			bad_token = TRUE;
			break;
		}
	    } /* end else (first_column == FALSE) */
	} /* end else (ch != EOF) */

	if (dot_flag == TRUE)
	    DEBUG(8, "Commented out ", "");

	if (!bad_token && debug_level >= 8) {
	    fprintf(stderr, "Token: ");
	    switch (type) {
		case BOOLEAN:
			fprintf(stderr, "Boolean;  name='%s'\n",
			    curr_token.tk_name);
			break;

		case NUMBER:
			fprintf(stderr, "Number; name = '%s', value = %d\n",
			    curr_token.tk_name, curr_token.tk_valnumber);
			break;

		case STRING:
			fprintf(stderr, "String; name = '%s', value = '%s'\n",
			    curr_token.tk_name, curr_token.tk_valstring);
			break;

		case CANCEL:
			fprintf(stderr, "Cancel; name = '%s'\n",
			    curr_token.tk_name);
			break;

		case NAMES:
			fprintf(stderr, "Names; value = '%s'\n",
			    curr_token.tk_name);
			break;

		case EOF:
			fprintf(stderr, "End of file\n");
			break;

		default:
			warning("Bad token type");
	    }
	}

	/* if commented out or malformed, use the next one */
	if (dot_flag == TRUE || bad_token)
	    type = get_token();

	return (type);
}



/*
 *	int
 *	next_char()
 *
 *	Returns the next character in the input stream (standard input),
 *	or EOF when no further line can be read.  Lines whose first
 *	non-blank character is '#' are skipped as comments, and the leading
 *	blanks and tabs of every other line are skipped.  The file-scope
 *	variable 'first_column' is set TRUE if the character returned is
 *	from the first column of the input line (and is not a newline),
 *	FALSE otherwise.  The global variable curr_line is incremented for
 *	each line read.  The global variable curr_file_pos is set to the
 *	file offset of the beginning of each line.
 *
 *	The character is returned as an unsigned char value, so that an
 *	input byte of 0377 cannot be mistaken for EOF.
 *
 *	Input is read in pieces of at most sizeof (line) - 1 characters; a
 *	longer physical line is handed back as several consecutive "lines",
 *	each of which increments curr_line.
 *
 */

int	curr_column = -1;
char	line[1024];

int
next_char()
{
	char	*p;

	/* Fetch a new line when none is loaded or the current one is used up. */
	if (curr_column < 0 || curr_column >= (int)sizeof (line) ||
	    line[curr_column] == '\0') {
	    do {
		curr_file_pos = ftell(stdin);

		if (fgets(line, (int)sizeof (line), stdin) == NULL)
		    return (EOF);
		curr_line++;

		/* Find the first character that is not a blank or tab. */
		p = &line[0];
		while (iswhite(*p))
		    p++;
	    } while (*p == '#');

	    curr_column = (int)(p - line);
	}

	if (curr_column == 0 && line[0] != '\n')
	    first_column = TRUE;
	else
	    first_column = FALSE;

	return ((unsigned char)line[curr_column++]);
}


/*
 * Step the read position in 'line' back by one character, so that the
 * character most recently returned by next_char() is returned again.
 * Moving in front of the start of the line is reported through
 * syserr_abort().
 */
static void
backspace(void)
{
	if (--curr_column < 0)
		syserr_abort("Backspaced off beginning of line");
}



/*
 *	reset_input()
 *
 *	Resets the input-reading routines.  Used after a seek has been done.
 *	A negative column makes the next call to next_char() discard whatever
 *	is held in 'line' and read a fresh line from the input.
 *
 */

void
reset_input(void)
{
	/* any value below zero marks the line buffer as empty */
	curr_column = -1;
}



/*
 *	int
 *	trans_string(ptr)
 *
 *	Reads characters using next_char() until encountering a comma, a new
 *	entry (a character in the first column), or end-of-file, and stores
 *	the translated, NUL-terminated string at 'ptr'.
 *
 *	The returned value is the character which caused reading to stop
 *	(a comma is substituted when a new entry was found), or 0 if nothing
 *	at all was read before that character.
 *
 *	The following translations are done on the input:
 *
 *		^X  goes to  ctrl-X (i.e. X & 037); ^@ goes to \200
 *		{\E,\n,\r,\b,\t,\f,\s}  go to
 *			{ESCAPE,newline,carriage-return,backspace,tab,
 *			formfeed,space}
 *		{\^,\\,\,,\:}  go to  {caret,backslash,comma,colon}
 *		\ddd (for ddd = up to three octal digits)  goes to
 *							the character ddd
 *		an unescaped newline is dropped
 *
 *		\e == \E
 *		\l == \n
 *		\0 == \200
 *
 *	At most 1000 input items (a plain character or one ^X or \ sequence)
 *	are stored.  Anything beyond that is still read, so that scanning
 *	resumes at the right place, but is discarded after a single warning.
 *	This keeps a string with a missing comma from writing without limit.
 *	NOTE(review): the caller in this file passes a pointer into a
 *	1024-byte buffer, after the capability name; this function cannot
 *	see how much of that buffer is left.
 *
 */

int
trans_string(char *ptr)
{
	enum { STRING_LIMIT = 1000 };	/* most items stored per string */
	int	count = 0;		/* items read so far */
	int	number;
	int	i;
	int	ch;
	int	out;			/* translated character */
	int	emit;			/* non-zero if 'out' is to be stored */

	while ((ch = next_char()) != ',' && ch != EOF && !first_column) {
	    out = 0;
	    emit = 1;

	    if (ch == '^') {
		ch = next_char();
		if (ch == EOF)
		    err_abort("Premature EOF");

		if (!isascii(ch) || ! isprint(ch)) {
		    warning("Illegal ^ character - '%c'", ch);
		}

		/* ^@ would be NUL, which cannot be stored in a C string */
		if (ch == '@')
		    out = 0200;
		else
		    out = ch & 037;
	    } else if (ch == '\\') {
		ch = next_char();
		if (ch == EOF)
		    err_abort("Premature EOF");

		if (ch >= '0' && ch <= '7') {
		    /* one digit is in hand; accept up to two more */
		    number = ch - '0';
		    for (i = 0; i < 2; i++) {
			ch = next_char();
			if (ch == EOF)
			    err_abort("Premature EOF");

			if (ch < '0' || ch > '7') {
			    backspace();
			    break;
			}

			number = number * 8 + ch - '0';
		    }

		    if (number == 0)
			number = 0200;
		    out = number;
		} else {
		    switch (ch) {
			case 'E':
			case 'e':	out = '\033';	break;

			case 'l':
			case 'n':	out = '\n';	break;

			case 'r':	out = '\r';	break;

			case 'b':	out = '\010';	break;

			case 's':	out = ' ';	break;

			case 'f':	out = '\014';	break;

			case 't':	out = '\t';	break;

			case '\\':	out = '\\';	break;

			case '^':	out = '^';	break;

			case ',':	out = ',';	break;

			case ':':	out = ':';	break;

			default:
			    warning("Illegal character in \\ sequence - '%c'",
				ch);
			    out = ch;
		    } /* endswitch (ch) */
		} /* endelse (ch < '0' ||  ch > '7') */
	    } /* end else if (ch == '\\') */
	    else if (ch == '\n') {
		emit = 0;
	    } else {
		out = ch;
	    }

	    count ++;

	    if (count > STRING_LIMIT) {
		/* warn once, when the limit is first exceeded */
		if (count == STRING_LIMIT + 1)
		    warning("Very long string found.  Missing comma?");
	    } else if (emit) {
		*(ptr++) = (char)out;
	    }
	} /* end while */

	if (ch == EOF)
	    warning("Premature EOF - missing comma?");
	/* start of new description */
	else if (first_column) {
	    /* leave the first character of the new entry to be re-read */
	    backspace();
	    warning("Missing comma?");
	    /* pretend we did get a comma */
	    ch = ',';
	}

	*ptr = '\0';

	if (count == 0)
		return (0);
	return (ch);
}

/*
 * Panic mode error recovery - read and discard input with next_char()
 * up to and including the first occurrence of "ch".  Reaching EOF also
 * ends the skip.
 */
void
panic_mode(int ch)
{
	int skipped;

	do {
		skipped = next_char();
	} while (skipped != ch && skipped != EOF);
}