4.4BSD/usr/src/usr.bin/lex/scan.l

/*-
 * Copyright (c) 1989, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Vern Paxson of Lawrence Berkeley Laboratory.
 * 
 * The United States Government has rights in this work pursuant 
 * to contract no. DE-AC03-76SF00098 between the United States
 * Department of Energy and the University of California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)scan.l	8.1 (Berkeley) 6/6/93
 */

/* scan.l - scanner for flex input */

%{
/* discard any macro named yywrap so that the yywrap() routine defined in
 * the user-code section at the end of this file is the one that is called
 */
#undef yywrap

#include "flexdef.h"
#include "parse.h"

#ifndef lint
static char copyright[] =
"@(#) Copyright (c) 1989, 1990, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";

static char rcsid[] =
    "@(#) $Header: /usr/fsys/odin/a/vern/flex/RCS/scan.l,v 2.8 90/05/26 16:53:23 vern Exp $ (LBL)";
#endif

/* copy the text of the current token to the temporary action file */
#define ACTION_ECHO fprintf( temp_action_file, "%s", yytext )

/* write the end-of-prolog marker line to the temporary action file.
 * Note that the expansion already ends in a semicolon.
 */
#define MARK_END_OF_PROLOG fprintf( temp_action_file, "%%%% end of prolog\n" );

/* the generated scanning routine is declared as "int flexscan()" */
#undef YY_DECL
#define YY_DECL \
	int flexscan()

/* return the first character of the token as a CHAR token.  Expands to
 * two statements, so it must not be the unbraced body of an if/else/loop.
 */
#define RETURNCHAR \
	yylval = yytext[0]; \
	return ( CHAR );

/* save the token text in nmstr and return a NAME token.  Also expands to
 * two statements.  The copy is unbounded: nmstr must be large enough to
 * hold the token.
 */
#define RETURNNAME \
	(void) strcpy( nmstr, (char *) yytext ); \
	return ( NAME );

/* push the characters of str back onto the input with unput(), from the
 * last character down to index "start".  Uses an integer variable "i"
 * from the enclosing scope and expands to a for statement that has no
 * terminating semicolon of its own.
 */
#define PUT_BACK_STRING(str, start) \
	for ( i = strlen( (char *) (str) ) - 1; i >= start; --i ) \
	    unput((str)[i])

/* set the "reject" flag when all_upper() accepts str */
#define CHECK_REJECT(str) \
	if ( all_upper( str ) ) \
	    reject = true;

/* set yymore_used when all_lower() accepts str */
#define CHECK_YYMORE(str) \
	if ( all_lower( str ) ) \
	    yymore_used = true;
%}

/* exclusive start conditions; each one selects a group of rules below */
%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
%x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT ACTION_COMMENT
%x ACTION_STRING PERCENT_BRACE_ACTION USED_LIST CODEBLOCK_2 XLATION

/* WS is a non-empty run of blanks, tabs and form feeds, OPTWS a possibly
 * empty one, and NOT_WS any single character that is neither one of
 * those nor a newline
 */
WS		[ \t\f]+
OPTWS		[ \t\f]*
NOT_WS		[^ \t\f\n]

/* NAME is a letter or underscore followed by letters, underscores, digits
 * or hyphens; NOT_NAME is a run of characters that cannot begin a name.
 * NOTE(review): only a-z is listed; presumably this scanner is generated
 * case-insensitively so that upper-case letters match too - confirm
 * against the build flags
 */
NAME		[a-z_][a-z_0-9-]*
NOT_NAME	[^a-z_\n]+

/* start condition names have the same syntax as other names */
SCNAME		{NAME}

/* a backslash followed by any character other than newline, by one to
 * three decimal digits, or by an x and one or two hexadecimal digits
 */
ESCSEQ		\\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2})

%%
    /* declarations local to the scanning routine.  bracelevel and didadef
     * are static; the others are ordinary locals, so their values last
     * only for one call of the routine (doing_codeblock is reset to
     * false on every call).  myesc() is declared here as returning Char.
     */
    static int bracelevel, didadef;
    int i, indented_code, checking_used, new_xlation;
    int doing_codeblock = false;
    Char nmdef[MAXLINE], myesc();

^{WS}			indented_code = true; BEGIN(CODEBLOCK); /* indented line: copied as code */
^#.*\n			++linenum; /* treat as a comment */
^"/*"			ECHO; BEGIN(C_COMMENT);
	/* start condition declarations */
^"%s"{NAME}?		return ( SCDECL );
^"%x"{NAME}?		return ( XSCDECL );
^"%{".*\n		{
			/* start of a literal code block: call
			 * line_directive_out() on stdout, then copy
			 * lines in CODEBLOCK until the closing marker
			 */
			++linenum;
			line_directive_out( stdout );
			indented_code = false;
			BEGIN(CODEBLOCK);
			}

{WS}			return ( WHITESPACE );

^"%%".*			{
			/* end of section 1: the lines that precede the
			 * first rule are handled in SECT2PROLOG
			 */
			sectnum = 2;
			line_directive_out( stdout );
			BEGIN(SECT2PROLOG);
			return ( SECTEND );
			}

^"%used"		{
			/* deprecated directive: warn, then read the rest
			 * of the line in USED_LIST, recording each
			 * feature named there as REALLY_USED
			 */
	pinpoint_message( "warning - %%used/%%unused have been deprecated" );
			checking_used = REALLY_USED; BEGIN(USED_LIST);
			}
^"%unused"		{
			/* as for %used, but the features named are
			 * recorded as REALLY_NOT_USED
			 */
	pinpoint_message( "warning - %%used/%%unused have been deprecated" );
			checking_used = REALLY_NOT_USED; BEGIN(USED_LIST);
			}


^"%"[aeknopt]{WS}.*\n	{
			/* old-style lex command: the whole line is
			 * skipped.  The diagnostic below is compiled
			 * only when NOTDEF is defined.
			 */
#ifdef NOTDEF
			fprintf( stderr,
			     "old-style lex command at line %d ignored:\n\t%s",
				 linenum, yytext );
#endif
			++linenum;
			}

^"%"[cr]{OPTWS}		/* ignore old lex directive */

%t{OPTWS}\n		{
			/* start of a translation table: allocate one
			 * int for each of the csize characters, clear
			 * them all, and read the rows in XLATION
			 * NOTE(review): xlation is assigned without
			 * freeing a previous table, so a second table
			 * in the same input would leak the first
			 */
			++linenum;
			xlation =
			    (int *) malloc( sizeof( int ) * (unsigned) csize );

			if ( ! xlation )
			    flexfatal(
				"dynamic memory failure building %t table" );

			for ( i = 0; i < csize; ++i )
			    xlation[i] = 0;

			num_xlations = 0;

			BEGIN(XLATION);
			}

	/* a directive character that none of the rules above accept */
^"%"[^sxanpekotcru{}]{OPTWS}	synerr( "unrecognized '%' directive" );

^{NAME}			{
			/* a name at the start of a line begins a name
			 * definition: remember the name in nmstr and
			 * pick up its value in PICKUPDEF
			 */
			(void) strcpy( nmstr, (char *) yytext );
			didadef = false;
			BEGIN(PICKUPDEF);
			}

	/* a name that does not start a line is returned as a NAME token */
{SCNAME}		RETURNNAME;
^{OPTWS}\n		++linenum; /* allows blank lines in section 1 */
{OPTWS}\n		++linenum; return ( '\n' );
	/* anything else is an error; the RECOVER rules take over from here */
.			synerr( "illegal character" ); BEGIN(RECOVER);


<C_COMMENT>"*/"		ECHO; BEGIN(INITIAL); /* end of a section 1 comment */
<C_COMMENT>"*/".*\n	++linenum; ECHO; BEGIN(INITIAL);
	/* the text of the comment, delimiters included, is copied with ECHO */
<C_COMMENT>[^*\n]+	ECHO;
<C_COMMENT>"*"		ECHO;
<C_COMMENT>\n		++linenum; ECHO;


<CODEBLOCK>^"%}".*\n	++linenum; BEGIN(INITIAL); /* end of a literal code block */
	/* code is copied with ECHO; the two feature names are also passed
	 * to the CHECK_ macros so that their use can be recorded
	 */
<CODEBLOCK>"reject"	ECHO; CHECK_REJECT(yytext);
<CODEBLOCK>"yymore"	ECHO; CHECK_YYMORE(yytext);
<CODEBLOCK>{NAME}|{NOT_NAME}|.	ECHO;
<CODEBLOCK>\n		{
			++linenum;
			ECHO;
			/* an indented line of code ends at its newline */
			if ( indented_code )
			    BEGIN(INITIAL);
			}


<PICKUPDEF>{WS}		/* separates name and definition */

<PICKUPDEF>{NOT_WS}.*	{
			/* the definition is the rest of the line with
			 * trailing blanks and tabs removed; it is
			 * installed under the name saved in nmstr
			 * NOTE(review): the copy into nmdef is not
			 * bounded; a definition of MAXLINE or more
			 * characters would overrun the array
			 */
			(void) strcpy( (char *) nmdef, (char *) yytext );

			/* the index test has to guard both character
			 * comparisons, hence the parentheses
			 */
			for ( i = strlen( (char *) nmdef ) - 1;
			      i >= 0 &&
			      (nmdef[i] == ' ' || nmdef[i] == '\t');
			      --i )
			    ;

			nmdef[i + 1] = '\0';

			ndinstal( nmstr, nmdef );
			didadef = true;
			}

<PICKUPDEF>\n		{
			/* end of the line holding the definition */
			if ( ! didadef )
			    synerr( "incomplete name definition" );
			BEGIN(INITIAL);
			++linenum;
			}

<RECOVER>.*\n		++linenum; BEGIN(INITIAL); /* discard the rest of the bad line */


<USED_LIST>\n		++linenum; BEGIN(INITIAL); /* end of the list */
<USED_LIST>{WS}
<USED_LIST>"reject"	{
			/* the name counts only in the spelling that
			 * all_upper() accepts
			 */
			if ( all_upper( yytext ) )
			    reject_really_used = checking_used;
			else
			    synerr( "unrecognized %used/%unused construct" );
			}
<USED_LIST>"yymore"	{
			/* the name counts only in the spelling that
			 * all_lower() accepts
			 */
			if ( all_lower( yytext ) )
			    yymore_really_used = checking_used;
			else
			    synerr( "unrecognized %used/%unused construct" );
			}
<USED_LIST>{NOT_WS}+	synerr( "unrecognized %used/%unused construct" );


<XLATION>"%t"{OPTWS}\n	++linenum; BEGIN(INITIAL); /* end of the translation table */
	/* a number at the start of a line begins a new row of the table */
<XLATION>^{OPTWS}[0-9]+	++num_xlations; new_xlation = true;
<XLATION>^.		synerr( "bad row in translation table" );
<XLATION>{WS}		/* ignore whitespace */

<XLATION>{ESCSEQ}	{
			/* the first character of a row is entered with
			 * the row number, later ones with its negation
			 * NOTE(review): the index is not checked
			 * against the csize entries allocated for
			 * xlation - confirm it cannot exceed them
			 */
			xlation[myesc( yytext )] =
				(new_xlation ? num_xlations : -num_xlations);
			new_xlation = false;
			}
<XLATION>.		{
			/* same as above for an unescaped character */
			xlation[yytext[0]] =
				(new_xlation ? num_xlations : -num_xlations);
			new_xlation = false;
			}

<XLATION>\n		++linenum;


<SECT2PROLOG>.*\n/{NOT_WS}	{
			/* last line of the prolog: the line after it
			 * does not start with white space, so copy
			 * this one, write the end-of-prolog marker
			 * and start scanning rules
			 */
			++linenum;
			ACTION_ECHO;
			MARK_END_OF_PROLOG;
			BEGIN(SECT2);
			}

	/* any other line belongs to the prolog and is copied */
<SECT2PROLOG>.*\n	++linenum; ACTION_ECHO;

<SECT2PROLOG><<EOF>>	MARK_END_OF_PROLOG; yyterminate();

<SECT2>^{OPTWS}\n	++linenum; /* allow blank lines in section 2 */

<SECT2>^({WS}|"%{")	{
			/* a line that begins with white space or with
			 * the literal-block opener is code, not a
			 * rule; it is copied in CODEBLOCK_2.  For an
			 * indented line the leading white space is
			 * copied as well.
			 */
			indented_code = (yytext[0] != '%');
			doing_codeblock = true;
			bracelevel = 1;

			if ( indented_code )
			    ACTION_ECHO;

			BEGIN(CODEBLOCK_2);
			}

	/* pattern constructs that are scanned in their own start condition */
<SECT2>"<"		BEGIN(SC); return ( '<' );
<SECT2>^"^"		return ( '^' );
<SECT2>\"		BEGIN(QUOTE); return ( '"' );
<SECT2>"{"/[0-9]		BEGIN(NUM); return ( '{' );
<SECT2>"{"[^0-9\n][^}\n]*	BEGIN(BRACEERROR);
<SECT2>"$"/[ \t\n]	return ( '$' );

<SECT2>{WS}"%{"		{
			/* the pattern is followed by an action that is
			 * enclosed in literal-block markers
			 */
			bracelevel = 1;
			BEGIN(PERCENT_BRACE_ACTION);
			return ( '\n' );
			}
<SECT2>{WS}"|".*\n	continued_action = true; ++linenum; return ( '\n' );

<SECT2>{WS}		{
			/* this rule is separate from the one below because
			 * otherwise we get variable trailing context, so
			 * we can't build the scanner using -{f,F}
			 */
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);
			return ( '\n' );
			}

<SECT2>{OPTWS}/\n	{
			/* the pattern ends at the end of the line; the
			 * newline itself is left for the ACTION rules
			 */
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);
			return ( '\n' );
			}

	/* NOTE(review): this has the same pattern and start condition as the
	 * blank-line rule at the top of this group, which is listed first;
	 * it looks as if this rule can never be matched - confirm
	 */
<SECT2>^{OPTWS}\n	++linenum; return ( '\n' );

<SECT2>"<<EOF>>"	return ( EOF_OP );

<SECT2>^"%%".*		{
			/* end of section 2; what follows is scanned
			 * in SECT3
			 */
			sectnum = 3;
			BEGIN(SECT3);
			return ( EOF ); /* to stop the parser */
			}

<SECT2>"["([^\\\]\n]|{ESCSEQ})+"]"	{
			/* a complete character class.  Its text is
			 * used as the key for finding an identical
			 * class seen earlier.
			 * NOTE(review): the copy into nmstr is not
			 * bounded
			 */
			int cclval;

			(void) strcpy( nmstr, (char *) yytext );

			/* check to see if we've already encountered this ccl */
			if ( (cclval = ccllookup( (Char *) nmstr )) )
			    {
			    yylval = cclval;
			    ++cclreuse;
			    return ( PREVCCL );
			    }
			else
			    {
			    /* we fudge a bit.  We know that this ccl will
			     * soon be numbered as lastccl + 1 by cclinit
			     */
			    cclinstal( (Char *) nmstr, lastccl + 1 );

			    /* push back everything but the leading bracket
			     * so the ccl can be rescanned
			     */
			    PUT_BACK_STRING((Char *) nmstr, 1);

			    BEGIN(FIRSTCCL);
			    return ( '[' );
			    }
			}

<SECT2>"{"{NAME}"}"	{
			/* a reference to a name defined in section 1:
			 * its definition is pushed back onto the input
			 * and scanned in place of the reference
			 */
			register Char *nmdefptr;
			Char *ndlookup();

			(void) strcpy( nmstr, (char *) yytext );
			nmstr[yyleng - 1] = '\0';  /* chop trailing brace */

			/* lookup from "nmstr + 1" to chop leading brace */
			if ( ! (nmdefptr = ndlookup( nmstr + 1 )) )
			    synerr( "undefined {name}" );

			else
			    { /* push back name surrounded by ()'s */
			    unput(')');
			    PUT_BACK_STRING(nmdefptr, 0);
			    unput('(');
			    }
			}

	/* operator characters are returned as themselves, any other
	 * character as a CHAR token
	 */
<SECT2>[/|*+?.()]	return ( yytext[0] );
<SECT2>.		RETURNCHAR;
<SECT2>\n		++linenum; return ( '\n' );


<SC>","			return ( ',' ); /* separates names in a start condition list */
<SC>">"			BEGIN(SECT2); return ( '>' );
	/* when a caret follows the list, CARETISBOL returns it as the
	 * '^' operator
	 */
<SC>">"/"^"		BEGIN(CARETISBOL); return ( '>' );
<SC>{SCNAME}		RETURNNAME;
<SC>.			synerr( "bad start condition name" );

<CARETISBOL>"^"		BEGIN(SECT2); return ( '^' );


<QUOTE>[^"\n]		RETURNCHAR; /* a character inside a quoted string */
<QUOTE>\"		BEGIN(SECT2); return ( '"' );

<QUOTE>\n		{
			/* unterminated string: report it and return
			 * the closing quote that is missing
			 */
			synerr( "missing quote" );
			BEGIN(SECT2);
			++linenum;
			return ( '"' );
			}


<FIRSTCCL>"^"/[^-\n]	BEGIN(CCL); return ( '^' ); /* leading caret of a character class */
	/* after a caret that is followed by a hyphen the scanner stays in
	 * FIRSTCCL, so the hyphen is returned as a CHAR by the next rule
	 */
<FIRSTCCL>"^"/-		return ( '^' );
<FIRSTCCL>-		BEGIN(CCL); yylval = '-'; return ( CHAR );
<FIRSTCCL>.		BEGIN(CCL); RETURNCHAR;

	/* a hyphen is the range operator unless it is the last character
	 * of the class
	 */
<CCL>-/[^\]\n]		return ( '-' );
<CCL>[^\]\n]		RETURNCHAR;
<CCL>"]"		BEGIN(SECT2); return ( ']' );


<NUM>[0-9]+		{
			/* a repeat count inside braces; myctoi()
			 * supplies the token value
			 */
			yylval = myctoi( yytext );
			return ( NUMBER );
			}

<NUM>","			return ( ',' );
<NUM>"}"			BEGIN(SECT2); return ( '}' );

<NUM>.			{
			/* report the error and return the closing
			 * brace in place of the bad character
			 */
			synerr( "bad character inside {}'s" );
			BEGIN(SECT2);
			return ( '}' );
			}

<NUM>\n			{
			/* the line ended before the closing brace */
			synerr( "missing }" );
			BEGIN(SECT2);
			++linenum;
			return ( '}' );
			}


<BRACEERROR>"}"		synerr( "bad name in {}'s" ); BEGIN(SECT2);
<BRACEERROR>\n		synerr( "missing }" ); ++linenum; BEGIN(SECT2);


<PERCENT_BRACE_ACTION,CODEBLOCK_2>{OPTWS}"%}".*		bracelevel = 0; /* closing marker: block ends at the next newline */
<PERCENT_BRACE_ACTION,CODEBLOCK_2,ACTION>"reject"	{
			ACTION_ECHO;
			CHECK_REJECT(yytext);
			}
<PERCENT_BRACE_ACTION,CODEBLOCK_2,ACTION>"yymore"	{
			ACTION_ECHO;
			CHECK_YYMORE(yytext);
			}
<PERCENT_BRACE_ACTION,CODEBLOCK_2>{NAME}|{NOT_NAME}|.	ACTION_ECHO;
<PERCENT_BRACE_ACTION,CODEBLOCK_2>\n			{
			++linenum;
			ACTION_ECHO;
			/* the block ends once the closing marker has
			 * been seen, or at once for an indented line
			 * of code
			 */
			if ( bracelevel == 0 ||
			     (doing_codeblock && indented_code) )
			    {
			    /* only an action gets a YY_BREAK line
			     * appended after it
			     */
			    if ( ! doing_codeblock )
				fputs( "\tYY_BREAK\n", temp_action_file );
			    
			    doing_codeblock = false;
			    BEGIN(SECT2);
			    }
			}


	/* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
	/* braces are counted so that an action spanning several lines ends
	 * at the first newline at which every brace has been closed
	 */
<ACTION>"{"		ACTION_ECHO; ++bracelevel;
<ACTION>"}"		ACTION_ECHO; --bracelevel;
<ACTION>[^a-z_{}"'/\n]+	ACTION_ECHO;
<ACTION>{NAME}		ACTION_ECHO;
<ACTION>"/*"		ACTION_ECHO; BEGIN(ACTION_COMMENT);
<ACTION>"'"([^'\\\n]|\\.)*"'"	ACTION_ECHO; /* character constant */
<ACTION>\"		ACTION_ECHO; BEGIN(ACTION_STRING);
<ACTION>\n		{
			++linenum;
			ACTION_ECHO;
			/* the action is complete: append YY_BREAK and
			 * go back to scanning rules
			 */
			if ( bracelevel == 0 )
			    {
			    fputs( "\tYY_BREAK\n", temp_action_file );
			    BEGIN(SECT2);
			    }
			}
<ACTION>.		ACTION_ECHO;

	/* comments and strings are copied without touching bracelevel, so
	 * braces inside them are not counted
	 */
<ACTION_COMMENT>"*/"	ACTION_ECHO; BEGIN(ACTION);
<ACTION_COMMENT>[^*\n]+	ACTION_ECHO;
<ACTION_COMMENT>"*"	ACTION_ECHO;
<ACTION_COMMENT>\n	++linenum; ACTION_ECHO;
<ACTION_COMMENT>.	ACTION_ECHO;

<ACTION_STRING>[^"\\\n]+	ACTION_ECHO;
<ACTION_STRING>\\.	ACTION_ECHO;
<ACTION_STRING>\n	++linenum; ACTION_ECHO;
<ACTION_STRING>\"	ACTION_ECHO; BEGIN(ACTION);
<ACTION_STRING>.	ACTION_ECHO;

<ACTION,ACTION_COMMENT,ACTION_STRING><<EOF>>	{
			synerr( "EOF encountered inside an action" );
			yyterminate();
			}


<SECT2,QUOTE,CCL>{ESCSEQ}	{
			/* an escape sequence is returned as a CHAR
			 * token whose value comes from myesc()
			 */
			yylval = myesc( yytext );
			return ( CHAR );
			}

<FIRSTCCL>{ESCSEQ}	{
			/* as above; the first character of the class
			 * has now been seen, so continue in CCL
			 */
			yylval = myesc( yytext );
			BEGIN(CCL);
			return ( CHAR );
			}


	/* everything in section 3 is copied to the output with ECHO */
<SECT3>.*(\n?)		ECHO;
%%


/* yywrap - advance to the next input file, if there is one
 *
 * returns 0 after switching to the next file named in input_files, or 1
 * when no input files remain
 */

int yywrap()

    {
    /* nothing left to read */
    if ( --num_input_files <= 0 )
	return ( 1 );

    set_input_file( *++input_files );
    return ( 0 );
    }


/* set_input_file - make the given file the scanner's input
 *
 * A NULL file selects stdin, recorded under the name "<stdin>".  Any other
 * name is recorded in infilename and opened for reading; lerrsf() is
 * called if the open fails.
 */

void set_input_file( file )
char *file;

    {
    if ( ! file )
	{
	yyin = stdin;
	infilename = "<stdin>";
	return;
	}

    infilename = file;
    yyin = fopen( infilename, "r" );

    if ( yyin == NULL )
	lerrsf( "can't open %s", file );
    }