AUSAM/source/ded/match.c

#include "ded.h"
#include "match.h"
#include "char.h"

/* two macro definitions, to help with writing recursive calls */
#define REC fline, fcol, buf, &fline, &fcol, &buf
#define TAILREC fline, fcol, buf, resline, rescol, resbufp


/************************************************************************
 * backtracking pattern matcher, capable of making multi-line matches   *
 *                                                                      *
 * the (enormous number of) parameters are as follows -                 *
 *                                                                      *
 *  pat : an RE pattern, built by build_re                              *
 *                                                                      *
 *  fline, fcol : the current match position                            *
 *      (fcol is -1 at the start of the line,  EOLCOL at the end of the *
 *       line, to help with 'begin-line' and 'end-line' character       *
 *       processing)                                                    *
 *                                                                      *
 *  buf : address of current line buffer                                *
 *                                                                      *
 *  resline, rescol : addresses of variables which must receive         *
 *      the values of fline, fcol should the match succeed              *
 *                                                                      *
 *  resbufp : the address of a variable which must                      *
 *      receive the address of current line buffer should the match     *
 *      succeed                                                         *
 *                                                                      *
 *  NOTE : multi-line matches carry a run-time overhead,                *
 *      though by providing a special screen buffer I avoid use of the  *
 *      heap. The screen buffer manipulations are efficient and clear   *
 *      (I hope!) and in fact this procedure would be more complicated  *
 *      and less efficient had it used the heap or the stack.           *
 ************************************************************************/

int match(pat, fline, fcol, buf, resline, rescol, resbufp)
struct RE pat[];
int fline, fcol;
char *buf;
int *resline, *rescol;
char **resbufp;
 { register struct RE *pat_element;
    register char *buf_element, c;

    pat_element = pat;
    buf_element = buf+(fcol<0 ? 0 : fcol);

    while (true)
     { switch (pat_element->type)
	 { case p_FIN:
		/* success !!!! */
	    succeed:
		*resline = fline;
		*rescol = fcol;
		*resbufp = buf;
		return(true);

	    case p_CHAR:
		if (fcol==EOLCOL) break;
		else
		if (*buf_element++ != (pat_element++)->info)
		  return(false);
		else
		 { fcol = buf_element-buf; continue; }

	    /* separator between words ('<space>) */
	    case p_SEP:
		if (fcol==EOLCOL) break;
		else
		if (*buf_element==c_SPACE ||
		    *buf_element==0)
		 { while (*buf_element==c_SPACE) buf_element++;
		    if (*buf_element==0)
		     { fcol = EOLCOL;
			if (match(pat_element, TAILREC)) return(true);
		     }
		    else
		      fcol = buf_element-buf;

		    pat_element++;
		    continue;
		 }
		else
		  return(false);

	    case p_ANY:
		if (fcol==EOLCOL) break;
		else
		if (*buf_element++ == 0) return(false);
		else
		 { fcol = buf_element-buf; pat_element++; continue; }

	    case p_BOL:
		/* multiple begin-line characters don't have much
		 * meaning - at least not one I can program
		 */
		if (fcol==EOLCOL) break;
		else
		if (fcol >= 0) return(false);
		else
		 { fcol = 0; pat_element++; continue; }

	    case p_EOL:
		/* this code implements multiple end-line
		 * characters rather neatly
		 */
		if (fcol==EOLCOL) break;
		else
		if (*buf_element != 0) return(false);
		else
		 { pat_element++; fcol = EOLCOL; continue; }

	    case p_OPT:
		/* '? - e.g. '(abc')'?def -
		 * searches for LONGEST matching string
		 */
		if ( match(pat_element+1, REC) )
		  buf_element = buf+(fcol<0 ? 0 : fcol);
		pat_element =+ pat_element->info;
		continue;

	    case p_PLUS:
		/* '+ - e.g. abc'[0-9']'+ -
		 * searches for LONGEST matching string
		 */
		if (match(pat_element+1, REC))
		  buf_element = buf+(fcol<0 ? 0 : fcol);
		else return(false);

		if (match(pat_element, REC)) goto succeed;
		else
		 { pat_element =+ pat_element->info; continue; }

	    case p_PPLUS:
		/* '++ - e.g. .fn'$'++.en -
		 * searches for SHORTEST matching string
		 * (useful for multi-line matches)
		 */
		while (true)
		 { if (!match(pat_element+1, REC)) return(false);

		    if (match(pat_element+pat_element->info, REC))
		      goto succeed;
		 }

	    case p_STAR:
		/* '* - e.g. '^'.'*'$ -
		 * searches for LONGEST matching string
		 */
		if (bothmatch(pat_element+1, pat_element, REC))
		  goto succeed;
		else
		 { pat_element =+ pat_element->info; continue; }

	    case p_SSTAR:
		/* '** - e.g. '^.fn'$'('.'*')'**'^-*'$ -
		 * searches for SHORTEST matching string -
		 * like '++ this is useful in multi-line matches
		 */
		while (true)
		 { if (match(pat_element+pat_element->info, REC))
		      goto succeed;

		    if (!match(pat_element+1, REC)) return(false);
		 }

	    case p_INSET:
	    case p_OUTSET:
		if (fcol==EOLCOL) break;
		else
		 { c = *buf_element++;

		    if (matchset(c,pat_element+1,fcol) ==
			(pat_element->type==p_INSET ? false : true) )
		      return(false);
		    else
		     { pat_element =+ pat_element->info;
			if (c==0) fcol = EOLCOL;
			else fcol = buf_element-buf;
			continue;
		     }
		 }

	    default:
		editerror("invalid pattern type (%d) in match",
				pat_element->type);
	 }

	/* end of switch - come here on break (only when fcol==EOLCOL)
	 * - read in next line
	 * - do not match past end of file, and do not match more than a
	 * single screen full of data
	 */
	fline++;
	if (fline>maxl || fline-ms_line>=NINROWS) return(false);

	buf = cachebuf(fline);
	buf_element = buf;
	fcol = -1;
     } /* end of while (true) */
 }
/* end of match procedure */

/************************************************************************
 *  a procedure to help with multiple backtracking - when the first     *
 *      part of a multiple match works, subsequent matches must proceed *
 *      from that position, but when any part fails, matching must      *
 *      backtrack right over any successful previous matches            *
 ************************************************************************/

int bothmatch(pat1,pat2,fline,fcol,buf,resline,rescol,resbufp)
struct RE pat1[], pat2[];
int fline, fcol;
char *buf;
int *resline, *rescol;
char **resbufp;
 { if (match(pat1, REC))
      return(match(pat2,TAILREC));
    else
      return(false);
 }
/* end of bothmatch procedure */

/************************************************************************
 * procedure to help with matching character sets - e.g. '[a-z0-9']     *
 ************************************************************************/

/* memo to self : do something about efficiency of 0-9, a-z etc. */
int matchset(p_c, set, fcol)
char p_c;
struct RE set[];
 { register struct RE *set_element;
    register char c;

    c = p_c;
    set_element = set;

    while(true)
     { switch (set_element->type)
	 { case p_CHAR:
		if (c==set_element->info) return(true);
		else break;

	    case p_BOL:
		if (fcol<0) return(true);
		else break;

	    case p_EOL:
		if (c==0) return(true);
		else break;

	    case p_ONEOF:
		/* there follows a piece of horrid conciseness -
		 * C lovers would like it, I suppose
		 */
		if ((set_element++)->info<=c && c<=set_element->info)
		  return(true);
		else break;

	    case p_FIN:
		return(false);

	    default:
		editerror("invalid set type (%d) in matchset",
				set_element->type);
	 }

	set_element++;
     }
 }
/* end of matchset procedure */