1BSD/ex-1.1/ex_re.c

Compare this file to the similar file:
Show the results in this format:

#include "ex.h"
#include "ex_re.h"
/*
 * Ex - a text editor
 * Bill Joy UCB June/September 1977
 */

compile(eof, oknl)
	int eof;
	char oknl;
{
	register c;
	register char *ep;
	char *lastep;
	char bracket[NBRA], *bracketp, *rhsp;
	int cclcnt;

	if (letter(eof) || digit(eof))
		error("Re delimiter must not be letter or digit|Regular expressions cannot be delimited by letters or digits");
	ep = expbuf;
	c = getchar();
	if (eof == '\\')
		switch (c) {

		case '/':
		case '?':
			if (scanre.sexpbuf[0] == 0)
noscanre:
				error("No previous scan re|No previous scanning regular expression");
			resre(&scanre);
			return (c);

		case '&':
			if (subre.sexpbuf[0] == 0)
nosubre:
				error("No previous substitute re|No previous substitute regular expression");
			resre(&subre);
			return (c);

		default:
			error("Badly formed re|Regular expression \\ must be followed by /, ?, or &");
		}
	if (c == eof || c == '\n' || c == EOF) {
		if (*ep == 0)
			error("No previous re|No previous regular expression");
		if (c == '\n' && oknl == 0)
			error("Missing closing delimiter@for regular expression");
		if (c == '\n')
			ungetchar(c);
		return (eof);
	}
	bracketp = bracket;
	nbra = 0;
	circfl = 0;
	if (c == '^') {
		c = getchar();
		circfl++;
	}
	ungetchar(c);
	for (;;) {
		if (ep >= &expbuf[ESIZE - 2])
complex:
			cerror("Re too complex|Regular expression too complicated");
		c = getchar();
		if (c == eof || c == EOF) {
			if (bracketp != bracket)
				cerror("Unmatched \\(|More \\('s than \\)'s in regular expression");
			*ep++ = CEOF;
			return (eof);
		}
		if (value(MAGIC)) {
			if (c != '*' || ep == expbuf)
				lastep = ep;
		} else
			if (c != '\\' || peekchar() != '*' || ep == expbuf)
				lastep = ep;
		switch (c) {

		case '\\':
			c = getchar();
			switch (c) {

			case '(':
				if (nbra >= NBRA)
					cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression");
				*bracketp++ = nbra;
				*ep++ = CBRA;
				*ep++ = nbra++;
				continue;

			case ')':
				if (bracketp <= bracket)
					cerror("Extra \\)|More \\)'s than \\('s in regular expression");
				*ep++ = CKET;
				*ep++ = *--bracketp;
				continue;

			case '<':
				*ep++ = CBRC;
				continue;

			case '>':
				*ep++ = CLET;
				continue;
			}
			if (value(MAGIC) == 0)
magic:
			switch (c) {

			case '.':
				*ep++ = CDOT;
				continue;

			case '~':
				rhsp = rhsbuf;
				while (*rhsp) {
					if (*rhsp & QUOTE) {
						c = *rhsp & 0177;
						if (c == '&')
							error("Replacement pattern contains &@- cannot use in re");
						if (c >= '1' && c <= '9')
							error("Replacement pattern contains \\d@- cannot use in re");
					}
					if (ep >= &expbuf[ESIZE-2])
						goto complex;
					*ep++ = CCHR;
					*ep++ = *rhsp++ & 0177;
				}
				continue;

			case '*':
				if (ep == expbuf)
					break;
				if (*lastep == CBRA || *lastep == CKET)
					cerror("Illegal *|Can't * a \\( ... \\) in regular expression");
				if (*lastep == CCHR && (lastep[1] & QUOTE))
					cerror("Illegal *|Can't * a \\n in regular expression");
				*lastep =| STAR;
				continue;

			case '[':
				*ep++ = CCL;
				*ep++ = 0;
				cclcnt = 1;
				c = getchar();
				if (c == '^') {
					c = getchar();
					ep[-2] = NCCL;
				}
				if (c == ']')
					cerror("Bad character class|Empty character class '[]' or '[^]' cannot match");
				while (c != ']') {
					if (c == '\\' && any(peekchar(), "]-^\\"))
						c = getchar() | QUOTE;
					if (c == '\n' || c == EOF)
						cerror("Missing ]");
					*ep++ = c;
					cclcnt++;
					if (ep >= &expbuf[ESIZE])
						goto complex;
					c = getchar();
				}
				lastep[1] = cclcnt;
				continue;
			}
			if (c == EOF) {
				ungetchar(EOF);
				c = '\\';
				goto defchar;
			}
			*ep++ = CCHR;
			if (c == '\n')
				cerror("No newlines in re's|Can't escape newlines into regular expressions");
			if (c < '1' || c > NBRA + '1') {
				*ep++ = c;
				continue;
			}
			c =- '1';
			if (c >= nbra)
				cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s");
			*ep++ = c | QUOTE;
			continue;

		case '\n':
			if (oknl) {
				ungetchar(c);
				*ep++ = CEOF;
				return (eof);
			}
			cerror("Badly formed re|Missing closing delimiter for regular expression");

		case '$':
			if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
				*ep++ = CDOL;
				continue;
			}
			goto defchar;

		case '.':
		case '~':
		case '*':
		case '[':
			if (value(MAGIC))
				goto magic;
defchar:
		default:
			*ep++ = CCHR;
			*ep++ = c;
			continue;
		}
	}
}

cerror(s)
	char *s;
{

	expbuf[0] = 0;
	error(s);
}

same(a, b)
	register int a, b;
{

	return (a == b || value(IGNORECASE) && (a ^ b) == ' ' && letter(a) == letter(b));
}

execute(gf, addr)
	int *addr;
{
	register char *p1, *p2;
	register c;

	if (gf) {
		if (circfl)
			return (0);
		locs = p1 = loc2;
	} else {
		if (addr == zero)
			return (0);
		p1 = getline(*addr);
		locs = 0;
	}
	p2 = expbuf;
	if (circfl) {
		loc1 = p1;
		return (advance(p1, p2));
	}
	/* fast check for first character */
	if (*p2 == CCHR) {
		c = p2[1];
		do {
			if (c != *p1 && (!value(IGNORECASE) ||
			    (c ^ *p1) != ' ' || letter(c) != letter(*p1)))
				continue;
			if (advance(p1, p2)) {
				loc1 = p1;
				return (1);
			}
		} while (*p1++);
		return (0);
	}
	/* regular algorithm */
	do {
		if (advance(p1, p2)) {
			loc1 = p1;
			return (1);
		}
	} while (*p1++);
	return (0);
}

#define	uletter(c)	(letter(c) || c == '_')

advance(lp, ep)
	register char *lp, *ep;
{
	register char *curlp;
	char *nextep, *sp, *sp1, c;

	for (;;) switch (*ep++) {

	case CCHR:
		if (*ep & QUOTE) {
			c = *ep++ & 0177;
			sp = braslist[c];
			sp1 = braelist[c];
			while (sp < sp1) {
				if (!same(*sp, *lp))
					return (0);
				sp++, lp++;
			}
			continue;
		}
		if (!same(*ep, *lp))
			return (0);
		ep++, lp++;
		continue;

	case CDOT:
		if (*lp++)
			continue;
		return (0);

	case CDOL:
		if (*lp == 0)
			continue;
		return (0);

	case CEOF:
		loc2 = lp;
		return (1);

	case CCL:
		if (cclass(ep, *lp++, 1)) {
			ep =+ *ep;
			continue;
		}
		return (0);

	case NCCL:
		if (cclass(ep, *lp++, 0)) {
			ep =+ *ep;
			continue;
		}
		return (0);

	case CBRA:
		braslist[*ep++] = lp;
		continue;

	case CKET:
		braelist[*ep++] = lp;
		continue;

	case CDOT|STAR:
		curlp = lp;
		while (*lp++)
			continue;
		goto star;

	case CCHR|STAR:
		curlp = lp;
		while (same(*lp, *ep))
			lp++;
		lp++;
		ep++;
		goto star;

	case CCL|STAR:
	case NCCL|STAR:
		curlp = lp;
		while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
			continue;
		ep =+ *ep;
		goto star;
star:
		do {
			lp--;
			if (lp == locs)
				break;
			if (advance(lp, ep))
				return (1);
		} while (lp > curlp);
		return (0);

	case CBRC:
		if (lp == expbuf)
			continue;
		if (uletter(*lp) && !uletter(lp[-1]) && !digit(lp[-1]))
			continue;
		return (0);

	case CLET:
		if (!uletter(*lp) && !digit(*lp))
			continue;
		return (0);

	default:
		error("Re internal error@- if possible remember what you did and tell system staff");
	}
}

cclass(set, c, af)
	register char *set;
	register c;
	int af;
{
	register n;

	if (c == 0)
		return (0);
	if (value(IGNORECASE) && ucletter(c))
		c = letter(c);
	n = *set++;
	while (--n)
		if (n > 2 && set[1] == '-') {
			if (c >= (set[0] & 0177) && c <= (set[2] & 0177))
				return (af);
			set =+ 3;
			n =- 2;
		} else
			if ((*set++ & 0177) == c)
				return (af);
	return (!af);
}

copy(to, from, size)
	register char *from, *to;
	register int size;
{

	if (size > 0)
		do
			*to++ = *from++;
		while (--size > 0);
}