2.9BSD/usr/contrib/jove/jove_re.c

Compare this file to the similar file:
Show the results in this format:

/*
   Jonathan Payne at Lincoln-Sudbury Regional High School 5-25-83

   jove_re.c

   Much of this code was taken from /usr/src/cmd/ed.c.  It has been
   modified a lot and features have been added, but the general algorithm
   is the same. */

#include "jove.h"

#define	CBRA		1	/* \( */
#define	CCHR		2	/* Normal comparisn */
#define	CDOT		4	/* Matches any character */
#define	CCL		6	/* [0-9] matches 0123456789 */
#define	NCCL		8	/* a '^' inside a [...] */
#define	CDOL		10	/* Match at end of the line */
#define	CEOF		12	/* End of line */
#define	CKET		14	/* \) */
#define	CBACK		16	/* \1 \2 \3 ... */
#define CIRC		18	/* At beginning of line */

#define CWORD		20	/* A word character	\w */
#define CNWORD		22	/* Not a word character \W */
#define CBOUND		24	/* At a word boundary   \b */
#define CNBOUND		25	/* Not at a word boundary \B */

#define	STAR	01

char	*loc1,		/* Beginning of found search */
	*loc2,		/* End of found search */
	*locs;		/* Where to start comparison */

#define	NBRA	5
#define NALTS	10

char	*braslist[NBRA],
	*braelist[NBRA],
	*alternates[NALTS];
int	nbra;

#define SEARCHSIZE 100
#define EOL	-1

char	unmatched[] = "unmatched parens?";
char	toolong[] = "too long";
char	syntax[] = "syntax?";

char	*reptr,		/* Pointer to the expression to search for */
	rhsbuf[LBSIZE/2],
	expbuf[ESIZE+4],
	searchbuf[SEARCHSIZE];
int	dir,
	REpeekc;

#define CharCom(a, b) (IsFlagSet(globflags, CASEIND) ? \
	(Upper(a) == Upper(b)) : (a == b))

REerror(str)
char	*str;
{
	complain("RE error: %s", str);
}

nextc()
{
	int	c;

	if (c = REpeekc)
		REpeekc = 0;
	else if (*reptr == 0)
		c = EOL;
	else
		c = *reptr++;
	return c;
}

QRepSearch()
{
	replace(1);
}

RepSearch()
{
	replace(0);
}

/* Prompt for search and replacement string and do the substitution.
   The point is saved and restored at the end. */

replace(query)
{
	MARK	*save = MakeMark(curline, curchar);

	REpeekc = 0;
	reptr = ask(searchbuf[0] ? searchbuf : (char *) 0,
			": %sreplace-search ", query ? "query-" : "");
	strcpy(searchbuf, reptr);
	compile(EOL, IsFlagSet(globflags, MAGIC));
			/* Compile search string complaining
			 * about errors before asking for 
			 * the replacement string.
			 */
	reptr = ask("", "Replace \"%s\" with: ", searchbuf);
	compsub(EOL);
		/* Compile replacement string */
	substitute(query);
	ToMark(save);
	DelMark(save);
}

/* Return a buffer location of where the search string in searchbuf is.
   NOTE: this procedure used to munge all over linebuf, but doesn't
   anymore (It uses genbuf instead!). */

BUFLOC *
dosearch(str, direction, magic)
char	*str;
{
	static BUFLOC	ret;
	LINE	*a1;
	int	fromchar;

	dir = direction;
	reptr = str;		/* Read from this string */
	REpeekc = 0;
	compile(EOL, magic);

	a1 = curline;
	fromchar = curchar;
	if (dir < 0)
		fromchar--;
	
	if (fromchar >= strlen(linebuf) && dir > 0)
		a1 = a1->l_next, fromchar = 0;
	else if (fromchar < 0 && dir < 0)
		a1 = a1->l_prev, fromchar = a1 ? length(a1) : 0;
	for (;;) {
		if (a1 == 0)
			break;
		if (execute(fromchar, a1))
			break;
		a1 = dir > 0 ? a1->l_next : a1->l_prev;
		if (dir < 0 && !a1)	/* don't do the length if == 0 */
			break;
		fromchar = dir > 0 ? 0 : length(a1);
	}

	if (a1) {
		ret.p_line = a1;
		ret.p_char = dir > 0 ? loc2 - genbuf : loc1 - genbuf;
		return &ret;
	}
	return 0;
}

setsearch(str)
char	*str;
{
	strcpy(searchbuf, str);
}

ForSearch()
{
	search(1);
}

RevSearch()
{
	search(0);
}

/* Prompts for a search string.  Complains if the string cannot be found.
   Searches don't wrap line in ed/vi */

search(forward)
{
	BUFLOC	*newdot;

	dir = forward ? 1 : -1;
	reptr = ask(searchbuf[0] ? searchbuf : 0, FuncName());

	setsearch(reptr);
	message(searchbuf);
	UpdateMesg();		/* Only message not whole screen! */
	newdot = dosearch(searchbuf, dir, IsFlagSet(globflags, MAGIC));
	if (newdot == 0)
		complain("Search for \"%s\" failed", searchbuf);
	SetMark();
	SetDot(newdot);
}

/* Perform the substitution.  Both the replacement and search strings have 
   been compiled already.  This knows how to deal with query replace. */

substitute(query)
{
	LINE	*lp;
	int	numdone = 0,
		fromchar = curchar,
		stop = 0;

	dir = 1;	/* Always going forward */
	for (lp = curline; !stop && lp; lp = lp->l_next, fromchar = 0) {
		while (!stop && execute(fromchar, lp)) {
			DotTo(lp, loc2 - genbuf);
			fromchar = curchar;
			if (query) {
 				message("Replace (Type '?' for help)? ");
reswitch:
				switch (Upper((*Getchar)())) {
				case '.':
					stop++;	/* Stop after this */

				case ' ':
					break;	/* Do the replace */

				case '\177':	/* Skip this one */
					continue;

				case 'R':	/* Allow user to move around */
				    {
					BUFFER	*oldbuf = curbuf;
				    	MARK	*m;
				    	char	pushexp[ESIZE + 4];

				    	fromchar = loc1 - genbuf;
				    	m = MakeMark(curline, fromchar);
				    	curchar = loc2 - genbuf;
					message("Recursive edit ...");
				    	copynchar(pushexp, expbuf, ESIZE + 4);
					Recurse();
				    	copynchar(expbuf, pushexp, ESIZE + 4);
					SetBuf(oldbuf);
				    	ToMark(m);
				    	DelMark(m);
				    	fromchar = curchar;
				    	lp = curline;
					continue;
				    }

				case 'P':	/* Replace all */
					query = 0;	/* Replace all */
					break;

				case '\n':	/* Stop this replace search */
				case '\r':
					goto done;

				default:
					rbell();
				case '?':
message("<SP> => yes, <DEL> => no, <CR> => stop, 'r' => recursive edit, 'p' => proceed");
					goto reswitch;
				}

			}
			dosub(linebuf);		/* Do the substitution */
			numdone++;
			SetModified(curbuf);
			fromchar = curchar = loc2 - genbuf;
			exp = 1;
			makedirty(curline);
			if (query) {
				message(mesgbuf);	/* No blinking ... */
				redisplay();	/* Show the change */
			}
			if (linebuf[fromchar] == 0)
				break;
		}
	}
done:
	if (numdone == 0)
		complain("None found");
	s_mess("%d substitution%s", numdone, numdone ? "s" : "");
}

/* Compile the substitution string */

compsub(seof)
{
	register int	c;
	register char	*p;

	p = rhsbuf;
	for (;;) {
		c = nextc();
		if (c == '\\')
			c = nextc() | 0200;
		if (c == seof)
			break;
		*p++ = c;
		if (p >= &rhsbuf[LBSIZE/2])
			REerror(toolong);
	}
	*p++ = 0;
}

dosub(base)
char	*base;
{
	register char	*lp,
			*sp,
			*rp;
	int	c;

	lp = genbuf;
	sp = base;
	rp = rhsbuf;
	while (lp < loc1)
		*sp++ = *lp++;
	while (c = *rp++ & 0377) {
		if (c == '&') {
			sp = place(LBSIZE, base, sp, loc1, loc2);
			continue;
		} else if (c & 0200 && (c &= 0177) >= '1' && c < nbra + '1') {
			sp = place(LBSIZE, base, sp, braslist[c - '1'], braelist[c - '1']);
			continue;
		}
		*sp++ = c & 0177;
		if (sp >= &base[LBSIZE])
			REerror(toolong);
	}
	lp = loc2;
	loc2 = genbuf + max(1, sp - base);
			/* At least one character forward after the replace
			   to prevent infinite number of replacements in the
			   same place, e.g. replace "^" with "" */
	
	while (*sp++ = *lp++)
		if (sp >= &base[LBSIZE])
			len_error(ERROR);
}

char *
place(size, base, sp, l1, l2)
char	*base;
register char	*sp,
		*l1,
		*l2;
{
	while (l1 < l2) {
		*sp++ = *l1++;
		if (sp >= &base[size])
			len_error(ERROR);
	}

	return sp;
}

putmatch(which, buf, size)
char	*buf;
{
	*(place(size, buf, buf, braslist[which - 1], braelist[which - 1])) = 0;
}

/* Compile the string, that nextc knows about, into a sequence of
   [TYPE][CHAR] where type says what kind of comparison to make
   and CHAR is the character to compare with.  CHAR is actually
   optional, like in the case of '.' there is no CHAR. */

compile(aeof, magic)
{
	register int	xeof,
			c;
	register char	*ep;
	char	*lastep,
		bracket[NBRA],
		*bracketp,
		**alt = alternates;
	int	cclcnt;

	ep = expbuf;
	*alt++ = ep;
	xeof = aeof;
	bracketp = bracket;

	nbra = 0;
	lastep = 0;
	for (;;) {
		if (ep >= &expbuf[ESIZE])
			REerror(toolong);
		c = nextc();
		if (c == xeof) {
			if (bracketp != bracket)
				REerror(unmatched);
			*ep++ = CEOF;
			*alt = 0;
			return;
		}
		if (c != '*')
			lastep = ep;
		if (!magic && index("*[.", c))
			goto defchar;
		switch (c) {
		case '\\':
			c = nextc();
			if (!magic && index("()0123456789|wWbB", c))
				goto defchar;
			switch (c) {
			case 'w':
				*ep++ = CWORD;
				continue;

			case 'W':
				*ep++ = CNWORD;
				continue;

			case 'b':
				*ep++ = CBOUND;
				continue;

			case 'B':
				*ep++ = CNBOUND;
				continue;

			case '(':
				if (nbra >= NBRA)
					REerror(unmatched);
				*bracketp++ = nbra;
				*ep++ = CBRA;
				*ep++ = nbra++;
				continue;

			case ')':
				if (bracketp <= bracket)
					REerror(unmatched);
				*ep++ = CKET;
				*ep++ = *--bracketp;
				continue;
			case '|':	/* Another alternative */
				if (alt >= &alternates[NALTS])
					REerror("Too many alternates");
				*ep++ = CEOF;
				*alt++ = ep;
				continue;

			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
				*ep++ = CBACK;
				*ep++ = c - '1';
				continue;

			}
			goto defchar;

		case '.':
			*ep++ = CDOT;
			continue;

		case '\n':
			goto cerror;

		case '*':
			if (lastep == 0 || *lastep == CBRA || *lastep == CKET)
				goto defchar;
			*lastep |= STAR;
			continue;

		case '^':
			if (ep != expbuf && ep[-1] != CEOF)
				goto defchar;
			*ep++ = CIRC;
			continue;

		case '$':
			if (((REpeekc = nextc()) != xeof) && REpeekc != '\\')
				goto defchar;
			*ep++ = CDOL;
			continue;

		case '[':
			*ep++ = CCL;
			*ep++ = 0;
			cclcnt = 1;
			if ((c = nextc()) == '^') {
				c = nextc();
				ep[-2] = NCCL;
			}
			do {
				if (c == '\\')
					c = nextc();
				if (c == '\n')
					goto cerror;
				if (c == xeof)
					REerror("Missing ']'");
				*ep++ = c;
				cclcnt++;
				if (ep >= &expbuf[ESIZE])
					REerror(toolong);
			} while ((c = nextc()) != ']');
			lastep[1] = cclcnt;
			continue;

		defchar:
		default:
			*ep++ = CCHR;
			*ep++ = c;
		}
	}
   cerror:
	expbuf[0] = 0;
	nbra = 0;
	REerror(syntax);
}

/* Look for a match in `line' starting at `fromchar' from the beginning */

execute(fromchar, line)
LINE	*line;
{
	register char	*p1,
			c;

	for (c = 0; c < NBRA; c++) {
		braslist[c] = 0;
		braelist[c] = 0;
	}
	ignore(getright(line, genbuf));
	locs = p1 = genbuf + fromchar;

	if (dir < 0)
		locs = genbuf;

	/* fast check for first character */

	if (expbuf[0] == CCHR && !alternates[1]) {
		c = expbuf[1];
		do {
			if (!CharCom(*p1, c))
				continue;
			if (advance(p1, expbuf)) {
				loc1 = p1;
				return 1;
			}
		} while (dir > 0 ? *p1++ : p1-- > genbuf);
		return 0;
	}
	/* regular algorithm */
	do {
		register char	**alt = alternates;

		while (*alt)
			if (advance(p1, *alt++)) {
				loc1 = p1;
				return 1;
			}
	} while (dir > 0 ? *p1++ : p1-- > genbuf);

	return 0;
}

/* Does the actual comparison, lp is the buffer line and ep is the
   expression pointer.  Returns 1 if it found a match or 0 if it
   didn't. */

advance(lp, ep)
register char	*ep,
		*lp;
{
	register char	*curlp;
	int	i;

	for (;;) switch (*ep++) {

	case CIRC:	/* If we are not at the beginning
			   of the line there isn't a
			   match. */

		if (lp == genbuf)
			continue;
		return 0;

	case CCHR:	/* Normal comparison, if they're the same, continue */
		if (CharCom(*ep++, *lp++))
			continue;
		return 0;

	case CDOT:	/* Any character matches.  Can only fail if we are
			   at the end of the buffer line already */
		if (*lp++)
			continue;
		return 0;

	case CDOL:	/* Only works if we are at the end of the line */
		if (*lp == 0)
			continue;
		return 0;

	case CEOF:	/* If we get here then we made it! */
		loc2 = lp;
		return 1;

	case CWORD:	/* Is this a word character? */
		if (*lp && isword(*lp)) {
			lp++;
			continue;
		}
		return 0;

	case CNWORD:	/* Is this NOT a word character */
		if (*lp && !isword(*lp)) {
			lp++;
			continue;
		}
		return 0;

	case CBOUND:	/* Is this at a word boundery */
		if (	(*lp == 0) ||		/* end of line */
			(lp == genbuf) ||	/* Beginning of line */
			(isword(*lp) != isword(*(lp - 1)))	)
					/* Characters are of different
					   type i.e one is part of a
					   word and the other isn't. */
			continue;
		return 0;

	case CNBOUND:	/* Is this NOT a word boundery */
		if (	(*lp) &&	/* Not end of the line */
			(isword(*lp) == isword(*(lp - 1)))	)
			continue;
		return 0;

	case CCL:	/* Is this a member of [] */
		if (cclass(ep, *lp++, 1)) {
			ep += *ep;
			continue;
		}
		return 0;

	case NCCL:	/* Is this NOT a member of [] */
		if (cclass(ep, *lp++, 0)) {
			ep += *ep;
			continue;
		}
		return 0;

	case CBRA:
		braslist[*ep++] = lp;
		continue;

	case CKET:
		braelist[*ep++] = lp;
		continue;

	case CBACK:
		if (braelist[i = *ep++] == 0)
			goto badcase;
		if (backref(i, lp)) {
			lp += braelist[i] - braslist[i];
			continue;
		}
		return 0;

	case CWORD | STAR:	/* Any number of word characters */
		curlp = lp;
		while (*lp++ && isword(*(lp - 1)))
			;
		goto star;

	case CNWORD | STAR:
		curlp = lp;
		while (*lp++ && !isword(*(lp - 1)))
			;
		goto star;

	case CBACK | STAR:
		curlp = lp;
		while (backref(i, lp))
			lp += braelist[i] - braslist[i];
		while (lp >= curlp) {
			if (advance(lp, ep))
				return 1;
			lp -= braelist[i] - braslist[i];
		}
		continue;

	case CDOT | STAR:	/* Any number of any character */
		curlp = lp;
		while (*lp++)
			;
		goto star;

	case CCHR | STAR:	/* Any number of the current character */
		curlp = lp;
		while (CharCom(*lp++, *ep))
			;
		ep++;
		goto star;

	case CCL | STAR:
	case NCCL | STAR:
		curlp = lp;
		while (cclass(ep, *lp++, ep[-1] == (CCL | STAR)))
			;
		ep += *ep;
		goto star;

	star:
		do {
			lp--;
			if (lp < locs)
				break;
			if (advance(lp, ep))
				return 1;
		} while (lp > curlp);
		return 0;

	default:
badcase:
		REerror("advance");
	}
	/* NOTREACHED */
}

backref(i, lp)
register int	i;
register char	*lp;
{
	register char	*bp;

	bp = braslist[i];
	while (*bp++ == *lp++)
		if (bp >= braelist[i])
			return 1;
	return 0;
}

cclass(set, c, af)
register char	*set,
		c;
{
	register int	n;
	char	*base;

	if (c == 0)
		return 0;
	n = *set++;
	base = set;
	while (--n) {
		if (*set == '-' && set > base) {
			if (c >= *(set - 1) && c <= *(set + 1))
				return af;
		} else if (*set++ == c)
			return af;
	}
	return !af;
}