/*
 * 4.4BSD/usr/src/contrib/nvi/nvi/search.c
 *
 * (Source-browser residue preserved as a comment: "Compare this file to
 * the similar file:" / "Show the results in this format:")
 */

/*-
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef lint
static char sccsid[] = "@(#)search.c	8.1 (Berkeley) 6/9/93";
#endif /* not lint */

#include <sys/types.h>

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#include "vi.h"

/*
 * Forward declarations for the file-local helpers defined below.  The
 * parameter lists are wrapped in __P(), which is defined outside this
 * file.
 */
static int	check_delta __P((SCR *, EXF *, long, recno_t));
static int	check_word __P((SCR *, char **, int *, int *));
static int	ctag_conv __P((SCR *, char **, int *));
static int	get_delta __P((SCR *, char **, long *));
static int	resetup __P((SCR *, regex_t **, enum direction,
		    char *, char **, long *, int *, u_int));

/*
 * resetup --
 *	Set up a search for a regular expression.
 *
 *	rep points at the caller's regex_t pointer.  On return it either
 *	still references the caller's structure, now compiled, or has been
 *	redirected to the saved expression in sp->sre.  dir is recorded in
 *	sp->searchdir when SEARCH_SET is given and a new pattern compiles.
 *
 *	With SEARCH_PARSE, ptrn is "<delim>pattern<delim>[delta]"; the
 *	pattern is terminated in place, any delta is returned in *deltap
 *	and, if epp is not NULL, *epp is set to the text following the
 *	delta (or to an empty string).  With SEARCH_TAG, ptrn is a ctags
 *	search pattern and is converted by ctag_conv().
 *
 *	*wordoffsetp is set by check_word() when the pattern begins with
 *	a word-start marker.
 *
 *	Returns 0 on success, 1 if a message was issued for a parse
 *	problem, or the non-zero regcomp() error code.
 */
static int
resetup(sp, rep, dir, ptrn, epp, deltap, wordoffsetp, flags)
	SCR *sp;
	regex_t **rep;
	enum direction dir;
	char *ptrn, **epp;
	long *deltap;
	int *wordoffsetp;
	u_int flags;
{
	int delim, eval, re_flags, replaced;
	char *p, *t;

	/* Set return information to the default. */
	*deltap = 0;
	*wordoffsetp = 0;

	/*
	 * No replacement pattern has been allocated yet.  Only check_word()
	 * and ctag_conv() set this, and neither runs unless SEARCH_PARSE or
	 * SEARCH_TAG was requested, so it must start out cleared.
	 */
	replaced = 0;

	/*
	 * Use saved pattern if no pattern supplied, or if only a delimiter
	 * character is supplied.  Only the pattern was saved, historic vi
	 * did not reuse any delta supplied.  The empty string is checked
	 * first so that ptrn[1] is never read past the terminating nul.
	 */
	if (ptrn == NULL || ptrn[0] == '\0' || ptrn[1] == '\0') {
		if (!F_ISSET(sp, S_RE_SET)) {
noprev:			msgq(sp, M_INFO, "No previous search pattern.");
			return (1);
		}
		*rep = &sp->sre;
		return (0);
	}

	re_flags = 0;				/* Set flags. */
	if (O_ISSET(sp, O_EXTENDED))
		re_flags |= REG_EXTENDED;
	if (O_ISSET(sp, O_IGNORECASE))
		re_flags |= REG_ICASE;

	if (LF_ISSET(SEARCH_PARSE)) {		/* Parse the string. */
		/* Set delimiter. */
		delim = *ptrn++;

		/*
		 * Find terminating delimiter, handling escaped delimiters.
		 * The pattern is compacted in place: p reads, t writes, and
		 * the backslash in front of an escaped delimiter is dropped.
		 */
		for (p = t = ptrn;;) {
			if (p[0] == '\0' || p[0] == delim) {
				*t = '\0';
				if (p[0] == delim)
					++p;
				break;
			}
			if (p[1] == delim && p[0] == '\\')
				++p;
			*t++ = *p++;
		}

		/*
		 * If characters after the terminating delimiter, it may
		 * be an error, or may be an offset.  In either case, we
		 * return the end of the string, whatever it may be, or
		 * change the end pointer to reference a NULL.  Don't just
		 * whack the string, in case it's text space.
		 */
		if (*p) {
			if (LF_ISSET(SEARCH_TERM)) {
				msgq(sp, M_ERR,
				    "Characters after search string.");
				return (1);
			}
			if (get_delta(sp, &p, deltap))
				return (1);
			if (epp != NULL)
				*epp = p;
		} else {
			/*
			 * STATIC: NEVER WRITTEN.
			 * Can't be const, because the normal case isn't.
			 */
			static char ebuf[1];
			if (epp != NULL)
				*epp = ebuf;
		}

		/* If the pattern was empty, use the previous pattern. */
		if (*ptrn == '\0') {
			if (!F_ISSET(sp, S_RE_SET))
				goto noprev;
			*rep = &sp->sre;
			return (0);
		}

		/* Replace any word search pattern. */
		if (check_word(sp, &ptrn, &replaced, wordoffsetp))
			return (1);
	} else if (LF_ISSET(SEARCH_TAG)) {
		if (ctag_conv(sp, &ptrn, &replaced))
			return (1);
		/* Tag patterns are never extended regular expressions. */
		re_flags &= ~REG_EXTENDED;
	}

	/* Compile the RE. */
	eval = regcomp(*rep, (char *)ptrn, re_flags);
	if (eval != 0)
		re_error(sp, eval, *rep);
	else if (LF_ISSET(SEARCH_SET)) {
		/* Remember the compiled expression for later reuse. */
		F_SET(sp, S_RE_SET);
		sp->searchdir = dir;
		sp->sre = **rep;
	}

	/* Free up any extra memory. */
	if (replaced)
		FREE_SPACE(sp, ptrn, 0);
	return (eval);
}

/* Informational messages issued through msgq() by f_search() and b_search(). */
#define	EMPTYMSG	"File empty; nothing to search."
#define	EOFMSG		"Reached end-of-file without finding the pattern."
#define	NOTFOUND	"Pattern not found."
#define	SOFMSG		"Reached top-of-file without finding the pattern."
#define	WRAPMSG		"Search wrapped."

/*
 * f_search --
 *	Search forward for a regular expression.
 *
 *	fm is the position the search starts from and rm is filled in with
 *	the position of the match.  ptrn, eptrn and flags are handed to
 *	resetup(), which compiles the pattern (or selects the saved one)
 *	and returns any line delta and word offset.
 *
 *	Messages are only displayed for search failures and wrapping if
 *	SEARCH_MSG is set in flags.
 *
 *	Returns 0 if a match was found and rm was set, 1 otherwise.
 */
int
f_search(sp, ep, fm, rm, ptrn, eptrn, flags)
	SCR *sp;
	EXF *ep;
	MARK *fm, *rm;
	char *ptrn, **eptrn;
	u_int flags;
{
	regmatch_t match[1];
	regex_t *re, lre;
	recno_t lastlno, lno;
	size_t coff, len;
	long delta;
	int eval, wordoffset, wrapped;
	char *l;

	/* Get the last line number; line 0 means the file is empty. */
	if (file_lline(sp, ep, &lno))
		return (1);
	if (lno == 0) {
		if (LF_ISSET(SEARCH_MSG))
			msgq(sp, M_INFO, EMPTYMSG);
		return (1);
	}

	/* resetup() may redirect re from the local lre to the saved RE. */
	re = &lre;
	if (resetup(sp, &re, FORWARD, ptrn, eptrn, &delta, &wordoffset, flags))
		return (1);

	/*
	 * Start searching immediately after the cursor.  If at the end of the
	 * line, start searching on the next line.  This is incompatible (read
	 * bug fix) with the historic vi -- searches for the '$' pattern never
	 * moved forward, and "-t foo" didn't work if "foo" was the first thing
	 * in the file.
	 */
	if (LF_ISSET(SEARCH_FILE)) {
		/* SEARCH_FILE: ignore fm and start at the top of the file. */
		lno = 1;
		coff = 0;
	} else {
		if ((l = file_gline(sp, ep, fm->lno, &len)) == NULL) {
			GETLINE_ERR(sp, fm->lno);
			return (1);
		}
		if (fm->cno + 1 >= len) {
			/* lno still holds the last line number here. */
			if (fm->lno == lno) {
				if (!O_ISSET(sp, O_WRAPSCAN)) {
					if (LF_ISSET(SEARCH_MSG))
						msgq(sp, M_INFO, EOFMSG);
					return (1);
				}
				lno = 1;
			} else
				lno = fm->lno + 1;
			coff = 0;
		} else {
			lno = fm->lno;
			coff = fm->cno + 1;
		}
	}

	/*
	 * f_search is called from the ex_tagfirst() routine, which runs
	 * before the screen really exists.  Make sure we don't step on
	 * anything.
	 */
	if (sp->s_position != NULL) {
		if (sp->s_position(sp, ep, &lastlno, 0, P_BOTTOM))
			return (1);
		(void)sp->s_busy_cursor(sp, NULL);
	} else
		lastlno = OOBLNO;

	/*
	 * Walk forward a line at a time.  Only the first line searched uses
	 * a non-zero starting column; the loop increment resets coff to 0.
	 */
	wrapped = 0;
	for (;; ++lno, coff = 0) {
		if ((l = file_gline(sp, ep, lno, &len)) == NULL) {
			/* Ran off the end of the file. */
			if (wrapped) {
				if (LF_ISSET(SEARCH_MSG))
					msgq(sp, M_INFO, NOTFOUND);
				break;
			}
			if (!O_ISSET(sp, O_WRAPSCAN)) {
				if (LF_ISSET(SEARCH_MSG))
					msgq(sp, M_INFO, EOFMSG);
				break;
			}
			/* The loop increment turns this into line 1. */
			lno = 0;
			wrapped = 1;
			continue;
		}

		/* If already at EOL, just keep going. */
		if (len && coff == len)
			continue;

		/* If it's going to be awhile, put up a message. */
		if (lno == lastlno)
			(void)sp->s_busy_cursor(sp, "Searching...");

		/* Set the termination. */
		match[0].rm_so = coff;
		match[0].rm_eo = len;

#if defined(DEBUG) && defined(SEARCHDEBUG)
		TRACE(sp, "F search: %lu from %u to %u\n",
		    lno, coff, len ? len - 1 : len);
#endif
		/*
		 * Search the line.  REG_STARTEND limits the search to the
		 * range stored in match[0]; REG_NOTBOL is added when the
		 * range does not begin at the start of the line.
		 */
		eval = regexec(re, (char *)l, 1, match,
		    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
		if (eval == REG_NOMATCH)
			continue;
		if (eval != 0) {
			re_error(sp, eval, re);
			break;
		}
		
		/* Warn if wrapped. */
		if (wrapped && O_ISSET(sp, O_WARN) && LF_ISSET(SEARCH_MSG))
			msgq(sp, M_INFO, WRAPMSG);

		/*
		 * If an offset, see if it's legal.  It's possible to match
		 * past the end of the line with $, so check for that case.
		 */
		if (delta) {
			if (check_delta(sp, ep, delta, lno))
				break;
			/* A line offset always lands in the first column. */
			rm->lno = delta + lno;
			rm->cno = 0;
		} else {
#if defined(DEBUG) && defined(SEARCHDEBUG)
			TRACE(sp, "found: %qu to %qu\n",
			    match[0].rm_so, match[0].rm_eo);
#endif
			rm->lno = lno;
			rm->cno = match[0].rm_so;

			/* Step over the character matched before the word. */
			if (wordoffset)
				++rm->cno;

			/* Keep the column inside the line. */
			if (rm->cno >= len)
				rm->cno = len ? len - 1 : 0;
		}
		return (0);
	}
	return (1);
}

/*
 * b_search --
 *	Search backward for a regular expression.
 *
 *	fm is the position the search starts from and rm is filled in with
 *	the position of the match.  ptrn, eptrn and flags are handed to
 *	resetup(), which compiles the pattern (or selects the saved one)
 *	and returns any line delta and word offset.
 *
 *	Within a line the regular expression engine only searches forward,
 *	so after the first hit the line is rescanned to find the last
 *	match in front of the cursor.
 *
 *	Messages are only displayed for search failures and wrapping if
 *	SEARCH_MSG is set in flags.
 *
 *	Returns 0 if a match was found and rm was set, 1 otherwise.
 */
int
b_search(sp, ep, fm, rm, ptrn, eptrn, flags)
	SCR *sp;
	EXF *ep;
	MARK *fm, *rm;
	char *ptrn, **eptrn;
	u_int flags;
{
	regmatch_t match[1];
	regex_t *re, lre;
	recno_t firstlno, lno;
	size_t coff, end, len, last;
	long delta;
	int eval, wordoffset, wrapped;
	char *l;

	/* Get the last line number; line 0 means the file is empty. */
	if (file_lline(sp, ep, &lno))
		return (1);
	if (lno == 0) {
		if (LF_ISSET(SEARCH_MSG))
			msgq(sp, M_INFO, EMPTYMSG);
		return (1);
	}

	/* resetup() may redirect re from the local lre to the saved RE. */
	re = &lre;
	if (resetup(sp, &re, BACKWARD, ptrn, eptrn, &delta, &wordoffset, flags))
		return (1);

	/*
	 * If in the first column, start searching on the previous line.
	 * When already on line 1 and wrapping is permitted, lno is left at
	 * the last line of the file, as set by file_lline() above.
	 */
	if (fm->cno == 0) {
		if (fm->lno == 1) {
			if (!O_ISSET(sp, O_WRAPSCAN)) {
				if (LF_ISSET(SEARCH_MSG))
					msgq(sp, M_INFO, SOFMSG);
				return (1);
			}
		} else
			lno = fm->lno - 1;
	} else
		lno = fm->lno;

	/*
	 * NOTE(review): unlike f_search(), sp->s_position is called here
	 * without a NULL check -- confirm that b_search() is never reached
	 * before the screen exists.
	 */
	if (sp->s_position(sp, ep, &firstlno, 0, P_TOP))
		return (1);
	(void)sp->s_busy_cursor(sp, NULL);

	/*
	 * Walk backward a line at a time.  Only the first line searched is
	 * limited by the cursor column; the loop step resets coff to 0,
	 * which means "search the whole line".
	 */
	wrapped = 0;
	for (coff = fm->cno;; --lno, coff = 0) {
		if (lno == 0) {
			/* Ran off the top of the file. */
			if (!O_ISSET(sp, O_WRAPSCAN)) {
				if (LF_ISSET(SEARCH_MSG))
					msgq(sp, M_INFO, SOFMSG);
				break;
			}
			if (file_lline(sp, ep, &lno))
				return (1);
			if (lno == 0) {
				if (LF_ISSET(SEARCH_MSG))
					msgq(sp, M_INFO, EMPTYMSG);
				break;
			}
			/* The loop step turns this back into the last line. */
			++lno;
			wrapped = 1;
			continue;
		} else if (lno == fm->lno && wrapped) {
			if (LF_ISSET(SEARCH_MSG))
				msgq(sp, M_INFO, NOTFOUND);
			break;
		}

		if ((l = file_gline(sp, ep, lno, &len)) == NULL)
			return (1);

		/* If it's going to be awhile, put up a message. */
		if (lno == firstlno)
			(void)sp->s_busy_cursor(sp, "Searching...");

		/*
		 * Set the termination.
		 *
		 * NOTE(review): the first pass stops at coff - 1 while the
		 * rescans below stop at coff; left as it was, confirm which
		 * limit is intended.
		 */
		match[0].rm_so = 0;
		match[0].rm_eo = coff ? coff - 1 : len;

#if defined(DEBUG) && defined(SEARCHDEBUG)
		TRACE(sp, "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo);
#endif
		/* Search the line. */
		eval = regexec(re, (char *)l, 1, match,
		    (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
		if (eval == REG_NOMATCH)
			continue;
		if (eval != 0) {
			re_error(sp, eval, re);
			break;
		}

		/* Warn if wrapped. */
		if (wrapped && O_ISSET(sp, O_WARN) && LF_ISSET(SEARCH_MSG))
			msgq(sp, M_INFO, WRAPMSG);

		if (delta) {
			if (check_delta(sp, ep, delta, lno))
				break;
			/* A line offset always lands in the first column. */
			rm->lno = delta + lno;
			rm->cno = 0;
		} else {
#if defined(DEBUG) && defined(SEARCHDEBUG)
			TRACE(sp, "found: %qu to %qu\n",
			    match[0].rm_so, match[0].rm_eo);
#endif
			/*
			 * Find the last acceptable one in this line.  This
			 * is really painful, we need a cleaner interface to
			 * regexec to make this possible.
			 *
			 * Each rescan starts past the previous match and
			 * stops at the cursor (or the end of the line).  The
			 * loop ends as soon as the start reaches that limit,
			 * so regexec() is never handed a range whose start
			 * lies beyond its end.  A rescan never begins at the
			 * start of the line, and only ends at the end of the
			 * line if the limit is len, so '^' and '$' must be
			 * told not to anchor at the range boundaries.
			 */
			end = coff ? coff : len;
			for (;;) {
				last = match[0].rm_so;
				match[0].rm_so = match[0].rm_eo + 1;
				if (match[0].rm_so >= end)
					break;
				match[0].rm_eo = end;
				eval = regexec(re, (char *)l, 1, match,
				    REG_NOTBOL |
				    (end == len ? 0 : REG_NOTEOL) |
				    REG_STARTEND);
				if (eval == REG_NOMATCH)
					break;
				if (eval != 0) {
					re_error(sp, eval, re);
					return (1);
				}
			}
			rm->lno = lno;
			rm->cno = last;

			/* Step over the character matched before the word. */
			if (wordoffset)
				++rm->cno;
		}
		return (0);
	}
	return (1);
}

/*
 * check_word --
 *	Vi special cases the pattern "\<ptrn\>", doing "word" searches.
 *
 *	If the pattern at *ptrnp contains at least one "\<", a copy is
 *	built in space obtained with GET_SPACE in which every "\<" and
 *	"\>" is replaced by RE_NOTINWORD; *ptrnp is pointed at the copy and
 *	*replacedp is set so that the caller knows to release it.
 *	*wordoffsetp is set if the pattern begins with "\<".
 *
 *	A backslash followed by any other character is copied through as
 *	a pair, so an escaped backslash in front of '<' or '>' is not
 *	mistaken for a word marker.
 *
 *	Returns 0; any failure return comes from GET_SPACE.
 */
static int
check_word(sp, ptrnp, replacedp, wordoffsetp)
	SCR *sp;
	char **ptrnp;
	int *replacedp, *wordoffsetp;
{
	size_t blen, needspace;
	int nbegin, nmark;
	char *bp, *p, *t;

	*replacedp = *wordoffsetp = 0;

	/*
	 * Count up the "word" patterns.  nbegin counts "\<" only and decides
	 * whether anything is done at all; nmark counts both "\<" and "\>"
	 * so that the buffer is large enough for every replacement.
	 */
	nbegin = nmark = 0;
	for (p = *ptrnp; *p != '\0'; ++p) {
		if (p[0] != '\\' || p[1] == '\0')
			continue;
		if (p[1] == '<') {
			++nbegin;
			++nmark;
		} else if (p[1] == '>')
			++nmark;
		++p;			/* Step over the escaped character. */
	}
	if (nbegin == 0)
		return (0);

	/* Report back if altered the start of the search pattern. */
	p = *ptrnp;
	if (p[0] == '\\' && p[1] == '<')
		*wordoffsetp = 1;

	/*
	 * Get enough memory to hold the final pattern: the original text,
	 * one RE_NOTINWORD per marker, and the terminating nul.
	 */
	needspace =
	    strlen(*ptrnp) + (size_t)nmark * sizeof(RE_NOTINWORD) + 1;
	GET_SPACE(sp, bp, blen, needspace);

	for (p = *ptrnp, t = bp; *p != '\0';) {
		if (p[0] == '\\' && (p[1] == '<' || p[1] == '>')) {
			memmove(t, RE_NOTINWORD, sizeof(RE_NOTINWORD) - 1);
			t += sizeof(RE_NOTINWORD) - 1;
			p += 2;
		} else if (p[0] == '\\' && p[1] != '\0') {
			/* Any other escape is copied as a unit. */
			*t++ = *p++;
			*t++ = *p++;
		} else
			*t++ = *p++;
	}
	*t = '\0';

	*ptrnp = bp;
	*replacedp = 1;
	return (0);
}

/*
 * ctag_conv --
 *	Convert a tags search path into something that regex can handle.
 *
 *	The converted pattern is built in space obtained with GET_SPACE;
 *	*ptrnp is pointed at it and *replacedp is set so that the caller
 *	knows to release it.  The original string is only read, never
 *	modified, and patterns of any length (including empty ones) are
 *	handled without looking outside the string.
 *
 *	Returns 0; any failure return comes from GET_SPACE.
 */
static int
ctag_conv(sp, ptrnp, replacedp)
	SCR *sp;
	char **ptrnp;
	int *replacedp;
{
	size_t blen, len;
	int lastdollar;
	char *bp, *end, *p, *t;

	*replacedp = 0;

	len = strlen(p = *ptrnp);

	/*
	 * Max memory usage is 2 times the length of the string (every
	 * character escaped), plus one for the terminating nul.
	 */
	GET_SPACE(sp, bp, blen, len * 2 + 1);

	t = bp;

	/* If the last character is a '/' or '?', we just strip it. */
	if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
		--len;

	/*
	 * If what is now the last character is a '$', it's magic.  It is
	 * looked for after the delimiter has been dropped so that a
	 * pattern without a trailing delimiter does not lose its final
	 * character.
	 */
	if (len > 0 && p[len - 1] == '$') {
		lastdollar = 1;
		--len;
	} else
		lastdollar = 0;

	/* Everything from end onward has been stripped. */
	end = p + len;

	/* If the first character is a '/' or '?', we just strip it. */
	if (p < end && (p[0] == '/' || p[0] == '?'))
		++p;

	/* If the next character is a '^', it's magic. */
	if (p < end && p[0] == '^')
		*t++ = *p++;

	/*
	 * Escape every other magic character we can find, stripping the
	 * backslashes ctags inserts to escape the search delimiter
	 * characters.
	 */
	while (p < end) {
		/* Ctags escapes the search delimiter characters. */
		if (p[0] == '\\' && p + 1 < end &&
		    (p[1] == '/' || p[1] == '?'))
			++p;
		else if (strchr("^.[$*", p[0]))
			*t++ = '\\';
		*t++ = *p++;
	}
	if (lastdollar)
		*t++ = '$';
	*t = '\0';

	*ptrnp = bp;
	*replacedp = 1;
	return (0);
}

/*
 * get_delta --
 *	Get a line delta.  The trickiness is that the delta can be pretty
 *	complicated, i.e. "+3-2+3" is allowed.
 *
 *	*dp references the text to parse and is advanced past everything
 *	that was consumed.  A '+' or '-' that is not followed by a number
 *	counts as one line in that direction.  The sum of the terms is
 *	stored in *valp.
 *
 *	Returns 0 on success, 1 after issuing a message if the text holds
 *	anything other than signs and digits or if the sum does not fit in
 *	a long; *valp is not written in that case.
 */
static int
get_delta(sp, dp, valp)
	SCR *sp;
	char **dp;
	long *valp;
{
	long val, tval;
	char *end;

	for (tval = 0; **dp != '\0';) {
		if (!strchr("+-0123456789", **dp)) {
			msgq(sp, M_ERR, "Characters after delta string.");
			return (1);
		}
		errno = 0;
		val = strtol(*dp, &end, 10);
		if (end == *dp) {
			/*
			 * Nothing was converted, which can only happen for
			 * a sign without digits.  Consume the sign, or the
			 * loop would never make progress.
			 */
			val = **dp == '-' ? -1 : 1;
			++end;
		}
		*dp = end;
		if (errno == ERANGE) {
			if (val == LONG_MAX)
				msgq(sp, M_ERR, "Delta value overflow.");
			else if (val == LONG_MIN)
				msgq(sp, M_ERR, "Delta value underflow.");
			else
				msgq(sp, M_ERR, "Error: %s.", strerror(errno));
			return (1);
		}

		/*
		 * Check the sum before computing it.  It can only leave the
		 * range of a long when both operands have the same sign,
		 * and testing that first keeps the subtractions below from
		 * overflowing themselves.
		 */
		if (val >= 0) {
			if (tval > 0 && val > LONG_MAX - tval) {
				msgq(sp, M_ERR, "Delta value overflow.");
				return (1);
			}
		} else
			if (tval < 0 && val < LONG_MIN - tval) {
				msgq(sp, M_ERR, "Delta value underflow.");
				return (1);
			}
		tval += val;
	}
	*valp = tval;
	return (0);
}

/*
 * check_delta --
 *	Check a line delta to see if it's legal.
 *
 *	lno is the line the match was found on and delta the signed line
 *	offset to apply to it.  Returns 0 if lno + delta is a line that
 *	file_gline() can retrieve, 1 after issuing a message otherwise.
 */
static int
check_delta(sp, ep, delta, lno)
	SCR *sp;
	EXF *ep;
	long delta;
	recno_t lno;
{
	/*
	 * A negative delta is compared by magnitude.  Casting the negative
	 * value itself to an unsigned type yields a huge number, which
	 * would reject every backward offset.  The magnitude is computed
	 * in unsigned arithmetic so that LONG_MIN is handled too.
	 */
	if (delta < 0 &&
	    0UL - (unsigned long)delta >= (unsigned long)lno) {
		msgq(sp, M_ERR, "Search offset before line 1.");
		return (1);
	}
	if (file_gline(sp, ep, lno + delta, NULL) == NULL) {
		msgq(sp, M_ERR, "Search offset past end-of-file.");
		return (1);
	}
	return (0);
}

/*
 * re_error --
 *	Report a regular expression error.
 */
void
re_error(sp, errcode, preg)
	SCR *sp;
	int errcode;
	regex_t *preg;
{
	size_t s;
	char *oe;

	s = regerror(errcode, preg, "", 0);
	if ((oe = malloc(s)) == NULL)
		msgq(sp, M_ERR, "Error: %s", strerror(errno));
	else {
		(void)regerror(errcode, preg, oe, s);
		msgq(sp, M_ERR, "RE error: %s", oe);
	}
	free(oe);
}