4.4BSD/usr/src/contrib/nvi/nvi/search.c
/*-
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef lint
static char sccsid[] = "@(#)search.c 8.1 (Berkeley) 6/9/93";
#endif /* not lint */
#include <sys/types.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "vi.h"
/*
 * Forward declarations for the static helper routines defined later in
 * this file.
 */
static int check_delta __P((SCR *, EXF *, long, recno_t));
static int check_word __P((SCR *, char **, int *, int *));
static int ctag_conv __P((SCR *, char **, int *));
static int get_delta __P((SCR *, char **, long *));
static int resetup __P((SCR *, regex_t **, enum direction,
char *, char **, long *, int *, u_int));
/*
 * resetup --
 *	Set up a search for a regular expression.
 *
 *	sp:		the screen; holds the saved RE (sp->sre), the saved
 *			search direction and the S_RE_SET flag.
 *	rep:		on entry, *rep is the regex_t to compile into; on
 *			return, *rep is the RE to execute (either that one
 *			or the saved sp->sre).
 *	dir:		direction stored in sp->searchdir if SEARCH_SET.
 *	ptrn:		the pattern.  With SEARCH_PARSE the first character
 *			is the delimiter and the text is edited in place to
 *			remove escapes of the delimiter.
 *	epp:		with SEARCH_PARSE, if non-NULL, set to reference
 *			whatever follows the pattern and its delta.
 *	deltap:		set to the line delta following the pattern, or 0.
 *	wordoffsetp:	set by check_word() if the pattern starts with "\<".
 *	flags:		SEARCH_* flags.
 *
 *	Returns 0 on success.  On failure a message has been queued and
 *	the return is non-zero (1, or the regcomp() error code).
 */
static int
resetup(sp, rep, dir, ptrn, epp, deltap, wordoffsetp, flags)
	SCR *sp;
	regex_t **rep;
	enum direction dir;
	char *ptrn, **epp;
	long *deltap;
	int *wordoffsetp;
	u_int flags;
{
	int delim, eval, re_flags, replaced;
	char *p, *t;

	/* Set return information to the default. */
	*deltap = 0;
	*wordoffsetp = 0;

	/*
	 * The pattern is only replaced by check_word() or ctag_conv().
	 * If neither runs, there must not be anything to free below.
	 */
	replaced = 0;

	/*
	 * Use saved pattern if no pattern supplied, or if only a delimiter
	 * character is supplied.  Only the pattern was saved, historic vi
	 * did not reuse any delta supplied.  An empty string is checked
	 * explicitly so that ptrn[1] is never read past the terminator.
	 */
	if (ptrn == NULL || ptrn[0] == '\0' || ptrn[1] == '\0') {
		if (!F_ISSET(sp, S_RE_SET)) {
noprev:			msgq(sp, M_INFO, "No previous search pattern.");
			return (1);
		}
		*rep = &sp->sre;
		return (0);
	}

	re_flags = 0;				/* Set flags. */
	if (O_ISSET(sp, O_EXTENDED))
		re_flags |= REG_EXTENDED;
	if (O_ISSET(sp, O_IGNORECASE))
		re_flags |= REG_ICASE;

	if (LF_ISSET(SEARCH_PARSE)) {		/* Parse the string. */
		/* Set delimiter. */
		delim = *ptrn++;

		/*
		 * Find terminating delimiter, handling escaped delimiters.
		 * The pattern is compacted in place: p reads, t writes.
		 */
		for (p = t = ptrn;;) {
			if (p[0] == '\0' || p[0] == delim) {
				*t = '\0';
				if (p[0] == delim)
					++p;
				break;
			}
			if (p[1] == delim && p[0] == '\\')
				++p;
			*t++ = *p++;
		}

		/*
		 * If characters after the terminating delimiter, it may
		 * be an error, or may be an offset.  In either case, we
		 * return the end of the string, whatever it may be, or
		 * change the end pointer to reference a NULL.  Don't just
		 * whack the string, in case it's text space.
		 */
		if (*p) {
			if (LF_ISSET(SEARCH_TERM)) {
				msgq(sp, M_ERR,
				    "Characters after search string.");
				return (1);
			}
			if (get_delta(sp, &p, deltap))
				return (1);
			if (epp != NULL)
				*epp = p;
		} else {
			/*
			 * STATIC: NEVER WRITTEN.
			 * Can't be const, because the normal case isn't.
			 */
			static char ebuf[1];

			if (epp != NULL)
				*epp = ebuf;
		}

		/* If the pattern was empty, use the previous pattern. */
		if (*ptrn == '\0') {
			if (!F_ISSET(sp, S_RE_SET))
				goto noprev;
			*rep = &sp->sre;
			return (0);
		}

		/* Replace any word search pattern. */
		if (check_word(sp, &ptrn, &replaced, wordoffsetp))
			return (1);
	} else if (LF_ISSET(SEARCH_TAG)) {
		if (ctag_conv(sp, &ptrn, &replaced))
			return (1);
		/* Tag patterns are never extended REs. */
		re_flags &= ~REG_EXTENDED;
	}

	/* Compile the RE. */
	if ((eval = regcomp(*rep, (char *)ptrn, re_flags)) != 0)
		re_error(sp, eval, *rep);
	else if (LF_ISSET(SEARCH_SET)) {
		F_SET(sp, S_RE_SET);
		sp->searchdir = dir;
		sp->sre = **rep;
	}

	/* Free up any extra memory. */
	if (replaced)
		FREE_SPACE(sp, ptrn, 0);
	return (eval);
}
/* Text of the informational messages queued by f_search() and b_search(). */
#define EMPTYMSG "File empty; nothing to search."
#define EOFMSG "Reached end-of-file without finding the pattern."
#define NOTFOUND "Pattern not found."
#define SOFMSG "Reached top-of-file without finding the pattern."
#define WRAPMSG "Search wrapped."
/*
 * f_search --
 * Search forward for a regular expression, starting from the mark fm
 * (or from the first line if SEARCH_FILE is set), and store the
 * location found in rm.
 *
 * ptrn, eptrn and flags are handed to resetup() to obtain the RE and
 * any line delta. If a delta was given, rm is set to column 0 of the
 * matching line plus the delta; otherwise rm is set to the start of
 * the match. Messages are only queued if SEARCH_MSG is set.
 *
 * Returns 0 if a match was found, 1 otherwise.
 */
int
f_search(sp, ep, fm, rm, ptrn, eptrn, flags)
SCR *sp;
EXF *ep;
MARK *fm, *rm;
char *ptrn, **eptrn;
u_int flags;
{
regmatch_t match[1];
regex_t *re, lre;
recno_t lastlno, lno;
size_t coff, len;
long delta;
int eval, wordoffset, wrapped;
char *l;
/*
 * NOTE(review): file_lline() presumably stores the number of the last
 * line in lno; a value of 0 is treated as an empty file.
 */
if (file_lline(sp, ep, &lno))
return (1);
if (lno == 0) {
if (LF_ISSET(SEARCH_MSG))
msgq(sp, M_INFO, EMPTYMSG);
return (1);
}
/*
 * Compile into the local regex_t. resetup() may instead point re at
 * the RE saved in the screen.
 */
re = &lre;
if (resetup(sp, &re, FORWARD, ptrn, eptrn, &delta, &wordoffset, flags))
return (1);
/*
 * Start searching immediately after the cursor. If at the end of the
 * line, start searching on the next line. This is incompatible (read
 * bug fix) with the historic vi -- searches for the '$' pattern never
 * moved forward, and "-t foo" didn't work if "foo" was the first thing
 * in the file.
 */
if (LF_ISSET(SEARCH_FILE)) {
lno = 1;
coff = 0;
} else {
if ((l = file_gline(sp, ep, fm->lno, &len)) == NULL) {
GETLINE_ERR(sp, fm->lno);
return (1);
}
if (fm->cno + 1 >= len) {
/* At the end of the line; lno still holds the file_lline() value. */
if (fm->lno == lno) {
if (!O_ISSET(sp, O_WRAPSCAN)) {
if (LF_ISSET(SEARCH_MSG))
msgq(sp, M_INFO, EOFMSG);
return (1);
}
lno = 1;
} else
lno = fm->lno + 1;
coff = 0;
} else {
lno = fm->lno;
coff = fm->cno + 1;
}
}
/*
 * f_search is called from the ex_tagfirst() routine, which runs
 * before the screen really exists. Make sure we don't step on
 * anything.
 */
if (sp->s_position != NULL) {
if (sp->s_position(sp, ep, &lastlno, 0, P_BOTTOM))
return (1);
(void)sp->s_busy_cursor(sp, NULL);
} else
lastlno = OOBLNO;
wrapped = 0;
/* Only the first line searched starts at a non-zero column offset. */
for (;; ++lno, coff = 0) {
/* A NULL line here is taken to mean the end of the file was passed. */
if ((l = file_gline(sp, ep, lno, &len)) == NULL) {
if (wrapped) {
if (LF_ISSET(SEARCH_MSG))
msgq(sp, M_INFO, NOTFOUND);
break;
}
if (!O_ISSET(sp, O_WRAPSCAN)) {
if (LF_ISSET(SEARCH_MSG))
msgq(sp, M_INFO, EOFMSG);
break;
}
/* Wrap: the loop increment moves lno on to line 1. */
lno = 0;
wrapped = 1;
continue;
}
/* If already at EOL, just keep going. */
if (len && coff == len)
continue;
/* If it's going to be awhile, put up a message. */
if (lno == lastlno)
(void)sp->s_busy_cursor(sp, "Searching...");
/* Set the termination. */
match[0].rm_so = coff;
match[0].rm_eo = len;
#if defined(DEBUG) && defined(SEARCHDEBUG)
TRACE(sp, "F search: %lu from %u to %u\n",
lno, coff, len ? len - 1 : len);
#endif
/* Search the line. */
eval = regexec(re, (char *)l, 1, match,
(match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
if (eval == REG_NOMATCH)
continue;
if (eval != 0) {
re_error(sp, eval, re);
break;
}
/* Warn if wrapped. */
if (wrapped && O_ISSET(sp, O_WARN) && LF_ISSET(SEARCH_MSG))
msgq(sp, M_INFO, WRAPMSG);
/*
 * If an offset, see if it's legal. It's possible to match
 * past the end of the line with $, so check for that case.
 */
if (delta) {
if (check_delta(sp, ep, delta, lno))
break;
rm->lno = delta + lno;
rm->cno = 0;
} else {
#if defined(DEBUG) && defined(SEARCHDEBUG)
TRACE(sp, "found: %qu to %qu\n",
match[0].rm_so, match[0].rm_eo);
#endif
rm->lno = lno;
rm->cno = match[0].rm_so;
/* Step over the character the word-search replacement matched. */
if (wordoffset)
++rm->cno;
/* Keep the column on the line. */
if (rm->cno >= len)
rm->cno = len ? len - 1 : 0;
}
return (0);
}
return (1);
}
/*
 * b_search --
 *	Search backward for a regular expression, starting from the mark
 *	fm, and store the location found in rm.
 *
 *	ptrn, eptrn and flags are handed to resetup() to obtain the RE and
 *	any line delta.  If a delta was given, rm is set to column 0 of the
 *	matching line plus the delta; otherwise rm is set to the start of
 *	the last acceptable match on the line.  Informational messages are
 *	only queued if SEARCH_MSG is set.
 *
 *	Returns 0 if a match was found, 1 otherwise.
 */
int
b_search(sp, ep, fm, rm, ptrn, eptrn, flags)
	SCR *sp;
	EXF *ep;
	MARK *fm, *rm;
	char *ptrn, **eptrn;
	u_int flags;
{
	regmatch_t match[1];
	regex_t *re, lre;
	recno_t firstlno, lno;
	size_t coff, len, last, limit;
	long delta;
	int eval, wordoffset, wrapped;
	char *l;

	if (file_lline(sp, ep, &lno))
		return (1);
	if (lno == 0) {
		if (LF_ISSET(SEARCH_MSG))
			msgq(sp, M_INFO, EMPTYMSG);
		return (1);
	}

	/*
	 * Compile into the local regex_t.  resetup() may instead point re
	 * at the RE saved in the screen.
	 */
	re = &lre;
	if (resetup(sp, &re, BACKWARD, ptrn, eptrn, &delta, &wordoffset, flags))
		return (1);

	/* If in the first column, start searching on the previous line. */
	if (fm->cno == 0) {
		if (fm->lno == 1) {
			if (!O_ISSET(sp, O_WRAPSCAN)) {
				if (LF_ISSET(SEARCH_MSG))
					msgq(sp, M_INFO, SOFMSG);
				return (1);
			}
			/* lno keeps the value file_lline() stored in it. */
		} else
			lno = fm->lno - 1;
	} else
		lno = fm->lno;

	if (sp->s_position(sp, ep, &firstlno, 0, P_TOP))
		return (1);
	(void)sp->s_busy_cursor(sp, NULL);

	wrapped = 0;
	/* Only the first line searched is limited by the cursor column. */
	for (coff = fm->cno;; --lno, coff = 0) {
		if (lno == 0) {
			if (!O_ISSET(sp, O_WRAPSCAN)) {
				if (LF_ISSET(SEARCH_MSG))
					msgq(sp, M_INFO, SOFMSG);
				break;
			}
			if (file_lline(sp, ep, &lno))
				return (1);
			if (lno == 0) {
				if (LF_ISSET(SEARCH_MSG))
					msgq(sp, M_INFO, EMPTYMSG);
				break;
			}
			/* The loop decrement brings lno back to this line. */
			++lno;
			wrapped = 1;
			continue;
		} else if (lno == fm->lno && wrapped) {
			if (LF_ISSET(SEARCH_MSG))
				msgq(sp, M_INFO, NOTFOUND);
			break;
		}

		/* Report the failure, as f_search() does for its first line. */
		if ((l = file_gline(sp, ep, lno, &len)) == NULL) {
			GETLINE_ERR(sp, lno);
			return (1);
		}

		/* If it's going to be awhile, put up a message. */
		if (lno == firstlno)
			(void)sp->s_busy_cursor(sp, "Searching...");

		/* Set the termination. */
		match[0].rm_so = 0;
		match[0].rm_eo = coff ? coff - 1 : len;

#if defined(DEBUG) && defined(SEARCHDEBUG)
		TRACE(sp, "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo);
#endif
		/* Search the line. */
		eval = regexec(re, (char *)l, 1, match,
		    (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
		if (eval == REG_NOMATCH)
			continue;
		if (eval != 0) {
			re_error(sp, eval, re);
			break;
		}

		/* Warn if wrapped. */
		if (wrapped && O_ISSET(sp, O_WARN) && LF_ISSET(SEARCH_MSG))
			msgq(sp, M_INFO, WRAPMSG);

		if (delta) {
			if (check_delta(sp, ep, delta, lno))
				break;
			rm->lno = delta + lno;
			rm->cno = 0;
		} else {
#if defined(DEBUG) && defined(SEARCHDEBUG)
			TRACE(sp, "found: %qu to %qu\n",
			    match[0].rm_so, match[0].rm_eo);
#endif
			/*
			 * Find the last acceptable one in this line.  This
			 * is really painful, we need a cleaner interface to
			 * regexec to make this possible.
			 *
			 * The retries are bounded by the same limit that is
			 * used as the end of the search range, so regexec is
			 * never handed a range whose start is past its end.
			 */
			limit = coff ? coff : len;
			for (;;) {
				last = match[0].rm_so;
				match[0].rm_so = match[0].rm_eo + 1;
				if ((size_t)match[0].rm_so >= limit)
					break;
				match[0].rm_eo = limit;
				eval = regexec(re,
				    (char *)l, 1, match, REG_STARTEND);
				if (eval == REG_NOMATCH)
					break;
				if (eval != 0) {
					re_error(sp, eval, re);
					return (1);
				}
			}
			rm->lno = lno;
			rm->cno = last;
			/* Step over the character the word-search replacement matched. */
			if (wordoffset)
				++rm->cno;
		}
		return (0);
	}
	return (1);
}
/*
 * check_word --
 *	Vi special cases the pattern "\<ptrn\>", doing "word" searches.
 *
 *	If the pattern referenced by ptrnp contains at least one "\<",
 *	every "\<" and "\>" in it is replaced by RE_NOTINWORD in a buffer
 *	obtained with GET_SPACE, *ptrnp is changed to reference that
 *	buffer and *replacedp is set so the caller knows to release it.
 *	*wordoffsetp is set if the pattern began with "\<".
 *
 *	Returns 0 on success.
 */
static int
check_word(sp, ptrnp, replacedp, wordoffsetp)
	SCR *sp;
	char **ptrnp;
	int *replacedp, *wordoffsetp;
{
	size_t blen, needspace, nmarks, nstarts;
	char *bp, *p, *t;

	*replacedp = *wordoffsetp = 0;

	/*
	 * Count up the "word" patterns.  Both forms are counted, because
	 * both are expanded below and the buffer has to hold all of them.
	 */
	nmarks = nstarts = 0;
	for (p = *ptrnp; *p; ++p)
		if (p[0] == '\\' && (p[1] == '<' || p[1] == '>')) {
			++nmarks;
			if (p[1] == '<')
				++nstarts;
		}

	/* As before, a pattern without any "\<" is left untouched. */
	if (nstarts == 0)
		return (0);

	/* Report back if altered the start of the search pattern. */
	p = *ptrnp;
	if (p[0] == '\\' && p[1] == '<')
		*wordoffsetp = 1;

	/*
	 * Get enough memory to hold the final pattern: the original text,
	 * one expansion per marker, and the terminating nul.
	 */
	needspace =
	    strlen(*ptrnp) + nmarks * (sizeof(RE_NOTINWORD) - 1) + 1;
	GET_SPACE(sp, bp, blen, needspace);

	/*
	 * Copy the pattern, expanding the markers.  The parentheses matter:
	 * only a backslash followed by '<' or '>' is a marker.
	 */
	for (p = *ptrnp, t = bp; *p;)
		if (p[0] == '\\' && (p[1] == '<' || p[1] == '>')) {
			memmove(t, RE_NOTINWORD, sizeof(RE_NOTINWORD) - 1);
			t += sizeof(RE_NOTINWORD) - 1;
			p += 2;
		} else
			*t++ = *p++;
	*t = '\0';

	*ptrnp = bp;
	*replacedp = 1;
	return (0);
}
/*
 * ctag_conv --
 *	Convert a tags search path into something that regex can handle.
 *
 *	The string referenced by ptrnp is trimmed in place (trailing
 *	delimiter and trailing '$'), and an escaped copy is built in a
 *	buffer obtained with GET_SPACE.  On return *ptrnp references that
 *	buffer and *replacedp is set so the caller knows to release it.
 *
 *	Returns 0 on success.
 */
static int
ctag_conv(sp, ptrnp, replacedp)
	SCR *sp;
	char **ptrnp;
	int *replacedp;
{
	size_t blen, len;
	int lastdollar;
	char *bp, *p, *t;

	*replacedp = 0;

	len = strlen(p = *ptrnp);

	/*
	 * Max memory usage is 2 times the length of the string (every
	 * character escaped), plus one byte for the terminating nul.
	 */
	GET_SPACE(sp, bp, blen, len * 2 + 1);
	t = bp;

	/* The last character is a '/' or '?', we just strip it. */
	if (len >= 1 && (p[len - 1] == '/' || p[len - 1] == '?'))
		p[len - 1] = '\0';

	/* The next-to-last character is a '$', and it's magic. */
	if (len >= 2 && p[len - 2] == '$') {
		lastdollar = 1;
		p[len - 2] = '\0';
	} else
		lastdollar = 0;

	/* The first character is a '/' or '?', we just strip it. */
	if (p[0] == '/' || p[0] == '?')
		++p;

	/* The second character is a '^', and it's magic. */
	if (p[0] == '^')
		*t++ = *p++;

	/*
	 * Escape every other magic character we can find, stripping the
	 * backslashes ctags inserts to escape the search delimiter
	 * characters.
	 */
	while (p[0]) {
		/* Ctags escapes the search delimiter characters. */
		if (p[0] == '\\' && (p[1] == '/' || p[1] == '?'))
			++p;
		else if (strchr("^.[$*", p[0]))
			*t++ = '\\';
		*t++ = *p++;
	}

	/* Put back the trailing anchor, unescaped. */
	if (lastdollar)
		*t++ = '$';
	*t++ = '\0';

	*ptrnp = bp;
	*replacedp = 1;
	return (0);
}
/*
 * get_delta --
 *	Get a line delta.  The trickiness is that the delta can be pretty
 *	complicated, i.e. "+3-2+3" is allowed.
 *
 *	dp references the text to parse; it is advanced past everything
 *	that was consumed.  The sum of the terms is stored in *valp.  A
 *	sign that is not followed by a number counts as 1 line in that
 *	direction, so "+" is +1 and "--" is -2.
 *
 *	Returns 0 on success, 1 (with a message queued) on a malformed or
 *	out-of-range delta.
 */
static int
get_delta(sp, dp, valp)
	SCR *sp;
	char **dp;
	long *valp;
{
	long val, tval;
	char *start;

	for (tval = 0; **dp;) {
		if (!strchr("+-0123456789", **dp)) {
			msgq(sp, M_ERR, "Characters after delta string.");
			return (1);
		}

		start = *dp;
		errno = 0;
		val = strtol(start, dp, 10);
		if (*dp == start) {
			/*
			 * Nothing was converted, so this is a sign without
			 * digits.  Consume it explicitly, otherwise the loop
			 * would look at the same character forever.
			 */
			val = *start == '-' ? -1 : 1;
			*dp = start + 1;
		} else if (errno == ERANGE) {
			if (val == LONG_MAX)
				msgq(sp, M_ERR, "Delta value overflow.");
			else if (val == LONG_MIN)
				msgq(sp, M_ERR, "Delta value underflow.");
			else
				msgq(sp, M_ERR, "Error: %s.", strerror(errno));
			return (1);
		}

		/*
		 * Check that the running total stays in range before adding.
		 * Both tests are arranged so the check itself can't overflow.
		 */
		if (val >= 0) {
			if (LONG_MAX - val < tval) {
				msgq(sp, M_ERR, "Delta value overflow.");
				return (1);
			}
		} else if (tval < LONG_MIN - val) {
			msgq(sp, M_ERR, "Delta value underflow.");
			return (1);
		}
		tval += val;
	}
	*valp = tval;
	return (0);
}
/*
 * check_delta --
 *	Check a line delta to see if it's legal.
 *
 *	delta is the signed line offset to apply to line lno.  Returns 0
 *	if lno + delta names a line that can be retrieved, 1 (with a
 *	message queued) if it falls before line 1 or the line can't be
 *	retrieved.
 */
static int
check_delta(sp, ep, delta, lno)
	SCR *sp;
	EXF *ep;
	long delta;
	recno_t lno;
{
	unsigned long back;

	if (delta < 0) {
		/*
		 * Compare the size of the step backward with the line
		 * number.  It's computed as -(delta + 1) + 1 so that
		 * LONG_MIN can't overflow when negated.
		 */
		back = (unsigned long)-(delta + 1) + 1;
		if (back >= lno) {
			msgq(sp, M_ERR, "Search offset before line 1.");
			return (1);
		}
	}
	if (file_gline(sp, ep, lno + delta, NULL) == NULL) {
		msgq(sp, M_ERR, "Search offset past end-of-file.");
		return (1);
	}
	return (0);
}
/*
 * re_error --
 *	Queue an error message describing a regular expression failure.
 *
 *	errcode and preg are passed to regerror() to obtain the text.  If
 *	memory for the text can't be allocated, the allocation error is
 *	reported instead.
 */
void
re_error(sp, errcode, preg)
	SCR *sp;
	int errcode;
	regex_t *preg;
{
	size_t need;
	char *text;

	/* With a zero-length buffer, only the size required is of interest. */
	need = regerror(errcode, preg, "", 0);

	text = malloc(need);
	if (text == NULL) {
		msgq(sp, M_ERR, "Error: %s", strerror(errno));
		return;
	}

	(void)regerror(errcode, preg, text, need);
	msgq(sp, M_ERR, "RE error: %s", text);
	free(text);
}