4.4BSD/usr/src/contrib/nvi/nvi/ex/ex_substitute.c
/*-
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef lint
/* Source version identification string; compiled out when `lint' is defined. */
static char sccsid[] = "@(#)ex_substitute.c 8.1 (Berkeley) 6/9/93";
#endif /* not lint */
#include <sys/types.h>
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "vi.h"
#include "excmd.h"
/*
 * How substitute() was reached:
 *	AGAIN		reuse the saved RE (empty pattern, or ex_subagain()).
 *	MUSTSETR	no pattern was given, only flags; substitute() rejects
 *			the command unless the 'r' flag is among them.
 *	FIRST		ex_substitute() compiled a new RE for this command;
 *			the 'r' flag is rejected as meaningless.
 */
enum which {AGAIN, MUSTSETR, FIRST};
/* Grow sp->match so it can hold every subexpression of the given RE. */
static int checkmatchsize __P((SCR *, regex_t *));
/* Expand the saved replacement string for the current match. */
static inline int regsub __P((SCR *,
char *, char **, size_t *, size_t *));
/* Parse the flags and apply the substitution over the address range. */
static int substitute __P((SCR *, EXF *,
EXCMDARG *, char *, regex_t *, enum which));
/*
 * ex_substitute --
 *	The ex substitute command.
 *
 *	If the argument string starts with '/' or ';', that character is
 *	the delimiter and the string is split into pattern, replacement
 *	and trailing flags.  The replacement is saved in sp->repl.  An
 *	empty pattern reuses the saved RE; otherwise the pattern is
 *	compiled and becomes the saved RE.  If the string does not start
 *	with a delimiter, the whole string is handed to substitute() as
 *	flags (which must then include 'r').
 *
 *	Returns 0 on success, 1 on error (a message has been queued).
 */
int
ex_substitute(sp, ep, cmdp)
	SCR *sp;
	EXF *ep;
	EXCMDARG *cmdp;
{
	regex_t *re, lre;
	int eval, reflags;
	char *endp, *repl, *sub;
	char delim[2];

	/*
	 * Historic vi only permitted '/' to begin the substitution command.
	 * We permit ';' as well, since users often want to operate on UNIX
	 * pathnames.  We don't just allow anything because the flag chars
	 * wouldn't work.
	 */
	if (*cmdp->string == '/' || *cmdp->string == ';') {
		/* Delimiter is the first character. */
		delim[0] = cmdp->string[0];
		delim[1] = '\0';

		/* Get the substitute string. */
		endp = cmdp->string + 1;
		sub = strsep(&endp, delim);

		/* Get the replacement string, save it off. */
		if (endp == NULL || *endp == '\0') {
			msgq(sp, M_ERR, "No replacement string specified.");
			return (1);
		}

		/*
		 * Copy the new replacement before discarding the old one,
		 * so that an allocation failure leaves sp->repl and
		 * sp->repl_len describing a valid string.
		 */
		if ((repl = strdup(strsep(&endp, delim))) == NULL) {
			msgq(sp, M_ERR, "Error: %s", strerror(errno));
			return (1);
		}
		if (sp->repl != NULL)
			free(sp->repl);
		sp->repl = repl;
		sp->repl_len = strlen(repl);

		/* If the substitute string is empty, use the last one. */
		if (*sub == '\0') {
			if (!F_ISSET(sp, S_RE_SET)) {
				msgq(sp, M_ERR,
				    "No previous regular expression.");
				return (1);
			}
			if (checkmatchsize(sp, &sp->sre))
				return (1);
			return (substitute(sp, ep,
			    cmdp, endp ? endp : "", &sp->sre, AGAIN));
		}

		/* Set RE flags. */
		reflags = 0;
		if (O_ISSET(sp, O_EXTENDED))
			reflags |= REG_EXTENDED;
		if (O_ISSET(sp, O_IGNORECASE))
			reflags |= REG_ICASE;

		/* Compile the RE. */
		re = &lre;
		if ((eval = regcomp(re, sub, reflags)) != 0) {
			re_error(sp, eval, re);
			return (1);
		}

		/*
		 * Set saved RE.  Historic practice is that substitutes set
		 * direction as well as the RE.
		 *
		 * NOTE(review): the previously saved RE is overwritten here
		 * without a regfree(); confirm whether it is released
		 * somewhere else before adding one.
		 */
		sp->sre = lre;
		sp->searchdir = FORWARD;
		F_SET(sp, S_RE_SET);

		if (checkmatchsize(sp, &sp->sre))
			return (1);
		return (substitute(sp, ep, cmdp, endp ? endp : "", re, FIRST));
	}

	/*
	 * Flags only.  substitute() checks that 'r' was given and that an
	 * RE has been saved; if one has, make sure the match array is big
	 * enough for it, exactly as the other two paths above do.
	 */
	if (F_ISSET(sp, S_RE_SET) && checkmatchsize(sp, &sp->sre))
		return (1);
	return (substitute(sp, ep, cmdp, cmdp->string, &sp->sre, MUSTSETR));
}
/*
 * ex_subagain --
 *	Repeat the substitution using the saved RE; cmdp->string is passed
 *	to substitute() as the flag characters.
 *
 *	Returns 0 on success, 1 on error (a message has been queued).
 */
int
ex_subagain(sp, ep, cmdp)
	SCR *sp;
	EXF *ep;
	EXCMDARG *cmdp;
{
	if (!F_ISSET(sp, S_RE_SET)) {
		msgq(sp, M_ERR, "No previous regular expression.");
		return (1);
	}

	/*
	 * substitute() has regexec() fill in re_nsub + 1 entries of
	 * sp->match, so the array must be sized for the saved RE first,
	 * as ex_substitute() does when it reuses the saved RE.
	 */
	if (checkmatchsize(sp, &sp->sre))
		return (1);
	return (substitute(sp, ep, cmdp, cmdp->string, &sp->sre, AGAIN));
}
/*
* The nasty part of the substitution is what happens when the replacement
* string contains newlines. It's a bit tricky -- consider the information
* that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is
* to build a set of newline offsets which we use to break the line up later,
* when the replacement is done. Don't change it unless you're pretty damned
* confident.
*/
/*
 * NEEDNEWLINE --
 *	Make sure sp->newl has room for one more newline offset, growing
 *	the array 25 entries at a time.
 *
 *	On allocation failure a message is queued and the ENCLOSING
 *	FUNCTION returns 1.  The existing array and its recorded length
 *	are left untouched in that case, so sp->newl, sp->newl_len and
 *	sp->newl_cnt stay consistent with each other.
 */
#define	NEEDNEWLINE(sp) {						\
	if ((sp)->newl_len == (sp)->newl_cnt) {				\
		void *neednl_new_;					\
		neednl_new_ = realloc((sp)->newl,			\
		    ((sp)->newl_len + 25) * sizeof(size_t));		\
		if (neednl_new_ == NULL) {				\
			msgq(sp, M_ERR,					\
			    "Error: %s", strerror(errno));		\
			return (1);					\
		}							\
		(sp)->newl = neednl_new_;				\
		(sp)->newl_len += 25;					\
	}								\
}
/*
 * BUILD --
 *	Append len bytes starting at l to the build buffer.  Uses the
 *	enclosing function's lb (buffer), lbclen (bytes used) and lblen
 *	(bytes allocated), growing the buffer by at least 256 bytes when
 *	it is too small.
 *
 *	On allocation failure a message is queued, the build buffer is
 *	released, and the ENCLOSING FUNCTION returns 1.  Releasing it here
 *	is the only chance to do so, since control never gets back to the
 *	caller's own cleanup; this assumes the enclosing function owns lb.
 */
#define	BUILD(sp, l, len) {						\
	if (lbclen + (len) > lblen) {					\
		void *build_new_;					\
		build_new_ = realloc(lb,				\
		    lblen + MAX(lbclen + (len), 256));			\
		if (build_new_ == NULL) {				\
			msgq(sp, M_ERR,					\
			    "Error: %s", strerror(errno));		\
			if (lb != NULL)					\
				free(lb);				\
			lb = NULL;					\
			lbclen = lblen = 0;				\
			return (1);					\
		}							\
		lb = build_new_;					\
		lblen += MAX(lbclen + (len), 256);			\
	}								\
	/* Nothing to copy, and lb may still be NULL. */		\
	if ((len) != 0) {						\
		memmove(lb + lbclen, (l), (len));			\
		lbclen += (len);					\
	}								\
}
/*
 * NEEDSP --
 *	Make sure the build buffer has room for len more bytes.  Uses the
 *	enclosing function's lb, lbclen and lblen.  If the buffer is
 *	reallocated, pnt is reset to the first unused byte (lb + lbclen),
 *	because the buffer may have moved.
 *
 *	On allocation failure a message is queued and the ENCLOSING
 *	FUNCTION returns 1; lb, lbclen and lblen are left untouched, so
 *	the existing buffer remains valid for whoever owns it.
 */
#define	NEEDSP(sp, len, pnt) {						\
	if (lbclen + (len) > lblen) {					\
		void *needsp_new_;					\
		needsp_new_ = realloc(lb,				\
		    lblen + MAX(lbclen + (len), 256));			\
		if (needsp_new_ == NULL) {				\
			msgq(sp, M_ERR,					\
			    "Error: %s", strerror(errno));		\
			return (1);					\
		}							\
		lb = needsp_new_;					\
		lblen += MAX(lbclen + (len), 256);			\
		pnt = lb + lbclen;					\
	}								\
}
/*
 * substitute --
 *	Do the substitution.  This stuff is *really* tricky.  There are
 *	lots of special cases, and general nastiness.  Don't mess with it
 *	unless you're pretty confident.
 *
 *	sp, ep	screen and file being edited.
 *	cmdp	the ex command; addr1.lno through addr2.lno is the range.
 *	s	the flag characters: ' ', '\t', '#', 'c', 'g', 'l', 'p', 'r'.
 *	re	compiled RE to match against each line.
 *	cmd	how we were reached (see enum which).
 *
 *	The replacement text comes from sp->repl (via regsub()), and the
 *	match offsets are kept in sp->match.
 *
 *	Returns 0 on success, 1 on error (a message has been queued).
 *	The build buffer is released on the exits this function controls;
 *	the BUILD macro returns directly on allocation failure.
 */
static int
substitute(sp, ep, cmdp, s, re, cmd)
	SCR *sp;
	EXF *ep;
	EXCMDARG *cmdp;
	char *s;
	regex_t *re;
	enum which cmd;
{
	MARK from, to;
	recno_t elno, lno, lastline;
	size_t blen, cnt, last, lbclen, lblen, len, offset;
	int eflags, eval, linechanged, quit;
	int cflag, gflag, lflag, nflag, pflag, rflag;
	char *bp, *lb;

	/*
	 * Historic vi permitted the '#', 'l' and 'p' options in vi mode,
	 * but it only displayed the last change and they really don't
	 * make any sense.  In the current model the problem is combining
	 * them with the 'c' flag -- the screen would have to flip back
	 * and forth between the confirm screen and the ex print screen,
	 * which would be pretty awful.  Not worth the effort.
	 */
	cflag = gflag = lflag = nflag = pflag = rflag = 0;
	for (; *s; ++s)
		switch (*s) {
		case ' ':
		case '\t':
			break;
		case '#':
			if (F_ISSET(sp, S_MODE_VI)) {
				msgq(sp, M_ERR,
				    "'#' flag not supported in vi mode.");
				return (1);
			}
			nflag = 1;
			break;
		case 'c':
			cflag = 1;
			break;
		case 'g':
			gflag = 1;
			break;
		case 'l':
			if (F_ISSET(sp, S_MODE_VI)) {
				msgq(sp, M_ERR,
				    "'l' flag not supported in vi mode.");
				return (1);
			}
			lflag = 1;
			break;
		case 'p':
			if (F_ISSET(sp, S_MODE_VI)) {
				msgq(sp, M_ERR,
				    "'p' flag not supported in vi mode.");
				return (1);
			}
			pflag = 1;
			break;
		case 'r':
			if (cmd == FIRST) {
				msgq(sp, M_ERR,
		    "Regular expression specified; r flag meaningless.");
				return (1);
			}
			if (!F_ISSET(sp, S_RE_SET)) {
				msgq(sp, M_ERR,
				    "No previous regular expression.");
				return (1);
			}
			rflag = 1;
			break;
		default:
			goto usage;
		}

	if (rflag == 0 && cmd == MUSTSETR) {
usage:		msgq(sp, M_ERR, "Usage: %s", cmdp->cmd->usage);
		return (1);
	}

	/* Get some space. */
	GET_SPACE(sp, bp, blen, 512);

	/*
	 * lb:		build buffer pointer.
	 * lbclen:	current length of built buffer.
	 * lblen:	length of build buffer.
	 */
	lb = NULL;
	lbclen = lblen = 0;

	/*
	 * Since multiple changes can happen in a line, we only increment
	 * the change count on the first change to a line.
	 */
	lastline = OOBLNO;

	/* For each line... */
	for (quit = 0, lno = cmdp->addr1.lno,
	    elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {

		/* Get the line. */
		if ((s = file_gline(sp, ep, lno, &len)) == NULL) {
			GETLINE_ERR(sp, lno);
			/* Leave through the cleanup code, like every other failure. */
			goto ret1;
		}

		/*
		 * Make a local copy if doing confirmation -- when calling
		 * the confirm routine we're likely to lose our cached copy.
		 */
		if (cflag) {
			ADD_SPACE(sp, bp, blen, len)
			memmove(bp, s, len);
			s = bp;
		}

		/* Reset the buffer pointer. */
		lbclen = 0;

		/*
		 * We don't want to have to do a setline if the line didn't
		 * change -- keep track of whether or not this line changed.
		 */
		linechanged = 0;

		/* It's not nul terminated, but we pretend it is. */
		eflags = REG_STARTEND;

		/* The search area is from 's' to the end of the line. */
nextmatch:	sp->match[0].rm_so = 0;
		sp->match[0].rm_eo = len;

		/* Get the next match. */
skipmatch:	eval = regexec(re,
		    (char *)s, re->re_nsub + 1, sp->match, eflags);

		/*
		 * There wasn't a match -- if there was an error, deal with
		 * it.  If there was a previous match in this line, resolve
		 * the changes into the database.  Otherwise, just move on.
		 */
		if (eval == REG_NOMATCH) {
			if (linechanged)
				goto endmatch;
			continue;
		}
		if (eval != 0) {
			re_error(sp, eval, re);
			goto ret1;
		}

		/* Confirm change. */
		if (cflag) {
			/*
			 * Set the cursor position for confirmation.  Note,
			 * if we matched on a '$', the cursor may be past
			 * the end of line.
			 *
			 * XXX
			 * May want to "fix" this in the confirm routine;
			 * the confirm routine may be able to display a
			 * cursor past EOL.
			 */
			from.lno = lno;
			from.cno = sp->match[0].rm_so;
			to.lno = lno;
			to.cno = sp->match[0].rm_eo;
			if (len != 0) {
				if (to.cno >= len)
					to.cno = len - 1;
				if (from.cno >= len)
					from.cno = len - 1;
			}
			switch (sp->s_confirm(sp, ep, &from, &to)) {
			case YES:
				break;
			case NO:
				/*
				 * Copy the bytes before the match and the
				 * bytes in the match into the build buffer.
				 */
				BUILD(sp, s, sp->match[0].rm_eo);
				goto skip;
			case QUIT:
				/* Set the quit flag. */
				quit = 1;

				/* If in a global, pass the info back. */
				if (F_ISSET(sp, S_GLOBAL))
					F_SET(sp, S_GLOBAL_QUIT);

				/*
				 * If any changes, resolve them, otherwise
				 * return to the main loop.
				 */
				if (linechanged)
					goto endmatch;
				continue;
			}
		}

		/* Copy the bytes before the match into the build buffer. */
		BUILD(sp, s, sp->match[0].rm_so);

		/* Substitute the matching bytes. */
		if (regsub(sp, s, &lb, &lbclen, &lblen)) {
			/*
			 * regsub() works on local copies of the buffer
			 * pointer and only writes them back on success, so
			 * after a failure ours may be stale.  Drop it rather
			 * than risk freeing memory that has already moved.
			 */
			lb = NULL;
			goto ret1;
		}

		/* Set the change flag so we know this line was modified. */
		linechanged = 1;

		/*
		 * Move the pointers past the matched bytes.
		 *
		 * NOTE(review): if the RE matched the empty string,
		 * rm_eo is 0 here and neither s nor len advances; with
		 * the 'g' flag that appears to retry the same position
		 * indefinitely.  Confirm and handle separately.
		 */
skip:		s += sp->match[0].rm_eo;
		len -= sp->match[0].rm_eo;

		/*
		 * If doing a global change with confirmation, we have to
		 * update the screen.  The basic idea is to store the line
		 * so the screen update routines can find it, but start at
		 * the old offset.
		 */
		if (linechanged && gflag && cflag) {
			/* Save offset. */
			offset = lbclen;

			/* Copy the suffix. */
			if (len)
				BUILD(sp, s, len)

			/* Store inserted lines, adjusting the build buffer. */
			last = 0;
			if (sp->newl_cnt) {
				for (cnt = 0; cnt < sp->newl_cnt;
				    ++cnt, ++lno, ++elno, ++lastline) {
					if (file_iline(sp, ep, lno,
					    lb + last, sp->newl[cnt] - last))
						goto ret1;
					last = sp->newl[cnt] + 1;
					++sp->rptlines[L_ADDED];
				}
				lbclen -= last;
				offset -= last;
				sp->newl_cnt = 0;
				linechanged = 1;
			}

			/* Store the changed line. */
			if (linechanged)
				if (file_sline(sp, ep, lno, lb + last, lbclen))
					goto ret1;

			/* Get a new copy of the line. */
			if ((s = file_gline(sp, ep, lno, &len)) == NULL) {
				GETLINE_ERR(sp, lno);
				goto ret1;
			}
			ADD_SPACE(sp, bp, blen, len)
			memmove(bp, s, len);
			s = bp;

			/* Restart the build. */
			lbclen = 0;

			/* Update changed line counter. */
			if (lastline != lno) {
				++sp->rptlines[L_CHANGED];
				lastline = lno;
			}

			/* Start in the middle of the line. */
			sp->match[0].rm_so = offset;
			sp->match[0].rm_eo = len;
			goto skipmatch;
		}

		/*
		 * If it's a global change, and there's something left in
		 * the line, check it.
		 */
		if (len && gflag) {
			eflags |= REG_NOTBOL;
			goto nextmatch;
		}

		/* Copy any remaining bytes into the build buffer. */
endmatch:	if (len)
			BUILD(sp, s, len)

		/* Store inserted lines, adjusting the build buffer. */
		last = 0;
		if (sp->newl_cnt) {
			for (cnt = 0; cnt < sp->newl_cnt;
			    ++cnt, ++lno, ++elno, ++lastline) {
				if (file_iline(sp, ep,
				    lno, lb + last, sp->newl[cnt] - last))
					goto ret1;
				last = sp->newl[cnt] + 1;
				++sp->rptlines[L_ADDED];
			}
			lbclen -= last;
			sp->newl_cnt = 0;
			linechanged = 1;
		}

		/* Store the changed line. */
		if (linechanged)
			if (file_sline(sp, ep, lno, lb + last, lbclen))
				goto ret1;

		/* Update changed line counter. */
		if (lastline != lno) {
			++sp->rptlines[L_CHANGED];
			lastline = lno;
		}

		/* Display as necessary. */
		if (lflag || nflag || pflag) {
			from.lno = to.lno = lno;
			from.cno = to.cno = 0;
			if (lflag)
				ex_print(sp, ep, &from, &to, E_F_LIST);
			if (nflag)
				ex_print(sp, ep, &from, &to, E_F_HASH);
			if (pflag)
				ex_print(sp, ep, &from, &to, E_F_PRINT);
		}
	}

	/*
	 * Cursor moves to last line changed, unless doing confirm,
	 * in which case don't move it.
	 */
	if (!cflag && lastline != OOBLNO)
		sp->lno = lastline;

	/*
	 * Note if nothing found.  Else, if nothing displayed to the
	 * screen, put something up.
	 */
	if (sp->rptlines[L_CHANGED] == 0 && !F_ISSET(sp, S_GLOBAL))
		msgq(sp, M_INFO, "No match found.");
	else if (!lflag && !nflag && !pflag)
		F_SET(sp, S_AUTOPRINT);

	FREE_SPACE(sp, bp, blen);
	/* The build buffer is private to this call; release it. */
	if (lb != NULL)
		free(lb);
	return (0);

ret1:	FREE_SPACE(sp, bp, blen);
	if (lb != NULL)
		free(lb);
	/*
	 * Any recorded newline offsets referred to the build buffer that
	 * is now gone; don't let the next substitution act on them.
	 */
	sp->newl_cnt = 0;
	return (1);
}
/*
* regsub --
* Do the substitution for a regular expression.
*/
static inline int
regsub(sp, ip, lbp, lbclenp, lblenp)
SCR *sp;
char *ip; /* Input line. */
char **lbp;
size_t *lbclenp, *lblenp;
{
size_t lbclen, lblen; /* Local copies. */
size_t mlen; /* Match length. */
size_t rpl; /* Remaining replacement length. */
char *rp; /* Replacement pointer. */
int ch;
int no; /* Match replacement offset. */
char *p; /* Build buffer pointer. */
char *lb; /* Local copies. */
lb = *lbp; /* Get local copies. */
lbclen = *lbclenp;
lblen = *lblenp;
rp = sp->repl; /* Set up replacment info. */
rpl = sp->repl_len;
for (p = lb + lbclen; rpl--;) {
ch = *rp++;
if (ch == '&') { /* Entire pattern. */
no = 0;
goto sub;
/* Partial pattern. */
} else if (ch == '\\' && isdigit(*rp)) {
no = *rp++ - '0';
--rpl;
sub: if (sp->match[no].rm_so != -1 &&
sp->match[no].rm_eo != -1) {
mlen =
sp->match[no].rm_eo - sp->match[no].rm_so;
NEEDSP(sp, mlen, p);
memmove(p, ip + sp->match[no].rm_so, mlen);
p += mlen;
lbclen += mlen;
}
} else { /* Newline, ordinary characters. */
if (sp->special[ch] == K_CR ||
sp->special[ch] == K_NL) {
NEEDNEWLINE(sp);
sp->newl[sp->newl_cnt++] = lbclen;
} else if (ch == '\\' && (*rp == '\\' || *rp == '&'))
ch = *rp++;
NEEDSP(sp, 1, p);
*p++ = ch;
++lbclen;
}
}
*lbp = lb; /* Update caller's information. */
*lbclenp = lbclen;
*lblenp = lblen;
return (0);
}
/*
 * checkmatchsize --
 *	Make sure sp->match can hold one regmatch_t for the whole match
 *	plus one for each subexpression of re (re->re_nsub + 1 entries),
 *	growing it if it is currently smaller.
 *
 *	Returns 0 on success.  On allocation failure a message is queued
 *	and 1 is returned; the existing array and sp->matchsize are left
 *	as they were.
 */
static int
checkmatchsize(sp, re)
	SCR *sp;
	regex_t *re;
{
	void *nmatch;
	size_t need;

	/* Build nsub array as necessary. */
	need = re->re_nsub + 1;
	if (sp->matchsize >= need)
		return (0);

	/* Go through a temporary, so a failed realloc doesn't lose the array. */
	if ((nmatch = realloc(sp->match, need * sizeof(regmatch_t))) == NULL) {
		msgq(sp, M_ERR, "Error: %s", strerror(errno));
		return (1);
	}
	sp->match = nmatch;
	sp->matchsize = need;
	return (0);
}