OpenSolaris_b135/cmd/awk_xpg4/awk4.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * awk -- functions
 *
 * Copyright (c) 1995, 1996 by Sun Microsystems, Inc.
 * All rights reserved.
 *
 * Copyright 1986, 1994 by Mortice Kern Systems Inc.  All rights reserved.
 *
 * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include "awk.h"
#include "y.tab.h"
#include <time.h>
#include <sys/wait.h>

static uint	nargs(NODE *np);
static NODE	*dosub(NODE *np, int glob);
static NODE	*docasetr(NODE *np, int upper);
static int	asortcmp(const void *npp1, const void *npp2);

static char	nargerr[] = "wrong number of arguments to function \"%s\"";
static NODE	*asortfunc;		/* Function call for asort() */
static NODE	*asnp1, *asnp2;		/* index1, index2 nodes */
static int	asarraylen;		/* strlen(array)+1 for asort */

/*
 * Return the value of exp(x).
 * Usage:	y = exp(x)
 *		y = exp()
 */
NODE *
f_exp(NODE *np)
{
	register uint na;

	if ((na = nargs(np)) > 1)
		awkerr(nargerr, s_exp);
	return (realnode(exp(exprreal(na==0 ? field0 : getlist(&np)))));
}

/*
 * Return the integer part of the argument.
 * Usage:	i = int(r)
 *		i = int()
 */
NODE *
f_int(NODE *np)
{
	register uint na;

	if ((na = nargs(np)) > 1)
		awkerr(nargerr, s_int);
	return (intnode(exprint(na==0 ? field0 : getlist(&np))));
}

/*
 * Logarithm function.
 * Usage:	y = log(x)
 *		y = log()
 */
NODE *
f_log(NODE *np)
{
	register uint na;

	if ((na = nargs(np)) > 1)
		awkerr(nargerr, s_log);
	return (realnode(log(exprreal(na==0 ? field0 : getlist(&np)))));
}

/*
 * Square root function.
 * Usage:	y = sqrt(x)
 *		y = sqrt()
 */
NODE *
f_sqrt(NODE *np)
{
	register uint na;

	if ((na = nargs(np)) > 1)
		awkerr(nargerr, s_sqrt);
	return (realnode(sqrt(exprreal(na==0 ? field0 : getlist(&np)))));
}

/*
 * Trigonometric sine function.
 * Usage:	y = sin(x)
 */
NODE *
f_sin(NODE *np)
{
	if (nargs(np) != 1)
		awkerr(nargerr, s_sin);
	return (realnode(sin(exprreal(getlist(&np)))));
}

/*
 * Trigonometric cosine function.
 * Usage:	y = cos(x)
 */
NODE *
f_cos(NODE *np)
{
	if (nargs(np) != 1)
		awkerr(nargerr, s_cos);
	return (realnode(cos(exprreal(getlist(&np)))));
}

/*
 * Arctangent of y/x.
 * Usage:	z = atan2(y, x)
 */
NODE *
f_atan2(NODE *np)
{
	double y, x;

	if (nargs(np) != 2)
		awkerr(nargerr, s_atan2);
	y = (double)exprreal(getlist(&np));
	x = (double)exprreal(getlist(&np));
	return (realnode(atan2(y, x)));
}

/*
 * Set the seed for the random number generator function -- rand.
 * Usage:	srand(x)
 *		srand()
 */
NODE *
f_srand(NODE *np)
{
	register uint na;
	register uint seed;
	static uint oldseed = 0;

	if ((na = nargs(np)) > 1)
		awkerr(nargerr, s_srand);
	if (na == 0)
		seed = (uint)time((time_t *)0); else
		seed = (uint)exprint(getlist(&np));
	srand(seed);
	na = oldseed;
	oldseed = seed;
	return (intnode((INT)na));
}

/*
 * Generate a random number.
 * Usage:	x = rand()
 */
NODE *
f_rand(NODE *np)
{
	double result;
	int expon;
	ushort rint;

	if (nargs(np) != 0)
		awkerr(nargerr, s_rand);
	rint = rand() & SHRT_MAX;
	result = frexp((double)rint, &expon);
	return (realnode((REAL)ldexp(result, expon-15)));
}

/*
 * Substitute function.
 * Usage:	n = sub(regex, replace, target)
 *		n = sub(regex, replace)
 */
NODE *
f_sub(NODE *np)
{
	return (dosub(np, 1));
}

/*
 * Global substitution function.
 * Usage:	n = gsub(regex, replace, target)
 *		n = gsub(regex, replace)
 */
NODE *
f_gsub(NODE *np)
{
	return (dosub(np, 0));
}

/*
 * Do actual substitutions.
 * `glob' is the number to substitute, 0 for all.
 */
static NODE *
dosub(NODE *np, int glob)
{
	wchar_t *text;
	register wchar_t *sub;
	register uint n;
	register uint na;
	register REGEXP rp;
	NODE *left;
	static wchar_t *buf;

	if ((na = nargs(np)) != 2 && na != 3)
		awkerr(nargerr, glob==0 ? s_gsub : s_sub);
	rp = getregexp(getlist(&np));
	sub = exprstring(getlist(&np));
	if (na == 3) {
		left = getlist(&np);
		text = exprstring(left);
	} else {
		left = field0;
		text = linebuf;
	}
	switch (REGWDOSUBA(rp, sub, text, &buf, 256, &glob)) {
	case REG_OK:
	case REG_NOMATCH:
		n = glob;
		break;
	case REG_ESPACE:
		if (buf != NULL)
			free(buf);
		awkerr(nomem);
	default:
		awkerr(gettext("regular expression error"));
	}
	(void)assign(left, stringnode(buf, FNOALLOC, wcslen(buf)));
	return (intnode((INT)n));
}

/*
 * Match function.  Return position (origin 1) or 0 for regular
 * expression match in string.  Set new variables RSTART and RLENGTH
 * as well.
 * Usage:	pos = match(string, re)
 */
NODE *
f_match(NODE *np)
{
	register wchar_t *text;
	register REGEXP rp;
	register int pos, length;
	REGWMATCH_T match[10];

	if (nargs(np) != 2)
		awkerr(nargerr, s_match);
	text = exprstring(getlist(&np));
	rp = getregexp(getlist(&np));
	if (REGWEXEC(rp, text, 10, match, 0) == REG_OK) {
		pos = match[0].rm_sp-text+1;
		length = match[0].rm_ep - match[0].rm_sp;
	} else {
		pos = 0;
		length = -1;
	}
	constant->n_int = length;
	(void)assign(vlook(M_MB_L("RLENGTH")), constant);
	return (assign(vlook(M_MB_L("RSTART")), intnode((INT)pos)));
}

/*
 * Call shell or command interpreter.
 * Usage:	status = system(command)
 */
NODE *
f_system(NODE *np)
{
	int retcode;

	if (nargs(np) != 1)
		awkerr(nargerr, s_system);
	(void) fflush(stdout);
	retcode = system(mbunconvert(exprstring(getlist(&np))));
	return (intnode((INT)WEXITSTATUS(retcode)));
}

/*
 * Search for string within string.
 * Usage:	pos = index(string1, string2)
 */
NODE *
f_index(NODE *np)
{
	register wchar_t *s1, *s2;
	register int l1, l2;
	register int result;

	if (nargs(np) != 2)
		awkerr(nargerr, s_index);
	s1 = (wchar_t *)exprstring(getlist(&np));
	s2 = (wchar_t *)exprstring(getlist(&np));
	l1 = wcslen(s1);
	l2 = wcslen(s2);
	result = 1;
	while (l2 <= l1) {
		if (memcmp(s1, s2, l2 * sizeof(wchar_t)) == 0)
			break;
		result++;
		s1++;
		l1--;
	}
	if (l2 > l1)
		result = 0;
	return (intnode((INT)result));
}

/*
 * Return length of argument or $0
 * Usage:	n = length(string)
 *		n = length()
 *		n = length
 */
NODE *
f_length(NODE *np)
{
	register uint na;

	if ((na = nargs(np)) > 1)
		awkerr(nargerr, s_length);
	if (na == 0)
		na = lbuflen; else
		na = wcslen((wchar_t *)exprstring(getlist(&np)));
	return (intnode((INT)na));
}

/*
 * Split string into fields.
 * Usage: nfields = split(string, array [, separator]);
 */
NODE *
f_split(NODE *np)
{
	register wchar_t *cp;
	wchar_t *ep, *saved = 0;
	register NODE *tnp, *snp, *otnp;
	register NODE *sep;
	REGEXP old_resep = 0;
	size_t seplen;
	uint n;
	wint_t c;
	wchar_t savesep[20];
	wchar_t  *(*old_awkfield)(wchar_t **) = 0;

	if ((n = nargs(np))<2 || n>3)
		awkerr(nargerr, s_split);
	ep = exprstring(snp = getlist(&np));
	tnp = getlist(&np);
	if (snp->n_type == INDEX && snp->n_left == tnp)
		ep = saved = wsdup(ep);
	if (n == 3) {
		sep = getlist(&np);
	} else
		sep = NNULL;
	switch (tnp->n_type) {
	case ARRAY:
		delarray(tnp);
		break;

	case PARM:
		break;

	case VAR:
		if (isstring(tnp->n_flags) && tnp->n_string==_null)
			break;
	default:
		awkerr(gettext(
			"second parameter to \"split\" must be an array"));
		/*NOTREACHED*/
	}
	/*
	 * If an argument has been passed in to be used as the
	 * field separator check to see if it is a constant regular
	 * expression. If so, use it directly otherwise reduce the
	 * expression, convert the result into a string and assign it
	 * to "FS" (after saving the old value for FS.)
	 */
	if (sep != NNULL) {
		if (sep->n_type == PARM)
			sep = sep->n_next;
		if (sep->n_type == RE) {
			old_resep = resep;
			resep = sep->n_regexp;
			old_awkfield = awkfield;
			awkfield = refield;
		} else {
			sep = exprreduce(sep);
			seplen = wcslen(cp = (wchar_t *)exprstring(varFS));
			(void) memcpy(savesep, cp, 
				(seplen+1) * sizeof(wchar_t));
			(void) assign(varFS, sep);
		}
	}
	/*
	 * Iterate over the record, extracting each field and assigning it to
	 * the corresponding element in the array.
	 */
	otnp = tnp;	/* save tnp for possible promotion */
	tnp = node(INDEX, tnp, constant);
	fcount = 0;
	for (;;) {
		if ((cp = (*awkfield)(&ep)) == NULL) {
			if (fcount == 0) {
				if (otnp->n_type == PARM)
					otnp = otnp->n_next;
				promote(otnp);
			}
			break;
		}
		c = *ep;
		*ep = '\0';
		constant->n_int = ++fcount;
		(void)assign(tnp, stringnode(cp,FALLOC|FSENSE,(size_t)(ep-cp)));
		*ep = c;
	}
	/*
	 * Restore the old record separator/and or regular expression.
	 */
	if (sep != NNULL) {
		if (old_awkfield != 0) {
			resep = old_resep;
			awkfield = old_awkfield;
		} else {
			(void)assign(varFS,
				stringnode(savesep, FSTATIC, seplen));
		}
	}
	if (saved)
		free(saved);
	return (intnode((INT)fcount));
}

/*
 * Sprintf function.
 * Usage:	string = sprintf(format, arg, ...)
 */
NODE *
f_sprintf(NODE *np)
{
        wchar_t *cp;
        size_t length;

        if (nargs(np) == 0)
                awkerr(nargerr, s_sprintf);
        length = xprintf(np, (FILE *)NULL, &cp);
        np = stringnode(cp, FNOALLOC, length);
        return (np);
}

/*
 * Substring.
 * newstring = substr(string, start, [length])
 */
NODE *
f_substr(NODE *np)
{
	register STRING str;
	register size_t n;
	register int start;
	register size_t len;

	if ((n = nargs(np))<2 || n>3)
		awkerr(nargerr, s_substr);
	str = exprstring(getlist(&np));
	if ((start = (int)exprint(getlist(&np))-1) < 0)
		start = 0;
	if (n == 3) {
		int x;
		x = (int)exprint(getlist(&np));
		if (x < 0)
			len = 0;
		else
			len = (size_t)x;
	} else
		len = LARGE;
	n = wcslen((wchar_t *)str);
	if (start > n)
		start = n;
	n -= start;
	if (len > n)
		len = n;
	str += start;
	n = str[len];
	str[len] = '\0';
	np = stringnode(str, FALLOC, len);
	str[len] = n;
	return (np);
}

/*
 * Close an output or input file stream.
 */
NODE *
f_close(NODE *np)
{
	register OFILE *op;
	register char *name;

	if (nargs(np) != 1)
		awkerr(nargerr, s_close);
	name = mbunconvert(exprstring(getlist(&np)));
	for (op = &ofiles[0]; op < &ofiles[NIOSTREAM]; op++)
		if (op->f_fp!=FNULL && strcmp(name, op->f_name)==0) {
			awkclose(op);
			break;
		}
	if (op >= &ofiles[NIOSTREAM])
		return (const1);
	return (const0);
}

/*
 * Return the integer value of the first character of a string.
 * Usage:	char = ord(string)
 */
NODE *
f_ord(NODE *np)
{
	if (nargs(np) != 1)
		awkerr(nargerr, s_ord);
	return (intnode((INT)*exprstring(getlist(&np))));
}

/*
 * Return the argument string in lower case:
 * Usage:
 *	lower = tolower(upper)
 */
NODE *
f_tolower(NODE *np)
{
	return (docasetr(np, 0));
}

/*
 * Return the argument string in upper case:
 * Usage:
 *	upper = toupper(lower)
 */
NODE *
f_toupper(NODE *np)
{
	return (docasetr(np, 1));
}

/*
 * Sort the array into traversal order by the next "for (i in array)" loop.
 * Usage:
 *	asort(array, "cmpfunc")
 * 	cmpfunc(array, index1, index2)
 *		returns:
 *		<0		if 	array[index1] <  array[index2]
 *		 0		if	array[index1] == array[index2]
 *		>0		if	array[index1] >  array[index2]
 */
NODE *
f_asort(NODE *np)
{
	NODE *array;
	STRING funcname;
	register size_t nel;
	register NODE *tnp;
	register NODE *funcnp;
	register NODE **alist, **npp;

	if (nargs(np) != 2)
		awkerr(nargerr, s_asort);
	array = getlist(&np);
	if (array->n_type == PARM)
		array = array->n_next;
	if (array->n_type != ARRAY)
		awkerr(gettext("%s function requires an array"),
			s_asort);
	funcname = exprstring(getlist(&np));
	if ((funcnp = vlookup(funcname, 1)) == NNULL
	 || funcnp->n_type != UFUNC)
		awkerr(gettext("%s: %s is not a function\n"),
		    s_asort, funcname);
	/*
	 * Count size of array, allowing one extra for NULL at end
	 */
	nel = 1;
	for (tnp = array->n_alink; tnp != NNULL; tnp = tnp->n_alink)
		++nel;
	/*
	 * Create UFUNC node that points at the funcnp on left and the
	 * list of three variables on right (array, index1, index2)
	 *				UFUNC
	 *				/    \
	 *			   funcnp    COMMA
	 *				      /   \
	 *				array	  COMMA
	 *					  /    \
	 *					index1 index2
	 */
	if (asortfunc == NNULL) {
		running = 0;
		asortfunc = node(CALLUFUNC, NNULL,
				    node(COMMA, NNULL,
				    node(COMMA,
					asnp1=stringnode(_null, FSTATIC, 0),
					asnp2=stringnode(_null, FSTATIC, 0))));
		running = 1;
	}
	asortfunc->n_left = funcnp;
	asortfunc->n_right->n_left = array;
	asarraylen = wcslen(array->n_name)+1;
	alist = (NODE **) emalloc(nel*sizeof(NODE *));
	/*
	 * Copy array into alist.
	 */
	npp = alist;
	for (tnp = array->n_alink; tnp != NNULL; tnp = tnp->n_alink)
		*npp++ = tnp;
	*npp = NNULL;
	/*
	 * Re-order array to this list
	 */
	qsort((wchar_t *)alist, nel-1, sizeof (NODE *), asortcmp);
	tnp = array;
	npp = alist;
	do {
		tnp = tnp->n_alink = *npp;
	} while (*npp++ != NNULL);
	free((wchar_t *)alist);
	return (constundef);
}

/*
 * Return the number of arguments of a function.
 */
static uint
nargs(NODE *np)
{
	register int n;

	if (np == NNULL)
		return (0);
	n = 1;
	while (np!=NNULL && np->n_type==COMMA) {
		np = np->n_right;
		n++;
	}
	return (n);
}

/*
 * Do case translation.
 */
static NODE *
docasetr(NODE *np, int upper)
{
	register int c;
	register wchar_t *cp;
	register wchar_t *str;
	register uint na;

	if ((na = nargs(np)) > 1)
		awkerr(nargerr, upper ? s_toupper : s_tolower);
	str = strsave(na==0 ? linebuf : exprstring(getlist(&np)));
	cp = str;
	if (upper) {
		while ((c = *cp++) != '\0')
			cp[-1] = towupper(c);
	} else {
		while ((c = *cp++) != '\0')
			cp[-1] = towlower(c);
	}
	return (stringnode((STRING)str, FNOALLOC, (size_t)(cp-str-1)));
}

/*
 * The comparison routine used by qsort inside f_asort()
 */
static int
asortcmp(const void *npp1, const void *npp2)
{
	asnp1->n_strlen =
	    wcslen(asnp1->n_string = (*(NODE **)npp1)->n_name+asarraylen);
	asnp2->n_strlen =
	    wcslen(asnp2->n_string = (*(NODE **)npp2)->n_name+asarraylen);
	return ((int)exprint(asortfunc));
}

#if M_MATHERR
#if !defined(__BORLANDC__)&&defined(__TURBOC__)&&__COMPACT__&&__EMULATE__
/* So it won't optimize registers our FP is using */
#define	flushesbx()	(_BX = 0, _ES = _BX)
#else
#define	flushesbx()	(0)
#endif

/*
 * Math error for awk.
 */
int
matherr(struct exception *ep)
{
	register uint type;
	static char msgs[7][256];
	static int first_time = 1;

	if (first_time) {
		msgs[0] = gettext("Unknown FP error"),
		msgs[1] = gettext("Domain"),
		msgs[2] = gettext("Singularity"),
		msgs[3] = gettext("Overflow"),
		msgs[4] = gettext("Underflow"),
		msgs[5] = gettext("Total loss of precision"),
		msgs[6] = gettext("Partial loss of precision")
		first_time = 0;
	}

	if ((type = ep->type) > (uint)PLOSS)
		type = 0;
	(void)fprintf(stderr, "awk: %s", strmsg(msgs[type]));
	(void)fprintf(stderr, gettext(
		" error in function %s(%g) at NR=%lld\n"),
		((void) flushesbx(), ep->name), ep->arg1, (INT)exprint(varNR));
	return (1);
}
#endif	/*M_MATHERR*/