/* V10/cmd/sort/field.c */

/* Copyright 1990, AT&T Bell Labs */
#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include "fsort.h"

/* Table sizes.  fields[0] holds the global options; key
   specifications given by the user occupy fields[1] upward. */
#define NF 15		/* 0 is global, 1 to NF-1 are field specs */
#define NP 30		/* NP-1 is largest permitted field number */


/* helpers private to this file, defined further down */
static char *modifiers(struct field*, char*, int);
static char *keyspec(struct pos*, char*);
static void globalmods(struct field*);
static void chkfieldno(struct field*);

/* Key table.  Only entry 0 has an explicit initializer; the other
   entries start out zeroed.  The trailing { NP, 0 } is presumably
   the end position of the global key -- confirm the member order
   of struct field in fsort.h. */
struct field fields[NF] = {
	{ 0, 0, 0, 0, 0, 0, 0, { 0, 0 }, { NP, 0 } }
};
int nfields = 0;	/* index of the last key spec stored in fields[] */

int tab;		/* field separator; 0 selects blank-separated fields */
int signedrflag;	/* -1 if the global rflag is set, else 1 (see fieldwrapup) */
int simplekeyed;	/* computed in fieldwrapup */

/* true if p points at a space or a tab */
#define blank(p) (*(p)==' ' || *(p)=='\t')

/* values stored in field.style: OLD for +pos -pos, NEW for -k */
enum { OLD, NEW };

	/* Interpret an old-style option argument (and possibly the one
	   after it) and return how many arguments were used.

	   If argv1 looks like "+digit...", a new OLD-style key is
	   allocated, its start position and start modifiers are taken
	   from argv1, and, when argv2 looks like "-digit...", its end
	   position and end modifiers are taken from argv2 (return 2,
	   otherwise 1).  Any other argv1 is treated as a string of
	   global modifiers following its first character (return 1).

	   Characters left over after the modifiers are reported through
	   fatal(); the final return only keeps compilers quiet.

	   The isdigit() arguments are cast to unsigned char: passing a
	   negative plain char to a <ctype.h> function is undefined. */
int
fieldarg(char *argv1, char *argv2)
{
	char *av1 = argv1;
	char *av2 = argv2;
	struct field *field;

	if(av1[0] == '+' && isdigit((unsigned char)av1[1])) {
		if(++nfields >= NF)
			fatal("too many fields", argv1, 0);
		field = &fields[nfields];
		field->end.fieldno = NP+1;	/* default: key runs to end of record */
		field->style = OLD;

		av1 = keyspec(&field->begin, av1+1);
		if(*modifiers(field, av1, 0))
			goto bad;

		/* no companion -pos argument: only argv1 was consumed */
		if(av2==0 || av2[0]!='-' || !isdigit((unsigned char)av2[1]))
			return 1;
		av2 = keyspec(&field->end, av2+1);
		argv1 = argv2;	/* in case of diagnostic */
		if(*modifiers(field, av2, 1))
			goto bad;
		return 2;
	} else if(*modifiers(fields, av1+1, -1))
		goto bad;
	return 1;
bad:
	fatal("bad field specification", argv1, 0);
	return 0;	/* dummy */
}

/* Handle the operand of -k: "start[mods][,end[mods]]".
   A new NEW-style key is allocated; its start character number
   defaults to 1 and its end field to NP+1 before the operand is
   parsed.  Anything left after the positions and modifiers is
   reported through fatal(). */
void
optionk(char *arg)
{
	struct field *key;
	char *rest;

	nfields++;
	if(nfields >= NF)
		fatal("too many fields", arg, 0);

	key = fields + nfields;
	key->style = NEW;
	key->begin.charno = 1;
	key->end.fieldno = NP+1;

	/* start position, then its modifiers */
	rest = keyspec(&key->begin, arg);
	rest = modifiers(key, rest, 0);

	/* optional ",end" part */
	if(*rest == ',') {
		rest = keyspec(&key->end, rest+1);
		rest = modifiers(key, rest, 1);
	}

	if(*rest != 0)
		fatal("bad -k specification", arg, 0);
}

/* Parse a key position of the form "F" or "F.C" at arg.
   F is stored in p->fieldno; C, when present, in p->charno
   (p->charno is left untouched when there is no ".C").
   Returns a pointer to the first character after the position.
   A missing number is reported through fatal().

   Numbers too large for an int are clamped to INT_MAX rather than
   being converted blindly: strtoul() yields ULONG_MAX on overflow,
   which could otherwise turn into a negative field or character
   number.  (Assumes the struct pos members are int -- confirm in
   fsort.h.)

   The isdigit() arguments are cast to unsigned char because a
   negative plain char is not a valid <ctype.h> argument. */
static char *
keyspec(struct pos *p, char *arg)
{
	unsigned long n;

	if(!isdigit((unsigned char)*arg))
		fatal("missing field number", "", 0);
	n = strtoul(arg, &arg, 10);
	p->fieldno = n > INT_MAX? INT_MAX: (int)n;
	if(*arg == '.') {
		if(!isdigit((unsigned char)*++arg))
			fatal("missing character number", "", 0);
		else {
			n = strtoul(arg, &arg, 10);
			p->charno = n > INT_MAX? INT_MAX: (int)n;
		}
	}
	return arg;
}

/* keyed = 1 if there are fields present (+ options) or if
   numeric (-ng), translation (-f) or deletion (-idb) options
   are present.  In these cases, a separate key is constructed
   for rsort.  The key, however is not carried on 
   intermediate files.  (It would be interesting to try.)
   It must be reconstructed for the merge phase, and that
   may be expensive, since relatively few comparisons
   happen in that phase.  simplekeyed = 1 if there are options,
   so that pure ascii comparison won't work, but no fields, no
   months, no numerics. */

void
fieldwrapup(void)
{
	int i;
	if(fields->coder == 0) fields->coder = tcode;
	if(fields->trans == 0) fields->trans = ident;
	if(fields->keep == 0) fields->keep = all;
	for(i=1; i<=nfields; i++) {
		globalmods(&fields[i]);
		chkfieldno(&fields[i]);
	}
	signedrflag = fields->rflag? -1: 1; /* used only by merge.c*/
	simplekeyed = nfields==0 && fields->coder==tcode 
		      && (fields->trans!=ident || fields->keep!=all);
	if(nfields==0 && !keyed)	/* used only by rsort.c */
		rflag = fields->rflag;
	if(nfields > 0)
		keyed = 1;
}

/* Issue the "conflicting key types" warning; shared by dupla(),
   duplb() and modifiers(). */
static void
conflict(void)
{
	warn("conflicting key types", "", 0);
}

/* Install table `new' in *oldp.  If a different table had already
   been installed there, warn about the conflict first; the new
   table wins either way. */
static void
dupla(uchar **oldp, uchar *new)
{
	uchar *prev = *oldp;

	if(!(prev == 0 || prev == new))
		conflict();
	*oldp = new;
}

/* Same as dupla(), but for a key-coding function pointer:
   install `new' in *oldp, warning first if a different coder
   was already installed. */
static void
duplb(int (**oldp)(uchar*,uchar*,int,struct field*), int (*new)(uchar*,uchar*,int,struct field*))
{
	int (*prev)(uchar*,uchar*,int,struct field*) = *oldp;

	if(!(prev == 0 || prev == new))
		conflict();
	*oldp = new;
}

/* Consume a run of option letters starting at argv1 and record
   them in *field.  Returns a pointer to the first character that
   is not a recognized letter (the terminating 0 if all were).

   eflag says what is being parsed:
	-1  a global option string
	 0  modifiers on the start of a key
	 1  modifiers on the end of a key

   c, m, u, s and t are only accepted globally; in a key spec they
   are reported through fatal().  u sets sflag as well as uflag.
   t takes the following character as the field separator.
   b sets the start or end blank-skipping flag according to eflag;
   r sets the reverse flag.  f, d, i, g, n and M install a
   translation table, keep table or coder and set keyed. */

static char *
modifiers(struct field *field, char *argv1, int eflag)
{
	for( ; *argv1 != 0; argv1++) {
		int c = *argv1;

		/* global -t: the next character is the separator */
		if(c == 't' && eflag == -1) {
			warn("-t in combined argument","",0);
			tab = *++argv1;
			if(tab == 0)
				fatal("no -t character", "" ,0);
			continue;
		}

		/* options that concern the sort as a whole */
		if(c=='c' || c=='m' || c=='u' || c=='s' || c=='t') {
			if(c == 'c')
				cflag = 1;
			else if(c == 'm')
				mflag = 1;
			else if(c == 'u') {
				uflag = 1;
				sflag = 1;
			} else if(c == 's')
				sflag = 1;
			if(eflag != -1)
				fatal("bad field modifier ",argv1,1);
			continue;
		}

		/* per-key modifiers */
		if(c == 'b') {
			if(eflag == 1)
				field->eflag = 1;
			else
				field->bflag = 1;
		} else if(c == 'r') {
			field->rflag = 1;
		} else {
			if(c == 'f')
				dupla(&field->trans, fold);
			else if(c == 'd')
				dupla(&field->keep, dict);
			else if(c == 'i')
				dupla(&field->keep, ascii);
			else if(c == 'g')
				duplb(&field->coder, gcode);
			else if(c == 'n')
				duplb(&field->coder, ncode);
			else if(c == 'M')
				duplb(&field->coder, Mcode);
			else
				break;		/* unknown letter ends the run */
			keyed = 1;
		}

		/* a global modifier given after some key spec */
		if(field==fields && nfields>0)
			warn("field spec precedes global option",argv1,1);
	}

	if(field->coder==ncode && field->keep)
		conflict();
	return argv1;
}

/* Complete one key spec.  Missing coder, translation and keep
   entries get the defaults tcode, ident and all.  If the key had
   no modifiers of its own at all (no flags, and none of those
   three set), it takes coder, trans, keep, rflag and bflag from
   the global entry fields[0]; for a NEW-style key the global
   bflag is also copied into eflag. */
static void
globalmods(struct field *field)
{
	int nset = field->bflag | field->eflag | field->rflag;

	if(field->coder)
		nset++;
	else
		field->coder = tcode;

	if(field->trans)
		nset++;
	else
		field->trans = ident;

	if(field->keep)
		nset++;
	else
		field->keep = all;

	if(nset != 0)
		return;

	field->coder = fields->coder;
	field->trans = fields->trans;
	field->keep = fields->keep;
	field->rflag = fields->rflag;
	field->bflag = fields->bflag;
	if(field->style == NEW)
		field->eflag = fields->bflag;
}

/* convert field representation from numbers given in arguments
   to a 0-origin first,last+1 representation, with a negative
   quantity for a character offset to the end of this field */

static void
chkfieldno(struct field *field)
{
	if(field->style == NEW) {
		if(--field->begin.fieldno < 0 ||
		   --field->begin.charno < 0 ||
		   --field->end.fieldno < 0)
			fatal("improper 0 in field specifier", "", 0);
		if(field->end.charno == 0)
			field->end.charno--;
	} else if(field->end.charno==0 && field->end.fieldno>0) {
		if(tab && field->eflag)
			fatal("skipping blanks right after tab char"
			      " is ill-defined", "", 0);
		field->end.fieldno--;
		field->end.charno--;
	} 
	if(field->begin.fieldno > NP)
		field->begin.fieldno = NP;
	if(field->end.fieldno > NP)
		field->end.fieldno = NP;
/*	fprintf(stderr,"%d %d.%d,%d.%d\n",field-fields,field->begin.fieldno, field->begin.charno,field->end.fieldno, field->end.charno);*/
}

/* Build the sort key for one record.
	dp, len	the record text and its length
	kp	where the key is written
	b	upper limit of the key buffer; output is also kept
		within MAXREC bytes of kp
   Returns the length of the key, or -1 if it would not fit.

   The record is first split into at most NP field start positions:
   with a tab character set, a field starts just after each tab;
   otherwise a field is a run of blanks followed by non-blanks.
   Then each key spec (or the global entry alone when there are no
   key specs) is turned into a [cp, ep) slice of the record and
   handed to its coder, which returns the number of key bytes it
   wrote. */
int
fieldcode(uchar *dp, uchar *kp, int len, uchar *b)
{
	uchar *posns[NP+1];	/* field start positions */
	uchar *cp;
	struct field *field;
	uchar *op = kp;
	uchar *ep;
	uchar *bound = kp + MAXREC;
	int i;
	int np;
	if(bound > b)
		bound = b;
	posns[0] = dp;
	if(tab)
		for(np=1, i=len, cp=dp; i>0 && np<NP; i--) {
			if(*cp++ != tab)
				continue;
			posns[np++] = cp;
		}
	else
		for(np=1, i=len, cp=dp; i>0 && np<NP; ) {
			/* test i before looking at *cp, so the byte
			   after the record is never read */
			while(i>0 && blank(cp))
				cp++, i--;
			while(i>0 && !blank(cp))
				cp++, i--;
			posns[np++] = cp;
		}

	if(nfields > 0)
		field = &fields[1];
	else
		field = &fields[0];
	i = nfields;
	do {
		int t = field->begin.fieldno;
		uchar *xp = dp + len;	/* end of record */
		/* start of key; a field number outside 0..np-1
		   selects the end of the record */
		if(t >= 0 && t < np) {
			cp = posns[t];
			if(field->bflag && nfields)
				while(cp<xp && blank(cp))
					cp++;
			/* clamp before advancing, so no pointer
			   beyond the record is ever formed */
			if(field->begin.charno > xp - cp)
				cp = xp;
			else
				cp += field->begin.charno;
			if(cp > xp)
				cp = xp;
		} else
			cp = xp;
		t = field->end.fieldno;
		if(t >= 0 && t < np) {
			if(field->end.charno < 0) {
				/* key runs to the end of field t */
				if(t >= np-1)
					ep = xp;
				else {
					ep = posns[t+1];
					if(tab) ep--;	/* leave out the separator */
				}
			} else {
				ep = posns[t];
				if(field->eflag)
					while(ep<xp && blank(ep))
						ep++;
				if(field->end.charno > xp - ep)
					ep = xp;
				else
					ep += field->end.charno;
			}
			if(ep > xp)
				ep = xp;
			else if(ep < cp)
				ep = cp;	/* empty key, never a negative length */
		} else
			ep = xp;
		t = ep - cp;
		if(op+room(t) > bound)
			return -1;
		op += (*field->coder)(cp, op, ep-cp, field);
		field++;
	} while(--i > 0);
	return op - kp;
}

	/* Encode a text field of len bytes at dp into the key at kp,
	   honoring -r (f->rflag), the translation table f->trans
	   (-f) and the keep table f->keep (-d, -i); returns the
	   number of key bytes written.
	   Bytes whose keep[] entry is zero are dropped; the rest
	   are translated.  The field is terminated by 0 (or by 255
	   when reversed).  So that the terminator cannot be confused
	   with data, translated values 0 and 1 are written as
	   0x0101 and 0x0102, and 254 and 255 as 0xFFFD and 0xFFFE;
	   with rflag set every output byte is complemented. */

int
tcode(uchar *dp, uchar *kp, int len, struct field *f)
{
	uchar *keeptab = f->keep;
	uchar *transtab = f->trans;
	uchar *out = kp;
	int mask = 0;
	int i;

	if(f->rflag)
		mask = ~0;

	for(i = 0; i < len; i++) {
		int ch = dp[i];

		if(!keeptab[ch])
			continue;
		ch = transtab[ch];
		if(ch <= 1) {	/* anti-ambiguity */
			*out++ = 1 ^ mask;
			ch += 1;
		} else if(ch >= 254) {
			*out++ = 255 ^ mask;
			ch -= 1;
		}
		*out++ = ch ^ mask;
	}
	*out++ = mask;
	return out - kp;
}

/* lower-case three-letter month names, January first */
static char *month[] = {
	"jan", "feb", "mar", "apr", "may", "jun",
	"jul", "aug", "sep", "oct", "nov", "dec"
};

/* Encode a month-name field as a single key byte: 1 to 12 for
   jan to dec, 0 when the field does not begin with a month name.
   Leading spaces and tabs are skipped; letters are compared with
   the 'a'-'A' bit forced on, so upper and lower case both match.
   The byte is complemented when f->rflag is set.  Always
   returns 1, the number of key bytes written. */
int
Mcode(uchar *dp, uchar *kp, int len, struct field *f)
{
	int code = 0;
	int m;
	int k;

	while(len > 0 && (*dp == ' ' || *dp == '\t')) {
		dp++;
		len--;
	}

	if(len >= 3) {
		for(m = 0; m < 12; m++) {
			for(k = 0; k < 3; k++)
				if((dp[k]|('a'-'A')) != (uchar)month[m][k])
					break;
			if(k >= 3) {
				code = m+1;
				break;
			}
		}
	}

	*kp = code;
	if(f->rflag)
		*kp ^= ~0;
	return 1;
}