4.3BSD/usr/ingres/source/decomp/reformat.c

Compare this file to the similar file:
Show the results in this format:

# include	<ingres.h>
# include	<catalog.h>
# include	<aux.h>
# include	<tree.h>
# include	<symbol.h>
# include	<pv.h>
# include	"globs.h"
# include	<access.h>
# include	<sccs.h>

SCCSID(@(#)reformat.c	8.3	1/16/85)

/*
**	REFORMAT.C -- "reformat" module of DECOMPOSITION
**
**	reformat() examines each relation which will be
**	involved in a one variable subquery to see if it
**	is cost effective to modify the relation to a more
**	usefull structure. Included in this file are all the
**	routines associated with reformat:
**
**		reformat -- reformats each relation if cost effective
**
**		findlinks -- determines what one variable clauses (if any)
**				are associated with the current variable
**				and the substitution variable.
**
**		rangrf    -- decides whether to actually reformat and does it.
**
**		primrf    -- performs a projection on a user relation
**
**		ckpkey    -- determines if relation is already structured usefully.
**
**		findwid   -- determines width of target list.
**
**		rel_tup   -- returns how many tuples fit on one page
*/
/*
** Reformat -- Examine each variable to see if it should be reformated.
**
**	The algorithm is:
**	(1) Find all variables for which, after tuple substitution,
**	    will have one variable equality clauses on them.
**	(2) Estimate how much it will cost to execute them.
**	(3) Ignore those relations which are already keyed usefully.
**	(4) If it is a user relation, determine if it will be less costly
**	    to first project (and possibly) modify the relation.
**	(5) If it is a _SYS relation, determine if it will be less costly
**	    to modify the relation to hash on the linking domains.
*/

reformat(var, sqlist, locrang, buf, tree)
int	var;
QTREE	*sqlist[];
int	locrang[];
char	buf[];
QTREE	*tree;
{
	register QTREE	*sq;
	register char	*lmap;
	register int	mainvar;
	int		i, j;
	char		linkmap[MAXDOM];

#	ifdef xDTR1
	if (tTf(39, -1))
		printf("REFORMAT: subvar=%d\n", var);
#	endif

	/* if the main tree is single var then put it in sq list */
	mainvar = -1;
	if (tree->sym.value.sym_root.tvarc == 1)
	{
		mainvar = bitpos(tree->sym.value.sym_root.lvarm | tree->sym.value.sym_root.rvarm);
		if (sqlist[mainvar] != 0)
			syserr("help reformat");
		sqlist[mainvar] = tree;
#		ifdef xDTR1
		if (tTf(39, 0))
			printf("including var %d\n", mainvar);
#		endif
	}
	for(i = 0; i < MAXRANGE; i++)
		if (sq = sqlist[i])
		{
#			ifdef xDTR1
			if (tTf(39, 0))
				printf("Sqlist[%d]:\n", i);
#			endif
			lmap = linkmap;
			for (j = 0; j < MAXDOM; j++)
				*lmap++ = 0;
			if (findlinks(sq->right, var, i, linkmap))
			{
#				ifdef xDTR1
				if (tTf(39, 1))
					prlinks("Findlinks found", linkmap);
#				endif
				rangrf(i, var, sq, buf, linkmap, tree, locrang);
			}
		}
	if (mainvar >= 0)
		sqlist[mainvar] = 0;
}
/*
** Findlinks -- Determine whether there are any equalify clauses
**	between the "linkv" and the variable selected for tuple
**	substitution "selv".
**
**	Return:
**		TRUE if there is a linking variable
**		FALSE if not
**
**	Side Effects:
**		The linkmap is set to 1 for each linking domains.
**		ie. if domains 3 is a linking domains then
**		linkmap[3] = 1.
*/

findlinks(node, selv, linkv, linkmap)
QTREE 	*node;
int	selv, linkv;
char	linkmap[];
{
	register QTREE 	*b, *a;
	register int 	s;
	QTREE		*temp;
	extern QTREE	*ckvar();

	a = node;
#	ifdef xDTR1
	if (tTf(39, 2))
	{
		printf("FINDLINKS:");
		nodepr(a);
	}
#	endif
	if (a->sym.type == QLEND) 
		return (0);
	s = findlinks(a->right, selv, linkv, linkmap);

	/*
	** This mess is looking for a clause of the form:
	**		EQ
	**	Var		Var
	**	linkv		selv
	** Where the VARS can be in either order
	*/
	if ((b = a->left)->sym.type != BOP || b->sym.value.sym_op.opno!= opEQ ||
		b->left->sym.type!=VAR || b->right->sym.type!=VAR)
			return (s);

	a = ckvar(b->left);
	b = ckvar(b->right);
	if (b->sym.value.sym_var.varno == linkv) 
	{
		temp = a;
		a = b;
		b = temp;
	}
	if (a->sym.value.sym_var.varno != linkv || (selv >= 0 && b->sym.value.sym_var.varno != selv))
		return (s);

	linkmap[a->sym.value.sym_var.attno] = 1;
#	ifdef xDTR1
	if (tTf(39, 3))
		printf("found:attno=%d\n", a->sym.value.sym_var.attno);
#	endif
	return (1);
}
/*
** Rangrf -- reformat the variable "var" if it is cost effective.
**
**	Rangrf does the actual work of reformat. If the relation is
**	already keyed usefully then rangrf does nothing.
**	Otherwise rangrf is split into two decisions:
**	A user relation must first be projected and then modified;
**	a _SYS relation can be modified directly.
**
**	It may be cost effective to just project a user relation.
*/

/*ARGSUSED*/
rangrf(var, substvar, sq, buf, linkmap, tree, locrang)
int	var, substvar;
QTREE	*sq;
char 	buf[];
char	linkmap[];
QTREE	*tree;
int	locrang[];
{
	register struct rang_tab	*r, *rs;
	register int			j;
	char				nums[2];
	int				i, newwid;
	QTREE				*pq;
	long				npages, newpages;
	long				origcost, modcost, projcost;
	extern char			*rangename();
	extern long			rel_pages(), hashcost();
	extern QTREE			*makroot();
	extern DESC			*openr1();
	extern QTREE			**mksqlist();

	r = &De.de_rangev[var];
	rs = &De.de_rangev[substvar];
	npages = rel_pages(r->rtcnt, r->rtwid);

	/* calculate original cost of substitution */

	origcost = rs->rtcnt * npages;

#	ifdef xDTR1
	if (tTf(39, 5))
		printf("eval of mod for var %d. orig cost=%ld\n", var, origcost);
#	endif

	/* if relation is already keyed usefully, just return */
	if (i = ckpkey(linkmap, var))
	{
#		ifdef xDTR1
		if (tTf(39, 4))
			printf("already keyed ok %d\n", i);
#		endif
		return;
	}

	/* if this is a primary relation, a projection must be done
	** before any modify can be performed */

	if (!rnum_temp(r->relnum))
	{
		/* evaluation for primary, user relation */

		/* to save some time, don't evaluate if origcost is very small */
		if (origcost < OVHDP + 1 + npages)
			return;

		j = markbuf(buf);

		/* build a projection tree but don't actually create the projection */
		pq = makroot(buf);
		dfind(sq, buf, mksqlist(pq, var));

		newwid = findwid(pq);
		newpages = rel_pages(r->rtcnt, newwid);

		/*
		** Calculate cost of projection + new cost of substitution
		** projcost = readoldpages + writenewpages + runquery + overhead
		*/

		projcost = npages + newpages + rs->rtcnt * newpages + OVHDP;


		/*  CALCULATE COST OF MODIFY = COST OF PROJECTION + COST OF MODIFY
		 *  	AND NEW COST OF SUBSTITUTION AFTER MODIFY    */

		modcost = (npages + newpages) +
				hashcost(newpages) +
				rs->rtcnt +
				OVHDP + OVHDM;

#		ifdef xDTR1
		if (tTf(39, 5))
		{
			printf("primary rel: proj cost=%ld\t", projcost);
			printf("mod cost=%ld\n", modcost);
		}
#		endif

		if (origcost <= modcost)
			if (origcost <= projcost)
			{
				freebuf(buf, j);
				return;
			}

#		ifdef xDTR1
		if (tTf(39, 5))
			printf("doing projection\n");
#		endif

		/* i will be TRUE if the proj was aborted */
		i = primrf(var, pq, locrang);
		freebuf(buf, j);
		if ((projcost <= modcost) || i)
			return;
	}

	/*  IF THIS IS A TEMPORARY RELATION, A MODIFY CAN BE DONE DIRECTLY  */

	else
	{

		/*  CALCULATE MODIFY COST (which does not include a projection)
		 *  AND NEW COST OF SUBSTITUTION				  */

		modcost = hashcost(npages)
		          + rs->rtcnt + OVHDM;

#		ifdef xDTR1
		if (tTf(39, 5))
			printf("temp rel: mod cost=%ld\n", modcost);
#		endif

		if (origcost <= modcost)
			return;
	}

#	ifdef xDTR1
	if (tTf(39, 5))
		printf("doing modify\n");
#	endif

	initp();
	setp(PV_STR, rangename(var));
	setp(PV_STR, "hash");	/* modify to hash */
	setp(PV_STR, "num");	/* passing domain numbers - not names */

	nums[1] = '\0';
	for (j = 0; j < MAXDOM; j++)
		if (linkmap[j])
		{
			*nums = j;
			setp(PV_STR, nums);
		}

	/* fill relation completely */
	setp(PV_STR, "");
	setp(PV_STR, "fillfactor");
	setp(PV_STR, "100");
	setp(PV_STR, "minpages");
	setp(PV_STR, "1");
	closer1(var);
	call_dbu(mdMODIFY, FALSE);

	/* re-open modified range to get accurate descriptor */
	openr1(var);
}
/*
** Primrf -- Replace a user relation with a projection of the needed domains.
**
**	Primrf retrieves into a temporary relation, the domains
**	specified by the "pq" tree. The range table is updated to
**	reflect the new range.
**
**	In fact a projection is not an accurate way to describe what
**	happens. In order to avoid changing any attribute numbers in
**	the query, the needed domains are projected and the domains
**	inbetween are created as type "c0". This way they occupy
**	no space and the attribute numbering is unchanged. Of course,
**	any attributes beyond the last one needed are simply discarded.
**
**	In previous versions attempts were made to project only the needed
**	domains and to renumber the query tree. This proved to be very
**	expensive and difficult and was not always as optimal as this
**	method.
**
**	The routines newquery() and endquery() will undo the effects
**	of primrf and restore the range table back to its original state.
*/

primrf(var, pq, locrang)
int	var;
QTREE	*pq;
int	locrang[];
{
	register QTREE	*q, **np;
	register int	i;
	int		maxresno, rnum;
	QTREE		*node1[MAXDOM+1], *node2[MAXDOM+1];
	static char	czero[QT_HDR_SIZ + sizeof (struct resdomnode)];	/* a dummy resdom */
	extern DESC	*openr1();
	extern char	*rnum_convert();

#	ifdef xDTR1
	if (tTf(39, 6))
		printf("PRIMRF:\n");
#	endif

	/* renumber RESDOMs to match their VARs */
	for (q = pq->left; q->sym.type != TREE; q = q->left)
		q->sym.value.sym_resdom.resno = q->right->sym.value.sym_var.attno;

	/* form list of RESDOMs from outermost inward */
	node1[lnode(pq->left, node1, 0)] = 0;

	/* form a dummy RESDOM with type CHAR and length 0 */
	q = (QTREE *) czero;
	q->sym.value.sym_resdom.resfrmt = CHAR;
	q->sym.value.sym_resdom.resfrml = 0;

	/* zero node2 */
	for (np = node2, i = 0; i < MAXDOM + 1; i++)
		*np++ = 0;

	/* sort RESDOMs into node2 */
	maxresno = 0;
	for (np = node1; q = *np++; )
	{
		if ((i = q->sym.value.sym_resdom.resno) == 0)
			return (1);	/* abort. Tid is referenced */
		if (i > maxresno)
			maxresno = i;
		node2[i-1] = q;
	}

	/* fill missing RESDOMs with czero */
	for (np = node2, i = 0; i < maxresno; i++, np++)
		if (*np == 0)
			*np = (QTREE *) czero;


	/* set up params for the create */
	initp();
	setp(PV_STR, "0");	/* initial relstat field */
	rnum = rnum_alloc();
	setp(PV_STR, rnum_convert(rnum));
	domnam(node2, "f");
	call_dbu(mdCREATE, FALSE);

	/* now run projection */
	i = var;
	execsq1(pq, i, rnum);
	new_range(i, rnum);
	/* save the name of the new relation */
	savrang(locrang, i);
	openr1(i);
	return (0);
}
/*
** Ckpkey -- determine if a relation is already keyed usefully.
**
**	Ckpkey gets the key structure from paramd() and compares
**	it to the linkmap. If the relation is ISAM and the first keyed
**	domain is in linkmap, or if it is HASH and all keyed domains
**	are in the linkmap, then ckpkey() returns >0, else
**	ckpkey looks for secondary indexes which are usable.
**	if none, then ckpkey() returns 0.
**
**	The value returned is an estimate of the number of
**	pages which must be read to satisfy a query given
**	equality clauses on the linkmap domains and the
**	structure of the relation. The (crude) estimates are:
**		hash	1 page
**		isam	2 pages
**		hash sec index	2 pages
**		isam sec index	3 pages
*/

ckpkey(linkmap, var)
char	linkmap[];
int	var;
{
	register DESC		*d;
	register int		i;
	struct index		itup;
	struct accessparam 	ap;
	TID			lotid, hitid;
	extern DESC		*readopen(), *openr1(), Inddes;


#	ifdef  xDTR1
	if (tTf(39, 11))
		printf("CKPKEY:%s\n", rangename(var));
#	endif

	/* if relation is an unindexed heap, then it cannot be keyed usefully */
	d = openr1(var);
	if (d->reldum.relspec != M_HEAP || d->reldum.relindxd > 0)
	{
		d = readopen(var);	/* open relation if it hasn't been already */
		paramd(d, &ap);
		if (ckpkey1(linkmap, &ap) == 0)
			return (ap.mode == EXACTKEY ? 1 : 2);	/* success */
		
		if (d->reldum.relindxd > 0)
		{
			opencatalog("indexes", OR_READ);
			setkey(&Inddes, (char *)&itup, d->reldum.relid, IRELIDP);
			setkey(&Inddes, (char *)&itup, d->reldum.relowner, IOWNERP);
			if (i = find(&Inddes, EXACTKEY, &lotid, &hitid, (char *)&itup))
				syserr("ckpkey:find %d", i);

			while ((i = get(&Inddes, &lotid, &hitid, (char *)&itup, TRUE)) == 0)
			{
				if (!bequal(itup.irelidp, d->reldum.relid, MAXNAME + 2))
					continue;
				
				parami(&itup, &ap);
				if (ckpkey1(linkmap, &ap) == 0)
					return (ap.mode == EXACTKEY ? 2 : 3);	/* success */
			}
		}
	}
	return (0);	/* failure. no useful structure */
}



ckpkey1(linkmap, ap)
char			linkmap[];
struct accessparam	*ap;
{
	register int	i, k, anykey;

	if (ap->mode == NOKEY)
		return (1);
	anykey = 0;
	for (i = 0; k = ap->keydno[i]; i++)
	{
		if (linkmap[k] == 0)
		{
			if (ap->mode == EXACTKEY) 
				return (1);
			else 
				break;
		}
		anykey++;
	}
	return (!anykey);
}
/*
** Findwid -- scan the target list of the projection tree
**	to determine the resultant tuple size.
**
**	Return:
**		Size of projected tuple.
*/

findwid(tree)
QTREE	*tree;
{
	register QTREE	*nod, *t;
	register int	wid;

	wid = 0;
	t = tree;

	for (nod = t->left; nod && nod->sym.type != TREE; nod = nod->left)
	{
		wid += nod->sym.value.sym_var.varfrml & I1MASK;
	}

	return (wid);
}
/*
**  HASHCOST -- determine cost to modify to hash
**
**	Estimates the cost to modify the relation to hash.
**	The estimate is crude since it assumes that there
**	are no duplicates and that a "unix" page will be
**	the same size as an "ingres" page.
**
**	The cost is based on the parameters which signify
**	the size of the in-core sort buffer and the n-way
**	merge sort plus the cost to read the sorted file
**	and write the new relation twice (that's the was it works!).
**
**	Parameters:
**		npages - the number of pages (estimate) which the
**				relation currently occupies.
**
**	Returns:
**		the cost to hash the relation.
**
**	Side Effects:
**		none
**
**	Called By:
**		rangrf
*/

# define	COREBUFSIZE	32767 / PGSIZE
# define	MERGESIZE	7

long
hashcost(npages)
long	npages;
{
	long		sortpages, total;
	register int	nfiles;

	nfiles = npages / COREBUFSIZE;
	sortpages = 2 * npages;

	while (nfiles > 1)
	{
		nfiles = (nfiles + MERGESIZE - 1) / MERGESIZE;
		sortpages += 2 * npages;
	}

	total = sortpages + npages + npages + npages;
#	ifdef xDTR1
	if (tTf(39, 5))
		printf("hashcost is %ld\n", total);
#	endif

	return (total);
}