OpenSolaris_b135/stand/lib/fs/common/cache.c

Compare this file to the similar file:
Show the results in this format:

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 *  Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 *  Use is subject to license terms.
 *
 *  This is mostly new code.  Major revisions were made to allow multiple
 *  file systems to share a common cache.  While this consisted primarily
 *  of including a "devid_t" pointer in the hash functions, I also re-
 *  organized everything to eliminate much of the duplicated code that
 *  had existed previously.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/vnode.h>
#include <sys/sysmacros.h>
#include <sys/filep.h>
#include <sys/salib.h>
#include <sys/promif.h>

#ifndef	ICACHE_SIZE
/*
 *  These should probably be defined in an architecture-specific header
 *  file.  The values below are analogous to those used in earlier versions
 *  of this module.
 */

#define	ICACHE_SIZE 350	    /* Max number of I-node in file cache	*/
#define	DCACHE_SIZE 1500    /* Max number of cached directories		*/
#define	BCACHE_SIZE 250	    /* Max number of cached disk blocks		*/
#endif

#define	Next 0		    /* Next pointer in Fwd/Bak link		*/
#define	Prev 1		    /* Previous pointer in Fwd/Back links	*/

#define	Frst 0		    /* Ptr to first element of a chain		*/
#define	Last 1		    /* Ptr to last element of a chain		*/

#define	Hash 2		    /* Offset of hash chain ptrs.		*/

typedef struct cache {	    /* Generic cache element:			*/
    struct cache *link[4];  /* .. Fwd/Bak links for hash chain & LRU	*/
    struct cache **chn;	    /* .. Hash chain link			*/
    int		   dev;	    /* .. Device file handle			*/
    void	 *data;	    /* .. Ptr to associated data		*/
    int		  size;	    /* .. Size of cached data			*/
} cache_t;

typedef struct head {	    /* Generic cache header:			*/
	cache_t	   *aged[2];	/* .. LRU list				*/
	int (*cmp)(cache_t *);	/* .. Ptr to comparison function	*/
	int	  size;		/* .. Size of "cache" objects		*/
	int	  maxblks;	/* .. Max number of cached elements	*/
	int	  count;	/* .. Current number of cached elements	*/
	int	  hits;		/* .. Total cache hits			*/
	int	  searches;	/* .. Total searches			*/
	int	  purges;	/* .. Total purges			*/
} head_t;

/* Constructor for cache headers:					*/
#define	cache_head(h, f, t, n) \
	{{(cache_t *)&h, (cache_t *)&h}, f, sizeof (t), n}

int read_opt;		/* Number of times cache was bypassed	*/
static int x_dev;	/* Target device ID saved here!		*/
static int x_len;	/* length of object			*/

#define	LOG2(x) \
	(((x) <= 16)  ?	 4 : /* Yeah, it's ugly.  But it works! */ \
	(((x) <= 32)  ?  5 : /* .. Binary log should be part of */ \
	(((x) <= 64)  ?  6 : /* .. the language!		*/ \
	(((x) <= 128) ?	 7 : 8))))

static cache_t *
get_cache(cache_t *cap, head_t *chp)
{
	/*
	 *  Search cache:
	 *
	 *  The caller pass a pointer to the first "cache" object in the current
	 *  hash chain ["cap"] and a pointer to the corresponding cache header
	 *  ["chp"].  This routine follows the cache chain until it finds an
	 *  entry that matches both the current device [as noted in "x_dev"]
	 *  and the cache-specific comparison ["chp->cmp"].
	 *
	 *  Returns the address of the matching cache object or null if there
	 *  is none.
	 */

	while (cap) {
		/*
		 * Check all entries on the cache chain.  We expect
		 * chains to be relatively short, so we use a simple
		 * linear search.
		 */
		if ((x_dev == cap->dev) && (*chp->cmp)(cap)) {
			/*
			 * Found the entry we're looking for! Move it
			 * to the front of the cache header's LRU list
			 * before returing its addres to the caller.
			 */
			cap->link[Next]->link[Prev] = cap->link[Prev];
			cap->link[Prev]->link[Next] = cap->link[Next];

			cap->link[Prev] = (cache_t *)chp->aged;
			cap->link[Next] = chp->aged[Frst];
			chp->aged[Frst]->link[Prev] = cap;
			chp->aged[Frst] = cap;
			chp->hits += 1;
			break;
		}

		cap = cap->link[Hash+Next];
	}

	chp->searches += 1;
	return (cap);
}

static cache_t *
reclaim_cache(head_t *chp, int dev)
{
	/*
	 * Reclaim a cache element:
	 *
	 * This routine is used to: [a] free the oldest element from
	 * the cache headed at "chp" and return the address of the
	 * corresponding "cache_t" struct (iff dev == -1), or [b] free all
	 * elements on the cache headed at "chp" that belong to the
	 * indicated "dev"ice.
	 */
	cache_t *cap, *cxp;
	cache_t *cpp = (cache_t *)chp;

	while ((cap = cpp->link[Prev]) != (cache_t *)chp) {
		/*
		 * We follow the cache's LRU chain from oldest to
		 * newest member.  This ensures that we remove only
		 * the oldest element when we're called with a
		 * negative "dev" argument.
		 */
		if ((dev == -1) || (dev == cap->dev)) {
			/*
			 * This is one of the (perhaps the only)
			 * elements we're supposed to free.  Remove it
			 * from both the LRU list and its associated
			 * hash chain.  Then free the data bound the
			 * the cache_t element and, if "dev" is
			 * not -1, the element itself!
			 */
			cap->link[Prev]->link[Next] = cap->link[Next];
			cap->link[Next]->link[Prev] = cap->link[Prev];

			if ((cxp = cap->link[Hash+Prev]) != 0)
				cxp->link[Hash+Next] = cap->link[Hash+Next];
			else
				*(cap->chn) = cap->link[Hash+Next];

			if ((cxp = cap->link[Hash+Next]) != 0)
				cxp->link[Hash+Prev] = cap->link[Hash+Prev];

			bkmem_free((caddr_t)cap->data, cap->size);
			if (dev == -1)
				return (cap);

			bkmem_free((caddr_t)cap, chp->size);
			chp->count -= 1;

		} else {
			/*
			 * Skip this element, it's not one of the
			 * ones we want to free up.
			 */
			cpp = cap;
		}
	};

	return (0);
}

static cache_t *
set_cache(cache_t **ccp, head_t *chp, int noreclaim)
{
	/*
	 *  Install a cache element:
	 *
	 *  The caller passes the address of cache descriptor ["chp"] and the
	 *  hash chain into which the new element is to be linked ["ccp"].  This
	 *  routine allocates a new cache_t structure (or, if the maximum number
	 *  of elements has already been allocated, reclaims the oldest element
	 *  from the cache), links it into the indicated hash chain, and returns
	 *  its address to the caller.
	 */
	cache_t *cap;

	if ((chp->count < chp->maxblks) &&
	    (cap = (cache_t *)bkmem_alloc(chp->size))) {
		/*
		 * We haven't reached the maximum cache size yet.
		 * Allocate a new "cache_t" struct to be added to the
		 * cache.
		 */
		chp->count += 1;

	} else {
		if (noreclaim)
			return (NULL);

		/*
		 * Cache is full.  Use the "reclaim_cache" routine to
		 * remove the oldest element from the cache.  This
		 * will become the cache_t struct associated with the
		 * new element.
		 */
		cap = reclaim_cache(chp, -1);
		chp->purges += 1;
	}

	bzero((char *)cap, chp->size);

	cap->chn = ccp;
	cap->link[Prev] = (cache_t *)chp;
	cap->link[Next] = chp->aged[Frst];
	cap->link[Prev]->link[Next] = cap->link[Next]->link[Prev] = cap;

	if ((cap->link[Hash+Next] = *ccp) != 0)
		(*ccp)->link[Hash+Prev] = cap;
	return (*ccp = cap);
}

/*
 *  The File Cache:
 *
 *  This cache (also known as the inode cache) is used to keep track of all
 *  files open on a given device.  The only special data required to locate
 *  a cache entry is the file reference number which is file-system dependent
 *  (for UNIX file systems, it's an inode number).
 */

typedef struct icache {		/* Inode cache element:		*/
	cache_t ic_hdr;		/* .. Standard header		*/
	int	ic_num;		/* .. I-node number		*/
} ic_t;

#define	IC_MAX_HDRS (1 << LOG2(ICACHE_SIZE/6))
#define	IC_HASH(d, i) (((d) + (i)) & (IC_MAX_HDRS - 1))

static int x_inode;

static int		    /* Cache search predicate:			    */
cmp_icache(cache_t *p)
{
	/* Just check the file number ("x_inode") ...	*/
	return (((ic_t *)p)->ic_num == x_inode);
}

static head_t	ic_head = cache_head(ic_head, cmp_icache, ic_t, ICACHE_SIZE);
static cache_t *ic_hash[IC_MAX_HDRS];

void *
get_icache(int dev, int inum)
{
	/*
	 *  Search File Cache:
	 *
	 *  This routine searches the file cache looking for the entry bound to
	 *  the given "dev"ice and file number ["inum"].  If said entry exists,
	 *  it returns the address of the associated file structure.  Otherwise
	 *  it returns null.
	 */
	cache_t *icp;

	x_dev = dev;
	x_inode = inum;
	icp = get_cache(ic_hash[IC_HASH(dev, inum)], &ic_head);

	return (icp ? (caddr_t)icp->data : 0);
}

void
set_icache(int dev, int inum, void *ip, int size)
{
	/*
	 *  Build a File Cache Entry:
	 *
	 * This routne installs the "size"-byte file structure at
	 * "*ip" in the inode cache where it may be retrieved by
	 * subsequent call to get_icache.
	 */
	ic_t *icp = (ic_t *)set_cache(&ic_hash[IC_HASH(dev, inum)],
								&ic_head, 0);
	icp->ic_num = inum;
	icp->ic_hdr.data = ip;
	icp->ic_hdr.dev = dev;
	icp->ic_hdr.size = size;
}

int
set_ricache(int dev, int inum, void *ip, int size)
{
	/*
	 * Reliably set the icache
	 *
	 * This routine is the same as set_icache except that it
	 * will return 1 if the entry could not be entered into the cache
	 * without a purge.
	 */
	ic_t *icp = (ic_t *)set_cache(&ic_hash[IC_HASH(dev, inum)],
					&ic_head, 1);

	if (icp == NULL)
		return (1);

	icp->ic_num = inum;
	icp->ic_hdr.data = ip;
	icp->ic_hdr.dev = dev;
	icp->ic_hdr.size = size;

	return (0);
}

/*
 *  The Directory Cache:
 *
 *  This cache is designed to speed directory searches.	 Each entry cor-
 *  responds to a directory entry that was used in a pathname resolution.
 *  The idea is that most files used by the boot wil be contained in a hand-
 *  full of directories, so we can speed searches if we know ahead of time
 *  just where these directories are.
 */

typedef struct dcache {		/* Directory cache objects:	*/
	cache_t dc_hdr;		/* .. Standard header		*/
	int	dc_inum;	/* .. File number		*/
	int	dc_pnum;	/* .. Parent diretory's file number */
} dc_t;

#define	DC_MAX_HDRS (1 << LOG2(DCACHE_SIZE/6))
#define	DC_HASH(d, n, l) (((d) + (n)[0] + (n)[(l)-1] + (l)) & (DC_MAX_HDRS-1))

static char *x_name;
static int x_pnum;

static int
cmp_dcache(cache_t *p) /* Cache Search predicate:	*/
{
	/* Check name, length, and parent's file number	*/
	return ((x_len == p->size) && (x_pnum == ((dc_t *)p)->dc_pnum) &&
	    (strcmp((char *)p->data, x_name) == 0));
}

static head_t	dc_head = cache_head(dc_head, cmp_dcache, dc_t, DCACHE_SIZE);
static cache_t *dc_hash[DC_MAX_HDRS];

int
get_dcache(int dev, char *name, int pnum)
{
	/*
	 *  Search Directory Cache:
	 *
	 *  This routine searches the directory cache for an entry
	 *  associated with directory number "pnum" from the given
	 *  file system that de-scribes a file of the given "name".
	 *  If we find such an entry, we return the corresponding file
	 *  number, 0 otherwise.
	 */
	dc_t *dcp;

	x_dev = dev;
	x_len = strlen(name)+1;
	x_pnum = pnum;
	x_name = name;
	dcp = (dc_t *)get_cache(dc_hash[DC_HASH(dev, name, x_len)], &dc_head);

	return (dcp ? dcp->dc_inum : 0);
}

void
set_dcache(int dev, char *name, int pnum, int inum)
{
	/*
	 *  Build Directory Cache Entry:
	 *
	 *  This routine creates directory cache entries to be retrieved later
	 *  via "get_dcache".  The cache key is composed of three parts: The
	 *  device specifier, the file name ("name"), and the file number of
	 *  the directory containing that name ("pnum").  The data portion of
	 *  the entry consists of the file number ("inum").
	 */

	int len = strlen(name)+1;
	dc_t *dcp =
	    (dc_t *)set_cache(&dc_hash[DC_HASH(dev, name, len)], &dc_head, 0);

	if (dcp->dc_hdr.data = (void *)bkmem_alloc(len)) {
		/*
		 * Allocate a buffer for the pathname component, and
		 * make this the "data" portion of the generalize
		 * "cache_t" struct. Also fill in the cache-specific
		 * fields (pnum, inum).
		 */
		dcp->dc_pnum = pnum;
		dcp->dc_inum = inum;
		dcp->dc_hdr.dev = dev;
		dcp->dc_hdr.size = len;
		bcopy(name, (char *)dcp->dc_hdr.data, len);

	} else {
		/*
		 * Not enough memory to make a copy of the name!
		 * There's probably not enough to do much else either!
		 */
		prom_panic("no memory for directory cache");
	}
}

int
set_rdcache(int dev, char *name, int pnum, int inum)
{
	/*
	 * Reliably set the dcache
	 *
	 * This routine is the same as set_dcache except that it
	 * return 1 if the entry could not be entered into
	 * the cache without a purge.
	 */
	int len = strlen(name) + 1;
	dc_t *dcp =
		(dc_t *)set_cache(&dc_hash[DC_HASH(dev, name, len)],
								&dc_head, 1);

	if (dcp == NULL)
		return (1);

	if ((dcp->dc_hdr.data = (void *)bkmem_alloc(len)) == NULL) {
		/*
		 * Not enough memory to make a copy of the name!
		 * There's probably not enough to do much else either!
		 */
		prom_panic("no memory for directory cache");
		/* NOTREACHED */
	}

	/*
	 * Allocate a buffer for the pathname component, and
	 * make this the "data" portion of the generalize
	 * "cache_t" struct. Also fill in the cache-specific
	 * fields (pnum, inum).
	 */
	dcp->dc_pnum = pnum;
	dcp->dc_inum = inum;
	dcp->dc_hdr.dev = dev;
	dcp->dc_hdr.size = len;
	bcopy(name, (char *)dcp->dc_hdr.data, len);

	return (0);
}

/*
 *  Disk Block Cache:
 */

typedef struct bcache {	    /* Disk block cache objects:		*/
	cache_t		bc_hdr;	/* .. Standard header			*/
	unsigned long	bc_blk;	/* .. The block number			*/
} bc_t;

#define	BC_MAX_HDRS (1 << LOG2(BCACHE_SIZE/6))
#define	BC_HASH(d, b, l) (((d) + (b) + ((l) >> 8)) & (BC_MAX_HDRS-1))

static unsigned long x_blkno;

static int
cmp_bcache(cache_t *p) /* Cache Search predicate:		*/
{
	/* Check block number, buffer size	*/
	return ((x_len == p->size) && (x_blkno == ((bc_t *)p)->bc_blk));
}

static head_t	bc_head = cache_head(bc_head, cmp_bcache, bc_t, BCACHE_SIZE);
static cache_t *bc_hash[BC_MAX_HDRS];

caddr_t
get_bcache(fileid_t *fp)
{
	/*
	 *  Search Disk Block Cache:
	 *
	 *  This should be getting pretty monotonous by now.  Aren't generalized
	 *  subroutines ("objects", if you prefer) great?
	 */
	cache_t *bcp;

	x_len = fp->fi_count;
	x_blkno = fp->fi_blocknum;
	x_dev = fp->fi_devp->di_dcookie;
	bcp = get_cache(bc_hash[BC_HASH(x_dev, x_blkno, x_len)], &bc_head);

	return (bcp ? (caddr_t)bcp->data : 0);
}

int
set_bcache(fileid_t *fp)
{
	/*
	 *  Insert Disk Block Cache Entry:
	 *
	 *  In this case, we actually read the requested block into a
	 *  dynamically allocated buffer before inserting it into the
	 *  cache.  If the read fails, we return a non-zero value.
	 *
	 *  The search keys for disk blocks are the block number and
	 *  buffer size.  The data associated with each entry is the
	 *  corresponding data buffer.
	 */
	bc_t *bcp;

	if (fp->fi_memp = bkmem_alloc(x_len = fp->fi_count)) {
		/*
		 *  We were able to succesffully allocate an input
		 *  buffer, now read the data into it.
		 */
		if (diskread(fp) != 0) {
			/*
			 * I/O error on read. Free the input buffer,
			 * print an error message, and bail out.
			 */
			bkmem_free(fp->fi_memp, x_len);
			printf("disk read error\n");
			return (-1);
		}

		x_blkno = fp->fi_blocknum;
		x_dev = fp->fi_devp->di_dcookie;
		bcp = (bc_t *)
			set_cache(&bc_hash[BC_HASH(x_dev, x_blkno, x_len)],
								&bc_head, 0);
		bcp->bc_blk = x_blkno;
		bcp->bc_hdr.dev = x_dev;
		bcp->bc_hdr.size = x_len;
		bcp->bc_hdr.data = (void *)fp->fi_memp;

	} else {
		/*
		 * We could be a bit more convervative here by
		 * calling "set_cache" before we try to allocate a
		 * buffer (thereby giving us a chance to re-use a
		 * previously allocated buffer) but the error recovery
		 * is a bit trickier, and if we're that short on memory
		 * we'll have trouble elsewhere anyway!
		 */
		prom_panic("can't read - no memory");
	}

	return (0);
}

void
release_cache(int dev)
{
	/*
	 *  Reclaim all cache entries:
	 *
	 *  This routine is called by the file-system's "closeall" method.  It
	 *  removes all cache entries associated with that file system from the
	 *  global cache and release any resources bound to said entrires.
	 */

	(void) reclaim_cache(&ic_head, dev);
	(void) reclaim_cache(&dc_head, dev);
	(void) reclaim_cache(&bc_head, dev);
}

void
print_cache_data()
{
	/*
	 *  Print some cacheing statistics ...
	 */
	static char	*tag[] = { "inode", "directory", "disk block", 0};
	static head_t	*hdp[] = { &ic_head, &dc_head, &bc_head, 0};

	int j;

	for (j = 0; tag[j]; j++) {
		/*
		 * Print statistics maintained in the header
		 * ("head_t" struct) of each of the above caches.
		 */
		head_t *hp = hdp[j];

		if (j)
			printf("\n");
		printf("%s cache:\n", tag[j]);
		printf("   max size %d\n", hp->maxblks);
		printf("   actual size %d\n", hp->count);
		printf("   total searches %d\n", hp->searches);
		printf("   cache hits %d\n", hp->hits);
		printf("   cache purges %d\n", hp->purges);
	}

	printf("\nread opts %d\n", read_opt);
}