namei name cache missing (+FIX)

Steven M. Schultz sms at wlv.imsd.contel.com
Sun Feb 4 19:28:42 AEST 1990


Subject: namei name caching missing from 2.10.1BSD +FIX
Index:	sys/{init_main,ufs_namei,ufs_inode,ufs_mount,ufs_syscalls}.c 2.10BSD

Description:
	The name caching done by the namei routine on 4.3BSD wasn't
	implemented under 2.10.1BSD.

Repeat-By:
	By inspection of the sources.

Fix:
	The following patches for:

		/sys/h/inode.h
		/sys/h/namei.h
		/sys/sys/init_main.c
		/sys/sys/ufs_namei.c
		/sys/sys/ufs_inode.c
		/sys/sys/ufs_mount.c
		/sys/sys/ufs_syscalls.c
		/sys/pdp/machdep2.c
		/usr/src/ucb/vmstat.c

	will implement the namei name caching for 2.10.1BSD.  Almost of the
	diffs are small, the largest one is, of course, for ufs_namei.c.

	The name cache is an externally mapped region 8kb max in size.  For
	all but the largest number of inodes this is sufficient room to 
	maintain 4.3's 10% larger number of cache entries than inodes.  (the
	systems here have 224 inodes and the cache holds 242 entries - only
	a few less than the 246 the +10% target).

	NO changes are made to the actual inode structure.   The i_id member
	(a u_short rather than a long - see the comment in the patch for
	inode.h) of the inode is implemented as an array of u_shorts at the
	end of the external cache region.  IF the kernel D-space is sufficient
	to allow an extra 2 bytes per inode, then references of the form
	"nmi_i_id[ip - inode]" can be replaced by "ip->i_id".  This requires
	recompiling the programs which refer to the incore inode structure.

	The slight rearrangment of code at the beginning of ufs_namei 
	served two purposes - it made the code look more like 4.3BSD and
	provided the length of the filename component upon exiting from
	the loop.

	In namei.h, the maximum length of name cached is set to 13 to save
	space, this means that 14 character files will not be cached - there
	aren't that many present in the system, so hopefully this will not be
	a problem.

	Also included is a simple program to display the name caching statistics
	in a 'iostat' type manner.

	From a 11/44 running the new kernel over the weekend so far:

        0 swap ins
        0 swap outs
     7734 pages swapped in
    20593 pages swapped out
        0 page ins
        0 page outs
   698019 cpu context switches
 13489441 device interrupts
   138283 software interrupts
    29639 traps
     1238 overlay emts
  2061224 system calls
   538655 total name lookups (cache hits 57% system 31% per-process)
          badhits 1225, falsehits 1973, toolong 3888
     3811 total calls to xalloc (cache hits 73%)
          sticky 0 flushed 460 unused 0
     5731 total calls to xfree (sticky 2398 cached 3333 swapped 2890)

     Without further ado, here are the changes to add the namei caching:

*** inode.h.old	Wed Aug 10 13:14:09 1988
--- inode.h	Sat Feb  3 23:42:53 1990
***************
*** 3,9 ****
   * All rights reserved.  The Berkeley software License Agreement
   * specifies the terms and conditions for redistribution.
   *
!  *	@(#)inode.h	1.1 (2.10BSD Berkeley) 12/1/86
   */
  
  /*
--- 3,9 ----
   * All rights reserved.  The Berkeley software License Agreement
   * specifies the terms and conditions for redistribution.
   *
!  *	@(#)inode.h	1.2 (2.10BSD Berkeley) 1/26/90
   */
  
  /*
***************
*** 131,136 ****
--- 131,161 ----
  #define	di_ctime	di_ic2.ic_ctime
  
  #if defined(KERNEL) && !defined(SUPERVISOR)
+ /*
+  * Invalidate an inode. Used by the namei cache to detect stale
+  * information.  For 2.10.XBSD, the i_id field of the inode is externally
+  * allocated with the namecache.  In order to increase the number of cache
+  * entries and reduce somewhat the overhead - the i_id field is made into
+  * a u_short.  If a pdp-11 can invalidate 100 inodes per second, the cache
+  * will have to be invalidated in about 11 minutes.  Ha!
+  */
+ #define cacheinvalall _cinvall
+ 
+ /*
+  * This should probably be done in assembly.
+  * the cacheinvalall routine assumes that the namei cache is mapped.
+ */
+ #include <sys/namei.h>
+ 
+ #define cacheinval(ip)	{ segm seg5; saveseg5(seg5); \
+ 	mapseg5(nmidesc.se_addr,nmidesc.se_desc); \
+ 	nmi_i_id[(ip) - inode] = ++nextinodeid; \
+ 	if (nextinodeid == 0) \
+ 		cacheinvalall(); \
+ 	restorseg5(seg5); }
+ 
+ u_short	nextinodeid;		/* unique id generator */
+ 
  #ifdef EXTERNALITIMES
  memaddr	xitimes;
  u_int	xitdesc;
*** namei.h.old	Sat May 16 11:29:23 1987
--- namei.h	Sat Jan 27 00:31:19 1990
***************
*** 3,9 ****
   * All rights reserved.  The Berkeley software License Agreement
   * specifies the terms and conditions for redistribution.
   *
!  *	@(#)namei.h	1.1 (2.10BSD Berkeley) 12/1/86
   */
  
  #ifndef _NAMEI_
--- 3,9 ----
   * All rights reserved.  The Berkeley software License Agreement
   * specifies the terms and conditions for redistribution.
   *
!  *	@(#)namei.h	1.2 (2.10BSD Berkeley) 1/26/90
   */
  
  #ifndef _NAMEI_
***************
*** 25,28 ****
--- 25,67 ----
  #define NOCACHE		0x20	/* name must not be left in cache */
  #define FOLLOW		0x40	/* follow symbolic links */
  #define	NOFOLLOW	0x0	/* don't follow symbolic links (pseudo) */
+ 
+ /*
+  * This structure describes the elements in the cache of recent
+  * names looked up by namei.
+  */
+ struct	namecache {
+ 	struct	namecache *nc_forw;	/* hash chain, MUST BE FIRST */
+ 	struct	namecache *nc_back;	/* hash chain, MUST BE FIRST */
+ 	struct	namecache *nc_nxt;	/* LRU chain */
+ 	struct	namecache **nc_prev;	/* LRU chain */
+ 	struct	inode *nc_ip;		/* inode the name refers to */
+ 	ino_t	nc_ino;			/* ino of parent of name */
+ 	dev_t	nc_dev;			/* dev of parent of name */
+ 	dev_t	nc_idev;		/* dev of the name ref'd */
+ 	u_short	nc_id;			/* referenced inode's id */
+ 	char	nc_nlen;		/* length of name */
+ #define	NCHNAMLEN	13	/* maximum name segment length we bother with */
+ 	char	nc_name[NCHNAMLEN];	/* segment name */
+ };
+ #if	defined(KERNEL) && !defined(SUPERVISOR)
+ struct	namecache *namecache;
+ int	nchsize;
+ u_short	*nmi_i_id;
+ #include <machine/seg.h>
+ segm	nmidesc;
+ #endif
+ 
+ /*
+  * Stats on usefulness of namei caches.
+  */
+ struct	nchstats {
+ 	long	ncs_goodhits;		/* hits that we can reall use */
+ 	long	ncs_badhits;		/* hits we must drop */
+ 	long	ncs_falsehits;		/* hits with id mismatch */
+ 	long	ncs_miss;		/* misses */
+ 	long	ncs_long;		/* long names that ignore cache */
+ 	long	ncs_pass2;		/* names found with passes == 2 */
+ 	long	ncs_2passes;		/* number of times we attempt it */
+ };
  #endif
*** machdep2.c.old	Wed Aug  9 22:30:35 1989
--- machdep2.c	Sat Jan 27 20:04:11 1990
***************
*** 3,9 ****
   * All rights reserved.  The Berkeley software License Agreement
   * specifies the terms and conditions for redistribution.
   *
!  *	@(#)machdep.c	1.1 (2.10BSD Berkeley) 12/1/86
   */
  
  #include "param.h"
--- 3,9 ----
   * All rights reserved.  The Berkeley software License Agreement
   * specifies the terms and conditions for redistribution.
   *
!  *	@(#)machdep.c	1.2 (2.10BSD Berkeley) 1/26/90
   */
  
  #include "param.h"
***************
*** 25,30 ****
--- 25,32 ----
  #include "systm.h"
  #include "ram.h"
  #include "msgbuf.h"
+ #include "namei.h"
+ 
  #ifdef QUOTA
  #include "quota.h"
  #endif
***************
*** 154,159 ****
--- 156,177 ----
  	QUOini();
  #undef C
  #endif
+ 
+ 	{
+ register int B;
+ 
+ 	nchsize = (8192 - ninode * sizeof (u_short)) / sizeof(struct namecache);
+ 	if (nchsize > (ninode * 11 / 10))
+ 		nchsize = ninode * 11 / 10;
+ 	B = (btoc(nchsize * sizeof(struct namecache) + ninode * sizeof(short)));
+ 	if ((nmidesc.se_addr = malloc(coremap, B)) == 0)
+ 		panic("nameimalloc");
+ 	nmidesc.se_desc = ((B - 1) << 8) | RW;
+ 	namecache = (struct namecache *)SEG5;
+ 	nmi_i_id = (u_short *)((caddr_t)
+ 				(SEG5 + nchsize * sizeof (struct namecache)));
+ 	maxmem -= B;
+ 	}
  
  #define B	(size_t)(((long)nbuf * (MAXBSIZE)) / ctob(1))
  	if ((bpaddr = malloc(coremap, B)) == 0)
*** init_main.c.old	Fri Sep  2 21:08:08 1988
--- init_main.c	Sat Feb  3 23:35:19 1990
***************
*** 3,9 ****
   * All rights reserved.  The Berkeley software License Agreement
   * specifies the terms and conditions for redistribution.
   *
!  *	@(#)init_main.c	1.1 (2.10BSD Berkeley) 12/1/86
   */
  
  #include "param.h"
--- 3,9 ----
   * All rights reserved.  The Berkeley software License Agreement
   * specifies the terms and conditions for redistribution.
   *
!  *	@(#)init_main.c	1.2 (2.10BSD Berkeley) 1/26/90
   */
  
  #include "param.h"
***************
*** 88,93 ****
--- 88,94 ----
  	px_quota[0] = u.u_quota;
  	QUOTAUNMAP();
  #endif
+ 	nchinit();
  	clkstart();
  	iinit();
  
*** ufs_mount.c.old	Wed Mar 22 14:28:43 1989
--- ufs_mount.c	Mon Jan 29 21:32:17 1990
***************
*** 3,9 ****
   * All rights reserved.  The Berkeley software License Agreement
   * specifies the terms and conditions for redistribution.
   *
!  *	@(#)ufs_mount.c	1.1 (2.10BSD Berkeley) 12/1/86
   */
  
  #include "param.h"
--- 3,9 ----
   * All rights reserved.  The Berkeley software License Agreement
   * specifies the terms and conditions for redistribution.
   *
!  *	@(#)ufs_mount.c	1.2 (2.10BSD Berkeley) 1/29/90
   */
  
  #include "param.h"
***************
*** 115,120 ****
--- 115,121 ----
  	fs->fs_lasti = 1;
  	if (ip) {
  		ip->i_flag |= IMOUNT;
+ 		cacheinval(ip);
  		IUNLOCK(ip);
  	}
  	return (fs);
***************
*** 161,166 ****
--- 162,168 ----
  	return (EINVAL);
  found:
  	xumount(dev);	/* remove unused sticky files from text table */
+ 	nchinval(dev);	/* flush the name cache */
  	update();
  #ifdef QUOTA
  	if (iflush(dev, mp->m_qinod) < 0)
***************
*** 172,177 ****
--- 174,183 ----
  	QUOTAMAP();
  	closedq(mp);
  	QUOTAUNMAP();
+ 	/*
+ 	 * Here we have to iflush again to get rid of the quota inode.
+ 	 * A drag, but it would be ugly to cheat, & this doesn't happen often
+ 	 */
  	(void)iflush(dev, (struct inode *)NULL);
  #endif
  	ip = mp->m_inodp;
*** ufs_inode.c.old	Tue Jul  5 00:00:00 1988
--- ufs_inode.c	Mon Jan 29 21:39:24 1990
***************
*** 3,9 ****
   * All rights reserved.  The Berkeley software License Agreement
   * specifies the terms and conditions for redistribution.
   *
!  *	@(#)ufs_inode.c	1.1 (2.10BSD Berkeley) 12/1/86
   */
  
  #include "param.h"
--- 3,9 ----
   * All rights reserved.  The Berkeley software License Agreement
   * specifies the terms and conditions for redistribution.
   *
!  *	@(#)ufs_inode.c	1.2 (2.10BSD Berkeley) 1/29/90
   */
  
  #include "param.h"
***************
*** 186,191 ****
--- 186,192 ----
  	ip->i_dev = dev;
  	ip->i_fs = fs;
  	ip->i_number = ino;
+ 	cacheinval(ip);
  	ip->i_flag = ILOCKED;
  	ip->i_count++;
  	ip->i_lastr = 0;
***************
*** 261,266 ****
--- 262,297 ----
  	QUOTAUNMAP();
  #endif
  	return (ip);
+ }
+ 
+ /*
+  * Convert a pointer to an inode into a reference to an inode.
+  *
+  * This is basically the internal piece of iget (after the
+  * inode pointer is located) but without the test for mounted
+  * filesystems.  It is caller's responsibility to check that
+  * the inode pointer is valid.
+  */
+ igrab(ip)
+ 	register struct inode *ip;
+ {
+ 	while ((ip->i_flag&ILOCKED) != 0) {
+ 		ip->i_flag |= IWANT;
+ 		sleep((caddr_t)ip, PINOD);
+ 	}
+ 	if (ip->i_count == 0) {		/* ino on free list */
+ 		register struct inode *iq;
+ 
+ 		if (iq = ip->i_freef)
+ 			iq->i_freeb = ip->i_freeb;
+ 		else
+ 			ifreet = ip->i_freeb;
+ 		*ip->i_freeb = iq;
+ 		ip->i_freef = NULL;
+ 		ip->i_freeb = NULL;
+ 	}
+ 	ip->i_count++;
+ 	ip->i_flag |= ILOCKED;
  }
  
  /*
*** ufs_sys.c.old	Sat Apr 29 20:24:44 1989
--- ufs_syscalls.c	Mon Jan 29 21:22:25 1990
***************
*** 3,9 ****
   * All rights reserved.  The Berkeley software License Agreement
   * specifies the terms and conditions for redistribution.
   *
!  *	@(#)ufs_syscalls.c	1.2 (2.10BSD Berkeley) 4/29/89
   */
  
  #include "param.h"
--- 3,9 ----
   * All rights reserved.  The Berkeley software License Agreement
   * specifies the terms and conditions for redistribution.
   *
!  *	@(#)ufs_syscalls.c	1.3 (2.10BSD Berkeley) 1/26/90
   */
  
  #include "param.h"
***************
*** 848,854 ****
  	 * and target inodes are returned locked.
  	 */
  	u.u_dirp = (caddr_t)uap->to;
! 	xp = namei(CREATE | LOCKPARENT);
  	if (u.u_error) {
  		error = u.u_error;
  		goto out;
--- 848,854 ----
  	 * and target inodes are returned locked.
  	 */
  	u.u_dirp = (caddr_t)uap->to;
! 	xp = namei(CREATE | LOCKPARENT | NOCACHE);
  	if (u.u_error) {
  		error = u.u_error;
  		goto out;
***************
*** 953,958 ****
--- 953,959 ----
  				error = ENOTDIR;
  				goto bad;
  			}
+ 			cacheinval(dp);
  		} else if (doingdirectory) {
  			error = EISDIR;
  			goto bad;
***************
*** 1035,1040 ****
--- 1036,1042 ----
  					u.u_offset = 0;
  					dirbuf.dotdot_ino = newparent;
  					(void) writei(xp);
+ 					cacheinval(dp);
  				}
  			}
  			u.u_offset = saved_offset;
***************
*** 1225,1231 ****
  	if (u.u_error) {
  		u.u_segflg = UIO_USERSPACE;
  		u.u_dirp = uap->name;
! 		dp = namei(LOOKUP);
  		if (dp) {
  			dp->i_nlink--;
  			dp->i_flag |= ICHG;
--- 1227,1233 ----
  	if (u.u_error) {
  		u.u_segflg = UIO_USERSPACE;
  		u.u_dirp = uap->name;
! 		dp = namei(LOOKUP | NOCACHE);
  		if (dp) {
  			dp->i_nlink--;
  			dp->i_flag |= ICHG;
***************
*** 1302,1307 ****
--- 1304,1310 ----
  		goto out;
  	dp->i_nlink--;
  	dp->i_flag |= ICHG;
+ 	cacheinval(dp);
  	iput(dp);
  	dp = NULL;
  	/*
***************
*** 1317,1322 ****
--- 1320,1326 ----
  	 */
  	ip->i_nlink -= 2;
  	itrunc(ip, (u_long)0);
+ 	cacheinval(ip);
  out:
  	if (dp)
  		iput(dp);
*** ufs_namei.c.old	Sat Apr 30 16:14:48 1988
--- ufs_namei.c	Wed Jan 31 22:59:39 1990
***************
*** 3,9 ****
   * All rights reserved.  The Berkeley software License Agreement
   * specifies the terms and conditions for redistribution.
   *
!  *	@(#)ufs_namei.c	1.1 (Berkeley) 12/1/86
   */
  #include "param.h"
  #include "../machine/seg.h"
--- 3,9 ----
   * All rights reserved.  The Berkeley software License Agreement
   * specifies the terms and conditions for redistribution.
   *
!  *	@(#)ufs_namei.c	1.2 (Berkeley) 1/26/90
   */
  #include "param.h"
  #include "../machine/seg.h"
***************
*** 17,23 ****
--- 17,108 ----
  #include "namei.h"
  
  /*
+  * Structures associated with name cacheing.
+  */
+ #define	NCHHASH		16	/* size of hash table */
+ 
+ #if	((NCHHASH)&((NCHHASH)-1)) != 0
+ #define	NHASH(h, i, d)	((unsigned)((h) + (i) + 13 * (int)(d)) % (NCHHASH))
+ #else
+ #define	NHASH(h, i, d)	((unsigned)((h) + (i) + 13 * (int)(d)) & ((NCHHASH)-1))
+ #endif
+ 
+ union nchash {
+ 	union	nchash *nch_head[2];
+ 	struct	namecache *nch_chain[2];
+ } nchash[NCHHASH];
+ #define	nch_forw	nch_chain[0]
+ #define	nch_back	nch_chain[1]
+ 
+ struct	namecache *nchhead, **nchtail;	/* LRU chain pointers */
+ struct	nchstats nchstats;		/* cache effectiveness statistics */
+ 
+ /*
   * Convert a pathname into a pointer to a locked inode.
+  * This is a very central and rather complicated routine.
+  * If the file system is not maintained in a strict tree hierarchy,
+  * this can result in a deadlock situation (see comments in code below).
+  *
+  * The flag argument is LOOKUP, CREATE, or DELETE depending on whether
+  * the name is to be looked up, created, or deleted. When CREATE or
+  * DELETE is specified, information usable in creating or deleteing a
+  * directory entry is also calculated. If flag has LOCKPARENT or'ed
+  * into it and the target of the pathname exists, namei returns both
+  * the target and its parent directory locked. When creating and
+  * LOCKPARENT is specified, the target may not be ".".  When deleting
+  * and LOCKPARENT is specified, the target may be ".", but the caller
+  * must check to insure it does an irele and iput instead of two iputs.
+  *
+  * The FOLLOW flag is set when symbolic links are to be followed
+  * when they occur at the end of the name translation process.
+  * Symbolic links are always followed for all other pathname
+  * components other than the last.
+  *
+  * Name caching works as follows:
+  *
+  * Names found by directory scans are retained in a cache
+  * for future reference.  It is managed LRU, so frequently
+  * used names will hang around.  Cache is indexed by hash value
+  * obtained from (ino,dev,name) where ino & dev refer to the
+  * directory containing name.
+  *
+  * For simplicity (and economy of storage), names longer than
+  * a maximum length of NCHNAMLEN are not cached; they occur
+  * infrequently in any case, and are almost never of interest.
+  *
+  * Upon reaching the last segment of a path, if the reference
+  * is for DELETE, or NOCACHE is set (rewrite), and the
+  * name is located in the cache, it will be dropped.
+  *
+  * Overall outline of namei:
+  *
+  *	copy in name
+  *	get starting directory
+  * dirloop:
+  *	check accessibility of directory
+  * dirloop2:
+  *	copy next component of name to u.u_dent.d_name
+  *	handle degenerate case where name is null string
+  *	look for name in cache, if found, then if at end of path
+  *	  and deleting or creating, drop it, else to haveino
+  *	search for name in directory, to found or notfound
+  * notfound:
+  *	if creating, return locked directory, leaving info on avail. slots
+  *	else return error
+  * found:
+  *	if at end of path and deleting, return information to allow delete
+  *	if at end of path and rewriting (CREATE and LOCKPARENT), lock target
+  *	  inode and return info to allow rewrite
+  *	if .. and on mounted filesys, look in mount table for parent
+  *	if not at end, add name to cache; if at end and neither creating
+  *	  nor deleting, add name to cache
+  * haveino:
+  *	if symbolic link, massage name in buffer and continue at dirloop
+  *	if more components of name, do next level at dirloop
+  *	return the answer as locked inode
+  *
+  * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode,
+  *	 but unlocked.
   */
  struct inode *
  namei(nameiop)
***************
*** 26,31 ****
--- 111,117 ----
  	register char *cp;		/* pointer into pathname argument */
  /* these variables refer to things which must be freed or unlocked */
  	struct inode *dp = 0;		/* the directory we are searching */
+ 	struct namecache *ncp;		/* cache slot for entry */
  	struct fs *fs;			/* file system that directory is in */
  	struct buf *bp = 0;		/* a buffer of directory entries */
  	struct v7direct *ep;		/* the current directory entry */
***************
*** 36,65 ****
  	off_t endsearch;		/* offset to end directory search */
  	int nlink = 0;			/* number of symbolic links taken */
  	struct inode *pdp;		/* saved dp during symlink work */
  	int lockparent;
  	int isdotdot;			/* != 0 if current name is ".." */
  	int flag;			/* op ie, LOOKUP, CREATE, or DELETE */
  	off_t enduseful;		/* pointer past last used dir slot */
  	char	path[MAXPATHLEN];	/* current path */
  
  	lockparent = nameiop & LOCKPARENT;
  	flag = nameiop &~ (LOCKPARENT|NOCACHE|FOLLOW);
  	/*
  	 * Copy the name into the buffer.
  	 */
! 	{
! 		register int error;
! 
! 		if (u.u_segflg == UIO_SYSSPACE)
! 			error = copystr(u.u_dirp, path, MAXPATHLEN,
! 			    (u_int *)0);
! 		else
! 			error = copyinstr(u.u_dirp, path, MAXPATHLEN,
! 			    (u_int *)0);
! 		if (error) {
! 			u.u_error = error;
! 			return (NULL);
! 		}
  	}
  
  	/*
--- 122,157 ----
  	off_t endsearch;		/* offset to end directory search */
  	int nlink = 0;			/* number of symbolic links taken */
  	struct inode *pdp;		/* saved dp during symlink work */
+ register int error, i;
  	int lockparent;
+ 	int docache;			/* == 0 do not cache last component */
+ 	int makeentry;			/* != 0 if name to be added to cache */
+ 	unsigned hash;			/* value of name hash for entry */
+ 	union nchash *nhp;		/* cace chain head for entry */
  	int isdotdot;			/* != 0 if current name is ".." */
  	int flag;			/* op ie, LOOKUP, CREATE, or DELETE */
  	off_t enduseful;		/* pointer past last used dir slot */
  	char	path[MAXPATHLEN];	/* current path */
+ 	int	namelen;		/* length of last component */
+ 	segm	seg5;			/* save area for kernel seg5 */
  
  	lockparent = nameiop & LOCKPARENT;
+ 	docache = (nameiop & NOCACHE) ^ NOCACHE;
  	flag = nameiop &~ (LOCKPARENT|NOCACHE|FOLLOW);
+ 	if (flag == DELETE || lockparent)
+ 		docache = 0;
  	/*
  	 * Copy the name into the buffer.
  	 */
! 	if (u.u_segflg == UIO_SYSSPACE)
! 		error = copystr(u.u_dirp, path, MAXPATHLEN,
! 		    (u_int *)0);
! 	else
! 		error = copyinstr(u.u_dirp, path, MAXPATHLEN,
! 		    (u_int *)0);
! 	if (error) {
! 		u.u_error = error;
! 		return (NULL);
  	}
  
  	/*
***************
*** 98,128 ****
  	/*
  	 * Copy next component of name to u.u_dent.d_name.
  	 */
! 	{
! 		register char	*tp;
! 
! 		for (tp = u.u_dent.d_name; *cp != 0 && *cp != '/'; cp++) {
! 			if (tp == &u.u_dent.d_name[MAXNAMLEN]) {
  #ifdef NO_FILE_NAME_MAPPING
! 				u.u_error = ENAMETOOLONG;
! 				goto bad;
  #else
! 				for (; *cp != 0 && *cp != '/'; cp++);
! 				break;
  #endif
- 			}
- 			if (*cp & 0200)
- 				if ((*cp&0377) == ('/'|0200) || flag != DELETE) {
- 					u.u_error = EINVAL;
- 					goto bad;
- 				}
- 			*tp++ = *cp;
  		}
! 		while (tp < &u.u_dent.d_name[MAXNAMLEN])
! 			*tp++ = '\0';
  	}
  	isdotdot = (u.u_dent.d_name[0] == '.' &&
  		u.u_dent.d_name[1] == '.' && u.u_dent.d_name[2] == '\0');
  
  	/*
  	 * Check for degenerate name (e.g. / or "")
--- 190,222 ----
  	/*
  	 * Copy next component of name to u.u_dent.d_name.
  	 */
! 	hash = 0;
! 	for (i = 0; *cp != 0 && *cp != '/'; cp++) {
! 		if (i == MAXNAMLEN) {
  #ifdef NO_FILE_NAME_MAPPING
! 			u.u_error = ENAMETOOLONG;
! 			goto bad;
  #else
! 			for (; *cp != 0 && *cp != '/'; cp++);
! 			break;
  #endif
  		}
! 		if (*cp & 0200)
! 			if ((*cp&0377) == ('/'|0200) || flag != DELETE) {
! 				u.u_error = EINVAL;
! 				goto bad;
! 			}
! 		u.u_dent.d_name[i++] = *cp;
! 		hash += (unsigned char)*cp * i;
  	}
+ 	namelen = i;
+ 	while (i < MAXNAMLEN)
+ 		u.u_dent.d_name[i++] = '\0';
  	isdotdot = (u.u_dent.d_name[0] == '.' &&
  		u.u_dent.d_name[1] == '.' && u.u_dent.d_name[2] == '\0');
+ 	makeentry = 1;
+ 	if (*cp == '\0' && docache == 0)
+ 		makeentry = 0;
  
  	/*
  	 * Check for degenerate name (e.g. / or "")
***************
*** 139,146 ****
--- 233,357 ----
  
  	/*
  	 * We now have a segment name to search for, and a directory to search.
+ 	 *
+ 	 * Before tediously performing a linear scan of the directory,
+ 	 * check the name cache to see if the directory/name pair
+ 	 * we are looking for is known already.  We don't do this
+ 	 * if the segment name is long, simply so the cache can avoid
+ 	 * holding long names (which would either waste space, or
+ 	 * add greatly to the complexity).
  	 */
+ 	saveseg5(seg5);
+ 	mapseg5(nmidesc.se_addr, nmidesc.se_desc);
+ 	if (namelen > NCHNAMLEN) {
+ 		nchstats.ncs_long++;
+ 		makeentry = 0;
+ 	} else {
+ 		nhp = &nchash[NHASH(hash, dp->i_number, dp->i_dev)];
+ 		for (ncp = nhp->nch_forw; ncp != (struct namecache *)nhp;
+ 		    ncp = ncp->nc_forw) {
+ 			if (ncp->nc_ino == dp->i_number &&
+ 			    ncp->nc_dev == dp->i_dev &&
+ 			    ncp->nc_nlen == namelen &&
+ 			    !bcmp(ncp->nc_name, u.u_dent.d_name,
+ 				(unsigned)ncp->nc_nlen))
+ 				break;
+ 		}
+ 		if (ncp == (struct namecache *)nhp) {
+ 			nchstats.ncs_miss++;
+ 			ncp = NULL;
+ 		} else {
+ 			if (ncp->nc_id != nmi_i_id[ncp->nc_ip - inode])
+ 				nchstats.ncs_falsehits++;
+ 			else if (!makeentry)
+ 				nchstats.ncs_badhits++;
+ 			else {
+ 				/*
+ 				 * move this slot to end of LRU
+ 				 * chain, if not already there
+ 				 */
+ 				if (ncp->nc_nxt) {
+ 					/* remove from LRU chain */
+ 					*ncp->nc_prev = ncp->nc_nxt;
+ 					ncp->nc_nxt->nc_prev = ncp->nc_prev;
  
+ 					/* and replace at end of it */
+ 					ncp->nc_nxt = NULL;
+ 					ncp->nc_prev = nchtail;
+ 					*nchtail = ncp;
+ 					nchtail = &ncp->nc_nxt;
+ 				}
+ 
+ 				/*
+ 				 * Get the next inode in the path.
+ 				 * See comment above other `IUNLOCK' code for
+ 				 * an explaination of the locking protocol.
+ 				 */
+ 				pdp = dp;
+ 				if (!isdotdot || dp != u.u_rdir)
+ 					dp = ncp->nc_ip;
+ 				if (dp == NULL)
+ 					panic("namei: null cache ino");
+ 				if (pdp == dp)
+ 					dp->i_count++;
+ 				else if (isdotdot) {
+ 					restorseg5(seg5);
+ 					IUNLOCK(pdp);
+ 					igrab(dp);
+ 					mapseg5(nmidesc.se_addr,nmidesc.se_desc);
+ 				} else {
+ 					restorseg5(seg5);
+ 					igrab(dp);
+ 					IUNLOCK(pdp);
+ 					mapseg5(nmidesc.se_addr,nmidesc.se_desc);
+ 				}
+ 
+ 				/*
+ 				 * Verify that the inode that we got
+ 				 * did not change while we were waiting
+ 				 * for it to be locked.
+ 				 */
+ 				if (ncp->nc_id != nmi_i_id[ncp->nc_ip - inode]) {
+ 					restorseg5(seg5);
+ 					iput(dp);
+ 					ILOCK(pdp);
+ 					mapseg5(nmidesc.se_addr,nmidesc.se_desc);
+ 					dp = pdp;
+ 					nchstats.ncs_falsehits++;
+ 				} else {
+ 					u.u_dent.d_ino = dp->i_number;
+ 					/* ni_dent.d_reclen is garbage ... */
+ 					nchstats.ncs_goodhits++;
+ 					restorseg5(seg5);
+ 					goto haveino;
+ 				}
+ 			}
+ 
+ 			/*
+ 			 * Last component and we are renaming or deleting,
+ 			 * the cache entry is invalid, or otherwise don't
+ 			 * want cache entry to exist.
+ 			 */
+ 			/* remove from LRU chain */
+ 			*ncp->nc_prev = ncp->nc_nxt;
+ 			if (ncp->nc_nxt)
+ 				ncp->nc_nxt->nc_prev = ncp->nc_prev;
+ 			else
+ 				nchtail = ncp->nc_prev;
+ 			remque(ncp);		/* remove from hash chain */
+ 			/* insert at head of LRU list (first to grab) */
+ 			ncp->nc_nxt = nchhead;
+ 			ncp->nc_prev = &nchhead;
+ 			nchhead->nc_prev = &ncp->nc_nxt;
+ 			nchhead = ncp;
+ 			/* and make a dummy hash chain */
+ 			ncp->nc_forw = ncp;
+ 			ncp->nc_back = ncp;
+ 			ncp = NULL;
+ 		}
+ 	}
+ 	restorseg5(seg5);
+ 
  	/*
  	 * If this is the same directory that this process
  	 * previously searched, pick up where we last left off.
***************
*** 174,179 ****
--- 385,391 ----
  			    entryoffsetinblock);
  		}
  		numdirpasses = 2;
+ 		nchstats.ncs_2passes++;
  	}
  	endsearch = dp->i_size;
  	enduseful = 0;
***************
*** 281,286 ****
--- 493,500 ----
  	u.u_error = ENOENT;
  	goto bad;
  found:
+ 	if (numdirpasses == 2)
+ 		nchstats.ncs_pass2++;
  	/*
  	 * Found component in pathname.
  	 * If the final component of path name, save information
***************
*** 291,297 ****
  		u.u_ncache.nc_inumber = dp->i_number;
  		u.u_ncache.nc_dev = dp->i_dev;
  	}
! 	u.u_dent = *ep;
  	mapout(bp);
  	brelse(bp);
  	bp = NULL;
--- 505,511 ----
  		u.u_ncache.nc_inumber = dp->i_number;
  		u.u_ncache.nc_dev = dp->i_dev;
  	}
! 	u.u_dent.d_ino = ep->d_ino;
  	mapout(bp);
  	brelse(bp);
  	bp = NULL;
***************
*** 348,356 ****
  	 * in directory file system was mounted on.
  	 */
  	if (isdotdot) {
! 		if (dp == u.u_rdir)
  			u.u_dent.d_ino = dp->i_number;
! 		else if (u.u_dent.d_ino == ROOTINO &&
  		    dp->i_number == ROOTINO) {
  			register struct mount *mp;
  			register dev_t d;
--- 562,571 ----
  	 * in directory file system was mounted on.
  	 */
  	if (isdotdot) {
! 		if (dp == u.u_rdir) {
  			u.u_dent.d_ino = dp->i_number;
! 			makeentry = 0;
! 		} else if (u.u_dent.d_ino == ROOTINO &&
  		    dp->i_number == ROOTINO) {
  			register struct mount *mp;
  			register dev_t d;
***************
*** 430,435 ****
--- 645,691 ----
  			goto bad2;
  	}
  
+ 	/*
+ 	 * Insert name into cache if appropriate.
+ 	 */
+ 	if (makeentry) {
+ 		if (ncp != NULL)
+ 			panic("namei: duplicating cache");
+ 		/*
+ 		 * Free the cache slot at head of lru chain.
+ 		 */
+ 		saveseg5(seg5);
+ 		mapseg5(nmidesc.se_addr, nmidesc.se_desc);
+ 		if (ncp = nchhead) {
+ 			/* remove from lru chain */
+ 			*ncp->nc_prev = ncp->nc_nxt;
+ 			if (ncp->nc_nxt)
+ 				ncp->nc_nxt->nc_prev = ncp->nc_prev;
+ 			else
+ 				nchtail = ncp->nc_prev;
+ 			remque(ncp);		/* remove from old hash chain */
+ 			/* grab the inode we just found */
+ 			ncp->nc_ip = dp;
+ 			/* fill in cache info */
+ 			ncp->nc_ino = pdp->i_number;	/* parents inum */
+ 			ncp->nc_dev = pdp->i_dev;	/* & device */
+ 			ncp->nc_idev = dp->i_dev;	/* our device */
+ 			ncp->nc_id = nmi_i_id[dp - inode];	/* identifier */
+ 			ncp->nc_nlen = namelen;
+ 			bcopy(u.u_dent.d_name, ncp->nc_name,
+ 			    (unsigned)ncp->nc_nlen);
+ 			/* link at end of lru chain */
+ 			ncp->nc_nxt = NULL;
+ 			ncp->nc_prev = nchtail;
+ 			*nchtail = ncp;
+ 			nchtail = &ncp->nc_nxt;
+ 			/* and insert on hash chain */
+ 			insque(ncp, nhp);
+ 			restorseg5(seg5);
+ 		}
+ 	}
+ 
+ haveino:
  	fs = dp->i_fs;
  
  	/*
***************
*** 515,533 ****
  direnter(ip)
  	struct inode *ip;
  {
! #ifdef DIAGNOSTIC
! 	/*
! 	 * There's no reason for this to be here that I can think of.
! 	 * KB
! 	 */
! 	if (!u.u_pdir->i_nlink) {
! 		panic("direnter");
! 	/*
! 		iput(u.u_pdir);
! 		return(ENOTDIR);
! 	*/
! 	}
! #endif
  	u.u_dent.d_ino = ip->i_number;
  	u.u_count = sizeof(struct v7direct);
  	u.u_segflg = UIO_SYSSPACE;
--- 771,777 ----
  direnter(ip)
  	struct inode *ip;
  {
! 
  	u.u_dent.d_ino = ip->i_number;
  	u.u_count = sizeof(struct v7direct);
  	u.u_segflg = UIO_SYSSPACE;
***************
*** 681,684 ****
--- 925,1015 ----
  	if (ip != NULL)
  		iput(ip);
  	return (error);
+ }
+ 
+ /*
+  * Name cache initialization, from main() when we are booting
+  */
+ nchinit()
+ {
+ 	register union nchash *nchp;
+ 	register struct namecache *ncp;
+ 	segm	seg5;
+ 
+ 	saveseg5(seg5);
+ 	mapseg5(nmidesc.se_addr,nmidesc.se_desc);
+ 	nchhead = 0;
+ 	nchtail = &nchhead;
+ 	for (ncp = namecache; ncp < &namecache[nchsize]; ncp++) {
+ 		ncp->nc_forw = ncp;			/* hash chain */
+ 		ncp->nc_back = ncp;
+ 		ncp->nc_nxt = NULL;			/* lru chain */
+ 		*nchtail = ncp;
+ 		ncp->nc_prev = nchtail;
+ 		nchtail = &ncp->nc_nxt;
+ 		/* all else is zero already */
+ 	}
+ 	for (nchp = nchash; nchp < &nchash[NCHHASH]; nchp++) {
+ 		nchp->nch_head[0] = nchp;
+ 		nchp->nch_head[1] = nchp;
+ 	}
+ 	restorseg5(seg5);
+ }
+ 
+ /*
+  * Cache flush, called when filesys is umounted to
+  * remove entries that would now be invalid
+  *
+  * The line "nxtcp = nchhead" near the end is to avoid potential problems
+  * if the cache lru chain is modified while we are dumping the
+  * inode.  This makes the algorithm O(n^2), but do you think I care?
+  */
+ nchinval(dev)
+ 	register dev_t dev;
+ {
+ 	register struct namecache *ncp, *nxtcp;
+ 	segm	seg5;
+ 
+ 	saveseg5(seg5);
+ 	mapseg5(nmidesc.se_addr,nmidesc.se_desc);
+ 	for (ncp = nchhead; ncp; ncp = nxtcp) {
+ 		nxtcp = ncp->nc_nxt;
+ 		if (ncp->nc_ip == NULL ||
+ 		    (ncp->nc_idev != dev && ncp->nc_dev != dev))
+ 			continue;
+ 		/* free the resources we had */
+ 		ncp->nc_idev = NODEV;
+ 		ncp->nc_dev = NODEV;
+ 		ncp->nc_id = NULL;
+ 		ncp->nc_ino = 0;
+ 		ncp->nc_ip = NULL;
+ 		remque(ncp);		/* remove entry from its hash chain */
+ 		ncp->nc_forw = ncp;	/* and make a dummy one */
+ 		ncp->nc_back = ncp;
+ 		/* delete this entry from LRU chain */
+ 		*ncp->nc_prev = nxtcp;
+ 		if (nxtcp)
+ 			nxtcp->nc_prev = ncp->nc_prev;
+ 		else
+ 			nchtail = ncp->nc_prev;
+ 		/* cause rescan of list, it may have altered */
+ 		nxtcp = nchhead;
+ 		/* put the now-free entry at head of LRU */
+ 		ncp->nc_nxt = nxtcp;
+ 		ncp->nc_prev = &nchhead;
+ 		nxtcp->nc_prev = &ncp->nc_nxt;
+ 		nchhead = ncp;
+ 	}
+ 	restorseg5(seg5);
+ }
+ 
+ /*
+  * Name cache invalidation of all entries.
+  */
+ cacheinvalall()
+ {
+ 	register struct namecache *ncp;
+ 
+ 	for (ncp = namecache; ncp < &namecache[nchsize]; ncp++)
+ 		ncp->nc_id = 0;
  }
*** vmstat.c.old	Sun Jul  3 20:10:24 1988
--- vmstat.c	Sat Jan 27 22:46:21 1990
***************
*** 523,532 ****
  
  dosum()
  {
- #ifndef BSD2_10
  	struct nchstats nchstats;
  	long nchtotal;
- #endif
  	struct xstats  xstats;
  
  	lseek(mf, (long)nl[X_SUM].n_value, L_SET);
--- 523,530 ----
***************
*** 572,578 ****
  #endif
  	printf("%9D system calls\n", sum.v_syscall);
  #define	nz(x)	((x) ? (x) : 1)
- #ifndef BSD2_10
  	lseek(mf, (long)nl[X_NCHSTATS].n_value, 0);
  	read(mf, &nchstats, sizeof nchstats);
  	nchtotal = nchstats.ncs_goodhits + nchstats.ncs_badhits +
--- 570,575 ----
***************
*** 583,589 ****
  	    nchstats.ncs_pass2 * 100 / nz(nchtotal));
  	printf("%9s badhits %D, falsehits %D, toolong %D\n", "",
  	    nchstats.ncs_badhits, nchstats.ncs_falsehits, nchstats.ncs_long);
- #endif
  	lseek(mf, (long)nl[X_XSTATS].n_value, 0);
  	read(mf, &xstats, sizeof xstats);
  	printf("%9D total calls to xalloc (cache hits %D%%)\n",
--- 580,585 ----

--------------------------------------------------------------------------
	This is a simple program to display the name cache statistics.  If
	an argument is given it is taken as the number of seconds to pause
	before displaying the statistics again.


#include <stdio.h>
#include <nlist.h>
#include <sys/param.h>
#include <sys/dir.h>
#include <sys/namei.h>

struct	nlist nl[] =
	{
	{ "_nchstats" },
	{ "" },
	};

	int	mf, hdr(), lines = 1;

main(argc, argv)
	int argc;
	char **argv;
	{
	register i;
	int	iter;

	nlist("/vmunix", nl);
	if	(nl[0].n_type == 0)
		{
		fprintf(stderr, "no /vmunix namelist\n");
		exit(1);
		}
	mf = open("/dev/kmem", 0);
	if	(mf < 0)
		{
		fprintf(stderr, "cannot open /dev/kmem\n");
		exit(1);
		}
	iter = 0;
	argc--, argv++;
	if	(argc > 1)
		iter = atoi(argv[1]);
	signal(SIGCONT, hdr);
loop:
	if	(--lines == 0)
		hdr();
	nmstats();
	fflush(stdout);
	if	(--iter &&argc > 0)
		{
		sleep(atoi(argv[0]));
		goto loop;
		}
	}

hdr()
	{

	printf("total      good     bad     false        miss     long      pass2     2pass");
	putchar('\n');
	lines = 19;
	}

nmstats()
	{
	long	nchtotal;
	struct nchstats nchstats;

	lseek(mf, (long)nl[0].n_value, 0);
	read(mf, &nchstats, sizeof nchstats);
	nchtotal = nchstats.ncs_goodhits + nchstats.ncs_badhits +
	    nchstats.ncs_falsehits + nchstats.ncs_miss + nchstats.ncs_long;
	printf("%-9D %-9D %-9D %-9D %-9D",nchtotal, nchstats.ncs_goodhits,
		nchstats.ncs_badhits,
		nchstats.ncs_falsehits, nchstats.ncs_miss);
	printf(" %-9D %-9D %-9D", nchstats.ncs_long, nchstats.ncs_pass2,
		nchstats.ncs_2passes);
	putchar('\n');
	}



More information about the Comp.bugs.2bsd mailing list