Coherent4.2.10/coh.386/bio.c

/*
 * $Header: /v/src/rcskrnl/coh.386/RCS/bio.c,v 420.9 1993/12/10 22:04:25 srcadm Exp srcadm $
 */

/*********************************************************************
 *  
 * Coherent, Mark Williams Company, Copyright 1993
 * RCS Header
 * This file contains proprietary information and is considered
 * a trade secret.  Unauthorized use is prohibited.
 *
 *
 * $Id: bio.c,v 420.9 1993/12/10 22:04:25 srcadm Exp srcadm $
 *
 * $Log: bio.c,v $
 * Revision 420.9  1993/12/10  22:04:25  srcadm
 * Fixed problems with character device read/writes (dmareq()).
 * Created functions io_buffer_req() and raw_buf_read() for this
 * purpose.
 * Also, stripped comments and replaced with function headers.
 *
 * Revision 420.8  1993/12/04  00:25:23  srcadm
 * Further mods to io_buffer_req for raw access.
 *
 * Revision 420.7  1993/12/02  18:36:08  srcadm
 * Fixes ps listing data limits so bufneed appears different
 * than buffer_wait, added comments, fixed foobar with u.u_error
 * setting introduced in 420.6 by Louis.
 *
 * Revision 420.6  1993/12/02  00:14:57  srcadm
 * Fixes problem with opening a non-existent block device.
 *
 * Revision 420.5  1993/12/01  23:40:18  srcadm
 * Initial RCS submission.
 *
 *
 */

#ifdef EMBEDDED_VERSION
/* Embedded Version Constant */
char *MWC_BIO_VERSION = "MWC_BIO_VERSION($Revision: 420.9 $)";
#endif

/*
 * Revision 2.8  93/10/29  00:54:55  nigel
 * R98 (aka 4.2 Beta) prior to removing System Global memory
 * 
 * Revision 2.7  93/09/13  07:53:28  nigel
 * Changed so that once again most driver return values are ignored; instead
 * we pass in an extra pointer to an integer where the return value should
 * be placed, along with a (currently NULL) "cred_t *" so that the signature
 * for driver entry points is becoming very much like the DDI/DKI version.
 * 
 * Revision 2.6  93/09/02  18:02:56  nigel
 * Remove spurious globals and prepare for initial DDI/DKI mods
 * 
 * Revision 2.5  93/08/19  10:37:01  nigel
 * r83 ioctl (), corefile, new headers
 * 
 * Revision 2.4  93/08/19  03:26:19  nigel
 * Nigel's r83 (Stylistic cleanup)
 * 
 * Revision 2.2  93/07/26  15:22:22  nigel
 * Nigel's R80
 */

#define	_DDI_DKI	1
#define	_SYSV3		1

#include <common/gregset.h>
#include <common/_canon.h>
#include <kernel/proc_lib.h>
#include <sys/param.h>
#include <sys/confinfo.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <stddef.h>
#include <limits.h>

#define	_KERNEL		1

#include <kernel/trace.h>
#include <kernel/reg.h>
#include <sys/uproc.h>
#include <sys/inode.h>
#include <sys/mmu.h>
#include <sys/buf.h>
#include <sys/con.h>
#include <sys/io.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/seg.h>
#include <sys/dmac.h>
#include <sys/types.h>
#include <sys/coherent.h>


/*
 * This is here for the old-style Coherent I/O support hacks.
 */

#include <sgtty.h>

static	buf_t	     **	hasharray;	/* pointer to hash buckets */
static	buf_t	      *	firstbuf;	/* pointer to first in LRU chain */
static	buf_t	      *	lastbuf;	/* pointer to last in LRU chain */

static	int		bufneed;	/* waiting for buffers to become free */
static	volatile int	bufsync;	/* waiting on syncing buffers */
static	buf_t	      *	buf_list;	/* first buffer */
static	buf_t	      *	buf_list_end;	/* points after last buffer */
buf_t *dyn_buf_head = (buf_t *)NULL; /* dynamic buffer head */

MAKESR(blockp, _blockp);

int	bufinit_done;

#undef	__LOCAL__
#define	__LOCAL__

/*
 * The following hacks allow the magic address information used in a block to
 * be saved and restored. This is so that ioreq () and dmareq () can use a
 * single regular buffer for all their work rather than juggling several
 * buffers.
 */

struct addr_save {
	caddr_t		save_virt;
	paddr_t		save_phys;
};

#define	SAVE_ADDR(bp,as)	((as)->save_virt = (bp)->b_vaddr, \
				 (as)->save_phys = (bp)->b_paddr)
#define	REST_ADDR(bp,as)	((bp)->b_vaddr = (as)->save_virt, \
				 (bp)->b_paddr = (as)->save_phys, \
				 (bp)->b_proc = NULL)

/*
 * The following hashing algorithm is used by bclaim ().
 */

#define	HASH(device, blockno)	((device * 257) + blockno)


/*
 * The following space is used as workspace by the radix-sorting algorithm.
 */

typedef struct lock_buf_list {
	buf_t 			*buf;
	struct lock_buf_list 	*next;
} buf_list_t;

__DUMB_GATE	radix_gate = __GATE_DECLARE ("radix");
BUF	      *	radix_space [UCHAR_MAX + 1];
buf_list_t    *u_radix_space[UCHAR_MAX + 1];


/********************************************************************
 * Function: static void HASHinsert(buf_t *bp)
 *
 * Description: Insert the current buffer at the head of the
 *		appropriate hash chain.
 *
 * Returns: Nothing.
 *
 * Affects Globals: hasharray[]
 *
 * Affects U-Area: None.
 *
 * Side effects: Changes b_hash field of passed bp.
 *
 * Comments:
 */

static void
HASHinsert (bp)
buf_t * bp;
{
	ASSERT (bp->b_hash == NULL);

	bp->b_hash = hasharray [bp->b_hashval];
	hasharray [bp->b_hashval] = bp;
}


/*
 * Allocate and initialize buffer headers.
 */

void
bufinit ()
{
	paddr_t p;
	caddr_t v;
	int	i;

	p = MAPIO (blockp.sr_segp->s_vmem, 0);
	v = blockp.sr_base;

	if (NBUF < 32)
		cmn_err (CE_PANIC, "NBUF = %d, must be at least 32", NBUF);
	if (NHASH < 32)
		cmn_err (CE_PANIC, "NHASH = %d, must be at least 32", NHASH);

	buf_list = kmem_alloc (NBUF * sizeof (buf_t), KM_NOSLEEP);
	buf_list_end = buf_list + NBUF;
	hasharray = kmem_alloc (NHASH * sizeof (buf_t *), KM_NOSLEEP);
	if (buf_list == NULL || hasharray == NULL)
		cmn_err (CE_PANIC,
			 "bufinit: insufficient memory for %d buffers", NBUF);

	for (i = 0; i < NHASH; ++ i)
		hasharray [i] = NULL;

	/*
	 * initialize the buffer header array with the physical and
	 * virtual addresses of the buffers, NULL values for the
	 * hash chain pointers, and pointers to the successor and
	 * predecessor of the current node.
	 */

	firstbuf = buf_list;
	lastbuf = buf_list_end - 1;

	for (i = 0 ; i < NBUF ; i ++) {
		buf_t	      *	bp = buf_list + i;

		bp->b_hashval = i % NHASH;
		bp->b_dev = NODEV;
		bp->b_edev = NODEV;
		bp->b_flags = 0;
		bp->b_paddr = p;
		bp->b_vaddr = v;
		bp->b_hash = NULL;
		bp->b_sort = NULL;
		bp->b_LRUf = bp + 1;		/* next entry in chain */
		bp->b_LRUb = bp - 1;		/* prev entry in chain */
		bp->b_bufsiz = BSIZE;

		bp->b_proc = NULL;
		bp->b_iodone = NULL;
		bp->b_private = NULL;

		__INIT_BUFFER_LOCK (bp);

		p += BSIZE;
		v += BSIZE;

		HASHinsert (bp);
	}

	/*
	 * the first and last headers are special cases.
	 */

	buf_list->b_LRUb = NULL;		/* no predecessor */
	(buf_list_end - 1)->b_LRUf = NULL;	/* no successor */

	bufinit_done = 1;
}


#if	0
/*
 * Set this up to be called once a second from clock.c to track down buffer
 * locking problems.
 */

#if	__USE_PROTO__
void check_buffer_cache (void)
#else
void
check_buffer_cache ()
#endif
{
	int		i;

	for (i = 0 ; i < NBUF ; i ++) {
		buf_t	      *	bufp = buf_list + i;
		short		s = sphi ();

		switch (bufp->__b_gate->_lock [0]) {
		case 0:
			break;

		case 16:
			cmn_err (CE_NOTE, "stuck buffer #%x #%x blk %d flags #%x",
				 bufp->b_dev, bufp->b_edev, bufp->b_blkno,
				 bufp->b_flags);
			bufp->__b_gate->_lock [0] = 1;
			break;

		default:
			bufp->__b_gate->_lock [0] ++;
			break;
		}
		spl (s);
	}
}
#endif


/*
 * Set up for radix-sort.
 */

#if	__USE_PROTO__
__LOCAL__ void init_radix_sort (void)
#else
__LOCAL__ void
init_radix_sort ()
#endif
{
	__GATE_LOCK (radix_gate, "buffer sort");
}


/*
 * End radix-sorting.
 */

#if	__USE_PROTO__
__LOCAL__ void end_radix_sort (void)
#else
__LOCAL__ void
end_radix_sort ()
#endif
{
	ASSERT (__GATE_LOCKED (radix_gate));
	__GATE_UNLOCK (radix_gate);
}


/*
 * Perform a radix-sort pass on the buffer list rooted at "bp", with the
 * sort digit being indexed at structure offset "digofs", with the list thread
 * being maintained at index "linkofs".
 */


#define	__RADIX_LINK(bp, ofs)	(* (buf_t **) ((char *) (bp) + (ofs)))
#define	__U_RADIX_LINK(bp, ofs)	(* (buf_list_t **) ((char *) (bp) + (ofs)))


#if	__USE_PROTO__
__LOCAL__ buf_list_t * unlocked_radix_pass (buf_list_t * bp, size_t digofs, size_t linkofs)
#else
__LOCAL__ buf_list_t *
unlocked_radix_pass (bp, digofs, linkofs)
buf_list_t      *bp;
size_t		digofs;
size_t		linkofs;
#endif
{
	int		i;
	buf_list_t      *prev;

	ASSERT (__GATE_LOCKED (radix_gate));

	for (i = 0 ; i < UCHAR_MAX + 1 ; i ++)
		u_radix_space [i] = NULL;

	/*
	 * Walk over the input list, putting items into buckets.
	 */

	while (bp != NULL) {
		buf_list_t	      *	next;

		i = * ((unsigned char *) bp->buf + digofs);
		next = __U_RADIX_LINK (bp, linkofs);
		__U_RADIX_LINK (bp, linkofs) = u_radix_space [i];
		u_radix_space [i] = bp;
		bp = next;
	}

	/*
	 * Now construct the output list by walking over each bucket and
	 * reversing pointers.
	 */

	prev = NULL;
	for (i = UCHAR_MAX + 1 ; i -- > 0 ;) {
		bp = u_radix_space [i];

		while (bp != NULL) {
			buf_list_t      *next = __U_RADIX_LINK (bp, linkofs);
			__U_RADIX_LINK (bp, linkofs) = prev;
			prev = bp;
			bp = next;
		}
	}

	/*
	 * Now we have a partial sort on a buffer list, return the head of the
	 * list.
	 */

	return prev;
}



#if	__USE_PROTO__
__LOCAL__ buf_t * radix_pass (buf_t * bp, size_t digofs, size_t linkofs)
#else
__LOCAL__ buf_t *
radix_pass (bp, digofs, linkofs)
buf_t	      *	bp;
size_t		digofs;
size_t		linkofs;
#endif
{
	int		i;
	buf_t	      *	prev;

	ASSERT (__GATE_LOCKED (radix_gate));

	for (i = 0 ; i < UCHAR_MAX + 1 ; i ++)
		radix_space [i] = NULL;

	/*
	 * Walk over the input list, putting items into buckets.
	 */

	while (bp != NULL) {
		buf_t	      *	next;

		i = * ((unsigned char *) bp + digofs);
		next = __RADIX_LINK (bp, linkofs);
		__RADIX_LINK (bp, linkofs) = radix_space [i];
		radix_space [i] = bp;
		bp = next;
	}

	/*
	 * Now construct the output list by walking over each bucket and
	 * reversing pointers.
	 */

	prev = NULL;
	for (i = UCHAR_MAX + 1 ; i -- > 0 ;) {
		bp = radix_space [i];

		while (bp != NULL) {
			buf_t	      *	next = __RADIX_LINK (bp, linkofs);
			__RADIX_LINK (bp, linkofs) = prev;
			prev = bp;
			bp = next;
		}
	}

	/*
	 * Now we have a partial sort on a buffer list, return the head of the
	 * list.
	 */

	return prev;
}



/********************************************************************
 * Function: CON *drvmap(o_dev_t dev, int flags)
 *
 * Description: Maps a device to its CON entry.
 *
 * Returns: Pointer to the CON entry, or NULL on failure.
 *
 * Affects Globals: None.
 *
 * Affects U-Area: Sets u.u_error to ENXIO if device does not exist.
 *
 * Side Effects: None.
 *
 * Comments:
 *
 * NIGEL: This function is the only code that references drvl [] directly
 * other than the bogus code that manages the load and unload entry points,
 * which we will also need to "enhance". What we add to this code is a range
 * check so that it no longer can index off the end of drvl [], and in the
 * case that we would go off the end of drvl [] we vector instead to the
 * STREAMS system and ask it to return a kludged-up "CON *". The mapping
 * code referred to above is for the i286 and does nothing whatsoever, so
 * all this function really does as it stands is a table lookup.
 */

#if	__USE_PROTO__
__LOCAL__ CON * drvmap (o_dev_t dev, int flags)
#else
__LOCAL__ CON *
drvmap (dev, flags)
o_dev_t 	dev;
int		flags;
#endif
{
	DRV * dp;
	unsigned m;

	if ((m = major (dev)) >= drvn) {
		if (m < _maxmajor && _major [m] != NODEV) {
			return (flags & DFBLK) == 0 ?
				& cdevsw [_major [m]].cdev_con :
				& bdevsw [_major [m]].bdev_con;
		}
	} else if ((dp = drvl + m)->d_conp != NULL)
		return dp->d_conp;

	SET_U_ERROR (ENXIO, "drvmap ()");
	return NULL;
}


/*
 * NIGEL: To avoid accidents with block locking, use this function rather than
 * the primitive old-Coherent sleep locks when locking a block that is
 * supposed to have a particular identity. With this buffer cache
 * implementation, we *must* check that a block ID has not changed *every*
 * time we re-lock it. Using this code ensures that.
 */

#if	__USE_PROTO__
__LOCAL__ buf_t * lock_block (buf_t * bufp, o_dev_t dev, daddr_t bno)
#else
__LOCAL__ buf_t *
lock_block (bufp, dev, bno)
buf_t	      *	bufp;
o_dev_t		dev;
daddr_t		bno;
#endif
{
	__LOCK_BUFFER (bufp, "lock_block ()");

	if (bufp->b_dev != dev || bufp->b_bno != bno) {
		__UNLOCK_BUFFER (bufp);
		bufp = NULL;
	}

	return bufp;
}


/*
 * Unlock a block buffer, and if anyone is waiting for a block on the free
 * list to become available, wake them up. This function doesn't wake anyone
 * who is waiting for I/O completion on the buffer.
 */

#if	__USE_PROTO__
void unlock_block (buf_t * bp)
#else
void
unlock_block (bp)
buf_t	      *	bp;
#endif
{
	__UNLOCK_BUFFER (bp);

	if (bufneed) {
		bufneed = 0;
		wakeup (& bufneed);
	}
}
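
/*
 * A minimal usage sketch (hypothetical caller, for illustration only) of
 * the re-validation pattern described above:
 *
 *	if ((bp = lock_block (bp, dev, bno)) == NULL) {
 *		... the block changed identity while it was unlocked;
 *		    rescan the cache or re-claim the block ...
 *	} else {
 *		... use the buffer ...
 *		unlock_block (bp);
 *	}
 *
 * Everything inside "..." is a placeholder; only lock_block () and
 * unlock_block () come from this file.
 */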


/*
 * Begin writing a block out; if 'sync' is not set, the buffer is marked as
 * asynchronous and the lock that is on the block is detached from the current
 * process.
 */

#if	__USE_PROTO__
__LOCAL__ void buffer_write_start (buf_t * bp, int sync)
#else
__LOCAL__ void
buffer_write_start (bp, sync)
buf_t	      *	bp;
int		sync;
#endif
{
	ASSERT (__IS_BUFFER_LOCKED (bp));
#if	TRACER
	ASSERT (__GATE_LOCK_OWNER (bp->__b_gate) == (char *) SELF);
#endif

	if (sync == BUF_SYNC)
		bp->b_flag &= ~ BFASY;
	else {
		/*
		 * From here on, the gate does not belong to the calling
		 * process.
		 */

		__MAKE_BUFFER_ASYNC (bp);
	}

	bp->b_flag |= BFNTP;
	bp->b_req = BWRITE;
	bp->b_count = BSIZE;

	dblock (bp->b_dev, bp);
}


/*
 * Wait for a buffer to complete any pending I/O.
 */

#if	__USE_PROTO__
__LOCAL__ void buffer_wait (buf_t * bp)
#else
__LOCAL__ void
buffer_wait (bp)
buf_t	      *	bp;
#endif
{
	unsigned short	s;

	ASSERT (__IS_BUFFER_LOCKED (bp));
#if	TRACER
	ASSERT (__GATE_LOCK_OWNER (bp->__b_gate) == (char *) SELF);
#endif

	/*
	 * LOUIS:
	 * buffer_wait shortened to bfwait because our ps output
	 * doesn't have a long enough field.
	 */
	s = sphi ();
	while ((bp->b_flag & BFNTP) != 0)
		x_sleep (bp, pridisk, slpriNoSig, "bfwait");
	spl (s);
}


/*
 * Attempt to locate a block within the buffer cache.
 */

#if	__USE_PROTO__
__LOCAL__ buf_t * buffer_find (o_dev_t dev, daddr_t bno, int sync)
#else
__LOCAL__ buf_t *
buffer_find (dev, bno, sync)
o_dev_t		dev;
daddr_t		bno;
int		sync;
#endif
{
	buf_t	      *	bp;
	unsigned long	hashval = HASH (dev, bno) % NHASH;

rescan:
	for (bp = hasharray [hashval] ; bp != NULL ; bp = bp->b_hash) {
		if (bp->b_bno != bno || bp->b_dev != dev)
			continue;

		if (sync == BUF_ASYNC ||
		    (bp = lock_block (bp, dev, bno)) != NULL)
			return bp;

		/*
		 * This really can happen; the reason is that
		 * when a *read* has an I/O error, the "b_dev"
		 * field is set to NODEV.
		 */

		goto rescan;
	}

	return NULL;
}


/*
 * Mark the indicated buffer as clean.
 */

#if	__USE_PROTO__
void bclean (o_dev_t dev, daddr_t bno)
#else
void
bclean (dev, bno)
o_dev_t		dev;
daddr_t		bno;
#endif
{
	buf_t	      *	bp;

	if ((bp = buffer_find (dev, bno, BUF_SYNC)) != NULL) {
		bp->b_flag &= ~ BFMOD;
		__UNLOCK_BUFFER (bp);
	}
}


/*
 * Generic iodone function for synced blocks. Specialized iodone functions
 * should chain on to this one.
 */

#if	__USE_PROTO__
__LOCAL__ void sync_iodone (buf_t * bp)
#else
__LOCAL__ void
sync_iodone (bp)
buf_t	      *	bp;
#endif
{
	int s;
	ASSERT (bufsync != 0);

	/*
	 * Unlock the block and maintain the count of blocks which have been
	 * started syncing but are not yet finished. Processor level
	 * changing added by Louis who suspects a race condition.
	 */

	unlock_block (bp);

	s = sphi();

	if (-- bufsync == 0) {
		spl(s);
		wakeup (& bufsync);
	}
	else
		spl(s);
}



#if __USE_PROTO__
void
build_sync_list(buf_list_t **bheadpp, int (*sel) __PROTO((int arg, buf_t *bp)),
		int arg, void (*fin) __PROTO((buf_t *bp)))
							  
#else
void
build_sync_list(bheadpp, sel, arg, fin)
buf_list_t **bheadpp;
int (*sel)();
int arg;
void (*fin)();
#endif /* __USE_PROTO__ */
{
  buf_list_t *bhead = NULL;
  int i;
  buf_list_t *tmp_el = NULL;
  buf_t *cur_bp;

  /* First selection pass on static buffers */
  for (i = 0; i < NBUF; i++) {
    buf_t *cur_bp;

    cur_bp = buf_list + i;

    /*
     * Instead of locking buffers while placing them on the list,
     * we will just build a linked list of them.  We only lock
     * them when necessary.
     */

    __LOCK_BUFFER(cur_bp, "bldsync");
    
    /* Check selection function */
    if (sel != NULL && (*sel)(arg, cur_bp) == 0) {
      __UNLOCK_BUFFER(cur_bp);
      continue;
    }

    /* Make sure buffer is present. */
    if (cur_bp->b_flags & BFNTP) {
      __UNLOCK_BUFFER(cur_bp);
      continue;
    }

    if ((cur_bp->b_flags & BFMOD) == 0) {
      /*
       * NIGEL:
       * We check for this only inside the area where
       * we have the block locked because it may be
       * that we are about to change a buffer that is
       * queued for a read, and we needed to wait
       * for that to complete.
       * A read, when we are closing? Yes, because
       * of the action of readahead!
       *
       * Further to the above, it used to be the case
       * that broken utilities like 'df' worked by
       * accessing the block devices directly, and
       * the buffer cache was flushed when the block
       * device was closed, despite it being
       * in use by the mounted filesystem.
       *
       * LOUIS:
       * In addition, by doing it here before the sort,
       * we save clock cycles sorting and we don't have to
       * allocate a container for it.
       */

      if (fin != NULL)
	(*fin)(cur_bp);

      __UNLOCK_BUFFER(cur_bp);
      continue;
    }

    /*
     * Now, instead of placing the buffer directly into the sort list,
     * shove it into a container and link the container.
     */
    __UNLOCK_BUFFER(cur_bp);

    tmp_el = (buf_list_t *)kmem_alloc((size_t)sizeof(buf_list_t),
				      KM_NOSLEEP);

    if (tmp_el == NULL) {
      cmn_err(CE_WARN,
	      "build_sync_list: kernel heap low; sync may not be complete");
      continue;
    }

    tmp_el->buf = cur_bp;
    tmp_el->next = bhead;
    bhead = tmp_el;
  }

  /* Now, do the same for the dynamically allocated buffers */
  for (cur_bp = dyn_buf_head; cur_bp != NULL; cur_bp = cur_bp->b_dynf) {
    /*
     * Instead of locking buffers while placing them on the list,
     * we will just build a linked list of them.  We only lock
     * them when necessary.
     */

    /* Non-syncable buffer? */
    if (cur_bp->b_flags & BFNSY)
      continue;

    __LOCK_BUFFER(cur_bp, "bldsync");
    
    /* Check selection function */
    if (sel != NULL && (*sel)(arg, cur_bp) == 0) {
      __UNLOCK_BUFFER(cur_bp);
      continue;
    }

    /* Make sure buffer is present. */
    if (cur_bp->b_flags & BFNTP) {
      __UNLOCK_BUFFER(cur_bp);
      continue;
    }

    if ((cur_bp->b_flags & BFMOD) == 0) {
      /*
       * See the NIGEL/LOUIS commentary in the static-buffer pass
       * above for why the BFMOD test is made only with the block
       * locked and why finalization is done here, before the sort.
       */

      if (fin != NULL)
	(*fin)(cur_bp);

      __UNLOCK_BUFFER(cur_bp);
      continue;
    }

    /*
     * Now, instead of placing the buffer directly into the sort list,
     * shove it into a container and link the container.
     */
    __UNLOCK_BUFFER(cur_bp);

    tmp_el = (buf_list_t *)kmem_alloc((size_t)sizeof(buf_list_t),
				      KM_NOSLEEP);

    if (tmp_el == NULL) {
      cmn_err(CE_WARN,
	      "build_sync_list: kernel heap low; sync may not be complete");
      continue;
    }

    tmp_el->buf = cur_bp;
    tmp_el->next = bhead;
    bhead = tmp_el;
  }
    
  *bheadpp = bhead;
}

  
/********************************************************************
 * Function: void general_sync(int (*sel)(int arg, buf_t *bp),
 *			int arg, void (*fin)(buf_t *bp),
 *			void (*iodone)(buf_t *bp), int sync)
 *
 * Description: General sync function, pass in selection and
 *		finalization functions.
 *
 * Returns: Nothing.
 *
 * Affects Globals: No.
 *
 * Affects U-Area: Unknown.
 *
 * Side Effects: Locks large regions of buffers.
 *
 * Comments:	(LOUIS) There is always the possibility of deadlock
 *		if you sleep holding multiple resources, especially
 *		if the resources are of the same type. In general,
 *		locking n >> 0 buffers is not a good idea even if
 *		you wish to sort them.  The problem is that you
 *		will be putting a lot of I/O-bound processes to
 *		sleep for a long length of time while the writes
 *		are finished.  Sorting is a good idea, however, so
 *		what we *can* do is build a list of unlocked buffers,
 *		then lock and test each one separately.  This is
 *		experimental because it will eat large portions of
 *		the kalloc arena while the list exists.
 */
#if	__USE_PROTO__
__LOCAL__ void general_sync (int (* sel) __PROTO ((int arg, buf_t * bp)),
			     int arg,  void (* fin) __PROTO ((buf_t * bp)),
			     void (* iodone) __PROTO ((buf_t * bp)),
			     int sync)
#else
__LOCAL__ void
general_sync (sel, arg, fin, iodone, sync)
int	     (*	sel) ();
int		arg;
void	     (*	fin) ();
void	     (*	iodone) ();
int		sync;
#endif
{
  buf_t	      	*bp;
  buf_list_t	*head = NULL;
  buf_list_t	*tmp_el = NULL;
  int		i;
  short		s;

  /* Build a list of buffers we should sync */
  build_sync_list(&head, sel, arg, fin);
  
  /*
   * NIGEL:
   * Sort passes, one for each digit in a block address, from least
   * to most significant.
   *
   * LOUIS:
   * This sort routine has been modified so that it will work
   * with unlocked buffers.  This is no big deal, really.  In
   * the worst situation, the sort gets screwed up.  So what?
   * The arm will just step over 1024 cylinders for nothing.
   * However, that will slow down I/O time without slowing
   * down CPU turnaround time, and, IMHO, sorting should be
   * done by the drivers and not the kernel.
   */
  
  init_radix_sort();
  
  for(i = 0; i < sizeof(daddr_t); i++) {
    head = unlocked_radix_pass(head, offsetof(buf_t, b_bno) +
			       __OCTET_N_OF_R(__NATIVE_32_MAP, i),
			       offsetof(buf_list_t, next));
  }
  
  end_radix_sort();
  
  tmp_el = head;
  
  while(tmp_el != NULL) {
    buf_list_t *next_el;
    
    /*
     * This is the final lock for writing...
     * before was just some grabass; we mean
     * business now.
     */

    __LOCK_BUFFER((bp = tmp_el->buf), "sync");
    
    next_el = tmp_el->next;

    kmem_free(tmp_el, (size_t)sizeof(buf_list_t)); 

    
    /*
     * Recheck everything.  A couple of extra clock
     * cycles won't kill us.  In fact, this is
     * needed if we are accepting multiple process
     * threads in here.
     */
    
    if (bp->b_flags & BFNTP || bp->b_flags & BFERR) {
      __UNLOCK_BUFFER(bp);
      tmp_el = next_el;
      continue;
    }
    
    if (sel != NULL && (*sel)(arg, bp) == 0) {
      __UNLOCK_BUFFER(bp);
      tmp_el = next_el;
      continue;
    }
    
    /*
     * This is in case we missed one we can cheat on.
     */	
    if (!(bp->b_flags & BFMOD)) {
      if (fin)
	(*fin)(bp);
      
      __UNLOCK_BUFFER(bp);
      tmp_el = next_el;
      continue;
    }
    
    /*
     * OK, now that we burned up lots of CPU time, heh,
     * we can actually try to write this guy out.
     *
     * NOTE: This was taken from the original code,
     * but it seems strange that we can just go and
     * adjust b_iodone....hmm.  Also, sleeping on
     * the same variable that multiple contexts can
     * mess with isn't a good idea (bufsync).
     *
     * For one thing, a second process entering this
     * arena can cause a first one to sleep unnecessarily
     * long.
     *
     * ADDENDUM:  This may be more of a problem than
     * first noted.  I believe that there may be a race
     * on bufsync.  As such, I am declaring it now
     * as volatile and raising the processor level around
     * it. (LOUIS)
     *
     */
    if (sync != BUF_ASYNC) {
      int s;
      
      if (iodone != NULL)
	bp->b_iodone = iodone;
      else
	bp->b_iodone = sync_iodone;
      s = sphi();
      bufsync++;
      spl(s);
    }
    
    buffer_write_start(bp, sync);
    tmp_el = next_el;
  }
  
  if (sync == BUF_ASYNC)
    return;
  
  s = sphi();
  while(bufsync)
    x_sleep(&bufsync, pridisk, slpriNoSig, "syncwait");
  spl(s);
}

/*
 * Synchronise the buffer cache.
 */

#if	__USE_PROTO__
void bsync (void)
#else
void
bsync ()
#endif
{
	general_sync (NULL, 0, NULL, NULL, BUF_SYNC);
}


/*
 * Selection predicate for buffer-flush code.
 */

#if	__USE_PROTO__
__LOCAL__ int bflush_sel (int arg, buf_t * bp)
#else
__LOCAL__ int
bflush_sel (arg, bp)
int		arg;
buf_t	      *	bp;
#endif
{
	return bp->b_dev == arg;
}


/*
 * Finalization function for buffer-flush code.
 */

#if	__USE_PROTO__
__LOCAL__ void bflush_finish (buf_t * bp)
#else
__LOCAL__ void
bflush_finish (bp)
buf_t	      *	bp;
#endif
{
	bp->b_dev = NODEV;
	bp->b_flags = BFNTP;
}

/*
 * Invalidate a buffer.
 */
#if	__USE_PROTO__
void buf_invalidate (buf_t * bp)
#else
void
buf_invalidate (bp)
buf_t	      *	bp;
#endif
{
	bp->b_dev = NODEV;
	bp->b_flags = BFNTP;
}


/*
 * iodone function for buffer-flush code.
 */

#if	__USE_PROTO__
__LOCAL__ void bflush_iodone (buf_t * bp)
#else
__LOCAL__ void
bflush_iodone (bp)
buf_t	      *	bp;
#endif
{
	bflush_finish (bp);
	sync_iodone (bp);
}


/*
 * Synchronise all blocks for a particular device in the buffer cache
 * and invalidate all references.
 */

#if	__USE_PROTO__
void bflush (o_dev_t dev)
#else
void
bflush (dev)
o_dev_t dev;
#endif
{
	general_sync (bflush_sel, dev, bflush_finish, bflush_iodone, BUF_SYNC);
}


/*
 * Release the given buffer.
 */

#if	__USE_PROTO__
void brelease (buf_t * bp)
#else
void
brelease (bp)
buf_t * bp;
#endif
{
	if ((bp->b_flag & BFERR) != 0) {
		bp->b_flag &= ~ BFERR;
		bp->b_errno = 0;
		bp->b_dev = NODEV;
	}
	bp->b_flag &= ~ BFNTP;
	wakeup (bp);

	unlock_block (bp);
}


/********************************************************************
 * Function: buf_t *bread(o_dev_t dev, daddr_t bno, int sync)
 *
 * Description: If sync != 0, return a *locked* buffer filled with
 *		block ("sector") bno from device dev.  If sync == 0,
 *		the read is asynchronous and NULL is returned.
 *
 * Returns: Pointer to buffer, or NULL if fatal error.
 *
 * Affects Globals: None.
 *
 * Affects U-Area: Sets u.u_error on error condition.
 *
 * Side Effects: Adjusts fields of the buffer returned.
 *
 * Comments:	Note that this function returns NULL if b_resid == b_count,
 *		but this is not necessarily an error condition; it needs to
 *		be examined in the near future to determine the correctness
 *		of this return value (LOUIS).
 */

#if	__USE_PROTO__
buf_t * bread (o_dev_t dev, daddr_t bno, int sync)
#else
buf_t *
bread (dev, bno, sync)
o_dev_t dev;
daddr_t bno;
int sync;
#endif
{
	buf_t	      *	bp;
	int		err;

	if (((bp = bclaim (dev, bno, NBPSCTR, sync))->b_flag & BFNTP) == 0)
		return bp;

	if (sync == BUF_SYNC)
		ASSERT ((bp->b_flag & BFASY) == 0);
	else {
		/*
		 * If the BFASY flag is set, then we don't need to
		 * actually initiate a new operation. Whatever is
		 * happening to the buffer now is fine by us...
		 */

		if ((bp->b_flag & BFASY) != 0)
			return NULL;

		/*
		 * Since we are actually going to perform some I/O
		 * on the buffer, we need to lock it first (it used
		 * to be that bclaim () would always do this, but that
		 * prevented useful parallelism).
		 */

		if ((bp = lock_block (bp, dev, bno)) == NULL) {
			/*
			 * Looping at this point would be a Bad Idea;
			 * if the last read of the block had an error,
			 * we will wind up here. Instead, we claim
			 * the buffer synchronously.
			 */

			if (((bp = bclaim (dev, bno, NBPSCTR, 1))->b_flag & BFNTP) == 0)
				return bp;
		}

		__MAKE_BUFFER_ASYNC (bp);
	}

	bp->b_req = BREAD;
	bp->b_flags |= B_READ;
	bp->b_count = BSIZE;
	dblock (dev, bp);
	if (sync == BUF_ASYNC)
		return NULL;

	buffer_wait (bp);

	if ((err = geterror (bp)) != 0) {
		SET_U_ERROR (err, "bread ()");
		brelease (bp);
		return NULL;
	}

	/*
	 * This may need to be adjusted.  This isn't necessarily
	 * an error state.
	 */
	if (bp->b_resid == BSIZE) {
		brelease (bp);
		return NULL;
	}

	return bp;
}
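
/*
 * A minimal usage sketch (hypothetical caller, for illustration only):
 * a typical synchronous reader obtains a locked buffer, examines the data
 * through b_vaddr, and releases the buffer when done:
 *
 *	buf_t	      *	bp;
 *
 *	if ((bp = bread (dev, bno, BUF_SYNC)) != NULL) {
 *		... inspect or copy the BSIZE bytes at bp->b_vaddr ...
 *		brelease (bp);
 *	}
 *
 * When the synchronous path returns NULL, bread () has already released
 * the buffer itself (and set u.u_error where an I/O error occurred).
 */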


/*
 * Write the given buffer out.  If `sync' is set, the write is synchronous,
 * otherwise asynchronous.  This routine must be called with the buffer
 * gate locked.
 */

#if	__USE_PROTO__
void bwrite (buf_t * bp, int sync)
#else
void
bwrite (bp, sync)
buf_t * bp;
int sync;
#endif
{
	buffer_write_start (bp, sync);
	if (sync == BUF_SYNC)
		buffer_wait (bp);
}


/*
 * Perform an LRU chain update by unlinking the specified buffer
 * from its present location in the LRU chain and inserting it
 * at the head of the chain, as pointed to by "firstbuf".  Handle
 * updating "lastbuf" if the current buffer is the last buffer on the chain.
 */

#if	__USE_PROTO__
__LOCAL__ void LRUupdate (buf_t * bp)
#else
__LOCAL__ void
LRUupdate (bp)
buf_t * bp;
#endif
{
	if (bp != firstbuf) {
		if (bp == lastbuf)
			lastbuf = bp->b_LRUb;
		if (bp->b_LRUb != NULL)
			bp->b_LRUb->b_LRUf = bp->b_LRUf;
		if (bp->b_LRUf != NULL)
			bp->b_LRUf->b_LRUb = bp->b_LRUb;
		bp->b_LRUb = NULL;
		bp->b_LRUf = firstbuf;
		firstbuf->b_LRUb = bp;
		firstbuf = bp;
	}
}


/*
 * If the requested buffer header is in the hash chain, delete it.
 */

#if	__USE_PROTO__
__LOCAL__ void HASHdelete (buf_t * bp)
#else
__LOCAL__ void
HASHdelete (bp)
buf_t * bp;
#endif
{
	buf_t	      *	prev;

	/*
	 * We expect the block hash chains to be sufficiently short (i.e.
	 * 1 or 2 entries only) that we can do a linear search for the
	 * previous entry.
	 */

	if ((prev = hasharray [bp->b_hashval]) == bp) {
		/*
		 * We're first in the chain
		 */

		hasharray [bp->b_hashval] = bp->b_hash;
	} else {
		while (prev->b_hash != bp) {
			if ((prev = prev->b_hash) == NULL) {
				cmn_err (CE_WARN, "Can't find buffer %x",
					 (unsigned) bp);
				bp->b_hash = NULL;
				return;
			}
		}
		prev->b_hash = bp->b_hash;
	}

	bp->b_hash = NULL;
}


#if __USE_PROTO__
void
LRUinsert(buf_t *bp)
#else
void
LRUinsert(bp)
buf_t *bp;
#endif /* __USE_PROTO__ */
{
	bp->b_LRUf = firstbuf;
	bp->b_LRUb = NULL;
	firstbuf->b_LRUb = bp;
	firstbuf = bp;
}

#if __USE_PROTO__
void 
LRUremove(buf_t *bp)
#else
void
LRUremove(bp)
buf_t *bp;
#endif /* __USE_PROTO__ */
{
	if (bp == firstbuf) {
		/* Unlink from the head; clear the new head's back pointer. */
		firstbuf = bp->b_LRUf;
		if (firstbuf != NULL)
			firstbuf->b_LRUb = NULL;
		return;
	}

	if (bp == lastbuf) {
		/* Unlink from the tail; clear the new tail's forward pointer. */
		lastbuf = bp->b_LRUb;
		if (lastbuf != NULL)
			lastbuf->b_LRUf = NULL;
		return;
	}

	bp->b_LRUb->b_LRUf = bp->b_LRUf;
	bp->b_LRUf->b_LRUb = bp->b_LRUb;
}

#if __USE_PROTO__
buf_t *
getrbuf(long flag)
#else
buf_t *
getrbuf(flag)
long flag;
#endif /* __USE_PROTO__ */
{
	buf_t *bp;

	bp = kmem_alloc(sizeof(buf_t), flag);

	if (bp == NULL)
		return NULL;

	bioerror (bp, 0);
	bp->b_iodone = NULL;
	bp->b_flag = BFNTP | BFDYN | BFNSY;
	bp->b_req = BWRITE;
	bp->b_dev = NODEV;
	bp->b_edev = NODEV;
	bp->b_bno = 0;
	bp->b_dynf = dyn_buf_head;
	dyn_buf_head = bp; 
	bp->b_hashval = HASH(0, 0) % NHASH;
	bp->b_hash = NULL;
	bp->b_sort = NULL;
	bp->b_proc = SELF;
	bp->b_iodone = NULL;
	bp->b_private = NULL;

	__INIT_BUFFER_LOCK(bp);

	__LOCK_BUFFER(bp, "getrbuf");

	HASHinsert(bp);
	LRUinsert(bp);

	return bp;
}

#if __USE_PROTO__
void
freerbuf(buf_t *bp)
#else
void
freerbuf(bp)
buf_t *bp;
#endif /* __USE_PROTO__ */
{
  buf_t *bp_prev;
  buf_t *bp_cur;
  
  /* Note, must be called from a user context where the buffer
   * cannot be taken by an interrupt.  In other words, if anything
   * is sleeping on this buffer, it will sleep forever.  This needs
   * to be worked out sooner or later.
   */
  
  HASHdelete(bp);
  LRUremove(bp);
  
  bp_prev = bp_cur = dyn_buf_head;
  
  while (bp_cur != NULL) {
    if (bp_cur == bp)
      break;

    bp_prev = bp_cur;
    bp_cur = bp_cur->b_dynf;
  }

  if (bp_cur == NULL)
    cmn_err(CE_PANIC,
	    "freerbuf: lost dynamic buffer bno: %d dev: (0x%x, 0x%x)",
	    bp->b_bno, major(bp->b_dev), minor(bp->b_dev));

  if (bp_cur == bp_prev) /* found on head */
    dyn_buf_head = bp_cur->b_dynf;
  else
    bp_prev->b_dynf = bp_cur->b_dynf;

  kmem_free(bp, sizeof(buf_t));
}
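
/*
 * A minimal usage sketch (hypothetical, for illustration only): getrbuf ()
 * and freerbuf () are meant to be paired by a driver that needs a private
 * buffer header:
 *
 *	buf_t	      *	bp;
 *
 *	if ((bp = getrbuf (KM_NOSLEEP)) != NULL) {
 *		... fill in the address fields, start the transfer and
 *		    wait for completion ...
 *		buf_invalidate (bp);
 *		brelease (bp);
 *		freerbuf (bp);
 *	}
 *
 * The invalidate/release/free sequence mirrors the cleanup done by
 * io_user_req () later in this file; getrbuf () hands the header back
 * locked, so it must be released before it is freed.
 */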

/*
 * If the requested buffer is in the buffer cache, return a pointer to
 * it.  If not, pick an empty buffer, set it up and return it.
 */
#if	__USE_PROTO__
buf_t * bclaim (o_dev_t dev, daddr_t bno, long bsize, int sync)
#else
buf_t *
bclaim (dev, bno, bsize, sync)
o_dev_t		dev;
daddr_t		bno;
long		bsize;
int		sync;
#endif
{
	buf_t * bp;

	if ((bp = buffer_find (dev, bno, sync)) != NULL) {
		if (sync != BUF_ASYNC) {
			/*
			 * If the buffer had an I/O error, mark it as
			 * invalid.
			 */

			if (bp->b_flag & BFERR) {
				bp->b_flag |= BFNTP;
				bioerror (bp, 0);
			}
			bp->b_req = BWRITE;
			bp->b_flag &= ~ B_READ;
			bp->b_count = bp->b_bufsiz;
		}
		LRUupdate (bp);
		return bp;
	}


	/*
	 * The requested buffer is not resident in our cache.  Locate the
	 * oldest (least recently used) available buffer.  If it's dirty,
	 * queue up an asynchronous write for it and continue searching
	 * for the next old candidate. Once a candidate is found, move it
	 * to the front of the LRU chain, update the hash pointers, mark
	 * the buffer as invalid, unlock our buffer gate and return the
	 * buffer to the requestor.
	 */

	for (;;) {				/* loop until successful */
		unsigned short	s;

		for (bp = lastbuf ; bp != NULL ; bp = bp->b_LRUb) {
			/*
			 * NIGEL: This code assumes that buffers can be locked
			 * only by other process-level code.
			 */

			if (__IS_BUFFER_LOCKED (bp))
				continue;	/* not available */

			if (bp->b_flag & BFMOD) {
#if	1
				general_sync (NULL, 0, NULL, NULL, BUF_ASYNC);
#else
				__LOCK_BUFFER (bp, "bclaim ()");
				bwrite (bp, BUF_ASYNC);	/* flush dirty buffer */
#endif
				continue;
			}

			if (sync == BUF_SYNC)
				__LOCK_BUFFER (bp, "bclaim () #2");

			/*
			 * Update the hash chain for this old
			 * buffer.  Unlink it from its old location,
			 * fixing up any references. Also, update
			 * the LRU chain to move the buffer to the head.
			 */

			HASHdelete (bp);
			LRUupdate (bp);

			bioerror (bp, 0);
			bp->b_iodone = NULL;
			bp->b_flag = BFNTP;
			bp->b_req = BWRITE;
			bp->b_dev = dev;
			bp->b_edev = dev;
			bp->b_bno = bno;
			bp->b_hashval = HASH (dev, bno) % NHASH;
			bp->b_count = bp->b_bufsiz;
	
			HASHinsert (bp);
			return bp;
		}

		/*
		 * LOUIS:
		 * bufneed shortened to bfneed because our ps output
		 * doesn't have the length to display it.
		 */
		s = sphi ();
		bufneed = 1;
		x_sleep ((char *)& bufneed, pridisk, slpriNoSig, "bfneed");
		/* There are no buffers available.  */
		spl (s);
	}
}


/*
 * This is called by the driver when I/O has completed on a buffer.
 */

#if	__USE_PROTO__
void bdone (buf_t * bp)
#else
void
bdone (bp)
buf_t * bp;
#endif
{
	biodone (bp);
}


/********************************************************************
 * Function: int geterror(struct buf * bp) (DDI/DKI)
 *
 * Description:  Get the error number stored in a buffer header, bp.
 *
 * Returns: Error number in buffer header.
 *
 * Affects Globals: None.
 *
 * Affects U-Area: No.
 *
 * Side Effects: None.
 *
 * Comments:	If the buffer error flag is set but the errno is
 *		0, returns EIO anyhow.
 *
 */
#if	__USE_PROTO__
int geterror (buf_t * bp)
#else
int
geterror (bp)
buf_t	      *	bp;
#endif
{
	if (bp->b_errno != 0)
		return bp->b_errno;
	if (bp->b_flag & BFERR)
		return EIO;
	return 0;
}


/********************************************************************
 * Function: void biodone(buf_t *bp) (DDI/DKI)
 *
 * Description: Releases buffer and wakes up any waiting processes.
 *		Should be called by driver when I/O is done.
 *
 * Returns: Nothing.
 *
 * Affects Globals: None.
 *
 * Affects U-Area: No.
 *
 * Side Effects: Unlocks buffer, wakes processes, changes fields in bp.
 *
 * Comments:	If the driver specified an iodone handler, that function
 *		is called with bp and then we return.
 *	
 *		If not specified, unlock the buffer if ASYNC and return.
 *	
 *		Several important fields are modified, and this function
 *		should be reviewed before using within the kernel.
 */
#if	__USE_PROTO__
void biodone (buf_t * bp)
#else
void
biodone (bp)
buf_t	      *	bp;
#endif
{
	__iodone_t	iodone;

	switch (bp->b_req) {
	case BWRITE:
		bp->b_flag &= ~ BFMOD;
		break;

	case BREAD:
		break;

	default:
		/*
		 * The floppy-disk format request comes through here, too.
		 */
		break;
	}

	if ((bp->b_flag & BFERR) != 0)
		bp->b_dev = NODEV;

	bp->b_flag &= ~ BFNTP;
	wakeup (bp);

	if ((iodone = bp->b_iodone) != NULL) {
		bp->b_iodone = NULL;
		(* iodone) (bp);
		return;
	}

	if ((bp->b_flag & BFASY) != 0) {
		bp->b_flag &= ~ BFASY;

		unlock_block (bp);
	}
}


/********************************************************************
 * Function: void bioerror(buf_t *bp, int errno) (DDI/DKI)
 *
 * Description: Change error status of a buffer.
 *
 * Returns: Nothing.
 *
 * Affects Globals: No.
 *
 * Affects U-Area: No.
 *
 * Side Effects: Changes errnum and BFERR in buffer header.
 *
 * Comments:	Call with errno != 0 to set error, or errno == 0 to
 *		clear error.
 */
#if	__USE_PROTO__
void bioerror (buf_t * bp, int errno)
#else
void
bioerror (bp, errno)
buf_t	      *	bp;
int		errno;
#endif
{
	if ((bp->b_errno = errno) != 0)
		bp->b_flag |= BFERR;
	else
		bp->b_flag &= ~ BFERR;
}



/********************************************************************
 * Function: int biowait(buf_t *bp) (DDI/DKI)
 *
 * Description:  Wait (sleep) until I/O finishes.
 *
 * Returns: Error number generated during I/O.
 *
 * Affects Globals: None.
 *
 * Affects U-Area: No.
 *
 * Side Effects: None.
 *
 * Comments:	Buffer and other fields are affected, and the process
 *		may sleep as a result of a call to this function.
 *		See the functions this calls for more info.
 */
#if	__USE_PROTO__
int biowait (buf_t * bp)
#else
int
biowait (bp)
buf_t	      *	bp;
#endif
{
	buffer_wait (bp);
	return geterror (bp);
}


/********************************************************************
 * Function: void brelse(buf_t *bp) (DDI/DKI)
 *
 * Description: Return a buffer to the system.
 *
 * Returns: Nothing.
 *
 * Affects Globals: None.
 *
 * Affects U-Area: No.
 *
 * Side Effects: Wakes processes sleeping on this buffer.
 *
 * Comments:	WARNING!!! Cannot be used in place of brelease().
 *		This should only be used to release buffers obtained through
 *		geteblk() or ngeteblk()!
 */
#if	__USE_PROTO__
void brelse (buf_t * bp)
#else
void
brelse (bp)
buf_t	      *	bp;
#endif
{
	bp->b_flag &= ~ BFNTP;
	wakeup (bp);

	unlock_block (bp);
}


/********************************************************************
 * Function: void clrbuf(buf_t *bp) (DDI/DKI)
 *
 * Description: Clear out a buffer.
 *
 * Returns: Nothing.
 *
 * Affects Globals: None.
 *
 * Affects U-Area: No.
 *
 * Side Effects: bp->b_resid = 0 and zeros buffer data space.
 *
 * Comments:
 */
#if	__USE_PROTO__
void clrbuf (buf_t * bp)
#else
void
clrbuf (bp)
buf_t	      *	bp;
#endif
{
	memset (bp->b_un.b_addr, 0, bp->b_count);
	bp->b_resid = 0;
}


/********************************************************************
 * Function: buf_t *geteblk() (DDI/DKI)
 *
 * Description: Get a free buffer.
 *
 * Returns: Pointer to a locked buffer.
 *
 * Affects Globals: None.
 *
 * Affects U-Area: No.
 *
 * Side Effects: Changes fields in returned buffer.
 *
 * Comments:	Blocks allocated with this should be freed via
 *		brelse() or biodone().
 */
#if	__USE_PROTO__
buf_t * geteblk (void)
#else
buf_t *
geteblk ()
#endif
{
	buf_t	      *	bufp = bclaim (NODEV, (daddr_t) 0, NBPSCTR, BUF_SYNC);

	bufp->b_flags |= BFRAW;
	return bufp;
}
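
/*
 * A minimal usage sketch (hypothetical, for illustration only): as noted in
 * the geteblk () comments above, a scratch buffer obtained here is returned
 * with brelse () (or biodone ()), not brelease ():
 *
 *	buf_t	      *	bp = geteblk ();
 *
 *	clrbuf (bp);
 *	... use bp->b_un.b_addr as scratch space ...
 *	brelse (bp);
 */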


/*
 * Read data from the I/O segment into kernel space.
 *
 * "v" is the destination virtual address.
 * "n" is the number of bytes to read.
 */

#if	__USE_PROTO__
void ioread (IO * iop, char * v, size_t n)
#else
void
ioread (iop, v, n)
IO * iop;
char * v;
size_t n;
#endif
{
	switch (iop->io_seg) {
	case IOSYS:
		memcpy (v, iop->io.vbase, n);
		iop->io.vbase += n;
		break;

	case IOUSR:
		iop->io.vbase += ukcopy (iop->io.vbase, v, n);
		break;

	case IOPHY:
		dmain (n, iop->io.pbase, v);
		iop->io.pbase += n;
		break;
	}
	iop->io_ioc -= n;
}


/*
 * Clear I/O space.
 */

#if	__USE_PROTO__
void ioclear (IO * iop, size_t size)
#else
void
ioclear (iop, size)
IO	      *	iop;
size_t		size;
#endif
{
	switch (iop->io_seg) {
	case IOSYS:
		(void) memset (iop->io.vbase, 0, size);
		iop->io.vbase += size;
		break;

	case IOUSR:
		(void) umemclear (iop->io.vbase, size);
		iop->io.vbase += size;
		break;

	case IOPHY:
		dmaclear (size, iop->io.pbase);
		iop->io.pbase += size;
		break;
	}
	iop->io_ioc -= size;
}


/*
 * Write data from kernel space to the I/O segment.
 */

#if	__USE_PROTO__
void iowrite (IO * iop, char * v, size_t n)
#else
void
iowrite (iop, v, n)
IO * iop;
char * v;
size_t n;
#endif
{
	switch (iop->io_seg) {
	case IOSYS:
		memcpy (iop->io.vbase, v, n);
		iop->io.vbase += n;
		break;

	case IOUSR:
		iop->io.vbase += kucopy (v, iop->io.vbase, n);
		break;

	case IOPHY:
		dmaout (n, iop->io.pbase, v);
		iop->io.pbase += n;
		break;
	}
	iop->io_ioc -= n;
}


/*
 * Get a character from the I/O segment.
 */

#if	__USE_PROTO__
int iogetc (IO * iop)
#else
int
iogetc (iop)
IO * iop;
#endif
{
	unsigned char	c;

	if (iop->io_ioc == 0)
		return -1;
	-- iop->io_ioc;
	if (iop->io_seg == IOSYS)
		c = * (unsigned char *) iop->io.vbase ++;
	else {
		c = getubd (iop->io.vbase ++);
		if (get_user_error ())
			return -1;
	}
	return c;
}

/*
 * Put a character using the I/O segment.
 */

#if	__USE_PROTO__
int ioputc (unsigned char c, IO * iop)
#else
int
ioputc (c, iop)
unsigned char	c;
IO	      *	iop;
#endif
{
	if (iop->io_ioc == 0)
		return -1;
	-- iop->io_ioc;
	if (iop->io_seg == IOSYS)
		* (char *) iop->io.vbase ++ = c;
	else {
		putubd (iop->io.vbase ++, c);
		if (get_user_error ())
			return -1;
	}

	/*
	 * Originally, this returned the character value, but sign-extension
	 * problems mean that we need to either return the character cast to
	 * unsigned char, or 0. Hal insists on 0, so that is what we do now.
	 */

	return 0;
}
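
/*
 * A minimal usage sketch (hypothetical driver fragment, for illustration
 * only): a character driver read routine typically drains device data into
 * the caller's I/O segment with iowrite (), which advances the I/O pointers
 * and decrements io_ioc for each chunk:
 *
 *	while (iop->io_ioc > 0) {
 *		size_t	n = iop->io_ioc;
 *
 *		if (n > sizeof (staging))
 *			n = sizeof (staging);
 *		... fill "staging" with n bytes of device data ...
 *		iowrite (iop, staging, n);
 *	}
 *
 * Here "staging" is a placeholder for a driver-private kernel buffer; it is
 * not defined in this file.
 */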


/*
 * Given an I/O structure and a buffer header, see if the addresses
 * in the I/O structure are valid and set up the buffer header.
 *
 * Search the u area segment table for a data segment containing
 * iop->io.vbase.  If one is found, put the corresponding system
 * global address into bp->b_paddr and return the corresponding
 * SEG pointer, else return NULL.
 */

#if	__USE_PROTO__
__LOCAL__ SEG * iomapvp (IO * iop, buf_t * bp)
#else
__LOCAL__ SEG *
iomapvp (iop, bp)
IO * iop;
buf_t * bp;
#endif
{
	SR * srp;
	SEG * sp;
	caddr_t iobase, base;
	unsigned ioc;

	ASSERT (iop->io_seg == IOUSR);

	iobase = iop->io.vbase;
	ioc = iop->io_ioc;

	bp->b_vaddr = iobase;
	bp->b_proc = SELF;

	for (srp = SELF->p_segl; srp < & SELF->p_segl [NUSEG]; srp ++) {
		if ((sp = srp->sr_segp) == NULL)
			continue;
		if ((srp->sr_flag & SRFDATA) == 0)
			continue;
		/*
		 * The following calculation is because the system represents
		 * the 'base' of a stack as its upper limit (because it is the
		 * upper limit that is fixed).
		 */
		base = srp->sr_base;
		if (srp == & SELF->p_segl [SISTACK])
			base -= srp->sr_size;

		if (iobase < base)
			continue;
		if (iobase + ioc > base + sp->s_size)
			continue;
 		bp->b_paddr = MAPIO (sp->s_vmem, iobase - base);
		return sp;
	}

	/* Is the io area in question contained in a shared memory segment? */
	if ((srp = accShm (iobase, ioc)) != NULL) {
		sp = srp->sr_segp;
		base = srp->sr_base;
 		bp->b_paddr = MAPIO (sp->s_vmem, iobase - base);
		return sp;
	}

	return 0;
}

/********************************************************************
 * Function: buf_t *raw_buf_read(buf_t *bp, o_dev_t dev, daddr_t bno,
 *	long bsize)
 *
 * Description: Read a buffer from a device allowing for reading
 *		past the end of device as a non-error.
 *
 * Returns:  Pointer to buffer (a new buffer if bp was NULL).
 *
 * Affects Globals: None.
 *
 * Affects U-Area: Changes u.u_error on error.
 *
 * Side Effects: Changes fields in bp.
 *
 * Comments:	If bp is null, will also allocate a buffer.  Either way,
 *		this function sets up the buffer itself. Also note that
 *		by calling bclaim(), this may cause the calling context
 *		to sleep.  Read errors should be handled by the caller
 *		and checked for by examining bp->b_flags. This differs
 *		from bread() in that it always returns a buffer, always
 *		reads synchronously, and never handles errors.
 */
#if	__USE_PROTO__
buf_t *raw_buf_read(buf_t *bp, o_dev_t dev, daddr_t bno, long bsize)
#else
buf_t *
raw_buf_read(bp, dev, bno, bsize)
buf_t *bp;
o_dev_t dev;
daddr_t bno;
long bsize;
#endif
{
	/*
	 * If no buffer was passed in, claim one from the cache.
	 */
	if (bp == NULL) {
		if (((bp = bclaim (dev, bno, bsize, BUF_SYNC))->b_flag
		   & BFNTP) == 0)
			return bp;
	}

	/*
	 * Set up the request.
	 */
	bp->b_req = BREAD;
	bp->b_flag |= (B_READ | BFNTP);
	bp->b_dev = dev;
	bp->b_edev = NODEV;
	bp->b_blkno = bno;
	bp->b_bcount = bp->b_bufsiz;
	bp->b_resid = bp->b_bcount;

	/*
	 * Do the request and return the buffer.
	 */
	dblock(dev, bp);
	buffer_wait(bp);

	return bp;
}

/********************************************************************
 * Function: void io_buffer_req(buf_t *bp, IO *iop, o_dev_t dev,
 *		int req, int f)
 *
 * Description: Do raw I/O on behalf of a block device using
 *		the buffer cache as a side-buffer.  Good for
 *		DMA requests over 16Meg addresses and block straddles.
 *
 * Returns: Nothing.
 *
 * Affects Globals: None.
 *
 * Affects U-Area: u.u_error contains error status, places data
 *		   in user space if a read is being done.
 *
 * Side Effects: changes io_ioc, io_seek in iop
 *
 * Comments:	If used for DMA, this assumes that the buffer
 *	    	cache always lies below the 16M address limit.
 *		This code will also nicely handle block straddles.
 */
#if 0
#if	__USE_PROTO__
void io_buffer_req(buf_t *bp, IO *iop, o_dev_t dev, int req, int f)
#else
void
io_buffer_req(bp, iop, dev, req, f)
buf_t *bp;
IO *iop;
o_dev_t dev;
int req;
int f;
#endif
{
	CON		*cp;
	size_t		request_bytes;
	size_t		amount;
	unsigned int	left_edge;
	unsigned int	right_edge;
	unsigned	original_amount;
	caddr_t barea = 0;

	if((cp = drvmap(dev, DFBLK)) == NULL)
		/* u.u_error set by drvmap() */
		return;

	ASSERT(iop != NULL);

	/*
	 * Special consideration must be given to writing.
	 */
	original_amount = iop->io_ioc;

	/*
	 * Since we will accept block straddles, we need to
	 * read in the first block if we are going to write
	 * to the middle of it.  Although this appears to
	 * duplicate itself later, it helps because the block
	 * will already be buffered upon entry to the loop.
	 */
	left_edge = blocko(iop->io_seek);

	bp = getrbuf(KM_NOSLEEP);
	barea = kmem_alloc(512, KM_NOSLEEP);
	ASSERT(barea);
	bp->b_un.b_addr = barea;
	bp->b_paddr = vtovp(barea);
	bp->b_iodone = NULL;
	bp->b_flags |= (BFRAW | BFNTP);
	bp->b_bufsiz = NBPSCTR;
	

	if(left_edge != 0 && req != BREAD && req != BFLFMT) {
		/*
		 *	i)	Allocate and read from cache.
		 *	ii)	Check for errors.
		 *  
		 * NOTE: We set the error flag in the buffer no matter
		 * what so brelease() sees that it is invalid.  Since
		 * this is a raw write, we don't want to sync the buffer
		 * later on!
		 */
#if 0
		bp = raw_buf_read(NULL, dev, blockn(iop->io_seek), NBPSCTR);
#else
		raw_buf_read(bp, dev, blockn(iop->io_seek), NBPSCTR);
#endif

		if(bp->b_flags & BFERR || bp->b_count == bp->b_resid) {
			SET_U_ERROR(bp->b_errno, "ioreq()");
			buf_invalidate(bp);
			brelease(bp);
			kmem_free(barea, 512);
			freerbuf(bp);
			return;
		}
		
	}
#if 0
	else
		bp = geteblk();
#endif

	/*
	 * The following makes no sense whatsoever, and is
	 * copied from ioreq() in case there is an actual
	 * reason for it.  This code needs to be able to
	 * handle block straddles!!!!  However, it is recognized
	 * that we are holding a crap shoot in assuming that a block
	 * is NBPSCTR bytes on all devices.  Eventually for DDI/DKI
	 * we need to provide a mechanism like physiock() and variable
	 * size buffers.
	 */
#if 0
	if(blocko(iop->io_seek) != 0) {
		SET_U_ERROR(EIO, "io_buffer_req()");
		brelease(bp);
		return;
	}
#endif

	while(iop->io_ioc > 0) {

		/*
	 	 * Since we will accept block straddles, we can decide
	 	 * here whether or not we need to read a block in to
	 	 * update it properly.
		 *
		 *	i)	Release what we held.
		 *	ii)	Allocate and read from cache.
		 *	iii)	Check for errors.
	 	 */
		if(iop->io_ioc < NBPSCTR && req != BREAD && req != BFLFMT) {
			/*
			 * We have to mark the buffer as invalid else
			 * sync() will try and sync it.
			 */

#if 0
			bflush_finish(bp);
			brelease(bp);


			bp = raw_buf_read(NULL, dev, blockn(iop->io_seek), NBPSCTR);
#else
			raw_buf_read(bp, dev, blockn(iop->io_seek), NBPSCTR);
#endif
			

			if(bp->b_flags & BFERR || bp->b_count == bp->b_resid) {
				SET_U_ERROR(bp->b_errno, "ioreq()");
				bflush_finish(bp);
				brelease(bp);
				kmem_free(barea, 512);
				freerbuf(bp);
				return;
			}
		}

		/*
		 * Set up the request.  Some of this may
		 * be redundant and a few cycles could be
		 * saved if cleaned up after the drivers are
		 * made sane.
		 */

		if((bp->b_req = req) == BREAD)
			bp->b_flag |= B_READ;
		else {
			bp->b_flag |= BFMOD;
			bp->b_flag &= ~B_READ;
		}

		bp->b_dev = dev;	
		bp->b_edev = NODEV;
		bp->b_flag |= (f | BFNTP);
		bp->b_bno = blockn(iop->io_seek);
		bp->b_count = NBPSCTR;
		bp->b_resid = bp->b_count;

		/*
		 * Compute the size and offsets of the transfer.
		 */
		right_edge = ((left_edge + iop->io_ioc) > NBPSCTR ? NBPSCTR 
		   : (left_edge + iop->io_ioc));
		amount = (size_t)(right_edge - left_edge);
		request_bytes = NBPSCTR;

		ASSERT(amount > 0);

		/*
		 * If a write, copy data from user space
		 * to the buffer.
		 */
		if(req != BREAD)
			ioread(iop, bp->b_vaddr + left_edge,
			   amount);

		/*
		 * Access device.
		 */
		dblock(dev, bp);
		buffer_wait(bp);

		/*
		 * Compute how many bytes were left in the
		 * request.  This allows partially-served
		 * requests to be recognized. 
		 *
		 * Note also that this allows us to validly update
		 * the io_ioc et al. before the error check because
		 * bp->b_resid should be valid even if there is an error.
		 * Now, this is of course a faint dream at this stage,
		 * but one can hope.  
		 *
		 */
		request_bytes -= bp->b_resid;

#if 0
		printf("req: %lu kamount: %lu, io_ioc: %lu, seek: %lu\n",
		   request_bytes, amount, iop->io_ioc, iop->io_seek);
#endif
		
		/*
		 * Update the seek pointer and we may need to adjust the
		 * count of input. This can happen if we only needed a partial
		 * block to begin with.
		 */
		if(request_bytes > amount)
			request_bytes = amount;
		iop->io_seek += request_bytes;

		/*
		 * If a read, copy data from buffer to user space.
		 */	
		if(req == BREAD)
			iowrite(iop, bp->b_vaddr + left_edge,
			   request_bytes);
		
		/* 
		 * Must check for errors here!  Note
		 * that we can't use geterror() as it stands
		 * because reading past the end of a device
		 * is not an error on raw devices, but geterror()
		 * makes it that way!  To check for the end of
		 * a device on a raw device, the raw device
		 * should return bp->b_count == bp->b_resid (i.e.,
		 * nothing got done).
		 */

		if(bp->b_flags & BFERR || bp->b_count == bp->b_resid) {
			/*
			 * Special handling for writes -- 
			 * If this was the first attempt this call,
			 * then error, else, not an error but return
			 * the correct amount actually read.
			 *
			 * We can tell if this is the first pass or
			 * not by comparing the current amount done with
			 * the original_amount sent to us by the
			 * system call.
			 */
			if((bp->b_flags & B_READ) == 0)
				if((iop->io_ioc + amount) < original_amount) {
					iop->io_ioc += amount;
					SET_U_ERROR(0, "IORQ");
					bflush_finish(bp);
					brelease(bp);
					kmem_free(barea, 512);
					freerbuf(bp);
					return;
				}

			/*
			 * Normal handling for reads.
			 */
			SET_U_ERROR(bp->b_errno, "ioreq()");
			bflush_finish(bp);
			brelease(bp);
			kmem_free(barea, 512);
			freerbuf(bp);
			return;
		}

		/* 
		 * For a write of contiguous bytes, this is
		 * always true after the first straddle.
		 */
		left_edge = 0;

	}

	/* 
	 * Release and unlock buffer -- we're done.
	 */
	bflush_finish(bp);
	brelease(bp);
	kmem_free(barea, 512);
	freerbuf(bp);
}
#endif /* 0 -- io_buffer_req() */

#if __USE_PROTO__
int
physiock(int (*strat)(), buf_t *bp, dev_t dev, int rwflag,
	  daddr_t nblocks, IO *uiop)
#else
int
physiock(strat, bp, dev, rwflag, nblocks, uiop)
int (*strat)();
buf_t *bp;
dev_t dev;
int rwflag;
daddr_t nblocks;
IO *uiop;
#endif /* __USE_PROTO__ */
{

  /* Validate range */
  if (blocko(uiop->io_seek + uiop->io_ioc) > (nblocks - 1)) {
    set_user_error(ENXIO);
    return ENXIO;
  }

  if (blocko(uiop->io_seek + uiop->io_ioc) == (nblocks - 1)) {
    if (rwflag == B_READ)
      return 0;
    else {
      set_user_error(ENXIO);
      return ENXIO;
    }
  }

  return io_user_req(bp, uiop, dev, rwflag == B_READ ? BREAD : BWRITE,
		     BFRAW, strat);
}
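
/*
 * A minimal usage sketch (hypothetical driver fragment, for illustration
 * only): a block driver's raw read entry point would normally funnel the
 * transfer through physiock (), passing its own strategy routine and the
 * device size in blocks:
 *
 *	void
 *	hypothetical_read (dev, iop)
 *	dev_t		dev;
 *	IO	      *	iop;
 *	{
 *		(void) physiock (hypothetical_strategy, NULL, dev, B_READ,
 *				 hypothetical_nblocks (dev), iop);
 *	}
 *
 * Names prefixed "hypothetical_" are placeholders; the strategy routine and
 * the size calculation belong to the individual driver, not to this file.
 */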

 
  
/*
 * Given a buffer pointer, an I/O structure, a device, a request type, a
 * flags word and an optional strategy routine, check the I/O structure and
 * perform the I/O request.
 */
#if	__USE_PROTO__
int
io_user_req (buf_t * bp, IO * iop, o_dev_t dev, int req, int f,
		  int (*strat)(buf_t *bp))
#else
int
io_user_req (bp, iop, dev, req, f, strat)
buf_t * bp;
IO * iop;
o_dev_t dev;
int req;
int f;
int (*strat)();
#endif
{
  size_t amount;
  size_t result_bytes;
  int bp_given = 0;
  int tmp_err = 0;	/* status to return; stays 0 on success */
  
  if (drvmap(dev, DFBLK) == NULL)
    return get_user_error();
  
  ASSERT (iop != NULL);
  
  /*
   * NIGEL: The driver-kit docs said otherwise, but this should always
   * be done. Drivers like the HAI-SCSI that are written to the driver-
   * kit spec don't pass BFBLK even though they need to.
   */
  
  if (/* (f & BFBLK) != 0 && */ blocko(iop->io_seek) != 0
      || (bp && (bp->b_bufsiz % NBPSCTR))) {
    SET_U_ERROR (EIO, "ioreq ()");
    return EIO;
  }
  
  
  if (!(bp_given = (bp != NULL))) {
    /* Allocate a raw buffer header */
    bp = getrbuf(KM_NOSLEEP);
    
    if (bp == NULL) {
      set_user_error(EAGAIN);
      return EAGAIN;
    }
    
    bp->b_iodone = NULL;
  }
  
  while (iop->io_ioc > 0) {
    ASSERT(bp->b_flag & BFNSY);

    /* Must be sector aligned at all times */
    if (blocko(iop->io_seek)) {
#if defined(DEBUG)
      cmn_err(CE_WARN, "io_user_req: unlawful alignment?");
#endif

      set_user_error(EIO);
      buf_invalidate(bp);
      brelease(bp);
      if (!bp_given)
	freerbuf(bp);
      return EIO;
    }

    /* Change from virt addr in IO to system global addr in BUF */
    if (!bp_given && !iomapvp(iop, bp)) {
      SET_U_ERROR (EIO, "ioreq ()");
      tmp_err = EIO;
      goto out;
    }
    
    
    /*
     * How much can we transfer? Several multiples of the block
     * size, up to the next page boundary. Of course, we cannot
     * transfer more than requested. If the resulting request size
     * would be less than a block, shuffle a block's worth of data
     * through the buffer cache.
     *
     * First, get distance to next page boundary. Second, bound by
     * remaining transfer size.
     */
    
    if (!bp_given) {
      if (req == BFLFMT)
	amount = iop->io_ioc;
      else { /* Read or write */
	/* amount is number of bytes to next page bdry. */
	amount = (~ (unsigned) iop->io.vbase & (NBPC - 1)) + 1;

	/* Bound again by requested bytes */
	if (amount > iop->io_ioc)
	  amount = iop->io_ioc;
	
	/* Third, round down to multiple of sector size. */
	if (amount > NBPSCTR)
	  amount &= ~ (NBPSCTR - 1);
	
	bp->b_bufsiz = amount;
      }
    } 
    else
      amount = bp->b_bufsiz < iop->io_ioc ? bp->b_bufsiz : iop->io_ioc;


    if ( amount < NBPSCTR && req != BFLFMT) { /* Use the buffer cache */
      buf_t *tmp_bp;

      /* 
       * This forces sector alignment...be careful if you mess
       * with this.
       */
      if (iop->io_ioc >= NBPSCTR)
	amount = NBPSCTR;

      /* Do a read if a read or a partial write */
      if (req == BREAD || (amount < NBPSCTR && req != BREAD
			   && req != BFLFMT)) {

	/* Read sector */
	tmp_bp = raw_buf_read(NULL, dev, blockn(iop->io_seek), NBPSCTR);

	/* Check error result */
	if (tmp_bp->b_flags & BFERR || tmp_bp->b_count == tmp_bp->b_resid) {
	  set_user_error(tmp_err = tmp_bp->b_errno);
	  buf_invalidate(bp);
	  brelease(bp);
	  buf_invalidate(tmp_bp);
	  brelease(tmp_bp);
      
	  if (!bp_given)
	    freerbuf(bp);
	  return tmp_err;
	}

	/* If a read, we're done */
	if (req == BREAD) {
	  iowrite(iop, tmp_bp->b_vaddr, amount);
	  iop->io_seek += amount;
	  buf_invalidate(tmp_bp);
	  brelease(tmp_bp);
	  continue;
	}

      } else { /* Writing a full sector; just get a buffer */
	tmp_bp = bclaim(dev, blockn(iop->io_seek), NBPSCTR, BUF_SYNC);
      }

      /* Set up for a write */
      tmp_bp->b_flags &= ~B_READ;
      tmp_bp->b_flags |= (BFNTP | f | BFMOD);
      tmp_bp->b_dev = dev;
      tmp_bp->b_edev = NODEV;
      tmp_bp->b_bno = blockn (iop->io_seek);
      tmp_bp->b_resid = tmp_bp->b_count = NBPSCTR;

      ASSERT(amount == NBPSCTR);
      ASSERT(req != BREAD);
      ioread(iop, tmp_bp->b_vaddr, amount);

      if (strat)
	(*strat)(tmp_bp);
      else
	dblock(dev, tmp_bp);
      buffer_wait(tmp_bp);

      /* Error checking */
      if (tmp_bp->b_flags & BFERR || tmp_bp->b_count == tmp_bp->b_resid) {
        set_user_error(tmp_err = tmp_bp->b_errno);
        buf_invalidate(bp);
        brelease(bp);
        buf_invalidate(tmp_bp);
        brelease(tmp_bp);
      
        if (!bp_given)
          freerbuf(bp);
        return tmp_err;
      }

      buf_invalidate(tmp_bp);
      brelease(tmp_bp);

      iop->io_seek += amount;
      continue;
    }
	
    /* ELSE, amount >= NBPSCTR || req == BFLFMT */

    ASSERT(amount >= NBPSCTR || req == BFLFMT);
    ASSERT(vtop(bp->b_vaddr + 1) == P2P(bp->b_paddr + 1));

    /* Normal setup before issuing the I/O request for bp. */
    bp->b_flag |= f | BFNTP;
    
    if ((bp->b_req = req) == BREAD)
      bp->b_flag |= B_READ;
    else {
      bp->b_flag &= ~B_READ;
      bp->b_flag |= BFMOD;
    }
    
    bp->b_dev = dev;
    bp->b_edev = NODEV;
    bp->b_bno = blockn (iop->io_seek);
    bp->b_resid = bp->b_count = amount;

    if (req != BREAD && bp_given)
      ioread(iop, bp->b_vaddr, amount);
    
    if (strat)
      (*strat)(bp);
    else
      dblock(dev, bp);
    buffer_wait(bp);
    
    result_bytes = amount - bp->b_resid;

    if (req == BREAD && bp_given) {
      iowrite (iop, bp->b_vaddr, result_bytes);
    }
    
    if (bp->b_flags & BFERR || bp->b_count == bp->b_resid) {
      set_user_error(tmp_err = bp->b_errno);
      buf_invalidate(bp);
      brelease(bp);
      
      if (!bp_given)
	freerbuf(bp);
      return tmp_err;
    }
    
    if (!bp_given) {
      /*
       * The data moved directly through the mapped buffer, so
       * ioread()/iowrite() were not used and did not advance the I/O
       * offsets; update them here.
       */
      iop->io_ioc -= result_bytes;
      iop->io.vbase += result_bytes;
    }

    /* Update seek */
    iop->io_seek += result_bytes;
  }
  
 out:
  buf_invalidate(bp);
  brelease(bp);
  if (!bp_given)
    freerbuf(bp);
  return tmp_err;
}


		
/*
 * Given a buffer pointer, an I/O structure, a device, request type, and
 * a flags word, check the I/O structure and perform the I/O request.
 * This is now a thin wrapper around io_user_req () using the default
 * block strategy routine (dblock ()).
 */

#if	__USE_PROTO__
void ioreq (buf_t * bp, IO * iop, o_dev_t dev, int req, int f)
#else
void
ioreq (bp, iop, dev, req, f)
buf_t * bp;
IO * iop;
o_dev_t dev;
int req;
int f;
#endif
{
  io_user_req (bp, iop, dev, req, f, NULL);
}

/*
 * Like ioreq (), but guarantees that no DMA straddle occurs.
 * Assumes the caller is fl.c, xt.c, dv.c, or another driver that
 * obeys the same parameter conventions they do.
 */

#if	__USE_PROTO__
void dmareq (buf_t * bp, IO * iop, o_dev_t dev, int req)
#else
void
dmareq(bp, iop, dev, req)
buf_t *bp;
IO *iop;
o_dev_t dev;
int req;
#endif
{
  io_user_req(bp, iop, dev, req, BFRAW, NULL);
}
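
/*
 * Hypothetical usage sketch (not taken from fl.c, xt.c or dv.c
 * themselves): a driver read or write entry would typically hand its
 * IO pointer straight through, e.g.
 *
 *	dmareq (NULL, iop, dev, BREAD);		read via a raw header
 *	dmareq (bp, iop, dev, BWRITE);		write via the caller's header
 *
 * Passing a NULL bp lets io_user_req () allocate a raw buffer header
 * with getrbuf () and free it again when the transfer completes or
 * fails.
 */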


#include <kernel/ddi_cpu.h>


/*
 * Initialise devices.
 * Mark all initialized devices as loaded.
 */

void
devinit ()
{
	DRV * dp;
	int i;

	/*
	 * Set up DDI/DKI-related global data structures. Until this is done,
	 * no DDI/DKI routines can run.
	 */

	while (DDI_GLOB_INIT ())
		cmn_err (CE_PANIC, "Unable to set up DDI/DKI global data");

	/*
	 * After the defer tables have been set up, we can call LOCK_TESTS (),
	 * which we couldn't before because the ..._DEALLOC () calls that the
	 * tests perform at the end require defer-table support.
	 */

	if (LOCK_TESTS (0))
		cmn_err (CE_PANIC, "Lock primitives not functional");

	/*
	 * Call the configured init routines for the system. According to the
	 * specification of init (D2DK), this happens before interrupts are
	 * enabled and before there is any real process context for us to
	 * be able to sleep.
	 */

	for (i = 0 ; i < ninit ; i ++)
		(* inittab [i]) ();

	for (dp = drvl, i = 0 ; i < drvn ; i ++, dp ++) {
		if (dp->d_conp && dp->d_conp->c_load) {
#if	TRACER
			_CHIRP (i + '0', 154);
#endif
			(* dp->d_conp->c_load) ();
#if	0 /* #ifdef	TRACER */
			{
				int		i;
				for (i = 0 ; i < 1000000 ; i ++)
					/* DO NOTHING */ ;
			}
#endif
		}
	}


	/*
	 * Now we can configure the interrupts for the system.
	 */

	for (i = 0 ; i < nintr ; i ++)
		__set_interrupt (inttab + i);

	splbase ();


	/*
	 * And finally, we can call the start routines.
	 */

	for (i = 0 ; i < nstart ; i ++)
		(* starttab [i]) ();
}


#if	0
/*
 * Shut things down, in the right order.
 */

#if	__USE_PROTO__
void (devshutdown) (void)
#else
void
devshutdown __ARGS (())
#endif
{
	int		i;

	spltimeout ();
	ddi_cpu_data ()->dc_int_level = 0;

	for (i = 0 ; i < nexit ; i ++)
		(* exittab [i]) ();

	/*
	 * Turn off interrupts.
	 */

	for (i = 0 ; i < nintr ; i ++)
		clrivec (inttab [i].int_vector);

	for (i = 0 ; i < nhalt ; i ++)
		(* halttab [-- _haltlevel]) ();
}
#endif


/********************************************************************
 * Function: struct inode *dopen(o_dev_t dev, int mode, int flags,
 *				 struct inode *inodep)
 *
 * Description: Given an inode of a driver, open that device.
 *
 * Returns: Inode of opened device or NULL on failure.
 *
 * Affects Globals: None.
 *
 * Affects U-Area: Set u.u_error to error on failure.
 *
 * Side Effects: Unknown (can change argument inodep)
 *
 * Comments:
 *
 * NIGEL: In order to make it at all possible to support the System V DDI / DDK
 * calling conventions for driver entry points, it is necessary for this code
 * to pass the * type * of open being made to the underlying device (which is
 * passed in the 'f' parameter below).
 * 
 * LOUIS: The inode returned may not be the same one passed by the
 * caller.  For this reason, the caller must keep its own handle on the
 * passed inode; something like ip = dopen (dev, mode, flags, ip) should
 * be written with that in mind.
 * 
 */
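
/*
 * Hypothetical usage sketch (names are illustrative only): the caller
 * keeps its original handle because the open may hand back a different
 * inode.
 *
 *	struct inode  *	nip;
 *
 *	if ((nip = dopen (dev, mode, DFCHR, ip)) == NULL)
 *		return;			u.u_error already set
 *	ip = nip;			may differ from the inode passed in
 */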

#if	__USE_PROTO__
struct inode * dopen (o_dev_t dev, int mode, int flags, struct inode * inodep)
#else
struct inode *
dopen (dev, mode, flags, inodep)
o_dev_t		dev;
int		mode;
int		flags;
struct inode  *	inodep;
#endif
{
	CON	      *	cp;

	if (major (dev) == 0xFF && (flags & DFCHR) != 0) {
		/*
		 * Clone open; this only applies to character devices.
		 */

		mode |= IPCLONE;
		dev = minor (dev) << 8;
	}

	if ((cp = drvmap (dev, flags)) == NULL)
		/* drvmap() sets u.u_error on failure */
		return NULL;

	if ((cp->c_flag & flags) == 0) {
		SET_U_ERROR (ENXIO, "dopen ()");
		return NULL;
	}

	if (cp->c_open != NULL)
		(* (kernel_open_t) cp->c_open) (dev, mode, flags,
						SELF->p_credp, & inodep);

	return inodep;
}


/*
 * Close a device.
 *
 * NIGEL: In order to be able to support the System V DDI / DDK calling
 * conventions for driver entry points, this function has to be altered to
 * accept a file-mode and character / block mode parameter. Note that the
 * Coherent 4.0 driver kit documentation says that the driver close entry
 * point is passed the same parameters as the open entry. After this mod,
 * this will be true for the first time.
 */
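
/*
 * As invoked from this file, the open and close entries now receive the
 * same leading arguments; a hypothetical driver pair (for illustration
 * only) would look like:
 *
 *	xxopen  (dev, mode, flags, credp, & inodep);	see dopen () above
 *	xxclose (dev, mode, flags, credp, private);	see dclose () below
 */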

#if	__USE_PROTO__
int dclose (o_dev_t dev, int mode, int flags, __VOID__ * private)
#else
int
dclose (dev, mode, flags, private)
o_dev_t		dev;
int		mode;
int		flags;
__VOID__      *	private;
#endif
{
	CON	      *	cp;

	if ((cp = drvmap (dev, flags)) == NULL)
		return -1;

	if (cp->c_close != NULL)
		(* (kernel_close_t) cp->c_close) (dev, mode, flags,
						  SELF->p_credp, private);
	return 0;
}


/*
 * Call the block entry point of a device.
 */

#if	__USE_PROTO__
void dblock (o_dev_t dev, buf_t * bp)
#else
void
dblock (dev, bp)
o_dev_t		dev;
buf_t           *	bp;
#endif
{
	CON	      *	cp;

	if ((cp = drvmap (dev, DFBLK)) == NULL) {
		cmn_err (CE_WARN, "bogus block request, flags=#%x",
			 bp->b_flags);
		backtrace (0);
	} else if (cp->c_block != NULL) {
		(* cp->c_block) (bp);
		return;
	}

	bioerror (bp, ENXIO);
	brelease (bp);
	SET_U_ERROR (ENXIO, "dblock ()");
}


/*
 * Read from a device.
 */

#if	__USE_PROTO__
int dread (o_dev_t dev, IO * iop, __VOID__ * private)
#else
int
dread (dev, iop, private)
o_dev_t		dev;
IO	      *	iop;
__VOID__      *	private;
#endif
{
	CON	      *	cp;

	if ((cp = drvmap (dev, DFCHR)) == NULL)
		return -1;

	if (cp->c_read != NULL) {
		(* (kernel_read_t) cp->c_read) (dev, iop, SELF->p_credp,
						private);
		return 0;
	}

	return -1;
}


/*
 * Write to a device.
 */

#if	__USE_PROTO__
int dwrite (o_dev_t dev, IO * iop, __VOID__ * private)
#else
int
dwrite (dev, iop, private)
o_dev_t		dev;
IO	      *	iop;
__VOID__      *	private;
#endif
{
	CON	      *	cp;

	if ((cp = drvmap (dev, DFCHR)) == NULL)
		return -1;

	if (cp->c_write != NULL) {
		(* (kernel_write_t) cp->c_write) (dev, iop, SELF->p_credp,
						  private);
		return 0;
	}

	return -1;
}


/*
 * Call the ioctl function for a device.
 *
 * NIGEL: In order to support the System V DDI / DDK calling conventions for
 * device driver entry points, this function needs to pass a "mode" parameter
 * indicating the open mode of the file. There are only two callers of this
 * function: uioctl (), and the /dev/tty driver, "io.386/ct.c", which passes
 * its arguments back here (i.e., a layered open). The "ct.c" call has not
 * been changed.
 *
 * NIGEL: To support the elimination of u_regl, the current user register set
 * is passed in here (NULL if we are being called from a driver).
 */
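
/*
 * Hypothetical layered-call sketch (the real ct.c call is unchanged, as
 * noted above): a driver re-dispatching an ioctl to an underlying device
 * passes a NULL register set, which skips the sgtty compatibility checks
 * below.
 *
 *	rval = dioctl (underdev, com, args, mode, NULL, NULL);
 */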

#if	__USE_PROTO__
int dioctl (o_dev_t dev, int com, __VOID__ * args, int mode,
	    __VOID__ * private, gregset_t * regsetp)
#else
int
dioctl (dev, com, args, mode, private, regsetp)
o_dev_t		dev;
int		com;
__VOID__      *	args;
int		mode;
__VOID__      *	private;
gregset_t     *	regsetp;
#endif
{
	CON	      *	cp;
	int		rval;

	if ((cp = drvmap (dev, DFCHR)) == NULL)
		return -1;

	if (cp->c_ioctl == NULL) {
		SET_U_ERROR (ENOTTY, "dioctl ()");
		return -1;
	}

	if (regsetp != NULL) {
		/*
		 * Here we do a bunch of special hacks so that the tty code
		 * can remain ignorant of the myriad variants on the tty
		 * ioctl's.
		 */

		if (__xmode_286 (regsetp)) {
			rval = 0;
			tioc (dev, com, args, mode, SELF->p_credp, & rval,
			      cp->c_ioctl);
			return rval;
		}
		if ((com == TIOCGETP &&
		     ! useracc (args, sizeof (struct sgttyb), 1)) ||
		    ((com == TIOCSETP || com == TIOCSETN) &&
		     ! useracc (args, sizeof (struct sgttyb), 0))) {
			SET_U_ERROR (EFAULT, "dioctl ()");
			return -1;
		}
	}

	rval = 0;
	(* (kernel_ioctl_t) cp->c_ioctl) (dev, com, args, mode, SELF->p_credp,
					  & rval, private);
	return rval;
}


/*
 * Call the powerfail entry point of a device.
 */

int
dpower (dev)
dev_t		dev;
{
	CON	      *	cp;

	if ((cp = drvmap (dev, 0)) != NULL && cp->c_power != NULL) {
		(* cp->c_power) (dev);
		return 0;
	}

	return -1;
}


/*
 * Call the timeout entry point of a device.
 */

int
dtime (dev)
dev_t		dev;
{
	CON	      *	cp;

	if ((cp = drvmap (dev, 0)) != NULL && cp->c_timer != NULL) {
		(* cp->c_timer) (dev);
		return 0;
	}

	return -1;
}


/*
 * Poll a device.
 */

#if	__USE_PROTO__
int dpoll (o_dev_t dev, int events, int msec, __VOID__ * private)
#else
int
dpoll (dev, events, msec, private)
o_dev_t		dev;
int		events;
int		msec;
__VOID__      *	private;
#endif
{
	CON	      *	cp;

	if ((cp = drvmap (dev, DFCHR)) != NULL &&
	    (cp->c_flag & DFPOL) != 0 && cp->c_poll != NULL)
		return (* (kernel_poll_t) cp->c_poll) (dev, events, msec,
							private);

	return POLLNVAL;
}