Coherent4.2.10/coh.386/fs3.c
/* $Header: /ker/coh.386/RCS/fs3.c,v 2.8 93/10/29 00:55:16 nigel Exp Locker: nigel $ */
/*
* Filesystem (I/O).
*
* $Log: fs3.c,v $
* Revision 2.8 93/10/29 00:55:16 nigel
* R98 (aka 4.2 Beta) prior to removing System Global memory
*
* Revision 2.7 93/09/13 07:58:07 nigel
* Added some extra checks to see that inodes are locked, changed the tests
* in fread () to reduce the possibility of trying to read past EOF.
*
* Revision 2.6 93/09/02 18:07:11 nigel
* Nigel's r85, minor edits only
*
* Revision 2.5 93/08/19 10:37:18 nigel
* r83 ioctl (), corefile, new headers
*
* Revision 2.4 93/08/19 03:26:30 nigel
* Nigel's r83 (Stylistic cleanup)
*
* Revision 2.2 93/07/26 14:28:33 nigel
* Nigel's R80
*/
#include <common/_tricks.h>
#include <kernel/proc_lib.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <stddef.h>
#include <limits.h>
#define _KERNEL 1
#include <kernel/trace.h>
#include <kernel/reg.h>
#include <sys/buf.h>
#include <canon.h>
#include <sys/con.h>
#include <sys/filsys.h>
#include <sys/mount.h>
#include <sys/io.h>
#include <sys/ino.h>
#include <sys/inode.h>
#include <sys/types.h>
/*
 * Perform the type-specific part of opening an inode.
 *
 * The caller must hold the inode lock (this is asserted on entry).
 *
 *  - Character and block special files: the inode is unlocked around the
 *    call to dopen () and locked again afterwards.  Whatever dopen ()
 *    returns is what we return.  If that is a different inode from the one
 *    passed in, we assume dopen () returned it unlocked; the caller has to
 *    deal with that.
 *  - Directories: a request that includes write access is refused with
 *    EISDIR unless the caller is the superuser, and a write-only request is
 *    refused with EISDIR even then.
 *  - Pipes: handed to popen ().
 *  - Any other type: nothing to do.
 *
 * Except for the device case, the inode passed in is returned.
 */
#if __USE_PROTO__
struct inode * iopen (struct inode * ip, unsigned mode)
#else
struct inode *
iopen (ip, mode)
struct inode * ip;
unsigned mode;
#endif
{
	struct inode *	result;
	int		fmt;

	ASSERT (ilocked (ip));

	fmt = ip->i_mode & IFMT;
	result = ip;

	if (fmt == IFCHR || fmt == IFBLK) {
		/* The driver open is run with the inode unlocked. */
		iunlock (ip);
		result = dopen (ip->i_rdev, mode,
				fmt == IFCHR ? DFCHR : DFBLK, ip);
		ilock (ip, "iopen ()");
	} else if (fmt == IFDIR) {
		if ((mode & IPW) != 0) {
			if (super () == 0) {
				/*
				 * Not superuser: replace the EPERM that
				 * super () set with EISDIR.
				 */
				set_user_error (EISDIR);
			} else if (mode == IPW) {
				/*
				 * Write-only on a directory makes no sense,
				 * superuser or not.
				 */
				set_user_error (EISDIR);
			}
		}
	} else if (fmt == IFPIPE)
		popen (ip, mode);

	return result;
}
/*
 * Perform the type-specific part of closing an inode, then detach it.
 *
 * The inode is locked here.  For device inodes the lock is dropped around
 * the call to dclose () and taken again afterwards.  For a block device,
 * bflush () is called first when getment (ip->i_rdev, 0) returns NULL
 * (presumably meaning the device is not mounted -- confirm against
 * getment ()).  Pipes are passed to pclose ().  In every case the routine
 * finishes with idetach ().
 *
 * NIGEL: Modified for new dclose ().
 */
#if __USE_PROTO__
void iclose (struct inode * ip, unsigned mode)
#else
void
iclose (ip, mode)
INODE * ip;
unsigned mode;
#endif
{
	int		fmt;

	ilock (ip, "iclose () #1");
	fmt = ip->i_mode & IFMT;

	if (fmt == IFBLK || fmt == IFCHR) {
		/* Only block devices have cached buffers to push out. */
		if (fmt == IFBLK && getment (ip->i_rdev, 0) == NULL)
			bflush (ip->i_rdev);

		/* The driver close is run with the inode unlocked. */
		iunlock (ip);
		dclose (ip->i_rdev, mode, fmt == IFCHR ? DFCHR : DFBLK,
			ip->i_private);
		ilock (ip, "iclose () #2");
	} else if (fmt == IFPIPE)
		pclose (ip, mode);

	idetach (ip);
}
/*
 * Read from the file described by an inode, as directed by an I/O structure.
 *
 * The inode must be locked unless it is a character special file (this is
 * asserted).  A request for zero bytes (iop->io_ioc == 0) returns at once.
 * Otherwise the request is routed by file type:
 *
 *	character special		dread ()
 *	block special, regular, dir	fread ()
 *	pipe				pread ()
 *	anything else			user error ENXIO
 */
#if __USE_PROTO__
void iread (struct inode * ip, IO * iop)
#else
void
iread (ip, iop)
struct inode * ip;
IO * iop;
#endif
{
	int		fmt;

	ASSERT ((ip->i_mode & IFMT) == IFCHR || ilocked (ip));

	if (iop->io_ioc == 0)
		return;

	fmt = ip->i_mode & IFMT;

	if (fmt == IFCHR)
		dread (ip->i_rdev, iop, ip->i_private);
	else if (fmt == IFBLK || fmt == IFREG || fmt == IFDIR)
		fread (ip, iop);
	else if (fmt == IFPIPE)
		pread (ip, iop);
	else
		set_user_error (ENXIO);
}
/*
 * Write to the file described by an inode, as directed by an I/O structure.
 *
 * The inode must be locked unless it is a character special file (this is
 * asserted).  imodcreat () is called to stamp the inode before the length
 * is examined, so it runs even for a zero-byte request; such a request then
 * returns without transferring anything.  Otherwise the request is routed
 * by file type:
 *
 *	character special	dwrite ()
 *	block special		fwrite ()
 *	regular, directory	fwrite (), but only when
 *				getment (ip->i_dev, 1) is not NULL
 *	pipe			pwrite ()
 *	anything else		user error ENXIO
 */
#if __USE_PROTO__
void iwrite (struct inode * ip, IO * iop)
#else
void
iwrite (ip, iop)
struct inode * ip;
IO * iop;
#endif
{
	int		fmt;

	ASSERT ((ip->i_mode & IFMT) == IFCHR || ilocked (ip));

	imodcreat (ip);		/* write - mtime, ctime */

	if (iop->io_ioc == 0)
		return;

	fmt = ip->i_mode & IFMT;

	if (fmt == IFCHR)
		dwrite (ip->i_rdev, iop, ip->i_private);
	else if (fmt == IFBLK)
		fwrite (ip, iop);
	else if (fmt == IFREG || fmt == IFDIR) {
		/* Nothing is written if the mount lookup fails. */
		if (getment (ip->i_dev, 1) != NULL)
			fwrite (ip, iop);
	} else if (fmt == IFPIPE)
		pwrite (ip, iop);
	else
		set_user_error (ENXIO);
}
/*
 * Break a block offset within a file into the chain of table indices needed
 * to reach that block.
 *
 * The indices are stored in `listp' leaf-first: the first entry is the index
 * of the wanted block within its lowest-level table, each following entry is
 * the index within the next table up, and the last entry is the slot in the
 * inode's own address table (0 .. ND - 1 for a direct block, ND, ND + 1 or
 * ND + 2 for the single, double or triple indirect block).
 *
 * `* numblocks' receives the number of consecutive blocks, starting at the
 * requested one, that live in the same lowest-level table.
 *
 * Returns a pointer just past the last entry stored, or NULL after setting
 * EFBIG if the offset is beyond what NI levels of indirection can address.
 */
#if __USE_PROTO__
__LOCAL__ int * lmap (daddr_t blockofs, int * listp, int * numblocks)
#else
__LOCAL__ int *
lmap (blockofs, listp, numblocks)
daddr_t blockofs;
int * listp;
int * numblocks;
#endif
{
	int		slot;
	int *		next = listp;

	slot = ND - blockofs;
	if (slot > 0) {
		/*
		 * A direct block: a single index, with the rest of the
		 * direct slots in the inode following it.
		 */
		* next = blockofs;
		* numblocks = slot;
		return next + 1;
	}
	blockofs -= ND;

	/*
	 * Index within the lowest-level indirect block; higher levels are
	 * added below only as far as they are needed.
	 */
	slot = nbnrem (blockofs);
	* numblocks = NBN - slot;
	* next ++ = slot;

	blockofs = nbndiv (blockofs);
	if (blockofs == 0) {
		* next = ND;
		return next + 1;
	}

#if NI > 1
	blockofs --;	/* Make offset in next indirect block zero-based */
	* next ++ = nbnrem (blockofs);

	blockofs = nbndiv (blockofs);
	if (blockofs == 0) {
		* next = ND + 1;
		return next + 1;
	}

#if NI > 2
	blockofs --;	/* Make offset in next indirect block zero-based */
	* next ++ = nbnrem (blockofs);

	blockofs = nbndiv (blockofs);
	if (blockofs == 0) {
		* next = ND + 2;
		return next + 1;
	}
#endif
#endif

	SET_U_ERROR (EFBIG, "lmap");
	return NULL;
}
/*
 * Map a run of logical (file-relative) blocks of an inode to physical block
 * numbers.
 *
 * Starting at logical block `blockofs', up to `count' entries are stored in
 * `blocklist'. Each entry is either the block address found in the inode or
 * in its indirect blocks, or EMPTY_BLOCK when that address (or the address
 * of an indirect block leading to it) is zero, i.e. a hole in a sparse file.
 *
 * Returns the number of entries stored, or -1 if lmap () rejects the offset
 * or an indirect block cannot be read. One pass maps only the blocks that
 * share a single address table (the rest of the inode's direct slots, or the
 * rest of one lowest-level indirect block), so the result may be less than
 * `count'. When `t_groupmode' is nonzero further passes are made until
 * `count' entries have been produced.
 *
 * `allocflag' is not used by this routine.
 *
 * `t_groupmode' below is experimental.
 */
#define EMPTY_BLOCK ((daddr_t) -1)
int t_groupmode = 0;
#if __USE_PROTO__
__LOCAL__ int vmap (struct inode * ip, daddr_t blockofs, int count,
daddr_t * blocklist, int allocflag)
#else
__LOCAL__ int
vmap (ip, blockofs, count, blocklist, allocflag)
INODE * ip;
daddr_t blockofs;
int count;
daddr_t * blocklist;
int allocflag;
#endif
{
daddr_t block;
int list [1 + NI];
int nblocks;
daddr_t * outlist;
buf_t * buf;
int * listp;
int resid = count;
more:
/*
 * lmap () fills list [] leaf-first and returns a pointer just past the
 * outermost (inode table) index; nblocks is how many consecutive blocks
 * share the leaf table.
 */
if ((listp = lmap (blockofs, list, & nblocks)) == NULL)
return -1;
if (nblocks > resid)
nblocks = resid;
resid -= nblocks;
blockofs += nblocks;
/* Begin with the address table held in the inode itself. */
outlist = ip->i_a.i_addr;
buf = NULL;
/*
 * Walk down the indirection chain, one table per iteration, until only
 * the leaf index in list [0] remains. For a direct block the loop body
 * is never entered.
 */
while (-- listp != list) {
if ((block = outlist [* listp]) == 0) {
/*
 * If an indirect block is not present, then this
 * implies that at least the next "nblocks" leaf
 * blocks are also not present.
 */
do
* blocklist ++ = EMPTY_BLOCK;
while (-- nblocks > 0);
goto done;
}
/*
 * `block' was copied out of the previous indirect buffer, so that
 * buffer can be released now. Only addresses fetched from a buffer
 * are passed through candaddr (); those taken from the inode table
 * are used as they are. (Presumably candaddr () converts from the
 * on-disk canonical form -- confirm against <canon.h>.)
 */
if (buf != NULL) {
brelease (buf);
candaddr (block);
}
if ((buf = bread (ip->i_dev, block, BUF_SYNC)) == NULL)
return -1;
outlist = (daddr_t *) buf->b_vaddr;
}
/*
 * outlist is now the leaf table and list [0] the index of the first
 * wanted slot; copy out nblocks consecutive entries, turning zero
 * addresses into EMPTY_BLOCK.
 */
do {
if ((block = outlist [list [0] ++]) == 0)
block = EMPTY_BLOCK;
else if (buf != NULL)
candaddr (block);
* blocklist ++ = block;
} while (-- nblocks > 0);
done:
if (buf != NULL)
brelease (buf);
/* In group mode, keep going until the whole request has been mapped. */
if (t_groupmode && resid > 0)
goto more;
return count - resid;
}
/*
 * The parameter below controls the amount of readahead that happens in
 * fread (): a positive value is the number of extra blocks asked for when a
 * sequential access is detected, a negative value is treated as no
 * readahead, and the specific value -1 also suppresses the asynchronous
 * prefetch reads.
 */
extern int t_readahead;
#define READGROUP 16 /*
* Maximum # of blocks to read as a
* single normal group.
*/
#define READAHEAD 8 /*
* Maximum # of blocks to read ahead.
*/
/*
 * Read from a regular file, directory or block special file into `iop'.
 *
 * For a block special inode the device is ip->i_rdev, logical block numbers
 * are used directly as device block numbers, and the transfer length is
 * iop->io_ioc. Otherwise the device is ip->i_dev, blocks are translated
 * with vmap (), and the transfer is limited to the bytes between
 * iop->io_seek and ip->i_size (nothing is read at or beyond end of file).
 *
 * Blocks are mapped in groups into list []: `abn' is the logical block that
 * list [0] describes and `zbn' is one past the last logical block mapped.
 * Each mapped block that is not a hole is first requested with BUF_ASYNC
 * (unless t_readahead is -1); the blocks are then consumed one at a time
 * with a BUF_SYNC read. Holes are satisfied with ioclear ().
 *
 * Returns early, without updating ip->i_lastblock, if mapping fails, a
 * buffer cannot be read, or a user error is pending after a transfer.
 */
#if __USE_PROTO__
void fread (struct inode * ip, IO * iop)
#else
void
fread (ip, iop)
struct inode * ip;
IO * iop;
#endif
{
off_t res;
unsigned off;
dev_t dev;
daddr_t lbn;
daddr_t abn;
daddr_t zbn;
buf_t * bp;
int blk;
daddr_t maxblk;
daddr_t list [READGROUP + READAHEAD];
int do_readahead;
/* blk: nonzero when reading a block special file directly. */
if ((ip->i_mode & IFMT) == IFBLK) {
blk = 1;
dev = ip->i_rdev;
} else {
blk = 0;
dev = ip->i_dev;
}
/* Empty mapping window, so the first pass of the loop fills list []. */
abn = 0;
zbn = 0;
lbn = blockn (iop->io_seek);
off = blocko (iop->io_seek);
/*
 * NIGEL: The commented-out code talks about a mysterious "unsigned
 * prob" which does not in reality exist. All this really wants to
 * do is pick the minimum of the remaining size and the requested
 * size.
 */
#if 0
res = ip->i_size - iop->io_seek;
if (blk != 0 || (res > 0 && res > iop->io_ioc))
res = iop->io_ioc; /* unsigned prob with io_ioc */
if (res <= 0)
return;
#endif
/* res: number of bytes still to be transferred. */
if (blk)
res = iop->io_ioc;
else {
if (iop->io_seek > ip->i_size)
return;
if ((res = ip->i_size - iop->io_seek) > iop->io_ioc)
res = iop->io_ioc;
}
if (res == 0)
return;
/*
 * Check for sequential access to see whether we should enable read-
 * ahead.
 */
if (lbn == ip->i_lastblock + 1) {
if ((do_readahead = t_readahead) < 0)
do_readahead = 0;
} else
do_readahead = 0;
/*
 * We record the largest block-offset within the file to avoid trying
 * to read past the end of file with readahead. This causes Bad Things
 * to happen with pipes, where funky data is stored in the indirect-
 * block slots. For block devices, there is sadly no way to get this
 * information under the Coherent device-driver system.
 */
maxblk = blk ? INT_MAX : blockn (ip->i_size + BSIZE - 1);
do {
int count;
/* Current block lies outside the mapped window: map a new group. */
if (lbn >= zbn) {
unsigned i;
/*
 * Ask for enough blocks to cover the remaining bytes plus the
 * readahead, limited by the size of list [] and by end of file.
 */
if ((count = blockn (res + BSIZE - 1) +
do_readahead) > __ARRAY_LENGTH (list))
count = __ARRAY_LENGTH (list);
ASSERT (count > do_readahead);
if (lbn + count >= maxblk)
count = maxblk - lbn;
/* vmap () may map fewer blocks than requested. */
if (blk == 0 &&
(count = vmap (ip, lbn, count, list, 0)) < 0)
return;
abn = lbn;
/*
 * Start asynchronous reads for every block in the group that is
 * not a hole; zbn ends up one past the last block mapped.
 */
for (i = 0, zbn = lbn ; i < count ; i ++, zbn ++) {
if (blk != 0)
list [i] = zbn;
else if (list [i] == EMPTY_BLOCK)
continue;
if (t_readahead == -1)
continue;
(void) bread (dev, list [i], BUF_ASYNC);
}
}
/* From here on `count' is the byte count for this block. */
if (res < (count = BSIZE - off))
count = res;
if (list [lbn - abn] == EMPTY_BLOCK)
ioclear (iop, count);
else {
if ((bp = bread (dev, list [lbn - abn],
BUF_SYNC)) == NULL)
return;
iowrite (iop, bp->b_vaddr + off, count);
brelease (bp);
}
if (get_user_error ())
return;
lbn ++;
off = 0;
res -= count;
} while (res > 0);
/* Remember the last block read for the sequential-access test above. */
ip->i_lastblock = lbn - 1;
}
/*
 * Given an inode pointer, return a buffer for the requested virtual block
 * `blkofs', allocating the data block and any missing indirect blocks on
 * the way (so the necessary blocks of a sparse file get allocated).
 *
 * If the flag `claim' is set, the final (data) buffer is just claimed rather
 * than read, because the caller is going to change its contents completely.
 * Newly allocated blocks are claimed and, except for a claimed final block,
 * zero-filled.
 *
 * Returns NULL if lmap () rejects the offset, balloc () cannot supply a
 * block, or a block cannot be read.
 */
#if __USE_PROTO__
__LOCAL__ buf_t * aread (struct inode * ip, daddr_t blkofs, int claim)
#else
__LOCAL__ buf_t *
aread (ip, blkofs, claim)
struct inode * ip;
daddr_t blkofs;
int claim;
#endif
{
buf_t * bp;
int * listp;
dev_t dev;
int l;
int aflag;
int lflag;
daddr_t * dp;
daddr_t block;
daddr_t blocksave;
int list [1 + NI];
int nblocks;
if ((listp = lmap (blkofs, list, & nblocks)) == NULL)
return NULL;
/* aflag: `block' was allocated just now and holds no valid data yet. */
aflag = 0;
dev = ip->i_dev;
/* The outermost index selects a slot in the inode's own address table. */
block = ip->i_a.i_addr [l = * -- listp];
if (block == 0) {
aflag = 1;
if ((block = balloc (dev)) == 0)
return NULL;
T_INODE (ip, cmn_err (CE_NOTE,
"inode %d allocated block %d",
ip->i_ino, block));
ip->i_a.i_addr [l] = block;
}
for (;;) {
/* lflag: no indices remain, so `block' is the final data block. */
lflag = listp == list;
/*
 * If we are not allocating a new block and the caller is
 * going to preserve any of the data that we are going to
 * return, then read in the previous block contents.
 */
if (! (aflag || (claim && lflag))) {
if ((bp = bread (dev, block, BUF_SYNC)) == NULL)
return NULL;
} else {
/*
 * NOTE(review): the result of bclaim () is used without a NULL
 * check -- confirm that it cannot fail.
 */
bp = bclaim (dev, block, BSIZE, BUF_SYNC);
/*
 * If this is the last block and the caller is just
 * going to overwrite it, don't zero-fill.
 */
if (! (claim && lflag))
clrbuf (bp);
bp->b_flag |= BFMOD;
}
/*
 * Remember this block's address in case it has to be read again
 * below to record a newly allocated child.
 */
blocksave = block;
if (lflag)
return bp;
aflag = 0;
/* Fetch the next-level address from this indirect block. */
dp = (daddr_t *) bp->b_vaddr;
block = dp [l = * -- listp];
candaddr (block);
/*
 * WARNING! This is only legal if the inode is locked!
 * Sleazier than anything you've seen before, eh?
 * Love, your pal, Louis.
 */
ASSERT(ilocked(ip));
brelease(bp);
if (block == 0) {
/*
 * The child is missing: allocate it, then read the indirect block
 * again (it was released above) and store the new address in slot
 * `l', passing it through candaddr () before marking the buffer
 * modified.
 */
aflag = 1;
if ((block = balloc (dev)) == 0) {
return NULL;
}
T_INODE (ip, cmn_err (CE_NOTE,
"inode %d allocated block %d",
ip->i_ino, block));
if ((bp = bread(dev, blocksave, BUF_SYNC)) == NULL)
cmn_err(CE_PANIC,
"Fatal error updating free list");
dp = (daddr_t *)bp->b_vaddr;
dp [l] = block;
candaddr (dp [l]);
bp->b_flag |= BFMOD;
brelease(bp);
}
}
}
/*
 * The parameter below controls the way in which blocks are written.  It is
 * currently experimental.
 *
 * (2 is best for avoiding disk thrashing iff we can do something clever like
 * block-sorting the I/O, especially on syncs)
 */

int t_writemode = BUF_ASYNC;

/*
 * Write the data described by `iop' to a regular or block special file,
 * one filesystem block at a time.
 *
 * For each block, the buffer is obtained from aread () (which allocates as
 * needed) or, for a block special file, straight from the device: claimed
 * when the whole block is being replaced, read when only part of it is.
 * After the data has been copied in with ioread (), a completely rewritten
 * block is handed to bwrite () using `t_writemode', unless that mode is 2
 * or the inode is a pipe; every other buffer is simply marked modified and
 * released.
 *
 * iop->io_seek is advanced after every block, and ip->i_size grows to match
 * when a file that is not a block special file is extended.  The routine
 * stops early if a buffer cannot be obtained or a user error is pending
 * after a transfer.
 */

#if __USE_PROTO__
void fwrite (struct inode * ip, IO * iop)
#else
void
fwrite (ip, iop)
struct inode * ip;
IO * iop;
#endif
{
	unsigned	chunk;		/* bytes going into this block */
	unsigned	start;		/* byte offset within the block */
	daddr_t		blockno;
	buf_t *		buf;
	int		is_blkdev;
	int		whole;		/* entire block is being replaced */

	blockno = blockn (iop->io_seek);
	start = blocko (iop->io_seek);
	is_blkdev = (ip->i_mode & IFMT) == IFBLK;

	for ( ; iop->io_ioc > 0 ; blockno ++, start = 0) {
		chunk = BSIZE - start;
		if (iop->io_ioc < chunk)
			chunk = iop->io_ioc;

		whole = start == 0 && chunk == BSIZE;

		if (is_blkdev) {
			if (whole)
				buf = bclaim (ip->i_rdev, blockno, BSIZE,
					      BUF_SYNC);
			else
				buf = bread (ip->i_rdev, blockno, BUF_SYNC);
		} else
			buf = aread (ip, blockno, whole);

		if (buf == NULL)
			return;

		ioread (iop, buf->b_vaddr + start, chunk);
		buf->b_flag |= BFMOD;

		if (! whole || t_writemode == 2 ||
		    (ip->i_mode & IFMT) == IFPIPE)
			brelease (buf);
		else {
			bwrite (buf, t_writemode);
			if (t_writemode == BUF_SYNC)
				brelease (buf);
		}

		if (get_user_error ())
			return;

		iop->io_seek += chunk;
		if (! is_blkdev && iop->io_seek > ip->i_size)
			ip->i_size = iop->io_seek;
	}
}
/*
 * Return a buffer holding the contents of one virtual block of an inode.
 *
 * The block is located with vmap ().  If it has a physical address the block
 * is read synchronously from ip->i_dev and the result of bread () is
 * returned.  If vmap () reports a hole (EMPTY_BLOCK), an empty buffer is
 * obtained from geteblk (), tagged with the inode's device and cleared, so
 * the caller sees a block of zeroes.
 *
 * Returns NULL if vmap () fails.
 */
#if __USE_PROTO__
buf_t * vread (struct inode * ip, daddr_t blockofs)
#else
buf_t *
vread (ip, blockofs)
struct inode * ip;
daddr_t blockofs;
#endif
{
	daddr_t		physblk;
	buf_t *		empty;

	if (vmap (ip, blockofs, 1, & physblk, 0) < 0)
		return NULL;

	if (physblk == EMPTY_BLOCK) {
		/* A hole in a sparse file: manufacture a zeroed buffer. */
		empty = geteblk ();
		empty->b_dev = ip->i_dev;
		clrbuf (empty);
		return empty;
	}

	return bread (ip->i_dev, physblk, BUF_SYNC);
}