Minix1.5/fs/read.c

Compare this file to the similar file:
Show the results in this format:

/* This file contains the heart of the mechanism used to read (and write)
 * files.  Read and write requests are split up into chunks that do not cross
 * block boundaries.  Each chunk is then processed in turn.  Reads on special
 * files are also detected and handled.
 *
 * The entry points into this file are
 *   do_read:	 perform the READ system call by calling read_write
 *   read_write: actually do the work of READ and WRITE
 *   read_map:	 given an inode and file position, look up its zone number
 *   rw_user:	 call the kernel to read and write user space
 *   read_ahead: manage the block read ahead business
 *   rahead:	 fetch a block, prefetching further blocks when convenient
 */

#include "fs.h"
#include <fcntl.h>
#include <minix/com.h>
#include "buf.h"
#include "file.h"
#include "fproc.h"
#include "inode.h"
#include "param.h"
#include "super.h"

#define FD_MASK          077	/* max file descriptor is 63 */

PRIVATE message umess;		/* message for asking SYSTASK for user copy */

FORWARD int rw_chunk();

/*===========================================================================*
 *				do_read					     *
 *===========================================================================*/
PUBLIC int do_read()
{
/* Entry point for the READ system call.  All of the work is shared with
 * WRITE and lives in read_write(); this routine only selects the direction
 * and hands back whatever read_write() returns.
 */

  int result;

  result = read_write(READING);
  return(result);
}


/*===========================================================================*
 *				read_write				     *
 *===========================================================================*/
PUBLIC int read_write(rw_flag)
int rw_flag;			/* READING or WRITING */
{
/* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call.
 *
 * The request parameters 'fd', 'buffer' and 'nbytes' and the caller 'who'
 * are not arguments of this function; they are names defined elsewhere.
 *
 * The result is the number of bytes transferred (cum_io) when everything
 * went well, or an error code:
 *   EINVAL    - nbytes is negative, or the file position is out of range
 *   err_code  - get_filp() rejected the file descriptor
 *   EBADF     - the descriptor is not open for the requested direction
 *   EFBIG     - a write to a non-block-special file would exceed s_max_size
 *   rdwt_err  - a nonzero value left in 'rdwt_err' during the transfer
 *               (END_OF_FILE is reported as the byte count instead)
 * A zero-byte request returns 0 once the descriptor checks have passed.
 */

  register struct inode *rip;
  register struct filp *f;
  register off_t bytes_left, f_size;
  register unsigned off, cum_io;
  register int oflags;
  off_t position;
  int r, chunk, mode_word, usr, seg, block_spec, char_spec, regular;
  struct filp *wf;

  /* MM loads segments by putting funny things in upper 10 bits of 'fd'. */
  if (who == MM_PROC_NR && (fd & (~BYTE)) ) {
	usr = (fd >> 8) & BYTE;
	seg = (fd >> 6) & 03;
	fd &= FD_MASK;		/* get rid of user and segment bits */
  } else {
	usr = who;		/* normal case */
	seg = D;
  }

  /* If the file descriptor is valid, get the inode, size and mode.
   * NOTE: on INTEL the 'if' inside the #if below has no body of its own;
   * it guards the 'nbytes < 0' test that follows, so that test is skipped
   * when the caller is MM.
   */
#if (CHIP == INTEL)
  if (who != MM_PROC_NR)	/* only MM > 32K */
#endif

  if (nbytes < 0) return(EINVAL);
  if ( (f = get_filp(fd)) == NIL_FILP) return(err_code);
  if ( ((f->filp_mode) & (rw_flag == READING ? R_BIT : W_BIT)) == 0)
	return(EBADF);
  if (nbytes == 0) return(0);	/* so char special files need not check for 0*/
  position = f->filp_pos;
  if (position < 0 || position > MAX_FILE_POS) return(EINVAL);
  oflags = f->filp_flags;
  rip = f->filp_ino;
  f_size = rip->i_size;
  r = OK;
  cum_io = 0;
  mode_word = rip->i_mode & I_TYPE;
  regular = mode_word == I_REGULAR || mode_word == I_NAMED_PIPE;

  char_spec = (mode_word == I_CHAR_SPECIAL ? 1 : 0);
  block_spec = (mode_word == I_BLOCK_SPECIAL ? 1 : 0);
  /* A block special file whose inode size is 0 is treated as unbounded. */
  if (block_spec && f_size == 0) f_size = MAX_P_LONG;
  rdwt_err = OK;		/* set to EIO if disk error occurs */

  /* Check for character special files. */
  if (char_spec) {
	/* The whole request is handed to dev_io(); a non-negative result is
	 * the number of bytes transferred, a negative one is kept in 'r'.
	 */
	if ((r = dev_io(rw_flag, oflags & O_NONBLOCK, (dev_t) rip->i_zone[0],
				 position, nbytes, who, buffer)) >= 0) {
		cum_io = r;
		position += r;
		r = OK;
	}
  } else {
	if (rw_flag == WRITING && block_spec == 0) {
		/* Check for O_APPEND flag.  This must be done before the
		 * size check below, so that the check is made against the
		 * position at which the data will really be written.
		 */
		if (oflags & O_APPEND) position = f_size;

		/* Check in advance to see if file will grow too big. */
		if (position > get_super(rip->i_dev)->s_max_size - nbytes )
			return(EFBIG);

		/* Clear the zone containing present EOF if hole about
		 * to be created.  This is necessary because all unwritten
		 * blocks prior to the EOF must read as zeros.
		 */
		if (position > f_size) clear_zone(rip, f_size, 0);
	}

	/* Pipes are a little different.  Check. */
	if (rip->i_pipe &&
	    (r = pipe_check(rip, rw_flag, oflags, nbytes, position)) <= 0)
		return r;

	/* Split the transfer into chunks that don't span two blocks. */
	while (nbytes != 0) {
		off = position % BLOCK_SIZE;	/* offset within a block */
		chunk = MIN(nbytes, BLOCK_SIZE - off);
		/* A negative 'nbytes' (let through for MM above) makes MIN
		 * pick it; use the rest of the block instead.
		 */
		if (chunk < 0) chunk = BLOCK_SIZE - off;

		if (rw_flag == READING || (block_spec && rw_flag == WRITING)) {
			/* Reads, and writes to block special files, may not
			 * go past f_size.
			 */
			bytes_left = f_size - position;
			if (position >= f_size) break;	/* we are beyond EOF */
			if (chunk > bytes_left) chunk = bytes_left;
		}

		/* Read or write 'chunk' bytes. */
		r = rw_chunk(rip, position, off, chunk, nbytes, rw_flag,
							     buffer, seg, usr);
		if (r != OK) break;	/* the copy to/from the user failed */
		if (rdwt_err < 0) break;

		/* Update counters and pointers. */
		buffer += chunk;	/* user buffer address */
		nbytes -= chunk;	/* bytes yet to be read */
		cum_io += chunk;	/* bytes read so far */
		position += chunk;	/* position within the file */
	}
  }

  /* On write, update file size and mark the modification time. */
  if (rw_flag == WRITING) {
	if (regular || mode_word == I_DIRECTORY) {
		if (position > f_size) rip->i_size = position;
		rip->i_update = MTIME; /* mark mtime for update later */
		rip->i_dirt = DIRTY;
	}
  } else {
	if (rip->i_pipe && position >= rip->i_size) {
		/* Reset pipe pointers. */
		rip->i_size = 0;	/* no data left */
		position = 0;		/* reset reader(s) */
		if ( (wf = find_filp(rip, W_BIT)) != NIL_FILP) wf->filp_pos =0;
	}
  }
  f->filp_pos = position;

  /* Check to see if read-ahead is called for, and if so, set it up. */
  if (rw_flag == READING && rip->i_seek == NO_SEEK && position % BLOCK_SIZE== 0
		&& (regular || mode_word == I_DIRECTORY)) {
	rdahed_inode = rip;
	rdahedpos = position;
  }
  rip->i_seek = NO_SEEK;

  if (rdwt_err != OK) r = rdwt_err;	/* check for disk error */
  if (rdwt_err == END_OF_FILE) r = cum_io;
  return(r == OK ? cum_io : r);
}


/*===========================================================================*
 *				rw_chunk				     *
 *===========================================================================*/
PRIVATE int rw_chunk(rip, position, off, chunk, left, rw_flag, buff, seg, usr)
register struct inode *rip;	/* pointer to inode for file to be rd/wr */
off_t position;			/* position within file to read or write */
unsigned off;			/* off within the current block */
int chunk;			/* number of bytes to read or write */
unsigned left;			/* max number of bytes wanted after position */
int rw_flag;			/* READING or WRITING */
char *buff;			/* virtual address of the user buffer */
int seg;			/* T or D segment in user space */
int usr;			/* which user process */
{
/* Read or write (part of) a block. */

  register struct buf *bp;
  register int r;
  int dir, n, block_spec;
  block_nr b;
  dev_t dev;

  block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
  if (block_spec) {
	b = position/BLOCK_SIZE;
	dev = (dev_t) rip->i_zone[0];
  } else {
	b = read_map(rip, position);
	dev = rip->i_dev;
  }

  if (!block_spec && b == NO_BLOCK) {
	if (rw_flag == READING) {
		/* Reading from a nonexistent block.  Must read as all zeros. */
		bp = get_block(NO_DEV, NO_BLOCK, NORMAL);     /* get a buffer */
		zero_block(bp);
	} else {
		/* Writing to a nonexistent block. Create and enter in inode. */
		if ((bp = new_block(rip, position)) == NIL_BUF)return(err_code);
	}
  } else if (rw_flag == READING) {
	/* Read and read ahead if convenient. */
	bp = rahead(rip, b, position, left);
  } else {
	/* Normally an existing block to be partially overwritten is first read
	 * in.  However, a full block need not be read in.  If it is already in
	 * the cache, acquire it, otherwise just acquire a free buffer.
	 */
	n = (chunk == BLOCK_SIZE ? NO_READ : NORMAL);
	if (!block_spec && off == 0 && position >= rip->i_size) n = NO_READ;
	bp = get_block(dev, b, n);
  }

  /* In all cases, bp now points to a valid buffer. */
  if (rw_flag == WRITING && chunk != BLOCK_SIZE && !block_spec &&
					position >= rip->i_size && off == 0)
	zero_block(bp);
  dir = (rw_flag == READING ? TO_USER : FROM_USER);
  r = rw_user(seg, usr, (vir_bytes)buff, (vir_bytes)chunk, bp->b_data+off, dir);
  if (rw_flag == WRITING) bp->b_dirt = DIRTY;
  n = (off + chunk == BLOCK_SIZE ? FULL_DATA_BLOCK : PARTIAL_DATA_BLOCK);
  put_block(bp, n);
  return(r);
}


/*===========================================================================*
 *				read_map				     *
 *===========================================================================*/
PUBLIC block_nr read_map(rip, position)
register struct inode *rip;	/* ptr to inode to map from */
off_t position;			/* position in file whose blk wanted */
{
/* Translate a byte position within the file described by 'rip' into the
 * number of the block (not zone) that holds it.  NO_BLOCK is returned when
 * any zone entry on the lookup path is NO_ZONE.
 */

  register struct buf *ind_bp;
  register zone_nr zn;
  register block_nr blk;
  register long idx, file_zone, rel_block;
  register int shift, blk_in_zone;

  shift = scale_factor(rip);		/* for block-zone conversion */
  rel_block = position/BLOCK_SIZE;	/* relative blk # in file */
  file_zone = rel_block >> shift;	/* zone holding 'position' */
  blk_in_zone = rel_block - (file_zone << shift); /* blk # within that zone */

  if (file_zone >= NR_DZONE_NUM) {
	/* The zone is not listed in the inode itself, so it has to be
	 * looked up through the single or the double indirect block.
	 */
	idx = file_zone - NR_DZONE_NUM;	/* direct zones don't count */

	if (idx >= NR_INDIRECTS) {
		/* Double indirect: first find the single indirect block. */
		zn = rip->i_zone[NR_DZONE_NUM+1];
		if (zn == NO_ZONE) return(NO_BLOCK);
		idx -= NR_INDIRECTS;	/* single indir doesn't count */
		blk = (block_nr) zn << shift;
		ind_bp = get_block(rip->i_dev, blk, NORMAL);
		zn = ind_bp->b_ind[(int)(idx/NR_INDIRECTS)];
		put_block(ind_bp, INDIRECT_BLOCK);
		idx = idx % NR_INDIRECTS;	/* index into single ind blk */
	} else {
		/* Single indirect. */
		zn = rip->i_zone[NR_DZONE_NUM];
	}

	/* 'zn' is the zone of the single indirect block, 'idx' indexes it. */
	if (zn == NO_ZONE) return(NO_BLOCK);
	blk = (block_nr) zn << shift;
	ind_bp = get_block(rip->i_dev, blk, NORMAL);
	zn = ind_bp->b_ind[(int) idx];
	put_block(ind_bp, INDIRECT_BLOCK);
  } else {
	/* The zone number is stored directly in the inode. */
	zn = rip->i_zone[(int) file_zone];
  }

  /* 'zn' is now the data zone itself, if there is one. */
  if (zn == NO_ZONE) return(NO_BLOCK);
  blk = ((block_nr) zn << shift) + blk_in_zone;
  return(blk);
}


/*===========================================================================*
 *				rw_user					     *
 *===========================================================================*/
PUBLIC int rw_user(s, u, vir, bytes, buff, direction)
int s;				/* D or T space (stack is also D) */
int u;				/* process number to r/w (usually = 'who') */
vir_bytes vir;			/* virtual address to move to/from */
vir_bytes bytes;		/* how many bytes to move */
char *buff;			/* pointer to FS space */
int direction;			/* TO_USER or FROM_USER */
{
/* Copy 'bytes' bytes between FS space and the virtual space of process 'u'.
 * The request is described in 'umess' and passed to sys_copy(); the m_type
 * field found in 'umess' afterwards is returned.
 *     TO_USER:       FS space is the source, the process is the destination
 *     anything else: the process is the source, FS space is the destination
 */

  int to_user;

  to_user = (direction == TO_USER);

  /* Where the data comes from. */
  umess.SRC_SPACE   = (to_user ? D : s);
  umess.SRC_PROC_NR = (to_user ? FS_PROC_NR : u);
  umess.SRC_BUFFER  = (to_user ? (long) buff : (long) vir);

  /* Where the data goes to. */
  umess.DST_SPACE   = (to_user ? s : D);
  umess.DST_PROC_NR = (to_user ? u : FS_PROC_NR);
  umess.DST_BUFFER  = (to_user ? (long) vir : (long) buff);

  umess.COPY_BYTES = (long) bytes;
  sys_copy(&umess);
  return(umess.m_type);
}


/*===========================================================================*
 *				read_ahead				     *
 *===========================================================================*/
PUBLIC void read_ahead()
{
/* Bring the block at 'rdahedpos' of the inode 'rdahed_inode' into the cache
 * before anybody asks for it.  The pending request is cancelled first, so it
 * is carried out at most once.
 */

  register struct inode *ino;
  struct buf *blk;
  block_nr blk_no;

  ino = rdahed_inode;		/* inode to read ahead from */
  rdahed_inode = NIL_INODE;	/* turn off read ahead */

  blk_no = read_map(ino, rdahedpos);
  if (blk_no != NO_BLOCK) {
	/* There is a block at that position: fetch it and release it again
	 * right away.
	 */
	blk = rahead(ino, blk_no, rdahedpos, BLOCK_SIZE);
	put_block(blk, PARTIAL_DATA_BLOCK);
  }
  /* Otherwise there is nothing to read (at EOF). */
}


/*===========================================================================*
 *				rahead					     *
 *===========================================================================*/
PUBLIC struct buf *rahead(rip, baseblock, position, bytes_ahead)
register struct inode *rip;	/* pointer to inode for file to be read */
block_nr baseblock;		/* block at current position */
off_t position;			/* position within file */
unsigned bytes_ahead;		/* bytes beyond position for immediate use */
{
/* Fetch a block from the cache or the device.  If a physical read is
 * required, prefetch as many more blocks as convenient into the cache.
 * This usually covers bytes_ahead plus any more blocks on the last "track".
 * The device driver may decide it knows better about the track geometry
 * and stop reading at any track boundary (or after an error).
 * Rw_scattered() puts an optional flag on all reads to allow this.
 *
 * The buffer returned is the one for 'baseblock'.
 */

  block_nr block;
  unsigned blocks_ahead;
  unsigned blocks_per_track;
  register struct buf *bp;
  int block_spec;
  dev_t dev;
  off_t dev_size;
  off_t file_size;
  unsigned fragment;
  unsigned limit_bufs_in_use;
  unsigned max_track;
  int reading_ahead;
  static struct buf *read_q[NR_BUFS];	/* static so it isn't on stack */
  int read_q_size;
  unsigned track;

  block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
  if (block_spec)
	dev = (dev_t) rip->i_zone[0];
  else
	dev = rip->i_dev;

  /* If get_block() hands back a buffer that already belongs to a device,
   * no read is needed and the buffer is returned as it is.
   */
  bp = get_block(dev, baseblock, PREFETCH);
  if (bp->b_dev != NO_DEV) return(bp);

  /* Guesstimate blocks_per_track.  A bad guess will work but be sub-optimal.
   * Dev_open may eventually do it properly.
   */
  if (block_spec)
	dev_size = rip->i_size;
  else
#if (MACHINE == ATARI)
	dev_size =  80L * 2 * 9 * 512;	/* can be 80L*1*9*512 as well */
#else
	dev_size =  80L * 2 * 15 * 512;	/* change to your usual floppy size */
#endif

  /* The tests below must form a single if/else chain: a size of 0 is also
   * smaller than every floppy size, so without the 'else' the hard disk
   * value would be overwritten by the low-density floppy value.
   */
  if (dev_size == 0)
	blocks_per_track = 17;	/* hard disk (17 * nr_heads / 2 is too many) */
  else if (dev_size < 80L * 2 * 15 * 512)
	blocks_per_track = 9;	/* low-density floppy */
  else if (dev_size < 80L * 2 * 18 * 512)
	blocks_per_track = 15;	/* high-density floppy */
  else
	blocks_per_track = 18;	/* higher-density floppy */

  file_size = rip->i_size;
  if (block_spec && file_size == 0) file_size = MAX_P_LONG;

  /* Advance 'position' to the start of the block after the base block and
   * count how many further blocks the caller needs right away.
   */
  fragment = (unsigned) (position % BLOCK_SIZE);
  position = position - fragment + BLOCK_SIZE;
  blocks_ahead = (fragment + bytes_ahead + BLOCK_SIZE - 1) / BLOCK_SIZE - 1;

  /* Set the limit (max + 1) on buffers used. Avoid taking the last 2 buffers
   * for ordinary files, because the cache will thrash if these are needed
   * for indirect blocks.  There is no point in stopping earlier for the
   * immediately-needed part of the read.  Large reads will evict from the
   * cache all blocks except those for the read and the indirect blocks, no
   * matter what is done here.
   */
  limit_bufs_in_use = block_spec ? NR_BUFS : NR_BUFS - 2;

  max_track = bp->b_blocknr / blocks_per_track;
  reading_ahead = FALSE;
  read_q[0] = bp;		/* first buffer must be read */
  read_q_size = 1;

  /* The next loop has 2 phases, controlled by 'reading_ahead'.  In the first
   * phase the blocks needed immediately are queued; in the second, further
   * blocks are queued only if they are on the highest "track" seen so far.
   */
  while (TRUE) {
	if (position >= file_size || bufs_in_use >= limit_bufs_in_use) break;
  	if (blocks_ahead != 0)
		--blocks_ahead;
	else {
		/* All the immediately-needed blocks have been read.  Give
		 * up after seeks and partial reads.
		 */
		if (reading_ahead || rip->i_seek == ISEEK ||
		    (fragment + bytes_ahead) % BLOCK_SIZE != 0) break;

		/* Try for more blocks on the last "track".  Try a few more
		 * than 'blocks_per_track' to allow for blocks out of order.
		 * Reducing 'limit_bufs_in_use' here might reduce thrashing.
		 */
		blocks_ahead = blocks_per_track + 6;
		reading_ahead = TRUE;
  	}
	if (block_spec)
		block = position / BLOCK_SIZE;
	else
		block = read_map(rip, position);
	position += BLOCK_SIZE;
	track = block / blocks_per_track;
	if (reading_ahead) {
		if (track != max_track) continue;
	} else {
		if (track > max_track) max_track = track;
	}
	if (block_spec || block != NO_BLOCK) {
		/* Queue the block only if its buffer still has to be read;
		 * otherwise give the buffer straight back.
		 */
		bp = get_block(dev, block, PREFETCH);
		if (bp->b_dev == NO_DEV)
			read_q[read_q_size++] = bp;
		else
			put_block(bp, FULL_DATA_BLOCK);
	}
  }

  /* Read everything that was queued, then acquire the base block again. */
  rw_scattered(dev, read_q, read_q_size, READING);
  return(get_block(dev, baseblock, NORMAL));
}