/* This file contains the heart of the mechanism used to read (and write)
 * files.  Read and write requests are split up into chunks that do not cross
 * block boundaries.  Each chunk is then processed in turn.  Reads on special
 * files are also detected and handled.
 *
 * The entry points into this file are
 *   do_read:    perform the READ system call by calling read_write
 *   read_write: actually do the work of READ and WRITE
 *   read_map:   given an inode and file position, look up its block number
 *   rw_user:    call the kernel to read and write user space
 *   read_ahead: manage the block read ahead business
 *   rahead:     fetch a block, prefetching following blocks when a physical
 *               read is needed
 */

#include "fs.h"
#include <fcntl.h>
#include <minix/com.h>
#include "buf.h"
#include "file.h"
#include "fproc.h"
#include "inode.h"
#include "param.h"
#include "super.h"

#define FD_MASK          077    /* max file descriptor is 63 */

PRIVATE message umess;          /* message for asking SYSTASK for user copy */

FORWARD int rw_chunk();

/*===========================================================================*
 *                              do_read                                      *
 *===========================================================================*/
PUBLIC int do_read()
{
/* Perform the READ system call.  All the work is done by read_write(). */

  return(read_write(READING));
}


/*===========================================================================*
 *                              read_write                                   *
 *===========================================================================*/
PUBLIC int read_write(rw_flag)
int rw_flag;                    /* READING or WRITING */
{
/* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call.
 * The request parameters are taken from 'fd', 'buffer' and 'nbytes', which
 * are not declared in this file; 'fd', 'buffer' and 'nbytes' are modified
 * while the transfer proceeds.
 *
 * Returns the number of bytes transferred (cum_io) when everything went OK,
 * otherwise the error code produced by one of the checks, by the transfer,
 * or found in 'rdwt_err'.
 */

  register struct inode *rip;
  register struct filp *f;
  register off_t bytes_left, f_size;
  register unsigned off, cum_io;
  register int oflags;
  off_t position;
  int r, chunk, mode_word, usr, seg, block_spec, char_spec, regular;
  struct filp *wf;

  /* MM loads segments by putting funny things in upper 10 bits of 'fd'. */
  if (who == MM_PROC_NR && (fd & (~BYTE)) ) {
        usr = (fd >> 8) & BYTE;
        seg = (fd >> 6) & 03;
        fd &= FD_MASK;          /* get rid of user and segment bits */
  } else {
        usr = who;              /* normal case */
        seg = D;
  }

  /* If the file descriptor is valid, get the inode, size and mode. */
#if (CHIP == INTEL)
  if (who != MM_PROC_NR)        /* only MM > 32K */
#endif
  if (nbytes < 0) return(EINVAL);
  if ( (f = get_filp(fd)) == NIL_FILP) return(err_code);
  if ( ((f->filp_mode) & (rw_flag == READING ? R_BIT : W_BIT)) == 0)
        return(EBADF);
  if (nbytes == 0) return(0);   /* so char special files need not check for 0*/
  position = f->filp_pos;
  if (position < 0 || position > MAX_FILE_POS) return(EINVAL);
  oflags = f->filp_flags;
  rip = f->filp_ino;
  f_size = rip->i_size;
  r = OK;
  cum_io = 0;
  mode_word = rip->i_mode & I_TYPE;
  regular = mode_word == I_REGULAR || mode_word == I_NAMED_PIPE;

  char_spec = (mode_word == I_CHAR_SPECIAL ? 1 : 0);
  block_spec = (mode_word == I_BLOCK_SPECIAL ? 1 : 0);
  /* A block special file with size 0 is treated as having no size limit. */
  if (block_spec && f_size == 0) f_size = MAX_P_LONG;
  rdwt_err = OK;                /* set to EIO if disk error occurs */

  /* Check for character special files. */
  if (char_spec) {
        /* A non-negative result from dev_io() is the byte count moved. */
        if ((r = dev_io(rw_flag, oflags & O_NONBLOCK, (dev_t) rip->i_zone[0],
                        position, nbytes, who, buffer)) >= 0) {
                cum_io = r;
                position += r;
                r = OK;
        }
  } else {
        if (rw_flag == WRITING && block_spec == 0) {
                /* Check in advance to see if file will grow too big. */
                if (position > get_super(rip->i_dev)->s_max_size - nbytes )
                        return(EFBIG);

                /* check for O_APPEND flag */
                if (oflags & O_APPEND) position = f_size;

                /* Clear the zone containing present EOF if hole about
                 * to be created.  This is necessary because all unwritten
                 * blocks prior to the EOF must read as zeros.
                 */
                if (position > f_size) clear_zone(rip, f_size, 0);
        }

        /* Pipes are a little different.  Check. */
        if (rip->i_pipe &&
            (r = pipe_check(rip, rw_flag, oflags, nbytes, position)) <= 0)
                return r;

        /* Split the transfer into chunks that don't span two blocks. */
        while (nbytes != 0) {
                off = position % BLOCK_SIZE;    /* offset within a block */
                chunk = MIN(nbytes, BLOCK_SIZE - off);
                /* NOTE(review): presumably covers an 'nbytes' that appears
                 * negative in an int (the MM > 32K case above) - confirm.
                 */
                if (chunk < 0) chunk = BLOCK_SIZE - off;

                if (rw_flag == READING || (block_spec && rw_flag == WRITING)) {
                        bytes_left = f_size - position;
                        if (position >= f_size) break;  /* we are beyond EOF */
                        if (chunk > bytes_left) chunk = bytes_left;
                }

                /* Read or write 'chunk' bytes. */
                r = rw_chunk(rip, position, off, chunk, nbytes,
                             rw_flag, buffer, seg, usr);
                if (r != OK) break;     /* EOF reached */
                if (rdwt_err < 0) break;

                /* Update counters and pointers. */
                buffer += chunk;        /* user buffer address */
                nbytes -= chunk;        /* bytes yet to be read */
                cum_io += chunk;        /* bytes read so far */
                position += chunk;      /* position within the file */
        }
  }

  /* On write, update file size and mark the modification time for update. */
  if (rw_flag == WRITING) {
        if (regular || mode_word == I_DIRECTORY) {
                if (position > f_size) rip->i_size = position;
                rip->i_update = MTIME;  /* mark mtime for update later */
                rip->i_dirt = DIRTY;
        }
  } else {
        if (rip->i_pipe && position >= rip->i_size) {
                /* Reset pipe pointers. */
                rip->i_size = 0;        /* no data left */
                position = 0;           /* reset reader(s) */
                if ( (wf = find_filp(rip, W_BIT)) != NIL_FILP)
                        wf->filp_pos =0;
        }
  }
  f->filp_pos = position;

  /* Check to see if read-ahead is called for, and if so, set it up. */
  if (rw_flag == READING && rip->i_seek == NO_SEEK &&
      position % BLOCK_SIZE== 0 &&
      (regular || mode_word == I_DIRECTORY)) {
        rdahed_inode = rip;
        rdahedpos = position;
  }
  rip->i_seek = NO_SEEK;

  if (rdwt_err != OK) r = rdwt_err;     /* check for disk error */
  if (rdwt_err == END_OF_FILE) r = cum_io;
  return(r == OK ? cum_io : r);
}


/*===========================================================================*
 *                              rw_chunk                                     *
 *===========================================================================*/
PRIVATE int rw_chunk(rip, position, off, chunk, left, rw_flag, buff, seg, usr)
register struct inode *rip;     /* pointer to inode for file to be rd/wr */
off_t position;                 /* position within file to read or write */
unsigned off;                   /* off within the current block */
int chunk;                      /* number of bytes to read or write */
unsigned left;                  /* max number of bytes wanted after position */
int rw_flag;                    /* READING or WRITING */
char *buff;                     /* virtual address of the user buffer */
int seg;                        /* T or D segment in user space */
int usr;                        /* which user process */
{
/* Read or write (part of) a block.  Returns the result of rw_user(), or
 * err_code when a new block is needed for a write and new_block() fails.
 */

  register struct buf *bp;
  register int r;
  int dir, n, block_spec;
  block_nr b;
  dev_t dev;

  block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
  if (block_spec) {
        /* Block special file: the position maps directly onto the device. */
        b = position/BLOCK_SIZE;
        dev = (dev_t) rip->i_zone[0];
  } else {
        b = read_map(rip, position);
        dev = rip->i_dev;
  }

  if (!block_spec && b == NO_BLOCK) {
        if (rw_flag == READING) {
                /* Reading from a nonexistent block.  Must read as all zeros.*/
                bp = get_block(NO_DEV, NO_BLOCK, NORMAL);    /* get a buffer */
                zero_block(bp);
        } else {
                /* Writing to a nonexistent block. Create and enter in inode.*/
                if ((bp = new_block(rip, position)) == NIL_BUF)return(err_code);
        }
  } else if (rw_flag == READING) {
        /* Read and read ahead if convenient. */
        bp = rahead(rip, b, position, left);
  } else {
        /* Normally an existing block to be partially overwritten is first read
         * in.  However, a full block need not be read in.  If it is already in
         * the cache, acquire it, otherwise just acquire a free buffer.
         */
        n = (chunk == BLOCK_SIZE ? NO_READ : NORMAL);
        if (!block_spec && off == 0 && position >= rip->i_size) n = NO_READ;
        bp = get_block(dev, b, n);
  }

  /* In all cases, bp now points to a valid buffer. */
  /* A partial write starting a block at or beyond the current file size:
   * zero the buffer first, since only 'chunk' bytes of it get written.
   */
  if (rw_flag == WRITING && chunk != BLOCK_SIZE && !block_spec &&
                                        position >= rip->i_size && off == 0)
        zero_block(bp);
  dir = (rw_flag == READING ? TO_USER : FROM_USER);
  r = rw_user(seg, usr, (vir_bytes)buff, (vir_bytes)chunk, bp->b_data+off, dir);
  if (rw_flag == WRITING) bp->b_dirt = DIRTY;
  n = (off + chunk == BLOCK_SIZE ? FULL_DATA_BLOCK : PARTIAL_DATA_BLOCK);
  put_block(bp, n);
  return(r);
}


/*===========================================================================*
 *                              read_map                                     *
 *===========================================================================*/
PUBLIC block_nr read_map(rip, position)
register struct inode *rip;     /* ptr to inode to map from */
off_t position;                 /* position in file whose blk wanted */
{
/* Given an inode and a position within the corresponding file, locate the
 * block (not zone) number in which that position is to be found and return it.
 * NO_BLOCK is returned when any zone on the path to the position is NO_ZONE.
 */

  register struct buf *bp;
  register zone_nr z;
  register block_nr b;
  register long excess, zone, block_pos;
  register int scale, boff;

  scale = scale_factor(rip);    /* for block-zone conversion */
  block_pos = position/BLOCK_SIZE;      /* relative blk # in file */
  zone = block_pos >> scale;    /* position's zone */
  boff = block_pos - (zone << scale);   /* relative blk # within zone */

  /* Is 'position' to be found in the inode itself? */
  if (zone < NR_DZONE_NUM) {
        if ( (z = rip->i_zone[(int) zone]) == NO_ZONE) return(NO_BLOCK);
        b = ((block_nr) z << scale) + boff;
        return(b);
  }

  /* It is not in the inode, so it must be single or double indirect. */
  excess = zone - NR_DZONE_NUM; /* first NR_DZONE_NUM don't count */

  if (excess < NR_INDIRECTS) {
        /* 'position' can be located via the single indirect block. */
        z = rip->i_zone[NR_DZONE_NUM];
  } else {
        /* 'position' can be located via the double indirect block. */
        if ( (z = rip->i_zone[NR_DZONE_NUM+1]) == NO_ZONE) return(NO_BLOCK);
        excess -= NR_INDIRECTS;                 /* single indir doesn't count */
        b = (block_nr) z << scale;
        bp = get_block(rip->i_dev, b, NORMAL);  /* get double indirect block */
        z = bp->b_ind[(int)(excess/NR_INDIRECTS)];/*z is zone # for single ind*/
        put_block(bp, INDIRECT_BLOCK);          /* release double ind block */
        excess = excess % NR_INDIRECTS;         /* index into single ind blk */
  }

  /* 'z' is zone num for single indirect block; 'excess' is index into it. */
  if (z == NO_ZONE) return(NO_BLOCK);
  b = (block_nr) z << scale;
  bp = get_block(rip->i_dev, b, NORMAL);        /* get single indirect block */
  z = bp->b_ind[(int) excess];
  put_block(bp, INDIRECT_BLOCK);                /* release single indir blk */
  if (z == NO_ZONE) return(NO_BLOCK);
  b = ((block_nr) z << scale) + boff;
  return(b);
}


/*===========================================================================*
 *                              rw_user                                      *
 *===========================================================================*/
PUBLIC int rw_user(s, u, vir, bytes, buff, direction)
int s;                          /* D or T space (stack is also D) */
int u;                          /* process number to r/w (usually = 'who') */
vir_bytes vir;                  /* virtual address to move to/from */
vir_bytes bytes;                /* how many bytes to move */
char *buff;                     /* pointer to FS space */
int direction;                  /* TO_USER or FROM_USER */
{
/* Transfer a block of data.  Two options exist, depending on 'direction':
 *     TO_USER:     Move from FS space to user virtual space
 *     FROM_USER:   Move from user virtual space to FS space
 * The request is built in the file-level message 'umess' and handed to
 * sys_copy(); the m_type field of the message afterwards is returned.
 */

  if (direction == TO_USER ) {
        /* Write from FS space to user space. */
        umess.SRC_SPACE  = D;
        umess.SRC_PROC_NR = FS_PROC_NR;
        umess.SRC_BUFFER = (long) buff;
        umess.DST_SPACE  = s;
        umess.DST_PROC_NR = u;
        umess.DST_BUFFER = (long) vir;
  } else {
        /* Read from user space to FS space. */
        umess.SRC_SPACE  = s;
        umess.SRC_PROC_NR = u;
        umess.SRC_BUFFER = (long) vir;
        umess.DST_SPACE  = D;
        umess.DST_PROC_NR = FS_PROC_NR;
        umess.DST_BUFFER = (long) buff;
  }

  umess.COPY_BYTES = (long) bytes;
  sys_copy(&umess);
  return(umess.m_type);
}


/*===========================================================================*
 *                              read_ahead                                   *
 *===========================================================================*/
PUBLIC void read_ahead()
{
/* Read a block into the cache before it is needed.  The inode and position
 * to read from are taken from 'rdahed_inode' and 'rdahedpos', which
 * read_write() sets up; 'rdahed_inode' is reset here before any reading.
 */

  register struct inode *rip;
  struct buf *bp;
  block_nr b;

  rip = rdahed_inode;           /* pointer to inode to read ahead from */
  rdahed_inode = NIL_INODE;     /* turn off read ahead */
  if ( (b = read_map(rip, rdahedpos)) == NO_BLOCK) return;      /* at EOF */
  bp = rahead(rip, b, rdahedpos, BLOCK_SIZE);
  put_block(bp, PARTIAL_DATA_BLOCK);
}


/*===========================================================================*
 *                              rahead                                       *
 *===========================================================================*/
PUBLIC struct buf *rahead(rip, baseblock, position, bytes_ahead)
register struct inode *rip;     /* pointer to inode for file to be read */
block_nr baseblock;             /* block at current position */
off_t position;                 /* position within file */
unsigned bytes_ahead;           /* bytes beyond position for immediate use */
{
/* Fetch a block from the cache or the device.  If a physical read is
 * required, prefetch as many more blocks as convenient into the cache.
 * This usually covers bytes_ahead plus any more blocks on the last "track".
 * The device driver may decide it knows better about the track geometry
 * and stop reading at any track boundary (or after an error).
 * Rw_scattered() puts an optional flag on all reads to allow this.
 *
 * Returns the buffer for 'baseblock'.
 */

  block_nr block;
  unsigned blocks_ahead;
  unsigned blocks_per_track;
  register struct buf *bp;
  int block_spec;
  dev_t dev;
  off_t dev_size;
  off_t file_size;
  unsigned fragment;
  unsigned limit_bufs_in_use;
  unsigned max_track;
  int reading_ahead;
  static struct buf *read_q[NR_BUFS];   /* static so it isn't on stack */
  int read_q_size;
  unsigned track;

  block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
  if (block_spec)
        dev = (dev_t) rip->i_zone[0];
  else
        dev = rip->i_dev;

  /* If the buffer handed back is already associated with a device, it is
   * returned as is and no read is queued.
   */
  bp = get_block(dev, baseblock, PREFETCH);
  if (bp->b_dev != NO_DEV) return(bp);

  /* Guesstimate blocks_per_track.  A bad guess will work but be sub-optimal.
   * Dev_open may eventually do it properly.
   */
  if (block_spec)
        dev_size = rip->i_size;
  else
#if (MACHINE == ATARI)
        dev_size = 80L * 2 * 9 * 512;   /* can be 80L*1*9*512 as well */
#else
        dev_size = 80L * 2 * 15 * 512;  /* change to your usual floppy size */
#endif

  /* These tests must form a single else-if chain: a size of 0 also satisfies
   * the low-density test, so without the 'else' the hard disk value would be
   * overwritten by the low-density floppy value.
   */
  if (dev_size == 0)
        blocks_per_track = 17;  /* hard disk (17 * nr_heads / 2 is too many) */
  else if (dev_size < 80L * 2 * 15 * 512)
        blocks_per_track = 9;   /* low-density floppy */
  else if (dev_size < 80L * 2 * 18 * 512)
        blocks_per_track = 15;  /* high-density floppy */
  else
        blocks_per_track = 18;  /* higher-density floppy */

  file_size = rip->i_size;
  if (block_spec && file_size == 0) file_size = MAX_P_LONG;
  fragment = (unsigned) (position % BLOCK_SIZE);
  position = position - fragment + BLOCK_SIZE;  /* start of the next block */
  blocks_ahead = (fragment + bytes_ahead + BLOCK_SIZE - 1) / BLOCK_SIZE - 1;

  /* Set the limit (max + 1) on buffers used.  Avoid taking the last 2 buffers
   * for ordinary files, because the cache will thrash if these are needed
   * for indirect blocks.  There is no point in stopping earlier for the
   * immediately-needed part of the read.  Large reads will evict from the
   * cache all blocks except those for the read and the indirect blocks, no
   * matter what is done here.
   */
  limit_bufs_in_use = block_spec ? NR_BUFS : NR_BUFS - 2;

  max_track = bp->b_blocknr / blocks_per_track;
  reading_ahead = FALSE;
  read_q[0] = bp;               /* first buffer must be read */
  read_q_size = 1;

  /* The next loop has 2 phases, controlled by 'reading_ahead'. */
  while (TRUE) {
        if (position >= file_size || bufs_in_use >= limit_bufs_in_use) break;
        if (blocks_ahead != 0)
                --blocks_ahead;
        else {
                /* All the immediately-needed blocks have been read.  Give
                 * up after seeks and partial reads.
                 */
                if (reading_ahead || rip->i_seek == ISEEK ||
                    (fragment + bytes_ahead) % BLOCK_SIZE != 0)
                        break;

                /* Try for more blocks on the last "track".  Try a few more
                 * than 'blocks_per_track' to allow for blocks out of order.
                 * Reducing 'limit_bufs_in_use' here might reduce thrashing.
                 */
                blocks_ahead = blocks_per_track + 6;
                reading_ahead = TRUE;
        }
        if (block_spec)
                block = position / BLOCK_SIZE;
        else
                block = read_map(rip, position);
        position += BLOCK_SIZE;
        track = block / blocks_per_track;
        if (reading_ahead) {
                /* Second phase: only blocks on the last "track" are taken. */
                if (track != max_track) continue;
        } else {
                /* First phase: remember the highest "track" seen. */
                if (track > max_track) max_track = track;
        }
        if (block_spec || block != NO_BLOCK) {
                bp = get_block(dev, block, PREFETCH);
                if (bp->b_dev == NO_DEV)
                        read_q[read_q_size++] = bp;     /* queue for reading */
                else
                        put_block(bp, FULL_DATA_BLOCK); /* not queued */
        }
  }
  rw_scattered(dev, read_q, read_q_size, READING);
  return(get_block(dev, baseblock, NORMAL));
}