/*	$NetBSD: genfs_io.c,v 1.14.4.1 2009/01/16 01:48:47 snj Exp $	*/

/*
 * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by Google Summer of Code
 * and the Finnish Cultural Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/lock.h>
#include <sys/vnode.h>

#include <miscfs/genfs/genfs_node.h>
#include <miscfs/genfs/genfs.h>

#include "rump_private.h"

/*
 * DPRINTF is assumed to come from the private rump headers; provide a
 * no-op fallback so the debug calls below compile without it.
 */
#ifndef DPRINTF
#define DPRINTF(x)
#endif

void
genfs_directio(struct vnode *vp, struct uio *uio, int ioflag)
{

	panic("%s: not implemented", __func__);
}

/*
 * miscfs/genfs getpages routine.  This is a fair bit simpler than the
 * kernel counterpart since we're not being executed from a fault handler
 * and generally don't need to care about PGO_LOCKED or other cruft.
 * We do, however, need to care about page locking and we keep trying until
 * we get all the pages within the range.  The object locking protocol
 * is the same as for the kernel: enter with the object lock held,
 * return with it released.
 */
int
genfs_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	struct vnode *vp = ap->a_vp;
	struct uvm_object *uobj = (struct uvm_object *)vp;
	struct vm_page *pg;
	voff_t curoff, endoff;
	off_t diskeof;
	size_t bufsize, remain, bufoff, xfersize;
	uint8_t *tmpbuf;
	int bshift = vp->v_mount->mnt_fs_bshift;
	int bsize = 1 << bshift;
	int count = *ap->a_count;
	int async;
	int i, error;

	/*
	 * Ignore async for now, the structure of this routine
	 * doesn't exactly allow for it ...
	 */
	async = 0;

	if (ap->a_centeridx != 0)
		panic("%s: centeridx != 0 not supported", __func__);

	if (ap->a_access_type & VM_PROT_WRITE)
		vp->v_iflag |= VI_ONWORKLST;

	curoff = ap->a_offset & ~PAGE_MASK;
	for (i = 0; i < count; i++, curoff += PAGE_SIZE) {
 retrylookup:
		pg = uvm_pagelookup(uobj, curoff);
		if (pg == NULL)
			break;

		/* page is busy?  we need to wait until it's released */
		if (pg->flags & PG_BUSY) {
			pg->flags |= PG_WANTED;
			UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
			    "getpg", 0);
			mutex_enter(&uobj->vmobjlock);
			goto retrylookup;
		}
		pg->flags |= PG_BUSY;
		if (pg->flags & PG_FAKE)
			break;
		ap->a_m[i] = pg;
	}
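
	/*
	 * At this point pages [0..i) are in the cache, busied by us and
	 * stored in ap->a_m.  If the scan stopped short, curoff is the
	 * offset of entry i, the first page that is either missing or
	 * does not yet contain valid data.
	 */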

	/* got everything?  if so, just return */
	if (i == count) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	/*
	 * didn't?  Ok, allocate backing pages.  Start from the first
	 * one we missed.
	 */
	for (; i < count; i++, curoff += PAGE_SIZE) {
 retrylookup2:
		pg = uvm_pagelookup(uobj, curoff);

		/* found?  busy it and be happy */
		if (pg) {
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
				    "getpg2", 0);
				mutex_enter(&uobj->vmobjlock);
				goto retrylookup2;
			} else {
				pg->flags |= PG_BUSY;
			}

		/* not found?  make a new page */
		} else {
			pg = rumpvm_makepage(uobj, curoff);
		}
		ap->a_m[i] = pg;
	}

	/*
	 * We have done all the clerical work and have all pages busied.
	 * Release the vm object for other consumers.
	 */
	mutex_exit(&uobj->vmobjlock);

	/*
	 * Now, we have all the pages here & busy.  Transfer the range
	 * starting from the missing offset and transfer into the
	 * page buffers.
	 */
	GOP_SIZE(vp, vp->v_size, &diskeof, 0);

	/* align to boundaries */
	endoff = trunc_page(ap->a_offset) + (count << PAGE_SHIFT);
	endoff = MIN(endoff, ((vp->v_writesize+bsize-1) & ~(bsize-1)));
	curoff = ap->a_offset & ~(MAX(bsize,PAGE_SIZE)-1);
	remain = endoff - curoff;
	if (diskeof > curoff)
		remain = MIN(remain, diskeof - curoff);

	DPRINTF(("a_offset: 0x%llx, startoff: 0x%llx, endoff 0x%llx\n",
	    (unsigned long long)ap->a_offset, (unsigned long long)curoff,
	    (unsigned long long)endoff));

	/* read everything into a buffer */
	bufsize = round_page(remain);
	tmpbuf = kmem_zalloc(bufsize, KM_SLEEP);
	for (bufoff = 0; remain; remain -= xfersize, bufoff += xfersize) {
		struct buf *bp;
		struct vnode *devvp;
		daddr_t lbn, bn;
		int run;

		lbn = (curoff + bufoff) >> bshift;
		/* XXX: assume eof */
		error = VOP_BMAP(vp, lbn, &devvp, &bn, &run);
		if (error)
			panic("%s: VOP_BMAP & lazy bum: %d", __func__, error);
		DPRINTF(("lbn %d (off %d) -> bn %d run %d\n", (int)lbn,
		    (int)(curoff+bufoff), (int)bn, run));
		xfersize = MIN(((lbn+1+run) << bshift) - (curoff+bufoff),
		    remain);

		/* hole?  nothing on disk, just fill with zeroes */
		if (bn == -1) {
			memset(tmpbuf + bufoff, 0, xfersize);
			continue;
		}

		bp = getiobuf(vp, true);

		bp->b_data = tmpbuf + bufoff;
		bp->b_bcount = xfersize;
		bp->b_blkno = bn;
		bp->b_lblkno = 0;
		bp->b_flags = B_READ;
		bp->b_cflags = BC_BUSY;

		if (async) {
			bp->b_flags |= B_ASYNC;
			bp->b_iodone = uvm_aio_biodone;
		}

		VOP_STRATEGY(devvp, bp);
		if (bp->b_error)
			panic("%s: VOP_STRATEGY, lazy bum", __func__);

		if (!async)
			putiobuf(bp);
	}

	/* skip to beginning of pages we're interested in */
	bufoff = 0;
	while (round_page(curoff + bufoff) < trunc_page(ap->a_offset))
		bufoff += PAGE_SIZE;

	DPRINTF(("first page offset 0x%x\n", (int)(curoff + bufoff)));

	for (i = 0; i < count; i++, bufoff += PAGE_SIZE) {
		/* past our prime? */
		if (curoff + bufoff >= endoff)
			break;

		pg = uvm_pagelookup(&vp->v_uobj, curoff + bufoff);
		KASSERT(pg);
		DPRINTF(("got page %p (off 0x%x)\n",
		    pg, (int)(curoff+bufoff)));
		if (pg->flags & PG_FAKE) {
			memcpy((void *)pg->uanon, tmpbuf+bufoff, PAGE_SIZE);
			pg->flags &= ~PG_FAKE;
			pg->flags |= PG_CLEAN;
		}
		ap->a_m[i] = pg;
	}
	*ap->a_count = i;

	kmem_free(tmpbuf, bufsize);

	return 0;
}

int
genfs_compat_getpages(void *v)
{

	panic("%s: not implemented", __func__);
}

/*
 * simplesimplesimple: we put all pages every time.
 */
int
genfs_putpages(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;

	return genfs_do_putpages(ap->a_vp, ap->a_offlo, ap->a_offhi,
	    ap->a_flags, NULL);
}
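
/*
 * Rough usage sketch (an illustration, not taken from this file): a
 * caller flushing and freeing all pages of a vnode would typically go
 * through the pager interface roughly like
 *
 *	mutex_enter(&vp->v_interlock);
 *	error = VOP_PUTPAGES(vp, 0, 0,
 *	    PGO_ALLPAGES | PGO_CLEANIT | PGO_FREE);
 *
 * The interlock is held on entry and released by the callee, matching
 * the locking protocol of the routine below.
 */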

/*
 * This is a slightly strangely structured routine.  It always puts
 * all the pages for a vnode.  It starts by releasing pages which
 * are clean and simultaneously looks up the smallest offset of a
 * dirty page belonging to the object.  If there is no smallest offset,
 * all pages have been cleaned.  Otherwise, it finds a contiguous range
 * of dirty pages starting from the smallest offset and writes them out.
 * After this the scan is restarted.
 */
int
genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags,
	struct vm_page **busypg)
{
	char databuf[MAXPHYS];
	struct uvm_object *uobj = &vp->v_uobj;
	struct vm_page *pg, *pg_next;
	voff_t smallest;
	voff_t curoff, bufoff;
	off_t eof;
	size_t xfersize;
	int bshift = vp->v_mount->mnt_fs_bshift;
	int bsize = 1 << bshift;
#if 0
	int async = (flags & PGO_SYNCIO) == 0;
#else
	int async = 0;
#endif

 restart:
	/* check if all pages are clean */
	smallest = -1;
	for (pg = TAILQ_FIRST(&uobj->memq); pg; pg = pg_next) {
		pg_next = TAILQ_NEXT(pg, listq.queue);

		/*
		 * XXX: this is not correct at all.  But it's based on
		 * assumptions we can make when accessing the pages
		 * only through the file system and not through the
		 * virtual memory subsystem.  Well, at least I hope
		 * so ;)
		 */
		KASSERT((pg->flags & PG_BUSY) == 0);

		/* If we can just dump the page, do so */
		if ((pg->flags & PG_CLEAN) || (flags & PGO_FREE)) {
			uvm_pagefree(pg);
			continue;
		}

		if (pg->offset < smallest || smallest == -1)
			smallest = pg->offset;
	}

	/* all done? */
	if (TAILQ_EMPTY(&uobj->memq)) {
		vp->v_iflag &= ~VI_ONWORKLST;
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	/* we need to flush */
	GOP_SIZE(vp, vp->v_writesize, &eof, 0);
	for (curoff = smallest; curoff < eof; curoff += PAGE_SIZE) {
		void *curva;

		if (curoff - smallest >= MAXPHYS)
			break;
		pg = uvm_pagelookup(uobj, curoff);
		if (pg == NULL)
			break;

		/* XXX: see the comment above the earlier KASSERT */
		KASSERT((pg->flags & PG_BUSY) == 0);

		curva = databuf + (curoff-smallest);
		memcpy(curva, (void *)pg->uanon, PAGE_SIZE);
		rumpvm_enterva((vaddr_t)curva, pg);

		pg->flags |= PG_CLEAN;
	}
	KASSERT(curoff > smallest);

	mutex_exit(&uobj->vmobjlock);

	/* then we write */
	for (bufoff = 0; bufoff < MIN(curoff-smallest, eof);
	    bufoff += xfersize) {
		struct buf *bp;
		struct vnode *devvp;
		daddr_t bn, lbn;
		int run, error;

		lbn = (smallest + bufoff) >> bshift;
		error = VOP_BMAP(vp, lbn, &devvp, &bn, &run);
		if (error)
			panic("%s: VOP_BMAP failed: %d", __func__, error);

		xfersize = MIN(((lbn+1+run) << bshift) - (smallest+bufoff),
		     curoff - (smallest+bufoff));

		/*
		 * We might run across blocks which aren't allocated yet.
		 * This can happen e.g. when the data is still sitting in
		 * the kernel page cache while truncate has already
		 * enlarged the file.  So just ignore those ranges.
		 */
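		/* VOP_BMAP reports such an unallocated range as bn == -1 */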
		if (bn == -1)
			continue;

		bp = getiobuf(vp, true);

		/* only write max what we are allowed to write */
		bp->b_bcount = xfersize;
		if (smallest + bufoff + xfersize > eof)
			bp->b_bcount -= (smallest+bufoff+xfersize) - eof;
		bp->b_bcount = (bp->b_bcount + DEV_BSIZE-1) & ~(DEV_BSIZE-1);
		KASSERT(bp->b_bcount > 0);
		KASSERT(smallest >= 0);

		DPRINTF(("putpages writing from %x to %x (vp size %x)\n",
		    (int)(smallest + bufoff),
		    (int)(smallest + bufoff + bp->b_bcount),
		    (int)eof));

		bp->b_bufsize = round_page(bp->b_bcount);
		bp->b_lblkno = 0;
		/*
		 * Offset the device block by the byte offset within the
		 * file system block, converted to DEV_BSIZE units.
		 */
		bp->b_blkno = bn + (((smallest+bufoff)&(bsize-1))>>DEV_BSHIFT);
		bp->b_data = databuf + bufoff;
		bp->b_flags = B_WRITE;
		bp->b_cflags |= BC_BUSY;

		if (async) {
			bp->b_flags |= B_ASYNC;
			bp->b_iodone = uvm_aio_biodone;
		}

		vp->v_numoutput++;
		VOP_STRATEGY(devvp, bp);
		if (bp->b_error)
			panic("%s: VOP_STRATEGY lazy bum %d",
			    __func__, bp->b_error);
		if (!async)
			putiobuf(bp);
	}
	rumpvm_flushva();

	mutex_enter(&uobj->vmobjlock);
	goto restart;
}

int
genfs_null_putpages(void *v)
{
	struct vop_putpages_args *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(vp->v_uobj.uo_npages == 0);
	mutex_exit(&vp->v_interlock);

	return 0;
}

int
genfs_compat_gop_write(struct vnode *vp, struct vm_page **pgs, int npages,
	int flags)
{

	panic("%s: not implemented", __func__);
}

int
genfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
{

	panic("%s: not implemented", __func__);
}

int
genfs_gop_write_rwmap(struct vnode *vp, struct vm_page **pgs, int npages,
	int flags)
{

	panic("%s: not implemented", __func__);
}