/*	$NetBSD: vm.c,v 1.41 2008/10/15 13:04:26 pooka Exp $	*/

/*
 * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by Google Summer of Code.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Virtual memory emulation routines.  Contents:
 *  + UBC
 *  + anon objects & pager
 *  + vnode objects & pager
 *  + misc support routines
 *  + kmem
 */

/*
 * XXX: we abuse pg->uanon for the virtual address of the storage
 * for each page.  phys_addr would fit the job description better,
 * except that it will create unnecessary lossage on some platforms
 * due to not being a pointer type.
 */

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/null.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/kmem.h>

#include <machine/pmap.h>

#include <rump/rumpuser.h>

#include <uvm/uvm.h>
#include <uvm/uvm_prot.h>
#include <uvm/uvm_readahead.h>

#include "rump_private.h"

/* debug printf; a no-op unless the build provides one */
#ifndef DPRINTF
#define DPRINTF(x)
#endif

/*
 * Number of pages touched by a transfer of "len" bytes starting at
 * byte offset "off".  E.g. with 4k pages a page-sized transfer at
 * offset 0x800 touches two pages: len2npages(0x800, 0x1000) == 2.
 */
#define len2npages(off, len)						\
	((round_page((off) + (len)) - trunc_page(off)) >> PAGE_SHIFT)

static int vn_get(struct uvm_object *, voff_t, struct vm_page **,
	int *, int, vm_prot_t, int, int);
static int vn_put(struct uvm_object *, voff_t, voff_t, int);
static int ao_get(struct uvm_object *, voff_t, struct vm_page **,
	int *, int, vm_prot_t, int, int);
static int ao_put(struct uvm_object *, voff_t, voff_t, int);

const struct uvm_pagerops uvm_vnodeops = {
	.pgo_get = vn_get,
	.pgo_put = vn_put,
};
const struct uvm_pagerops aobj_pager = {
	.pgo_get = ao_get,
	.pgo_put = ao_put,
};

kmutex_t uvm_pageqlock;

struct uvmexp uvmexp;
struct uvm uvm;
struct vmspace rump_vmspace;
struct vm_map rump_vmmap;
const struct rb_tree_ops uvm_page_tree_ops;

static struct vm_map_kernel kernel_map_store;
struct vm_map *kernel_map = &kernel_map_store.vmk_map;

/*
 * vm pages
 */

/* called with the object locked */
struct vm_page *
rumpvm_makepage(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	pg = kmem_zalloc(sizeof(struct vm_page), KM_SLEEP);
	pg->offset = off;
	pg->uobject = uobj;

	pg->uanon = (void *)kmem_zalloc(PAGE_SIZE, KM_SLEEP);
	pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;

	TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);

	return pg;
}
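/*
 * Illustrative sketch (not compiled in): the intended life cycle of a
 * page in this emulation.  A pager allocates the page with the object
 * lock held, fills the backing store hanging off pg->uanon, and then
 * unbusies it.  The helper name "fill_one_page" is hypothetical.
 *
 *	static void
 *	fill_one_page(struct uvm_object *uobj, voff_t off, const void *src)
 *	{
 *		struct vm_page *pg;
 *
 *		mutex_enter(&uobj->vmobjlock);
 *		pg = rumpvm_makepage(uobj, off);
 *		memcpy((void *)pg->uanon, src, PAGE_SIZE);
 *		pg->flags &= ~PG_FAKE;
 *		uvm_page_unbusy(&pg, 1);
 *		mutex_exit(&uobj->vmobjlock);
 *	}
 */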
/*
 * Release a page.
 *
 * Called with the vm object locked.
 */
void
uvm_pagefree(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;

	if (pg->flags & PG_WANTED)
		wakeup(pg);

	TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
	kmem_free((void *)pg->uanon, PAGE_SIZE);
	kmem_free(pg, sizeof(*pg));
}

struct rumpva {
	vaddr_t addr;
	struct vm_page *pg;

	LIST_ENTRY(rumpva) entries;
};
static LIST_HEAD(, rumpva) rvahead = LIST_HEAD_INITIALIZER(rvahead);
static kmutex_t rvamtx;

/* remember the page backing a pager-mapped virtual address */
void
rumpvm_enterva(vaddr_t addr, struct vm_page *pg)
{
	struct rumpva *rva;

	rva = kmem_alloc(sizeof(struct rumpva), KM_SLEEP);
	rva->addr = addr;
	rva->pg = pg;
	mutex_enter(&rvamtx);
	LIST_INSERT_HEAD(&rvahead, rva, entries);
	mutex_exit(&rvamtx);
}

void
rumpvm_flushva(void)
{
	struct rumpva *rva;

	mutex_enter(&rvamtx);
	while ((rva = LIST_FIRST(&rvahead)) != NULL) {
		LIST_REMOVE(rva, entries);
		kmem_free(rva, sizeof(*rva));
	}
	mutex_exit(&rvamtx);
}

/*
 * vnode pager
 */

static int
vn_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;

	return VOP_GETPAGES(vp, off, pgs, npages, centeridx, access_type,
	    advice, flags);
}

static int
vn_put(struct uvm_object *uobj, voff_t offlo, voff_t offhi, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;

	return VOP_PUTPAGES(vp, offlo, offhi, flags);
}

/*
 * Anon object stuff
 */

static int
ao_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vm_page *pg;
	int i;

	if (centeridx)
		panic("%s: centeridx != 0 not supported", __func__);

	/* loop over pages */
	off = trunc_page(off);
	for (i = 0; i < *npages; i++) {
 retrylookup:
		pg = uvm_pagelookup(uobj, off + (i << PAGE_SHIFT));
		if (pg) {
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
				    "aogetpg", 0);
				/* the wait dropped the object lock; retake */
				mutex_enter(&uobj->vmobjlock);
				goto retrylookup;
			}
			pg->flags |= PG_BUSY;
			pgs[i] = pg;
		} else {
			pg = rumpvm_makepage(uobj, off + (i << PAGE_SHIFT));
			pgs[i] = pg;
		}
	}
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

static int
ao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	/* we only free all pages for now */
	if ((flags & PGO_FREE) == 0 || (flags & PGO_ALLPAGES) == 0) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL)
		uvm_pagefree(pg);
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

struct uvm_object *
uao_create(vsize_t size, int flags)
{
	struct uvm_object *uobj;

	uobj = kmem_zalloc(sizeof(struct uvm_object), KM_SLEEP);
	uobj->pgops = &aobj_pager;
	TAILQ_INIT(&uobj->memq);
	mutex_init(&uobj->vmobjlock, MUTEX_DEFAULT, IPL_NONE);

	return uobj;
}

void
uao_detach(struct uvm_object *uobj)
{

	mutex_enter(&uobj->vmobjlock);
	ao_put(uobj, 0, 0, PGO_ALLPAGES | PGO_FREE);
	/* ao_put() released vmobjlock */
	mutex_destroy(&uobj->vmobjlock);
	kmem_free(uobj, sizeof(*uobj));
}

/*
 * UBC
 */

struct ubc_window {
	struct uvm_object	*uwin_obj;
	voff_t			uwin_off;
	uint8_t			*uwin_mem;
	size_t			uwin_mapsize;

	LIST_ENTRY(ubc_window)	uwin_entries;
};

static LIST_HEAD(, ubc_window) uwinlst = LIST_HEAD_INITIALIZER(uwinlst);
static kmutex_t uwinmtx;
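/*
 * How this UBC emulation works: ubc_alloc() hands out plain anonymous
 * memory instead of a pager-backed kernel mapping and remembers the
 * window on uwinlst.  I/O against the window address is expected to be
 * routed to rump_ubc_magic_uiomove() below, which finds the window,
 * gets the object's pages and copies through their backing store.
 * A hypothetical write path (sketch only; "mywrite" is not part of
 * this file) would look like:
 *
 *	static int
 *	mywrite(struct uvm_object *uobj, struct uio *uio, vsize_t todo)
 *	{
 *		vsize_t len = todo;
 *		void *win;
 *
 *		win = ubc_alloc(uobj, uio->uio_offset, &len,
 *		    UVM_ADV_NORMAL, UBC_WRITE);
 *		rump_ubc_magic_uiomove(win, len, uio, NULL, NULL);
 *		ubc_release(win, 0);
 *		return 0;
 *	}
 */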
/*
 * Transfer n bytes between a UBC window and a uio.  Returns 1 if va
 * belonged to a window and the transfer was performed, 0 otherwise.
 */
int
rump_ubc_magic_uiomove(void *va, size_t n, struct uio *uio, int *rvp,
	struct ubc_window *uwinp)
{
	struct vm_page **pgs;
	int npages = len2npages(uio->uio_offset, n);
	size_t allocsize;
	int i, rv;

	if (uwinp == NULL) {
		mutex_enter(&uwinmtx);
		LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
			if ((uint8_t *)va >= uwinp->uwin_mem
			    && (uint8_t *)va
			      < (uwinp->uwin_mem + uwinp->uwin_mapsize))
				break;
		mutex_exit(&uwinmtx);
		if (uwinp == NULL) {
			KASSERT(rvp != NULL);
			return 0;
		}
	}

	allocsize = npages * sizeof(*pgs);
	pgs = kmem_zalloc(allocsize, KM_SLEEP);
	mutex_enter(&uwinp->uwin_obj->vmobjlock);
	rv = uwinp->uwin_obj->pgops->pgo_get(uwinp->uwin_obj,
	    uwinp->uwin_off + ((uint8_t *)va - uwinp->uwin_mem),
	    pgs, &npages, 0, 0, 0, 0);
	if (rv)
		goto out;

	for (i = 0; i < npages; i++) {
		size_t xfersize;
		off_t pageoff;

		pageoff = uio->uio_offset & PAGE_MASK;
		xfersize = MIN(MIN(n, PAGE_SIZE), PAGE_SIZE-pageoff);
		uiomove((uint8_t *)pgs[i]->uanon + pageoff, xfersize, uio);
		if (uio->uio_rw == UIO_WRITE)
			pgs[i]->flags &= ~PG_CLEAN;
		n -= xfersize;
	}
	uvm_page_unbusy(pgs, npages);

 out:
	kmem_free(pgs, allocsize);
	if (rvp)
		*rvp = rv;
	return 1;
}

static struct ubc_window *
uwin_alloc(struct uvm_object *uobj, voff_t off, vsize_t len)
{
	struct ubc_window *uwinp; /* pronounced: you wimp! */

	uwinp = kmem_alloc(sizeof(struct ubc_window), KM_SLEEP);
	uwinp->uwin_obj = uobj;
	uwinp->uwin_off = off;
	uwinp->uwin_mapsize = len;
	uwinp->uwin_mem = kmem_alloc(len, KM_SLEEP);

	return uwinp;
}

static void
uwin_free(struct ubc_window *uwinp)
{

	kmem_free(uwinp->uwin_mem, uwinp->uwin_mapsize);
	kmem_free(uwinp, sizeof(struct ubc_window));
}

void *
ubc_alloc(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, int advice,
	int flags)
{
	struct ubc_window *uwinp;

	uwinp = uwin_alloc(uobj, offset, *lenp);
	mutex_enter(&uwinmtx);
	LIST_INSERT_HEAD(&uwinlst, uwinp, uwin_entries);
	mutex_exit(&uwinmtx);

	DPRINTF(("UBC_ALLOC offset 0x%llx, uwin %p, mem %p\n",
	    (unsigned long long)offset, uwinp, uwinp->uwin_mem));

	return uwinp->uwin_mem;
}

void
ubc_release(void *va, int flags)
{
	struct ubc_window *uwinp;

	mutex_enter(&uwinmtx);
	LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
		if ((uint8_t *)va >= uwinp->uwin_mem
		    && (uint8_t *)va < (uwinp->uwin_mem + uwinp->uwin_mapsize))
			break;
	/* remove under the lock to avoid racing another release */
	if (uwinp)
		LIST_REMOVE(uwinp, uwin_entries);
	mutex_exit(&uwinmtx);
	if (uwinp == NULL)
		panic("%s: releasing invalid window at %p", __func__, va);

	uwin_free(uwinp);
}

int
ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo,
	int advice, int flags)
{
	struct ubc_window *uwinp;
	vsize_t len;

	while (todo > 0) {
		len = todo;

		uwinp = uwin_alloc(uobj, uio->uio_offset, len);
		rump_ubc_magic_uiomove(uwinp->uwin_mem, len, uio, NULL, uwinp);
		uwin_free(uwinp);

		todo -= len;
	}
	return 0;
}

/*
 * Misc routines
 */

void
rumpvm_init(void)
{

	uvmexp.free = 1024*1024; /* XXX: arbitrary, never updated */
	uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */
	rump_vmspace.vm_map.pmap = pmap_kernel();

	mutex_init(&rvamtx, MUTEX_DEFAULT, 0);
	mutex_init(&uwinmtx, MUTEX_DEFAULT, 0);
	mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0);

	callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM);
}

void
uvm_pageactivate(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pagewire(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pageunwire(struct vm_page *pg)
{

	/* nada */
}

vaddr_t
uvm_pagermapin(struct vm_page **pps, int npages, int flags)
{

	panic("%s: unimplemented", __func__);
}

/* called with the vm object locked */
struct vm_page *
uvm_pagelookup(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &uobj->memq, listq.queue) {
		if (pg->offset == off) {
			return pg;
		}
	}

	return NULL;
}

struct vm_page *
uvm_pageratop(vaddr_t va)
{
	struct rumpva *rva;

	mutex_enter(&rvamtx);
	LIST_FOREACH(rva, &rvahead, entries)
		if (rva->addr == va)
			break;
	mutex_exit(&rvamtx);

	if (rva == NULL)
		panic("%s: va %llu", __func__, (unsigned long long)va);

	return rva->pg;
}
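/*
 * Page busy protocol, as used by ao_get() above: PG_BUSY marks a page
 * owned by someone else.  A would-be user sets PG_WANTED, sleeps on
 * the page, and is woken by uvm_page_unbusy() below.  Sketch of the
 * waiter side, entered with the object lock held ("off" names the
 * page's offset in the object):
 *
 *	for (;;) {
 *		pg = uvm_pagelookup(uobj, off);
 *		if (pg == NULL || (pg->flags & PG_BUSY) == 0)
 *			break;
 *		pg->flags |= PG_WANTED;
 *		UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0, "mywait", 0);
 *		mutex_enter(&uobj->vmobjlock);
 *	}
 */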
void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
	struct vm_page *pg;
	int i;

	for (i = 0; i < npgs; i++) {
		pg = pgs[i];
		if (pg == NULL)
			continue;

		KASSERT(pg->flags & PG_BUSY);
		if (pg->flags & PG_WANTED)
			wakeup(pg);
		if (pg->flags & PG_RELEASED)
			uvm_pagefree(pg);
		else
			pg->flags &= ~(PG_WANTED|PG_BUSY);
	}
}

void
uvm_estimatepageable(int *active, int *inactive)
{

	/* XXX: guessing game */
	*active = 1024;
	*inactive = 1024;
}

void
uvm_aio_biodone1(struct buf *bp)
{

	panic("%s: unimplemented", __func__);
}

void
uvm_aio_biodone(struct buf *bp)
{

	uvm_aio_aiodone(bp);
}

void
uvm_aio_aiodone(struct buf *bp)
{

	if (((bp->b_flags | bp->b_cflags) & (B_READ | BC_NOCACHE)) == 0
	    && bioopsp)
		bioopsp->io_pageiodone(bp);
}

void
uvm_vnp_setsize(struct vnode *vp, voff_t newsize)
{

	mutex_enter(&vp->v_interlock);
	vp->v_size = vp->v_writesize = newsize;
	mutex_exit(&vp->v_interlock);
}

void
uvm_vnp_setwritesize(struct vnode *vp, voff_t newsize)
{

	mutex_enter(&vp->v_interlock);
	vp->v_writesize = newsize;
	mutex_exit(&vp->v_interlock);
}

void
uvm_vnp_zerorange(struct vnode *vp, off_t off, size_t len)
{
	struct uvm_object *uobj = &vp->v_uobj;
	struct vm_page **pgs;
	int maxpages = MIN(32, round_page(len) >> PAGE_SHIFT);
	int rv, npages, i;

	pgs = kmem_zalloc(maxpages * sizeof(*pgs), KM_SLEEP);
	while (len) {
		npages = MIN(maxpages, round_page(len) >> PAGE_SHIFT);
		memset(pgs, 0, npages * sizeof(struct vm_page *));
		mutex_enter(&uobj->vmobjlock);
		rv = uobj->pgops->pgo_get(uobj, off, pgs, &npages, 0, 0, 0, 0);
		KASSERT(rv == 0);
		KASSERT(npages > 0);

		for (i = 0; i < npages; i++) {
			uint8_t *start;
			size_t chunkoff, chunklen;

			chunkoff = off & PAGE_MASK;
			chunklen = MIN(PAGE_SIZE - chunkoff, len);
			start = (uint8_t *)pgs[i]->uanon + chunkoff;

			memset(start, 0, chunklen);
			pgs[i]->flags &= ~PG_CLEAN;

			off += chunklen;
			len -= chunklen;
		}
		uvm_page_unbusy(pgs, npages);
	}
	kmem_free(pgs, maxpages * sizeof(*pgs));
}

struct uvm_ractx *
uvm_ra_allocctx(void)
{

	return NULL;
}

void
uvm_ra_freectx(struct uvm_ractx *ra)
{

	return;
}

bool
uvn_clean_p(struct uvm_object *uobj)
{
	struct vnode *vp = (void *)uobj;

	return (vp->v_iflag & VI_ONWORKLST) == 0;
}

struct vm_map_kernel *
vm_map_to_kernel(struct vm_map *map)
{

	return (struct vm_map_kernel *)map;
}

bool
vm_map_starved_p(struct vm_map *map)
{

	return false;
}
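/*
 * The pageout routines below only maintain the uvmexp.paging counter
 * and the pagedaemon wakeup logic.  A hypothetical caller doing
 * asynchronous pageout would bracket the operation like this (sketch,
 * assuming "npages" I/Os in flight):
 *
 *	uvm_pageout_start(npages);
 *	... submit the writes; each completion path eventually ...
 *	uvm_pageout_done(npages);
 */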
void
uvm_pageout_start(int npages)
{

	uvmexp.paging += npages;
}

void
uvm_pageout_done(int npages)
{

	uvmexp.paging -= npages;

	/*
	 * Wake up either the pagedaemon or the LWPs waiting for
	 * free memory, depending on how tight things are.
	 */
	if (uvmexp.free <= uvmexp.reserve_kernel) {
		wakeup(&uvm.pagedaemon);
	} else {
		wakeup(&uvmexp.free);
	}
}

/* XXX: following two are unfinished because lwp's are not refcounted yet */
void
uvm_lwp_hold(struct lwp *l)
{

	atomic_inc_uint(&l->l_holdcnt);
}

void
uvm_lwp_rele(struct lwp *l)
{

	atomic_dec_uint(&l->l_holdcnt);
}

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{

	panic("%s: unimplemented", __func__);
}

void
uvm_unloan(void *v, int npages, int flags)
{

	panic("%s: unimplemented", __func__);
}

/*
 * Kmem
 */

#ifndef RUMP_USE_REAL_KMEM
void *
kmem_alloc(size_t size, km_flag_t kmflag)
{

	return rumpuser_malloc(size, kmflag == KM_NOSLEEP);
}

void *
kmem_zalloc(size_t size, km_flag_t kmflag)
{
	void *rv;

	rv = kmem_alloc(size, kmflag);
	if (rv)
		memset(rv, 0, size);

	return rv;
}

void
kmem_free(void *p, size_t size)
{

	rumpuser_free(p);
}
#endif /* RUMP_USE_REAL_KMEM */

/*
 * UVM km
 */

vaddr_t
uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
{
	void *rv;

	rv = rumpuser_malloc(size, flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT));
	if (rv && flags & UVM_KMF_ZERO)
		memset(rv, 0, size);

	return (vaddr_t)rv;
}

void
uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
{

	rumpuser_free((void *)vaddr);
}

struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
	vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap)
{

	/* nobody dereferences the map in rump, so a cookie value suffices */
	return (struct vm_map *)417416;
}

vaddr_t
uvm_km_alloc_poolpage(struct vm_map *map, bool waitok)
{

	return (vaddr_t)rumpuser_malloc(PAGE_SIZE, !waitok);
}

void
uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr)
{

	rumpuser_free((void *)addr);
}
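/*
 * Note on the kmem emulation above: as with the real kmem(9), the
 * caller must pass the original allocation size to kmem_free().
 * Minimal usage sketch ("struct foo" is hypothetical):
 *
 *	struct foo *f;
 *
 *	f = kmem_zalloc(sizeof(*f), KM_SLEEP);
 *	...
 *	kmem_free(f, sizeof(*f));
 */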