OpenSolaris_b135/uts/common/io/ramdisk.c

Compare this file to the similar file:
Show the results in this format:

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


/*
 * Ramdisk device driver.
 *
 * There are two types of ramdisk: 'real' OBP-created ramdisks, and 'pseudo'
 * ramdisks created at runtime with no corresponding OBP device node.  The
 * ramdisk(7D) driver is capable of dealing with both, and with the creation
 * and deletion of 'pseudo' ramdisks.
 *
 * Every ramdisk has a single 'state' structure which maintains data for
 * that ramdisk, and is assigned a single minor number.  The bottom 10-bits
 * of the minor number index the state structures; the top 8-bits give a
 * 'real OBP disk' number, i.e. they are zero for 'pseudo' ramdisks.  Thus
 * it is possible to distinguish 'real' from 'pseudo' ramdisks using the
 * top 8-bits of the minor number.
 *
 * Each OBP-created ramdisk has its own node in the device tree with an
 * "existing" property which describes the one-or-more physical address ranges
 * assigned to the ramdisk.  All 'pseudo' ramdisks share a common devinfo
 * structure.
 *
 * A single character device node is used by ramdiskadm(1M) to communicate
 * with the ramdisk driver, with minor number 0:
 *
 *	/dev/ramdiskctl -> /devices/pseudo/ramdisk@0:ctl
 *
 * For consistent access, block and raw device nodes are created for *every*
 * ramdisk.  For 'pseudo' ramdisks:
 *
 *	/dev/ramdisk/<diskname>  -> /devices/pseudo/ramdisk@0:<diskname>
 *	/dev/rramdisk/<diskname> -> /devices/pseudo/ramdisk@0:<diskname>,raw
 *
 * For OBP-created ramdisks:
 *
 *	/dev/ramdisk/<diskname>  -> /devices/ramdisk-<diskname>:a
 *	/dev/rramdisk/<diskname> -> /devices/ramdisk-<diskname>:a,raw
 *
 * This allows the transition from the standalone to the kernel to proceed
 * when booting from a ramdisk, and for the installation to correctly identify
 * the root device.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/errno.h>
#include <sys/uio.h>
#include <sys/buf.h>
#include <sys/modctl.h>
#include <sys/open.h>
#include <sys/kmem.h>
#include <sys/poll.h>
#include <sys/conf.h>
#include <sys/cmn_err.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ramdisk.h>
#include <vm/seg_kmem.h>

/*
 * Flag to disable the use of real ramdisks (in the OBP - on Sparc) when
 * the associated memory is no longer available - set in the bootops section.
 */
#ifdef __sparc
extern int bootops_obp_ramdisk_disabled;
#endif /* __sparc */

/*
 * An opaque handle where information about our set of ramdisk devices lives.
 */
static void	*rd_statep;

/*
 * Pointer to devinfo for the 'pseudo' ramdisks.  Real OBP-created ramdisks
 * get their own individual devinfo.
 */
static dev_info_t *rd_dip = NULL;

/*
 * Global state lock.
 */
static kmutex_t	rd_lock;

/*
 * Maximum number of ramdisks supported by this driver.
 */
static uint32_t	rd_max_disks = RD_DFLT_DISKS;

/*
 * Percentage of physical memory which can be assigned to pseudo ramdisks,
 * what that equates to in pages, and how many pages are currently assigned.
 */
static uint_t	rd_percent_physmem = RD_DEFAULT_PERCENT_PHYSMEM;
static pgcnt_t	rd_max_physmem;
static pgcnt_t	rd_tot_physmem;

/*
 * Largest b_bcount that rd_minphys() lets through; larger transfer
 * requests are clamped to this value.
 */
static uint_t	rd_maxphys = RD_DEFAULT_MAXPHYS;

/*
 * Report whether any pseudo ramdisk currently exists.  A state structure
 * belongs to a pseudo ramdisk when its devinfo is the shared pseudo
 * devinfo (rd_dip).  Minor 0 is not examined.  Returns EBUSY if one is
 * found, otherwise 0.  The caller must hold rd_lock.
 */
static int
rd_is_busy(void)
{
	rd_devstate_t	*state;
	minor_t		m = 1;

	ASSERT(mutex_owned(&rd_lock));

	while (m <= rd_max_disks) {
		state = ddi_get_soft_state(rd_statep, m);
		if (state != NULL && state->rd_dip == rd_dip)
			return (EBUSY);
		++m;
	}
	return (0);
}

/*
 * Scan minors 1..rd_max_disks and hand back the lowest one which has no
 * soft state attached to it.  Zero is never a candidate, so a return
 * value of zero means that every slot is taken.  The caller must hold
 * rd_lock.
 */
static minor_t
rd_find_free_minor(void)
{
	minor_t	candidate = 1;

	ASSERT(mutex_owned(&rd_lock));

	while (candidate <= rd_max_disks &&
	    ddi_get_soft_state(rd_statep, candidate) != NULL) {
		++candidate;
	}
	return (candidate <= rd_max_disks ? candidate : 0);
}

/*
 * Look up a ramdisk by name among minors 1..rd_max_disks and return its
 * rd_devstate, or NULL when no ramdisk carries that name.  Names are
 * unique across both kinds of ramdisk: an OBP-created ramdisk and a
 * pseudo ramdisk can never share one.  The caller must hold rd_lock.
 */
static rd_devstate_t *
rd_find_named_disk(char *name)
{
	rd_devstate_t	*state;
	minor_t		m;

	ASSERT(mutex_owned(&rd_lock));

	for (m = 1; m <= rd_max_disks; ++m) {
		state = ddi_get_soft_state(rd_statep, m);
		if (state == NULL)
			continue;
		if (strcmp(state->rd_name, name) == 0)
			return (state);
	}
	return (NULL);
}

/*
 * Return the rd_devstate whose devinfo pointer equals 'dip' (used for
 * real OBP-created ramdisks, each of which owns its devinfo).  NULL is
 * returned when nothing matches, which is not expected to happen.  The
 * caller must hold rd_lock.
 */
static rd_devstate_t *
rd_find_dip_state(dev_info_t *dip)
{
	rd_devstate_t	*state;
	minor_t		m;

	ASSERT(mutex_owned(&rd_lock));

	for (m = 1; m <= rd_max_disks; ++m) {
		state = ddi_get_soft_state(rd_statep, m);
		if (state == NULL)
			continue;
		if (state->rd_dip == dip)
			return (state);
	}
	return (NULL);
}

/*
 * Return non-zero (1) if the ramdisk has any outstanding open: a
 * character open, a block open, or at least one layered open.  The
 * caller must hold rd_lock.
 */
static int
rd_is_open(rd_devstate_t *rsp)
{
	ASSERT(mutex_owned(&rd_lock));

	if (rsp->rd_chr_open)
		return (1);
	if (rsp->rd_blk_open)
		return (1);
	return (rsp->rd_lyr_open_cnt != 0);
}

/*
 * Record an open of the given type against the ramdisk.  Character and
 * block opens are simple flags; layered opens are counted.  Returns 0
 * on success, or -1 if 'otyp' is not one of OTYP_CHR, OTYP_BLK or
 * OTYP_LYR.  The caller must hold rd_lock.
 */
static int
rd_opened(rd_devstate_t *rsp, int otyp)
{
	ASSERT(mutex_owned(&rd_lock));

	if (otyp == OTYP_CHR) {
		rsp->rd_chr_open = 1;
	} else if (otyp == OTYP_BLK) {
		rsp->rd_blk_open = 1;
	} else if (otyp == OTYP_LYR) {
		rsp->rd_lyr_open_cnt++;
	} else {
		return (-1);
	}
	return (0);
}

/*
 * Record a close of the given type against the ramdisk: clear the
 * character or block flag, or drop the layered open count by one.  Any
 * other open type is silently ignored.  The caller must hold rd_lock.
 */
static void
rd_closed(rd_devstate_t *rsp, int otyp)
{
	ASSERT(mutex_owned(&rd_lock));

	if (otyp == OTYP_CHR) {
		rsp->rd_chr_open = 0;
	} else if (otyp == OTYP_BLK) {
		rsp->rd_blk_open = 0;
	} else if (otyp == OTYP_LYR) {
		rsp->rd_lyr_open_cnt--;
	}
}

/*
 * Pick up the "max_disks" and "percent_physmem" string properties of
 * the pseudo devinfo (if present), clamp them to sane values, and derive
 * rd_max_physmem, the page budget for pseudo ramdisks.
 */
static void
rd_init_tuneables(void)
{
	char	*value, *cursor;

	/*
	 * 'rd_max_disks' may be tuned in ramdisk.conf; anything at or
	 * above RD_MAX_DISKS is replaced by RD_MAX_DISKS - 1.
	 */
	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, rd_dip, 0,
	    "max_disks", &value) == DDI_PROP_SUCCESS) {
		cursor = value;
		rd_max_disks = (uint32_t)stoi(&cursor);
		ddi_prop_free(value);
	}
	if (rd_max_disks >= RD_MAX_DISKS) {
		cmn_err(CE_WARN, "ramdisk: rd_max_disks (%u) too big;"
		    " using default (%u).", rd_max_disks, RD_MAX_DISKS - 1);

		rd_max_disks = RD_MAX_DISKS - 1;
	}

	/*
	 * 'rd_percent_physmem' may be tuned in ramdisk.conf; a value of
	 * 100 or more falls back to the compiled-in default.
	 */
	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, rd_dip, 0,
	    "percent_physmem", &value) == DDI_PROP_SUCCESS) {
		cursor = value;
		rd_percent_physmem = (uint_t)stoi(&cursor);
		ddi_prop_free(value);
	}
	if (rd_percent_physmem >= 100) {
		cmn_err(CE_WARN, "ramdisk: rd_percent_physmem (%u) >= 100;"
		    " using default (%u%%).", rd_percent_physmem,
		    RD_DEFAULT_PERCENT_PHYSMEM);

		rd_percent_physmem = RD_DEFAULT_PERCENT_PHYSMEM;
	}

	/*
	 * availrmem_initial is a long, so the product cannot overflow.
	 */
	rd_max_physmem = (availrmem_initial * rd_percent_physmem) / 100;
}

/*
 * Allocate enough physical pages to hold "npages" pages.  Returns an
 * array of page_t * pointers that can later be mapped in or out via
 * rd_{un}map_window() but is otherwise opaque, or NULL on failure.
 *
 * The request is refused up front if it would push rd_tot_physmem past
 * rd_max_physmem.  On success rd_tot_physmem is increased by npages; on
 * any failure everything acquired so far is released again.  The array
 * must eventually be handed back through rd_phys_free() with the same
 * page count.
 */
page_t **
rd_phys_alloc(pgcnt_t npages)
{
	page_t		*pp, **ppa;
	spgcnt_t	i;
	size_t		ppalen;
	struct seg	kseg;
	caddr_t		addr;		/* For coloring */

	if (rd_tot_physmem + npages > rd_max_physmem)
		return (NULL);

	if (!page_resv(npages, KM_NOSLEEP))
		return (NULL);

	if (!page_create_wait(npages, 0)) {
		page_unresv(npages);
		return (NULL);
	}

	/*
	 * Size the array from the element type of 'ppa' itself rather
	 * than naming a struct tag; rd_phys_free() computes the same
	 * length when it releases the array.
	 */
	ppalen = npages * sizeof (*ppa);
	ppa = kmem_zalloc(ppalen, KM_NOSLEEP);
	if (ppa == NULL) {
		page_create_putback(npages);
		page_unresv(npages);
		return (NULL);
	}

	/*
	 * Take the pages one at a time: try the free list first, then
	 * fall back to the cache list.  'addr' only advances by a page
	 * per iteration and is passed purely as a coloring hint.
	 */
	kseg.s_as = &kas;
	for (i = 0, addr = NULL; i < npages; ++i, addr += PAGESIZE) {
		pp = page_get_freelist(&kvp, 0, &kseg, addr, PAGESIZE, 0, NULL);
		if (pp == NULL) {
			pp = page_get_cachelist(&kvp, 0, &kseg, addr, 0, NULL);
			if (pp == NULL)
				goto out;
			if (!PP_ISAGED(pp))
				page_hashout(pp, NULL);
		}

		PP_CLRFREE(pp);
		PP_CLRAGED(pp);
		ppa[i] = pp;
	}

	for (i = 0; i < npages; i++)
		page_downgrade(ppa[i]);
	rd_tot_physmem += npages;

	return (ppa);

out:
	/*
	 * 'i' pages were obtained before the failure: put back the
	 * (npages - i) that were never taken, free the ones that were,
	 * then drop the array and the reservation.
	 */
	ASSERT(i < npages);
	page_create_putback(npages - i);
	while (--i >= 0)
		page_free(ppa[i], 0);
	kmem_free(ppa, ppalen);
	page_unresv(npages);

	return (NULL);
}

/*
 * Free physical pages previously allocated via rd_phys_alloc(); note that
 * this function may block as it has to wait until it can exclusively lock
 * all the pages first.
 *
 * 'ppa' is the array returned by rd_phys_alloc() and 'npages' the count
 * it was created with.  The array itself is freed, the page reservation
 * is dropped and rd_tot_physmem is reduced by npages.
 */
static void
rd_phys_free(page_t **ppa, pgcnt_t npages)
{
	pgcnt_t	i;
	/*
	 * Must match the length computed in rd_phys_alloc(); sized from
	 * the array's own element type rather than a named struct tag.
	 */
	size_t	ppalen = npages * sizeof (*ppa);

	for (i = 0; i < npages; ++i) {
		/*
		 * If the lock cannot be upgraded in place, drop it and
		 * retry until an exclusive lock is obtained.
		 */
		if (! page_tryupgrade(ppa[i])) {
			page_unlock(ppa[i]);
			while (! page_lock(ppa[i], SE_EXCL, NULL, P_RECLAIM))
				;
		}
		page_free(ppa[i], 0);
	}

	kmem_free(ppa, ppalen);

	page_unresv(npages);
	rd_tot_physmem -= npages;
}

/*
 * Tear down the current window mapping, if there is one.  Must not be
 * called for a ramdisk whose window is the OBP-provided mapping
 * (rd_window_obp set).  Note that rd_window_base is left untouched.
 */
static void
rd_unmap_window(rd_devstate_t *rsp)
{
	ASSERT(rsp->rd_window_obp == 0);

	if (rsp->rd_window_base == RD_WINDOW_NOT_MAPPED)
		return;

	hat_unload(kas.a_hat, rsp->rd_window_virt, rsp->rd_window_size,
	    HAT_UNLOAD_UNLOCK);
}

/*
 * Map a portion of the ramdisk into the virtual window.
 *
 * 'offset' is a byte offset into the ramdisk.  If the window is already
 * mapped and covers 'offset' nothing is done; otherwise any old mapping
 * is removed and a new one is loaded at rd_window_virt, with
 * rd_window_base set to ptob(btop(offset)).
 *
 * NOTE(review): rd_rw() calls this with rd_device_lock held; that is
 * presumably what serialises access to the window fields - confirm.
 */
static void
rd_map_window(rd_devstate_t *rsp, off_t offset)
{
	pgcnt_t	offpgs = btop(offset);

	if (rsp->rd_window_base != RD_WINDOW_NOT_MAPPED) {
		/*
		 * Already mapped; is offset within our window?
		 */
		if (offset >= rsp->rd_window_base &&
		    offset < rsp->rd_window_base + rsp->rd_window_size) {
			return;
		}

		/*
		 * No, we need to re-map; toss the old mapping.
		 */
		rd_unmap_window(rsp);
	}
	rsp->rd_window_base = ptob(offpgs);

	/*
	 * Different algorithms depending on whether this is a real
	 * OBP-created ramdisk, or a pseudo ramdisk.
	 */
	if (rsp->rd_dip == rd_dip) {
		pgcnt_t	pi, lastpi;
		caddr_t	vaddr;

		/*
		 * Find the range of pages which should be mapped.
		 * The range is clipped so that it never runs past the
		 * last page of the ramdisk (rd_npages).
		 */
		pi = offpgs;
		lastpi = pi + btopr(rsp->rd_window_size);
		if (lastpi > rsp->rd_npages) {
			lastpi = rsp->rd_npages;
		}

		/*
		 * Load the mapping.
		 * Each page from rd_ppa[] is loaded at consecutive
		 * page-sized steps starting at rd_window_virt.
		 */
		vaddr = rsp->rd_window_virt;
		for (; pi < lastpi; ++pi) {
			hat_memload(kas.a_hat, vaddr, rsp->rd_ppa[pi],
			    (PROT_READ | PROT_WRITE) | HAT_NOSYNC,
			    HAT_LOAD_LOCK);
			vaddr += ptob(1);
		}
	} else {
		uint_t	i;
		pfn_t	pfn;

		/*
		 * Real OBP-created ramdisk: locate the physical range which
		 * contains this offset.
		 * 'offset' is reduced by the size of every range that is
		 * skipped, so on exit from the loop it is relative to the
		 * start of range 'i'.
		 */
		for (i = 0; i < rsp->rd_nexisting; ++i) {
			if (offset < rsp->rd_existing[i].size) {
				break;
			}
			offset -= rsp->rd_existing[i].size;
		}
		ASSERT(i < rsp->rd_nexisting);

		/*
		 * Load the mapping.
		 */
		pfn = btop(rsp->rd_existing[i].phys + offset);
		hat_devload(kas.a_hat, rsp->rd_window_virt, rsp->rd_window_size,
		    pfn, (PROT_READ | PROT_WRITE),
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
	}
}

/*
 * Invent a disk geometry, a single-partition vtoc and controller info
 * for the ramdisk, all derived from rd_size.  newfs issues several disk
 * ioctls to learn the geometry and partitioning and feeds the answers
 * to mkfs, so even though geometry is largely meaningless nowadays we
 * have to be able to answer.
 *
 * Stolen from lofi.c - should maybe split out common code sometime.
 */
static void
rd_fake_disk_geometry(rd_devstate_t *rsp)
{
	struct dk_geom	*geom = &rsp->rd_dkg;
	struct vtoc	*vt = &rsp->rd_vtoc;
	struct dk_cinfo	*ci = &rsp->rd_ci;
	int		cylbytes;

	/* dk_geom - see dkio(7I) */
	/*
	 * A single cylinder holding every sector would be the simplest
	 * answer, but programs such as fdisk partition by cylinder and
	 * cannot carve up a one-cylinder disk (hard to pick a number
	 * between one and one), which would rule out putting pcfs on an
	 * fdisk partition for testing.
	 *
	 * The cheezy floppy test tries to avoid too few cylinders for a
	 * small disk, and too many for a big one (which wastes space on
	 * backup superblocks or cylinder group structures).
	 */
	if (rsp->rd_size < (2 * 1024 * 1024))	/* floppy? */
		cylbytes = 100 * 1024;
	else
		cylbytes = 300 * 1024;
	geom->dkg_ncyl = rsp->rd_size / cylbytes;
	/* in case the size is < 100k */
	if (geom->dkg_ncyl == 0)
		geom->dkg_ncyl = 1;

	geom->dkg_acyl = 0;
	geom->dkg_bcyl = 0;
	geom->dkg_nhead = 1;
	geom->dkg_obs1 = 0;
	geom->dkg_intrlv = 0;
	geom->dkg_obs2 = 0;
	geom->dkg_obs3 = 0;
	geom->dkg_apc = 0;
	geom->dkg_rpm = 7200;
	geom->dkg_pcyl = geom->dkg_ncyl + geom->dkg_acyl;
	geom->dkg_nsect = rsp->rd_size / (DEV_BSIZE * geom->dkg_ncyl);
	geom->dkg_write_reinstruct = 0;
	geom->dkg_read_reinstruct = 0;

	/* vtoc - see dkio(7I) */
	bzero(vt, sizeof (struct vtoc));
	vt->v_sanity = VTOC_SANE;
	vt->v_version = V_VERSION;
	bcopy(RD_DRIVER_NAME, vt->v_volume, 7);
	vt->v_sectorsz = DEV_BSIZE;
	vt->v_nparts = 1;
	vt->v_part[0].p_tag = V_UNASSIGNED;
	vt->v_part[0].p_flag = V_UNMNT;
	vt->v_part[0].p_start = (daddr_t)0;
	/*
	 * Using the raw sector count as the partition size might leave
	 * the partition ending off a cylinder boundary, and newfs/mkfs
	 * prints a scary warning when that happens.  Derive the size
	 * from cylinders and sectors/cylinder instead.
	 */
	vt->v_part[0].p_size = geom->dkg_pcyl *
	    geom->dkg_nsect * geom->dkg_nhead;

	/* dk_cinfo - see dkio(7I) */
	bzero(ci, sizeof (struct dk_cinfo));
	(void) strcpy(ci->dki_cname, RD_DRIVER_NAME);
	ci->dki_ctype = DKC_MD;
	ci->dki_flags = 0;
	ci->dki_cnum = 0;
	ci->dki_addr = 0;
	ci->dki_space = 0;
	ci->dki_prio = 0;
	ci->dki_vec = 0;
	(void) strcpy(ci->dki_dname, RD_DRIVER_NAME);
	ci->dki_unit = 0;
	ci->dki_slave = 0;
	ci->dki_partition = 0;
	/*
	 * newfs derives maxcontig from this by multiplying by DEV_BSIZE
	 * and dividing by the block size.  Anything below 16 would give
	 * a maxcontig of 0, and tunefs then refuses to work.
	 */
	ci->dki_maxtransfer = 16;
}

/*
 * Release everything a ramdisk holds: its virtual window, its locks,
 * the "existing" property data or physical pages, its block and raw
 * minor nodes, its size properties, its kstat, and finally the state
 * structure itself.  'rsp' is invalid once this returns.
 */
static void
rd_dealloc_resources(rd_devstate_t *rsp)
{
	dev_info_t	*devi = rsp->rd_dip;
	char		nodename[RD_NAME_LEN + 5];
	dev_t		devt;

	/*
	 * Only a window we allocated ourselves is unmapped and freed;
	 * rd_unmap_window() is a no-op when nothing is mapped.
	 */
	if (rsp->rd_window_obp == 0 && rsp->rd_window_virt != NULL) {
		rd_unmap_window(rsp);
		vmem_free(heap_arena, rsp->rd_window_virt, rsp->rd_window_size);
	}
	mutex_destroy(&rsp->rd_device_lock);

	if (rsp->rd_existing != NULL)
		ddi_prop_free(rsp->rd_existing);
	if (rsp->rd_ppa != NULL)
		rd_phys_free(rsp->rd_ppa, rsp->rd_npages);

	/*
	 * Block and raw minor nodes: named after the disk for pseudo
	 * ramdisks, fixed "a" / "a,raw" for OBP-created ones.
	 */
	if (devi != rd_dip) {
		ddi_remove_minor_node(devi, "a");
		ddi_remove_minor_node(devi, "a,raw");
	} else {
		(void) snprintf(nodename, sizeof (nodename), "%s",
		    rsp->rd_name);
		ddi_remove_minor_node(devi, nodename);
		(void) snprintf(nodename, sizeof (nodename), "%s,raw",
		    rsp->rd_name);
		ddi_remove_minor_node(devi, nodename);
	}

	/*
	 * The "Size" and "Nblocks" properties hang off the full dev_t.
	 */
	devt = makedevice(ddi_driver_major(devi), rsp->rd_minor);
	(void) ddi_prop_remove(devt, devi, SIZE_PROP_NAME);
	(void) ddi_prop_remove(devt, devi, NBLOCKS_PROP_NAME);

	if (rsp->rd_kstat != NULL) {
		kstat_delete(rsp->rd_kstat);
		mutex_destroy(&rsp->rd_kstat_lock);
	}

	ddi_soft_state_free(rd_statep, rsp->rd_minor);
}

/*
 * Allocate resources (virtual and physical, device nodes, structures)
 * to a ramdisk.
 *
 *	name	ramdisk name; copied into the state structure.  Both
 *		callers in this file bound its length by RD_NAME_LEN
 *		before calling.
 *	addr	zero to have a one-page virtual window allocated here;
 *		non-zero to use that address directly as a window
 *		covering the whole ramdisk.
 *	size	size of the ramdisk in bytes.
 *	dip	devinfo the ramdisk hangs off; rd_dip for a pseudo
 *		ramdisk, which also gets physical pages allocated.
 *
 * Returns the new state structure, or NULL on failure (including the
 * case where no minor number is free).  On failure everything that was
 * set up is torn down again via rd_dealloc_resources().  The caller
 * must hold rd_lock (asserted by rd_find_free_minor()).
 */
static rd_devstate_t *
rd_alloc_resources(char *name, uint_t addr, size_t size, dev_info_t *dip)
{
	minor_t		minor;
	rd_devstate_t	*rsp;
	char		namebuf[RD_NAME_LEN + 5];
	dev_t		fulldev;
	int64_t		Nblocks_prop_val;
	int64_t		Size_prop_val;

	/*
	 * rd_find_free_minor() returns zero when every slot is in use.
	 * Zero is the control device's minor and must never be handed
	 * to a ramdisk, so fail here rather than trying to allocate it.
	 */
	minor = rd_find_free_minor();
	if (minor == 0) {
		return (NULL);
	}
	if (ddi_soft_state_zalloc(rd_statep, minor) == DDI_FAILURE) {
		return (NULL);
	}
	rsp = ddi_get_soft_state(rd_statep, minor);

	(void) strcpy(rsp->rd_name, name);
	rsp->rd_dip = dip;
	rsp->rd_minor = minor;
	rsp->rd_size = size;

	/*
	 * Allocate virtual window onto ramdisk.
	 */
	mutex_init(&rsp->rd_device_lock, NULL, MUTEX_DRIVER, NULL);
	if (addr == 0) {
		rsp->rd_window_obp = 0;
		rsp->rd_window_base = RD_WINDOW_NOT_MAPPED;
		rsp->rd_window_size = PAGESIZE;
		rsp->rd_window_virt = vmem_alloc(heap_arena,
		    rsp->rd_window_size, VM_SLEEP);
		if (rsp->rd_window_virt == NULL) {
			goto create_failed;
		}
	} else {
		/*
		 * The supplied address already maps the whole ramdisk,
		 * so the window is permanently based at offset zero.
		 */
		rsp->rd_window_obp = 1;
		rsp->rd_window_base = 0;
		rsp->rd_window_size = size;
		rsp->rd_window_virt = (caddr_t)((ulong_t)addr);
	}

	/*
	 * Allocate physical memory for non-OBP ramdisks.
	 * Create pseudo block and raw device nodes.
	 */
	if (dip == rd_dip) {
		rsp->rd_npages = btopr(size);
		rsp->rd_ppa = rd_phys_alloc(rsp->rd_npages);
		if (rsp->rd_ppa == NULL) {
			goto create_failed;
		}

		/*
		 * For non-OBP ramdisks the device nodes are:
		 *
		 *	/devices/pseudo/ramdisk@0:<diskname>
		 *	/devices/pseudo/ramdisk@0:<diskname>,raw
		 */
		(void) snprintf(namebuf, sizeof (namebuf), "%s",
		    rsp->rd_name);
		if (ddi_create_minor_node(dip, namebuf, S_IFBLK, minor,
		    DDI_PSEUDO, 0) == DDI_FAILURE) {
			goto create_failed;
		}
		(void) snprintf(namebuf, sizeof (namebuf), "%s,raw",
		    rsp->rd_name);
		if (ddi_create_minor_node(dip, namebuf, S_IFCHR, minor,
		    DDI_PSEUDO, 0) == DDI_FAILURE) {
			goto create_failed;
		}
	} else {
		/*
		 * For OBP-created ramdisks the device nodes are:
		 *
		 *	/devices/ramdisk-<diskname>:a
		 *	/devices/ramdisk-<diskname>:a,raw
		 */
		if (ddi_create_minor_node(dip, "a", S_IFBLK, minor,
		    DDI_PSEUDO, 0) == DDI_FAILURE) {
			goto create_failed;
		}
		if (ddi_create_minor_node(dip, "a,raw", S_IFCHR, minor,
		    DDI_PSEUDO, 0) == DDI_FAILURE) {
			goto create_failed;
		}
	}

	/*
	 * Create the "Size" and "Nblocks" properties.
	 */
	fulldev = makedevice(ddi_driver_major(dip), minor);
	Size_prop_val = size;
	if ((ddi_prop_update_int64(fulldev, dip,
	    SIZE_PROP_NAME, Size_prop_val)) != DDI_PROP_SUCCESS) {
		goto create_failed;
	}
	Nblocks_prop_val = size / DEV_BSIZE;
	if ((ddi_prop_update_int64(fulldev, dip,
	    NBLOCKS_PROP_NAME, Nblocks_prop_val)) != DDI_PROP_SUCCESS) {
		goto create_failed;
	}

	/*
	 * Allocate kstat stuff.  Failure to create the kstat is not
	 * fatal; the ramdisk simply runs without I/O statistics.
	 */
	rsp->rd_kstat = kstat_create(RD_DRIVER_NAME, minor, NULL,
	    "disk", KSTAT_TYPE_IO, 1, 0);
	if (rsp->rd_kstat) {
		mutex_init(&rsp->rd_kstat_lock, NULL,
		    MUTEX_DRIVER, NULL);
		rsp->rd_kstat->ks_lock = &rsp->rd_kstat_lock;
		kstat_install(rsp->rd_kstat);
	}

	rd_fake_disk_geometry(rsp);

	return (rsp);

create_failed:
	/*
	 * Cleanup.  The state structure was zero-allocated, so fields
	 * that were never set up are seen as NULL/zero by the teardown.
	 */
	rd_dealloc_resources(rsp);

	return (NULL);
}

/*
 * Reverse the work of rd_attach for 'dip': release resources and remove
 * whatever we installed.  The framework guarantees that no other entry
 * point is active on this devinfo node while we run.  Returns
 * DDI_FAILURE only when asked to detach the pseudo node while pseudo
 * ramdisks still exist; otherwise DDI_SUCCESS.
 */
static int
rd_common_detach(dev_info_t *dip)
{
	rd_devstate_t	*rsp;

	if (dip != rd_dip) {
		/*
		 * A 'real' ramdisk; release its state if we have any.
		 */
		rsp = rd_find_dip_state(dip);
		if (rsp != NULL)
			rd_dealloc_resources(rsp);
	} else {
		/*
		 * The pseudo node cannot go away while any pseudo
		 * ramdisk is still around.
		 */
		if (rd_is_busy())
			return (DDI_FAILURE);

		ddi_soft_state_free(rd_statep, RD_CTL_MINOR);
		rd_dip = NULL;
	}

	ddi_remove_minor_node(dip, NULL);
	return (DDI_SUCCESS);
}

/*
 * attach(9E) entry point.
 *
 * DDI_ATTACH on the pseudo devinfo records it in rd_dip, loads the
 * tuneables and creates the control state (minor RD_CTL_MINOR) and the
 * control minor node.  DDI_ATTACH on any other devinfo treats it as an
 * OBP-created ramdisk: its size is taken either from the "existing"
 * property (sum of the range sizes) or from the address/size property
 * pair, and a ramdisk is set up for it.  DDI_RESUME succeeds without
 * doing anything; every other command fails.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.  On failure anything allocated
 * here is released again through rd_common_detach().
 */
static int
rd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	char		*name;
	rd_existing_t	*ep = NULL;
	/*
	 * 'nep' is only filled in by the "existing" property lookup but
	 * is stored into the state structure on every OBP path, so it
	 * must start out as zero.
	 */
	uint_t		obpaddr = 0, nep = 0, i;
	size_t		size = 0;
	rd_devstate_t	*rsp;

	switch (cmd) {

	case DDI_ATTACH:
		mutex_enter(&rd_lock);

		/*
		 * For pseudo ramdisk devinfo set up state 0 and :ctl device;
		 * else it's an OBP-created ramdisk.
		 */
		if (is_pseudo_device(dip)) {
			rd_dip = dip;
			rd_init_tuneables();

			/*
			 * The zeroth minor is reserved for the ramdisk
			 * 'control' device.
			 */
			if (ddi_soft_state_zalloc(rd_statep, RD_CTL_MINOR) ==
			    DDI_FAILURE) {
				goto attach_failed;
			}
			rsp = ddi_get_soft_state(rd_statep, RD_CTL_MINOR);
			rsp->rd_dip = dip;

			if (ddi_create_minor_node(dip, RD_CTL_NODE,
			    S_IFCHR, 0, DDI_PSEUDO, NULL) == DDI_FAILURE) {
				goto attach_failed;
			}
		} else {
#ifdef __sparc
			if (bootops_obp_ramdisk_disabled)
				goto attach_failed;
#endif /* __sparc */

			RD_STRIP_PREFIX(name, ddi_node_name(dip));

			if (strlen(name) > RD_NAME_LEN) {
				cmn_err(CE_CONT,
				    "%s: name too long - ignoring\n", name);
				goto attach_failed;
			}

			/*
			 * An OBP-created ramdisk must have an 'existing'
			 * property; get and check it.
			 */
			if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, dip,
			    DDI_PROP_DONTPASS, OBP_EXISTING_PROP_NAME,
			    (uchar_t **)&ep, &nep) == DDI_SUCCESS) {

				/*
				 * The property is a byte array; it must
				 * hold a whole number of entries.
				 */
				if (nep == 0 || (nep % sizeof (*ep)) != 0) {
					cmn_err(CE_CONT,
					    "%s: " OBP_EXISTING_PROP_NAME
					    " illegal size\n", name);
					goto attach_failed;
				}
				nep /= sizeof (*ep);

				/*
				 * Calculate the size of the ramdisk.
				 */
				for (i = 0; i < nep; ++i) {
					size += ep[i].size;
				}
			} else if ((obpaddr = ddi_prop_get_int(DDI_DEV_T_ANY,
			    dip, DDI_PROP_DONTPASS, OBP_ADDRESS_PROP_NAME,
			    0)) != 0) {

				size = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
				    DDI_PROP_DONTPASS, OBP_SIZE_PROP_NAME, 0);
			} else {
				cmn_err(CE_CONT, "%s: missing OBP properties\n",
				    name);
				goto attach_failed;
			}

			/*
			 * Allocate driver resources for the ramdisk.
			 */
			if ((rsp = rd_alloc_resources(name, obpaddr, size,
			    dip)) == NULL) {
				goto attach_failed;
			}

			/*
			 * From here on the state structure owns 'ep'; it
			 * is freed by rd_dealloc_resources().
			 */
			rsp->rd_existing = ep;
			rsp->rd_nexisting = nep;
		}

		mutex_exit(&rd_lock);

		ddi_report_dev(dip);

		return (DDI_SUCCESS);

	case DDI_RESUME:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

attach_failed:
	/*
	 * Use our common detach routine to unallocate any stuff which
	 * was allocated above.
	 */
	(void) rd_common_detach(dip);
	mutex_exit(&rd_lock);

	/*
	 * Every jump here happens before 'ep' is handed to a state
	 * structure, so it is still ours to free.
	 */
	if (ep != NULL) {
		ddi_prop_free(ep);
	}
	return (DDI_FAILURE);
}

/*
 * detach(9E) entry point.  DDI_DETACH runs the common detach code under
 * rd_lock and returns its result; DDI_SUSPEND succeeds without doing
 * anything; any other command fails.
 */
static int
rd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int	rv;

	if (cmd == DDI_SUSPEND)
		return (DDI_SUCCESS);
	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	mutex_enter(&rd_lock);
	rv = rd_common_detach(dip);
	mutex_exit(&rd_lock);

	return (rv);
}

/*
 * getinfo(9E) entry point.  'arg' carries a dev_t; its minor number
 * selects the state structure.  DDI_INFO_DEVT2DEVINFO yields that
 * ramdisk's devinfo and DDI_INFO_DEVT2INSTANCE the instance number of
 * that devinfo.  If no state exists for the minor, *result is set to
 * NULL and DDI_FAILURE returned.  Unknown commands fail without
 * touching *result.
 */
/*ARGSUSED*/
static int
rd_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	rd_devstate_t	*rsp;

	if (infocmd != DDI_INFO_DEVT2DEVINFO &&
	    infocmd != DDI_INFO_DEVT2INSTANCE) {
		return (DDI_FAILURE);
	}

	rsp = ddi_get_soft_state(rd_statep, getminor((dev_t)arg));
	if (rsp == NULL) {
		*result = NULL;
		return (DDI_FAILURE);
	}

	if (infocmd == DDI_INFO_DEVT2DEVINFO) {
		*result = rsp->rd_dip;
	} else {
		*result = (void *)(uintptr_t)ddi_get_instance(rsp->rd_dip);
	}
	return (DDI_SUCCESS);
}

/*
 * open(9E) entry point.
 *
 * The control device (minor RD_CTL_MINOR) only accepts an exclusive
 * character open (EINVAL otherwise) and only one open at a time
 * (EBUSY).  Any other minor must have a state structure (ENXIO) and a
 * recognised open type (EINVAL); the open is then recorded against it.
 * Returns 0 on success.
 */
/*ARGSUSED3*/
static int
rd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	minor_t		minor = getminor(*devp);
	rd_devstate_t	*rsp;
	int		err = 0;

	mutex_enter(&rd_lock);

	if (minor == RD_CTL_MINOR) {
		/*
		 * Master control device; must be opened exclusively.
		 */
		if ((flag & FEXCL) != FEXCL || otyp != OTYP_CHR) {
			err = EINVAL;
		} else if ((rsp = ddi_get_soft_state(rd_statep,
		    RD_CTL_MINOR)) == NULL) {
			err = ENXIO;
		} else if (rd_is_open(rsp)) {
			err = EBUSY;
		} else {
			(void) rd_opened(rsp, OTYP_CHR);
		}
	} else if ((rsp = ddi_get_soft_state(rd_statep, minor)) == NULL) {
		err = ENXIO;
	} else if (rd_opened(rsp, otyp) == -1) {
		err = EINVAL;
	}

	mutex_exit(&rd_lock);
	return (err);
}

/*
 * close(9E) entry point.  Records the close of the given open type
 * against the minor's state structure.  Returns EINVAL if the minor has
 * no state, otherwise 0.
 */
/*ARGSUSED*/
static int
rd_close(dev_t dev, int flag, int otyp, struct cred *credp)
{
	rd_devstate_t	*rsp;
	int		err = 0;

	mutex_enter(&rd_lock);

	rsp = ddi_get_soft_state(rd_statep, getminor(dev));
	if (rsp != NULL) {
		rd_closed(rsp, otyp);
	} else {
		err = EINVAL;
	}

	mutex_exit(&rd_lock);

	return (err);
}

/*
 * minphys routine handed to physio(): clamp the transfer size of 'bp'
 * to rd_maxphys.
 */
static void
rd_minphys(struct buf *bp)
{
	if (bp->b_bcount <= rd_maxphys)
		return;

	bp->b_bcount = rd_maxphys;
}

/*
 * Copy 'nbytes' bytes between the buffer described by 'bp' and the
 * ramdisk, starting at byte 'offset' within the ramdisk.  The direction
 * follows B_READ in bp->b_flags.  The copy proceeds one window-full at
 * a time; rd_device_lock is held while the window is (re)mapped and
 * used, and dropped between chunks.
 */
static void
rd_rw(rd_devstate_t *rsp, struct buf *bp, offset_t offset, size_t nbytes)
{
	int	is_read = bp->b_flags & B_READ;
	caddr_t	data;

	bp_mapin(bp);
	data = bp->b_un.b_addr;

	for (; nbytes > 0; ) {
		offset_t	winoff;
		size_t		avail, chunk;
		caddr_t		winaddr, src, dst;

		mutex_enter(&rsp->rd_device_lock);
		rd_map_window(rsp, offset);

		/*
		 * Work out where 'offset' lands inside the window and
		 * how much of the window lies beyond it.
		 */
		winoff = offset - rsp->rd_window_base;
		avail = rsp->rd_window_size - winoff;
		winaddr = rsp->rd_window_virt + winoff;
		chunk = MIN(nbytes, avail);

		src = is_read ? winaddr : data;
		dst = is_read ? data : winaddr;
		(void) bcopy(src, dst, chunk);

		mutex_exit(&rsp->rd_device_lock);

		nbytes -= chunk;
		data += chunk;
		offset += chunk;
	}
}

/*
 * On Sparc, this function deals with both pseudo ramdisks and OBP ramdisks.
 * In the case where we freed the "bootarchive" ramdisk in bop_free_archive(),
 * we stop allowing access to the OBP ramdisks. To do so, we set the
 * bootops_obp_ramdisk_disabled flag to true, and we check if the operation
 * is for an OBP ramdisk. In this case we indicate an ENXIO error.
 */
static int
rd_strategy(struct buf *bp)
{
	rd_devstate_t	*rsp;
	offset_t	offset;

	rsp = ddi_get_soft_state(rd_statep, getminor(bp->b_edev));
	offset = bp->b_blkno * DEV_BSIZE;

#ifdef __sparc
	if (rsp == NULL ||
	    (bootops_obp_ramdisk_disabled &&
	    (rsp->rd_dip != rd_dip || rd_dip == NULL))) { /* OBP ramdisk */
#else /* __sparc */
	if (rsp == NULL) {
#endif /* __sparc */
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
	} else if (offset >= rsp->rd_size) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
	} else {
		size_t	nbytes;

		if (rsp->rd_kstat) {
			mutex_enter(rsp->rd_kstat->ks_lock);
			kstat_runq_enter(KSTAT_IO_PTR(rsp->rd_kstat));
			mutex_exit(rsp->rd_kstat->ks_lock);
		}

		nbytes = min(bp->b_bcount, rsp->rd_size - offset);

		rd_rw(rsp, bp, offset, nbytes);

		bp->b_resid = bp->b_bcount - nbytes;

		if (rsp->rd_kstat) {
			kstat_io_t *kioptr;

			mutex_enter(rsp->rd_kstat->ks_lock);
			kioptr = KSTAT_IO_PTR(rsp->rd_kstat);
			if (bp->b_flags & B_READ) {
				kioptr->nread += nbytes;
				kioptr->reads++;
			} else {
				kioptr->nwritten += nbytes;
				kioptr->writes++;
			}
			kstat_runq_exit(kioptr);
			mutex_exit(rsp->rd_kstat->ks_lock);
		}
	}

	biodone(bp);
	return (0);
}

/*
 * read(9E) entry point for the raw device.  Returns ENXIO if the minor
 * has no state structure and EINVAL if the starting offset is at or
 * beyond the end of the ramdisk; otherwise the transfer is handed to
 * physio() with rd_strategy() doing the work.
 */
/*ARGSUSED*/
static int
rd_read(dev_t dev, struct uio *uiop, cred_t *credp)
{
	rd_devstate_t	*rsp;

	rsp = ddi_get_soft_state(rd_statep, getminor(dev));

	/*
	 * Do not dereference a missing state structure; report it the
	 * same way rd_open() and rd_ioctl() do.
	 */
	if (rsp == NULL)
		return (ENXIO);

	if (uiop->uio_offset >= rsp->rd_size)
		return (EINVAL);

	return (physio(rd_strategy, NULL, dev, B_READ, rd_minphys, uiop));
}

/*
 * write(9E) entry point for the raw device.  Returns ENXIO if the minor
 * has no state structure and EINVAL if the starting offset is at or
 * beyond the end of the ramdisk; otherwise the transfer is handed to
 * physio() with rd_strategy() doing the work.
 */
/*ARGSUSED*/
static int
rd_write(dev_t dev, register struct uio *uiop, cred_t *credp)
{
	rd_devstate_t	*rsp;

	rsp = ddi_get_soft_state(rd_statep, getminor(dev));

	/*
	 * Do not dereference a missing state structure; report it the
	 * same way rd_open() and rd_ioctl() do.
	 */
	if (rsp == NULL)
		return (ENXIO);

	if (uiop->uio_offset >= rsp->rd_size)
		return (EINVAL);

	return (physio(rd_strategy, NULL, dev, B_WRITE, rd_minphys, uiop));
}

/*
 * RD_CREATE_DISK: create a pseudo ramdisk from the user's rd_ioctl
 * request.  The name is forcibly terminated at RD_NAME_LEN and the size
 * is rounded up to a whole number of pages.
 *
 * Returns EFAULT if the request cannot be copied in or out, EINVAL for
 * a zero size, EEXIST if the name is already in use, EAGAIN if the
 * resources cannot be allocated, and 0 on success.
 */
/*ARGSUSED*/
static int
rd_create_disk(dev_t dev, struct rd_ioctl *urip, int mode, int *rvalp)
{
	struct rd_ioctl	kri;
	size_t		size;
	int		err = 0;

	if (ddi_copyin(urip, &kri, sizeof (kri), mode) == -1)
		return (EFAULT);

	kri.ri_name[RD_NAME_LEN] = '\0';

	size = kri.ri_size;
	if (size == 0)
		return (EINVAL);
	size = ptob(btopr(size));

	mutex_enter(&rd_lock);
	if (rd_find_named_disk(kri.ri_name) != NULL) {
		err = EEXIST;
	} else if (rd_alloc_resources(kri.ri_name, 0, size, rd_dip) == NULL) {
		err = EAGAIN;
	}
	mutex_exit(&rd_lock);

	if (err != 0)
		return (err);

	if (ddi_copyout(&kri, urip, sizeof (kri), mode) == -1)
		return (EFAULT);
	return (0);
}

/*
 * RD_DELETE_DISK: destroy the pseudo ramdisk named in the user's
 * rd_ioctl request.
 *
 * Returns EFAULT if the request cannot be copied in, EINVAL if no
 * pseudo ramdisk has that name (OBP-created ramdisks cannot be
 * deleted this way), EBUSY if the ramdisk is open, and 0 on success.
 */
/*ARGSUSED*/
static int
rd_delete_disk(dev_t dev, struct rd_ioctl *urip, int mode)
{
	struct rd_ioctl	kri;
	rd_devstate_t	*rsp;
	int		err = 0;

	if (ddi_copyin(urip, &kri, sizeof (kri), mode) == -1)
		return (EFAULT);

	kri.ri_name[RD_NAME_LEN] = '\0';

	mutex_enter(&rd_lock);

	rsp = rd_find_named_disk(kri.ri_name);
	if (rsp == NULL || rsp->rd_dip != rd_dip) {
		err = EINVAL;
	} else if (rd_is_open(rsp)) {
		err = EBUSY;
	} else {
		rd_dealloc_resources(rsp);
	}

	mutex_exit(&rd_lock);

	return (err);
}

/*
 * ioctl(9E) entry point.
 *
 * On the control device only RD_CREATE_DISK and RD_DELETE_DISK are
 * recognised (EINVAL otherwise), and both require the device to have
 * been opened for writing (EPERM otherwise).
 *
 * On a ramdisk device the faked vtoc, controller info, geometry and
 * media state are copied out on request; ENXIO is returned if the
 * minor has no state, EFAULT if a copyout fails and ENOTTY for any
 * other command.
 */
/*ARGSUSED*/
static int
rd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	minor_t		minor = getminor(dev);
	void		*uaddr = (void *)arg;
	enum dkio_state	dkstate;
	rd_devstate_t	*rsp;

	/*
	 * Ramdisk ioctls only apply to the master device.
	 */
	if (minor == RD_CTL_MINOR) {
		struct rd_ioctl *rip = (struct rd_ioctl *)arg;

		if (cmd != RD_CREATE_DISK && cmd != RD_DELETE_DISK)
			return (EINVAL);

		/*
		 * Both commands modify driver state, so write access
		 * to the control device is required.
		 */
		if ((mode & FWRITE) == 0)
			return (EPERM);

		if (cmd == RD_CREATE_DISK)
			return (rd_create_disk(dev, rip, mode, rvalp));
		return (rd_delete_disk(dev, rip, mode));
	}

	rsp = ddi_get_soft_state(rd_statep, minor);
	if (rsp == NULL)
		return (ENXIO);

	/*
	 * These are for faking out utilities like newfs.
	 */
	switch (cmd) {
	case DKIOCGVTOC:
		/*
		 * The vtoc layout depends on the caller's data model;
		 * a model which is neither of the two below copies
		 * nothing out and still succeeds.
		 */
		switch (ddi_model_convert_from(mode & FMODELS)) {
		case DDI_MODEL_ILP32: {
			struct vtoc32 vtoc32;

			vtoctovtoc32(rsp->rd_vtoc, vtoc32);
			if (ddi_copyout(&vtoc32, uaddr,
			    sizeof (struct vtoc32), mode) != 0)
				return (EFAULT);
			break;
		}

		case DDI_MODEL_NONE:
			if (ddi_copyout(&rsp->rd_vtoc, uaddr,
			    sizeof (struct vtoc), mode) != 0)
				return (EFAULT);
			break;
		}
		return (0);

	case DKIOCINFO:
		if (ddi_copyout(&rsp->rd_ci, uaddr,
		    sizeof (struct dk_cinfo), mode) != 0)
			return (EFAULT);
		return (0);

	case DKIOCG_VIRTGEOM:
	case DKIOCG_PHYGEOM:
	case DKIOCGGEOM:
		if (ddi_copyout(&rsp->rd_dkg, uaddr,
		    sizeof (struct dk_geom), mode) != 0)
			return (EFAULT);
		return (0);

	case DKIOCSTATE:
		/* the media is always there */
		dkstate = DKIO_INSERTED;
		if (ddi_copyout(&dkstate, uaddr,
		    sizeof (enum dkio_state), mode) != 0)
			return (EFAULT);
		return (0);

	default:
		return (ENOTTY);
	}
}


/*
 * Character/block entry points for the driver.  The initializer is
 * positional; slot names follow cb_ops(9S).
 */
static struct cb_ops rd_cb_ops = {
	rd_open,	/* open */
	rd_close,	/* close */
	rd_strategy,	/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	rd_read,	/* read */
	rd_write,	/* write */
	rd_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab (not a STREAMS driver) */
	D_NEW | D_MP	/* driver compatibility flags */
};

/*
 * Device operations for the driver.  The initializer is positional;
 * slot names follow dev_ops(9S).
 */
static struct dev_ops rd_ops = {
	DEVO_REV,	/* devo_rev */
	0,		/* refcnt */
	rd_getinfo,	/* getinfo */
	nulldev,	/* identify */
	nulldev,	/* probe */
	rd_attach,	/* attach */
	rd_detach,	/* detach */
	nodev,		/* reset */
	&rd_cb_ops,	/* cb_ops */
	(struct bus_ops *)0,	/* bus_ops (none) */
	NULL,		/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};


extern struct mod_ops mod_driverops;

/*
 * Module linkage information for a loadable driver.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* this module is a device driver */
	"ramdisk driver",	/* description string */
	&rd_ops			/* driver ops */
};

/*
 * Linkage structure passed to mod_install(), mod_remove() and
 * mod_info() by _init(), _fini() and _info().
 */
static struct modlinkage modlinkage = {
	MODREV_1,	/* revision of the loadable modules system */
	&modldrv,	/* the single linkage structure for this module */
	0		/* terminator */
};

/*
 * Module load: set up the soft state handle and the global lock, then
 * install the module.  If installation fails the lock and soft state
 * are torn down again.  Returns 0 or the error from the failing step.
 */
int
_init(void)
{
	int err;

	err = ddi_soft_state_init(&rd_statep, sizeof (rd_devstate_t), 0);
	if (err != 0)
		return (err);

	mutex_init(&rd_lock, NULL, MUTEX_DRIVER, NULL);

	err = mod_install(&modlinkage);
	if (err == 0)
		return (0);

	mutex_destroy(&rd_lock);
	ddi_soft_state_fini(&rd_statep);
	return (err);
}

/*
 * Module unload: remove the module and, only if that succeeds, release
 * the soft state handle and the global lock.  Returns the result of
 * mod_remove().
 */
int
_fini(void)
{
	int err = mod_remove(&modlinkage);

	if (err == 0) {
		ddi_soft_state_fini(&rd_statep);
		mutex_destroy(&rd_lock);
	}
	return (err);
}

/*
 * Module information: pass the request straight to mod_info() with our
 * linkage structure and return its result.
 */
int
_info(struct modinfo *modinfop)
{
	int rv = mod_info(&modlinkage, modinfop);

	return (rv);
}