OpenSolaris_b135/uts/common/io/kstat.c

Compare this file to the similar file:
Show the results in this format:

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


/*
 * kernel statistics driver
 */

#include <sys/types.h>
#include <sys/time.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/file.h>
#include <sys/cmn_err.h>
#include <sys/t_lock.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/uio.h>
#include <sys/kmem.h>
#include <sys/cred.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/ioccom.h>
#include <sys/cpuvar.h>
#include <sys/stat.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/kobj.h>
#include <sys/kstat.h>
#include <sys/atomic.h>
#include <sys/policy.h>
#include <sys/zone.h>

/*
 * The driver's devinfo node.  Recorded by kstat_attach() and handed back
 * by kstat_info() for DDI_INFO_DEVT2DEVINFO requests.
 */
static dev_info_t *kstat_devi;

static int
read_kstat_data(int *rvalp, void *user_ksp, int flag)
{
	kstat_t user_kstat, *ksp;
#ifdef _MULTI_DATAMODEL
	kstat32_t user_kstat32;
#endif
	void *kbuf = NULL;
	size_t kbufsize, ubufsize, copysize;
	int error = 0;
	uint_t model;

	switch (model = ddi_model_convert_from(flag & FMODELS)) {
#ifdef _MULTI_DATAMODEL
	case DDI_MODEL_ILP32:
		if (copyin(user_ksp, &user_kstat32, sizeof (kstat32_t)) != 0)
			return (EFAULT);
		user_kstat.ks_kid = user_kstat32.ks_kid;
		user_kstat.ks_data = (void *)(uintptr_t)user_kstat32.ks_data;
		user_kstat.ks_data_size = (size_t)user_kstat32.ks_data_size;
		break;
#endif
	default:
	case DDI_MODEL_NONE:
		if (copyin(user_ksp, &user_kstat, sizeof (kstat_t)) != 0)
			return (EFAULT);
	}

	ksp = kstat_hold_bykid(user_kstat.ks_kid, getzoneid());
	if (ksp == NULL) {
		/*
		 * There is no kstat with the specified KID
		 */
		return (ENXIO);
	}
	if (ksp->ks_flags & KSTAT_FLAG_INVALID) {
		/*
		 * The kstat exists, but is momentarily in some
		 * indeterminate state (e.g. the data section is not
		 * yet initialized).  Try again in a few milliseconds.
		 */
		kstat_rele(ksp);
		return (EAGAIN);
	}

	/*
	 * If it's a fixed-size kstat, allocate the buffer now, so we
	 * don't have to do it under the kstat's data lock.  (If it's a
	 * var-size kstat, we don't know the size until after the update
	 * routine is called, so we can't do this optimization.)
	 * The allocator relies on this behavior to prevent recursive
	 * mutex_enter in its (fixed-size) kstat update routine.
	 * It's a zalloc to prevent unintentional exposure of random
	 * juicy morsels of (old) kernel data.
	 */
	if (!(ksp->ks_flags & KSTAT_FLAG_VAR_SIZE)) {
		kbufsize = ksp->ks_data_size;
		kbuf = kmem_zalloc(kbufsize + 1, KM_NOSLEEP);
		if (kbuf == NULL) {
			kstat_rele(ksp);
			return (EAGAIN);
		}
	}
	KSTAT_ENTER(ksp);
	if ((error = KSTAT_UPDATE(ksp, KSTAT_READ)) != 0) {
		KSTAT_EXIT(ksp);
		kstat_rele(ksp);
		if (kbuf != NULL)
			kmem_free(kbuf, kbufsize + 1);
		return (error);
	}

	kbufsize = ksp->ks_data_size;
	ubufsize = user_kstat.ks_data_size;

	if (ubufsize < kbufsize) {
		error = ENOMEM;
	} else {
		if (kbuf == NULL)
			kbuf = kmem_zalloc(kbufsize + 1, KM_NOSLEEP);
		if (kbuf == NULL) {
			error = EAGAIN;
		} else {
			error = KSTAT_SNAPSHOT(ksp, kbuf, KSTAT_READ);
		}
	}

	/*
	 * The following info must be returned to user level,
	 * even if the the update or snapshot failed.  This allows
	 * kstat readers to get a handle on variable-size kstats,
	 * detect dormant kstats, etc.
	 */
	user_kstat.ks_ndata	= ksp->ks_ndata;
	user_kstat.ks_data_size	= kbufsize;
	user_kstat.ks_flags	= ksp->ks_flags;
	user_kstat.ks_snaptime	= ksp->ks_snaptime;

	*rvalp = kstat_chain_id;
	KSTAT_EXIT(ksp);
	kstat_rele(ksp);

	/*
	 * Copy the buffer containing the kstat back to userland.
	 */
	copysize = kbufsize;
	if (kbuf != NULL) {
#ifdef _MULTI_DATAMODEL
		kstat32_t *k32;
		kstat_t *k;
#endif
		int i;

		switch (model) {
#ifdef _MULTI_DATAMODEL
		case DDI_MODEL_ILP32:

			if (ksp->ks_type == KSTAT_TYPE_NAMED) {
				kstat_named_t *kn = kbuf;

				for (i = 0; i < user_kstat.ks_ndata; kn++, i++)
					switch (kn->data_type) {
					/*
					 * Named statistics have fields of type
					 * 'long'.  For a 32-bit application
					 * looking at a 64-bit kernel,
					 * forcibly truncate these 64-bit
					 * quantities to 32-bit values.
					 */
					case KSTAT_DATA_LONG:
						kn->value.i32 =
						    (int32_t)kn->value.l;
						kn->data_type =
						    KSTAT_DATA_INT32;
						break;
					case KSTAT_DATA_ULONG:
						kn->value.ui32 =
						    (uint32_t)kn->value.ul;
						kn->data_type =
						    KSTAT_DATA_UINT32;
						break;
					/*
					 * Long strings must be massaged before
					 * being copied out to userland.  Do
					 * that here.
					 */
					case KSTAT_DATA_STRING:
						if (KSTAT_NAMED_STR_PTR(kn)
						    == NULL)
							break;
						/*
						 * The offsets within the
						 * buffers are the same, so add
						 * the offset to the beginning
						 * of the new buffer to fix the
						 * pointer.
						 */
						KSTAT_NAMED_STR_PTR(kn) =
						    (char *)user_kstat.ks_data +
						    (KSTAT_NAMED_STR_PTR(kn) -
						    (char *)kbuf);
						/*
						 * Make sure the string pointer
						 * lies within the allocated
						 * buffer.
						 */
						ASSERT(KSTAT_NAMED_STR_PTR(kn) +
						    KSTAT_NAMED_STR_BUFLEN(kn)
						    <=
						    ((char *)
						    user_kstat.ks_data +
						    ubufsize));
						ASSERT(KSTAT_NAMED_STR_PTR(kn)
						    >=
						    (char *)
						    ((kstat_named_t *)
						    user_kstat.ks_data +
						    user_kstat.ks_ndata));
						/*
						 * Cast 64-bit ptr to 32-bit.
						 */
						kn->value.str.addr.ptr32 =
						    (caddr32_t)(uintptr_t)
						    KSTAT_NAMED_STR_PTR(kn);
						break;
					default:
						break;
					}
			}

			if (user_kstat.ks_kid != 0)
				break;

			/*
			 * This is the special case of the kstat header
			 * list for the entire system.  Reshape the
			 * array in place, then copy it out.
			 */
			k32 = kbuf;
			k = kbuf;
			for (i = 0; i < user_kstat.ks_ndata; k32++, k++, i++) {
				k32->ks_crtime		= k->ks_crtime;
				k32->ks_next		= 0;
				k32->ks_kid		= k->ks_kid;
				(void) strcpy(k32->ks_module, k->ks_module);
				k32->ks_resv		= k->ks_resv;
				k32->ks_instance	= k->ks_instance;
				(void) strcpy(k32->ks_name, k->ks_name);
				k32->ks_type		= k->ks_type;
				(void) strcpy(k32->ks_class, k->ks_class);
				k32->ks_flags		= k->ks_flags;
				k32->ks_data		= 0;
				k32->ks_ndata		= k->ks_ndata;
				if (k->ks_data_size > UINT32_MAX) {
					error = EOVERFLOW;
					break;
				}
				k32->ks_data_size = (size32_t)k->ks_data_size;
				k32->ks_snaptime	= k->ks_snaptime;
			}

			/*
			 * XXX	In this case we copy less data than is
			 *	claimed in the header.
			 */
			copysize = user_kstat.ks_ndata * sizeof (kstat32_t);
			break;
#endif	/* _MULTI_DATAMODEL */
		default:
		case DDI_MODEL_NONE:
			if (ksp->ks_type == KSTAT_TYPE_NAMED) {
				kstat_named_t *kn = kbuf;

				for (i = 0; i < user_kstat.ks_ndata; kn++, i++)
					switch (kn->data_type) {
#ifdef _LP64
					case KSTAT_DATA_LONG:
						kn->data_type =
						    KSTAT_DATA_INT64;
						break;
					case KSTAT_DATA_ULONG:
						kn->data_type =
						    KSTAT_DATA_UINT64;
						break;
#endif	/* _LP64 */
					case KSTAT_DATA_STRING:
						if (KSTAT_NAMED_STR_PTR(kn)
						    == NULL)
							break;
						KSTAT_NAMED_STR_PTR(kn) =
						    (char *)user_kstat.ks_data +
						    (KSTAT_NAMED_STR_PTR(kn) -
						    (char *)kbuf);
						ASSERT(KSTAT_NAMED_STR_PTR(kn) +
						    KSTAT_NAMED_STR_BUFLEN(kn)
						    <=
						    ((char *)
						    user_kstat.ks_data +
						    ubufsize));
						ASSERT(KSTAT_NAMED_STR_PTR(kn)
						    >=
						    (char *)
						    ((kstat_named_t *)
						    user_kstat.ks_data +
						    user_kstat.ks_ndata));
						break;
					default:
						break;
					}
			}
			break;
		}

		if (error == 0 &&
		    copyout(kbuf, user_kstat.ks_data, copysize))
			error = EFAULT;
		kmem_free(kbuf, kbufsize + 1);
	}

	/*
	 * We have modified the ks_ndata, ks_data_size, ks_flags, and
	 * ks_snaptime fields of the user kstat; now copy it back to userland.
	 */
	switch (model) {
#ifdef _MULTI_DATAMODEL
	case DDI_MODEL_ILP32:
		if (kbufsize > UINT32_MAX) {
			error = EOVERFLOW;
			break;
		}
		user_kstat32.ks_ndata		= user_kstat.ks_ndata;
		user_kstat32.ks_data_size	= (size32_t)kbufsize;
		user_kstat32.ks_flags		= user_kstat.ks_flags;
		user_kstat32.ks_snaptime	= user_kstat.ks_snaptime;
		if (copyout(&user_kstat32, user_ksp, sizeof (kstat32_t)) &&
		    error == 0)
			error = EFAULT;
		break;
#endif
	default:
	case DDI_MODEL_NONE:
		if (copyout(&user_kstat, user_ksp, sizeof (kstat_t)) &&
		    error == 0)
			error = EFAULT;
		break;
	}

	return (error);
}

/*
 * Service a KSTAT_IOC_WRITE request.
 *
 * user_ksp points at the caller's kstat header (a kstat32_t for ILP32
 * callers, a kstat_t otherwise); 'flag' carries the caller's data model
 * and 'cred' the caller's credentials.  The caller's data buffer is
 * copied in, checked against the current shape of the kstat named by
 * ks_kid (size, element count and, for named kstats, per-element types
 * and string pointers), and then written through the kstat's snapshot
 * and update routines.  *rvalp is set to the current kstat chain ID when
 * the write is attempted.
 *
 * Returns 0 on success, or:
 *	EPERM	secpolicy_sys_config() denied the request
 *	EFAULT	a copyin of user memory failed
 *	EAGAIN	the kstat is flagged invalid, or memory was unavailable
 *	ENXIO	no kstat with the requested KID
 *	EACCES	the kstat is not flagged writable
 *	EINVAL	the user's data does not match the kstat's size, count,
 *		types, or contains an out-of-range string pointer
 *	or whatever the kstat's update/snapshot routine returned.
 */
static int
write_kstat_data(int *rvalp, void *user_ksp, int flag, cred_t *cred)
{
	kstat_t user_kstat, *ksp;
#ifdef _MULTI_DATAMODEL
	kstat32_t user_kstat32;
#endif
	void *buf = NULL;
	size_t bufsize;
	int error = 0;

	if (secpolicy_sys_config(cred, B_FALSE) != 0)
		return (EPERM);

	switch (ddi_model_convert_from(flag & FMODELS)) {
#ifdef _MULTI_DATAMODEL
	case DDI_MODEL_ILP32:
		if (copyin(user_ksp, &user_kstat32, sizeof (kstat32_t)))
			return (EFAULT);
		/*
		 * These are the only fields we actually look at.
		 */
		user_kstat.ks_kid = user_kstat32.ks_kid;
		user_kstat.ks_data = (void *)(uintptr_t)user_kstat32.ks_data;
		user_kstat.ks_data_size = (size_t)user_kstat32.ks_data_size;
		user_kstat.ks_ndata = user_kstat32.ks_ndata;
		break;
#endif
	default:
	case DDI_MODEL_NONE:
		if (copyin(user_ksp, &user_kstat, sizeof (kstat_t)))
			return (EFAULT);
	}

	bufsize = user_kstat.ks_data_size;
	buf = kmem_alloc(bufsize + 1, KM_NOSLEEP);
	if (buf == NULL)
		return (EAGAIN);

	if (copyin(user_kstat.ks_data, buf, bufsize)) {
		kmem_free(buf, bufsize + 1);
		return (EFAULT);
	}

	ksp = kstat_hold_bykid(user_kstat.ks_kid, getzoneid());
	if (ksp == NULL) {
		kmem_free(buf, bufsize + 1);
		return (ENXIO);
	}
	if (ksp->ks_flags & KSTAT_FLAG_INVALID) {
		kstat_rele(ksp);
		kmem_free(buf, bufsize + 1);
		return (EAGAIN);
	}
	if (!(ksp->ks_flags & KSTAT_FLAG_WRITABLE)) {
		kstat_rele(ksp);
		kmem_free(buf, bufsize + 1);
		return (EACCES);
	}

	/*
	 * With KSTAT_FLAG_VAR_SIZE, one must call the kstat's update callback
	 * routine to ensure ks_data_size is up to date.
	 * In this case it makes sense to do it anyhow, as it will be shortly
	 * followed by a KSTAT_SNAPSHOT().
	 */
	KSTAT_ENTER(ksp);
	error = KSTAT_UPDATE(ksp, KSTAT_READ);
	if (error || user_kstat.ks_data_size != ksp->ks_data_size ||
	    user_kstat.ks_ndata != ksp->ks_ndata) {
		KSTAT_EXIT(ksp);
		kstat_rele(ksp);
		kmem_free(buf, bufsize + 1);
		return (error ? error : EINVAL);
	}

	/*
	 * We have to ensure that we don't accidentally change the type of
	 * existing kstat_named statistics when writing over them.
	 * Since read_kstat_data() modifies some of the types on their way
	 * out, we need to be sure to handle these types separately.
	 */
	if (ksp->ks_type == KSTAT_TYPE_NAMED) {
		void *kbuf;
		kstat_named_t *kold;
		kstat_named_t *knew = buf;
		/* user address of the data buffer, for string pointer checks */
		uintptr_t ubase = (uintptr_t)user_kstat.ks_data;
		uintptr_t uptr;
		/* strings must live after the array of kstat_named_t's */
		size_t strs_start = ksp->ks_ndata * sizeof (kstat_named_t);
		size_t uoff;
		int i;

#ifdef	_MULTI_DATAMODEL
		int model = ddi_model_convert_from(flag & FMODELS);
#endif

		/*
		 * Since ksp->ks_data may be NULL, we need to take a snapshot
		 * of the published data to look at the types.
		 */
		kbuf = kmem_alloc(bufsize + 1, KM_NOSLEEP);
		if (kbuf == NULL) {
			KSTAT_EXIT(ksp);
			kstat_rele(ksp);
			kmem_free(buf, bufsize + 1);
			return (EAGAIN);
		}
		error = KSTAT_SNAPSHOT(ksp, kbuf, KSTAT_READ);
		if (error) {
			KSTAT_EXIT(ksp);
			kstat_rele(ksp);
			kmem_free(kbuf, bufsize + 1);
			kmem_free(buf, bufsize + 1);
			return (error);
		}
		kold = kbuf;

		/*
		 * read_kstat_data() changes the types of
		 * KSTAT_DATA_LONG / KSTAT_DATA_ULONG, so we need to
		 * make sure that these (modified) types are considered
		 * valid.
		 */
		for (i = 0; i < ksp->ks_ndata; i++, kold++, knew++) {
			switch (kold->data_type) {
#ifdef	_MULTI_DATAMODEL
			case KSTAT_DATA_LONG:
				switch (model) {
				case DDI_MODEL_ILP32:
					if (knew->data_type ==
					    KSTAT_DATA_INT32) {
						knew->value.l =
						    (long)knew->value.i32;
						knew->data_type =
						    KSTAT_DATA_LONG;
					}
					break;
				default:
				case DDI_MODEL_NONE:
#ifdef _LP64
					if (knew->data_type ==
					    KSTAT_DATA_INT64) {
						knew->value.l =
						    (long)knew->value.i64;
						knew->data_type =
						    KSTAT_DATA_LONG;
					}
#endif /* _LP64 */
					break;
				}
				break;
			case KSTAT_DATA_ULONG:
				switch (model) {
				case DDI_MODEL_ILP32:
					if (knew->data_type ==
					    KSTAT_DATA_UINT32) {
						knew->value.ul =
						    (ulong_t)knew->value.ui32;
						knew->data_type =
						    KSTAT_DATA_ULONG;
					}
					break;
				default:
				case DDI_MODEL_NONE:
#ifdef _LP64
					if (knew->data_type ==
					    KSTAT_DATA_UINT64) {
						knew->value.ul =
						    (ulong_t)knew->value.ui64;
						knew->data_type =
						    KSTAT_DATA_ULONG;
					}
#endif /* _LP64 */
					break;
				}
				break;
#endif /* _MULTI_DATAMODEL */
			case KSTAT_DATA_STRING:
				if (knew->data_type != KSTAT_DATA_STRING) {
					KSTAT_EXIT(ksp);
					kstat_rele(ksp);
					kmem_free(kbuf, bufsize + 1);
					kmem_free(buf, bufsize + 1);
					return (EINVAL);
				}

#ifdef _MULTI_DATAMODEL
				if (model == DDI_MODEL_ILP32)
					KSTAT_NAMED_STR_PTR(knew) =
					    (char *)(uintptr_t)
						knew->value.str.addr.ptr32;
#endif
				/*
				 * Nothing special for NULL
				 */
				if (KSTAT_NAMED_STR_PTR(knew) == NULL)
					break;

				/*
				 * Check to see that the pointers all point
				 * to within the buffer and after the array
				 * of kstat_named_t's.  The check is done on
				 * the offset from the start of the user's
				 * buffer rather than on "pointer + length",
				 * so that a pointer near the top of the
				 * address space cannot wrap around and slip
				 * past the upper bound.
				 */
				uptr = (uintptr_t)KSTAT_NAMED_STR_PTR(knew);
				if (uptr < ubase) {
					KSTAT_EXIT(ksp);
					kstat_rele(ksp);
					kmem_free(kbuf, bufsize + 1);
					kmem_free(buf, bufsize + 1);
					return (EINVAL);
				}
				uoff = uptr - ubase;
				if (uoff < strs_start ||
				    uoff > ksp->ks_data_size ||
				    KSTAT_NAMED_STR_BUFLEN(knew) >
				    ksp->ks_data_size - uoff) {
					KSTAT_EXIT(ksp);
					kstat_rele(ksp);
					kmem_free(kbuf, bufsize + 1);
					kmem_free(buf, bufsize + 1);
					return (EINVAL);
				}

				/*
				 * Update the pointers within the buffer
				 */
				KSTAT_NAMED_STR_PTR(knew) = (char *)buf + uoff;
				break;
			default:
				break;
			}
		}

		kold = kbuf;
		knew = buf;

		/*
		 * Now make sure the types are what we expected them to be.
		 */
		for (i = 0; i < ksp->ks_ndata; i++, kold++, knew++)
			if (kold->data_type != knew->data_type) {
				KSTAT_EXIT(ksp);
				kstat_rele(ksp);
				kmem_free(kbuf, bufsize + 1);
				kmem_free(buf, bufsize + 1);
				return (EINVAL);
			}

		kmem_free(kbuf, bufsize + 1);
	}

	/*
	 * Push the validated data into the kstat, then let its update
	 * routine act on the new values.
	 */
	error = KSTAT_SNAPSHOT(ksp, buf, KSTAT_WRITE);
	if (!error)
		error = KSTAT_UPDATE(ksp, KSTAT_WRITE);
	*rvalp = kstat_chain_id;
	KSTAT_EXIT(ksp);
	kstat_rele(ksp);
	kmem_free(buf, bufsize + 1);
	return (error);
}

/*
 * ioctl entry point: dispatch the three kstat requests.  'data' is the
 * user address of the caller's kstat header for the read and write
 * requests.  Any other command is rejected with EINVAL.
 */
/*ARGSUSED*/
static int
kstat_ioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cr, int *rvalp)
{
	void *uaddr = (void *)data;

	switch (cmd) {
	case KSTAT_IOC_CHAIN_ID:
		*rvalp = kstat_chain_id;
		return (0);

	case KSTAT_IOC_READ:
		return (read_kstat_data(rvalp, uaddr, flag));

	case KSTAT_IOC_WRITE:
		return (write_kstat_data(rvalp, uaddr, flag, cr));

	default:
		break;
	}

	/* invalid request */
	return (EINVAL);
}

/*
 * getinfo entry point.  Reports the saved devinfo node for
 * DDI_INFO_DEVT2DEVINFO and NULL for DDI_INFO_DEVT2INSTANCE; every
 * other query fails.
 */
/* ARGSUSED */
static int
kstat_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
	void **result)
{
	if (infocmd == DDI_INFO_DEVT2DEVINFO) {
		*result = kstat_devi;
		return (DDI_SUCCESS);
	}

	if (infocmd == DDI_INFO_DEVT2INSTANCE) {
		*result = NULL;
		return (DDI_SUCCESS);
	}

	return (DDI_FAILURE);
}

/*
 * attach entry point.  Only DDI_ATTACH is supported: create the "kstat"
 * character minor node and remember the devinfo node.  If the minor node
 * cannot be created, any minor nodes are removed and the attach fails.
 */
static int
kstat_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int status;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	status = ddi_create_minor_node(devi, "kstat", S_IFCHR,
	    0, DDI_PSEUDO, NULL);
	if (status != DDI_FAILURE) {
		kstat_devi = devi;
		return (DDI_SUCCESS);
	}

	ddi_remove_minor_node(devi, NULL);
	return (DDI_FAILURE);
}

/*
 * detach entry point.  Only DDI_DETACH is supported: remove the minor
 * node(s) and forget the devinfo node, so that kstat_info() cannot hand
 * out a pointer to a node that is no longer attached.
 */
static int
kstat_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ddi_remove_minor_node(devi, NULL);
	kstat_devi = NULL;
	return (DDI_SUCCESS);
}

/*
 * Character/block entry points.  ioctl (kstat_ioctl) is the only
 * driver-specific entry; open and close are nulldev, and the remaining
 * entry points are nodev / nochpoll.
 */
static struct cb_ops kstat_cb_ops = {
	nulldev,		/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	kstat_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	0,			/* streamtab  */
	D_NEW | D_MP		/* Driver compatibility flag */
};

/*
 * Device operations: wires kstat_info, kstat_attach and kstat_detach
 * into the framework and points at kstat_cb_ops for the character
 * entry points.
 */
static struct dev_ops kstat_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	kstat_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	kstat_attach,		/* attach */
	kstat_detach,		/* detach */
	nodev,			/* reset */
	&kstat_cb_ops,		/* driver operations */
	(struct bus_ops *)0,	/* no bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};

/*
 * Module linkage information for the driver.
 */
static struct modldrv modldrv = {
	&mod_driverops,			/* this module is a driver */
	"kernel statistics driver",	/* description */
	&kstat_ops,			/* driver ops */
};

/*
 * Linkage structure passed to mod_install/mod_remove/mod_info.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

/*
 * Module load entry point: install the module described by modlinkage
 * and return mod_install()'s result.
 */
int
_init(void)
{
	int rv;

	rv = mod_install(&modlinkage);
	return (rv);
}

/*
 * Module unload entry point: remove the module described by modlinkage
 * and return mod_remove()'s result.
 */
int
_fini(void)
{
	int rv;

	rv = mod_remove(&modlinkage);
	return (rv);
}

/*
 * Module information entry point: fill in *modinfop via mod_info() and
 * return its result.
 */
int
_info(struct modinfo *modinfop)
{
	int rv;

	rv = mod_info(&modlinkage, modinfop);
	return (rv);
}