OpenSolaris_b135/cmd/dcs/sparc/sun4u/ri_init.c

Compare this file to the similar file:
Show the results in this format:

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Implementation of ri_init routine for obtaining mapping
 * of system board attachment points to physical devices and to
 * the Reconfiguration Coordination Manager (RCM) client usage
 * of these devices.
 */
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <kstat.h>
#include <sys/param.h>
#include <sys/sbd_ioctl.h>
#include "rsrc_info_impl.h"

/*
 * Occupant types exported by cfgadm sbd plugin via
 * config_admin(3CFGADM).
 */
#define	SBD_CM_CPU	"cpu"
#define	SBD_CM_MEM	"memory"
#define	SBD_CM_IO	"io"

/*
 * RCM abstract resource names.
 */
#define	RCM_MEM_ALL	"SUNW_memory"
#define	RCM_CPU_ALL	"SUNW_cpu"
#define	RCM_CPU		RCM_CPU_ALL"/cpu"

#define	KBYTE		1024
#define	MBYTE		1048576
#define	USAGE_ALLOC_SIZE	128

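/*
 * Return value used by io_cm_info() to tell ri_init() that no devinfo
 * node could be obtained (di_init() failed), so that ri_init() skips
 * the component, e.g. a node with an unattached driver, instead of
 * failing the whole operation.
 */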
#define	RI_NODE_NIL	1

/*
 * This code is CMP aware as it parses the
 * cfgadm info field for individual cpuids.
 */
#define	CPUID_SEP	","
#define	CPU_INFO_FMT	"cpuid=%s speed=%d ecache=%d"

/*
 * Per attachment point cfgadm listing as filled in by dyn_ap_ids(): the
 * list data array returned by config_list_ext() and its entry count.
 * Entry 0 is the static system board attachment point; the entries
 * after it are the board's components.
 */
typedef struct {
	cfga_list_data_t *cfga_list_data;
	int		nlist;
} apd_t;

/*
 * System memory statistics gathered once in rcm_init().
 */
typedef struct {
	long		pagesize;	/* sysconf(_SC_PAGE_SIZE) */
	long		syspages;	/* sysconf(_SC_PHYS_PAGES) */
	long		sysmb;		/* total memory in MB, rounded up */
} mem_stat_t;

/* Shorthand for the mem_stat_t members embedded in rcmd_t. */
#define	ms_syspages	m_stat.syspages
#define	ms_pagesize	m_stat.pagesize
#define	ms_sysmb	m_stat.sysmb

typedef int32_t		cpuid_t;

/*
 * E-cache sizes collected from the device tree by ecache_info_init()
 * and find_cpu_nodes().
 */
typedef struct {
	int	cpuid_max;	/* maximum cpuid value */
	int	ecache_curr;	/* cached during tree walk */
	int	*ecache_sizes;	/* indexed by cpuid */
} ecache_info_t;

/*
 * RCM working state shared by the routines in this file for the
 * duration of one ri_init() call.
 */
typedef struct {
	rcm_handle_t	*hdl;		/* from rcm_alloc_handle() */
	rcm_info_t	*offline_query_info; /* cached offline query result */
	char		**rlist;	/* NULL terminated resource names */
	int		nrlist;		/* entries in use in rlist */
	cpuid_t		*cpus;		/* cpu ids covered by the query */
	int		ncpus;		/* entries in use in cpus */
	int		ndevs;		/* worst case size of rlist/cpus */
	uint_t		query_pages;	/* memory pages covered by the query */
	mem_stat_t	m_stat;
	ecache_info_t	ecache_info;
} rcmd_t;

/* A (resource name, info string) pair; element type of lookup_entry_t.usage */
typedef struct {
	const char	*rsrc;
	const char	*info;
} usage_t;

/* Lookup table entry for matching IO devices to RCM resource usage */
typedef struct {
	int		index;		/* index into the table array */
	di_node_t	node;		/* associated devinfo node */
	char		*name;		/* device full path name */
	int		n_usage;
	usage_t		*usage;
} lookup_entry_t;

/*
 * Table of lookup_entry_t. io_cm_info() starts with both counters at 0
 * and table NULL.
 * NOTE(review): presumably n_entries is the number of entries in use and
 * n_slots the number allocated - confirm in add_lookup_entry().
 */
typedef struct {
	int		n_entries;
	int		n_slots;
	lookup_entry_t	*table;
} lookup_table_t;

/*
 * Argument handed to the devinfo tree walk started by io_cm_info().
 */
typedef struct {
	int			err;
	di_node_t		node;
	char			*pathbuf;
	lookup_table_t		*table;
	di_devlink_handle_t	linkhd;
} devinfo_arg_t;

static int dyn_ap_ids(char *, cfga_list_data_t **, int *);
static int rcm_init(rcmd_t *, apd_t [], int, int);
static void rcm_fini(rcmd_t *);
static int rcm_query_init(rcmd_t *, apd_t [], int);
static int cap_request(ri_hdl_t *, rcmd_t *);
static int syscpus(cpuid_t **, int *);
static int cpu_cap_request(ri_hdl_t *, rcmd_t *);
static int mem_cap_request(ri_hdl_t *, rcmd_t *);
static int (*cm_rcm_qpass_func(cfga_type_t))(cfga_list_data_t *, rcmd_t *);
static int cpu_rcm_qpass(cfga_list_data_t *, rcmd_t *);
static int mem_rcm_qpass(cfga_list_data_t *, rcmd_t *);
static int io_rcm_qpass(cfga_list_data_t *, rcmd_t *);
static int (*cm_info_func(cfga_type_t))(ri_ap_t *, cfga_list_data_t *, int,
    rcmd_t *);
static int cpu_cm_info(ri_ap_t *, cfga_list_data_t *, int, rcmd_t *);
static int i_cpu_cm_info(processorid_t, int, int, ri_ap_t *, rcmd_t *);
static int mem_cm_info(ri_ap_t *, cfga_list_data_t *, int, rcmd_t *);
static int io_cm_info(ri_ap_t *, cfga_list_data_t *, int, rcmd_t *);
static int ident_leaf(di_node_t);
static int mk_drv_inst(di_node_t, char [], char *);
static int devinfo_node_walk(di_node_t, void *);
static int devinfo_minor_walk(di_node_t, di_minor_t, void *);
static int devinfo_devlink_walk(di_devlink_t, void *);
static int add_rcm_clients(ri_client_t **, rcmd_t *, rcm_info_t *, int, int *);
static int rcm_ignore(char *, char *);
static int add_query_state(rcmd_t *, ri_client_t *, const char *, const char *);
static int state2query(int);
static void dev_list_append(ri_dev_t **, ri_dev_t *);
static void dev_list_cpu_insert(ri_dev_t **, ri_dev_t *, processorid_t);
static rcm_info_tuple_t *tuple_lookup(rcmd_t *, const char *, const char *);
static ri_ap_t *ri_ap_alloc(char *, ri_hdl_t *);
static ri_dev_t *ri_dev_alloc(void);
static ri_dev_t *io_dev_alloc(char *);
static ri_client_t *ri_client_alloc(char *, char *);
static void apd_tbl_free(apd_t [], int);
static char *pstate2str(int);
static int ecache_info_init(ecache_info_t *);
static int find_cpu_nodes(di_node_t, void *);
static int prop_lookup_int(di_node_t, di_prom_handle_t, char *, int **);
static int add_lookup_entry(lookup_table_t *, const char *, di_node_t);
static int table_compare_names(const void *, const void *);
static int table_compare_indices(const void *, const void *);
static lookup_entry_t *lookup(lookup_table_t *table, const char *);
static int add_usage(lookup_entry_t *, const char *, rcm_info_tuple_t *);
static void empty_table(lookup_table_t *);

#ifdef DEBUG
static void		dump_apd_tbl(FILE *, apd_t *, int);
#endif /* DEBUG */

/*
 * Dispatch table keyed by cfgadm occupant type. cm_info_func() and
 * cm_rcm_qpass_func() search it with strcmp() against an attachment
 * point type and hand back the matching handler.
 */
static struct {
	char	*type;		/* SBD_CM_* occupant type string */
	int	(*cm_info)(ri_ap_t *, cfga_list_data_t *, int, rcmd_t *);
	int	(*cm_rcm_qpass)(cfga_list_data_t *, rcmd_t *);
} cm_ctl[] = {
	{SBD_CM_CPU,	cpu_cm_info,	cpu_rcm_qpass},
	{SBD_CM_MEM,	mem_cm_info,	mem_rcm_qpass},
	{SBD_CM_IO,	io_cm_info,	io_rcm_qpass}
};

/*
 * Table of known info string prefixes for RCM modules that do not
 * represent actual resource usage, but instead provide name translations
 * or sequencing within the RCM namespace. Since RCM provides no way to
 * filter these out, we must maintain this hack.
 *
 * The table is NULL terminated.
 * NOTE(review): presumably consulted by rcm_ignore() - confirm.
 */
static char *rcm_info_filter[] = {
	"Network interface",		/* Network naming module */
	NULL
};


/*
 * Allocate snapshot handle.
 *
 * Builds a resource-info snapshot for the given system board attachment
 * points and returns its handle through hdlp.
 *
 *	n_apids, ap_ids	attachment point ids to examine
 *	flags		request flags; any bit outside RI_REQ_MASK is
 *			rejected. RI_INCLUDE_QUERY additionally runs the
 *			RCM offline query and capacity change requests.
 *	hdlp		receives the new handle on success. Once the
 *			arguments have been validated it is set to NULL
 *			and stays NULL on failure.
 *
 * Returns RI_SUCCESS, RI_INVAL (bad arguments, or an attachment point
 * that could not be listed), RI_NOTSUP (unsupported flags) or RI_FAILURE.
 */
int
ri_init(int n_apids, char **ap_ids, int flags, ri_hdl_t **hdlp)
{
	int			i, j;
	ri_hdl_t		*ri_hdl;
	ri_ap_t			*ap_hdl;
	rcmd_t			*rcm = NULL;
	cfga_list_data_t	*cfga_ldata;
	apd_t			*apd, *apd_tbl = NULL;
	int			(*cm_info)(ri_ap_t *, cfga_list_data_t *,
				    int, rcmd_t *);
	int			rv = RI_SUCCESS;
	int			cm_info_rv;

	if (n_apids <= 0 || ap_ids == NULL || hdlp == NULL)
		return (RI_INVAL);

	if (flags & ~RI_REQ_MASK)
		return (RI_NOTSUP);

	*hdlp = NULL;
	if ((ri_hdl = calloc(1, sizeof (*ri_hdl))) == NULL ||
	    (rcm = calloc(1, sizeof (*rcm))) == NULL ||
	    (apd_tbl = calloc(n_apids, sizeof (*apd_tbl))) == NULL) {
		dprintf((stderr, "calloc: %s\n", strerror(errno)));
		rv = RI_FAILURE;
		goto out;
	}

	/*
	 * Create mapping of boards to components.
	 */
	for (i = 0, apd = apd_tbl; i < n_apids; i++, apd++) {
		if (dyn_ap_ids(ap_ids[i], &apd->cfga_list_data,
		    &apd->nlist) == -1) {
			rv = RI_INVAL;
			goto out;
		}
	}
#ifdef DEBUG
	dump_apd_tbl(stderr, apd_tbl, n_apids);
#endif /* DEBUG */

	if (rcm_init(rcm, apd_tbl, n_apids, flags) != 0) {
		rv = RI_FAILURE;
		goto out;
	}

	/*
	 * Best effort attempt to read cpu ecache sizes from
	 * OBP/Solaris device trees. These are later looked up
	 * in i_cpu_cm_info().
	 */
	(void) ecache_info_init(&rcm->ecache_info);

	for (i = 0, apd = apd_tbl; i < n_apids; i++, apd++) {
		if ((ap_hdl = ri_ap_alloc(ap_ids[i], ri_hdl)) == NULL) {
			rv = RI_FAILURE;
			goto out;
		}

		/*
		 * Add component info based on occupant type. Note all
		 * passes through the apd table skip over the first
		 * cfgadm_list_data entry, which is the static system board
		 * attachment point.
		 */
		for (j = 1, cfga_ldata = &apd->cfga_list_data[1];
		    j < apd->nlist; j++, cfga_ldata++) {
			if (cfga_ldata->ap_o_state != CFGA_STAT_CONFIGURED) {
				continue;
			}

			cm_info = cm_info_func(cfga_ldata->ap_type);
			if (cm_info == NULL) {
				/* occupant type we have no handler for */
				continue;
			}

			cm_info_rv = (*cm_info)(ap_hdl, cfga_ldata, flags,
			    rcm);

			/*
			 * If we cannot obtain info for the ap, skip it
			 * and do not fail the entire operation. This
			 * case occurs when the driver for a device is
			 * not attached: di_init() returns failed back
			 * to io_cm_info().
			 */
			if (cm_info_rv != 0 && cm_info_rv != RI_NODE_NIL) {
				rv = RI_FAILURE;
				goto out;
			}
		}
	}

	if ((flags & RI_INCLUDE_QUERY) && cap_request(ri_hdl, rcm) != 0)
		rv = RI_FAILURE;

out:
	if (apd_tbl != NULL)
		apd_tbl_free(apd_tbl, n_apids);
	if (rcm != NULL)
		rcm_fini(rcm);

	/*
	 * ri_hdl is NULL when its own allocation failed; there is then
	 * nothing to tear down, so do not hand a NULL handle to ri_fini().
	 */
	if (rv == RI_SUCCESS)
		*hdlp = ri_hdl;
	else if (ri_hdl != NULL)
		ri_fini(ri_hdl);

	return (rv);
}

/*
 * Map static board attachment point to dynamic attachment points (components).
 *
 * Lists ap_id with config_list_ext() (class "sbd", parsable output, all
 * entries) and stores the resulting array and its length in *ap_id_list
 * and *nlist.
 *
 * Returns 0 on success, -1 if the listing failed.
 */
static int
dyn_ap_ids(char *ap_id, cfga_list_data_t **ap_id_list, int *nlist)
{
	cfga_err_t	cfga_err;
	char		*errstr = NULL;
	char		*opts = "parsable";
	char		*listops = "class=sbd";

	cfga_err = config_list_ext(1, &ap_id, ap_id_list, nlist,
	    opts, listops, &errstr, CFGA_FLAG_LIST_ALL);

	/*
	 * Any error string is allocated by libcfgadm on our behalf. Only
	 * the error code is reported below, so release the string here
	 * rather than leaking it on every failed listing.
	 */
	free(errstr);

	if (cfga_err != CFGA_OK) {
		dprintf((stderr, "config_list_ext: %s\n",
		    config_strerror(cfga_err)));
		return (-1);
	}

	return (0);
}

/*
 * Prepare the RCM working state: allocate the RCM handle, record the
 * system page size, page count and total memory (in MB, rounded up),
 * and, when RI_INCLUDE_QUERY is requested, run and cache the offline
 * query via rcm_query_init().
 *
 * Returns 0 on success, -1 on failure.
 */
static int
rcm_init(rcmd_t *rcm, apd_t apd_tbl[], int napds, int flags)
{
	longlong_t	total_bytes;

	rcm->cpus = NULL;
	rcm->rlist = NULL;
	rcm->offline_query_info = NULL;

	if (rcm_alloc_handle(NULL, RCM_NOPID, NULL, &rcm->hdl) != RCM_SUCCESS) {
		dprintf((stderr, "rcm_alloc_handle (errno=%d)\n", errno));
		return (-1);
	}

	rcm->ms_pagesize = sysconf(_SC_PAGE_SIZE);
	if (rcm->ms_pagesize == -1) {
		dprintf((stderr, "sysconf: %s\n", strerror(errno)));
		return (-1);
	}

	rcm->ms_syspages = sysconf(_SC_PHYS_PAGES);
	if (rcm->ms_syspages == -1) {
		dprintf((stderr, "sysconf: %s\n", strerror(errno)));
		return (-1);
	}

	/* widen before multiplying so the byte count cannot overflow */
	total_bytes = (longlong_t)rcm->ms_pagesize * rcm->ms_syspages;
	rcm->ms_sysmb = (int)((total_bytes + MBYTE - 1) / MBYTE);

	if ((flags & RI_INCLUDE_QUERY) == 0)
		return (0);

	return (rcm_query_init(rcm, apd_tbl, napds));
}

/*
 * Release everything hanging off the RCM working state, then the state
 * itself: the cached offline query result, the RCM handle, the NULL
 * terminated resource name list and its strings, the cpu id array and
 * the e-cache size table filled in by ecache_info_init().
 */
static void
rcm_fini(rcmd_t *rcm)
{
	char	**cpp;

	assert(rcm != NULL);

	if (rcm->offline_query_info != NULL)
		rcm_free_info(rcm->offline_query_info);
	if (rcm->hdl != NULL)
		rcm_free_handle(rcm->hdl);

	if (rcm->rlist != NULL) {
		for (cpp = rcm->rlist; *cpp != NULL; cpp++)
			s_free(*cpp);
		free(rcm->rlist);
	}

	s_free(rcm->cpus);

	/*
	 * ecache_info_init() allocates this table inside *rcm; nothing
	 * else in this file releases it, so it must go before rcm does.
	 */
	free(rcm->ecache_info.ecache_sizes);
	rcm->ecache_info.ecache_sizes = NULL;

	free(rcm);
}

#define	NODENAME_CMP		"cmp"
#define	NODENAME_SSM		"ssm"
#define	PROP_CPUID		"cpuid"
#define	PROP_DEVICE_TYPE	"device-type"
#define	PROP_ECACHE_SIZE	"ecache-size"
#define	PROP_L2_CACHE_SIZE	"l2-cache-size"
#define	PROP_L3_CACHE_SIZE	"l3-cache-size"

/*
 * Argument handed to find_cpu_nodes() by ecache_info_init().
 */
typedef struct {
	di_node_t		root;		/* root of the snapshot */
	di_prom_handle_t	ph;		/* from di_prom_init() */
	ecache_info_t		*ecache_info;	/* table being filled in */
} di_arg_t;

/*
 * Fill in the per-cpu e-cache size table.
 *
 * The sizes are taken from the OBP/Solaris device trees rather than
 * from the cfgadm_sbd cpu attachment point info, because the latter
 * may be a sum over several cores for CMP.
 *
 * On entry *ec must be zeroed. On return ec->ecache_sizes, if it was
 * allocated, holds cpuid_max + 1 entries indexed by cpuid.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
ecache_info_init(ecache_info_t *ec)
{
	di_arg_t		walk_arg;
	di_prom_handle_t	prom;
	di_node_t		snapshot;
	int			max_id;
	int			status = -1;

	assert(ec != NULL && ec->cpuid_max == 0 && ec->ecache_sizes == NULL);

	max_id = sysconf(_SC_CPUID_MAX);
	if (max_id == -1) {
		dprintf((stderr, "sysconf fail: %s\n", strerror(errno)));
		return (-1);
	}

	snapshot = di_init("/", DINFOCPYALL);
	if (snapshot == DI_NODE_NIL) {
		dprintf((stderr, "di_init fail: %s\n", strerror(errno)));
		return (-1);
	}

	prom = di_prom_init();
	if (prom == DI_PROM_HANDLE_NIL) {
		dprintf((stderr, "di_prom_init fail: %s\n", strerror(errno)));
	} else if ((ec->ecache_sizes =
	    calloc(max_id + 1, sizeof (int))) == NULL) {
		dprintf((stderr, "calloc fail: %s\n", strerror(errno)));
	} else {
		ec->cpuid_max = max_id;

		dprintf((stderr, "cpuid_max is set to %d\n", ec->cpuid_max));

		walk_arg.ph = prom;
		walk_arg.root = snapshot;
		walk_arg.ecache_info = ec;

		if (di_walk_node(snapshot, DI_WALK_CLDFIRST,
		    (void *)&walk_arg, find_cpu_nodes) != 0) {
			dprintf((stderr, "di_walk_node fail: %s\n",
			    strerror(errno)));
		} else {
			status = 0;
		}
	}

	/* the snapshot always exists here; the prom handle may not */
	di_fini(snapshot);
	if (prom != DI_PROM_HANDLE_NIL)
		di_prom_fini(prom);

	return (status);
}

/*
 * Libdevinfo node walk callback for reading ecache size
 * properties for cpu device nodes. Subtrees not containing
 * cpu nodes are filtered out.
 *
 * arg is the di_arg_t set up by ecache_info_init(). When a node carries
 * a cache size property the value is remembered in ecache_curr and the
 * walk descends into its children; when a node carries a cpuid property
 * the remembered size (if non-zero) is stored in ecache_sizes[cpuid].
 *
 * Returns a DI_WALK_* code steering the walk.
 */
static int
find_cpu_nodes(di_node_t node, void *arg)
{
	char			*name;
	int			*cpuid, *ecache;
	di_arg_t		*di_arg = (di_arg_t *)arg;
	ecache_info_t		*ec = di_arg->ecache_info;
	di_prom_handle_t	ph = di_arg->ph;
	int			walk_child = 0;

	if (node == DI_NODE_NIL) {
		return (DI_WALK_TERMINATE);
	}

	if (node == di_arg->root) {
		return (DI_WALK_CONTINUE);
	}

	if (di_nodeid(node) == DI_PSEUDO_NODEID) {
		return (DI_WALK_PRUNECHILD);
	}

	name = di_node_name(node);
	if (name != NULL) {
		/*
		 * CMP nodes will be the parent of cpu nodes. On some platforms,
		 * cpu nodes will be under the ssm node. In either case,
		 * continue searching this subtree.
		 */
		if (strncmp(name, NODENAME_SSM, strlen(NODENAME_SSM)) == 0 ||
		    strncmp(name, NODENAME_CMP, strlen(NODENAME_CMP)) == 0) {
			return (DI_WALK_CONTINUE);
		}
	}

	dprintf((stderr, "find_cpu_nodes: node=%p, name=%s, binding_name=%s\n",
	    node, di_node_name(node), di_binding_name(node)));

	/*
	 * Ecache size property name differs with processor implementation.
	 * Panther has both L2 and L3, so check for L3 first to differentiate
	 * from Jaguar, which has only L2.
	 */
	if (prop_lookup_int(node, ph, PROP_ECACHE_SIZE, &ecache) == 0 ||
	    prop_lookup_int(node, ph, PROP_L3_CACHE_SIZE, &ecache) == 0 ||
	    prop_lookup_int(node, ph, PROP_L2_CACHE_SIZE, &ecache) == 0) {
		/*
		 * On some platforms the cache property is in the core
		 * node while the cpuid is in the child cpu node.  It may
		 * be needed while processing this node or a child node.
		 */
		ec->ecache_curr = *ecache;
		walk_child = 1;
	}

	if (prop_lookup_int(node, ph, PROP_CPUID, &cpuid) == 0) {

		assert(ec != NULL && ec->ecache_sizes != NULL);

		/*
		 * The cpuid comes straight from a device tree property.
		 * Check it at run time, not only with assert(), so that an
		 * unexpected value cannot index outside the table in
		 * non-debug builds. The table has cpuid_max + 1 entries.
		 */
		if (*cpuid < 0 || *cpuid > ec->cpuid_max) {
			dprintf((stderr, "find_cpu_nodes: cpuid %d out of "
			    "range (max %d)\n", *cpuid, ec->cpuid_max));
		} else if (ec->ecache_curr != 0) {
			ec->ecache_sizes[*cpuid] = ec->ecache_curr;
		}
	}

	return (walk_child ? DI_WALK_CONTINUE : DI_WALK_PRUNECHILD);
}

/*
 * Look up a single-valued int property on a devinfo node, going through
 * the PROM interface for PROM nodes and the regular property interface
 * otherwise. On success *ival points at the value.
 *
 * Returns 0 only when the property holds exactly one value; a missing
 * property or one with several entries yields -1.
 */
static int
prop_lookup_int(di_node_t node, di_prom_handle_t ph, char *propname, int **ival)
{
	int nvals;

	if (di_nodeid(node) == DI_PROM_NODEID) {
		nvals = di_prom_prop_lookup_ints(ph, node, propname, ival);
	} else {
		nvals = di_prop_lookup_ints(DDI_DEV_T_ANY, node, propname,
		    ival);
	}

	if (nvals != 1)
		return (-1);

	return (0);
}

/*
 * Build the inputs for, and cache the result of, the RCM offline query.
 *
 * RCM must be handed the complete resource list so its modules see the
 * full scope of the operation, while the rcm_get_info calls are issued
 * one resource at a time later on so that the returned rcm_info_t's can
 * be tied to physical devices. The offline query result is therefore
 * cached here and consulted while those calls are processed. Along the
 * way the per-type query pass handlers tally the memory going away and
 * collect the cpu ids needed for rcm_request_capacity_change.
 *
 * Returns 0 on success, -1 on allocation or handler failure.
 */
static int
rcm_query_init(rcmd_t *rcm, apd_t apd_tbl[], int napds)
{
	int			brd, comp;
	cfga_list_data_t	*ldata;
	int			(*qpass)(cfga_list_data_t *, rcmd_t *);
#ifdef DEBUG
	char			**cpp;
	int			idx;
#endif /* DEBUG */

	/*
	 * Sizing pass. A CMP attachment point can stand for several cpus
	 * (and resource names); rather than parse the cfgadm info field
	 * here, assume the worst case for every configured component.
	 * Index 0 of each list is the board itself and is skipped.
	 */
	rcm->ndevs = 0;
	for (brd = 0; brd < napds; brd++) {
		for (comp = 1; comp < apd_tbl[brd].nlist; comp++) {
			ldata = &apd_tbl[brd].cfga_list_data[comp];
			if (ldata->ap_o_state == CFGA_STAT_CONFIGURED)
				rcm->ndevs += SBD_MAX_CORES_PER_CMP;
		}
	}

	if (rcm->ndevs > 0) {
		rcm->cpus = calloc(rcm->ndevs, sizeof (cpuid_t));
		if (rcm->cpus != NULL) {
			/* one extra slot for the trailing NULL in rlist */
			rcm->rlist = calloc(rcm->ndevs + 1, sizeof (char *));
		}
		if (rcm->cpus == NULL || rcm->rlist == NULL) {
			dprintf((stderr, "calloc: %s\n", strerror(errno)));
			return (-1);
		}
	}

	/*
	 * Fill pass: let each occupant type add its RCM resource names
	 * and cpu ids.
	 */
	for (brd = 0; brd < napds; brd++) {
		for (comp = 1; comp < apd_tbl[brd].nlist; comp++) {
			ldata = &apd_tbl[brd].cfga_list_data[comp];
			if (ldata->ap_o_state != CFGA_STAT_CONFIGURED)
				continue;

			qpass = cm_rcm_qpass_func(ldata->ap_type);
			if (qpass == NULL)
				continue;

			if ((*qpass)(ldata, rcm) != 0)
				return (-1);
		}
	}

	/* nothing to ask RCM about */
	if (rcm->nrlist == 0)
		return (0);

	/*
	 * Only the set of RCM clients that were processed matters, not
	 * the status of the request, so the return value is ignored.
	 */
	(void) rcm_request_offline_list(rcm->hdl, rcm->rlist,
	    RCM_QUERY|RCM_SCOPE, &rcm->offline_query_info);

#ifdef DEBUG
	dprintf((stderr, "RCM rlist: nrlist=%d\n", rcm->nrlist));
	idx = 0;
	for (cpp = rcm->rlist; *cpp != NULL; cpp++) {
		dprintf((stderr, "rlist[%d]=%s\n", idx, *cpp));
		idx++;
	}
#endif /* DEBUG */

	return (0);
}

/*
 * Issue the RCM capacity change requests implied by the query: one for
 * cpus if any cpu ids were collected, then one for memory if any pages
 * were tallied. The memory request is not attempted once the cpu
 * request has failed.
 *
 * Returns 0 on success, -1 if either request fails.
 */
static int
cap_request(ri_hdl_t *ri_hdl, rcmd_t *rcm)
{
	if (rcm->ncpus > 0 && cpu_cap_request(ri_hdl, rcm) != 0)
		return (-1);

	if (rcm->query_pages > 0 && mem_cap_request(ri_hdl, rcm) != 0)
		return (-1);

	return (0);
}

/*
 * RCM capacity change request for cpus.
 *
 * Describes to RCM the cpu set before and after the cpus collected in
 * rcm->cpus are removed ("old_total"/"old_cpu_list" and
 * "new_total"/"new_cpu_list"), issues the request as a query, and adds
 * the responding clients to ri_hdl->cpu_cap_clients.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
cpu_cap_request(ri_hdl_t *ri_hdl, rcmd_t *rcm)
{
	cpuid_t		*syscpuids = NULL, *newcpuids = NULL;
	int		sysncpus, newncpus;
	rcm_info_t	*rcm_info = NULL;
	int		i, k;
	nvlist_t	*nvl = NULL;	/* tested in the cleanup path */
	int		rv = 0;

	/* get all cpus in the system */
	if (syscpus(&syscpuids, &sysncpus) == -1)
		return (-1);

	/*
	 * The remaining set can never be larger than the current one, so
	 * size the array for that worst case instead of trusting
	 * sysncpus - rcm->ncpus: a queried cpu that is no longer in the
	 * system list would otherwise make the loop below run past the
	 * end of the array. Always ask for at least one element so that
	 * a zero-size calloc() returning NULL is not taken for failure.
	 */
	newcpuids = calloc(sysncpus > 0 ? sysncpus : 1, sizeof (cpuid_t));
	if (newcpuids == NULL) {
		dprintf((stderr, "calloc: %s\n", strerror(errno)));
		rv = -1;
		goto out;
	}

	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
		dprintf((stderr, "nvlist_alloc fail\n"));
		nvl = NULL;
		rv = -1;
		goto out;
	}

	/*
	 * Construct the new cpu list: every system cpu that is not part
	 * of the query. newncpus counts what was actually kept.
	 */
	newncpus = 0;
	for (i = 0; i < sysncpus; i++) {
		for (k = 0; k < rcm->ncpus; k++) {
			if (rcm->cpus[k] == syscpuids[i]) {
				break;
			}
		}
		if (k == rcm->ncpus) {
			newcpuids[newncpus++] = syscpuids[i];
		}
	}

	if (nvlist_add_int32(nvl, "old_total", sysncpus) != 0 ||
	    nvlist_add_int32(nvl, "new_total", newncpus) != 0 ||
	    nvlist_add_int32_array(nvl, "old_cpu_list", syscpuids,
	    sysncpus) != 0 ||
	    nvlist_add_int32_array(nvl, "new_cpu_list", newcpuids,
	    newncpus) != 0) {
		dprintf((stderr, "nvlist_add fail\n"));
		rv = -1;
		goto out;
	}

#ifdef DEBUG
	dprintf((stderr, "old_total=%d\n", sysncpus));
	for (i = 0; i < sysncpus; i++) {
		dprintf((stderr, "old_cpu_list[%d]=%d\n", i, syscpuids[i]));
	}
	dprintf((stderr, "new_total=%d\n", newncpus));
	for (i = 0; i < newncpus; i++) {
		dprintf((stderr, "new_cpu_list[%d]=%d\n", i, newcpuids[i]));
	}
#endif /* DEBUG */

	/* only the responding clients matter, not the request status */
	(void) rcm_request_capacity_change(rcm->hdl, RCM_CPU_ALL,
	    RCM_QUERY|RCM_SCOPE, nvl, &rcm_info);

	rv = add_rcm_clients(&ri_hdl->cpu_cap_clients, rcm, rcm_info, 0, NULL);

out:
	s_free(syscpuids);
	s_free(newcpuids);
	if (nvl != NULL)
		nvlist_free(nvl);
	if (rcm_info != NULL)
		rcm_free_info(rcm_info);

	return (rv);
}

/*
 * Return the ids of the cpus in the system.
 *
 * The ids are the instance numbers of the "cpu_info" kstats. On success
 * *cpuids points at a calloc()ed array, which the caller frees, and
 * *ncpus is the number of ids stored in it.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
syscpus(cpuid_t **cpuids, int *ncpus)
{
	kstat_t		*ksp;
	kstat_ctl_t	*kc;
	cpuid_t		*cp;
	int		i;

	if ((*ncpus = sysconf(_SC_NPROCESSORS_CONF)) == -1) {
		dprintf((stderr, "sysconf: %s\n", strerror(errno)));
		return (-1);
	}

	if ((kc = kstat_open()) == NULL) {
		dprintf((stderr, "kstat_open fail\n"));
		return (-1);
	}

	if ((cp = calloc(*ncpus, sizeof (cpuid_t))) == NULL) {
		dprintf((stderr, "calloc: %s\n", strerror(errno)));
		(void) kstat_close(kc);
		return (-1);
	}

	/*
	 * The array was sized from sysconf() but is filled from the kstat
	 * chain; the two are sampled at different times, so never store
	 * more ids than there is room for.
	 */
	for (i = 0, ksp = kc->kc_chain; ksp != NULL && i < *ncpus;
	    ksp = ksp->ks_next) {
		if (strcmp(ksp->ks_module, "cpu_info") == 0) {
			cp[i++] = ksp->ks_instance;
		}
	}

	(void) kstat_close(kc);

	/* report what was found, not zero-filled tail entries */
	*ncpus = i;
	*cpuids = cp;

	return (0);
}

/*
 * RCM capacity change request for memory.
 *
 * Tells RCM the page size and the system page count before and after
 * the tallied query pages are removed ("page_size", "old_pages",
 * "new_pages"), issues the request as a query, and adds the responding
 * clients to ri_hdl->mem_cap_clients.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
mem_cap_request(ri_hdl_t *ri_hdl, rcmd_t *rcm)
{
	nvlist_t	*nvl;
	rcm_info_t	*rcm_info = NULL;
	long 		newpages;
	int		rv = 0;

	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
		dprintf((stderr, "nvlist_alloc fail\n"));
		return (-1);
	}

	newpages = rcm->ms_syspages - rcm->query_pages;
	if (nvlist_add_int32(nvl, "page_size", rcm->ms_pagesize) != 0 ||
	    nvlist_add_int32(nvl, "old_pages", rcm->ms_syspages) != 0 ||
	    nvlist_add_int32(nvl, "new_pages", newpages) != 0) {
		dprintf((stderr, "nvlist_add fail\n"));
		nvlist_free(nvl);
		return (-1);
	}

	/* all three values are longs; print them as such */
	dprintf((stderr, "memory capacity change req: "
	    "page_size=%ld, old_pages=%ld, new_pages=%ld\n",
	    rcm->ms_pagesize, rcm->ms_syspages, newpages));

	/* only the responding clients matter, not the request status */
	(void) rcm_request_capacity_change(rcm->hdl, RCM_MEM_ALL,
	    RCM_QUERY|RCM_SCOPE, nvl, &rcm_info);

	rv = add_rcm_clients(&ri_hdl->mem_cap_clients, rcm, rcm_info, 0, NULL);

	nvlist_free(nvl);
	if (rcm_info != NULL)
		rcm_free_info(rcm_info);

	return (rv);
}

/*
 * Find the RCM query pass handler registered in cm_ctl[] for the given
 * attachment point type. Returns NULL for types without an entry.
 */
static int
(*cm_rcm_qpass_func(cfga_type_t ap_type))(cfga_list_data_t *, rcmd_t *)
{
	const int	nctl = sizeof (cm_ctl) / sizeof (cm_ctl[0]);
	int		idx = 0;

	while (idx < nctl) {
		if (strcmp(cm_ctl[idx].type, ap_type) == 0)
			return (cm_ctl[idx].cm_rcm_qpass);
		idx++;
	}

	return (NULL);
}

/*
 * Save cpu ids and RCM abstract resource names.
 * Cpu ids will be used for the capacity change request.
 * Resource names will be used for the offline query.
 *
 * The cpu attachment point info has the form described by CPU_INFO_FMT;
 * its cpuid field may list several ids separated by CPUID_SEP. Each id
 * is appended to rcm->cpus and its resource name to rcm->rlist.
 *
 * Returns 0 on success, -1 if the info string cannot be parsed, the
 * lists are full, or memory runs out.
 */
static int
cpu_rcm_qpass(cfga_list_data_t *cfga_ldata, rcmd_t *rcm)
{
	processorid_t	cpuid;
	char		*cpustr, *lasts, *rsrcname, rbuf[32];
	char		cbuf[CFGA_INFO_LEN];
	int		speed, ecache;

	/*
	 * The parse must not live inside assert(): with NDEBUG the whole
	 * expression disappears and cbuf would be tokenized uninitialized.
	 * Fail the same way cpu_cm_info() does on an unknown format.
	 */
	if (sscanf(cfga_ldata->ap_info, CPU_INFO_FMT, cbuf, &speed,
	    &ecache) != 3) {
		dprintf((stderr, "unknown sbd info format: %s\n",
		    cfga_ldata->ap_info));
		return (-1);
	}

	for (cpustr = (char *)strtok_r(cbuf, CPUID_SEP, &lasts);
	    cpustr != NULL;
	    cpustr = (char *)strtok_r(NULL, CPUID_SEP, &lasts)) {
		cpuid = atoi(cpustr);

		/*
		 * Both lists were sized for ndevs entries (rlist has one
		 * more for its terminating NULL). Enforce that at run
		 * time so an info string naming more cpus than expected
		 * cannot write past them.
		 */
		if (rcm->nrlist >= rcm->ndevs || rcm->ncpus >= rcm->ndevs) {
			dprintf((stderr, "cpu_rcm_qpass: too many cpus\n"));
			return (-1);
		}

		(void) snprintf(rbuf, sizeof (rbuf), "%s%d", RCM_CPU, cpuid);
		if ((rsrcname = strdup(rbuf)) == NULL) {
			dprintf((stderr, "strdup fail\n"));
			return (-1);
		}
		rcm->rlist[rcm->nrlist++] = rsrcname;
		rcm->cpus[rcm->ncpus++] = (cpuid_t)cpuid;

		dprintf((stderr, "cpu_rcm_qpass: cpuid=%d, rsrcname=%s\n",
		    cpuid, rsrcname));
	}

	return (0);
}

/*
 * No RCM resource names for individual memory units, so
 * just add to offline query page count.
 *
 * The "size=<kbytes>" field of the attachment point info is converted
 * to pages using the system page size and added to rcm->query_pages.
 *
 * Returns 0 on success, -1 if the size field is missing or malformed.
 */
static int
mem_rcm_qpass(cfga_list_data_t *cfga, rcmd_t *rcm)
{
	char		*cp;
	uint_t		kbytes;
	longlong_t	ii;

	if ((cp = strstr(cfga->ap_info, "size")) == NULL ||
	    sscanf(cp, "size=%u", &kbytes) != 1) {
		/* cp is NULL when the field is absent; show the whole info */
		dprintf((stderr, "unknown sbd info format: %s\n",
		    cfga->ap_info));
		return (-1);
	}

	/* widen before multiplying so the byte count cannot overflow */
	ii = (longlong_t)kbytes * KBYTE;
	rcm->query_pages += (uint_t)(ii / rcm->ms_pagesize);

	dprintf((stderr, "%s: npages=%u\n", cfga->ap_log_id,
	    (uint_t)(ii / rcm->ms_pagesize)));

	return (0);
}

/*
 * Add physical I/O bus name to RCM resource list.
 *
 * The attachment point info has the form "device=<devfs path>"; the
 * resource name appended to rcm->rlist is "/devices<devfs path>".
 *
 * Returns 0 on success, -1 if the info string cannot be parsed, the
 * resulting name does not fit in MAXPATHLEN, the list is full, or
 * memory runs out.
 */
static int
io_rcm_qpass(cfga_list_data_t *cfga, rcmd_t *rcm)
{
	/*
	 * The %s below is unbounded, so the scan buffer is sized like the
	 * buffers this file uses elsewhere to scan ap_info, not like a
	 * path.
	 */
	char		path[CFGA_INFO_LEN];
	char		buf[MAXPATHLEN];
	char		*rsrcname;
	int		len;

	if (sscanf(cfga->ap_info, "device=%s", path) != 1) {
		dprintf((stderr, "unknown sbd info format: %s\n",
		    cfga->ap_info));
		return (-1);
	}

	/* a truncated name would refer to some other resource */
	len = snprintf(buf, sizeof (buf), "/devices%s", path);
	if (len < 0 || (size_t)len >= sizeof (buf)) {
		dprintf((stderr, "device path too long: %s\n", path));
		return (-1);
	}

	/* rlist has room for ndevs names plus its terminating NULL */
	if (rcm->nrlist >= rcm->ndevs) {
		dprintf((stderr, "io_rcm_qpass: too many resources\n"));
		return (-1);
	}

	if ((rsrcname = strdup(buf)) == NULL) {
		dprintf((stderr, "strdup fail\n"));
		return (-1);
	}

	rcm->rlist[rcm->nrlist++] = rsrcname;

	return (0);
}

/*
 * Find the component info handler registered in cm_ctl[] for the given
 * attachment point type. Returns NULL for types without an entry.
 */
static int
(*cm_info_func(cfga_type_t ap_type))(ri_ap_t *, cfga_list_data_t *,
    int, rcmd_t *)
{
	const int	nctl = sizeof (cm_ctl) / sizeof (cm_ctl[0]);
	int		idx;

	for (idx = 0; idx < nctl; idx++) {
		if (strcmp(cm_ctl[idx].type, ap_type) != 0)
			continue;
		return (cm_ctl[idx].cm_info);
	}

	return (NULL);
}

/*
 * Create cpu handle, adding properties exported by sbd plugin and
 * RCM client usage.
 *
 * The attachment point info has the form described by CPU_INFO_FMT; its
 * cpuid field may list several ids separated by CPUID_SEP (CMP). One
 * cpu device is created per id through i_cpu_cm_info(), and processing
 * stops at the first id that fails.
 *
 * Returns 0 on success, -1 if the info string cannot be parsed,
 * otherwise the failing i_cpu_cm_info() result.
 */
/* ARGSUSED */
static int
cpu_cm_info(ri_ap_t *ap, cfga_list_data_t *cfga, int flags, rcmd_t *rcm)
{
	processorid_t	cpuid;
	int		speed, ecache, rv = 0;
	char		buf[CFGA_INFO_LEN], *cpustr, *lasts;

	/* %s takes a char *, i.e. buf itself rather than &buf */
	if (sscanf(cfga->ap_info, CPU_INFO_FMT, buf, &speed, &ecache) != 3) {
		dprintf((stderr, "unknown sbd info format: %s\n",
		    cfga->ap_info));
		return (-1);
	}

	/* parse cpuids */
	for (cpustr = (char *)strtok_r(buf, CPUID_SEP, &lasts);
	    cpustr != NULL;
	    cpustr = (char *)strtok_r(NULL, CPUID_SEP, &lasts)) {
		cpuid = atoi(cpustr);
		if ((rv = i_cpu_cm_info(cpuid, speed, ecache, ap, rcm)) != 0) {
			break;
		}
	}

	return (rv);
}

/*
 * Create the device handle for one cpu and insert it into ap->cpus.
 *
 *	cpuid		processor id
 *	speed		cpu speed as reported by cfgadm
 *	ecache_cfga	e-cache size (MB) as reported by cfgadm; used only
 *			when the device tree table has no size for cpuid
 *	ap		attachment point the cpu belongs to
 *	rcm		RCM working state (handle and e-cache table)
 *
 * Returns 0 on success, -1 on failure.
 */
static int
i_cpu_cm_info(processorid_t cpuid, int speed, int ecache_cfga, ri_ap_t *ap,
    rcmd_t *rcm)
{
	int		ecache_mb = 0;
	int		ecache_kb = 0;
	char		*state, buf[32];
	processor_info_t cpu_info;
	ri_dev_t	*cpu = NULL;
	rcm_info_t	*rcm_info = NULL;
	ecache_info_t	*ec = &rcm->ecache_info;

	/*
	 * Could have been unconfigured in the interim, so cannot
	 * count on processor_info recognizing it.
	 */
	state = (processor_info(cpuid, &cpu_info) == 0) ?
	    pstate2str(cpu_info.pi_state) : "unknown";

	if ((cpu = ri_dev_alloc()) == NULL) {
		dprintf((stderr, "ri_dev_alloc failed\n"));
		return (-1);
	}

	/*
	 * Assume the ecache_info table has the right e-cache size for
	 * this CPU.  Use the value found in cfgadm (ecache_cfga) if not.
	 * The cpuid was parsed from a cfgadm string, so range check it at
	 * run time; the table holds cpuid_max + 1 entries.
	 */
	if (ec->ecache_sizes != NULL && cpuid >= 0 &&
	    cpuid <= ec->cpuid_max) {
		ecache_mb = ec->ecache_sizes[cpuid] / MBYTE;
		ecache_kb = ec->ecache_sizes[cpuid] / KBYTE;
	}

	if (ecache_mb == 0) {
		ecache_mb = ecache_cfga;
	}

	dprintf((stderr, "i_cpu_cm_info: cpu(%d) ecache=%d MB\n",
	    cpuid, ecache_mb));

	if (nvlist_add_int32(cpu->conf_props, RI_CPU_ID, cpuid) != 0 ||
	    nvlist_add_int32(cpu->conf_props, RI_CPU_SPEED, speed) != 0 ||
	    nvlist_add_int32(cpu->conf_props, RI_CPU_ECACHE, ecache_mb) != 0 ||
	    nvlist_add_string(cpu->conf_props, RI_CPU_STATE, state) != 0) {
		dprintf((stderr, "nvlist_add fail\n"));
		ri_dev_free(cpu);
		return (-1);
	}

	/*
	 * Report cache size in kilobyte units if available.  This info is
	 * added to support processors with cache sizes that are non-integer
	 * megabyte multiples.
	 */
	if (ecache_kb != 0) {
		if (nvlist_add_int32(cpu->conf_props, RI_CPU_ECACHE_KBYTE,
		    ecache_kb) != 0)  {
			dprintf((stderr, "nvlist_add fail: %s\n",
			    RI_CPU_ECACHE_KBYTE));
			ri_dev_free(cpu);
			return (-1);
		}
	}

	(void) snprintf(buf, sizeof (buf), "%s%d", RCM_CPU, cpuid);
	dprintf((stderr, "rcm_get_info(%s)\n", buf));
	if (rcm_get_info(rcm->hdl, buf, RCM_INCLUDE_DEPENDENT,
	    &rcm_info) != RCM_SUCCESS) {
		dprintf((stderr, "rcm_get_info (errno=%d)\n", errno));
		ri_dev_free(cpu);
		if (rcm_info != NULL)
			rcm_free_info(rcm_info);
		return (-1);
	}

	/*
	 * The usage info is ours to release on success as well.
	 * NOTE(review): nothing here attaches the returned RCM usage to
	 * the cpu handle, although the cpu_cm_info() header says RCM
	 * client usage is added - confirm whether a call to
	 * add_rcm_clients() is missing.
	 */
	if (rcm_info != NULL)
		rcm_free_info(rcm_info);

	dev_list_cpu_insert(&ap->cpus, cpu, cpuid);

	return (0);
}

/*
 * Create memory handle, adding properties exported by sbd plugin.
 * No RCM tuples to be saved unless RCM is modified to export names
 * for individual memory units.
 */
/* ARGSUSED */
static int
mem_cm_info(ri_ap_t *ap, cfga_list_data_t *cfga, int flags, rcmd_t *rcm)
{
	ri_dev_t	*mem;
	char		*cp;
	char		*cpval;
	int		len;
	uint64_t	base_addr;				/* required */
	int32_t		size_kb;				/* required */
	int32_t		perm_kb = 0;				/* optional */
	char		target[CFGA_AP_LOG_ID_LEN] = "";	/* optional */
	int32_t		del_kb = 0;				/* optional */
	int32_t		rem_kb = 0;				/* optional */
	char		source[CFGA_AP_LOG_ID_LEN] = "";	/* optional */

	if (sscanf(cfga->ap_info, "address=0x%llx size=%u", &base_addr,
	    &size_kb) != 2) {
		goto err_fmt;
	}

	if ((cp = strstr(cfga->ap_info, "permanent")) != NULL &&
	    sscanf(cp, "permanent=%u", &perm_kb) != 1) {
		goto err_fmt;
	}

	if ((cp = strstr(cfga->ap_info, "target")) != NULL) {
		if ((cpval = strstr(cp, "=")) == NULL) {
			goto err_fmt;
		}
		for (len = 0; cpval[len] != '\0' && cpval[len] != ' '; len++) {
			if (len >= CFGA_AP_LOG_ID_LEN) {
				goto err_fmt;
			}
		}
		if (sscanf(cp, "target=%s deleted=%u remaining=%u", &target,
		    &del_kb, &rem_kb) != 3) {
			goto err_fmt;
		}
	}

	if ((cp = strstr(cfga->ap_info, "source")) != NULL) {
		if ((cpval = strstr(cp, "=")) == NULL) {
			goto err_fmt;
		}
		for (len = 0; cpval[len] != '\0' && cpval[len] != ' '; len++) {
			if (len >= CFGA_AP_LOG_ID_LEN) {
				goto err_fmt;
			}
		}
		if (sscanf(cp, "source=%s", &source) != 1) {
			goto err_fmt;
		}
	}

	dprintf((stderr, "%s: base=0x%llx, size=%u, permanent=%u\n",
	    cfga->ap_log_id, base_addr, size_kb, perm_kb));

	if ((mem = ri_dev_alloc()) == NULL)
		return (-1);

	/*
	 * Convert memory sizes to MB (truncate).
	 */
	if (nvlist_add_uint64(mem->conf_props, RI_MEM_ADDR, base_addr) != 0 ||
	    nvlist_add_int32(mem->conf_props, RI_MEM_BRD, size_kb/KBYTE) != 0 ||
	    nvlist_add_int32(mem->conf_props, RI_MEM_PERM,
	    perm_kb/KBYTE) != 0) {
		dprintf((stderr, "nvlist_add failure\n"));
		ri_dev_free(mem);
		return (-1);
	}

	if (target[0] != '\0' &&
	    (nvlist_add_string(mem->conf_props, RI_MEM_TARG, target) != 0 ||
	    nvlist_add_int32(mem->conf_props, RI_MEM_DEL, del_kb/KBYTE) != 0 ||
	    nvlist_add_int32(mem->conf_props, RI_MEM_REMAIN,
	    rem_kb/KBYTE) != 0)) {
		dprintf((stderr, "nvlist_add failure\n"));
		ri_dev_free(mem);
		return (-1);
	}

	if (source[0] != '\0' &&
	    nvlist_add_string(mem->conf_props, RI_MEM_SRC, source) != 0) {
		dprintf((stderr, "nvlist_add failure\n"));
		ri_dev_free(mem);
		return (-1);
	}

	/*
	 * XXX - move this property to attachment point hdl?
	 */
	if (nvlist_add_int32(mem->conf_props, RI_MEM_DOMAIN,
	    rcm->ms_sysmb) != 0) {
		dprintf((stderr, "nvlist_add failure\n"));
		ri_dev_free(mem);
		return (-1);
	}

	dev_list_append(&ap->mems, mem);
	return (0);

err_fmt:
	dprintf((stderr, "unknown sbd info format: %s\n", cfga->ap_info));
	return (-1);
}

/*
 * Build the list of IO devices (ap->ios) for an IO attachment point.
 *
 * The devfs path of the IO bus is taken from the cfgadm info string
 * ("device=<path>").  A libdevinfo snapshot rooted at that path is walked
 * to build a lookup table of /devices paths, minor paths and /dev links,
 * and the RCM usage reported for the subtree is mapped onto that table.
 * One ri_dev_t is then created for each device node that either has mapped
 * RCM usage, or is a leaf node when RI_INCLUDE_UNMANAGED is set in flags.
 * When RI_INCLUDE_QUERY is set, query state is added to each client.
 *
 * Returns 0 on success, -1 on failure, and RI_NODE_NIL when no libdevinfo
 * snapshot could be obtained for the path (so the caller can skip it).
 *
 * XXX - investigate performance using two threads here: one thread to do the
 * libdevinfo snapshot and treewalk; and one thread to get RCM usage info
 */
static int
io_cm_info(ri_ap_t *ap, cfga_list_data_t *cfga, int flags, rcmd_t *rcm)
{
	int			i;
	int			j;
	int			k;
	int			set_size;
	int			retval = 0;
	int			n_usage;
	devinfo_arg_t		di_arg;
	lookup_table_t		devicetable;
	lookup_entry_t		*deventry;
	lookup_entry_t		*lastdeventry;
	ri_dev_t		*io = NULL;
	ri_client_t		*client;
	ri_client_t		*tmp;
	di_devlink_handle_t	linkhd = NULL;
	di_node_t		root = DI_NODE_NIL;
	di_node_t		node = DI_NODE_NIL;
	rcm_info_tuple_t	*rcm_tuple;
	rcm_info_t		*rcm_info = NULL;
	const char		*rcm_rsrc = NULL;
	char			drv_inst[MAXPATHLEN];
	char			path[MAXPATHLEN];
	char			pathbuf[MAXPATHLEN];
	char			scanfmt[32];

	dprintf((stderr, "io_cm_info(%s)\n", cfga->ap_log_id));

	/*
	 * Extract devfs path from cfgadm information.  The %s conversion
	 * is given an explicit field width (sizeof (path) - 1) so that an
	 * over-long info string cannot write past the end of path[].
	 */
	(void) snprintf(scanfmt, sizeof (scanfmt), "device=%%%ds\n",
	    (int)(sizeof (path) - 1));
	if (sscanf(cfga->ap_info, scanfmt, path) != 1) {
		dprintf((stderr, "unknown sbd info format: %s\n",
		    cfga->ap_info));
		return (-1);
	}

	/* Initialize empty device lookup table */
	devicetable.n_entries = 0;
	devicetable.n_slots = 0;
	devicetable.table = NULL;

	/* Get libdevinfo snapshot */
	dprintf((stderr, "di_init(%s)\n", path));
	if ((root = di_init(path, DINFOCPYALL)) == DI_NODE_NIL) {
		dprintf((stderr, "di_init: %s\n", strerror(errno)));
		retval = RI_NODE_NIL; /* tell ri_init to skip this node */
		goto end;
	}

	/*
	 * Map in devlinks database.
	 * XXX - This could be moved to ri_init() for better performance.
	 */
	dprintf((stderr, "di_devlink_init()\n"));
	if ((linkhd = di_devlink_init(NULL, 0)) == NULL) {
		dprintf((stderr, "di_devlink_init: %s\n", strerror(errno)));
		retval = -1;
		goto end;
	}

	/* Initialize argument for devinfo treewalk */
	di_arg.err = 0;
	di_arg.node = DI_NODE_NIL;
	di_arg.pathbuf = pathbuf;
	di_arg.table = &devicetable;
	di_arg.linkhd = linkhd;

	/* Use libdevinfo treewalk to build device lookup table */
	if (di_walk_node(root, DI_WALK_CLDFIRST, (void *)&di_arg,
	    devinfo_node_walk) != 0) {
		dprintf((stderr, "di_walk_node: %s\n", strerror(errno)));
		retval = -1;
		goto end;
	}
	/* The walk callbacks report internal failures through di_arg.err */
	if (di_arg.err != 0) {
		dprintf((stderr, "di_walk_node: device tree walk failed\n"));
		retval = -1;
		goto end;
	}

	/* Call RCM to gather usage information */
	(void) snprintf(pathbuf, MAXPATHLEN, "/devices%s", path);
	dprintf((stderr, "rcm_get_info(%s)\n", pathbuf));
	if (rcm_get_info(rcm->hdl, pathbuf,
	    RCM_INCLUDE_SUBTREE|RCM_INCLUDE_DEPENDENT, &rcm_info) !=
	    RCM_SUCCESS) {
		dprintf((stderr, "rcm_get_info (errno=%d)\n", errno));
		retval = -1;
		goto end;
	}

	/* Sort the device table by name (proper order for lookups) */
	qsort(devicetable.table, devicetable.n_entries, sizeof (lookup_entry_t),
	    table_compare_names);

	/*
	 * Perform mappings of RCM usage segments to device table entries.
	 * A resource with no table entry of its own is attributed to the
	 * most recently matched entry (add_usage() ignores a NULL entry).
	 */
	lastdeventry = NULL;
	rcm_tuple = NULL;
	while ((rcm_tuple = rcm_info_next(rcm_info, rcm_tuple)) != NULL) {
		if ((rcm_rsrc = rcm_info_rsrc(rcm_tuple)) == NULL)
			continue;
		if ((deventry = lookup(&devicetable, rcm_rsrc)) != NULL) {
			if (add_usage(deventry, rcm_rsrc, rcm_tuple)) {
				retval = -1;
				goto end;
			}
			lastdeventry = deventry;
		} else {
			if (add_usage(lastdeventry, rcm_rsrc, rcm_tuple)) {
				retval = -1;
				goto end;
			}
		}
	}

	/* Re-sort the device table by index number (original treewalk order) */
	qsort(devicetable.table, devicetable.n_entries, sizeof (lookup_entry_t),
	    table_compare_indices);

	/*
	 * Use the mapped usage and the device table to construct ri_dev_t's.
	 * Construct one for each set of entries in the device table with
	 * matching di_node_t's, if: 1) it has mapped RCM usage, or 2) it is
	 * a leaf node and the caller has requested that unmanaged nodes be
	 * included in the output.
	 */
	i = 0;
	while (i < devicetable.n_entries) {

		node = devicetable.table[i].node;

		/* Count how many usage records are mapped to this node's set */
		n_usage = 0;
		set_size = 0;
		while (((i + set_size) < devicetable.n_entries) &&
		    (devicetable.table[i + set_size].node == node)) {
			n_usage += devicetable.table[i + set_size].n_usage;
			set_size += 1;
		}

		/*
		 * If there's no usage, then the node is unmanaged.  Skip this
		 * set of devicetable entries unless the node is a leaf node
		 * and the caller has requested information on unmanaged leaves.
		 */
		if ((n_usage == 0) &&
		    !((flags & RI_INCLUDE_UNMANAGED) && (ident_leaf(node)))) {
			i += set_size;
			continue;
		}

		/*
		 * The checks above determined that this node is going in.
		 * So determine its driver/instance name and allocate an
		 * ri_dev_t for this node.
		 */
		if (mk_drv_inst(node, drv_inst, devicetable.table[i].name)) {
			dprintf((stderr, "mk_drv_inst failed\n"));
			retval = -1;
			break;
		}
		if ((io = io_dev_alloc(drv_inst)) == NULL) {
			dprintf((stderr, "io_dev_alloc failed\n"));
			retval = -1;
			break;
		}

		/* Now add all the RCM usage records (if any) to the ri_dev_t */
		for (j = i; j < (i + set_size); j++) {
			for (k = 0; k < devicetable.table[j].n_usage; k++) {
				/* Create new ri_client_t for basic usage */
				client = ri_client_alloc(
				    (char *)devicetable.table[j].usage[k].rsrc,
				    (char *)devicetable.table[j].usage[k].info);
				if (client == NULL) {
					dprintf((stderr,
					    "ri_client_alloc failed\n"));
					ri_dev_free(io);
					retval = -1;
					goto end;
				}

				/* Add extra query usage to the ri_client_t */
				if ((flags & RI_INCLUDE_QUERY) &&
				    (add_query_state(rcm, client,
				    devicetable.table[j].usage[k].rsrc,
				    devicetable.table[j].usage[k].info) != 0)) {
					dprintf((stderr,
					    "add_query_state failed\n"));
					ri_dev_free(io);
					ri_client_free(client);
					retval = -1;
					goto end;
				}

				/* Link new ri_client_t to ri_dev_t */
				if (io->rcm_clients) {
					tmp = io->rcm_clients;
					while (tmp->next)
						tmp = tmp->next;
					tmp->next = client;
				} else {
					io->rcm_clients = client;
				}
			}
		}

		/* Link the ri_dev_t into the return value */
		dev_list_append(&ap->ios, io);

		/* Advance to the next node set */
		i += set_size;
	}

end:
	/* Release everything acquired above; each handle is optional here */
	if (rcm_info != NULL)
		rcm_free_info(rcm_info);
	if (linkhd != NULL)
		di_devlink_fini(&linkhd);
	if (root != DI_NODE_NIL)
		di_fini(root);
	empty_table(&devicetable);

	dprintf((stderr, "io_cm_info: returning %d\n", retval));
	return (retval);
}

/*
 * Report whether a devinfo node counts as a leaf: it must have at least
 * one minor node and no child node.  Returns non-zero for a leaf.
 */
static int
ident_leaf(di_node_t node)
{
	/* No minors at all: not a leaf, children are not examined */
	if (di_minor_next(node, DI_MINOR_NIL) == DI_MINOR_NIL)
		return (0);

	return (di_child_node(node) == DI_NODE_NIL);
}

/*
 * Format the "<driver name><instance number>" string for a devinfo node
 * into drv_inst, which is written with a limit of MAXPATHLEN bytes.
 * devfs_path is only used for diagnostic output.
 *
 * Returns 0 on success, -1 if the node has no driver name or no instance.
 */
/* ARGSUSED */
static int
mk_drv_inst(di_node_t node, char drv_inst[], char *devfs_path)
{
	char	*drv_name;
	int	instance;

	drv_name = di_driver_name(node);
	if (drv_name == NULL) {
		dprintf((stderr, "no driver bound to %s\n",
		    devfs_path));
		return (-1);
	}

	instance = di_instance(node);
	if (instance == -1) {
		dprintf((stderr, "no instance assigned to %s\n",
		    devfs_path));
		return (-1);
	}

	(void) snprintf(drv_inst, MAXPATHLEN, "%s%d", drv_name, instance);
	return (0);
}

/*
 * Libdevinfo node walker.
 *
 * Called for each node of the IO device tree walk.  For every node whose
 * driver is not detached and which has a devfs path:
 *  -  The /devices path of the node is stored in the lookup table,
 *     together with a reference to the node, via add_lookup_entry().
 *  -  If the node has minors, they are walked with devinfo_minor_walk(),
 *     which stores the minor paths and their device links in the same
 *     table.
 *
 * Internal failures are flagged in the walk argument's err field and
 * terminate the walk.
 */
static int
devinfo_node_walk(di_node_t node, void *arg)
{
	devinfo_arg_t		*walk_arg = (devinfo_arg_t *)arg;
	char			*phys_path;
#ifdef DEBUG
	char			*drv_name;
#endif /* DEBUG */

	if (node == DI_NODE_NIL)
		return (DI_WALK_TERMINATE);

	/* Nodes with a detached driver are not recorded */
	if ((di_state(node) & DI_DRIVER_DETACHED) != 0)
		return (DI_WALK_CONTINUE);

	/* Neither are nodes for which no devfs path can be obtained */
	if ((phys_path = di_devfs_path(node)) == NULL)
		return (DI_WALK_CONTINUE);

	/* Build the full /devices path in the caller-provided buffer */
	(void) snprintf(walk_arg->pathbuf, MAXPATHLEN, "/devices%s",
	    phys_path);

#ifdef DEBUG
	dprintf((stderr, "devinfo_node_walk(%s)\n", walk_arg->pathbuf));
	if ((drv_name = di_driver_name(node)) != NULL)
		dprintf((stderr, " driver name %s instance %d\n", drv_name,
		    di_instance(node)));
#endif

	/* The path has been copied; release libdevinfo's string */
	di_devfs_path_free(phys_path);

	/* Record the physical device itself */
	if (add_lookup_entry(walk_arg->table, walk_arg->pathbuf, node) != 0) {
		dprintf((stderr, "add_lookup_entry: %s\n",
		    strerror(errno)));
		walk_arg->err = 1;
		return (DI_WALK_TERMINATE);
	}

	/* Nothing more to do for a node without minors */
	if (di_minor_next(node, DI_MINOR_NIL) == DI_MINOR_NIL)
		return (DI_WALK_CONTINUE);

	/* Record the node so the minor walk can tell when it leaves it */
	walk_arg->node = node;
	if (di_walk_minor(node, NULL, DI_CHECK_ALIAS, arg,
	    devinfo_minor_walk) != 0) {
		dprintf((stderr, "di_walk_minor: %s\n",
		    strerror(errno)));
		walk_arg->err = 1;
		return (DI_WALK_TERMINATE);
	}

	return (DI_WALK_CONTINUE);
}

/*
 * Libdevinfo minor walker, invoked from devinfo_node_walk().
 *
 * For each minor of the node recorded in the walk argument, the full
 * /devices path of the minor ("<node path>:<minor name>") is added to the
 * lookup table, and di_devlink_walk() is used to find the /dev links that
 * refer to it (added by devinfo_devlink_walk()).
 *
 * Returns DI_WALK_TERMINATE when the walk moves on to a different node or
 * an internal failure occurred (flagged in the walk argument's err
 * field), DI_WALK_CONTINUE otherwise.
 */
static int
devinfo_minor_walk(di_node_t node, di_minor_t minor, void *arg)
{
	char		*name;
	char		*devfs_path;
	devinfo_arg_t	*di_arg = (devinfo_arg_t *)arg;
	char		pathbuf[MAXPATHLEN];

#ifdef DEBUG
	/* The minor is an opaque handle, so trace it with %p rather than %d */
	dprintf((stderr, "devinfo_minor_walk(%p) %s\n", (void *)minor,
	    di_arg->pathbuf));

	if ((name = di_minor_name(minor)) != NULL) {
		dprintf((stderr, "  minor name %s\n", name));
	}
#endif /* DEBUG */

	/* Terminate the walk when the device node changes */
	if (node != di_arg->node) {
		return (DI_WALK_TERMINATE);
	}

	/* Construct full /devices path for this minor */
	if ((name = di_minor_name(minor)) == NULL) {
		return (DI_WALK_CONTINUE);
	}
	(void) snprintf(pathbuf, MAXPATHLEN, "%s:%s", di_arg->pathbuf, name);

	/* Add lookup entry for this minor node */
	if (add_lookup_entry(di_arg->table, pathbuf, node)) {
		dprintf((stderr, "add_lookup_entry: %s\n", strerror(errno)));
		di_arg->err = 1;
		return (DI_WALK_TERMINATE);
	}

	/*
	 * Walk the associated device links.
	 * Note that di_devlink_walk() doesn't want "/devices" in its paths.
	 * Also note that di_devlink_walk() will fail if there are no device
	 * links, which is fine; so ignore if it fails.  Only check for
	 * internal failures during such a walk.
	 */
	devfs_path = &pathbuf[strlen("/devices")];
	(void) di_devlink_walk(di_arg->linkhd, NULL, devfs_path, 0, arg,
	    devinfo_devlink_walk);
	if (di_arg->err != 0) {
		return (DI_WALK_TERMINATE);
	}

	return (DI_WALK_CONTINUE);
}

/*
 * Devlink walker, invoked from devinfo_minor_walk() for each /dev link of
 * a minor.  The link path is added to the lookup table and associated with
 * the node currently recorded in the walk argument.  On failure the walk
 * argument's err field is set and the walk is terminated.
 */
static int
devinfo_devlink_walk(di_devlink_t devlink, void *arg)
{
	devinfo_arg_t	*walk_arg = (devinfo_arg_t *)arg;
	const char	*link_name;

	/* Get the devlink's path */
	link_name = di_devlink_path(devlink);
	if (link_name == NULL) {
		dprintf((stderr, "di_devlink_path: %s\n", strerror(errno)));
		walk_arg->err = 1;
		return (DI_WALK_TERMINATE);
	}
	dprintf((stderr, "devinfo_devlink_walk: %s\n", link_name));

	/* Add lookup entry for this devlink */
	if (add_lookup_entry(walk_arg->table, link_name,
	    walk_arg->node) != 0) {
		dprintf((stderr, "add_lookup_entry: %s\n", strerror(errno)));
		walk_arg->err = 1;
		return (DI_WALK_TERMINATE);
	}

	return (DI_WALK_CONTINUE);
}

/*
 * Map rcm_info_t's to ri_client_t's, filtering out "uninteresting" (hack)
 * RCM clients. The number of "interesting" ri_client_t's is returned
 * in cnt if passed non-NULL.
 *
 * New clients are appended to the end of *client_list in tuple order.
 * Tuples without a resource name or info string, and tuples rejected by
 * rcm_ignore(), are skipped.  When RI_INCLUDE_QUERY is set in flags, the
 * query state is added to each client.
 *
 * Returns 0 on success (including a NULL info, for which *cnt is set to
 * 0) and -1 on allocation or nvlist failure.  Clients linked before a
 * failure remain on *client_list.
 */
static int
add_rcm_clients(ri_client_t **client_list, rcmd_t *rcm, rcm_info_t *info,
    int flags, int *cnt)
{
	rcm_info_tuple_t	*tuple;
	char			*rsrc, *usage;
	ri_client_t		*client;
	ri_client_t		**tailp;

	assert(client_list != NULL && rcm != NULL);

	/*
	 * Zero the count before the early return below, so that the caller
	 * always gets a defined value back.
	 */
	if (cnt != NULL)
		*cnt = 0;

	if (info == NULL)
		return (0);

	/* Locate the end of the existing list once, not per client */
	tailp = client_list;
	while (*tailp != NULL)
		tailp = &(*tailp)->next;

	tuple = NULL;
	while ((tuple = rcm_info_next(info, tuple)) != NULL) {
		if ((rsrc = (char *)rcm_info_rsrc(tuple)) == NULL ||
		    (usage = (char *)rcm_info_info(tuple)) == NULL) {
			continue;
		}

		/* rcm_ignore() returns 0 for clients to be filtered out */
		if (rcm_ignore(rsrc, usage) == 0)
			continue;

		if ((client = ri_client_alloc(rsrc, usage)) == NULL)
			return (-1);

		if ((flags & RI_INCLUDE_QUERY) && add_query_state(rcm, client,
		    rsrc, usage) != 0) {
			ri_client_free(client);
			return (-1);
		}

		if (cnt != NULL)
			++*cnt;

		/*
		 * Link in
		 */
		*tailp = client;
		tailp = &client->next;
	}

	return (0);
}

/*
 * Decide whether an RCM client should be filtered out.  Currently the
 * decision is based only on known info string prefixes listed in
 * rcm_info_filter; the resource name is not examined.
 *
 * Returns 0 if infostr starts with one of the filter prefixes (ignore
 * the client), -1 otherwise.
 */
/* ARGSUSED */
static int
rcm_ignore(char *rsrc, char *infostr)
{
	int	i;

	for (i = 0; rcm_info_filter[i] != NULL; i++) {
		size_t	prefix_len = strlen(rcm_info_filter[i]);

		if (strncmp(infostr, rcm_info_filter[i], prefix_len) == 0)
			return (0);
	}

	return (-1);
}

/*
 * Add the query state to a client's usage properties.
 *
 * If the (rsrc, info) tuple was cached in the offline query pass, the
 * state recorded there is converted with state2query() and any error
 * string is added as well; otherwise the state is RI_QUERY_UNKNOWN.
 *
 * Returns 0 on success, -1 if a property could not be added.
 */
static int
add_query_state(rcmd_t *rcm, ri_client_t *client, const char *rsrc,
    const char *info)
{
	rcm_info_tuple_t	*cached;
	char			*query_err;
	int			query_state;

	cached = tuple_lookup(rcm, rsrc, info);
	if (cached == NULL) {
		query_state = RI_QUERY_UNKNOWN;
		query_err = NULL;
	} else {
		query_state = state2query(rcm_info_state(cached));
		query_err = (char *)rcm_info_error(cached);
	}

	if (nvlist_add_int32(client->usg_props, RI_QUERY_STATE,
	    query_state) != 0)
		goto fail;

	/* The error string is optional */
	if (query_err != NULL && nvlist_add_string(client->usg_props,
	    RI_QUERY_ERR, query_err) != 0)
		goto fail;

	return (0);

fail:
	dprintf((stderr, "nvlist_add fail\n"));
	return (-1);
}

/*
 * Convert an RCM state into the corresponding RI_QUERY_* value.
 * States other than the query and query-fail states map to
 * RI_QUERY_UNKNOWN.
 */
static int
state2query(int rcm_state)
{
	switch (rcm_state) {
	case RCM_STATE_OFFLINE_QUERY:
	case RCM_STATE_SUSPEND_QUERY:
		return (RI_QUERY_OK);
	case RCM_STATE_OFFLINE_QUERY_FAIL:
	case RCM_STATE_SUSPEND_QUERY_FAIL:
		return (RI_QUERY_FAIL);
	default:
		return (RI_QUERY_UNKNOWN);
	}
}

/*
 * Append dev to the end of the singly linked list rooted at *head.
 * dev's own next pointer is left untouched.
 */
static void
dev_list_append(ri_dev_t **head, ri_dev_t *dev)
{
	ri_dev_t	**linkp = head;

	/* Advance to the link that currently terminates the list */
	while (*linkp != NULL)
		linkp = &(*linkp)->next;

	*linkp = dev;
}

/*
 * Insert dev into the cpu list so that the list stays ordered on cpuid;
 * CMP cpuids will not necessarily be discovered in sequence.
 *
 * dev is placed in front of the first element whose RI_CPU_ID is not
 * less than newid, or whose RI_CPU_ID cannot be looked up, and at the
 * end of the list if there is no such element.
 */
static void
dev_list_cpu_insert(ri_dev_t **listp, ri_dev_t *dev, processorid_t newid)
{
	ri_dev_t	*cur;
	int32_t		cur_id;

	for (cur = *listp; cur != NULL; cur = *listp) {
		if (nvlist_lookup_int32(cur->conf_props, RI_CPU_ID,
		    &cur_id) != 0)
			break;
		if (cur_id >= newid)
			break;
		listp = &cur->next;
	}

	dev->next = cur;
	*listp = dev;
}

/*
 * Find the tuple in rcm->offline_query_info whose resource name and info
 * string both match the given keys.  Returns NULL if there is no match or
 * if any argument is NULL.
 *
 * Linear lookup. Should convert to hash tab.
 */
static rcm_info_tuple_t *
tuple_lookup(rcmd_t *rcm, const char *krsrc, const char *kinfo)
{
	rcm_info_tuple_t	*cur;
	const char		*cur_rsrc;
	const char		*cur_info;

	if (rcm == NULL || krsrc == NULL || kinfo == NULL)
		return (NULL);

	for (cur = rcm_info_next(rcm->offline_query_info, NULL);
	    cur != NULL;
	    cur = rcm_info_next(rcm->offline_query_info, cur)) {
		/* Tuples missing either string cannot match */
		if ((cur_rsrc = rcm_info_rsrc(cur)) == NULL)
			continue;
		if ((cur_info = rcm_info_info(cur)) == NULL)
			continue;

		if (strcmp(cur_rsrc, krsrc) != 0)
			continue;
		if (strcmp(cur_info, kinfo) == 0)
			return (cur);
	}

	return (NULL);
}

/*
 * Create an attachment point handle and link it to the end of hdl->aps.
 * The new handle gets a configuration property list containing ap_id
 * under RI_AP_REQ_ID.  Returns NULL on failure, in which case nothing is
 * linked into hdl.
 */
static ri_ap_t *
ri_ap_alloc(char *ap_id, ri_hdl_t *hdl)
{
	ri_ap_t		*new_ap;
	ri_ap_t		**linkp;

	new_ap = calloc(1, sizeof (*new_ap));
	if (new_ap == NULL) {
		dprintf((stderr, "calloc: %s\n", strerror(errno)));
		return (NULL);
	}

	if (nvlist_alloc(&new_ap->conf_props, NV_UNIQUE_NAME, 0) != 0 ||
	    nvlist_add_string(new_ap->conf_props, RI_AP_REQ_ID, ap_id) != 0) {
		/* The list only exists if the allocation itself succeeded */
		if (new_ap->conf_props != NULL)
			nvlist_free(new_ap->conf_props);
		free(new_ap);
		return (NULL);
	}

	/* Append to the end of the handle's attachment point list */
	linkp = &hdl->aps;
	while (*linkp != NULL)
		linkp = &(*linkp)->next;
	*linkp = new_ap;

	return (new_ap);
}

/*
 * Allocate a zeroed ri_dev_t with an empty configuration property list.
 *
 * On failure the device is handed to s_free() and the resulting value of
 * the pointer is returned.
 * NOTE(review): s_free() is defined outside this section; it presumably
 * frees its argument and resets it to NULL, making the failure return
 * NULL - confirm against its definition.
 */
static ri_dev_t *
ri_dev_alloc(void)
{
	ri_dev_t	*new_dev;

	new_dev = calloc(1, sizeof (*new_dev));
	if (new_dev != NULL &&
	    nvlist_alloc(&new_dev->conf_props, NV_UNIQUE_NAME, 0) == 0)
		return (new_dev);

	s_free(new_dev);
	return (new_dev);
}

/*
 * Allocate an ri_dev_t for an IO device and record its driver/instance
 * name under RI_IO_DRV_INST.  Returns NULL on failure.
 */
static ri_dev_t *
io_dev_alloc(char *drv_inst)
{
	ri_dev_t	*io_dev;

	assert(drv_inst != NULL);

	io_dev = ri_dev_alloc();
	if (io_dev == NULL)
		return (NULL);

	if (nvlist_add_string(io_dev->conf_props, RI_IO_DRV_INST,
	    drv_inst) == 0)
		return (io_dev);

	dprintf((stderr, "nvlist_add_string fail\n"));
	ri_dev_free(io_dev);
	return (NULL);
}

/*
 * Allocate an ri_client_t whose usage property list holds the given
 * resource name (RI_CLIENT_RSRC) and usage string (RI_CLIENT_USAGE).
 * Returns NULL on failure.
 */
static ri_client_t *
ri_client_alloc(char *rsrc, char *usage)
{
	ri_client_t	*new_client;

	assert(rsrc != NULL && usage != NULL);

	new_client = calloc(1, sizeof (*new_client));
	if (new_client == NULL) {
		dprintf((stderr, "calloc: %s\n", strerror(errno)));
		return (NULL);
	}

	if (nvlist_alloc(&new_client->usg_props, NV_UNIQUE_NAME, 0) != 0) {
		dprintf((stderr, "nvlist_alloc fail\n"));
		free(new_client);
		return (NULL);
	}

	if (nvlist_add_string(new_client->usg_props, RI_CLIENT_RSRC,
	    rsrc) == 0 &&
	    nvlist_add_string(new_client->usg_props, RI_CLIENT_USAGE,
	    usage) == 0)
		return (new_client);

	dprintf((stderr, "nvlist_add_string fail\n"));
	ri_client_free(new_client);
	return (NULL);
}

/*
 * Release the cfga_list_data of each of the napds table entries, then
 * the table itself.
 */
static void
apd_tbl_free(apd_t apd_tbl[], int napds)
{
	int	idx;

	for (idx = 0; idx < napds; idx++) {
		s_free(apd_tbl[idx].cfga_list_data);
	}

	free(apd_tbl);
}

/*
 * Translate a processor state code (P_*) into its PS_* string.
 * Unrecognized codes yield "unknown".
 */
static char *
pstate2str(int pi_state)
{
	switch (pi_state) {
	case P_OFFLINE:
		return (PS_OFFLINE);
	case P_ONLINE:
		return (PS_ONLINE);
	case P_FAULTED:
		return (PS_FAULTED);
	case P_POWEROFF:
		return (PS_POWEROFF);
	case P_NOINTR:
		return (PS_NOINTR);
	case P_SPARE:
		return (PS_SPARE);
	default:
		return ("unknown");
	}
}

#ifdef DEBUG
/*
 * Debug helper: print the number of cfga list entries of each of the
 * n_apds table elements, followed by the logical ap_id of every entry.
 * All output goes to fp.
 */
static void
dump_apd_tbl(FILE *fp, apd_t *apds, int n_apds)
{
	int			i, j;
	cfga_list_data_t	*cfga_ldata;

	for (i = 0; i < n_apds; i++, apds++) {
		/* Use the caller's stream here too, not stderr */
		dprintf((fp, "apd_tbl[%d].nlist=%d\n", i, apds->nlist));
		for (j = 0, cfga_ldata = apds->cfga_list_data; j < apds->nlist;
		    j++, cfga_ldata++) {
			dprintf((fp,
			    "apd_tbl[%d].cfga_list_data[%d].ap_log_id=%s\n",
			    i, j, cfga_ldata->ap_log_id));
		}
	}
}
#endif /* DEBUG */

/*
 * Append a (name, node) pair to the device lookup table.
 *
 * The lookup table is a simple array that is grown in chunks of
 * USAGE_ALLOC_SIZE slots to optimize memory allocation.  Each entry is
 * given the next index in order, so that the original device tree
 * ordering can be discerned at a later time.  The name is duplicated;
 * new entries start with no usage records.
 *
 * add_lookup_entry is called from the libdevinfo tree traversal callbacks:
 * 1) devinfo_node_walk - physical device path for each node in
 *    the devinfo tree via di_walk_node(), lookup entry name is
 *    /devices/[di_devfs_path]
 * 2) devinfo_minor_walk - physical device path plus minor name for
 *    each minor associated with a node via di_walk_minor(), lookup entry
 *    name is /devices/[di_devfs_path:di_minor_name]
 * 3) devinfo_devlink_walk - for each minor's /dev link from its /devices
 *    path via di_devlink_walk(), lookup entry name is di_devlink_path()
 *
 * Returns 0 on success; on allocation failure returns -1 with errno set
 * to ENOMEM.
 */
static int
add_lookup_entry(lookup_table_t *table, const char *name, di_node_t node)
{
	lookup_entry_t	*grown;
	lookup_entry_t	*slot;
	size_t		nbytes;

	/* Grow the lookup table by USAGE_ALLOC_SIZE slots if necessary */
	if (table->n_entries == table->n_slots) {
		nbytes = (table->n_slots + USAGE_ALLOC_SIZE) *
		    sizeof (lookup_entry_t);
		grown = (lookup_entry_t *)realloc(table->table, nbytes);
		if (grown == NULL) {
			dprintf((stderr, "add_lookup_entry: alloc failed: %s\n",
			    strerror(errno)));
			errno = ENOMEM;
			return (-1);
		}
		table->n_slots += USAGE_ALLOC_SIZE;
		table->table = grown;
	}

	dprintf((stderr, "add_lookup_entry[%d]:%s\n", table->n_entries, name));

	/* Fill in the next free slot; it only counts once fully set up */
	slot = &table->table[table->n_entries];
	if ((slot->name = strdup(name)) == NULL) {
		dprintf((stderr, "add_lookup_entry: strdup failed: %s\n",
		    strerror(errno)));
		errno = ENOMEM;
		return (-1);
	}
	slot->index = table->n_entries;
	slot->node = node;
	slot->n_usage = 0;
	slot->usage = NULL;

	table->n_entries += 1;
	return (0);
}

/*
 * Comparison routine for ordering and searching lookup table entries by
 * name.  Lookup table entry names are full pathname strings, all start
 * with /.  Returns the strcmp() result of the two names.
 */
static int
table_compare_names(const void *a, const void *b)
{
	const lookup_entry_t	*lhs = a;
	const lookup_entry_t	*rhs = b;

	return (strcmp(lhs->name, rhs->name));
}


/*
 * Comparison routine for ordering lookup table entries by index.
 * Returns -1 if the first index is less, 1 if it is greater, and 0 if
 * the two are equal.
 */
static int
table_compare_indices(const void *a, const void *b)
{
	const lookup_entry_t	*lhs = a;
	const lookup_entry_t	*rhs = b;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
	return ((lhs->index > rhs->index) - (lhs->index < rhs->index));
}

/*
 * Given a RCM resource name, find the matching entry in the IO device
 * table.  The table is searched with bsearch() using
 * table_compare_names(), so it must currently be ordered by name.
 * Returns NULL if no entry has that name.
 */
static lookup_entry_t *
lookup(lookup_table_t *table, const char *rcm_rsrc)
{
	lookup_entry_t	key;
	lookup_entry_t	*match;

	dprintf((stderr, "lookup:%s\n", rcm_rsrc));

	/* Only the name field of the search key is consulted */
	key.name = (char *)rcm_rsrc;
	match = bsearch(&key, table->table, table->n_entries,
	    sizeof (lookup_entry_t), table_compare_names);

#ifdef DEBUG
	if (match != NULL) {
		dprintf((stderr, " found entry:%d\n", match->index));
	}
#endif /* DEBUG */

	return (match);
}

/*
 * Add RCM usage to the given device table entry.
 *
 * Nothing is added (and 0 is returned) if entry is NULL, if the tuple
 * has no info string, or if rcm_ignore() filters the usage out.  The
 * rsrc and info pointers are stored as-is, not copied.
 *
 * Returns -1 on realloc failure.
 */
static int
add_usage(lookup_entry_t *entry, const char *rcm_rsrc, rcm_info_tuple_t *tuple)
{
	const char	*usage_info;
	usage_t		*grown;
	usage_t		*slot;

	if (entry == NULL)
		return (0);

	if ((usage_info = rcm_info_info(tuple)) == NULL)
		return (0);

	if (rcm_ignore((char *)rcm_rsrc, (char *)usage_info) == 0)
		return (0);

	/* Make room for one more usage record */
	grown = (usage_t *)realloc(entry->usage,
	    (entry->n_usage + 1) * sizeof (usage_t));
	if (grown == NULL) {
		dprintf((stderr, "add_usage: alloc failed: %s\n",
		    strerror(errno)));
		return (-1);
	}
	dprintf((stderr, "add_usage: entry %d rsrc: %s info: %s\n",
	    entry->index, rcm_rsrc, usage_info));

	entry->usage = grown;
	slot = &entry->usage[entry->n_usage];
	slot->rsrc = rcm_rsrc;
	slot->info = usage_info;
	entry->n_usage += 1;

	return (0);
}

/*
 * Free every entry name and usage array in the lookup table, then the
 * table array itself, and reset the table to the empty state.  A NULL
 * table pointer is accepted and ignored.
 */
static void
empty_table(lookup_table_t *table)
{
	int		idx;
	lookup_entry_t	*ent;

	if (table == NULL)
		return;

	for (idx = 0; idx < table->n_entries; idx++) {
		ent = &table->table[idx];

		free(ent->name);
		/*
		 * Note: the strings pointed to from within
		 * usage were freed already by rcm_free_info
		 */
		free(ent->usage);
	}

	free(table->table);
	table->table = NULL;
	table->n_entries = 0;
	table->n_slots = 0;
}