OpenSolaris_b135/lib/cfgadm_plugins/scsi/common/cfga_ctl.c

Compare this file to the similar file:
Show the results in this format:

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "cfga_scsi.h"

struct larg {
	int ndevs;
	int nelem;
	char *dev;
	char **dev_list;
};

#define	ETC_VFSTAB	"/etc/vfstab"
#define	SCFGA_LOCK	"/var/run/cfgadm_scsi"

/* Function prototypes */

static scfga_ret_t quiesce_confirm(apid_t *apidp,
    msgid_t cmd_msg, prompt_t *pt, int *okp, int *quiesce, int *l_errnop);
static scfga_ret_t dev_hotplug(apid_t *apidp,
    prompt_t *pt, cfga_flags_t flags, int quiesce, char **errstring);
static int disconnect(struct cfga_confirm *confp);
static int critical_ctrlr(const char *hba_phys);
static cfga_stat_t bus_devctl_to_recep_state(uint_t bus_dc_state);
static int get_hba_children(char *bus_path, char *dev_excl, char ***dev_list);
static char *get_node_path(char *minor_path);
static void free_dev_list_elements(char **dev_list);
static void free_dev_list(char **dev_list);
static int alloc_dev_list(struct larg *largp);

/*
 * Single thread all implicit quiesce operations
 */
static mutex_t	quiesce_mutex = DEFAULTMUTEX;

/*ARGSUSED*/
scfga_ret_t
bus_change_state(
	cfga_cmd_t state_change_cmd,
	apid_t *apidp,
	struct cfga_confirm *confp,
	cfga_flags_t flags,
	char **errstring)
{
	int l_errno = 0, force;
	uint_t state = 0;
	cfga_stat_t bus_state;
	scfga_cmd_t cmd;
	msgid_t errid;
	cfga_stat_t prereq;
	scfga_ret_t ret;
	char **dev_list = NULL;

	assert(apidp->path != NULL);
	assert(apidp->hba_phys != NULL);

	/*
	 * No dynamic components allowed
	 */
	if (apidp->dyncomp != NULL) {
		cfga_err(errstring, 0, ERR_NOT_BUSAPID, 0);
		return (SCFGA_ERR);
	}

	/* Get bus state */
	if (devctl_cmd(apidp->path, SCFGA_BUS_GETSTATE, &state,
	    &l_errno) != SCFGA_OK) {
		cfga_err(errstring, l_errno, ERR_BUS_GETSTATE, 0);
		return (SCFGA_ERR);
	}

	bus_state = bus_devctl_to_recep_state(state);
	force = ((flags & CFGA_FLAG_FORCE) == CFGA_FLAG_FORCE) ? 1 : 0;
	assert(confp->confirm != NULL);

	switch (state_change_cmd) {
	case CFGA_CMD_DISCONNECT:	/* quiesce bus */
		/*
		 * If force flag not specified, check if controller is
		 * critical.
		 */
		if (!force) {
			/*
			 * This check is not foolproof, get user confirmation
			 * if test passes.
			 */
			if (critical_ctrlr(apidp->path)) {
				cfga_err(errstring, 0, ERR_CTRLR_CRIT, 0);
				ret = SCFGA_ERR;
				break;
			} else if (!disconnect(confp)) {
				ret = SCFGA_NACK;
				break;
			}
		}

		cmd = SCFGA_BUS_QUIESCE;
		errid = ERR_BUS_QUIESCE;
		prereq = CFGA_STAT_CONNECTED;

		goto common;

	case CFGA_CMD_CONNECT:		/* unquiesce bus */
		cmd = SCFGA_BUS_UNQUIESCE;
		errid = ERR_BUS_UNQUIESCE;
		prereq = CFGA_STAT_DISCONNECTED;

		goto common;

	case CFGA_CMD_CONFIGURE:
		cmd = SCFGA_BUS_CONFIGURE;
		errid = ERR_BUS_CONFIGURE;
		prereq = CFGA_STAT_CONNECTED;

		goto common;

	case CFGA_CMD_UNCONFIGURE:
		cmd = SCFGA_BUS_UNCONFIGURE;
		errid = ERR_BUS_UNCONFIGURE;
		prereq = CFGA_STAT_CONNECTED;

		/* FALLTHROUGH */
	common:
		if (bus_state != prereq) {
			cfga_err(errstring, 0,
			    (prereq == CFGA_STAT_CONNECTED)
			    ? ERR_BUS_NOTCONNECTED
			    : ERR_BUS_CONNECTED, 0);
			ret = SCFGA_ERR;
			break;
		}

		/*
		 * When quiescing or unconfiguring a bus, first suspend or
		 * offline it through RCM.
		 * For unquiescing, we simple build the dev_list for
		 * resume notification.
		 */
		if (((apidp->flags & FLAG_DISABLE_RCM) == 0) &&
		    ((cmd == SCFGA_BUS_QUIESCE) ||
		    (cmd == SCFGA_BUS_UNQUIESCE) ||
		    (cmd == SCFGA_BUS_UNCONFIGURE))) {
			ret = get_hba_children(apidp->path, NULL, &dev_list);
			if (ret != SCFGA_OK) {
				break;
			}
			if (cmd == SCFGA_BUS_QUIESCE) {
				if ((ret = scsi_rcm_suspend(dev_list,
				    errstring, flags, 1)) != SCFGA_OK) {
					break;
				}
			} else if (cmd == SCFGA_BUS_UNCONFIGURE) {
				if ((ret = scsi_rcm_offline(dev_list,
				    errstring, flags)) != SCFGA_OK) {
					break;
				}
			}
		}

		ret = devctl_cmd(apidp->path, cmd, NULL, &l_errno);
		if (ret != SCFGA_OK) {
			/*
			 * EIO when child devices are busy may confuse user.
			 * So explain it.
			 */
			if (cmd == SCFGA_BUS_UNCONFIGURE && l_errno == EIO)
				errid = ERR_MAYBE_BUSY;

			cfga_err(errstring, l_errno, errid, 0);

			/*
			 * If the bus was suspended in RCM, then cancel the RCM
			 * operation.  Discard RCM failures here because the
			 * devctl's failure is what is most relevant.
			 */
			if ((apidp->flags & FLAG_DISABLE_RCM) == 0) {
				if (cmd == SCFGA_BUS_QUIESCE)
					(void) scsi_rcm_resume(dev_list,
					    errstring,
					    (flags & (~CFGA_FLAG_FORCE)), 1);
				else if (cmd == SCFGA_BUS_UNCONFIGURE) {
					(void) devctl_cmd(apidp->path,
					    SCFGA_BUS_CONFIGURE, NULL,
					    &l_errno);
					(void) scsi_rcm_online(dev_list,
					    errstring,
					    (flags & (~CFGA_FLAG_FORCE)));
				}
			}

			break;
		}

		/*
		 * When unquiescing or configuring a bus, resume or online it
		 * in RCM when the devctl command is complete.
		 * When unconfiguring a bus, notify removal of devices.
		 */
		if ((apidp->flags & FLAG_DISABLE_RCM) == 0) {
			if (cmd == SCFGA_BUS_UNQUIESCE) {
				ret = scsi_rcm_resume(dev_list, errstring,
				    (flags & (~CFGA_FLAG_FORCE)), 1);
			} else if (cmd == SCFGA_BUS_UNCONFIGURE) {
				ret = scsi_rcm_remove(dev_list, errstring,
				    (flags & (~CFGA_FLAG_FORCE)));
			}
		}
		break;

	case CFGA_CMD_LOAD:
	case CFGA_CMD_UNLOAD:
		ret = SCFGA_OPNOTSUPP;
		break;

	default:
		cfga_err(errstring, 0, ERR_CMD_INVAL, 0);
		ret = SCFGA_ERR;
		break;
	}

	free_dev_list(dev_list);
	return (ret);
}

scfga_ret_t
dev_change_state(
	cfga_cmd_t state_change_cmd,
	apid_t *apidp,
	cfga_flags_t flags,
	char **errstring)
{
	uint_t state = 0;
	int l_errno = 0;
	cfga_stat_t bus_state;
	scfga_cmd_t cmd;
	msgid_t errid;
	scfga_ret_t ret;
	char *dev_list[2] = {NULL};

	assert(apidp->path != NULL);
	assert(apidp->hba_phys != NULL);

	/*
	 * For a device, dynamic component must be present
	 */
	if (apidp->dyncomp == NULL) {
		cfga_err(errstring, 0, ERR_APID_INVAL, 0);
		return (SCFGA_ERR);
	}

	/* Get bus state */
	if (devctl_cmd(apidp->hba_phys, SCFGA_BUS_GETSTATE, &state,
	    &l_errno) != SCFGA_OK) {
		cfga_err(errstring, l_errno, ERR_BUS_GETSTATE, 0);
		return (SCFGA_ERR);
	}

	bus_state = bus_devctl_to_recep_state(state);

	switch (state_change_cmd) {
	case CFGA_CMD_CONFIGURE:		/* online device */
		cmd = SCFGA_DEV_CONFIGURE;
		errid = ERR_DEV_CONFIGURE;
		goto common;

	case CFGA_CMD_UNCONFIGURE:		/* offline device */
		cmd = SCFGA_DEV_UNCONFIGURE;
		errid = ERR_DEV_UNCONFIGURE;
		/* FALLTHROUGH */
	common:
		if (bus_state != CFGA_STAT_CONNECTED) {
			cfga_err(errstring, 0, ERR_BUS_NOTCONNECTED, 0);
			ret = SCFGA_ERR;
			break;
		}

		if (apidp->dyntype == PATH_APID) {
			/* call a scsi_vhci ioctl to do online/offline path. */
			ret = path_apid_state_change(apidp, cmd,
			    flags, errstring, &l_errno, errid);
		} else {
			/*
			 * When unconfiguring a device, first offline it
			 * through RCM.
			 */
			if ((apidp->flags & FLAG_DISABLE_RCM) == 0) {
				if (cmd == SCFGA_DEV_UNCONFIGURE) {
					dev_list[0] =
					    get_node_path(apidp->path);
					if (dev_list[0] == NULL) {
						ret = SCFGA_ERR;
						break;
					}
					if ((ret = scsi_rcm_offline(dev_list,
					    errstring, flags)) != SCFGA_OK) {
						break;
					}
				}
			}

			ret = devctl_cmd(apidp->path, cmd, NULL, &l_errno);
			if (ret != SCFGA_OK) {
				cfga_err(errstring, l_errno, errid, 0);

			/*
			 * If an unconfigure fails, cancel the RCM offline.
			 * Discard any RCM failures so that the devctl
			 * failure will still be reported.
			 */
				if ((apidp->flags & FLAG_DISABLE_RCM) == 0) {
					if (cmd == SCFGA_DEV_UNCONFIGURE)
						(void) scsi_rcm_online(dev_list,
						    errstring, flags);
				}
				break;
			}
			if ((apidp->flags & FLAG_DISABLE_RCM) == 0) {
			/*
			 * Unconfigure succeeded, call the RCM notify_remove.
			 */
				if (cmd == SCFGA_DEV_UNCONFIGURE)
					(void) scsi_rcm_remove(dev_list,
					    errstring, flags);
			}
		}
		break;

	/*
	 * Cannot disconnect/connect individual devices without affecting
	 * other devices on the bus. So we don't support these ops.
	 */
	case CFGA_CMD_DISCONNECT:
	case CFGA_CMD_CONNECT:
		cfga_err(errstring, 0, ERR_NOT_DEVOP, 0);
		ret = SCFGA_ERR;
		break;
	case CFGA_CMD_LOAD:
	case CFGA_CMD_UNLOAD:
		ret = SCFGA_OPNOTSUPP;
		break;
	default:
		cfga_err(errstring, 0, ERR_CMD_INVAL, 0);
		ret = SCFGA_ERR;
		break;
	}

	free_dev_list_elements(dev_list);
	return (ret);
}

/*ARGSUSED*/
scfga_ret_t
dev_remove(
	const char *func,
	scfga_cmd_t cmd,
	apid_t *apidp,
	prompt_t *prp,
	cfga_flags_t flags,
	char **errstring)
{
	int proceed, l_errno = 0;
	scfga_ret_t ret;
	int do_quiesce;
	char *dev_list[2] = {NULL};

	assert(apidp->hba_phys != NULL);
	assert(apidp->path != NULL);

	/* device operation only */
	if (apidp->dyncomp == NULL) {
		cfga_err(errstring, 0, ERR_NOT_BUSOP, 0);
		return (SCFGA_ERR);
	}

	proceed = 1;
	ret = quiesce_confirm(apidp, MSG_RMDEV, prp, &proceed, &do_quiesce,
	    &l_errno);
	if (ret != SCFGA_OK) {
		cfga_err(errstring, l_errno, ERR_DEV_REMOVE, 0);
		return (ret);
	}

	if (!proceed) {
		return (SCFGA_NACK);
	}

	/*
	 * Offline the device in RCM
	 */
	if ((apidp->flags & FLAG_DISABLE_RCM) == 0) {
		dev_list[0] = get_node_path(apidp->path);
		if (dev_list[0] == NULL)
			return (SCFGA_ERR);
		if ((ret = scsi_rcm_offline(dev_list, errstring, flags))
		    != SCFGA_OK) {
			free_dev_list_elements(dev_list);
			return (ret);
		}
	}

	/*
	 * Offline the device
	 */
	ret = devctl_cmd(apidp->path, SCFGA_DEV_UNCONFIGURE, NULL, &l_errno);
	if (ret != SCFGA_OK) {

		cfga_err(errstring, l_errno, ERR_DEV_REMOVE, 0);

		/*
		 * Cancel the RCM offline.  Discard the RCM failures so that
		 * the above devctl failure is still reported.
		 */
		if ((apidp->flags & FLAG_DISABLE_RCM) == 0)
			(void) scsi_rcm_online(dev_list, errstring, flags);
		free_dev_list_elements(dev_list);
		return (ret);
	}

	/* Do the physical removal */
	ret = dev_hotplug(apidp, prp, flags, do_quiesce, errstring);

	if (ret == SCFGA_OK) {
		/*
		 * Complete the remove.
		 * Since the device is already offlined, remove shouldn't
		 * fail. Even if remove fails, there is no side effect.
		 */
		(void) devctl_cmd(apidp->path, SCFGA_DEV_REMOVE,
		    NULL, &l_errno);
		if ((apidp->flags & FLAG_DISABLE_RCM) == 0)
			ret = scsi_rcm_remove(dev_list, errstring, flags);
	} else {
		/*
		 * Reconfigure the device and restore the device's RCM state.
		 * If reconfigure succeeds, restore the state to online.
		 * If reconfigure fails (e.g. a typo from user), we treat
		 * the device as removed.
		 */
		if (devctl_cmd(apidp->path, SCFGA_DEV_CONFIGURE, NULL, &l_errno)
		    == SCFGA_OK) {
			if ((apidp->flags & FLAG_DISABLE_RCM) == 0)
				(void) scsi_rcm_online(dev_list, errstring,
				    flags);
		} else {
			char *cp = strrchr(apidp->path, ':');
			if (cp)
				*cp = '\0';
			cfga_err(errstring, l_errno, ERR_DEV_RECONFIGURE,
			    apidp->path, 0);
			if (cp)
				*cp = ':';
			if ((apidp->flags & FLAG_DISABLE_RCM) == 0)
				(void) scsi_rcm_remove(dev_list, errstring,
				    flags);
		}
	}

	free_dev_list_elements(dev_list);
	return (ret);
}

/*ARGSUSED*/
scfga_ret_t
dev_insert(
	const char *func,
	scfga_cmd_t cmd,
	apid_t *apidp,
	prompt_t *prp,
	cfga_flags_t flags,
	char **errstring)
{
	int proceed, l_errno = 0;
	scfga_ret_t ret;
	int do_quiesce;

	assert(apidp->hba_phys != NULL);
	assert(apidp->path != NULL);

	/* Currently, insert operation only allowed for bus */
	if (apidp->dyncomp != NULL) {
		cfga_err(errstring, 0, ERR_NOT_DEVOP, 0);
		return (SCFGA_ERR);
	}

	proceed = 1;
	ret = quiesce_confirm(apidp, MSG_INSDEV, prp, &proceed, &do_quiesce,
	    &l_errno);
	if (ret != SCFGA_OK) {
		cfga_err(errstring, l_errno, ERR_DEV_INSERT, 0);
		return (ret);
	}

	if (!proceed) {
		return (SCFGA_NACK);
	}

	/* Do the physical addition */
	ret = dev_hotplug(apidp, prp, flags, do_quiesce, errstring);
	if (ret != SCFGA_OK) {
		return (ret);
	}

	/*
	 * Configure bus to online new device(s).
	 * Previously offlined devices will not be onlined.
	 */
	ret = devctl_cmd(apidp->hba_phys, SCFGA_BUS_CONFIGURE, NULL, &l_errno);
	if (ret != SCFGA_OK) {
		cfga_err(errstring, l_errno, ERR_DEV_INSERT, 0);
		return (SCFGA_ERR);
	}

	return (SCFGA_OK);
}

/*ARGSUSED*/
scfga_ret_t
dev_replace(
	const char *func,
	scfga_cmd_t cmd,
	apid_t *apidp,
	prompt_t *prp,
	cfga_flags_t flags,
	char **errstring)
{
	int proceed, l_errno = 0;
	scfga_ret_t ret, ret2;
	int do_quiesce;
	char *dev_list[2] = {NULL};

	assert(apidp->hba_phys != NULL);
	assert(apidp->path != NULL);

	/* device operation only */
	if (apidp->dyncomp == NULL) {
		cfga_err(errstring, 0, ERR_NOT_BUSOP, 0);
		return (SCFGA_ERR);
	}

	proceed = 1;
	ret = quiesce_confirm(apidp, MSG_REPLDEV, prp, &proceed, &do_quiesce,
	    &l_errno);
	if (ret != SCFGA_OK) {
		cfga_err(errstring, l_errno, ERR_DEV_REPLACE, 0);
		return (ret);
	}

	if (!proceed) {
		return (SCFGA_NACK);
	}

	/* Offline the device in RCM */
	if ((apidp->flags & FLAG_DISABLE_RCM) == 0) {
		dev_list[0] = get_node_path(apidp->path);
		if (dev_list[0] == NULL)
			return (SCFGA_ERR);
		if ((ret = scsi_rcm_offline(dev_list, errstring, flags))
		    != SCFGA_OK) {
			free_dev_list_elements(dev_list);
			return (ret);
		}
	}

	ret = devctl_cmd(apidp->path, SCFGA_DEV_REMOVE, NULL, &l_errno);
	if (ret != SCFGA_OK) {

		/*
		 * Cancel the RCM offline.  Discard any RCM failures so that
		 * the devctl failure can still be reported.
		 */
		if ((apidp->flags & FLAG_DISABLE_RCM) == 0)
			(void) scsi_rcm_online(dev_list, errstring, flags);

		cfga_err(errstring, l_errno, ERR_DEV_REPLACE, 0);
		free_dev_list_elements(dev_list);
		return (ret);
	}

	/* do the physical replace */
	ret = dev_hotplug(apidp, prp, flags, do_quiesce, errstring);

	/* Online the replacement, or restore state on error */
	ret2 = devctl_cmd(apidp->path, SCFGA_DEV_CONFIGURE, NULL, &l_errno);

	if (ret2 != SCFGA_OK) {
		cfga_err(errstring, l_errno, ERR_DEV_REPLACE, 0);
	}

	/*
	 * Remove the replaced device in RCM, or online the device in RCM
	 * to recover.
	 */
	if ((apidp->flags & FLAG_DISABLE_RCM) == 0) {
		if (ret == SCFGA_OK)
			ret = scsi_rcm_remove(dev_list, errstring, flags);
		else if (ret2 == SCFGA_OK)
			ret2 = scsi_rcm_online(dev_list, errstring, flags);
	}
	free_dev_list_elements(dev_list);

	return (ret == SCFGA_OK ? ret2 : ret);
}

#pragma weak plat_dev_led
/*ARGSUSED*/
scfga_ret_t
dev_led(
	const char *func,
	scfga_cmd_t cmd,
	apid_t *apidp,
	prompt_t *prp,
	cfga_flags_t flags,
	char **errstring)
{

	/*
	 * The implementation of the led command is platform-specific, so
	 * the default behavior is to say that the functionality is not
	 * available for this device.
	 */
	if (plat_dev_led) {
		return (plat_dev_led(func, cmd, apidp, prp, flags, errstring));
	}
	cfga_err(errstring, 0, ERR_UNAVAILABLE, 0);
	return (SCFGA_ERR);
}

/*ARGSUSED*/
scfga_ret_t
reset_common(
	const char *func,
	scfga_cmd_t cmd,
	apid_t *apidp,
	prompt_t *prp,
	cfga_flags_t flags,
	char **errstring)
{
	int l_errno = 0;
	scfga_ret_t ret;


	assert(apidp->path != NULL);
	assert(apidp->hba_phys != NULL);

	switch (cmd) {
	case SCFGA_RESET_DEV:
		if (apidp->dyncomp == NULL) {
			cfga_err(errstring, 0, ERR_NOT_BUSOP, 0);
			return (SCFGA_ERR);
		}
		break;

	case SCFGA_RESET_BUS:
	case SCFGA_RESET_ALL:
		if (apidp->dyncomp != NULL) {
			cfga_err(errstring, 0, ERR_NOT_DEVOP, 0);
			return (SCFGA_ERR);
		}
		break;
	default:
		cfga_err(errstring, 0, ERR_CMD_INVAL, 0);
		return (SCFGA_ERR);
	}

	ret = devctl_cmd(apidp->path, cmd, NULL, &l_errno);
	if (ret != SCFGA_OK) {
		cfga_err(errstring, l_errno, ERR_RESET, 0);
	}

	return (ret);
}

static int
disconnect(struct cfga_confirm *confp)
{
	int ans, append_newline;
	char *cq;

	append_newline = 0;
	cq = cfga_str(append_newline, WARN_DISCONNECT, 0);

	ans = confp->confirm(confp->appdata_ptr, cq);

	S_FREE(cq);

	return (ans == 1);
}

/*
 * Check for "scsi-no-quiesce" property
 * Return code: -1 error, 0 quiesce not required, 1 quiesce required
 */
static int
quiesce_required(apid_t *apidp, int *l_errnop)
{
	di_node_t bus_node, dev_node;
	char *bus_path, *bus_end;
	char *dev_path, *dev_end;
	int *propval;

	/* take libdevinfo snapshot of subtree at hba */
	bus_path = apidp->hba_phys + strlen(DEVICES_DIR);
	bus_end = strrchr(bus_path, ':');
	if (bus_end)
		*bus_end = '\0';

	bus_node = di_init(bus_path, DINFOSUBTREE|DINFOPROP);
	if (bus_end)
		*bus_end = ':';
	if (bus_node == DI_NODE_NIL) {
		*l_errnop = errno;
		return (-1);	/* error */
	}

	/* check bus node for property */
	if (di_prop_lookup_ints(DDI_DEV_T_ANY, bus_node, SCSI_NO_QUIESCE,
	    &propval) == 1) {
		di_fini(bus_node);
		return (0);	/* quiesce not required */
	}

	/* if this ap is HBA, return with quiesce required */
	if (apidp->dyncomp == NULL) {
		di_fini(bus_node);
		return (1);
	}

	/* check device node for property */
	dev_path = apidp->path + strlen(DEVICES_DIR);
	dev_end = strrchr(dev_path, ':');
	if (dev_end)
		*dev_end = '\0';

	dev_node = di_child_node(bus_node);
	while (dev_node != DI_NODE_NIL) {
		char *child_path;
		child_path = di_devfs_path(dev_node);
		if (strcmp(child_path, dev_path) == 0) {
			di_devfs_path_free(child_path);
			break;
		}
		di_devfs_path_free(child_path);
		dev_node = di_sibling_node(dev_node);
	}

	if (dev_end)
		*dev_end = ':';
	if (dev_node == DI_NODE_NIL) {
		di_fini(bus_node);
		return (1);	/* dev not found (insert case) */
	}

	/* check child node for property */
	if (di_prop_lookup_ints(DDI_DEV_T_ANY, dev_node, "scsi-no-quiesce",
	    &propval) == 1) {
		di_fini(bus_node);
		return (0);	/* quiesce not required */
	}
	return (1);	/* quiesce required */
}

static scfga_ret_t
quiesce_confirm(
	apid_t *apidp,
	msgid_t cmd_msg,
	prompt_t *prp,
	int *okp,
	int *quiesce,
	int *l_errnop)
{
	char *buf = NULL, *hbap = NULL, *cq1 = NULL, *cq2 = NULL;
	char *cp;
	size_t len = 0;
	int i = 0, append_newline;
	scfga_ret_t ret;

	assert(apidp->path != NULL);
	assert(apidp->hba_phys != NULL);

	*quiesce = quiesce_required(apidp, l_errnop);
	if (*quiesce == -1)
		return (SCFGA_ERR);
	else if (*quiesce == 0)
		return (SCFGA_OK);

	/*
	 * Try to create HBA logical ap_id.
	 * If that fails use physical path
	 */
	ret = make_hba_logid(apidp->hba_phys, &hbap, &i);
	if (ret != SCFGA_OK) {
		if ((hbap = get_node_path(apidp->hba_phys)) == NULL) {
			*l_errnop = errno;
			return (SCFGA_LIB_ERR);
		}
	}

	assert(hbap != NULL);

	append_newline = 0;
	cq1 = cfga_str(append_newline, CONF_QUIESCE_1, hbap, 0);
	cq2 = cfga_str(append_newline, CONF_QUIESCE_2, 0);
	len = strlen(cq1) + strlen(cq2) + 1; /* Includes term. NULL */

	if ((buf = calloc(1, len)) == NULL) {
		*l_errnop = errno;
		ret = SCFGA_LIB_ERR;
		S_FREE(cq1);
		S_FREE(cq2);
		goto out;
	}
	(void) strcpy(buf, cq1);
	(void) strcat(buf, cq2);

	S_FREE(cq1);
	S_FREE(cq2);


	/* Remove minor name (if any) from phys path */
	if ((cp = strrchr(apidp->path, ':')) != NULL) {
		*cp = '\0';
	}

	/* describe operation being attempted */
	cfga_msg(prp->msgp, cmd_msg, apidp->path, 0);

	/* Restore minor name */
	if (cp != NULL) {
		*cp = ':';
	}

	/* request permission to quiesce */
	assert(prp->confp != NULL && prp->confp->confirm != NULL);
	*okp = prp->confp->confirm(prp->confp->appdata_ptr, buf);

	ret = SCFGA_OK;
	/*FALLTHRU*/
out:
	S_FREE(buf);
	S_FREE(hbap);
	return (ret);
}

static scfga_ret_t
suspend_in_rcm(
	apid_t		*apidp,
	char		***suspend_list_ptr,
	char		**errstring,
	cfga_flags_t	flags)
{
	scfga_ret_t	ret;
	char		*bus_path = NULL;
	char		*dev_path = NULL;
	char		**suspend_list = NULL;

	*suspend_list_ptr = NULL;

	/* Suspend the bus through RCM */
	if (apidp->flags & FLAG_DISABLE_RCM)
		return (SCFGA_OK);

	/* The bus_path is the HBA path without its minor */
	if ((bus_path = get_node_path(apidp->hba_phys)) == NULL)
		return (SCFGA_ERR);

	/*
	 * The dev_path is already initialized to NULL.  If the AP Id
	 * path differs from the HBA path, then the dev_path should
	 * instead be set to the AP Id path without its minor.
	 */
	if (strcmp(apidp->hba_phys, apidp->path) != 0) {
		if ((dev_path = get_node_path(apidp->path)) == NULL) {
			ret = SCFGA_ERR;
			goto out;
		}
	}

	if ((ret = get_hba_children(bus_path, dev_path, &suspend_list))
	    != SCFGA_OK) {
		free_dev_list(suspend_list);
		goto out;
	}

	if (scsi_rcm_suspend(suspend_list, errstring, flags, 0) != SCFGA_OK) {
		ret = SCFGA_ERR;
		free_dev_list(suspend_list);
	} else {
		ret = SCFGA_OK;
		*suspend_list_ptr = suspend_list;
	}
	/*FALLTHROUGH*/
out:
	S_FREE(bus_path);
	S_FREE(dev_path);
	return (ret);
}

/*
 * Resume the bus through RCM if it successfully
 * unquiesced.
 */
static void
resume_in_rcm(
	apid_t		*apidp,
	char		**suspend_list,
	char		**errstring,
	cfga_flags_t	flags)
{
	if (apidp->flags & FLAG_DISABLE_RCM)
		return;

	(void) scsi_rcm_resume(suspend_list, errstring, flags, 0);

	free_dev_list(suspend_list);
}

static scfga_ret_t
wait_for_hotplug(prompt_t *pt, int msg)
{
	char		*cu = NULL;
	int		append_newline = 0;
	scfga_ret_t	ret;

	cu = cfga_str(append_newline, msg, 0);
	if (pt->confp->confirm(pt->confp->appdata_ptr, cu) != 1) {
		ret = SCFGA_NACK;
	} else {
		ret = SCFGA_OK;
	}
	S_FREE(cu);
	return (ret);
}

static scfga_ret_t
bus_quiesce(apid_t *apidp, prompt_t *pt, char **errstring, cfga_flags_t flags)
{
	int		l_errno;
	scfga_ret_t	ret;
	scfga_ret_t	hpret;
	char		**suspend_list = NULL;

	ret = suspend_in_rcm(apidp, &suspend_list, errstring, flags);
	if (ret != SCFGA_OK) {
		return (ret);
	}

	/*
	 * If the quiesce fails, then cancel the RCM suspend.
	 * Discard any RCM failures so that the devctl failure
	 * can still be reported.
	 */
	l_errno = 0;
	ret = devctl_cmd(apidp->hba_phys, SCFGA_BUS_QUIESCE, NULL, &l_errno);
	if (ret != SCFGA_OK) {
		resume_in_rcm(apidp, suspend_list, errstring, flags);
		cfga_err(errstring, l_errno, ERR_BUS_QUIESCE, 0);
		return (ret);
	}

	/*
	 * Prompt user to proceed with physical hotplug
	 * and wait until they are done.
	 */
	hpret = wait_for_hotplug(pt, CONF_UNQUIESCE);

	/*
	 * The unquiesce may fail with EALREADY (which is ok)
	 * or some other error (which is not ok).
	 */
	l_errno = 0;
	ret = devctl_cmd(apidp->hba_phys, SCFGA_BUS_UNQUIESCE, NULL, &l_errno);
	if (ret != SCFGA_OK && l_errno != EALREADY) {
		free_dev_list(suspend_list);
		cfga_err(errstring, l_errno, ERR_BUS_UNQUIESCE, 0);
		return (SCFGA_ERR);
	}

	resume_in_rcm(apidp, suspend_list, errstring, flags);

	return (hpret);
}

#define	MAX_LOCK_TRIES		20
#define	MAX_UNLINK_TRIES	60
#define	s_getpid		(int)getpid	/* else lint causes problems */

static void
s_unlink(char *file)
{
	int	count = 0;

retry:
	if (unlink(file) == -1) {
		if (errno != EINTR && errno != EAGAIN) {
			CFGA_TRACE1((stdout, "s_unlink[%d]: unlink failed: "
			    "%s: %s\n", s_getpid(), file, strerror(errno)));
			return;
		}

		if (++count < MAX_UNLINK_TRIES) {
			(void) sleep(1);
			goto retry;
		}
		CFGA_TRACE1((stdout, "s_unlink[%d]: retry limit: %s\n",
		    s_getpid(), file));
	} else {
		CFGA_TRACE3((stdout, "s_unlink[%d]: unlinked: %s\n",
		    s_getpid(), file));
	}
}

static scfga_ret_t
create_lock(int *fdp, struct cfga_msg *msgp, char **errstring)
{
	FILE			*fp;
	int			count;
	struct extmnttab	ent;
	int			mnted;


	*fdp = -1;

	/*
	 * Check that /var/run is mounted. In the unlikely event
	 * that the lock file is left behind, we want it
	 * cleared on the next reboot.
	 */
	errno = 0;
	if ((fp = fopen(MNTTAB, "r")) == NULL) {
		cfga_err(errstring, errno, ERRARG_OPEN, MNTTAB, 0);
		return (SCFGA_LIB_ERR);
	}

	resetmnttab(fp);

	mnted = 0;
	while (getextmntent(fp, &ent, sizeof (ent)) == 0) {
		if (strcmp(ent.mnt_mountp, "/var/run") == 0) {
			mnted = 1;
			break;
		}
	}

	(void) fclose(fp);

	if (!mnted) {
		cfga_err(errstring, 0, ERR_VAR_RUN, 0);
		return (SCFGA_LIB_ERR);
	}

	/*
	 * Wait for a short period of time if we cannot O_EXCL create
	 * lock file. If some other cfgadm process is finishing up, we
	 * can get in. If the wait required is long however, just
	 * return SYSTEM_BUSY to the user - a hotplug operation is
	 * probably in progress.
	 */
	count = 0;
retry:
	*fdp = open(SCFGA_LOCK, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR);
	if (*fdp == -1 && errno == EEXIST) {
		if (++count < MAX_LOCK_TRIES) {
			if (count == 1)
				cfga_msg(msgp, MSG_WAIT_LOCK, 0);
			(void) sleep(1);
			goto retry;
		}
	}

	if (*fdp == -1 && errno == EEXIST) {
		cfga_err(errstring, 0, ERRARG_QUIESCE_LOCK, SCFGA_LOCK, 0);
		return (SCFGA_SYSTEM_BUSY);
	} else if (*fdp == -1) {
		cfga_err(errstring, errno, ERRARG_QUIESCE_LOCK, SCFGA_LOCK, 0);
		return (SCFGA_LIB_ERR);
	}

	CFGA_TRACE3((stdout, "create_lock[%d]: created lockfile: %s\n",
	    s_getpid(), SCFGA_LOCK));

	return (SCFGA_OK);
}

static scfga_ret_t
syslock(int fd, char **errstring)
{
	struct flock	lock;
	int		count;
	int		rval;

	assert(fd != -1);

	CFGA_TRACE3((stdout, "syslock[%d]: trying lock: %s\n",
	    s_getpid(), SCFGA_LOCK));

	lock.l_type = F_WRLCK;
	lock.l_whence = SEEK_SET;
	lock.l_start = 0;
	lock.l_len = 0;

	count = 0;
	while ((rval = fcntl(fd, F_SETLKW, &lock)) == -1 && errno == EINTR) {
		if (++count >= MAX_LOCK_TRIES) {
			CFGA_TRACE1((stdout, "syslock[%d]: retry limit: %s\n",
			    s_getpid(), SCFGA_LOCK));
			goto badlock;
		}
		(void) sleep(1);
	}

	if (rval != -1) {
		CFGA_TRACE3((stdout, "syslock[%d]: locked file: %s\n",
		    s_getpid(), SCFGA_LOCK));
		return (SCFGA_OK);
	}

	/*FALLTHROUGH*/
badlock:
	cfga_err(errstring, errno, ERRARG_LOCK, SCFGA_LOCK, 0);
	/* trace message to display pid */
	CFGA_TRACE1((stdout, "syslock[%d]: cannot lock %s\n",
	    s_getpid(), SCFGA_LOCK));
	return (SCFGA_LIB_ERR);
}

static void
wait_for_child(pid_t cpid)
{
	int	status;
	pid_t	rval;

	CFGA_TRACE2((stdout, "wait_for_child[%d]: child[%d]\n",
	    s_getpid(), (int)cpid));

	for (;;) {
		while ((rval = waitpid(cpid, &status, 0)) != cpid) {
			if (errno == ECHILD) {
				CFGA_TRACE1((stdout, "waitpid[%d]: child[%d] "
				    "doesn't exist\n", s_getpid(), (int)cpid));
				return;
			}

			CFGA_TRACE3((stdout, "waitpid[%d]: returned: %d"
			    ": errno: %s\n", s_getpid(), (int)rval,
			    strerror(errno)));
		}

		if (WIFEXITED(status)) {
			CFGA_TRACE2((stdout, "waitpid[%d]: child[%d]: "
			    "normal exit\n", s_getpid(), (int)cpid));
			return;
		}

		if (WIFSIGNALED(status)) {
			CFGA_TRACE2((stdout, "waitpid[%d]: child[%d]: "
			    "signal exit\n", s_getpid(), (int)cpid));
			return;
		}

		/*
		 * The child has not terminated. We received status
		 * because the child was either stopped or continued.
		 * Wait for child termination by calling waitpid() again.
		 */
	}
}

static void
wait_and_cleanup(int fd, apid_t *apidp)
{
	int		l_errno;
	scfga_ret_t	ret;

	/* This is the child */
	CFGA_TRACE2((stdout, "child[%d]: Entering wait_cleanup\n", s_getpid()));

	if (syslock(fd, NULL) != SCFGA_OK) {
		CFGA_TRACE1((stdout, "child[%d]: lock failure "
		    " - _exit(1)\n", s_getpid()));
		/*
		 * As a last resort, unlink the lock file. This is relatively
		 * safe as the child doesn't unquiesce the bus in this case.
		 */
		s_unlink(SCFGA_LOCK);
		_exit(1);
	}

	l_errno = 0;
	ret = devctl_cmd(apidp->hba_phys, SCFGA_BUS_UNQUIESCE, NULL, &l_errno);
	if (ret != SCFGA_OK) {
		if (l_errno == EALREADY)
			CFGA_TRACE3((stdout, "child[%d]: bus already "
			    "unquiesced: %s\n", s_getpid(), apidp->hba_phys));
		else
			CFGA_TRACE1((stdout, "child[%d]: unquiesce failed: "
			    "%s\n", s_getpid(), strerror(l_errno)));
	} else {
		CFGA_TRACE1((stdout, "child[%d]: unquiesced bus: %s\n",
		    s_getpid(), apidp->hba_phys));
	}

	s_unlink(SCFGA_LOCK);

	CFGA_TRACE2((stdout, "child[%d]: _exit(0)\n", s_getpid()));

	_exit(0);
}

static void
sigblk(sigset_t *osp)
{
	sigset_t set;

	(void) sigemptyset(&set);
	(void) sigemptyset(osp);
	(void) sigaddset(&set, SIGHUP);
	(void) sigaddset(&set, SIGINT);
	(void) sigaddset(&set, SIGQUIT);
	(void) sigaddset(&set, SIGTERM);
	(void) sigaddset(&set, SIGUSR1);
	(void) sigaddset(&set, SIGUSR2);
	(void) sigprocmask(SIG_BLOCK, &set, osp);
}

static void
sigunblk(sigset_t *osp)
{
	(void) sigprocmask(SIG_SETMASK, osp, NULL);
}

/*
 * Here is the algorithm used to ensure that a SCSI bus is not
 * left in the quiesced state:
 *
 *	lock quiesce mutex	// single threads this code
 *	open(O_CREAT|O_EXCL) lock file	// only 1 process at a time
 *	exclusive record lock on lock file
 *	fork1()
 *	quiesce bus
 *	do the physical hotplug operation
 *	unquiesce bus
 *	unlock record lock
 *		-> *child*
 *		-> wait for record lock
 *		-> unconditionally unquiesce bus
 *		-> unlink lock file
 *		-> exit
 *	wait for child to exit
 *	unlock quiesce mutex
 *
 * NOTE1: since record locks are per-process and a close() can
 * release a lock, to keep things MT-safe we need a quiesce mutex.
 *
 * NOTE2: To ensure that the child does not unquiesce a bus quiesced
 * by an unrelated cfgadm_scsi operation, exactly 1 process in the
 * system can be doing an implicit quiesce operation  The exclusive
 * creation of the lock file guarantees this.
 *
 * NOTE3: This works even if the parent process dumps core and/or is
 * abnormally terminated. If the parent dies before the child is
 * forked, the bus is not quiesced. If the parent dies after the
 * bus is quiesced, the child process will ensure that the bus is
 * unquiesced.
 */
static scfga_ret_t
dev_hotplug(
	apid_t *apidp,
	prompt_t *pt,
	cfga_flags_t flags,
	int do_quiesce,
	char **errstring)
{
	scfga_ret_t	ret;
	pid_t		cpid;
	int		fd;
	sigset_t	oset;

	assert(apidp->hba_phys != NULL);
	assert(apidp->path != NULL);

	/* If no quiesce required, prompt the user to do the operation */
	if (!do_quiesce)
		return (wait_for_hotplug(pt, CONF_NO_QUIESCE));

	(void) mutex_lock(&quiesce_mutex);

	ret = create_lock(&fd, pt->msgp, errstring);
	if (ret != SCFGA_OK) {
		(void) mutex_unlock(&quiesce_mutex);
		return (ret);
	}

	ret = syslock(fd, errstring);
	if (ret != SCFGA_OK) {
		goto bad;
	}

	/*
	 * block signals in the child. Parent may
	 * exit, causing signal to be sent to child.
	 */
	sigblk(&oset);

	switch (cpid = fork1()) {
		case 0:
			/* child */
			wait_and_cleanup(fd, apidp);
			_exit(0); /* paranoia */
			/*NOTREACHED*/
		case -1:
			cfga_err(errstring, errno, ERR_FORK, 0);
			sigunblk(&oset);
			ret = SCFGA_LIB_ERR;
			goto bad;
		default:
			/* parent */
			break;
	}

	sigunblk(&oset);

	/* We have forked successfully - this is the parent */
	ret = bus_quiesce(apidp, pt, errstring, flags);

	(void) close(fd);	/* also unlocks */

	wait_for_child(cpid);

	(void) mutex_unlock(&quiesce_mutex);

	return (ret);
bad:
	(void) close(fd);
	s_unlink(SCFGA_LOCK);
	(void) mutex_unlock(&quiesce_mutex);
	return (ret);
}

/*
 * Checks if HBA controls a critical file-system (/, /usr or swap)
 * This routine reads /etc/vfstab and is NOT foolproof.
 * If an error occurs, assumes that controller is NOT critical.
 */
static int
critical_ctrlr(const char *hba_phys)
{
	FILE *fp;
	struct vfstab vfst;
	int vfsret = 1, rv = -1;
	char *bufp;
	const size_t buflen = PATH_MAX;
	char mount[MAXPATHLEN], fstype[MAXPATHLEN], spec[MAXPATHLEN];


	if ((bufp = calloc(1, buflen)) == NULL) {
		return (0);
	}

	fp = NULL;
	if ((fp = fopen(ETC_VFSTAB, "r")) == NULL) {
		rv = 0;
		goto out;
	}

	while ((vfsret = getvfsent(fp, &vfst)) == 0) {

		(void) strcpy(mount, S_STR(vfst.vfs_mountp));
		(void) strcpy(fstype, S_STR(vfst.vfs_fstype));
		(void) strcpy(spec, S_STR(vfst.vfs_special));

		/* Ignore non-critical entries */
		if (strcmp(mount, "/") && strcmp(mount, "/usr") &&
		    strcmp(fstype, "swap")) {
			continue;
		}

		/* get physical path */
		if (realpath(spec, bufp) == NULL) {
			continue;
		}

		/* Check if critical partition is on the HBA */
		if (!(rv = hba_dev_cmp(hba_phys, bufp))) {
			break;
		}
	}

	rv = !vfsret;

	/*FALLTHRU*/
out:
	S_FREE(bufp);
	if (fp != NULL) {
		(void) fclose(fp);
	}
	return (rv);
}

/*
 * Convert bus state to receptacle state
 */
static cfga_stat_t
bus_devctl_to_recep_state(uint_t bus_dc_state)
{
	cfga_stat_t rs;

	switch (bus_dc_state) {
	case BUS_ACTIVE:
		rs = CFGA_STAT_CONNECTED;
		break;
	case BUS_QUIESCED:
	case BUS_SHUTDOWN:
		rs = CFGA_STAT_DISCONNECTED;
		break;
	default:
		rs = CFGA_STAT_NONE;
		break;
	}

	return (rs);
}

static int
add_dev(di_node_t node, void *arg)
{
	int ndevs, len;
	char *path, *p;
	struct larg *largp = (struct larg *)arg;

	/* ignore hba itself and all detached nodes */
	if (di_parent_node(node) == DI_NODE_NIL ||
	    di_node_state(node) < DS_ATTACHED)
		return (DI_WALK_CONTINUE);

	if ((path = di_devfs_path(node)) == NULL) {
		largp->ndevs = -1;
		return (DI_WALK_TERMINATE);
	}

	/* sizeof (DEVICES_DIR) includes the null terminator */
	len = strlen(path) + sizeof (DEVICES_DIR);
	if ((p = malloc(len)) == NULL) {
		di_devfs_path_free(path);
		largp->ndevs = -1;
		return (DI_WALK_TERMINATE);
	}
	(void) snprintf(p, len, "%s%s", DEVICES_DIR, path);
	di_devfs_path_free(path);

	/* ignore device to be excluded */
	if (largp->dev && strcmp(largp->dev, p) == 0) {
		free(p);
		return (DI_WALK_CONTINUE);
	}

	/* grow dev_list to allow room for one more device */
	if (alloc_dev_list(largp) != 0) {
		free(p);
		return (DI_WALK_TERMINATE);
	}
	ndevs = largp->ndevs;
	largp->ndevs++;
	largp->dev_list[ndevs] = p;
	largp->dev_list[ndevs + 1] = NULL;
	return (DI_WALK_CONTINUE);
}

/*
 * Get list of children excluding dev_excl (if not null).
 */
static int
get_hba_children(char *bus_path, char *dev_excl, char ***dev_listp)
{
	int err, ret;
	walkarg_t u;
	struct larg larg;

	*dev_listp = NULL;

	u.node_args.flags = DI_WALK_CLDFIRST;
	u.node_args.fcn = add_dev;

	larg.ndevs = 0;
	larg.nelem = 0;
	larg.dev = dev_excl;
	larg.dev_list = NULL;

	ret = walk_tree(bus_path, &larg, DINFOSUBTREE, &u, SCFGA_WALK_NODE,
	    &err);
	if (larg.ndevs == -1) {
		free_dev_list(larg.dev_list);
		return (SCFGA_ERR);
	}
	*dev_listp = larg.dev_list;
	return (ret);
}

static char *
get_node_path(char *minor_path)
{
	char *path, *cp;

	if ((path = strdup(minor_path)) == NULL)
		return (NULL);
	if ((cp = strrchr(path, ':')) != NULL)
		*cp = '\0';
	return (path);
}

/*
 * Ensure largp->dev_list has room for one more device.
 * Returns 0 on success, -1 on failure.
 */
static int
alloc_dev_list(struct larg *largp)
{
	int nelem;
	char **p;

	if (largp->nelem > largp->ndevs + 2)	/* +1 for NULL termination */
		return (0);

	nelem =  largp->nelem + 16;
	p = realloc(largp->dev_list, nelem * sizeof (char *));
	if (p == NULL)
		return (-1);

	largp->dev_list = p;
	largp->nelem = nelem;
	return (0);
}

static void
free_dev_list_elements(char **dev_list)
{
	while (*dev_list) {
		free(*dev_list);
		dev_list++;
	}
}

static void
free_dev_list(char **dev_list)
{
	if (dev_list == NULL)
		return;

	free_dev_list_elements(dev_list);
	free(dev_list);
}