OpenSolaris_b135/lib/lvm/libmeta/common/meta_set_hst.c

Compare this file to the similar file:
Show the results in this format:

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Just in case we're not in a build environment, make sure that
 * TEXT_DOMAIN gets set to something.
 */
#if !defined(TEXT_DOMAIN)
#define	TEXT_DOMAIN "SYS_TEST"
#endif

/*
 * Metadevice diskset interfaces
 */

#include "meta_set_prv.h"
#include <meta.h>
#include <sys/lvm/md_crc.h>
#include <sys/time.h>
#include <sdssc.h>

/*
 * Walk every database replica of the set and add its side names.
 *
 * Returns 0 on success, -1 if the replica list cannot be read or if
 * adding the side names fails for any replica (ep carries the error).
 */
static int
add_db_sidenms(
	mdsetname_t	*sp,
	md_error_t	*ep
)
{
	md_replicalist_t	*replicas = NULL;
	md_replicalist_t	*cur;
	int			status = 0;

	if (metareplicalist(sp, MD_FULLNAME_ONLY, &replicas, ep) < 0)
		return (-1);

	cur = replicas;
	while (cur != NULL) {
		md_replica_t	*rep = cur->rl_repp;

		/*
		 * These are not the first replicas added to the diskset,
		 * so ask for a broadcast (DB_ADDSIDENMS_BCAST).  A
		 * traditional diskset ignores the flag because it does
		 * not use rpc.mdcommd.
		 */
		if (meta_db_addsidenms(sp, rep->r_namep, rep->r_blkno,
		    DB_ADDSIDENMS_BCAST, ep) != 0) {
			status = -1;
			break;
		}
		cur = cur->rl_next;
	}

	/* The replica list is released on both success and failure. */
	metafreereplicalist(replicas);
	return (status);
}

/*
 * Push the set's drive records to each of the node_c hosts in node_v.
 *
 * Returns 0 when the set has no drives or every host accepted the
 * drive records; -1 on any failure (ep carries the error).
 */
static int
add_drvs_to_hosts(
	mdsetname_t	*sp,
	int		node_c,
	char		**node_v,
	md_error_t	*ep
)
{
	md_set_desc	*setdesc;
	md_drive_desc	*drives;
	md_timeval32_t	stamp;
	ulong_t		gen;
	int		n = 0;

	setdesc = metaget_setdesc(sp, ep);
	if (setdesc == NULL)
		return (-1);

	drives = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep);
	if (drives == NULL) {
		/* NULL with a clean ep means "no drives", not an error. */
		return (mdisok(ep) ? 0 : -1);
	}

	stamp = setdesc->sd_ctime;
	gen = setdesc->sd_genid - 1;

	while (n < node_c) {
		if (clnt_adddrvs(node_v[n], sp, drives, stamp, gen, ep) == -1)
			return (-1);
		n++;
	}

	return (0);
}

/*
 * Add metadevice namespace entries for side `sideno', using the entries
 * of side `otherside' as the template.
 *
 * For a multi-node set whose local node has MD_MN_NODE_OWN set, the work
 * is delegated to rpc.mdcommd with an MD_MN_MSG_META_MD_ADDSIDE message.
 * Otherwise the namespace keys of `otherside' are walked with
 * MD_IOCNXTKEY_NM and each one is added for `sideno'.
 *
 * Returns 0 on success, -1 on failure (ep carries the error when one is
 * available).
 */
static int
add_md_sidenms(mdsetname_t *sp, side_t sideno, side_t otherside, md_error_t *ep)
{
	mdnm_params_t	nm;
	char		*cname, *dname;
	side_t		tmp_sideno;
	minor_t		mnum;
	int		done, i;
	int		rval = 0;
	md_set_desc	*sd = NULL;	/* stays NULL for the local set */

	(void) memset(&nm, '\0', sizeof (nm));
	nm.key   = MD_KEYWILD;

	if (!metaislocalset(sp)) {
		if ((sd = metaget_setdesc(sp, ep)) == NULL)
			return (-1);
	}
	/* Use rpc.mdcommd to add md side info from all nodes */
	if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
	    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
		md_mn_result_t			*resultp = NULL;
		md_mn_msg_meta_md_addside_t	md_as;
		int				send_rval;

		md_as.msg_sideno = sideno;
		md_as.msg_otherside = otherside;
		/*
		 * If reconfig cycle has been started, this node is stuck
		 * in the return step until this command has completed.  If
		 * mdcommd is suspended, ask send_message to fail (instead of
		 * retrying) so that metaset can finish allowing the
		 * reconfig cycle to proceed.
		 */
		send_rval = mdmn_send_message(sp->setno,
		    MD_MN_MSG_META_MD_ADDSIDE,
		    MD_MSGF_FAIL_ON_SUSPEND | MD_MSGF_PANIC_WHEN_INCONSISTENT,
		    0, (char *)&md_as, sizeof (md_mn_msg_meta_md_addside_t),
		    &resultp, ep);
		if (send_rval != 0) {
			/*
			 * Only pull the error out of the result when a
			 * result was actually handed back; resultp may
			 * still be NULL on a failed send.
			 */
			if (resultp != NULL) {
				(void) mdstealerror(ep, &(resultp->mmr_ep));
				free_result(resultp);
			}
			return (-1);
		}
		if (resultp)
			free_result(resultp);
		return (0);
	} else {
		/*CONSTCOND*/
		while (1) {
			char	*drvnm = NULL;

			nm.mde   = mdnullerror;
			nm.setno = sp->setno;
			nm.side  = otherside;
			if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0)
				return (mdstealerror(ep, &nm.mde));

			/* No more keys: every entry has been processed. */
			if (nm.key == MD_KEYWILD)
				return (0);

			/*
			 * Okay we have a valid key
			 * Let's see if it is hsp or not
			 */
			nm.devname = (uintptr_t)meta_getnmentbykey(sp->setno,
			    otherside, nm.key, &drvnm, NULL, NULL, ep);
			if (nm.devname == NULL || drvnm == NULL) {
				if (nm.devname)
					Free((void *)(uintptr_t)nm.devname);
				if (drvnm)
					Free((void *)(uintptr_t)drvnm);
				return (-1);
			}

			/*
			 * If it is hsp add here
			 */
			if (strcmp(drvnm, MD_HOTSPARES) == 0) {
				if (add_name(sp, sideno, nm.key, MD_HOTSPARES,
				    minor(NODEV), (char *)(uintptr_t)nm.devname,
				    NULL, NULL, ep) == -1) {
					Free((void *)(uintptr_t)nm.devname);
					Free((void *)(uintptr_t)drvnm);
					return (-1);
				} else {
					Free((void *)(uintptr_t)nm.devname);
					Free((void *)(uintptr_t)drvnm);
					continue;
				}
			}

			nm.side = sideno;
			/*
			 * sd is NULL for the local set; treat that case
			 * like a non multi-node set instead of reading
			 * through an unset pointer.
			 * NOTE(review): confirm the local set should use
			 * the "sideno - 1" form here.
			 */
			if ((sd != NULL) && MD_MNSET_DESC(sd)) {
				tmp_sideno = sideno;
			} else {
				tmp_sideno = sideno - 1;
			}

			if ((done = meta_getnextside_devinfo(sp,
			    (char *)(uintptr_t)nm.devname, &tmp_sideno,
			    &cname, &dname, &mnum, ep)) == -1) {
				Free((void *)(uintptr_t)nm.devname);
				/* drvnm is also owned here; release it. */
				Free((void *)(uintptr_t)drvnm);
				return (-1);
			}

			assert(done == 1);
			Free((void *)(uintptr_t)nm.devname);
			Free((void *)(uintptr_t)drvnm);

			/*
			 * The device reference count can be greater than 1 if
			 * more than one softpart is configured on top of the
			 * same device.  If this is the case then we want to
			 * increment the count to sync up with the other sides.
			 */
			for (i = 0; i < nm.ref_count; i++) {
				if (add_name(sp, sideno, nm.key, dname, mnum,
				    cname, NULL, NULL, ep) == -1)
					rval = -1;
			}

			Free(cname);
			Free(dname);

			if (rval != 0)
				return (rval);
		}
	}

	/*NOTREACHED*/
}

/*
 * Run checkdrive_onnode() for every drive of the set against `node'.
 *
 * Returns 0 when the set has no drives or every check passes, -1 when
 * the drive list cannot be fetched or a check fails.
 */
static int
check_setdrvs_againstnode(mdsetname_t *sp, char *node, md_error_t *ep)
{
	md_drive_desc	*head;
	md_drive_desc	*cur;

	head = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep);
	if (head == NULL && !mdisok(ep))
		return (-1);

	/* An empty list with a clean ep simply skips the loop. */
	cur = head;
	while (cur != NULL) {
		if (checkdrive_onnode(sp, cur->dd_dnp, node, ep))
			return (-1);
		cur = cur->dd_next;
	}

	return (0);
}

/*
 * Create a multi-node diskset record on each host listed in node_v.
 *
 * new_set != 0: a temporary set descriptor is built locally with one node
 *	record per entry in node_v (kept in ascending nodeid order) and is
 *	sent to every host; the temporary descriptor is freed before return.
 * new_set == 0: the existing set descriptor is used, and after the set
 *	has been created on the new hosts the drive records are pushed to
 *	them via add_drvs_to_hosts().
 *
 * Returns 0 on success, -1 on failure (ep carries the error).
 */
static int
create_multinode_set_on_hosts(
	mdsetname_t	*sp,
	int		node_c,		/* Number of new nodes */
	char		**node_v,	/* Nodes which are being added */
	int		new_set,
	md_error_t	*ep
)
{
	md_set_desc			*sd;
	md_timeval32_t			now;
	ulong_t				genid;
	md_mnnode_desc			*node;
	md_mnnode_desc			*first_added = NULL;
	md_mnnode_desc			**linkp;
	mndiskset_membershiplist_t	*members, *m;
	int				member_cnt;
	int				status = 0;
	int				idx;

	if (new_set) {
		sd = Zalloc(sizeof (*sd));

		if (meta_gettimeofday(&now) == -1) {
			(void) mdsyserror(ep, errno,
			    dgettext(TEXT_DOMAIN, "meta_gettimeofday()"));
			status = -1;
			goto done;
		}

		/*
		 * Fetch the membership list from the API routine.  On
		 * error the set is not created and the error is returned.
		 */
		if (meta_read_nodelist(&member_cnt, &members, ep) == -1) {
			status = -1;
			goto done;
		}

		/*
		 * meta_set_addhosts has already verified that these nodes
		 * are in the membership list, so mark them ALIVE.  Every
		 * host is new to a brand new set, so mark them ADD too.
		 */
		for (idx = 0; idx < node_c; idx++) {
			node = Zalloc(sizeof (*node));
			(void) strcpy(node->nd_nodename, node_v[idx]);
			node->nd_ctime = now;
			node->nd_flags = (MD_MN_NODE_ALIVE |
			    MD_MN_NODE_ADD);

			/* Copy nodeid and address from the matching member */
			for (m = members; m != NULL; m = m->next) {
				if (strcmp(m->msl_node_name,
				    node_v[idx]) != 0)
					continue;
				node->nd_nodeid = m->msl_node_id;
				(void) strcpy(node->nd_priv_ic,
				    m->msl_node_addr);
				break;
			}

			/*
			 * The node list is kept in ascending nodeid order:
			 * step past every entry whose id is not greater
			 * than the new one, then splice the new node in.
			 */
			linkp = &sd->sd_nodelist;
			while (*linkp != NULL &&
			    (*linkp)->nd_nodeid <= node->nd_nodeid)
				linkp = &(*linkp)->nd_next;
			node->nd_next = *linkp;
			*linkp = node;

			/* Master candidate is the first node added */
			if (first_added == NULL)
				first_added = node;
		}

		meta_free_nodelist(members);

		/*
		 * First creation of this mnset: the master stays invalid
		 * until the first drive is in the set.
		 */
		(void) strcpy(sd->sd_mn_master_nodenm, "");
		sd->sd_mn_master_nodeid = MD_MN_INVALID_NID;
		sd->sd_mn_masternode = first_added;
		sd->sd_ctime = now;
		sd->sd_genid = 0;
		genid = 0;
	} else {
		sd = metaget_setdesc(sp, ep);
		if (sd == NULL)
			return (-1);
		now = sd->sd_ctime;
		genid = sd->sd_genid - 1;
		if (sd->sd_drvs)
			genid--;
	}

	/*
	 * Create the set on each new node.  For an existing set the node
	 * list sent is the one from before the new nodes were added; for
	 * a new set it is the complete new node list.
	 */
	for (idx = 0; idx < node_c; idx++) {
		if (clnt_mncreateset(node_v[idx], sp, sd->sd_nodelist,
		    now, genid, sd->sd_mn_master_nodenm,
		    sd->sd_mn_master_nodeid, ep) == -1) {
			status = -1;
			break;
		}
	}

done:
	if (new_set) {
		/* Tear down the temporary descriptor built above. */
		while ((node = sd->sd_nodelist) != NULL) {
			sd->sd_nodelist = node->nd_next;
			Free(node);
		}
		Free(sd);
	}

	if (status != 0 || new_set)
		return (status);

	/*
	 * Add the drive records to the new sets
	 * and names for the new sides.
	 */
	return (add_drvs_to_hosts(sp, node_c, node_v, ep));
}


/*
 * Create a traditional diskset record on each host listed in node_v.
 *
 * new_set != 0: a temporary set descriptor holding the node names is
 *	built, sent to every host and then freed.
 * new_set == 0: the existing descriptor is used, and afterwards the
 *	drive records are pushed to the new hosts via add_drvs_to_hosts().
 *
 * Returns 0 on success, -1 on failure (ep carries the error).
 */
static int
create_traditional_set_on_hosts(
	mdsetname_t	*sp,
	int		node_c,		/* Number of new nodes */
	char		**node_v,	/* Nodes which are being added */
	int		new_set,
	md_error_t	*ep
)
{
	md_set_desc	*setdesc;
	md_timeval32_t	stamp;
	ulong_t		gen;
	int		status = 0;
	int		n;

	if (new_set) {
		/* A traditional set cannot hold more than MD_MAXSIDES hosts */
		if (node_c > MD_MAXSIDES)
			return (mddserror(ep, MDE_DS_SIDENUMNOTAVAIL,
			    sp->setno, NULL, NULL, sp->setname));

		setdesc = Zalloc(sizeof (*setdesc));

		/* Record the new hosts in the descriptor */
		for (n = 0; n < node_c; n++)
			(void) strcpy(setdesc->sd_nodes[n], node_v[n]);

		if (meta_gettimeofday(&stamp) == -1) {
			(void) mdsyserror(ep, errno, "meta_gettimeofday()");
			status = -1;
			goto done;
		}

		setdesc->sd_ctime = stamp;
		setdesc->sd_genid = 0;
		gen = 0;
	} else {
		setdesc = metaget_setdesc(sp, ep);
		if (setdesc == NULL)
			return (-1);

		stamp = setdesc->sd_ctime;
		gen = setdesc->sd_genid;
		if (setdesc->sd_drvs)
			gen--;
	}

	/* Create the set on each new host; stop at the first failure */
	for (n = 0; n < node_c; n++) {
		if (clnt_createset(node_v[n], sp, setdesc->sd_nodes, stamp,
		    gen, ep) == -1) {
			status = -1;
			break;
		}
	}

done:
	if (new_set)
		Free(setdesc);

	if (status != 0 || new_set)
		return (status);

	/*
	 * Add the drive records to the new sets
	 * and names for the new sides.
	 */
	return (add_drvs_to_hosts(sp, node_c, node_v, ep));
}

/*
 * Dispatch set creation to the multi-node or the traditional
 * implementation, passing all other arguments through unchanged.
 */
static int
create_set_on_hosts(
	mdsetname_t	*sp,
	int		multi_node,	/* Multi_node diskset or not? */
	int		node_c,		/* Number of new nodes */
	char		**node_v,	/* Nodes which are being added */
	int		new_set,
	md_error_t	*ep
)
{
	if (!multi_node) {
		return (create_traditional_set_on_hosts(sp, node_c, node_v,
		    new_set, ep));
	}

	return (create_multinode_set_on_hosts(sp, node_c, node_v,
	    new_set, ep));
}

/*
 * Create a new diskset named sp->setname on the node_c hosts in node_v.
 *
 * Steps, in order:
 *   - sanity checks (local node is a member, auto_take implies one host);
 *   - pick a set number (from sdssc_get_index() if bound, otherwise the
 *     first number no host reports busy);
 *   - lock the set on all hosts (earlier for multi-node sets);
 *   - create the set records, mark them MD_SR_OK and, for multi-node
 *     sets, mark the node records MD_NR_OK.
 *
 * On a failure after the rollback point the partially created set is
 * removed again from every host (see the rollback label).
 *
 * Returns 0 on success, -1 on failure (ep carries the error).
 */
static int
create_set(
	mdsetname_t	*sp,
	int		multi_node,	/* Multi-node diskset or not? */
	int		node_c,
	char		**node_v,
	int		auto_take,
	md_error_t	*ep
)
{
	int		i;
	int		rval = 0;
	set_t		max_sets;
	set_t		setno;
	int		bool;
	uint_t		sr_flags;
	sigset_t	oldsigs;
	md_setkey_t	*cl_sk;
	int		rb_level = 0;	/* how far creation got, for rollback */
	md_error_t	xep = mdnullerror;
	rval_e		sdssc_rval;
	int		lock_flag = 0;	/* set once any host has been locked */
	int		sig_flag = 0;	/* set once signal handling changed */

	if ((max_sets = get_max_sets(ep)) == 0)
		return (-1);

	/* We must be a member of the set we are creating */
	if (! strinlst(mynode(), node_c, node_v))
		return (mddserror(ep, MDE_DS_SELFNOTIN,
		    sp->setno, mynode(), NULL, sp->setname));

	/*
	 * If auto_take then we must be the only member of the set
	 * that we are creating.
	 */
	if (auto_take && node_c > 1)
		return (mddserror(ep, MDE_DS_SINGLEHOST, sp->setno, NULL, NULL,
		    sp->setname));

	/*
	 * If we're part of SC3.0 we'll already have allocated the
	 * set number so we can skip the allocation algorithm used.
	 * Set number is unique across traditional and MN disksets.
	 */
	if ((sdssc_rval = sdssc_get_index(sp->setname, &setno))
	    == SDSSC_NOT_BOUND) {

		for (i = 0; i < node_c; i++) {
			int	has_set;

			/* Skip my node */
			if (strcmp(mynode(), node_v[i]) == 0)
				continue;

			/*
			 * Make sure this set name is not used on the
			 * other hosts
			 */
			has_set = nodehasset(sp, node_v[i], NHS_N_EQ, ep);
			if (has_set < 0) {
				/* MDE_NO_SET just means "not there": fine */
				if (! mdiserror(ep, MDE_NO_SET)) {
					rval = -1;
					goto out;
				}
				mdclrerror(ep);
				continue;
			}

			if (has_set) {
				(void) mddserror(ep, MDE_DS_NODEHASSET,
				    sp->setno, node_v[i], NULL, sp->setname);
				rval = -1;
				goto out;
			}
		}

		/*
		 * Find the first set number that no host reports busy.
		 * The inner loop ends with i == node_c only when every
		 * host said the number is free.
		 */
		for (setno = 1; setno < max_sets; setno++) {
			for (i = 0; i < node_c; i++) {
				if (clnt_setnumbusy(node_v[i], setno,
				    &bool, ep) == -1) {
					rval = -1;
					goto out;
				}

				if (bool == TRUE)
					break;
			}
			if (i == node_c)
				break;
		}
	} else if (sdssc_rval != SDSSC_OKAY) {
		(void) mddserror(ep, MDE_DS_SETNUMNOTAVAIL, MD_SET_BAD, NULL,
		    NULL, sp->setname);
		rval = -1;
		goto out;
	}

	/* The search above ran off the end: no free set number */
	if (setno == max_sets) {
		(void) mddserror(ep, MDE_DS_SETNUMNOTAVAIL, MD_SET_BAD, NULL,
		    NULL, sp->setname);
		rval = -1;
		goto out;
	}

	sp->setno = setno;

	/*
	 * Lock the set on current set members.
	 * Set locking done much earlier for MN diskset than for traditional
	 * diskset since lock_set is used to protect against
	 * other meta* commands running on the other nodes.
	 * Don't issue mdcommd SUSPEND command since there is nothing
	 * to suspend since there currently is no set.
	 */
	if (multi_node) {
		/* Make sure we are blocking all signals */
		if (procsigs(TRUE, &oldsigs, &xep) < 0)
			mdclrerror(&xep);
		sig_flag = 1;

		/* Lock the set on new set members */
		for (i = 0; i < node_c; i++) {
			if (clnt_lock_set(node_v[i], sp, ep)) {
				rval = -1;
				goto out;
			}
			lock_flag = 1;
		}
		/*
		 * Now have the diskset locked, verify set number is still ok
		 * NOTE(review): only the RPC status is checked here; the
		 * returned busy flag is not examined.
		 */
		for (i = 0; i < node_c; i++) {
			if (clnt_setnumbusy(node_v[i], setno,
			    &bool, ep) == -1) {
				rval = -1;
				goto out;
			}
		}
	}


	if (meta_set_checkname(sp->setname, ep)) {
		rval = -1;
		goto out;
	}

	for (i = 0; i < node_c; i++) {
		if (clnt_setnameok(node_v[i], sp, &bool, ep) == -1) {
			rval = -1;
			goto out;
		}
		if (bool == FALSE) {
			(void) mddserror(ep, MDE_DS_SETNAMEBUSY, sp->setno,
			    node_v[i], NULL, sp->setname);
			rval = -1;
			goto out;
		}
	}

	/* END CHECK CODE */

	/* Lock the set on new set members */
	if (!multi_node) {
		md_rb_sig_handling_on();
		sig_flag = 1;
		for (i = 0; i < node_c; i++) {
			if (clnt_lock_set(node_v[i], sp, ep)) {
				rval = -1;
				goto out;
			}
			lock_flag = 1;
		}
	}

	RB_TEST(1, "create_set", ep)

	RB_PREEMPT;
	rb_level = 1;	/* level 1 */

	RB_TEST(2, "create_set", ep)

	if ((rval = create_set_on_hosts(sp, multi_node, node_c, node_v,
	    1, ep)) == -1)
		goto rollback;

	RB_TEST(3, "create_set", ep)

	if (auto_take)
		sr_flags = MD_SR_OK | MD_SR_AUTO_TAKE;
	else
		sr_flags = MD_SR_OK;

	/*
	 * Mark the set record MD_SR_OK
	 */
	for (i = 0; i < node_c; i++)
		if (clnt_upd_sr_flags(node_v[i], sp, sr_flags, ep))
			goto rollback;

	rb_level = 2;	/* level 2 */

	/*
	 * For MN diskset:
	 * On each added node, set the node record for that node
	 * to OK.  Then set all node records for the newly added
	 * nodes on all nodes to ok.
	 *
	 * By setting a node's own node record to ok first, even if
	 * the node adding the hosts panics, the rest of the nodes can
	 * determine the same node list during the choosing of the master
	 * during reconfig.  So, only nodes considered for mastership
	 * are nodes that have both MD_MN_NODE_OK and MD_SR_OK set
	 * on that node's rpc.metad.  If all nodes have MD_SR_OK set,
	 * but no node has its own MD_MN_NODE_OK set, then the set will
	 * be removed during reconfig since a panic occurred during the
	 * creation of the initial diskset.
	 */

	if (multi_node) {
		md_mnnode_desc	*nd, *saved_nd_next;
		md_set_desc	*sd;

		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
			goto rollback;
		}

		for (i = 0; i < node_c; i++) {
			nd = sd->sd_nodelist;
			/* All nodes are guaranteed to be ALIVE */
			while (nd) {
				if (strcmp(nd->nd_nodename, node_v[i]) == 0)
					break;
				nd = nd->nd_next;
			}
			/* Something wrong, will pick this up in next loop */
			if (nd == NULL)
				continue;

			/*
			 * Only changing my local cache of node list:
			 * temporarily cut the list after nd so that a
			 * single node record is passed.
			 */
			saved_nd_next = nd->nd_next;
			nd->nd_next = NULL;

			/* Set node record for added host to ok on that host */
			if (clnt_upd_nr_flags(node_v[i], sp,
			    nd, MD_NR_OK, NULL, ep)) {
				nd->nd_next = saved_nd_next;
				goto rollback;
			}
			nd->nd_next = saved_nd_next;
		}

		/* Now set all node records on all nodes to be ok */
		nd = sd->sd_nodelist;
		/* All nodes are guaranteed to be ALIVE */
		while (nd) {
			if (clnt_upd_nr_flags(nd->nd_nodename, sp,
			    sd->sd_nodelist, MD_NR_OK, NULL, ep)) {
				goto rollback;
			}
			nd = nd->nd_next;
		}
	}

	RB_TEST(4, "create_set", ep)

out:
	if ((rval == 0) && multi_node) {
		/*
		 * Set successfully created.
		 * Notify rpc.mdcommd on all nodes of a nodelist change.
		 * Send reinit command to mdcommd which forces it to get
		 * fresh set description.  Then send resume.
		 * Resume on class 0 will resume all classes.
		 */
		for (i = 0; i < node_c; i++) {
			/* Class is ignored for REINIT */
			if (clnt_mdcommdctl(node_v[i], COMMDCTL_REINIT,
			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
				if (rval == 0)
					(void) mdstealerror(ep, &xep);
				rval = -1;
				mde_perror(ep, dgettext(TEXT_DOMAIN,
				    "Unable to reinit rpc.mdcommd.\n"));
			}
		}
		for (i = 0; i < node_c; i++) {
			if (clnt_mdcommdctl(node_v[i], COMMDCTL_RESUME,
			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
				if (rval == 0)
					(void) mdstealerror(ep, &xep);
				rval = -1;
				mde_perror(ep, dgettext(TEXT_DOMAIN,
				    "Unable to resume rpc.mdcommd.\n"));
			}
		}
		meta_ping_mnset(sp->setno);
	}
	if (lock_flag) {
		cl_sk = cl_get_setkey(sp->setno, sp->setname);
		for (i = 0; i < node_c; i++) {
			if (clnt_unlock_set(node_v[i], cl_sk, &xep)) {
				/* Keep the first error only */
				if (rval == 0)
					(void) mdstealerror(ep, &xep);
				rval = -1;
			}
		}
		cl_set_setkey(NULL);
	}

	if (sig_flag) {
		if (multi_node) {
			/* release signals back to what they were on entry */
			if (procsigs(FALSE, &oldsigs, &xep) < 0)
				mdclrerror(&xep);
		} else {
			md_rb_sig_handling_off(md_got_sig(), md_which_sig());
		}
	}

	return (rval);

rollback:
	/* all signals already blocked for MN diskset */
	if (!multi_node) {
		/* Make sure we are blocking all signals */
		if (procsigs(TRUE, &oldsigs, &xep) < 0)
			mdclrerror(&xep);
	}

	rval = -1;

	/*
	 * For MN diskset:
	 * On each added node (which is now each node to be deleted),
	 * set the node record for that node to DEL.  Then set all
	 * node records for the newly added (soon to be deleted) nodes
	 * on all nodes to DEL.
	 *
	 * By setting a node's own node record to DEL first, even if
	 * the node doing the rollback panics, the rest of the nodes can
	 * determine the same node list during the choosing of the master
	 * during reconfig.
	 */

	/* level 3 */
	if ((rb_level > 1) && (multi_node)) {
		md_mnnode_desc	*nd, *saved_nd_next;
		md_set_desc	*sd;

		if ((sd = metaget_setdesc(sp, &xep)) == NULL) {
			/*
			 * Without a set descriptor there is no node list
			 * to walk; skip the node record updates rather
			 * than dereference NULL, and carry on with the
			 * rest of the rollback.
			 */
			mdclrerror(&xep);
		} else {
			for (i = 0; i < node_c; i++) {
				nd = sd->sd_nodelist;
				/* All nodes are guaranteed to be ALIVE */
				while (nd) {
					if (strcmp(nd->nd_nodename,
					    node_v[i]) == 0)
						break;
					nd = nd->nd_next;
				}
				/* Something wrong, picked up in next loop */
				if (nd == NULL)
					continue;

				/* Only changing my local cache of node list */
				saved_nd_next = nd->nd_next;
				nd->nd_next = NULL;

				/*
				 * Set node record for added host to DEL on
				 * that host; failures are ignored during
				 * rollback.
				 */
				if (clnt_upd_nr_flags(node_v[i], sp,
				    nd, MD_NR_DEL, NULL, &xep)) {
					mdclrerror(&xep);
				}
				nd->nd_next = saved_nd_next;
			}

			/* Now set all node records on all nodes to be DEL */
			nd = sd->sd_nodelist;
			/* All nodes are guaranteed to be ALIVE */
			while (nd) {
				if (clnt_upd_nr_flags(nd->nd_nodename, sp,
				    sd->sd_nodelist, MD_NR_DEL, NULL, &xep)) {
					mdclrerror(&xep);
				}
				nd = nd->nd_next;
			}
		}

		/* Mark set record on all hosts to be DELETED */
		for (i = 0; i < node_c; i++) {
			if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, &xep)) {
				mdclrerror(&xep);
			}
		}
	}
	/* level 1 */
	if (rb_level > 0) {
		for (i = 0; i < node_c; i++) {
			if (clnt_delset(node_v[i], sp, &xep) == -1)
				mdclrerror(&xep);
		}
	}

	/* level 0 */
	/* Don't test lock flag since guaranteed to be set if in rollback */
	cl_sk = cl_get_setkey(sp->setno, sp->setname);
	for (i = 0; i < node_c; i++) {
		if (clnt_unlock_set(node_v[i], cl_sk, &xep))
			mdclrerror(&xep);
	}
	cl_set_setkey(NULL);

	/* release signals back to what they were on entry */
	if (procsigs(FALSE, &oldsigs, &xep) < 0)
		mdclrerror(&xep);

	if ((sig_flag) && (!multi_node))
		md_rb_sig_handling_off(md_got_sig(), md_which_sig());

	return (rval);
}

/*
 * Remove the side name for side `sideno' from every database replica
 * of the set.
 *
 * Returns 0 on success, -1 if the replica list cannot be read or a
 * delete fails (ep carries the error).
 */
static int
del_db_sidenms(
	mdsetname_t	*sp,
	side_t		sideno,
	md_error_t	*ep
)
{
	md_replicalist_t	*replicas = NULL;
	md_replicalist_t	*cur;
	int			status = 0;

	if (metareplicalist(sp, MD_BASICNAME_OK, &replicas, ep) < 0)
		return (-1);

	cur = replicas;
	while (cur != NULL) {
		md_replica_t	*rep = cur->rl_repp;

		if (meta_db_delsidenm(sp, sideno, rep->r_namep,
		    rep->r_blkno, ep) != 0) {
			status = -1;
			break;
		}
		cur = cur->rl_next;
	}

	/* The replica list is released on both success and failure. */
	metafreereplicalist(replicas);
	return (status);
}

/*
 * Delete the drive records `dd' of the set from each host in node_v.
 *
 * Multi-node set, oha == TRUE:  hosts not flagged MD_MN_NODE_ALIVE are
 *	skipped; a host missing from the set's node list is an error.
 * Multi-node set, oha == FALSE: the RPC is issued to every host.
 * Traditional set:              the RPC is issued to every host; with
 *	oha == TRUE an RPC-level failure is cleared and ignored.
 *
 * Returns 0 on success, otherwise the failing call's error result
 * (-1 from the RPC paths).
 */
static int
del_drvs_from_hosts(
	mdsetname_t	*sp,
	md_set_desc	*sd,
	md_drive_desc	*dd,
	int		node_c,
	char		**node_v,
	int		oha,
	md_error_t	*ep
)
{
	int 		i;
	md_mnnode_desc	*nd;

	for (i = 0; i < node_c; i++) {
		if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
			/*
			 * During OHA mode, don't issue RPCs to
			 * non-alive nodes since there is no reason to
			 * wait for RPC timeouts.
			 */
			nd = sd->sd_nodelist;
			while (nd) {
				if (strcmp(nd->nd_nodename, node_v[i]) == 0)
					break;
				nd = nd->nd_next;
			}
			if (nd == NULL) {
				/*
				 * nd is NULL here, so report the name that
				 * was searched for, not nd->nd_nodename.
				 */
				return (mddserror(ep, MDE_DS_NOTINMEMBERLIST,
				    sp->setno, node_v[i],
				    NULL, sp->setname));
			}

			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				continue;
			}
			if (clnt_deldrvs(node_v[i], sp, dd, ep)) {
				return (-1);
			}
		} else if (MD_MNSET_DESC(sd) && (oha == FALSE)) {
			/*
			 * All nodes should be alive in non-oha mode.
			 */
			if (clnt_deldrvs(node_v[i], sp, dd, ep)) {
				return (-1);
			}
		} else {
			/*
			 * For traditional diskset, issue the RPC and
			 * ignore RPC failure if in OHA mode.
			 */
			if (clnt_deldrvs(node_v[i], sp, dd, ep)) {
				if (oha == TRUE && mdanyrpcerror(ep)) {
					mdclrerror(ep);
					continue;
				}
				return (-1);
			}
		}
	}

	return (0);
}

/*
 * Delete the single host named by `anode' from the local node's copy of
 * the set.  Only RPCs to mynode() are issued.
 *
 * The set is locked on the local node for the duration; the unlock, the
 * set-key reset and the set name flush run on every path that gets past
 * the ownership check.
 *
 * Returns 0 on success, -1 on failure (ep carries the first error).
 */
static int
del_host_noset(
	mdsetname_t	*sp,
	char		**anode,
	md_error_t	*ep
)
{
	md_set_desc	*setdesc;
	md_drive_desc	*drives;
	md_setkey_t	*key;
	md_error_t	unlock_err = mdnullerror;
	int		status = 0;

	setdesc = metaget_setdesc(sp, ep);
	if (setdesc == NULL)
		return (-1);

	/* Make sure we own the set */
	if (meta_check_ownership(sp, ep) != 0)
		return (-1);

	/* Lock the set on our side, then do the work step by step */
	if (clnt_lock_set(mynode(), sp, ep)) {
		status = -1;
	} else if (clnt_delhosts(mynode(), sp, 1, anode, ep)) {
		status = -1;
	} else if (!MD_MNSET_DESC(setdesc)) {
		drives = metaget_drivedesc(sp,
		    (MD_BASICNAME_OK | PRINT_FAST), ep);
		if (drives == NULL) {
			/* NULL with a clean ep just means no drives */
			if (!mdisok(ep))
				status = -1;
		} else if (clnt_del_drv_sidenms(mynode(), sp, ep)) {
			/* We have drives: their side names must go too */
			status = -1;
		}
	}

	key = cl_get_setkey(sp->setno, sp->setname);
	if (clnt_unlock_set(mynode(), key, &unlock_err)) {
		/* Report the unlock error only if nothing failed earlier */
		if (status == 0)
			(void) mdstealerror(ep, &unlock_err);
		status = -1;
	}
	cl_set_setkey(NULL);

	metaflushsetname(sp);

	return (status);
}

/*
 * Delete the metadevice namespace entries that belong to side `sideno'.
 *
 * For a multi-node set whose local node has MD_MN_NODE_OWN set, the work
 * is delegated to rpc.mdcommd with an MD_MN_MSG_META_MD_DELSIDE message.
 * Otherwise every namespace key is walked with MD_IOCNXTKEY_NM and the
 * name is deleted once per reference.
 *
 * Returns 0 on success, -1 on failure (ep carries the error when one is
 * available).
 */
static int
del_md_sidenms(mdsetname_t *sp, side_t sideno, md_error_t *ep)
{
	mdnm_params_t		nm;
	md_set_desc		*sd = NULL;	/* stays NULL for local set */
	int			i;

	if (!metaislocalset(sp)) {
		if ((sd = metaget_setdesc(sp, ep)) == NULL)
			return (-1);
	}
	/* Use rpc.mdcommd to delete md side info from all nodes */
	if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
	    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
		md_mn_result_t			*resultp = NULL;
		md_mn_msg_meta_md_delside_t	md_ds;
		int				send_rval;

		md_ds.msg_sideno = sideno;
		/*
		 * If reconfig cycle has been started, this node is stuck
		 * in the return step until this command has completed.  If
		 * mdcommd is suspended, ask send_message to fail (instead of
		 * retrying) so that metaset can finish allowing the
		 * reconfig cycle to proceed.
		 */
		send_rval = mdmn_send_message(sp->setno,
		    MD_MN_MSG_META_MD_DELSIDE,
		    MD_MSGF_FAIL_ON_SUSPEND | MD_MSGF_PANIC_WHEN_INCONSISTENT,
		    0, (char *)&md_ds, sizeof (md_mn_msg_meta_md_delside_t),
		    &resultp, ep);
		if (send_rval != 0) {
			/*
			 * Only pull the error out of the result when a
			 * result was actually handed back; resultp may
			 * still be NULL on a failed send.
			 */
			if (resultp != NULL) {
				(void) mdstealerror(ep, &(resultp->mmr_ep));
				free_result(resultp);
			}
			return (-1);
		}
		if (resultp)
			free_result(resultp);
	} else {
		(void) memset(&nm, '\0', sizeof (nm));
		nm.key   = MD_KEYWILD;

		/*CONSTCOND*/
		while (1) {
			nm.mde   = mdnullerror;
			nm.setno = sp->setno;
			nm.side  = MD_SIDEWILD;
			if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0)
				return (mdstealerror(ep, &nm.mde));

			/* No more keys: every entry has been processed. */
			if (nm.key == MD_KEYWILD)
				return (0);

			/*
			 * The device reference count can be greater than 1 if
			 * more than one softpart is configured on top of the
			 * same device.  If this is the case then we want to
			 * decrement the count to zero so the entry can be
			 * actually removed.
			 */
			for (i = 0; i < nm.ref_count; i++) {
				if (del_name(sp, sideno, nm.key, ep) == -1)
					return (-1);
			}
		}
	}
	return (0);
}

/*
 * Best-effort re-creation of the set on every member host for which
 * nodehasset() returns a negative value.  Used during rollback; every
 * error is cleared and nothing is reported to the caller.
 */
static void
recreate_set(
	mdsetname_t		*sp,
	md_set_desc		*sd
)
{
	md_error_t		scratch = mdnullerror;
	md_mnnode_desc		*node;
	int			side;

	if (!MD_MNSET_DESC(sd)) {
		/* Traditional set: members live in the sd_nodes table */
		for (side = 0; side < MD_MAXSIDES; side++) {
			/* Skip empty slots */
			if (sd->sd_nodes[side][0] == '\0')
				continue;

			if (nodehasset(sp, sd->sd_nodes[side],
			    NHS_NST_EQ, &scratch) >= 0)
				continue;

			mdclrerror(&scratch);

			if (clnt_createset(sd->sd_nodes[side], sp,
			    sd->sd_nodes, sd->sd_ctime, sd->sd_genid,
			    &scratch) == -1)
				mdclrerror(&scratch);
		}
		return;
	}

	/* Multi-node set: members live in the sd_nodelist chain */
	for (node = sd->sd_nodelist; node != NULL; node = node->nd_next) {
		/* Nodes that are not alive are left alone */
		if (!(node->nd_flags & MD_MN_NODE_ALIVE))
			continue;

		if (nodehasset(sp, node->nd_nodename,
		    NHS_NST_EQ, &scratch) >= 0)
			continue;

		mdclrerror(&scratch);

		if (clnt_mncreateset(node->nd_nodename, sp,
		    sd->sd_nodelist,
		    sd->sd_ctime, sd->sd_genid,
		    sd->sd_mn_master_nodenm,
		    sd->sd_mn_master_nodeid, &scratch) == -1)
			mdclrerror(&scratch);
	}
}

/*
 * Delete a diskset that has no drives from the node_c hosts in node_v.
 *
 * The set record is first marked MD_SR_DEL on every host and then the
 * set is deleted from every host.  With oha == TRUE, multi-node hosts
 * that are not alive are skipped and, for traditional sets, RPC-level
 * failures are cleared and ignored.  A failure after the rollback point
 * re-creates the set and resyncs the genid (see the rollback label).
 *
 * If a MN diskset, set is already locked on all nodes via clnt_lock_set.
 *
 * Returns 0 on success, -1 on failure (ep carries the error).
 */
static int
del_set_nodrives(
	mdsetname_t		*sp,
	int			node_c,
	char			**node_v,
	int			oha,
	md_error_t		*ep
)
{
	md_set_desc		*sd;
	int			i;
	sigset_t		oldsigs;
	md_setkey_t		*cl_sk;
	int			rb_level = 0;	/* progress, for rollback */
	ulong_t			max_genid = 0;
	int			rval = 0;
	md_error_t		xep = mdnullerror;
	md_mnnode_desc		*nd;
	int			delete_end = 1;	/* call sdssc_delete_end()? */

	if ((sd = metaget_setdesc(sp, ep)) == NULL)
		return (-1);

	if (MD_MNSET_DESC(sd)) {
		/* Make sure we are blocking all signals */
		if (procsigs(TRUE, &oldsigs, &xep) < 0)
			mdclrerror(&xep);
	} else {
		md_rb_sig_handling_on();
	}

	/*
	 * Lock the set on current set members for traditional disksets.
	 */
	if (!(MD_MNSET_DESC(sd))) {
		for (i = 0; i < node_c; i++) {
			/*
			 * For traditional diskset, issue the RPC and
			 * ignore RPC failure if in OHA mode.
			 */
			if (clnt_lock_set(node_v[i], sp, ep)) {
				if (oha == TRUE && mdanyrpcerror(ep)) {
					mdclrerror(ep);
					continue;
				}
				rval = -1;
				goto out;
			}
		}
	}


	RB_TEST(1, "deletehosts", ep)

	RB_PREEMPT;
	rb_level = 1;	/* level 1 */

	RB_TEST(2, "deletehosts", ep)

	/*
	 * Mark the set record MD_SR_DEL
	 */
	for (i = 0; i < node_c; i++) {

		RB_TEST(3, "deletehosts", ep)

		if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
			/*
			 * During OHA mode, don't issue RPCs to
			 * non-alive nodes since there is no reason to
			 * wait for RPC timeouts.
			 */
			nd = sd->sd_nodelist;
			while (nd) {
				if (strcmp(nd->nd_nodename, node_v[i]) == 0)
					break;
				nd = nd->nd_next;
			}
			if (nd == NULL) {
				/*
				 * nd is NULL here, so report the name that
				 * was searched for, not nd->nd_nodename.
				 */
				(void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
				    sp->setno, node_v[i],
				    NULL, sp->setname);
				goto rollback;
			}

			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				continue;
			}

			if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) {
				goto rollback;
			}
		} else if (MD_MNSET_DESC(sd) && (oha == FALSE)) {
			/*
			 * All nodes should be alive in non-oha mode.
			 */
			if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) {
				goto rollback;
			}
		} else {
			/*
			 * For traditional diskset, issue the RPC and
			 * ignore RPC failure if in OHA mode.
			 */
			if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) {
				if (oha == TRUE && mdanyrpcerror(ep)) {
					mdclrerror(ep);
					continue;
				}
				goto rollback;
			}
		}

		RB_TEST(4, "deletehosts", ep)
	}

	RB_TEST(5, "deletehosts", ep)

	RB_PREEMPT;
	rb_level = 2;	/* level 2 */

	RB_TEST(6, "deletehosts", ep)

	/*
	 * A failed sdssc_delete_begin() is tolerated only when
	 * metad_isautotakebyname() is true for the set; in that case the
	 * matching sdssc_delete_end() calls are skipped.
	 */
	if (sdssc_delete_begin(sp->setname) == SDSSC_ERROR) {
		if (metad_isautotakebyname(sp->setname))
			delete_end = 0;
		else
			goto rollback;
	}

	/* The set is OK to delete, make it so. */
	for (i = 0; i < node_c; i++) {

		RB_TEST(7, "deletehosts", ep)

		if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
			/*
			 * During OHA mode, don't issue RPCs to
			 * non-alive nodes since there is no reason to
			 * wait for RPC timeouts.
			 */
			nd = sd->sd_nodelist;
			while (nd) {
				if (strcmp(nd->nd_nodename, node_v[i]) == 0)
					break;
				nd = nd->nd_next;
			}
			if (nd == NULL) {
				/*
				 * nd is NULL here, so report the name that
				 * was searched for, not nd->nd_nodename.
				 */
				(void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
				    sp->setno, node_v[i],
				    NULL, sp->setname);
				goto rollback;
			}

			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				continue;
			}

			if (clnt_delset(node_v[i], sp, ep) == -1) {
				goto rollback;
			}
		} else if (MD_MNSET_DESC(sd) && (oha == FALSE)) {
			/*
			 * All nodes should be alive in non-oha mode.
			 */
			if (clnt_delset(node_v[i], sp, ep) == -1) {
				goto rollback;
			}
		} else {
			/*
			 * For traditional diskset, issue the RPC and
			 * ignore RPC failure if in OHA mode.
			 */
			if (clnt_delset(node_v[i], sp, ep) == -1) {
				if (oha == TRUE && mdanyrpcerror(ep)) {
					mdclrerror(ep);
					continue;
				}
				goto rollback;
			}
		}

		RB_TEST(8, "deletehosts", ep)
	}

	RB_TEST(9, "deletehosts", ep)

out:
	/*
	 * Unlock the set on current set members
	 * for traditional disksets.
	 */
	if (!(MD_MNSET_DESC(sd))) {
		cl_sk = cl_get_setkey(sp->setno, sp->setname);
		for (i = 0; i < node_c; i++) {
			/*
			 * For traditional diskset, issue the RPC and
			 * ignore RPC failure if in OHA mode.
			 */
			if (clnt_unlock_set(node_v[i], cl_sk, &xep)) {
				if (oha == TRUE && mdanyrpcerror(&xep)) {
					mdclrerror(&xep);
					continue;
				}
				/* Keep the first error only */
				if (rval == 0)
					(void) mdstealerror(ep, &xep);
				rval = -1;
			}
		}
		cl_set_setkey(NULL);
	}

	/*
	 * A MN diskset has the clnt_locks held by meta_set_deletehosts so
	 * don't flush that data until meta_set_deletehosts has finished
	 * with it.  meta_set_deletehosts will handle the flush of the
	 * setname.
	 */
	if (!(MD_MNSET_DESC(sd))) {
		metaflushsetname(sp);
	}

	if (delete_end &&
	    sdssc_delete_end(sp->setname, SDSSC_COMMIT) == SDSSC_ERROR)
		rval = -1;

	if (MD_MNSET_DESC(sd)) {
		/* release signals back to what they were on entry */
		if (procsigs(FALSE, &oldsigs, &xep) < 0)
			mdclrerror(&xep);
	} else {
		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
	}

	return (rval);

rollback:
	/* all signals already blocked for MN diskset */
	if (!(MD_MNSET_DESC(sd))) {
		/* Make sure we are blocking all signals */
		if (procsigs(TRUE, &oldsigs, &xep) < 0)
			mdclrerror(&xep);
	}

	rval = -1;

	max_genid = sd->sd_genid;

	/* level 2 */
	if (rb_level > 1) {
		recreate_set(sp, sd);
		max_genid++;

		if (delete_end)
			(void) sdssc_delete_end(sp->setname, SDSSC_CLEANUP);
	}

	/* level 1 */
	if (rb_level > 0) {
		max_genid++;
		resync_genid(sp, sd, max_genid, node_c, node_v);
	}

	/* level 0 */
	/*
	 * Unlock the set on current set members
	 * for traditional disksets.
	 */
	if (!(MD_MNSET_DESC(sd))) {
		cl_sk = cl_get_setkey(sp->setno, sp->setname);
		for (i = 0; i < node_c; i++) {
			/* Unlock failures are ignored during rollback */
			if (clnt_unlock_set(node_v[i], cl_sk, &xep))
				mdclrerror(&xep);
		}
		cl_set_setkey(NULL);
	}

	/* release signals back to what they were on entry */
	if (procsigs(FALSE, &oldsigs, &xep) < 0)
		mdclrerror(&xep);

	/*
	 * A MN diskset has the clnt_locks held by meta_set_deletehosts so
	 * don't flush that data until meta_set_deletehosts has finished
	 * with it.  meta_set_deletehosts will handle the flush of the
	 * setname.
	 */
	if (!(MD_MNSET_DESC(sd))) {
		metaflushsetname(sp);
		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
	}

	return (rval);
}

/*
 * Delete the set from each host named in node_v.  When the set has
 * drives (dd != NULL) the drives, the db replica side names and the
 * metadevice side names belonging to those hosts are removed first.
 *
 * On entry:
 *   procsigs already called for MN diskset.
 *   md_rb_sig_handling already called for traditional diskset.
 *
 * Progress is tracked in rb_level.  If any step fails, the rollback
 * code undoes the steps completed so far, unlocks the set on the
 * current set members and restores signal handling.  Errors hit
 * during rollback go to a private error structure and are cleared,
 * so ep is left holding the error from the step that failed.
 *
 * Parameters:
 *   sp      - set being operated on
 *   sd      - set descriptor for sp
 *   dd      - drive descriptors for the set, NULL if the set has none
 *   node_c  - number of entries in node_v
 *   node_v  - names of the nodes being deleted
 *   oha     - TRUE when running in OHA mode
 *   ep      - error return
 *
 * Returns 0 on success, -1 on failure (after rollback).
 */
static int
del_set_on_hosts(
	mdsetname_t		*sp,
	md_set_desc		*sd,
	md_drive_desc		*dd,
	int			node_c,		/* Number of nodes */
	char			**node_v,	/* Nodes being deleted */
	int			oha,
	md_error_t		*ep
)
{
	int			i;
	int			j;
	side_t			sideno;
	md_replicalist_t	*rlp = NULL;
	sigset_t		oldsigs;
	md_setkey_t		*cl_sk;
	ulong_t			max_genid = 0;
	int			rb_level = 1;	/* This is a special case */
	md_error_t		xep = mdnullerror;
	md_mnnode_desc		*nd;

	RB_PREEMPT;

	RB_TEST(7, "deletehosts", ep)

	if (dd != NULL) {
		/*
		 * May need this to re-add sidenames on roll back.
		 */
		if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp,
		    ep) < 0)
			goto rollback;

		RB_TEST(8, "deletehosts", ep)

		RB_PREEMPT;
		rb_level = 2;	/* level 2 */

		RB_TEST(9, "deletehosts", ep)

		if (del_drvs_from_hosts(sp, sd, dd, node_c, node_v, oha, ep))
			goto rollback;

		RB_TEST(10, "deletehosts", ep)

		RB_PREEMPT;
		rb_level = 3;	/* level 3 */

		RB_TEST(11, "deletehosts", ep)

		/*
		 * Delete the db replica sides
		 * This is done before the next loop, so that
		 * the db does not get unloaded before we are finished
		 * deleting the sides.
		 */
		if (MD_MNSET_DESC(sd)) {
			nd = sd->sd_nodelist;
			while (nd) {
				/* Skip hosts not being deleted */
				if (! strinlst(nd->nd_nodename, node_c,
				    node_v)) {
					nd = nd->nd_next;
					continue;
				}

				if (del_db_sidenms(sp, nd->nd_nodeid, ep))
					goto rollback;

				RB_TEST(12, "deletehosts", ep)
				nd = nd->nd_next;
			}
		} else {
			for (sideno = 0; sideno < MD_MAXSIDES; sideno++) {
				/* Skip empty slots */
				if (sd->sd_nodes[sideno][0] == '\0')
					continue;

				/* Skip hosts not being deleted */
				if (! strinlst(sd->sd_nodes[sideno], node_c,
				    node_v))
					continue;

				if (del_db_sidenms(sp, sideno, ep))
					goto rollback;

				RB_TEST(12, "deletehosts", ep)
			}
		}

		RB_TEST(13, "deletehosts", ep)

		RB_PREEMPT;
		rb_level = 4;	/* level 4 */

		RB_TEST(14, "deletehosts", ep)

		/* Delete the names from the namespace */
		if (MD_MNSET_DESC(sd)) {
			nd = sd->sd_nodelist;
			while (nd) {
				/* Skip hosts not being deleted */
				if (! strinlst(nd->nd_nodename, node_c,
				    node_v)) {
					nd = nd->nd_next;
					continue;
				}

				if (del_md_sidenms(sp, nd->nd_nodeid, ep))
					goto rollback;

				RB_TEST(15, "deletehosts", ep)
				nd = nd->nd_next;
			}
		} else {
			for (sideno = 0; sideno < MD_MAXSIDES; sideno++) {
				/* Skip empty slots */
				if (sd->sd_nodes[sideno][0] == '\0')
					continue;

				/* Skip hosts not being deleted */
				if (! strinlst(sd->sd_nodes[sideno], node_c,
				    node_v))
					continue;

				if (del_md_sidenms(sp, sideno, ep))
					goto rollback;

				RB_TEST(15, "deletehosts", ep)
			}
		}
	}

	RB_TEST(16, "deletehosts", ep)

	RB_PREEMPT;
	rb_level = 5;	/* level 5 */

	RB_TEST(17, "deletehosts", ep)

	for (i = 0; i < node_c; i++) {
		if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
			/*
			 * During OHA mode, don't issue RPCs to
			 * non-alive nodes since there is no reason to
			 * wait for RPC timeouts.
			 */
			nd = sd->sd_nodelist;
			while (nd) {
				if (strcmp(nd->nd_nodename, node_v[i]) == 0)
					break;
				nd = nd->nd_next;
			}
			if (nd == NULL) {
				/*
				 * nd is NULL here, so report the name that
				 * was being looked up rather than reading
				 * through nd.
				 */
				(void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
				    sp->setno, node_v[i],
				    NULL, sp->setname);
				goto rollback;
			}

			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				continue;
			}

			if (clnt_delset(node_v[i], sp, ep) == -1) {
				goto rollback;
			}
		} else if (MD_MNSET_DESC(sd) && (oha == FALSE)) {
			/*
			 * All nodes should be alive in non-oha mode.
			 */
			if (clnt_delset(node_v[i], sp, ep) == -1) {
				goto rollback;
			}
		} else {
			/*
			 * For traditional diskset, issue the RPC and
			 * ignore RPC failure if in OHA mode.
			 */
			if (clnt_delset(node_v[i], sp, ep) == -1) {
				if (oha == TRUE && mdanyrpcerror(ep)) {
					mdclrerror(ep);
					continue;
				}
				goto rollback;
			}
		}

		RB_TEST(18, "deletehosts", ep)
	}

	metafreereplicalist(rlp);

	if (MD_MNSET_DESC(sd)) {
		/* release signals back to what they were on entry */
		if (procsigs(FALSE, &oldsigs, &xep) < 0)
			mdclrerror(&xep);
	} else {
		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
	}

	return (0);

rollback:
	/* all signals already blocked for MN diskset */
	if (!(MD_MNSET_DESC(sd))) {
		/* Make sure we are blocking all signals */
		if (procsigs(TRUE, &oldsigs, &xep) < 0)
			mdclrerror(&xep);
	}

	max_genid = sd->sd_genid;

	/* level 5 */
	if (rb_level > 4) {
		recreate_set(sp, sd);
		max_genid++;
	}

	/* level 2 */
	if (rb_level > 1 && dd != NULL) {
		/*
		 * See if we have to re-add the drives specified.
		 */
		for (i = 0; i < node_c; i++) {
			md_set_record	*sr;

			if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
				/*
				 * During OHA mode, don't issue RPCs to
				 * non-alive nodes since there is no reason to
				 * wait for RPC timeouts.
				 */
				nd = sd->sd_nodelist;
				while (nd) {
					if (strcmp(nd->nd_nodename, node_v[i])
					    == 0)
						break;
					nd = nd->nd_next;
				}
				if (nd == NULL)
					continue;

				if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
					continue;
			}

			/* Don't care if set record is MN or not */
			if (clnt_getset(node_v[i], sp->setname,
			    MD_SET_BAD, &sr, &xep) == -1) {
				mdclrerror(&xep);
				continue;
			}

			/* Drive already added, skip to next node */
			if (sr->sr_drivechain != NULL) {
				/*
				 * Set record structure was allocated from RPC
				 * routine getset so this structure is only of
				 * size md_set_record even if the MN flag is
				 * set.  So, clear the flag so that the free
				 * code doesn't attempt to free a structure
				 * the size of md_mnset_record.
				 */
				sr->sr_flags &= ~MD_SR_MN;
				free_sr(sr);
				continue;
			}

			if (clnt_adddrvs(node_v[i], sp, dd,
			    sr->sr_ctime, sr->sr_genid, &xep) == -1)
				mdclrerror(&xep);

			if (clnt_upd_dr_flags(node_v[i], sp, dd,
			    MD_DR_OK, &xep) == -1)
				mdclrerror(&xep);

			/*
			 * Set record structure was allocated from RPC routine
			 * getset so this structure is only of size
			 * md_set_record even if the MN flag is set.  So,
			 * clear the flag so that the free code doesn't
			 * attempt to free a structure the size of
			 * md_mnset_record.
			 */
			sr->sr_flags &= ~MD_SR_MN;
			free_sr(sr);
		}
		max_genid += 3;
	}

	/* level 3 */
	if (rb_level > 2 && dd != NULL) {
		md_replicalist_t	*rl;

		for (rl = rlp; rl != NULL; rl = rl->rl_next) {
			md_replica_t	*r = rl->rl_repp;

			/*
			 * This is not the first replica being added to the
			 * diskset so call with ADDSIDENMS_BCAST.  If this
			 * is a traditional diskset, the bcast flag is ignored
			 * since traditional disksets don't use the rpc.mdcommd.
			 */
			if (meta_db_addsidenms(sp, r->r_namep, r->r_blkno,
			    DB_ADDSIDENMS_BCAST, &xep))
				mdclrerror(&xep);
		}
	}

	/* level 4 */
	if (rb_level > 3 && dd != NULL) {
		int	nodeid_addsides = 0;
		/*
		 * Add the device names for the new sides into the namespace,
		 * on all hosts not being deleted.
		 */
		if (MD_MNSET_DESC(sd)) {
			nd = sd->sd_nodelist;
			while (nd) {
				/* Find a node that is not being deleted */
				if (! strinlst(nd->nd_nodename, node_c,
				    node_v)) {
					nodeid_addsides = nd->nd_nodeid;
					break;
				}
				nd = nd->nd_next;
			}
		} else {
			for (j = 0; j < MD_MAXSIDES; j++) {
				/* Skip empty slots */
				if (sd->sd_nodes[j][0] == '\0')
					continue;

				/* Find a node that is not being deleted */
				if (! strinlst(sd->sd_nodes[j], node_c,
				    node_v))
					break;
			}
			nodeid_addsides = j;
		}

		if (MD_MNSET_DESC(sd)) {
			nd = sd->sd_nodelist;
			while (nd) {
				/* Skip nodes not being deleted */
				if (!strinlst(nd->nd_nodename, node_c,
				    node_v)) {
					nd = nd->nd_next;
					continue;
				}

				/* this side was just created, add the names */
				if (add_md_sidenms(sp, nd->nd_nodeid,
				    nodeid_addsides, &xep))
					mdclrerror(&xep);
				nd = nd->nd_next;
			}
		} else {
			for (i = 0; i < MD_MAXSIDES; i++) {
				/* Skip empty slots */
				if (sd->sd_nodes[i][0] == '\0')
					continue;

				/* Skip nodes not being deleted */
				if (!strinlst(sd->sd_nodes[i], node_c, node_v))
					continue;

				/* this side was just created, add the names */
				if (add_md_sidenms(sp, i, nodeid_addsides,
				    &xep))
					mdclrerror(&xep);
			}
		}
	}

	/* level 1 */
	if (rb_level > 0) {
		max_genid++;
		resync_genid(sp, sd, max_genid, node_c, node_v);
	}

	/* level 0 */
	cl_sk = cl_get_setkey(sp->setno, sp->setname);
	if (MD_MNSET_DESC(sd)) {
		nd = sd->sd_nodelist;
		while (nd) {
			/*
			 * Skip nodes that are not alive.  The list pointer
			 * must be advanced before continuing, otherwise the
			 * loop would never get past the first such node.
			 */
			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				nd = nd->nd_next;
				continue;
			}
			/* To balance lock/unlock */
			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
				mdclrerror(&xep);
			nd = nd->nd_next;
		}
	} else {
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
				mdclrerror(&xep);
		}
	}
	cl_set_setkey(NULL);

	/* release signals back to what they were on entry */
	if (procsigs(FALSE, &oldsigs, &xep) < 0)
		mdclrerror(&xep);

	metafreereplicalist(rlp);

	if (!(MD_MNSET_DESC(sd))) {
		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
	}

	return (-1);
}

/*
 * Build a side name entry for side "sideno" of drive "dnp" and append
 * it to the end of the drive's side_names list.
 *
 * The device information for the new entry is looked up from the block
 * name of the drive's replica slice.
 *
 * Returns 0 on success, -1 on failure (nothing is appended on failure).
 */
static int
make_sideno_sidenm(
	mdsetname_t	*sp,
	mddrivename_t	*dnp,
	side_t		sideno,
	md_error_t	*ep
)
{
	mdsidenames_t	*sn, **sn_next;
	md_set_desc	*sd;
	mdname_t	*np;
	uint_t		rep_slice;
	int		err = 0;

	assert(dnp->side_names_key != MD_KEYWILD);

	if ((sd = metaget_setdesc(sp, ep)) == NULL)
		return (-1);

	/*
	 * Find the link that terminates the list.  Walking by link
	 * address (rather than by element) also covers an empty list,
	 * where the terminating link is dnp->side_names itself.
	 */
	for (sn_next = &dnp->side_names; *sn_next != NULL;
	    sn_next = &(*sn_next)->next)
		;

	if (meta_replicaslice(dnp, &rep_slice, ep) != 0)
		return (-1);

	if ((np = metaslicename(dnp, rep_slice, ep)) == NULL)
		return (-1);

	sn = Zalloc(sizeof (*sn));
	sn->sideno = sideno;

	if (MD_MNSET_DESC(sd)) {
		/*
		 * For MN diskset the sideno is not an index into
		 * the array of nodes.  Hence getside_devinfo is
		 * used instead of meta_getnextside_devinfo.
		 */
		if (meta_getside_devinfo(sp, np->bname, sideno, &sn->cname,
		    &sn->dname, &sn->mnum, ep) == -1)
			err = -1;
	} else {
		/*
		 * decrement sideno, to look like the previous sideno.
		 * NOTE(review): meta_getnextside_devinfo presumably
		 * advances sideno to the side it returns; the assert
		 * below checks that this is the side requested.
		 */
		sideno--;
		if (meta_getnextside_devinfo(sp, np->bname, &sideno,
		    &sn->cname, &sn->dname, &sn->mnum, ep) == -1)
			err = -1;
	}

	if (err) {
		Free(sn);
		return (err);
	}
	assert(sn->sideno == sideno);

	/* Add to the end of the linked list */
	*sn_next = sn;
	return (0);
}

/*
 * Check every name in node_v: it must not be longer than
 * MD_MAX_NODENAME, and the hostname obtained for it through
 * clnt_hostname() must match the name given.
 *
 * Returns 0 when all names pass.  Otherwise returns -1 if
 * clnt_hostname() failed, or the result of mddserror() for a name
 * that is too long or does not match.
 */
static int
validate_nodes(
	mdsetname_t	*sp,
	int		node_c,
	char		**node_v,
	md_error_t	*ep
)
{
	int		idx = 0;

	while (idx < node_c) {
		char	*given = node_v[idx];
		char	*reported;
		int	same;

		if (strlen(given) > (size_t)MD_MAX_NODENAME)
			return (mddserror(ep, MDE_DS_NODENAMETOOLONG,
			    sp->setno, given, NULL, sp->setname));

		if (clnt_hostname(given, &reported, ep))
			return (-1);

		/* Compare first, then release the returned name */
		same = (strcmp(given, reported) == 0);
		Free(reported);

		if (!same)
			return (mddserror(ep, MDE_DS_NOTNODENAME, sp->setno,
			    given, NULL, sp->setname));

		idx++;
	}
	return (0);
}

/*
 * Exported Entry Points
 */

/*
 * Check the given disk set name for syntactic correctness.
 *
 * The name must be no longer than MD_MAX_SETNAME and every character
 * must be printable and must not appear in INVALID_IN_NAMES.
 *
 * Returns 0 if the name is acceptable, otherwise the result of
 * mddserror() with MDE_DS_SETNAMETOOLONG or MDE_DS_INVALIDSETNAME
 * recorded in ep.
 */
int
meta_set_checkname(char *setname, md_error_t *ep)
{
	char	*cp;

	if (strlen(setname) > (size_t)MD_MAX_SETNAME)
		return (mddserror(ep, MDE_DS_SETNAMETOOLONG,
		    MD_SET_BAD, NULL, NULL, setname));

	/*
	 * The ctype functions require a value representable as
	 * unsigned char (or EOF); a plain char may be negative for
	 * bytes with the high bit set, so convert before calling.
	 */
	for (cp = setname; *cp; cp++)
		if (!isprint((unsigned char)*cp) ||
		    strchr(INVALID_IN_NAMES, *cp) != NULL)
			return (mddserror(ep, MDE_DS_INVALIDSETNAME,
			    MD_SET_BAD, NULL, NULL, setname));
	return (0);
}

/*
 * Add host(s) to the multi-node diskset provided in sp.
 * 	- create set if non-existent.
 */
static int
meta_multinode_set_addhosts(
	mdsetname_t	*sp,
	int		multi_node,
	int		node_c,
	char		**node_v,
	int		auto_take,
	md_error_t	*ep
)
{
	md_set_desc			*sd;
	md_drive_desc			*dd, *p;
	int				rval = 0;
	int				bool;
	int				nodeindex;
	int 				i;
	int				has_set;
	sigset_t			oldsigs;
	md_setkey_t			*cl_sk;
	int				rb_level = 0;
	md_error_t			xep = mdnullerror;
	md_mnnode_desc			*nd, *nd_curr, *nd_prev;
	md_timeval32_t			now;
	int				nodecnt;
	mndiskset_membershiplist_t	*nl, *nl2;
	int				suspendall_flag = 0;
	int				suspend1_flag = 0;
	int				lock_flag = 0;
	int				stale_flag = 0;
	md_mnnode_desc			*saved_nd_next;
	int				remote_sets_created = 0;

	/*
	 * Check membershiplist first.  If there's
	 * an error, fail to create set and pass back error.
	 */
	if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) {
		return (-1);
	}
	/* Verify that all nodes are in member list */
	for (i = 0; i < node_c; i++) {
		/*
		 * If node in list isn't a member of the membership,
		 * just return error.
		 */
		if (meta_is_member(node_v[i], NULL, nl) == 0) {
			meta_free_nodelist(nl);
			return (mddserror(ep, MDE_DS_NOTINMEMBERLIST,
			    sp->setno, node_v[i], NULL, sp->setname));
		}
	}
	/*
	 * Node list is needed later, but there is a lot of error
	 * checking and possible failures between here and there, so
	 * just re-get the list later if there are no errors.
	 */
	meta_free_nodelist(nl);
	nl = NULL;

	/*
	 * Verify that list of nodes being added contains no
	 * duplicates.
	 */
	if (nodesuniq(sp, node_c, node_v, ep))
		return (-1);

	/*
	 * Verify that each node being added thinks that its nodename
	 * is the same as the nodename given.
	 */
	if (validate_nodes(sp, node_c, node_v, ep))
		return (-1);

	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
		if (! mdiserror(ep, MDE_NO_SET))
			return (-1);
		mdclrerror(ep);
		return (create_set(sp, multi_node, node_c, node_v, auto_take,
		    ep));
	} else {
		/*
		 * If this node and another node were both attempting to
		 * create the same setname at the same time, and the other
		 * node has just created the set on this node then sd would
		 * be non-NULL, but sp->setno would be null (setno is filled
		 * in by the create_set). If this is true, then fail since
		 * the other node has already won this race.
		 */
		if (sp->setno == NULL) {
			return (mddserror(ep, MDE_DS_NODEINSET,
			    NULL, mynode(), NULL, sp->setname));
		}
	}

	/* The auto_take behavior is inconsistent with multiple hosts. */
	if (auto_take || sd->sd_flags & MD_SR_AUTO_TAKE) {
		(void) mddserror(ep, MDE_DS_SINGLEHOST, sp->setno, NULL, NULL,
		    sp->setname);
		return (-1);
	}

	/*
	 * We already have the set.
	 */

	/* Make sure we own the set */
	if (meta_check_ownership(sp, ep) != 0)
		return (-1);

	/*
	 * The drive and node records are stored in the local mddbs of each
	 * node in the diskset.  Each node's rpc.metad daemon reads in the set,
	 * drive and node records from that node's local mddb and caches them
	 * internally. Any process needing diskset information contacts its
	 * local rpc.metad to get this information.  Since each node in the
	 * diskset is independently reading the set information from its local
	 * mddb, the set, drive and node records in the local mddbs must stay
	 * in-sync, so that all nodes have a consistent view of the diskset.
	 *
	 * For a multinode diskset, explicitly verify that all nodes in the
	 * diskset are ALIVE (i.e. are in the API membership list).  Otherwise,
	 * fail this operation since all nodes must be ALIVE in order to add
	 * the new node record to their local mddb.  If a panic of this node
	 * leaves the local mddbs set, node and drive records out-of-sync, the
	 * reconfig cycle will fix the local mddbs and force them back into
	 * synchronization.
	 */
	nd = sd->sd_nodelist;
	while (nd) {
		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
			return (mddserror(ep, MDE_DS_NOTINMEMBERLIST,
			    sp->setno, nd->nd_nodename, NULL,
			    sp->setname));
		}
		nd = nd->nd_next;
	}

	/*
	 * Check if node is already in set.
	 */
	for (i = 0; i < node_c; i++) {
		/* Is node already in set? */
		nd = sd->sd_nodelist;
		while (nd) {
			if (strcmp(nd->nd_nodename, node_v[i]) == 0)
				break;
			nd = nd->nd_next;
		}
		if (nd) {
			return (mddserror(ep, MDE_DS_NODEINSET,
			    sp->setno, node_v[i], NULL,
			    sp->setname));
		}
	}

	/*
	 * Lock the set on current set members.
	 * Set locking done much earlier for MN diskset than for traditional
	 * diskset since lock_set and SUSPEND are used to protect against
	 * other meta* commands running on the other nodes.
	 */
	/* Make sure we are blocking all signals */
	if (procsigs(TRUE, &oldsigs, &xep) < 0)
		mdclrerror(&xep);

	nd = sd->sd_nodelist;
	/* All nodes are guaranteed to be ALIVE */
	while (nd) {
		if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
			rval = -1;
			goto out;
		}
		lock_flag = 1;
		nd = nd->nd_next;
	}
	/*
	 * Lock out other meta* commands by suspending
	 * class 1 messages across the diskset.
	 */
	nd = sd->sd_nodelist;
	/* Send suspend to nodes in nodelist before addhosts call */
	/* All nodes are guaranteed to be ALIVE */
	while (nd) {
		if (clnt_mdcommdctl(nd->nd_nodename,
		    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
		    MD_MSCF_NO_FLAGS, ep)) {
			rval = -1;
			goto out;
		}
		suspend1_flag = 1;
		nd = nd->nd_next;
	}

	/* Lock the set on new set members */
	for (i = 0; i < node_c; i++) {
		/* Already verified to be alive */
		if (clnt_lock_set(node_v[i], sp, ep)) {
			rval = -1;
			goto out;
		}
		lock_flag = 1;
	}

	/*
	 * Perform the required checks for new hosts
	 */
	for (i = 0; i < node_c; i++) {
		/* Make sure this set name is not used on the other hosts */
		has_set = nodehasset(sp, node_v[i], NHS_N_EQ, ep);
		if (has_set < 0) {
			if (! mdiserror(ep, MDE_NO_SET)) {
				rval = -1;
				goto out;
			}
			/* Keep on truck'n */
			mdclrerror(ep);
		} else if (has_set) {
			(void) mddserror(ep, MDE_DS_NODEHASSET, sp->setno,
			    node_v[i], NULL, sp->setname);
			rval = -1;
			goto out;
		}

		if (clnt_setnumbusy(node_v[i], sp->setno, &bool, ep) == -1) {
			rval = -1;
			goto out;
		}

		if (bool == TRUE) {
			(void) mddserror(ep, MDE_DS_SETNUMBUSY, sp->setno,
			    node_v[i], NULL, sp->setname);
			rval = -1;
			goto out;
		}

		if (clnt_setnameok(node_v[i], sp, &bool, ep) == -1) {
			rval = -1;
			goto out;
		}

		if (bool == FALSE) {
			(void) mddserror(ep, MDE_DS_SETNAMEBUSY, sp->setno,
			    node_v[i], NULL, sp->setname);
			rval = -1;
			goto out;
		}

		if (check_setdrvs_againstnode(sp, node_v[i], ep)) {
			rval = -1;
			goto out;
		}
	}

	/* Get drive descriptors for the set */
	if ((dd = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep)) == NULL) {
		if (! mdisok(ep)) {
			rval = -1;
			goto out;
		}
	}

	/* END CHECK CODE */

	RB_TEST(1, "addhosts", ep)

	RB_PREEMPT;
	rb_level = 1;	/* level 1 */

	RB_TEST(2, "addhosts", ep)

	/*
	 * Create the set where needed
	 */
	if (create_set_on_hosts(sp, multi_node, node_c, node_v, 0, ep)) {
		goto rollback;
	}

	/*
	 * Send suspend to rpc.mdcommd on nodes where a set has been
	 * created since rpc.mdcommd must now be running on the remote nodes.
	 */
	remote_sets_created = 1;
	for (i = 0; i < node_c; i++) {
		/*
		 * Lock out other meta* commands by suspending
		 * class 1 messages across the diskset.
		 */
		if (clnt_mdcommdctl(node_v[i],
		    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
		    MD_MSCF_NO_FLAGS, ep)) {
			rval = -1;
			goto rollback;
		}
	}

	/*
	 * Merge the new entries into the set with the existing sides.
	 * Get membershiplist from API routine.  If there's
	 * an error, fail to create set and pass back error.
	 */
	if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) {
		goto rollback;
	}
	if (meta_gettimeofday(&now) == -1) {
		meta_free_nodelist(nl);
		(void) mdsyserror(ep, errno,
		    dgettext(TEXT_DOMAIN, "meta_gettimeofday()"));
		goto rollback;
	}
	for (nodeindex = 0; nodeindex < node_c; nodeindex++) {
		nd = Zalloc(sizeof (*nd));
		(void) strcpy(nd->nd_nodename, node_v[nodeindex]);
		nd->nd_ctime = now;
		nl2 = nl;
		while (nl2) {
			if (strcmp(nl2->msl_node_name,
			    node_v[nodeindex]) == 0) {
				nd->nd_nodeid = nl2->msl_node_id;
				(void) strcpy(nd->nd_priv_ic,
				    nl2->msl_node_addr);
				break;
			}
			nl2 = nl2->next;
		}

		/*
		 * Nodelist must be kept in ascending nodeid order.
		 */
		if (sd->sd_nodelist == NULL) {
			/* Nothing in list, just add it */
			sd->sd_nodelist = nd;
		} else if (nd->nd_nodeid <
		    sd->sd_nodelist->nd_nodeid) {
			/* Add to head of list */
			nd->nd_next = sd->sd_nodelist;
			sd->sd_nodelist = nd;
		} else {
			nd_curr = sd->sd_nodelist->nd_next;
			nd_prev = sd->sd_nodelist;
			/* Search for place to add it */
			while (nd_curr) {
				if (nd->nd_nodeid < nd_curr->nd_nodeid) {
					/* Add before nd_curr */
					nd->nd_next = nd_curr;
					nd_prev->nd_next = nd;
					break;
				}
				nd_prev = nd_curr;
				nd_curr = nd_curr->nd_next;
			}
			/* Add to end of list */
			if (nd_curr == NULL) {
				nd_prev->nd_next = nd;
			}

		}
		/* Node already verified to be in membership */
		nd->nd_flags |= MD_MN_NODE_ALIVE;
	}
	meta_free_nodelist(nl);

	/* If we have drives */
	if (dd != NULL) {
		/*
		 * For all the hosts being added, create a sidename structure
		 */
		nd = sd->sd_nodelist;
		while (nd) {
			/* Skip nodes not being added */
			if (!strinlst(nd->nd_nodename, node_c, node_v)) {
				nd = nd->nd_next;
				continue;
			}
			for (p = dd; p != NULL; p = p->dd_next) {
				if (make_sideno_sidenm(sp, p->dd_dnp,
				    nd->nd_nodeid, ep) != 0)
					goto rollback;
			}
			nd = nd->nd_next;
		}

		RB_PREEMPT;
		rb_level = 2;   /* level 2 */

		RB_TEST(4, "addhosts", ep)

		/*
		 * Add the new sidename for each drive to all the hosts
		 *
		 * If a multi-node diskset, each host only stores
		 * the side information for itself.  So, only send
		 * side information to the new hosts where each host
		 * will add the appropriate side information to its
		 * local mddb.
		 */
		nd = sd->sd_nodelist;
		while (nd) {
			/* Skip nodes not being added */
			if (!strinlst(nd->nd_nodename, node_c,
			    node_v)) {
				nd = nd->nd_next;
				continue;
			}

			/* Add side info to new hosts */
			if (clnt_add_drv_sidenms(nd->nd_nodename,
			    mynode(), sp, sd, node_c, node_v, ep))
				goto rollback;

			nd = nd->nd_next;
		}

		RB_TEST(5, "addhosts", ep)

		RB_PREEMPT;
		rb_level = 3;	/* level 3 */

		RB_TEST(6, "addhosts", ep)

		/*
		 * Add the device names for the new sides into the namespace
		 * for all hosts being added.  This is adding the side
		 * names to the diskset's mddb so add sidenames for all
		 * of the new hosts.
		 */
		nd = sd->sd_nodelist;
		while (nd) {
			/* Skip nodes not being added */
			if (!strinlst(nd->nd_nodename, node_c, node_v)) {
				nd = nd->nd_next;
				continue;
			}

			/* this side was just created, add the names */
			if (add_md_sidenms(sp, nd->nd_nodeid,
			    MD_SIDEWILD, ep))
				goto rollback;

			nd = nd->nd_next;
		}

		RB_TEST(7, "addhosts", ep)

		RB_PREEMPT;
		rb_level = 4;   /* level 4 */

		RB_TEST(8, "addhosts", ep)

		if (add_db_sidenms(sp, ep))
			goto rollback;

	} else {
		RB_PREEMPT;
		rb_level = 4;
	}

	RB_TEST(9, "addhosts", ep)

	RB_PREEMPT;
	rb_level = 5;	/* level 5 */

	RB_TEST(10, "addhosts", ep)

	if (dd != NULL) {
		/*
		 * Notify rpc.mdcommd on all nodes of a nodelist change.
		 * Start by suspending rpc.mdcommd (which drains it of all
		 * messages), then change the nodelist followed by a reinit
		 * and resume.
		 */
		nd = sd->sd_nodelist;
		/* Send suspend_all to nodes in nodelist (existing + new) */
		/* All nodes are guaranteed to be ALIVE */
		while (nd) {
			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND,
			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, ep)) {
				rval = -1;
				goto rollback;
			}
			suspendall_flag = 1;
			nd = nd->nd_next;
		}
	}

	/* Add the node(s) to the each host that is currently in the set */
	nd = sd->sd_nodelist;
	/* All nodes are guaranteed to be ALIVE */
	while (nd) {
		if (clnt_addhosts(nd->nd_nodename, sp, node_c, node_v, ep)) {
			goto rollback;
		}
		nd = nd->nd_next;
	}

	RB_TEST(11, "addhosts", ep)

	if (dd != NULL) {
		/*
		 * Mark the drives MD_DR_OK.
		 */
		nd = sd->sd_nodelist;
		/* All nodes are guaranteed to be ALIVE */
		while (nd) {
			if (clnt_upd_dr_flags(nd->nd_nodename, sp, dd,
			    MD_DR_OK, ep) == -1)
				goto rollback;
			nd = nd->nd_next;
		}
	}

	RB_TEST(12, "addhosts", ep)

	RB_PREEMPT;
	rb_level = 6;   /* level 6 */

	RB_TEST(13, "addhosts", ep)


	/* Add the mediator information to all hosts in the set. */
	nd = sd->sd_nodelist;
	/* All nodes are guaranteed to be ALIVE */
	while (nd) {
		if (clnt_updmeds(nd->nd_nodename, sp, &sd->sd_med, ep))
			goto rollback;
		nd = nd->nd_next;
	}

	RB_TEST(14, "addhosts", ep)

	/*
	 * If a MN diskset and there are drives in the set,
	 * set the master on the new nodes and
	 * automatically join the new nodes into the set.
	 */
	if (dd != NULL) {
		mddb_config_t   c;
		/*
		 * Is current set STALE?
		 */
		(void) memset(&c, 0, sizeof (c));
		c.c_id = 0;
		c.c_setno = sp->setno;
		if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
			(void) mdstealerror(ep, &c.c_mde);
			rval = -1;
			goto out;
		}
		if (c.c_flags & MDDB_C_STALE) {
			stale_flag = MNSET_IS_STALE;
		}

		/* Set master on newly added nodes */
		for (i = 0; i < node_c; i++) {
			if (clnt_mnsetmaster(node_v[i], sp,
			    sd->sd_mn_master_nodenm,
			    sd->sd_mn_master_nodeid, ep)) {
				goto rollback;
			}
		}
		/* Join newly added nodes to diskset and set OWN flag */
		for (i = 0; i < node_c; i++) {
			if (clnt_joinset(node_v[i], sp, stale_flag, ep))
				goto rollback;
			nd = sd->sd_nodelist;
			while (nd) {
				if (strcmp(nd->nd_nodename, node_v[i]) == 0) {
					nd->nd_flags |= MD_MN_NODE_OWN;
					/*
					 * Also set ADD flag since this flag
					 * is already set in rpc.metad - it's
					 * just not in the local copy.
					 * Could flush local cache and call
					 * metaget_setdesc, but this just
					 * adds time.  Since this node knows
					 * the state of the node flags in
					 * rpc.metad, just set the ADD
					 * flag and save time.
					 */
					nd->nd_flags |= MD_MN_NODE_ADD;
					break;
				}
				nd = nd->nd_next;
			}
		}

		/* Send new node flag list to all Owner nodes */
		nd = sd->sd_nodelist;
		while (nd) {
			if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
				nd = nd->nd_next;
				continue;
			}
			/*
			 * Will effectively set OWN flag in records kept
			 * cached in rpc.metad.  The ADD flag would have
			 * already been set by the call to clnt_addhosts.
			 */
			if (clnt_upd_nr_flags(nd->nd_nodename, sp,
			    sd->sd_nodelist, MD_NR_SET, NULL, ep)) {
				goto rollback;
			}
			nd = nd->nd_next;
		}
	}

	/*
	 * Mark the set record MD_SR_OK
	 */
	nd = sd->sd_nodelist;
	/* All nodes are guaranteed to be ALIVE */
	while (nd) {
		if (clnt_upd_sr_flags(nd->nd_nodename, sp, MD_SR_OK,
		    ep)) {
			goto rollback;
		}
		nd = nd->nd_next;
	}

	/*
	 * For MN diskset:
	 * On each newly added node, set the node record for that node
	 * to OK.  Then set all node records for the newly added
	 * nodes on all nodes to ok.
	 *
	 * By setting a node's own node record to ok first, even if
	 * the node adding the hosts panics, the rest of the nodes can
	 * determine the same node list during the choosing of the master
	 * during reconfig.  So, only nodes considered for mastership
	 * are nodes that have both MD_MN_NODE_OK and MD_SR_OK set
	 * on that node's rpc.metad.  If all nodes have MD_SR_OK set,
	 * but no node has its own MD_MN_NODE_OK set, then the set will
	 * be removed during reconfig since a panic occurred during the
	 * creation of the initial diskset.
	 */

	for (i = 0; i < node_c; i++) {
		nd = sd->sd_nodelist;
		/* All nodes are guaranteed to be ALIVE */
		while (nd) {
			if (strcmp(nd->nd_nodename, node_v[i]) == 0)
				break;
			nd = nd->nd_next;
		}
		/* Something wrong, will pick this up in next loop */
		if (nd == NULL)
			continue;

		/* Only changing my local cache of node list */
		saved_nd_next = nd->nd_next;
		nd->nd_next = NULL;

		/* Set node record for added host to ok on that host */
		if (clnt_upd_nr_flags(node_v[i], sp,
		    nd, MD_NR_OK, NULL, ep)) {
			nd->nd_next = saved_nd_next;
			goto rollback;
		}
		nd->nd_next = saved_nd_next;
	}

	/* Now set all node records on all nodes to be ok */
	nd = sd->sd_nodelist;
	/* All nodes are guaranteed to be ALIVE */
	while (nd) {
		if (clnt_upd_nr_flags(nd->nd_nodename, sp,
		    sd->sd_nodelist, MD_NR_OK, NULL, ep)) {
			goto rollback;
		}
		nd = nd->nd_next;
	}

	RB_TEST(15, "addhosts", ep)
out:
	/*
	 * Notify rpc.mdcommd on all nodes of a nodelist change.
	 * Send reinit command to mdcommd which forces it to get
	 * fresh set description.  Then send resume.
	 * Resume on class 0 will resume all classes, so can skip
	 * doing an explicit resume of class1 (ignore suspend1_flag).
	 */
	if (suspendall_flag) {
		/*
		 * Don't know if nodelist contains the nodes being added
		 * or not, so do reinit to nodes not being added (by skipping
		 * any nodes in the nodelist being added) and then do
		 * reinit to nodes being added if remote_sets_created is 1.
		 */
		nd = sd->sd_nodelist;
		/* All nodes are guaranteed to be ALIVE */
		while (nd) {
			/* Skip nodes being added - handled later */
			if (strinlst(nd->nd_nodename, node_c, node_v)) {
				nd = nd->nd_next;
				continue;
			}
			/* Class is ignored for REINIT */
			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
				if (rval == 0)
					(void) mdstealerror(ep, &xep);
				rval = -1;
				mde_perror(ep, dgettext(TEXT_DOMAIN,
				    "Unable to reinit rpc.mdcommd.\n"));
			}
			nd = nd->nd_next;
		}
		/*
		 * Send reinit to added nodes that had a set created since
		 * rpc.mdcommd is running on the nodes with a set.
		 */
		if (remote_sets_created == 1) {
			for (i = 0; i < node_c; i++) {
				if (clnt_mdcommdctl(node_v[i], COMMDCTL_REINIT,
				    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
					if (rval == 0)
						(void) mdstealerror(ep, &xep);
					rval = -1;
					mde_perror(ep, dgettext(TEXT_DOMAIN,
					    "Unable to reinit rpc.mdcommd.\n"));
				}
			}
		}
	}
	if ((suspend1_flag) || (suspendall_flag)) {
		/*
		 * Unlock diskset by resuming messages across the diskset.
		 * Just resume all classes so that resume is the same whether
		 * just one class was locked or all classes were locked.
		 *
		 * Don't know if nodelist contains the nodes being added
		 * or not, so do resume_all to nodes not being added (by
		 * skipping any nodes in the nodelist being added) and then do
		 * resume_all to nodes being added if remote_sets_created is 1.
		 */
		nd = sd->sd_nodelist;
		/* All nodes are guaranteed to be ALIVE */
		while (nd) {
			/* Skip nodes being added - handled later */
			if (strinlst(nd->nd_nodename, node_c, node_v)) {
				nd = nd->nd_next;
				continue;
			}
			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
				if (rval == 0)
					(void) mdstealerror(ep, &xep);
				rval = -1;
				mde_perror(ep, dgettext(TEXT_DOMAIN,
				    "Unable to resume rpc.mdcommd.\n"));
			}
			nd = nd->nd_next;
		}
		/*
		 * Send resume to added nodes that had a set created since
		 * rpc.mdcommd is be running on the nodes with a set.
		 */
		if (remote_sets_created == 1) {
			for (i = 0; i < node_c; i++) {
				/* Already verified to be alive */
				if (clnt_mdcommdctl(node_v[i], COMMDCTL_RESUME,
				    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS,
				    &xep)) {
					if (rval == 0)
						(void) mdstealerror(ep, &xep);
					rval = -1;
					mde_perror(ep, dgettext(TEXT_DOMAIN,
					    "Unable to resume rpc.mdcommd.\n"));
				}
			}
		}
		meta_ping_mnset(sp->setno);
		/*
		 * Start a resync thread on the newly added nodes
		 * if set is not stale. Also start a thread to update the
		 * abr state of all soft partitions
		 */
		if (stale_flag != MNSET_IS_STALE) {
			for (i = 0; i < node_c; i++) {
				if (clnt_mn_mirror_resync_all(node_v[i],
				    sp->setno, &xep)) {
					if (rval == 0)
						(void) mdstealerror(ep, &xep);
					rval = -1;
					mde_perror(ep, dgettext(TEXT_DOMAIN,
					    "Unable to start resync "
					    "thread.\n"));
				}
				if (clnt_mn_sp_update_abr(node_v[i],
				    sp->setno, &xep)) {
					if (rval == 0)
						(void) mdstealerror(ep, &xep);
					rval = -1;
					mde_perror(ep, dgettext(TEXT_DOMAIN,
					    "Unable to start sp update "
					    "thread.\n"));
				}
			}
		}
	}
	cl_sk = cl_get_setkey(sp->setno, sp->setname);
	/*
	 * Don't know if nodelist contains the nodes being added
	 * or not, so do clnt_unlock_set to nodes not being added (by
	 * skipping any nodes in the nodelist being added) and then do
	 * clnt_unlock_set to nodes being added.
	 */
	if (lock_flag) {
		nd = sd->sd_nodelist;
		/* All nodes are guaranteed to be ALIVE */
		while (nd) {
			/* Skip hosts we get in the next loop */
			if (strinlst(nd->nd_nodename, node_c, node_v)) {
				nd = nd->nd_next;
				continue;
			}
			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) {
				if (rval == 0)
					(void) mdstealerror(ep, &xep);
				rval = -1;
			}
			nd = nd->nd_next;
		}
		for (i = 0; i < node_c; i++) {
			/* Already verified to be alive */
			if (clnt_unlock_set(node_v[i], cl_sk, &xep)) {
				if (rval == 0)
					(void) mdstealerror(ep, &xep);
				rval = -1;
			}
		}
	}
	cl_set_setkey(NULL);

	metaflushsetname(sp);

	/* release signals back to what they were on entry */
	if (procsigs(FALSE, &oldsigs, &xep) < 0)
		mdclrerror(&xep);

	return (rval);

rollback:
	rval = -1;

	/* level 6 */
	if (rb_level > 5) {
		/*
		 * For each node being deleted, set DEL flag and
		 * reset OK flag on that node first.
		 * Until a node has turned off its own
		 * rpc.metad's NODE_OK flag, that node could be
		 * considered for master during a reconfig.
		 */
		for (i = 0; i < node_c; i++) {
			nd = sd->sd_nodelist;
			/* All nodes are guaranteed to be ALIVE */
			while (nd) {
				if (strcmp(nd->nd_nodename, node_v[i]) == 0)
					break;
				nd = nd->nd_next;
			}
			/* Something wrong, handle this in next loop */
			if (nd == NULL)
				continue;

			/* Only changing my local cache of node list */
			saved_nd_next = nd->nd_next;
			nd->nd_next = NULL;

			/* Set flags for del host to DEL on that host */
			if (clnt_upd_nr_flags(node_v[i], sp,
			    nd, MD_NR_DEL, NULL, &xep)) {
				mdclrerror(&xep);
			}
			nd->nd_next = saved_nd_next;
		}

		for (i = 0; i < node_c; i++) {
			if (dd != NULL) {
				/* Reset master on newly added node */
				if (clnt_mnsetmaster(node_v[i], sp, "",
				    MD_MN_INVALID_NID, &xep))
					mdclrerror(&xep);
				/* Withdraw set on newly added node */
				if (clnt_withdrawset(node_v[i], sp, &xep))
					mdclrerror(&xep);
			}
			/*
			 * Turn off owner flag in nodes to be deleted
			 * if there are drives in the set.
			 * Also, turn off NODE_OK and turn on NODE_DEL
			 * for nodes to be deleted.
			 * These flags are used to set the node
			 * record flags in all nodes in the set.
			 */
			nd = sd->sd_nodelist;
			while (nd) {
				if (strcmp(nd->nd_nodename, node_v[i]) == 0) {
					if (dd != NULL) {
						nd->nd_flags &= ~MD_MN_NODE_OWN;
					}
					nd->nd_flags |= MD_MN_NODE_DEL;
					nd->nd_flags &= ~MD_MN_NODE_OK;
					break;
				}
				nd = nd->nd_next;
			}
		}

		/*
		 * Now, reset owner and set delete flags for the deleted
		 * nodes on all nodes.
		 */
		nd = sd->sd_nodelist;
		while (nd) {
			if (clnt_upd_nr_flags(nd->nd_nodename, sp,
			    sd->sd_nodelist, MD_NR_SET, NULL, &xep)) {
				mdclrerror(&xep);
			}
			nd = nd->nd_next;
		}

		/*
		 * On each node being deleted, set the set record
		 * to be in DEL state.
		 */
		for (i = 0; i < node_c; i++) {
			if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, &xep)) {
				mdclrerror(&xep);
			}
		}
	}

	/* level 5 */
	if (rb_level > 4) {
		nd = sd->sd_nodelist;
		/* All nodes are guaranteed to be ALIVE */
		while (nd) {
			if (clnt_delhosts(nd->nd_nodename, sp, node_c,
			    node_v, &xep) == -1)
				mdclrerror(&xep);
			nd = nd->nd_next;
		}
	}

	/*
	 * Notify rpc.mdcommd on all nodes of a nodelist change.
	 * Send reinit command to mdcommd which forces it to get
	 * fresh set description.  Then send resume.
	 * Nodelist contains all nodes (existing + added).
	 */
	if (suspendall_flag) {
		/* Send reinit */
		nd = sd->sd_nodelist;
		/* All nodes are guaranteed to be ALIVE */
		/* Send reinit to nodes in nodelist before addhosts call */
		while (nd) {
			/*
			 * Skip nodes being added if remote sets were not
			 * created since rpc.mdcommd may not be running
			 * on the remote nodes.
			 */
			if ((remote_sets_created == 0) &&
			    (strinlst(nd->nd_nodename, node_c, node_v))) {
				nd = nd->nd_next;
				continue;
			}
			/* Class is ignored for REINIT */
			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
				mde_perror(&xep, dgettext(TEXT_DOMAIN,
				    "Unable to reinit rpc.mdcommd.\n"));
				mdclrerror(&xep);
			}
			nd = nd->nd_next;
		}

		/* Send resume */
		nd = sd->sd_nodelist;
		/* All nodes are guaranteed to be ALIVE */
		while (nd) {
			/*
			 * Skip nodes being added if remote sets were not
			 * created since rpc.mdcommd may not be running
			 * on the remote nodes.
			 */
			if ((remote_sets_created == 0) &&
			    (strinlst(nd->nd_nodename, node_c, node_v))) {
				nd = nd->nd_next;
				continue;
			}
			/*
			 * Resume all classes but class 1 so that lock is held
			 * against meta* commands.
			 * Send resume_all_but_1 to nodes in nodelist
			 * before addhosts call.
			 */
			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
			    sp, MD_MSG_CLASS0, MD_MSCF_DONT_RESUME_CLASS1,
			    &xep)) {
				mde_perror(&xep, dgettext(TEXT_DOMAIN,
				    "Unable to resume rpc.mdcommd.\n"));
				mdclrerror(&xep);
			}
			nd = nd->nd_next;
		}
		meta_ping_mnset(sp->setno);
	}

	/* level 4 */
	/* Nodelist may or may not contain nodes being added. */
	if (rb_level > 3 && dd != NULL) {
		nd = sd->sd_nodelist;
		while (nd) {
			/* Skip nodes not being added */
			if (!strinlst(nd->nd_nodename, node_c, node_v)) {
				nd = nd->nd_next;
				continue;
			}

			if (del_db_sidenms(sp, nd->nd_nodeid, &xep))
				mdclrerror(&xep);
			nd = nd->nd_next;
		}
	}

	/* level 3 */
	/* Nodelist may or may not contain nodes being added. */
	if (rb_level > 2 && dd != NULL) {
		nd = sd->sd_nodelist;
		while (nd) {
			/* Skip nodes not being added */
			if (!strinlst(nd->nd_nodename, node_c, node_v)) {
				nd = nd->nd_next;
				continue;
			}

			if (del_md_sidenms(sp, nd->nd_nodeid, &xep))
				mdclrerror(&xep);
			nd = nd->nd_next;
		}
	}

	/* level 1 */
	if (rb_level > 0) {
		if (dd != NULL) {
			/* delete the drive records */
			for (i = 0; i < node_c; i++) {
				if (clnt_deldrvs(node_v[i], sp, dd, &xep) == -1)
					mdclrerror(&xep);
			}
		}

		/* delete the set record */
		for (i = 0; i < node_c; i++) {
			if (clnt_delset(node_v[i], sp, &xep) == -1)
				mdclrerror(&xep);
		}
	}

	/* level 0 */
	cl_sk = cl_get_setkey(sp->setno, sp->setname);
	/* Don't test lock flag since guaranteed to be set if in rollback */
	/* Nodelist may or may not contain nodes being added. */
	/*
	 * Unlock diskset by resuming messages across the diskset.
	 * Just resume all classes so that resume is the same whether
	 * just one class was locked or all classes were locked.
	 */
	if ((suspend1_flag) || (suspendall_flag)) {
		/* All nodes are guaranteed to be ALIVE */
		nd = sd->sd_nodelist;
		while (nd) {
			/*
			 * Skip nodes being added since remote sets
			 * were either created and then deleted or
			 * were never created.  Either way - rpc.mdcommd
			 * may not be running on the remote node.
			 */
			if (strinlst(nd->nd_nodename, node_c, node_v)) {
				nd = nd->nd_next;
				continue;
			}
			if (clnt_mdcommdctl(nd->nd_nodename,
			    COMMDCTL_RESUME, sp, MD_MSG_CLASS0,
			    MD_MSCF_NO_FLAGS, &xep)) {
				mde_perror(&xep, dgettext(TEXT_DOMAIN,
				    "Unable to resume rpc.mdcommd.\n"));
				mdclrerror(&xep);
			}
			nd = nd->nd_next;
		}
		meta_ping_mnset(sp->setno);
	}
	nd = sd->sd_nodelist;
	/* All nodes are guaranteed to be ALIVE */
	while (nd) {
		/* Skip hosts we get in the next loop */
		if (strinlst(nd->nd_nodename, node_c, node_v)) {
			nd = nd->nd_next;
			continue;
		}

		if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
			mdclrerror(&xep);
		nd = nd->nd_next;
	}

	for (i = 0; i < node_c; i++)
		if (clnt_unlock_set(node_v[i], cl_sk, &xep))
			mdclrerror(&xep);
	cl_set_setkey(NULL);

	/* release signals back to what they were on entry */
	if (procsigs(FALSE, &oldsigs, &xep) < 0)
		mdclrerror(&xep);

	metaflushsetname(sp);

	return (rval);
}

/*
 * Add host(s) to the traditional diskset provided in sp.
 *	- create set if non-existent.
 *
 * Arguments:
 *	sp		diskset the hosts are added to
 *	multi_node	passed through to create_set() and
 *			create_set_on_hosts()
 *	node_c		number of entries in node_v
 *	node_v		names of the hosts to add
 *	auto_take	passed to create_set() when the set does not exist
 *			yet; rejected with MDE_DS_SINGLEHOST when the set
 *			already exists
 *	ep		error return
 *
 * Returns 0 on success.  On failure returns -1 (or the return value of
 * mddserror()) with the error recorded in ep.
 *
 * After the set has been locked on every host, the work proceeds in
 * numbered stages tracked by rb_level.  A failure jumps to the rollback
 * label, which undoes the completed stages in reverse order, releases
 * the locks and returns -1.  The normal exit and the lock-failure exit
 * both go through the out label, which releases the locks on the
 * existing hosts and on the hosts in node_v.
 */
static int
meta_traditional_set_addhosts(
	mdsetname_t	*sp,
	int		multi_node,
	int		node_c,
	char		**node_v,
	int		auto_take,
	md_error_t	*ep
)
{
	md_set_desc	*sd;
	md_drive_desc	*dd, *p;
	med_rec_t	medr;
	med_rec_t	rb_medr;	/* mediator record as it was on entry */
	int		rval = 0;
	int		bool;
	int		nodeindex;
	int		i;
	int		has_set;
	int		numsides;
	sigset_t	oldsigs;
	md_setkey_t	*cl_sk;
	int		rb_level = 0;	/* how many stages rollback must undo */
	md_error_t	xep = mdnullerror;	/* scratch for cleanup errors */
	int		max_meds;

	if (nodesuniq(sp, node_c, node_v, ep))
		return (-1);

	if (validate_nodes(sp, node_c, node_v, ep))
		return (-1);

	/* No set yet: any error other than MDE_NO_SET is fatal */
	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
		if (! mdiserror(ep, MDE_NO_SET))
			return (-1);
		mdclrerror(ep);
		return (create_set(sp, multi_node, node_c, node_v, auto_take,
		    ep));
	}

	/* The auto_take behavior is inconsistent with multiple hosts. */
	if (auto_take || sd->sd_flags & MD_SR_AUTO_TAKE) {
		(void) mddserror(ep, MDE_DS_SINGLEHOST, sp->setno, NULL, NULL,
		    sp->setname);
		return (-1);
	}

	/*
	 * We already have the set.
	 */

	/* Make sure we own the set */
	if (meta_check_ownership(sp, ep) != 0)
		return (-1);

	/*
	 * Perform the required checks for new hosts
	 */
	for (i = 0; i < node_c; i++) {
		/* The host must not already have a side in this set */
		if (getnodeside(node_v[i], sd) != MD_SIDEWILD)
			return (mddserror(ep, MDE_DS_NODEINSET, sp->setno,
			    node_v[i], NULL, sp->setname));

		/* Make sure this set name is not used on the other hosts */
		has_set = nodehasset(sp, node_v[i], NHS_N_EQ, ep);
		if (has_set < 0) {
			if (! mdiserror(ep, MDE_NO_SET))
				return (-1);
			/* Keep on truck'n */
			mdclrerror(ep);
		} else if (has_set)
			return (mddserror(ep, MDE_DS_NODEHASSET, sp->setno,
			    node_v[i], NULL, sp->setname));

		/* The set number must be free on the new host */
		if (clnt_setnumbusy(node_v[i], sp->setno, &bool, ep) == -1)
			return (-1);

		if (bool == TRUE)
			return (mddserror(ep, MDE_DS_SETNUMBUSY, sp->setno,
			    node_v[i], NULL, sp->setname));

		/* The set name must be acceptable on the new host */
		if (clnt_setnameok(node_v[i], sp, &bool, ep) == -1)
			return (-1);

		if (bool == FALSE)
			return (mddserror(ep, MDE_DS_SETNAMEBUSY, sp->setno,
			    node_v[i], NULL, sp->setname));

		if (check_setdrvs_againstnode(sp, node_v[i], ep))
			return (-1);
	}

	/* Count the number of occupied slots */
	numsides = 0;
	for (i = 0; i < MD_MAXSIDES; i++) {
		/* Count occupied slots */
		if (sd->sd_nodes[i][0] != '\0')
			numsides++;
	}

	/* Make sure that we have space to add the new sides */
	if ((numsides + node_c) > MD_MAXSIDES) {
		(void) mddserror(ep, MDE_DS_SIDENUMNOTAVAIL, sp->setno, NULL,
		    NULL, sp->setname);
		return (-1);
	}

	/*
	 * Get drive descriptors for the set.  A NULL result with no error
	 * in ep is not a failure; dd stays NULL and the drive-related
	 * stages below are skipped.
	 */
	if ((dd = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep)) == NULL)
		if (! mdisok(ep))
			return (-1);

	/*
	 * Setup the mediator record roll-back structure.  It is built from
	 * the node list as it is now, before the new hosts are merged in.
	 */
	(void) memset(&rb_medr, '\0', sizeof (med_rec_t));
	rb_medr.med_rec_mag = MED_REC_MAGIC;
	rb_medr.med_rec_rev = MED_REC_REV;
	rb_medr.med_rec_fl  = 0;
	rb_medr.med_rec_sn  = sp->setno;
	(void) strcpy(rb_medr.med_rec_snm, sp->setname);
	for (i = 0; i < MD_MAXSIDES; i++)
		(void) strcpy(rb_medr.med_rec_nodes[i], sd->sd_nodes[i]);
	rb_medr.med_rec_meds = sd->sd_med;	/* structure assignment */
	(void) memset(&rb_medr.med_rec_data, '\0', sizeof (med_data_t));
	rb_medr.med_rec_foff = 0;
	crcgen(&rb_medr, &rb_medr.med_rec_cks, sizeof (med_rec_t), NULL);

	if ((max_meds = get_max_meds(ep)) == 0)
		return (-1);

	/* END CHECK CODE */

	md_rb_sig_handling_on();

	/* Lock the set on current set members */
	for (i = 0; i < MD_MAXSIDES; i++) {
		/* Skip empty slots */
		if (sd->sd_nodes[i][0] == '\0')
			continue;

		if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
			rval = -1;
			goto out;
		}
	}

	/* Lock the set on new set members */
	for (i = 0; i < node_c; i++) {
		if (clnt_lock_set(node_v[i], sp, ep)) {
			rval = -1;
			goto out;
		}
	}

	RB_TEST(1, "addhosts", ep)

	RB_PREEMPT;
	rb_level = 1;	/* level 1 */

	RB_TEST(2, "addhosts", ep)

	/*
	 * Add the new hosts to the existing set record on the existing hosts
	 */
	for (i = 0; i < MD_MAXSIDES; i++) {
		/* skip empty slots */
		if (sd->sd_nodes[i][0] == '\0')
			continue;

		if (clnt_addhosts(sd->sd_nodes[i], sp, node_c, node_v, ep))
			goto rollback;
	}

	RB_PREEMPT;
	rb_level = 2;	/* level 2 */

	RB_TEST(3, "addhosts", ep);

	/*
	 * Merge the new entries into the set with the existing sides.
	 * This only changes the local copy in sd; from here on sd->sd_nodes
	 * holds both the existing and the new hosts.
	 */
	nodeindex = 0;
	for (i = 0; i < MD_MAXSIDES; i++) {
		/* Skip full slots */
		if (sd->sd_nodes[i][0] != '\0')
			continue;

		(void) strcpy(sd->sd_nodes[i], node_v[nodeindex++]);
		if (nodeindex == node_c)
			break;
	}

	/* If we have drives */
	if (dd != NULL) {
		/*
		 * For all the hosts being added, create a sidename structure
		 */
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			/* Skip nodes not being added */
			if (! strinlst(sd->sd_nodes[i], node_c, node_v))
				continue;

			for (p = dd; p != NULL; p = p->dd_next) {
				if (make_sideno_sidenm(sp, p->dd_dnp, i,
				    ep) != 0)
					goto rollback;
			}
		}

		/*
		 * Add the new sidename for each drive to the existing hosts
		 */
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			/* Skip nodes being added */
			if (strinlst(sd->sd_nodes[i], node_c, node_v))
				continue;

			if (clnt_add_drv_sidenms(sd->sd_nodes[i], mynode(), sp,
			    sd, node_c, node_v, ep)) {
				goto rollback;
			}
		}

		RB_TEST(4, "addhosts", ep)

		RB_PREEMPT;
		rb_level = 3;	/* level 3 */

		RB_TEST(5, "addhosts", ep)

		if (add_db_sidenms(sp, ep)) {
			goto rollback;
		}

	} else {
		/* No drives: nothing to do for this stage, just advance */
		RB_PREEMPT;
		rb_level = 3;
	}

	RB_TEST(6, "addhosts", ep)

	RB_PREEMPT;
	rb_level = 4;	/* level 4 */

	RB_TEST(7, "addhosts", ep)


	/* create the set on the new nodes, this adds the drives as well */
	if (create_set_on_hosts(sp, multi_node, node_c, node_v, 0, ep)) {
		goto rollback;
	}

	RB_TEST(8, "addhosts", ep)

	RB_PREEMPT;
	rb_level = 5;	/* level 5 */

	RB_TEST(9, "addhosts", ep)

	if (dd != NULL) {

		/*
		 * Add the device entries for the new sides into the namespace.
		 */
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			/* Skip nodes not being added */
			if (! strinlst(sd->sd_nodes[i], node_c, node_v))
				continue;

			if (add_md_sidenms(sp, i, MD_SIDEWILD, ep))
				goto rollback;
		}
	}

	RB_TEST(10, "addhosts", ep)

	RB_PREEMPT;
	rb_level = 6;	/* level 6 */

	RB_TEST(11, "addhosts", ep);

	if (dd != NULL) {
		/*
		 * Mark the drives MD_DR_OK.
		 */
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			if (clnt_upd_dr_flags(sd->sd_nodes[i], sp, dd,
			    MD_DR_OK, ep) == -1) {
				goto rollback;
			}
		}
	}

	RB_TEST(12, "addhosts", ep)

	/* Bring the mediator record up to date with the set record */
	medr = rb_medr;				/* structure assignment */
	for (i = 0; i < MD_MAXSIDES; i++)
		(void) strcpy(medr.med_rec_nodes[i], sd->sd_nodes[i]);
	crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL);

	/* Inform the mediator hosts of the new node list */
	for (i = 0; i < max_meds; i++) {
		/* Skip unused mediator slots */
		if (sd->sd_med.n_lst[i].a_cnt == 0)
			continue;

		if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp, &medr, ep))
			goto rollback;
	}

	/* Add the mediator information to all hosts in the set */
	for (i = 0; i < MD_MAXSIDES; i++) {
		/* Skip empty slots */
		if (sd->sd_nodes[i][0] == '\0')
			continue;

		if (clnt_updmeds(sd->sd_nodes[i], sp, &sd->sd_med, ep))
			goto rollback;
	}

	RB_TEST(13, "addhosts", ep)

	/*
	 * Mark the set record MD_SR_OK
	 */
	for (i = 0; i < MD_MAXSIDES; i++) {
		/* Skip empty slots */
		if (sd->sd_nodes[i][0] == '\0')
			continue;

		if (clnt_upd_sr_flags(sd->sd_nodes[i], sp, MD_SR_OK, ep))
			goto rollback;
	}

	RB_TEST(14, "addhosts", ep)

out:
	/*
	 * Reached on success (rval == 0) and when taking a lock failed
	 * (rval == -1, reason already in ep).  Only the first error is
	 * reported through ep; later unlock errors are discarded.
	 */
	cl_sk = cl_get_setkey(sp->setno, sp->setname);
	for (i = 0; i < MD_MAXSIDES; i++) {
		/* Skip empty slots */
		if (sd->sd_nodes[i][0] == '\0')
			continue;

		/* Skip hosts we get in the next loop */
		if (strinlst(sd->sd_nodes[i], node_c, node_v))
			continue;

		if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
			if (rval == 0)
				(void) mdstealerror(ep, &xep);
			else
				mdclrerror(&xep);
			rval = -1;
		}
	}

	/*
	 * Always try to unlock the new hosts, even when rval is already
	 * -1.  Some of them may have been locked before a later
	 * clnt_lock_set() failed, and a failed unlock of an existing host
	 * above must not leave the new hosts locked.  This matches the
	 * unconditional unlock done in the rollback path below.
	 */
	for (i = 0; i < node_c; i++) {
		if (clnt_unlock_set(node_v[i], cl_sk, &xep)) {
			if (rval == 0)
				(void) mdstealerror(ep, &xep);
			else
				mdclrerror(&xep);
			rval = -1;
		}
	}
	cl_set_setkey(NULL);

	metaflushsetname(sp);

	md_rb_sig_handling_off(md_got_sig(), md_which_sig());

	return (rval);

rollback:
	/*
	 * Undo the stages completed so far, highest level first.  Errors
	 * from the undo calls are collected in xep and cleared so that ep
	 * keeps the error that caused the rollback.
	 */

	/* Make sure we are blocking all signals */
	if (procsigs(TRUE, &oldsigs, &xep) < 0)
		mdclrerror(&xep);

	rval = -1;

	/* level 6 */
	if (rb_level > 5) {
		/* Put the entry-time mediator record back */
		for (i = 0; i < max_meds; i++) {
			if (sd->sd_med.n_lst[i].a_cnt == 0)
				continue;

			if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp,
			    &rb_medr, &xep))
				mdclrerror(&xep);
		}
		if (dd != NULL) {
			for (i = 0; i < MD_MAXSIDES; i++) {
				/* Skip empty slots */
				if (sd->sd_nodes[i][0] == '\0')
					continue;

				/* Skip nodes not being added */
				if (! strinlst(sd->sd_nodes[i], node_c, node_v))
					continue;

				if (del_md_sidenms(sp, i, &xep))
					mdclrerror(&xep);
			}
		}
	}

	/* level 5 */
	if (rb_level > 4) {
		if (dd != NULL) {
			/* delete the drive records */
			for (i = 0; i < node_c; i++) {
				if (clnt_deldrvs(node_v[i], sp, dd, &xep) == -1)
					mdclrerror(&xep);
			}
		}
		/* delete the set record on the 'new' hosts */
		for (i = 0; i < node_c; i++) {
			if (clnt_delset(node_v[i], sp, &xep) == -1)
				mdclrerror(&xep);
		}
	}

	/* level 4 */
	if (rb_level > 3 && dd != NULL) {
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			/* Skip nodes not being added */
			if (! strinlst(sd->sd_nodes[i], node_c, node_v))
				continue;

			if (del_db_sidenms(sp, i, &xep))
				mdclrerror(&xep);
		}
	}

	/* level 3 */
	if (rb_level > 2 && dd != NULL) {
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			/* Skip nodes not being added */
			if (! strinlst(sd->sd_nodes[i], node_c, node_v))
				continue;

			if (clnt_del_drv_sidenms(sd->sd_nodes[i], sp,
			    &xep) == -1)
				mdclrerror(&xep);
		}
	}

	/* level 2 */
	if (rb_level > 1) {
		/* sd->sd_nodes includes the new hosts at this point */
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			if (clnt_delhosts(sd->sd_nodes[i], sp, node_c, node_v,
			    &xep) == -1)
				mdclrerror(&xep);
		}
	}

	/* level 1 */
	if (rb_level > 0) {
		cl_sk = cl_get_setkey(sp->setno, sp->setname);
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			/* Skip hosts we get in the next loop */
			if (strinlst(sd->sd_nodes[i], node_c, node_v))
				continue;

			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
				mdclrerror(&xep);
		}

		for (i = 0; i < node_c; i++)
			if (clnt_unlock_set(node_v[i], cl_sk, &xep))
				mdclrerror(&xep);
		cl_set_setkey(NULL);
	}

	/* release signals back to what they were on entry */
	if (procsigs(FALSE, &oldsigs, &xep) < 0)
		mdclrerror(&xep);

	metaflushsetname(sp);

	md_rb_sig_handling_off(md_got_sig(), md_which_sig());

	return (rval);
}

/*
 * Add host(s) to the diskset provided in sp.
 * 	- create set if non-existent.
 *
 * This is a dispatcher only: a zero multi_node selects the traditional
 * diskset implementation, any other value selects the multinode one.
 * All arguments are handed to the chosen routine unchanged and its
 * return value is returned as is.
 */
int
meta_set_addhosts(
	mdsetname_t	*sp,
	int		multi_node,
	int		node_c,
	char		**node_v,
	int		auto_take,
	md_error_t	*ep
)
{
	/* Traditional disksets have their own add-host path */
	if (!multi_node) {
		return (meta_traditional_set_addhosts(sp, multi_node, node_c,
		    node_v, auto_take, ep));
	}

	return (meta_multinode_set_addhosts(sp, multi_node, node_c, node_v,
	    auto_take, ep));
}

/*
 * Delete host(s) from the diskset provided in sp.
 * 	- destroy set if last host in set is removed.
 */
int
meta_set_deletehosts(
	mdsetname_t		*sp,
	int			node_c,
	char			**node_v,
	int			forceflg,
	md_error_t		*ep
)
{
	md_set_desc		*sd;
	md_drive_desc		*dd;
	med_rec_t		medr;
	med_rec_t		rb_medr;
	int			i, j;
	int			has_set;
	int			numsides = 0;
	int			oha = FALSE;
	sigset_t		oldsigs;
	mhd_mhiargs_t		mhiargs;
	md_replicalist_t	*rlp = NULL;
	md_setkey_t		*cl_sk;
	ulong_t			max_genid = 0;
	int			rval = 0;
	int			rb_level = 0;
	int			max_meds = 0;
	md_error_t		xep = mdnullerror;
	md_mnnode_desc		*nd;
	md_mnnode_record	*nr;
	int			delete_master = 0;
	int			suspendall_flag = 0, suspendall_flag_rb = 0;
	int			suspend1_flag = 0;
	int			lock_flag = 0;
	int			stale_flag = 0;
	int			*node_id_list = NULL;
	int			remote_sets_deleted = 0;

	if ((sd = metaget_setdesc(sp, ep)) == NULL)
		return (-1);

	/*
	 * Verify that list of nodes being deleted contains no
	 * duplicates.
	 */
	if (nodesuniq(sp, node_c, node_v, ep))
		return (-1);

	/* Make sure we own the set */
	if (meta_check_ownership(sp, ep) != 0)
		return (-1);

	/*
	 * The drive and node records are stored in the local mddbs of each
	 * node in the diskset.  Each node's rpc.metad daemon reads in the set,
	 * drive and node records from that node's local mddb and caches them
	 * internally. Any process needing diskset information contacts its
	 * local rpc.metad to get this information.  Since each node in the
	 * diskset is independently reading the set information from its local
	 * mddb, the set, drive and node records in the local mddbs must stay
	 * in-sync, so that all nodes have a consistent view of the diskset.
	 *
	 * For a multinode diskset, explicitly verify that all nodes in the
	 * diskset are ALIVE (i.e. are in the API membership list) if the
	 * forceflag is FALSE.  (The case of forceflag being TRUE is handled
	 * in the OHA check below.)
	 *
	 * If forceflag is FALSE and a node in the diskset is not in
	 * the membership list, then fail this operation since all nodes must
	 * be ALIVE in order to delete the node record from their local mddb.
	 * If a panic of this node leaves the local mddbs set, node and drive
	 * records out-of-sync, the reconfig cycle will fix the local mddbs
	 * and force them back into synchronization.
	 */
	if ((forceflg == FALSE) && (MD_MNSET_DESC(sd))) {
		nd = sd->sd_nodelist;
		while (nd) {
			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				return (mddserror(ep, MDE_DS_NOTINMEMBERLIST,
				    sp->setno, nd->nd_nodename,
				    NULL, sp->setname));
			}
			nd = nd->nd_next;
		}
	}


	/*
	 * Lock the set on current set members.
	 * Set locking done much earlier for MN diskset than for traditional
	 * diskset since lock_set and SUSPEND are used to protect against
	 * other meta* commands running on the other nodes.
	 */
	if (MD_MNSET_DESC(sd)) {
		/* Make sure we are blocking all signals */
		if (procsigs(TRUE, &oldsigs, &xep) < 0)
			mdclrerror(&xep);

		nd = sd->sd_nodelist;
		while (nd) {
			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				nd = nd->nd_next;
				continue;
			}

			if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
				rval = -1;
				goto out2;
			}
			lock_flag = 1;
			nd = nd->nd_next;
		}
		/*
		 * Lock out other meta* commands by suspending
		 * class 1 messages across the diskset.
		 */
		nd = sd->sd_nodelist;
		while (nd) {
			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				nd = nd->nd_next;
				continue;
			}
			if (clnt_mdcommdctl(nd->nd_nodename,
			    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
			    MD_MSCF_NO_FLAGS, ep)) {
				rval = -1;
				goto out2;
			}
			suspend1_flag = 1;
			nd = nd->nd_next;
		}
	}

	for (i = 0; i < node_c; i++)
		if (getnodeside(node_v[i], sd) == MD_SIDEWILD) {
			(void) mddserror(ep, MDE_DS_NODENOTINSET, sp->setno,
			    node_v[i], NULL, sp->setname);
			rval = -1;
			goto out2;
		}

	/*
	 * Count the number of nodes currently in the set.
	 */
	if (MD_MNSET_DESC(sd)) {
		nd = sd->sd_nodelist;
		while (nd) {
			numsides++;
			nd = nd->nd_next;
		}
	} else {
		for (i = 0; i < MD_MAXSIDES; i++)
			/* Count full slots */
			if (sd->sd_nodes[i][0] != '\0')
				numsides++;
	}

	/*
	 * OHA mode == -f -h <hostname>
	 * OHA is One Host Administration that occurs when the forceflag (-f)
	 * is set and at least one host in the diskset isn't responding
	 * to RPC requests.
	 *
	 * When in OHA mode, a node cannot delete itself from a diskset.
	 * When in OHA mode, a node can delete a list of nodes from a diskset
	 * even if some of the nodes in the diskset are unresponsive.
	 *
	 * For multinode diskset, only allow OHA mode when the nodes that
	 * aren't responding in the diskset are not in the membership list
	 * (i.e. nodes that aren't responding are not marked ALIVE).
	 * Nodes that aren't in the membership list will be rejoining
	 * the diskset through a reconfig cycle and the local mddb set
	 * and node records can be reconciled during the reconfig cycle.
	 *
	 * If a node isn't responding, but is still in the membership list,
	 * fail the request since the node may not be responding because
	 * rpc.metad died and is restarting.  In this case, no reconfig
	 * cycle will be started, so there's no way to recover if
	 * the host delete operation was allowed.
	 *
	 * NOTE: if nodes that weren't in the membership when the OHA host
	 * delete occurred are now the only nodes in membership list,
	 * those nodes will see the old view of the diskset.  As soon as
	 * a node re-enters the cluster that was present in the cluster
	 * during the host deletion, the diskset will reflect the host
	 * deletion on all nodes presently in the cluster.
	 */
	if (forceflg == TRUE) {
		if (MD_MNSET_DESC(sd)) {
			nd = sd->sd_nodelist;
			while (nd) {
				/*
				 * If a node isn't ALIVE (in member list),
				 * then allow a force-able delete in OHA mode.
				 */
				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
					oha = TRUE;
					break;
				}
				/*
				 * Don't test for clnt_nullproc since already
				 * tested the RPC connections by clnt_lock_set.
				 */
				nd = nd->nd_next;
			}
		} else {
			for (i = 0; i < MD_MAXSIDES; i++) {
				/* Skip empty slots */
				if (sd->sd_nodes[i][0] == '\0')
					continue;

				if (clnt_nullproc(sd->sd_nodes[i], ep) == -1) {
					/*
					 * If we timeout to at least one
					 * client, then we can allow OHA mode,
					 * otherwise, we are in normal mode.
					 */
					if (mdanyrpcerror(ep)) {
						mdclrerror(ep);
						if (strinlst(sd->sd_nodes[i],
						    node_c, node_v)) {
							oha = TRUE;
							break;
						}
					}
				}
			}
		}
	}

	/*
	 * Don't allow this for MN diskset since meta_set_destroy of 1 node
	 * does NOT remove this node's node record from the other node's set
	 * records in their local mddb.  This leaves a MN diskset in a very
	 * messed up state.
	 */
	if (!(MD_MNSET_DESC(sd))) {
		/* Destroy set */
		if (forceflg == TRUE && node_c == 1 &&
		    strcmp(mynode(), node_v[0]) == 0) {
			/* Can return since !MN diskset so nothing to unlock */
			return (meta_set_destroy(sp, TRUE, ep));
		}
	}


	/*
	 * In multinode diskset, can only delete self if this
	 * is the last node in the set or if all nodes in
	 * the set are being deleted.  The traditional diskset code
	 * allows a node to delete itself (when there are other nodes
	 * in the diskset) when using the force flag, but that code
	 * path doesn't have the node remove itself from
	 * the set node list on the other nodes.  Since this isn't
	 * satisfactory for the multinode diskset, just don't
	 * allow this operation.
	 */
	if (MD_MNSET_DESC(sd) && (numsides > 1) && (node_c != numsides) &&
	    strinlst(mynode(), node_c, node_v)) {
		(void) mddserror(ep, MDE_DS_MNCANTDELSELF, sp->setno,
		    mynode(), NULL, sp->setname);
		rval = -1;
		goto out2;
	}

	/*
	 * In multinode diskset, don't allow deletion of master node unless
	 * this is the only node left or unless all nodes are being
	 * deleted since there is no way to switch
	 * master ownership (unless via a cluster reconfig cycle).
	 */
	delete_master = strinlst(sd->sd_mn_master_nodenm, node_c, node_v);
	if (MD_MNSET_DESC(sd) && (numsides > 1) && (node_c != numsides) &&
	    delete_master) {
		(void) mddserror(ep, MDE_DS_CANTDELMASTER, sp->setno,
		    sd->sd_mn_master_nodenm, NULL, sp->setname);
		rval = -1;
		goto out2;
	}


	/* Deleting self w/o forceflg */
	if (forceflg == FALSE && numsides > 1 &&
	    strinlst(mynode(), node_c, node_v)) {
		(void) mddserror(ep, MDE_DS_CANTDELSELF, sp->setno,
		    mynode(), NULL, sp->setname);
		rval = -1;
		goto out2;
	}

	/*
	 * Setup the mediator record roll-back structure for a trad diskset.
	 *
	 * For a MN diskset, the deletion of a host in the diskset
	 * does not cause an update of the mediator record.  If the
	 * host deletion will cause the diskset to be removed (this is
	 * the last host being removed or all hosts are being removed)
	 * then the mediator record must have already been removed by the
	 * user or this delete host operation will fail (a check for
	 * this is done later in this routine).
	 */
	if (!(MD_MNSET_DESC(sd))) {
		(void) memset(&rb_medr, '\0', sizeof (med_rec_t));
		rb_medr.med_rec_mag = MED_REC_MAGIC;
		rb_medr.med_rec_rev = MED_REC_REV;
		rb_medr.med_rec_fl = 0;
		rb_medr.med_rec_sn  = sp->setno;
		(void) strcpy(rb_medr.med_rec_snm, sp->setname);
		for (i = 0; i < MD_MAXSIDES; i++)
			(void) strcpy(rb_medr.med_rec_nodes[i],
			    sd->sd_nodes[i]);
		rb_medr.med_rec_meds = sd->sd_med;  /* structure assigment */
		(void) memset(&rb_medr.med_rec_data, '\0', sizeof (med_data_t));
		rb_medr.med_rec_foff = 0;
		crcgen(&rb_medr, &rb_medr.med_rec_cks,
		    sizeof (med_rec_t), NULL);

		/* Bring the mediator record up to date with the set record */
		medr = rb_medr;			/* structure assignment */

		if ((max_meds = get_max_meds(ep)) == 0) {
			rval = -1;
			goto out2;
		}
	}

	/*
	 * For traditional diskset:
	 * Check to see if all the hosts we are trying to delete the set from
	 * have a set "setname" that is the same as ours, i.e. - same name,
	 * same time stamp, same genid.  We only do this if forceflg is not
	 * specified or we are in OHA mode.
	 */
	if (!(MD_MNSET_DESC(sd)) && (forceflg == FALSE || oha == TRUE)) {
		int	fix_node_v = FALSE;
		int	j;

		for (i = 0; i < node_c; i++) {
			/* We skip this side */
			if (strcmp(mynode(), node_v[i]) == 0)
				continue;

			has_set = nodehasset(sp, node_v[i], NHS_NSTG_EQ, ep);

			if (has_set < 0) {
				char	 *anode[1];

				/*
				 * Being unable to talk to the host is only
				 * allowed in OHA mode.
				 */
				if (oha == TRUE && mdanyrpcerror(ep)) {
					mdclrerror(ep);
					continue;
				}

				/*
				 * We got an error we do not, or are not,
				 * prepared to handle.
				 */
				if (! mdiserror(ep, MDE_NO_SET) &&
				    ! mdismddberror(ep, MDE_DB_NODB)) {
					rval = -1;
					goto out2;
				}
				mdclrerror(ep);

				/*
				 * If we got here: both hosts are up; a host in
				 * our set record does not have the set. So we
				 * delete the host from our set and invalidate
				 * the node.
				 */
				anode[0] = Strdup(node_v[i]);

				rval = del_host_noset(sp, anode, ep);

				/*
				 * If we delete a host, make sure the mediator
				 * hosts are made aware of this.
				 */
				for (j = 0; j < MD_MAXSIDES; j++) {
					if (strcmp(medr.med_rec_nodes[j],
					    node_v[i]) != 0)
						continue;
					(void) memset(&medr.med_rec_nodes[j],
					    '\0', sizeof (md_node_nm_t));
				}
				crcgen(&medr, &medr.med_rec_cks,
				    sizeof (med_rec_t), NULL);

				rb_medr = medr;		/* struct assignment */

				Free(anode[0]);

				if (rval == -1)
					goto out2;

				node_v[i][0] = '\0';
				fix_node_v = TRUE;
				continue;
			}

			/*
			 * If we can talk to the host, and they do not have the
			 * exact set, then we disallow the operation.
			 */
			if (has_set == FALSE) {
				(void) mddserror(ep, MDE_DS_NODENOSET,
				    sp->setno, node_v[i], NULL, sp->setname);
				rval = -1;
				goto out2;
			}
		}

		/*
		 * Here we prune the node_v's that were invalidated above.
		 */
		if (fix_node_v == TRUE) {
			i = 0;
			while (i < node_c) {
				if (node_v[i][0] == '\0') {
					for (j = i; (j + 1) < node_c; j++)
						node_v[j] = node_v[j + 1];
					node_c--;
				}
				i++;
			}
			/*
			 * If we are left with no nodes, then we have
			 * completed the operation.
			 */
			if (node_c == 0) {
				/*
				 * Inform the mediator hosts of the new node
				 * list
				 */
				for (i = 0; i < max_meds; i++) {
					if (sd->sd_med.n_lst[i].a_cnt == 0)
						continue;

					if (clnt_med_upd_rec(
					    &sd->sd_med.n_lst[i], sp, &medr,
					    ep))
						mdclrerror(ep);
				}
				rval = 0;
				goto out2;
			}
		}
	}

	/*
	 * For multinode diskset:
	 * If forceflag is FALSE then check to see if all the hosts we
	 * are trying to delete the set from have a set "setname" that
	 * is the same as ours, i.e. - same name, same time stamp, same genid.
	 * If forceflag is TRUE, then we don't care if the hosts being
	 * deleted have the same set information or not since user is forcing
	 * those hosts to be deleted.
	 */
	if ((MD_MNSET_DESC(sd)) && (forceflg == FALSE)) {
		for (i = 0; i < node_c; i++) {
			/* We skip this node since comparing against it */
			if (strcmp(mynode(), node_v[i]) == 0)
				continue;

			has_set = nodehasset(sp, node_v[i], NHS_NSTG_EQ, ep);

			if (has_set < 0) {
				rval = -1;
				goto out2;
			}

			/*
			 * If we can talk to the host, and they do not have the
			 * exact set, then we disallow the operation.
			 */
			if (has_set == FALSE) {
				(void) mddserror(ep, MDE_DS_NODENOSET,
				    sp->setno, node_v[i], NULL, sp->setname);
				rval = -1;
				goto out2;
			}
		}
	}

	/*
	 * For traditional diskset:
	 * Can't allow user to delete their node (without deleting all nodes)
	 * out of a set in OHA mode, would leave a real mess.
	 * This action was already failed above for a MN diskset.
	 */
	if (!(MD_MNSET_DESC(sd)) && (oha == TRUE) &&
	    strinlst(mynode(), node_c, node_v)) {
		/* Can directly return since !MN diskset; nothing to unlock */
		return (mddserror(ep, MDE_DS_OHACANTDELSELF, sp->setno,
		    mynode(), NULL, sp->setname));
	}


	/* Get the drive descriptors for this set */
	if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
	    ep)) == NULL) {
		if (! mdisok(ep)) {
			rval = -1;
			goto out2;
		}
	}

	/*
	 * We have been asked to delete all the hosts in the set, i.e. - delete
	 * the whole set.
	 */
	if (node_c == numsides) {
		/*
		 * This is only a valid operation if all drives have been
		 * removed first.
		 */

		if (dd != NULL) {
			(void) mddserror(ep, MDE_DS_HASDRIVES, sp->setno,
			    NULL, NULL, sp->setname);
			rval = -1;
			goto out2;
		}

		/*
		 * If a mediator is currently associated with this set,
		 * fail the deletion of the last host(s).
		 */
		if (sd->sd_med.n_cnt != 0) {
			(void) mddserror(ep, MDE_DS_HASMED, sp->setno,
			    NULL, NULL, sp->setname);
			rval = -1;
			goto out2;
		}

		if (! mdisok(ep)) {
			rval = -1;
			goto out2;
		}

		rval = del_set_nodrives(sp, node_c, node_v, oha, ep);
		remote_sets_deleted = 1;
		goto out2;
	}

	/*
	 * Get timeout values in case we need to roll back
	 */
	(void) memset(&mhiargs, '\0', sizeof (mhiargs));
	if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) != 0) {
		rval = -1;
		goto out2;
	}

	if (dd != NULL) {
		/*
		 * We need this around for re-adding DB side names later.
		 */
		if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) {
			rval = -1;
			goto out2;
		}

		/*
		 * Alloc nodeid list if drives are present in diskset.
		 * nodeid list is used to reset mirror owners if the
		 * owner is a deleted node.
		 */
		if (MD_MNSET_DESC(sd)) {
			node_id_list = Zalloc(sizeof (int) * node_c);
		}
	}

	/* Lock the set on current set members */
	if (!(MD_MNSET_DESC(sd))) {
		md_rb_sig_handling_on();
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
				if (oha == TRUE && mdanyrpcerror(ep)) {
					mdclrerror(ep);
					continue;
				}
				rval = -1;
				goto out2;
			}
			lock_flag = 1;
		}
	}

	RB_TEST(1, "deletehosts", ep)

	RB_PREEMPT;
	rb_level = 1;	/* level 1 */

	RB_TEST(2, "deletehosts", ep)

	if (MD_MNSET_DESC(sd)) {
		md_mnnode_desc		*saved_nd_next;
		mddb_config_t		c;

		if (dd != NULL) {
			/*
			 * Notify rpc.mdcommd on all nodes of a nodelist change.
			 * Start by suspending rpc.mdcommd (which drains it of
			 * all messages), then change the nodelist followed
			 * by a reinit and resume.
			 */
			nd = sd->sd_nodelist;
			while (nd) {
				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
					nd = nd->nd_next;
					continue;
				}
				if (clnt_mdcommdctl(nd->nd_nodename,
				    COMMDCTL_SUSPEND, sp,
				    MD_MSG_CLASS0,
				    MD_MSCF_NO_FLAGS, ep)) {
					rval = -1;
					goto out2;
				}
				suspendall_flag = 1;
				nd = nd->nd_next;
			}
			/*
			 * Is current set STALE?
			 * Need to know this if delete host fails and node
			 * is re-joined to diskset.
			 */
			(void) memset(&c, 0, sizeof (c));
			c.c_id = 0;
			c.c_setno = sp->setno;
			if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
				(void) mdstealerror(ep, &c.c_mde);
				rval = -1;
				goto out2;
			}
			if (c.c_flags & MDDB_C_STALE) {
				stale_flag = MNSET_IS_STALE;
			}
		}

		/*
		 * For each node being deleted, set DEL flag and
		 * reset OK flag on that node first.
		 * Until a node has turned off its own
		 * rpc.metad's NODE_OK flag, that node could be
		 * considered for master during a reconfig.
		 */
		for (i = 0; i < node_c; i++) {
			/*
			 * During OHA mode, don't issue RPCs to
			 * non-alive nodes since there is no reason to
			 * wait for RPC timeouts.
			 */
			nd = sd->sd_nodelist;
			while (nd) {
				if (strcmp(nd->nd_nodename, node_v[i]) == 0)
					break;
				nd = nd->nd_next;
			}
			/* Something wrong, handle this in next loop */
			if (nd == NULL)
				continue;

			/* If node_id_list is alloc'd, fill in for later use */
			if (node_id_list)
				node_id_list[i] = nd->nd_nodeid;

			/* All nodes are guaranteed to be ALIVE unless OHA */
			if ((oha == TRUE) &&
			    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
				continue;
			}

			/* Only changing my local cache of node list */
			saved_nd_next = nd->nd_next;
			nd->nd_next = NULL;

			/* Set flags for del host to DEL on that host */
			if (clnt_upd_nr_flags(node_v[i], sp,
			    nd, MD_NR_DEL, NULL, ep)) {
				nd->nd_next = saved_nd_next;
				goto rollback;
			}
			nd->nd_next = saved_nd_next;
		}
		for (i = 0; i < node_c; i++) {
			/*
			 * Turn off owner flag in nodes to be deleted
			 * if this node has been joined.
			 * Also, turn off NODE_OK and turn on NODE_DEL
			 * for nodes to be deleted.
			 * These flags are used to set the node
			 * record flags in all nodes in the set.
			 * Only withdraw nodes that are joined.
			 */
			nd = sd->sd_nodelist;
			while (nd) {
				/*
				 * Don't communicate with non-ALIVE node if
				 * in OHA - but set flags in master list so
				 * alive nodes are updated correctly.
				 */
				if (strcmp(nd->nd_nodename, node_v[i]) == 0) {
					if ((oha == TRUE) && (!(nd->nd_flags &
					    MD_MN_NODE_ALIVE))) {
						nd->nd_flags |= MD_MN_NODE_DEL;
						nd->nd_flags &= ~MD_MN_NODE_OK;
						nd = nd->nd_next;
						continue;
					}
					if (nd->nd_flags & MD_MN_NODE_OWN) {
						/*
						 * Going to set locally cached
						 * node flags to rollback join
						 * so in case of error, the
						 * rollback code knows which
						 * nodes to re-join.  rpc.metad
						 * ignores the RB_JOIN flag.
						 */
						nd->nd_flags |=
						    MD_MN_NODE_RB_JOIN;
						nd->nd_flags &= ~MD_MN_NODE_OWN;

						/*
						 * Be careful in ordering of
						 * following steps so that
						 * recovery from a panic
						 * between the steps is viable.
						 * Only reset master info in
						 * rpc.metad - don't reset
						 * local cached info which will
						 * be used to set master info
						 * back if failure (rollback).
						 */
						if (clnt_withdrawset(
						    nd->nd_nodename, sp, ep))
							goto rollback;

						/*
						 * Reset master on deleted node
						 */
						if (clnt_mnsetmaster(node_v[i],
						    sp, "", MD_MN_INVALID_NID,
						    ep))
							goto rollback;
					}

					nd->nd_flags |= MD_MN_NODE_DEL;
					nd->nd_flags &= ~MD_MN_NODE_OK;
				}
				nd = nd->nd_next;
			}
		}

		/*
		 * Now, reset owner and set delete flags for the
		 * deleted nodes on all nodes.
		 */
		nd = sd->sd_nodelist;
		while (nd) {
			/* Skip non-ALIVE node if in OHA */
			if ((oha == TRUE) &&
			    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
				nd = nd->nd_next;
				continue;
			}
			if (clnt_upd_nr_flags(nd->nd_nodename, sp,
			    sd->sd_nodelist, MD_NR_SET, NULL, ep)) {
				goto rollback;
			}
			nd = nd->nd_next;
		}
		/*
		 * Notify rpc.mdcommd on all nodes of a nodelist change.
		 * Send reinit command to mdcommd which forces it to get
		 * fresh set description.
		 */
		if (suspendall_flag) {
			/* Send reinit */
			nd = sd->sd_nodelist;
			while (nd) {
				if ((oha == TRUE) &&
				    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
					nd = nd->nd_next;
					continue;
				}
				/* Class is ignored for REINIT */
				if (clnt_mdcommdctl(nd->nd_nodename,
				    COMMDCTL_REINIT, sp, NULL,
				    MD_MSCF_NO_FLAGS, ep)) {
					mde_perror(ep, dgettext(TEXT_DOMAIN,
					    "Unable to reinit rpc.mdcommd.\n"));
					goto rollback;
				}
				nd = nd->nd_next;
			}
			/* Send resume */
			nd = sd->sd_nodelist;
			while (nd) {
				if ((oha == TRUE) &&
				    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
					nd = nd->nd_next;
					continue;
				}
				if (clnt_mdcommdctl(nd->nd_nodename,
				    COMMDCTL_RESUME, sp, MD_MSG_CLASS0,
				    MD_MSCF_DONT_RESUME_CLASS1, ep)) {
					mde_perror(ep, dgettext(TEXT_DOMAIN,
					    "Unable to resume rpc.mdcommd.\n"));
					goto rollback;
				}
				nd = nd->nd_next;
			}
			meta_ping_mnset(sp->setno);
		}
	}


	/*
	 * Mark the set record MD_SR_DEL on the hosts we are deleting
	 * If a MN diskset and OHA mode, don't issue RPC to nodes that
	 * are not ALIVE.
	 * If a MN diskset and not in OHA mode, then all nodes must respond
	 * to RPC (be alive) or this routine will return failure.
	 * If a traditional diskset, ignore all RPC failures if in OHA mode.
	 */
	for (i = 0; i < node_c; i++) {

		RB_TEST(3, "deletehosts", ep)

		if ((MD_MNSET_DESC(sd)) && (oha == TRUE)) {
			/*
			 * During OHA mode, don't issue RPCs to
			 * non-alive nodes since there is no reason to
			 * wait for RPC timeouts.
			 */
			nd = sd->sd_nodelist;
			while (nd) {
				if (strcmp(nd->nd_nodename, node_v[i]) == 0) {
					break;
				}
				nd = nd->nd_next;
			}
			if (nd == NULL) {
				(void) mddserror(ep, MDE_DS_NODENOTINSET,
				    sp->setno, node_v[i], NULL, sp->setname);
				goto rollback;
			} else if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				/* Skip non-ALIVE node if in OHA mode */
				continue;
			} else {
				if (clnt_upd_sr_flags(node_v[i], sp,
				    MD_SR_DEL, ep)) {
					goto rollback;
				}
			}
		} else if ((MD_MNSET_DESC(sd)) && (oha == FALSE)) {
			/*
			 * All nodes should be alive in non-oha mode.
			 */
			if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) {
				goto rollback;
			}
		} else {
			/*
			 * For traditional diskset, issue the RPC and
			 * ignore RPC failure if in OHA mode.
			 */
			if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) {
				if (oha == TRUE && mdanyrpcerror(ep)) {
					mdclrerror(ep);
					continue;
				}
				goto rollback;
			}
		}

		RB_TEST(4, "deletehosts", ep)
	}

	RB_TEST(5, "deletehosts", ep)

	RB_PREEMPT;
	rb_level = 2;	/* level 2 */

	RB_TEST(6, "deletehosts", ep)

	/* Delete the set on the hosts we are deleting */
	if (del_set_on_hosts(sp, sd, dd, node_c, node_v, oha, ep)) {
		if (node_id_list)
			Free(node_id_list);
		/*
		 * Failure during del_set_on_hosts would have recreated
		 * the diskset on the remote hosts, but for multi-owner
		 * disksets need to set node flags properly and REINIT and
		 * RESUME rpc.mdcommd, so just let the rollback code
		 * do this.
		 */
		if (MD_MNSET_DESC(sd))
			goto rollback;
		return (-1);
	}
	remote_sets_deleted = 1;

	RB_TEST(19, "deletehosts", ep)

	RB_PREEMPT;
	rb_level = 3;	/* level 3 */

	RB_TEST(20, "deletehosts", ep)

	/* Delete the host from sets on hosts not being deleted */
	if (MD_MNSET_DESC(sd)) {
		nd = sd->sd_nodelist;
		/* All nodes are guaranteed to be ALIVE unless in oha mode */
		while (nd) {
			/*
			 * During OHA mode, don't issue RPCs to
			 * non-alive nodes since there is no reason to
			 * wait for RPC timeouts.
			 */
			if ((oha == TRUE) &&
			    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
				nd = nd->nd_next;
				continue;
			}

			/* Skip nodes being deleted */
			if (strinlst(nd->nd_nodename, node_c, node_v)) {
				nd = nd->nd_next;
				continue;
			}
			if (clnt_delhosts(nd->nd_nodename, sp, node_c, node_v,
			    ep) == -1) {
				goto rollback;
			}

			RB_TEST(21, "deletehosts", ep)
			nd = nd->nd_next;
		}
	} else {
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			/* Skip nodes being deleted */
			if (strinlst(sd->sd_nodes[i], node_c, node_v))
				continue;

			if (clnt_delhosts(sd->sd_nodes[i], sp, node_c, node_v,
			    ep) == -1) {
				if (oha == TRUE && mdanyrpcerror(ep)) {
					mdclrerror(ep);
					continue;
				}
				goto rollback;
			}

			RB_TEST(21, "deletehosts", ep)
		}
	}

	/* We have drives */
	if (dd != NULL) {
		RB_TEST(22, "deletehosts", ep)

		RB_PREEMPT;
		rb_level = 4;	/* level 4 */

		RB_TEST(23, "deletehosts", ep)

		/*
		 * Delete the old sidename for each drive on all the hosts.
		 * If a multi-node diskset, each host only stores
		 * the side information for itself.  So, a multi-node
		 * diskset doesn't delete the old sidename for
		 * an old host.
		 *
		 * If a MN diskset, reset owners of mirrors that are
		 * owned by the deleted nodes.
		 */
		if (!(MD_MNSET_DESC(sd))) {
			for (i = 0; i < MD_MAXSIDES; i++) {
				/* Skip empty slots */
				if (sd->sd_nodes[i][0] == '\0')
					continue;

				/* Skip nodes being deleted */
				if (strinlst(sd->sd_nodes[i], node_c, node_v))
					continue;

				if (clnt_del_drv_sidenms(sd->sd_nodes[i], sp,
				    ep)) {
					if (oha == TRUE && mdanyrpcerror(ep)) {
						mdclrerror(ep);
						continue;
					}
					metaflushsetname(sp);
					goto rollback;
				}

				RB_TEST(24, "deletehosts", ep)
			}
		} else {
			nd = sd->sd_nodelist;
			/* All nodes guaranteed ALIVE unless in oha mode */
			while (nd) {
				/*
				 * If mirror owner was set to a deleted node,
				 * then each existing node resets mirror owner
				 * to NULL.
				 *
				 * During OHA mode, don't issue RPCs to
				 * non-alive nodes since there is no reason to
				 * wait for RPC timeouts.
				 */
				if ((oha == TRUE) &&
				    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
					nd = nd->nd_next;
					continue;
				}

				/* Skip nodes being deleted */
				if (strinlst(nd->nd_nodename, node_c, node_v)) {
					nd = nd->nd_next;
					continue;
				}

				/*
				 * If mirror owner is a deleted node, reset
				 * mirror owners to NULL.  If an error occurs,
				 * print a warning and continue.  Don't fail
				 * metaset because of mirror owner reset
				 * problem since next node to grab mirror
				 * will resolve this issue.  Before next node
				 * grabs mirrors, metaset will show the deleted
				 * node as owner which is why an attempt to
				 * reset the mirror owner is made.
				 */
				if (clnt_reset_mirror_owner(nd->nd_nodename, sp,
				    node_c, &node_id_list[0], &xep) == -1) {
					mde_perror(&xep, dgettext(TEXT_DOMAIN,
					    "Unable to reset mirror owner on"
					    " node %s\n"), nd->nd_nodename);
					mdclrerror(&xep);
				}

				RB_TEST(21, "deletehosts", ep)
				nd = nd->nd_next;
			}
		}
	}

	RB_TEST(25, "deletehosts", ep)

	RB_PREEMPT;
	rb_level = 4;	/* level 4 */

	RB_TEST(26, "deletehosts", ep)

	/*
	 * Bring the mediator record up to date with the set record for
	 * traditional diskset.
	 */
	if (!(MD_MNSET_DESC(sd))) {
		medr = rb_medr;			/* structure assignment */
		for (i = 0; i < MD_MAXSIDES; i++) {
			if (strinlst(sd->sd_nodes[i], node_c, node_v))
				(void) memset(&medr.med_rec_nodes[i],
				    '\0', sizeof (md_node_nm_t));
			else
				(void) strcpy(medr.med_rec_nodes[i],
				    sd->sd_nodes[i]);
		}
		crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL);

		/* Inform the mediator hosts of the new node list */
		for (i = 0; i < max_meds; i++) {
			if (sd->sd_med.n_lst[i].a_cnt == 0)
				continue;

			if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp,
			    &medr, ep)) {
				if (oha == TRUE && mdanyrpcerror(ep)) {
					mdclrerror(ep);
					continue;
				}
				goto rollback;
			}
		}
	}

	RB_TEST(27, "deletehosts", ep)

	/*
	 * For traditional diskset:
	 * We are deleting ourselves out of the set and we have drives to
	 * consider; so we need to halt the set, release the drives and
	 * reset the timeout.  **** THIS IS A ONE WAY TICKET, NO ROLL BACK
	 * IS POSSIBLE AS SOON AS THE HALT SET COMPLETES, SO THIS IS DONE
	 * WITH ALL SIGNALS BLOCKED AND LAST ****
	 *
	 * This situation cannot occur in a MN diskset since a node can't
	 * delete itself unless all nodes are being deleted and a diskset
	 * cannot contain any drives if all nodes are being deleted.
	 * So, don't even test for this if a MN diskset.
	 */
	if (!(MD_MNSET_DESC(sd)) && (dd != NULL) &&
	    strinlst(mynode(), node_c, node_v)) {
		/* Make sure we are blocking all signals */
		if (procsigs(TRUE, &oldsigs, ep) < 0) {
			rval = -1;
			goto out1;
		}

		if (halt_set(sp, ep)) {
			rval = -1;
			goto out1;
		}

		if (rel_own_bydd(sp, dd, FALSE, ep))
			rval = -1;

out1:
		/* release signals back to what they were on entry */
		if (procsigs(FALSE, &oldsigs, &xep) < 0) {
			if (rval == 0)
				(void) mdstealerror(ep, &xep);
			rval = -1;
		}
	}

out2:
	/*
	 * Unlock diskset by resuming messages across the diskset.
	 * Just resume all classes so that resume is the same whether
	 * just one class was locked or all classes were locked.
	 */
	if ((suspend1_flag) || (suspendall_flag)) {
		/* Send resume */
		nd = sd->sd_nodelist;
		while (nd) {
			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				nd = nd->nd_next;
				continue;
			}
			/*
			 * Skip nodes being deleted if remote set
			 * was deleted since rpc.mdcommd may no longer
			 * be running on remote node.
			 */
			if ((remote_sets_deleted == 1) &&
			    (strinlst(nd->nd_nodename, node_c, node_v))) {
				nd = nd->nd_next;
				continue;
			}
			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
				if (rval == 0)
					(void) mdstealerror(ep, &xep);
				rval = -1;
				mde_perror(ep, dgettext(TEXT_DOMAIN,
				    "Unable to resume rpc.mdcommd.\n"));
			}
			nd = nd->nd_next;
		}
		meta_ping_mnset(sp->setno);
	}

	cl_sk = cl_get_setkey(sp->setno, sp->setname);
	if (lock_flag) {
		if (MD_MNSET_DESC(sd)) {
			nd = sd->sd_nodelist;
			while (nd) {
				/*
				 * During OHA mode, don't issue RPCs to
				 * non-alive nodes since there is no reason to
				 * wait for RPC timeouts.
				 */
				if ((oha == TRUE) &&
				    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
					nd = nd->nd_next;
					continue;
				}
				if (clnt_unlock_set(nd->nd_nodename,
				    cl_sk, &xep)) {
					if (rval == 0)
						(void) mdstealerror(ep, &xep);
					rval = -1;
				}
				nd = nd->nd_next;
			}
		} else {
			for (i = 0; i < MD_MAXSIDES; i++) {
				/* Skip empty slots */
				if (sd->sd_nodes[i][0] == '\0')
					continue;

				if (clnt_unlock_set(sd->sd_nodes[i],
				    cl_sk, &xep)) {
					if (oha == TRUE &&
					    mdanyrpcerror(&xep)) {
						mdclrerror(&xep);
						continue;
					}
					if (rval == 0)
						(void) mdstealerror(ep, &xep);
					rval = -1;
				}
			}
		}
	}
	cl_set_setkey(NULL);

out3:
	metafreereplicalist(rlp);
	if (node_id_list)
		Free(node_id_list);

	metaflushsetname(sp);

	if (MD_MNSET_DESC(sd)) {
		/* release signals back to what they were on entry */
		if (procsigs(FALSE, &oldsigs, &xep) < 0)
			mdclrerror(&xep);
	} else {
		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
	}


	return (rval);

rollback:
	/* all signals already blocked for MN diskset */
	if (!(MD_MNSET_DESC(sd))) {
		if (procsigs(TRUE, &oldsigs, &xep) < 0)
			mdclrerror(&xep);
	}

	rval = -1;

	max_genid = sd->sd_genid;


	/*
	 * Send reinit command to rpc.mdcommd which forces it to get
	 * fresh set description and resume all classes but class 0.
	 * Don't send any commands to rpc.mdcommd if set on that node
	 * has been removed.
	 */
	if (suspendall_flag) {
		/* Send reinit */
		nd = sd->sd_nodelist;
		while (nd) {
			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				nd = nd->nd_next;
				continue;
			}
			/*
			 * If the remote set was deleted, rpc.mdcommd
			 * may no longer be running so send nothing to it.
			 */
			if ((remote_sets_deleted == 1) &&
			    (strinlst(nd->nd_nodename, node_c, node_v))) {
				nd = nd->nd_next;
				continue;
			}
			/* Class is ignored for REINIT */
			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
				mde_perror(&xep, dgettext(TEXT_DOMAIN,
				    "Unable to reinit rpc.mdcommd.\n"));
				mdclrerror(&xep);
			}
			nd = nd->nd_next;
		}
		/* Send resume */
		nd = sd->sd_nodelist;
		while (nd) {
			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				nd = nd->nd_next;
				continue;
			}
			/*
			 * If the remote set was deleted, rpc.mdcommd
			 * may no longer be running so send nothing to it.
			 */
			if ((remote_sets_deleted == 1) &&
			    (strinlst(nd->nd_nodename, node_c, node_v))) {
				nd = nd->nd_next;
				continue;
			}
			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
			    sp, MD_MSG_CLASS0, MD_MSCF_DONT_RESUME_CLASS1,
			    &xep)) {
				mde_perror(&xep, dgettext(TEXT_DOMAIN,
				    "Unable to resume rpc.mdcommd.\n"));
				mdclrerror(&xep);
			}
			nd = nd->nd_next;
		}
		meta_ping_mnset(sp->setno);
	}

	/* level 2 */
	if (rb_level > 1) {
		md_set_record		*sr;
		md_replicalist_t	*rl;

		recreate_set(sp, sd);

		/*
		 * Lock out other meta* commands on nodes with the newly
		 * re-created sets by suspending class 1 messages
		 * across the diskset.
		 */
		nd = sd->sd_nodelist;
		while (nd) {
			/* Skip nodes not being deleted */
			if (!(strinlst(nd->nd_nodename, node_c, node_v))) {
				nd = nd->nd_next;
				continue;
			}
			/* Suspend commd on nodes with re-created sets */
			if (clnt_mdcommdctl(nd->nd_nodename,
			    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
			    MD_MSCF_NO_FLAGS, &xep)) {
				mde_perror(&xep, dgettext(TEXT_DOMAIN,
				    "Unable to suspend rpc.mdcommd.\n"));
				mdclrerror(&xep);
			}
			nd = nd->nd_next;
		}

		max_genid++;

		/*
		 * See if we have to re-add the drives specified.
		 */
		for (i = 0; i < node_c; i++) {
			if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
				/*
				 * During OHA mode, don't issue RPCs to
				 * non-alive nodes since there is no reason to
				 * wait for RPC timeouts.
				 */
				nd = sd->sd_nodelist;
				while (nd) {
					if (strcmp(nd->nd_nodename, node_v[i])
					    == 0) {
						break;
					}
					nd = nd->nd_next;
				}
				if (nd == 0)
					continue;
				if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
					continue;
			}

			/* Don't care if set record is MN or not */
			if (clnt_getset(node_v[i], sp->setname, MD_SET_BAD, &sr,
			    &xep) == -1) {
				mdclrerror(&xep);
				continue;
			}

			/* Drive already added, skip to next node */
			if (sr->sr_drivechain != NULL) {
				/*
				 * Set record structure was allocated from RPC
				 * routine getset so this structure is only of
				 * size md_set_record even if the MN flag is
				 * set.  So, clear the flag so that the free
				 * code doesn't attempt to free a structure
				 * the size of md_mnset_record.
				 */
				sr->sr_flags &= ~MD_SR_MN;
				free_sr(sr);
				continue;
			}

			if (clnt_adddrvs(node_v[i], sp, dd, sr->sr_ctime,
			    sr->sr_genid, &xep) == -1)
				mdclrerror(&xep);

			if (clnt_upd_dr_flags(node_v[i], sp, dd, MD_DR_OK,
			    &xep) == -1)
				mdclrerror(&xep);

			/*
			 * Set record structure was allocated from RPC routine
			 * getset so this structure is only of size
			 * md_set_record even if the MN flag is set.  So,
			 * clear the flag so that the free code doesn't
			 * attempt to free a structure the size of
			 * md_mnset_record.
			 */
			sr->sr_flags &= ~MD_SR_MN;
			free_sr(sr);
		}
		max_genid += 3;

		for (rl = rlp; rl != NULL; rl = rl->rl_next) {
			md_replica_t	*r = rl->rl_repp;
			/*
			 * This is not the first replica being added to the
			 * diskset so call with ADDSIDENMS_BCAST.  If this
			 * is a traditional diskset, the bcast flag is ignored
			 * since traditional disksets don't use the rpc.mdcommd.
			 */
			if (meta_db_addsidenms(sp, r->r_namep, r->r_blkno,
			    DB_ADDSIDENMS_BCAST, &xep))
				mdclrerror(&xep);
		}

		/*
		 * Add the device names for the new sides into the namespace,
		 * on all hosts not being deleted.
		 */
		if (MD_MNSET_DESC(sd)) {
			nd = sd->sd_nodelist;
			while (nd) {
				/* Find a node that is not being deleted */
				if (!strinlst(nd->nd_nodename, node_c,
				    node_v)) {
					j = nd->nd_nodeid;
					break;
				}
				nd = nd->nd_next;
			}
		} else {
			for (j = 0; j < MD_MAXSIDES; j++) {
				/* Skip empty slots */
				if (sd->sd_nodes[j][0] == '\0')
					continue;

				/* Find a node that is not being deleted */
				if (!strinlst(sd->sd_nodes[j], node_c, node_v))
					break;
			}
		}

		if (MD_MNSET_DESC(sd)) {
			nd = sd->sd_nodelist;
			while (nd) {
				/* Skip nodes not being deleted */
				if (!strinlst(nd->nd_nodename, node_c,
				    node_v)) {
					nd = nd->nd_next;
					continue;
				}

				/* this side was just created, add the names */
				if (add_md_sidenms(sp, nd->nd_nodeid, j, &xep))
					mdclrerror(&xep);
				nd = nd->nd_next;
			}
		} else {
			for (i = 0; i < MD_MAXSIDES; i++) {
				/* Skip empty slots */
				if (sd->sd_nodes[i][0] == '\0')
					continue;

				/* Skip nodes not being deleted */
				if (!strinlst(sd->sd_nodes[i], node_c, node_v))
					continue;

				/* this side was just created, add the names */
				if (add_md_sidenms(sp, i, j, &xep))
					mdclrerror(&xep);
			}
		}
	}

	/* level 4 */
	if (rb_level > 3 && dd != NULL) {
		/*
		 * Add the new sidename for each drive to all the hosts
		 * Multi-node disksets only store the sidename for
		 * that host, so there is nothing to re-add.
		 */
		if (!(MD_MNSET_DESC(sd))) {
			for (j = 0; j < MD_MAXSIDES; j++) {
				/* Skip empty slots */
				if (sd->sd_nodes[j][0] == '\0')
					continue;

				/* Find a node that is not being deleted */
				if (!strinlst(sd->sd_nodes[j], node_c, node_v))
					break;
			}
			for (i = 0; i < MD_MAXSIDES; i++) {
				/* Skip empty slots */
				if (sd->sd_nodes[i][0] == '\0')
					continue;

				if (clnt_add_drv_sidenms(sd->sd_nodes[i],
				    sd->sd_nodes[j], sp, sd, node_c, node_v,
				    &xep))
					mdclrerror(&xep);
			}
		}

	}

	/* level 5 */
	if ((rb_level > 4) && (!(MD_MNSET_DESC(sd)))) {
		/* rollback the mediator record */
		for (i = 0; i < max_meds; i++) {
			if (sd->sd_med.n_lst[i].a_cnt == 0)
				continue;

			if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp,
			    &rb_medr, &xep))
				mdclrerror(&xep);
		}
	}

	/* level 3 */
	if (rb_level > 2) {
		md_set_record		*sr;
		md_mnset_record		*mnsr;

		if (MD_MNSET_DESC(sd)) {
			nd = sd->sd_nodelist;
			/*
			 * During OHA mode, don't issue RPCs to
			 * non-alive nodes since there is no reason to
			 * wait for RPC timeouts.
			 */
			while (nd) {
				if ((oha == TRUE) &&
				    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
					nd = nd->nd_next;
					continue;
				}
				/* Record should be for a multi-node diskset */
				if (clnt_mngetset(nd->nd_nodename, sp->setname,
				    MD_SET_BAD, &mnsr, &xep) == -1) {
					mdclrerror(&xep);
					nd = nd->nd_next;
					continue;
				}

				has_set = 1;

				nr = mnsr->sr_nodechain;
				while (nr) {
					if (nd->nd_nodeid == nr->nr_nodeid) {
						break;
					}
					nr = nr->nr_next;
				}
				if (nr == NULL)
					has_set = 0;

				free_sr((struct md_set_record *)mnsr);
				if (has_set) {
					nd = nd->nd_next;
					continue;
				}

				if (clnt_addhosts(nd->nd_nodename, sp, node_c,
				    node_v, &xep) == -1)
					mdclrerror(&xep);

				nd = nd->nd_next;
			}
		} else {
			for (i = 0; i < MD_MAXSIDES; i++) {
				/* Skip empty slots */
				if (sd->sd_nodes[i][0] == '\0')
					continue;

				/* Record should be for a non-multi-node set */
				if (clnt_getset(sd->sd_nodes[i], sp->setname,
				    MD_SET_BAD, &sr, &xep) == -1) {
					mdclrerror(&xep);
					continue;
				}

				/*
				 * Set record structure was allocated from RPC
				 * routine getset so this structure is only of
				 * size md_set_record even if the MN flag is
				 * set.  So, clear the flag so that the free
				 * code doesn't attempt to free a structure
				 * the size of md_mnset_record.
				 */
				if (MD_MNSET_REC(sr)) {
					sr->sr_flags &= ~MD_SR_MN;
					free_sr(sr);
					continue;
				}

				has_set = 1;
				for (j = 0; j < MD_MAXSIDES; j++) {
					/* Skip empty slots */
					if (sd->sd_nodes[j][0] == '\0')
						continue;

					if (sr->sr_nodes[j][0] == '\0') {
						has_set = 0;
						break;
					}
				}

				free_sr(sr);
				if (has_set)
					continue;

				if (clnt_addhosts(sd->sd_nodes[i], sp, node_c,
				    node_v, &xep) == -1)
					mdclrerror(&xep);
			}
		}
		max_genid++;
	}

	/* level 1 */
	if (rb_level > 0) {
		max_genid++;
		/* Sets MD_SR_OK on given nodes. */
		resync_genid(sp, sd, max_genid, node_c, node_v);

		/*
		 * For MN diskset:
		 * On each newly re-added node, set the node record for that
		 * node to OK.  Then set all node records for the newly added
		 * nodes on all nodes to ok.
		 *
		 * By setting a node's own node record to ok first, even if
		 * the node re-adding the hosts panics, the rest of the nodes
		 * can determine the same node list during the choosing of the
		 * master during reconfig.  So, only nodes considered for
		 * mastership are nodes that have both MD_MN_NODE_OK and
		 * MD_SR_OK set on that node's rpc.metad.  If all nodes have
		 * MD_SR_OK set, but no node has its own MD_MN_NODE_OK set,
		 * then the set will be removed during reconfig since a panic
		 * occurred during the re-creation of the deletion of
		 * the initial diskset.
		 */
		if (MD_MNSET_DESC(sd)) {
			md_mnnode_desc	*saved_nd_next;
			if (dd != NULL) {
				/*
				 * Notify rpc.mdcommd on all nodes of a
				 * nodelist change.  Start by suspending
				 * rpc.mdcommd (which drains it of all
				 * messages), then change the nodelist
				 * followed by a reinit and resume.
				 */
				nd = sd->sd_nodelist;
				while (nd) {
					if (!(nd->nd_flags &
					    MD_MN_NODE_ALIVE)) {
						nd = nd->nd_next;
						continue;
					}
					if (clnt_mdcommdctl(nd->nd_nodename,
					    COMMDCTL_SUSPEND, sp,
					    MD_MSG_CLASS0,
					    MD_MSCF_NO_FLAGS, &xep)) {
						mde_perror(&xep,
						    dgettext(TEXT_DOMAIN,
						    "Unable to suspend "
						    "rpc.mdcommd.\n"));
						mdclrerror(&xep);
					}
					suspendall_flag_rb = 1;
					nd = nd->nd_next;
				}
			}
			for (i = 0; i < node_c; i++) {
				/*
				 * During OHA mode, don't issue RPCs to
				 * non-alive nodes since there is no reason to
				 * wait for RPC timeouts.
				 */
				nd = sd->sd_nodelist;
				while (nd) {
					if (strcmp(nd->nd_nodename, node_v[i])
					    == 0)
						break;
					nd = nd->nd_next;
				}
				/* Something wrong, finish this in next loop */
				if (nd == NULL)
					continue;

				if ((oha == TRUE) &&
				    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
					continue;
				}

				if (dd != NULL) {
					/* Set master on re-joining node. */
					if (clnt_mnsetmaster(node_v[i], sp,
					    sd->sd_mn_master_nodenm,
					    sd->sd_mn_master_nodeid, &xep)) {
						mdclrerror(&xep);
					}

					/*
					 * Re-join set to same state as
					 * before - stale or non-stale.
					 */
					if (clnt_joinset(node_v[i], sp,
					    stale_flag, &xep)) {
						mdclrerror(&xep);
					}
				}

				/* Only changing my local cache of node list */
				saved_nd_next = nd->nd_next;
				nd->nd_next = NULL;

				/* Set record for host to ok on that host */
				if (clnt_upd_nr_flags(node_v[i], sp,
				    nd, MD_NR_OK, NULL, &xep)) {
					mdclrerror(&xep);
				}
				nd->nd_next = saved_nd_next;
			}

			/* Now set all node records on all nodes to be ok */
			nd = sd->sd_nodelist;
			while (nd) {
				/*
				 * During OHA mode, don't issue RPCs to
				 * non-alive nodes since there is no reason to
				 * wait for RPC timeouts.
				 */
				if ((oha == TRUE) &&
				    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
					nd = nd->nd_next;
					continue;
				}
				if (clnt_upd_nr_flags(nd->nd_nodename, sp,
				    sd->sd_nodelist, MD_NR_OK, NULL, &xep)) {
					mdclrerror(&xep);
				}
				nd = nd->nd_next;
			}
		}
	}

	/*
	 * Notify rpc.mdcommd on all nodes of a nodelist change.
	 * Send reinit command to mdcommd which forces it to get
	 * fresh set description.
	 */
	if (suspendall_flag_rb) {
		/* Send reinit */
		nd = sd->sd_nodelist;
		while (nd) {
			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				nd = nd->nd_next;
				continue;
			}

			/* Class is ignored for REINIT */
			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
				mde_perror(&xep, dgettext(TEXT_DOMAIN,
				    "Unable to reinit rpc.mdcommd.\n"));
				mdclrerror(&xep);
			}
			nd = nd->nd_next;
		}
	}

	/*
	 * Unlock diskset by resuming messages across the diskset.
	 * Just resume all classes so that resume is the same whether
	 * just one class was locked or all classes were locked.
	 */
	if ((suspend1_flag) || (suspendall_flag) || (suspendall_flag_rb)) {
		/* Send resume */
		nd = sd->sd_nodelist;
		while (nd) {
			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				nd = nd->nd_next;
				continue;
			}
			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
				mde_perror(&xep, dgettext(TEXT_DOMAIN,
				    "Unable to resume rpc.mdcommd.\n"));
			}
			nd = nd->nd_next;
		}
		meta_ping_mnset(sp->setno);
	}

	/*
	 * Start a resync thread on the re-added nodes
	 * if set is not stale. Also start a thread to update the
	 * abr state of all soft partitions
	 */
	if (stale_flag != MNSET_IS_STALE) {
		for (i = 0; i < node_c; i++) {
			/*
			 * During OHA mode, don't issue RPCs to
			 * non-alive nodes since there is no reason to
			 * wait for RPC timeouts.
			 */
			nd = sd->sd_nodelist;
			while (nd) {
				if (strcmp(nd->nd_nodename, node_v[i])
				    == 0)
					break;
				nd = nd->nd_next;
			}
			if (nd == NULL)
				continue;

			if ((oha == TRUE) &&
			    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
				continue;
			}

			if (dd != 0) {
				if (clnt_mn_mirror_resync_all(node_v[i],
				    sp->setno, &xep)) {
					mde_perror(ep, dgettext(TEXT_DOMAIN,
					    "Unable to start resync "
					    "thread.\n"));
				}
				if (clnt_mn_sp_update_abr(node_v[i],
				    sp->setno, &xep)) {
					mde_perror(ep, dgettext(TEXT_DOMAIN,
					    "Unable to start sp update "
					    "thread.\n"));
				}
			}
		}
	}

	/* level 0 */
	cl_sk = cl_get_setkey(sp->setno, sp->setname);
	/* Don't test lock flag since guaranteed to be set if in rollback */
	if (MD_MNSET_DESC(sd)) {
		nd = sd->sd_nodelist;
		while (nd) {
			/*
			 * During OHA mode, don't issue RPCs to
			 * non-alive nodes since there is no reason to
			 * wait for RPC timeouts.
			 */
			if ((oha == TRUE) &&
			    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
				nd = nd->nd_next;
				continue;
			}
			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
				mdclrerror(&xep);
			nd = nd->nd_next;
		}
	} else {
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
				mdclrerror(&xep);
		}
	}
	cl_set_setkey(NULL);

	/* release signals back to what they were on entry */
	if (procsigs(FALSE, &oldsigs, &xep) < 0)
		mdclrerror(&xep);

	metafreereplicalist(rlp);
	if (node_id_list)
		Free(node_id_list);

	metaflushsetname(sp);

	if (!(MD_MNSET_DESC(sd))) {
		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
	}

	return (rval);
}

/*
 * Enable or disable the auto-take attribute (MD_SR_AUTO_TAKE) of a diskset.
 *
 * sp		- diskset to operate on; the caller must own it.
 * take_val	- non-zero to enable auto-take, zero to disable it.
 * ep		- receives the error description on failure.
 *
 * Enabling is only permitted when the local host is the sole host in the
 * set (otherwise MDE_DS_SINGLEHOST); enabling a set that already has the
 * flag is a silent no-op.  Disabling a set that does not have the flag
 * fails with MDE_DS_AUTONOTSET.
 *
 * After the flag has been changed, ownership of the set's drives is
 * released (enable) or taken (disable) on a best-effort basis: errors from
 * those steps are cleared and do not affect the return value.
 *
 * The set is locked on the local host for the duration of the update and
 * is always unlocked again before returning from the "out" path.
 *
 * Returns 0 on success, -1 on failure.
 */
int
meta_set_auto_take(
	mdsetname_t	*sp,
	int		take_val,
	md_error_t	*ep
)
{
	int		i;
	md_set_desc	*sd;
	int		rval = 0;
	md_setkey_t	*cl_sk;
	md_error_t	xep = mdnullerror;
	char		*hostname;
	md_drive_desc	*dd;

	/* Nothing has been locked yet, so these failures return directly. */
	if ((sd = metaget_setdesc(sp, ep)) == NULL)
		return (-1);

	/* Make sure we own the set */
	if (meta_check_ownership(sp, ep) != 0)
		return (-1);

	hostname = mynode();

	/* Lock the set on our side */
	if (clnt_lock_set(hostname, sp, ep)) {
		rval = -1;
		goto out;
	}

	if (take_val) {
		/* enable auto_take but only if it is not already set */
		if (! (sd->sd_flags & MD_SR_AUTO_TAKE)) {
			/*
			 * Verify that we're the only host in the set.
			 * Only the first character is tested to detect an
			 * unused slot, as the other users of sd_nodes in
			 * this file do.
			 */
			for (i = 0; i < MD_MAXSIDES; i++) {
				/* Skip empty slots */
				if (sd->sd_nodes[i][0] == '\0')
					continue;

				if (strcmp(sd->sd_nodes[i], hostname) != 0) {
					(void) mddserror(ep, MDE_DS_SINGLEHOST,
					    sp->setno, NULL, NULL, sp->setname);
					rval = -1;
					goto out;
				}
			}

			/*
			 * NOTE(review): a failure here is recorded in rval
			 * but the reservations are still released below;
			 * confirm that this is the intended behavior.
			 */
			if (clnt_enable_sr_flags(hostname, sp,
			    MD_SR_AUTO_TAKE, ep))
				rval = -1;

			/* Disable SCSI reservations */
			if (sd->sd_flags & MD_SR_MB_DEVID)
				dd = metaget_drivedesc(sp, MD_BASICNAME_OK |
				    PRINT_FAST, &xep);
			else
				dd = metaget_drivedesc(sp, MD_BASICNAME_OK,
				    &xep);

			/* Failing to get the drive list is not fatal. */
			if (! mdisok(&xep))
				mdclrerror(&xep);

			if (dd != NULL) {
				if (rel_own_bydd(sp, dd, TRUE, &xep))
					mdclrerror(&xep);
			}
		}

	} else {
		/* disable auto_take, if set, or error */
		if (sd->sd_flags & MD_SR_AUTO_TAKE) {
			/*
			 * NOTE(review): as in the enable path, the
			 * reservations are taken below even when clearing
			 * the flag failed; confirm that this is intended.
			 */
			if (clnt_disable_sr_flags(hostname, sp,
			    MD_SR_AUTO_TAKE, ep))
				rval = -1;

			/* Enable SCSI reservations */
			if (sd->sd_flags & MD_SR_MB_DEVID)
				dd = metaget_drivedesc(sp, MD_BASICNAME_OK |
				    PRINT_FAST, &xep);
			else
				dd = metaget_drivedesc(sp, MD_BASICNAME_OK,
				    &xep);

			/* Failing to get the drive list is not fatal. */
			if (! mdisok(&xep))
				mdclrerror(&xep);

			if (dd != NULL) {
				mhd_mhiargs_t	mhiargs = defmhiargs;

				if (tk_own_bydd(sp, dd, &mhiargs, TRUE, &xep))
					mdclrerror(&xep);
			}
		} else {
			(void) mddserror(ep, MDE_DS_AUTONOTSET, sp->setno,
			    NULL, NULL, sp->setname);
			rval = -1;
		}
	}

out:
	cl_sk = cl_get_setkey(sp->setno, sp->setname);
	if (clnt_unlock_set(hostname, cl_sk, &xep)) {
		/*
		 * Report the unlock failure only when nothing else has
		 * failed; otherwise keep the earlier error in ep and
		 * discard this one so that xep is not leaked.
		 */
		if (rval == 0)
			(void) mdstealerror(ep, &xep);
		else
			mdclrerror(&xep);
		rval = -1;
	}
	cl_set_setkey(NULL);

	return (rval);
}