OpenSolaris_b135/lib/lvm/libmeta/common/meta_set_med.c

Compare this file to the similar file:
Show the results in this format:

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Metadevice diskset interfaces
 */

#include "meta_set_prv.h"
#include <sys/lvm/md_crc.h>
#include <sys/lvm/mdmed.h>

#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/svm.h>

#define	MALSIZ	32

/*
 * Append a private copy of item to the NULL-terminated string vector
 * *listp, creating the vector on first use.  The vector grows in steps
 * of MALSIZ slots and unused slots are kept NULL so that the
 * terminator is always present.
 *
 * Returns the number of strings in the list after the append.
 */
static int
add_lst(char ***listp, char *item)
{
	char	**lst = *listp;
	int	used = 0;
	int	slot;

	if (lst == NULL) {
		/* First item: start with one zero-filled chunk. */
		lst = (char **)Zalloc(MALSIZ * sizeof (char *));
		*listp = lst;
	} else {
		/* Walk to the terminator to find the first free slot. */
		while (lst[used] != NULL)
			used++;
	}

	lst[used] = Strdup(item);
	used++;

	/* Still room for the terminator in the current chunk. */
	if ((used % MALSIZ) != 0)
		return (used);

	/* Chunk is full: add another MALSIZ slots and NULL them out. */
	lst = (char **)Realloc((void *)lst,
	    (used + MALSIZ) * sizeof (char *));
	*listp = lst;
	for (slot = used; slot < (used + MALSIZ); slot++)
		lst[slot] = (char *)NULL;

	return (used);
}

/*
 * Release a NULL-terminated string vector: every string, then the
 * vector itself, and reset *listp to NULL.
 *
 * Returns 1 if a list was freed, 0 if *listp was already NULL.
 */
static int
del_lst(char ***listp)
{
	char	**entry;

	/* Nothing to release. */
	if (*listp == NULL)
		return (0);

	for (entry = *listp; *entry != NULL; entry++)
		free(*entry);

	free(*listp);
	*listp = NULL;
	return (1);
}


/*
 * Validate the mediator host names held in mhp.
 *
 * For every populated entry of mhp->n_lst:
 *   - each name must consist only of printable characters that are not
 *     listed in INVALID_IN_NAMES;
 *   - each name is passed to clnt_med_hostname(), which hands back a
 *     host name string that is released here with Free();
 *   - the first name of an entry must equal the host name returned for
 *     it (i.e. it must be the node name, not an alias);
 *   - every further name of the entry must yield that same node name.
 *
 * Returns 0 if all names pass, otherwise a non-zero value with ep set.
 */
static int
validate_med_nodes(
	mdsetname_t	*sp,
	md_h_arr_t	*mhp,
	md_error_t	*ep
)
{
	char		*hostname;
	char		*nodename;
	char		*nm;
	char		*cp;
	int		i, j;


	for (i = 0; i < MED_MAX_HOSTS; i++) {
		if (mhp->n_lst[i].a_cnt == 0)
			continue;

		for (j = 0; j < mhp->n_lst[i].a_cnt; j++) {
			nm = mhp->n_lst[i].a_nm[j];

			/*
			 * The cast matters: passing a negative plain char
			 * to a <ctype.h> classifier is undefined, so bytes
			 * with the high bit set must be widened as
			 * unsigned char first.
			 */
			for (cp = nm; *cp; cp++)
				if (!isprint((unsigned char)*cp) ||
				    strchr(INVALID_IN_NAMES, *cp) != NULL)
					return (mddserror(ep,
					    MDE_DS_INVALIDMEDNAME,
					    sp->setno, nm, NULL, sp->setname));

			if (clnt_med_hostname(nm, &hostname, ep))
				return (-1);

			if (j == 0) {
				/* First name of an entry is the node name */
				if (strcmp(nm, hostname) != 0) {
					Free(hostname);
					return (mddserror(ep,
					    MDE_DS_NOTNODENAME, sp->setno, nm,
					    NULL, sp->setname));
				}
				nodename = nm;
			} else {
				/* Remaining names must alias that node */
				if (strcmp(nodename, hostname) != 0) {
					Free(hostname);
					return (mddserror(ep,
					    MDE_DS_ALIASNOMATCH, sp->setno, nm,
					    nodename, sp->setname));
				}
			}
			Free(hostname);
		}
	}
	return (0);
}

/*
 * Exported Entry Points
 */

/*
 * meta_set_addmeds - add mediator hosts to a diskset.
 *
 *	sp	diskset to modify
 *	node_c	number of strings in node_v
 *	node_v	one string per new mediator host, each a comma separated
 *		list "nodename[,alias...]".  The strings are split in
 *		place with strtok().
 *	ep	error return
 *
 * Returns 0 on success; otherwise a non-zero value with ep set.
 *
 * The function first validates the request (ownership, name syntax,
 * uniqueness, not already a mediator, mediator count), then builds both
 * the new mediator record (medr) and a copy of the current one
 * (rb_medr / rb_t) for rollback.  The update itself is applied in
 * steps, with rb_level recording how far it got:
 *
 *	level 1	new mediator list pushed to the hosts of the set
 *	level 2	drive descriptors fetched
 *	level 3	mediator hosts sent the new mediator record
 *	level 4	kernel mediator list set (traditional diskset only)
 *
 * A failure in any step jumps to "rollback", which undoes the steps in
 * reverse order.  Failures before level 1 go to "out", which only
 * releases locks/suspends and restores signal handling.
 */
int
meta_set_addmeds(
	mdsetname_t		*sp,
	int			node_c,
	char			**node_v,
	md_error_t		*ep
)
{
	md_set_desc		*sd = NULL;
	md_drive_desc		*dd = NULL;
	mddb_med_parm_t		mp;
	mddb_med_upd_parm_t	mup;
	md_h_arr_t		t;
	md_h_arr_t		rb_t;
	med_rec_t		medr;
	med_rec_t		rb_medr;
	char			*cp;
	char			**n_l = NULL;
	int			n_c = 0;
	int			i, j;
	sigset_t		oldsigs;
	md_setkey_t		*cl_sk;
	int			rb_level = 0;
	md_error_t		xep = mdnullerror;
	int			rval = 0;
	int			max_meds;
	md_mnnode_desc		*nd;
	int			suspend1_flag = 0;
	int			lock_flag = 0;

	/* Initialize */
	(void) memset(&t, '\0', sizeof (t));
	t.n_cnt = node_c;
	mdclrerror(ep);

	if ((sd = metaget_setdesc(sp, ep)) == NULL)
		return (-1);

	/* Make sure we own the set */
	if (meta_check_ownership(sp, ep) != 0)
		return (-1);

	if ((max_meds = get_max_meds(ep)) == 0)
		return (-1);

	/*
	 * The mediator information (which is part of the set record) is
	 * stored in the local mddbs of each node in the diskset.
	 * Each node's rpc.metad daemon reads in the set
	 * records from that node's local mddb and caches them
	 * internally. Any process needing diskset information contacts its
	 * local rpc.metad to get this information.  Since each node in the
	 * diskset is independently reading the set information from its local
	 * mddb, the set records in the local mddbs must stay
	 * in-sync, so that all nodes have a consistent view of the diskset.
	 *
	 * For a multinode diskset, explicitly verify that all nodes in the
	 * diskset are ALIVE (i.e. are in the API membership list).  Otherwise,
	 * fail this operation since all nodes must be ALIVE in order to add
	 * the mediator information to the set record in their local mddb.
	 * If a panic of this node leaves the local mddbs set records
	 * out-of-sync, the reconfig cycle will fix the local mddbs and
	 * force them back into synchronization.
	 */
	if (MD_MNSET_DESC(sd)) {
		nd = sd->sd_nodelist;
		while (nd) {
			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				(void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
					sp->setno,
					nd->nd_nodename, NULL, sp->setname);
				return (-1);
			}
			nd = nd->nd_next;
		}
	}

	/*
	 * The parse loop below stores entry i of the command line in
	 * t.n_lst[i], so refuse more entries than the array can hold
	 * before anything is written.  (The later "too many mediators"
	 * check against max_meds runs only after t has been filled in.)
	 */
	if (node_c > MED_MAX_HOSTS)
		return (mderror(ep, MDE_TOOMANYMED, NULL));

	/* Parse the command line into the md_h_arr_t structure */
	for (i = 0; i < t.n_cnt; i++) {
		cp = strtok(node_v[i], ",");
		j = 0;
		while (cp) {
			if (strlen(cp) > (size_t)MD_MAX_NODENAME)
				return (mddserror(ep, MDE_DS_NODENAMETOOLONG,
				    sp->setno, cp, NULL, sp->setname));
			if (j >= MAX_HOST_ADDRS)
				return (mddserror(ep, MDE_DS_TOOMANYALIAS,
				    sp->setno, cp, NULL, sp->setname));

			(void) strcpy(t.n_lst[i].a_nm[j], cp);

			j++;

			cp = strtok(NULL, ",");
		}
		t.n_lst[i].a_cnt = j;
	}

	/* Make a list of nodes to check */
	for (i = 0; i < t.n_cnt; i++)
		for (j = 0; j < t.n_lst[i].a_cnt; j++)
			n_c = add_lst(&n_l, t.n_lst[i].a_nm[j]);

	/* Make sure that there are no redundant nodes */
	rval = nodesuniq(sp, n_c, n_l, ep);

	(void) del_lst(&n_l);

	if (rval != 0)
		return (rval);

	/*
	 * Lock the set on current set members.
	 * Set locking done much earlier for MN diskset than for traditional
	 * diskset since lock_set and SUSPEND are used to protect against
	 * other metaset commands running on the other nodes.
	 */
	if (MD_MNSET_DESC(sd)) {
		/* Make sure we are blocking all signals */
		if (procsigs(TRUE, &oldsigs, &xep) < 0)
			mdclrerror(&xep);
		nd = sd->sd_nodelist;
		/* All nodes are guaranteed to be ALIVE */
		while (nd) {
			if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
				rval = -1;
				goto out;
			}
			lock_flag = 1;
			nd = nd->nd_next;
		}
		/*
		 * Lock out other meta* commands by suspending
		 * class 1 messages across the diskset.
		 */
		nd = sd->sd_nodelist;
		/* All nodes are guaranteed to be ALIVE */
		while (nd) {
			if (clnt_mdcommdctl(nd->nd_nodename,
			    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
			    MD_MSCF_NO_FLAGS, ep)) {
				rval = -1;
				goto out;
			}
			suspend1_flag = 1;
			nd = nd->nd_next;
		}
	}

	if (validate_med_nodes(sp, &t, ep)) {
		rval = -1;
		goto out;
	}

	/* Check existing mediators against new, if any */
	if (sd->sd_med.n_cnt > 0) {
		for (i = 0; i < max_meds; i++)
			if (sd->sd_med.n_lst[i].a_cnt > 0)
				n_c = add_lst(&n_l,
				    sd->sd_med.n_lst[i].a_nm[0]);

		for (i = 0; i < t.n_cnt; i++) {
			if (strinlst(t.n_lst[i].a_nm[0], n_c, n_l)) {
				(void) del_lst(&n_l);
				(void) mddserror(ep, MDE_DS_ISMED, sp->setno,
				    t.n_lst[i].a_nm[0], NULL,
				    sp->setname);
				rval = -1;
				goto out;
			}
		}
		(void) del_lst(&n_l);
	}

	if ((t.n_cnt + sd->sd_med.n_cnt) > max_meds) {
		(void) mderror(ep, MDE_TOOMANYMED, NULL);
		rval = -1;
		goto out;
	}

	/* Copy the current mediator list for rollback */
	rb_t = sd->sd_med;			/* structure assignment */

	/* Setup the mediator record roll-back structure */
	(void) memset(&rb_medr, '\0', sizeof (med_rec_t));
	rb_medr.med_rec_mag = MED_REC_MAGIC;
	rb_medr.med_rec_rev = MED_REC_REV;
	rb_medr.med_rec_fl  = 0;
	rb_medr.med_rec_sn  = sp->setno;
	(void) strcpy(rb_medr.med_rec_snm, sp->setname);
	if (MD_MNSET_DESC(sd)) {
		/*
		 * For a MN diskset the mediator is not given a list of
		 * hosts in the set.  Instead a generic name (multiowner) is
		 * given to the mediator which will allow any node to access
		 * the mediator data as long as it provides the correct
		 * setname and set number.  In a MN diskset, the mediator
		 * data is only used when a first node joins the diskset
		 * and becomes the master of the MN diskset.
		 *
		 * The traditional diskset code keeps the host list in
		 * the mediator record up to date with respect to the host
		 * list in the traditional diskset.  This keeps an unauthorized
		 * node in the traditional diskset from accessing the data
		 * in the mediator record and being able to 'take' the
		 * diskset.
		 *
		 * This additional check is needed in the traditional diskset
		 * since a panic during the metaset command can leave
		 * the diskset with some nodes thinking that an
		 * action has occurred and other nodes thinking the opposite.
		 * A node may have really been removed from a diskset, but
		 * that node doesn't realize this so this node must be
		 * blocked from using the mediator data when attempting
		 * to 'take' the diskset.
		 * (Traditional diskset code has each node's rpc.metad
		 * cleaning up from an inconsistent state without any
		 * knowledge from the other nodes in the diskset).
		 *
		 * In the MN diskset, the reconfig steps force a consistent
		 * state across all nodes in the diskset, so no node
		 * needs to be blocked from accessing the mediator data.
		 * This allows the MN diskset to use a common 'nodename'
		 * in the mediator record.  This allows the mediator
		 * daemon to remain unchanged even though a large number of
		 * nodes are supported by the MN diskset.
		 */
		(void) strlcpy(rb_medr.med_rec_nodes[0], MED_MN_CALLER,
		    MD_MAX_NODENAME_PLUS_1);
	} else {
		for (i = 0; i < MD_MAXSIDES; i++)
			(void) strcpy(rb_medr.med_rec_nodes[i],
				sd->sd_nodes[i]);
	}
	rb_medr.med_rec_meds = sd->sd_med;	/* structure assignment */
	(void) memset(&rb_medr.med_rec_data, '\0', sizeof (med_data_t));
	rb_medr.med_rec_foff = 0;
	crcgen(&rb_medr, &rb_medr.med_rec_cks, sizeof (med_rec_t), NULL);

	/* Merge new mediators into the first free slots of the set record */
	for (i = 0; i < t.n_cnt; i++) {
		for (j = 0; j < max_meds; j++) {
			if (sd->sd_med.n_lst[j].a_cnt > 0)
				continue;
			sd->sd_med.n_lst[j] = t.n_lst[i];
			SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_MEDIATOR,
			    sp->setno, j);
			sd->sd_med.n_cnt++;
			break;
		}
	}

	/*
	 * Setup the kernel mediator list, which also validates that the
	 * hosts have valid IP addresses
	 */
	(void) memset(&mp, '\0', sizeof (mddb_med_parm_t));
	mp.med_setno = sp->setno;

	/* Copy the hostnames */
	if (meta_h2hi(&sd->sd_med, &mp.med, ep)) {
		rval = -1;
		goto out;
	}

	/* Resolve the IP addresses for the host list */
	if (meta_med_hnm2ip(&mp.med, ep)) {
		rval = -1;
		goto out;
	}

	/* Bring the mediator record up to date with the set record */
	medr = rb_medr;				/* structure assignment */
	medr.med_rec_meds = sd->sd_med;		/* structure assignment */
	crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL);

	/* END CHECK CODE */

	/* Lock the set on current set members */
	if (!(MD_MNSET_DESC(sd))) {
		/* all signals already blocked for MN diskset */
		md_rb_sig_handling_on();
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
				rval = -1;
				goto out;
			}
			lock_flag = 1;
		}
	}

	RB_TEST(1, "meta_set_addmeds", ep)

	RB_PREEMPT;
	rb_level = 1;	/* level 1 */

	RB_TEST(2, "meta_set_addmeds", ep)

	/*
	 * Add the new mediator information to all hosts in the set.
	 * For MN diskset, each node sends mediator list to its kernel.
	 */
	if (MD_MNSET_DESC(sd)) {
		nd = sd->sd_nodelist;
		while (nd) {
			/* All nodes are guaranteed to be ALIVE */
			if (clnt_updmeds(nd->nd_nodename, sp, &sd->sd_med, ep))
				goto rollback;
			nd = nd->nd_next;
		}
	} else  {
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			if (clnt_updmeds(sd->sd_nodes[i], sp, &sd->sd_med, ep))
				goto rollback;
		}
	}

	RB_TEST(3, "meta_set_addmeds", ep)

	RB_PREEMPT;
	rb_level = 2;	/* level 2 */

	RB_TEST(4, "meta_set_addmeds", ep)

	/* A NULL result with no error in ep is not treated as a failure */
	if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
	    ep)) == NULL) {
		if (! mdisok(ep))
			goto rollback;
	}

	RB_TEST(5, "meta_set_addmeds", ep)

	RB_PREEMPT;
	rb_level = 3;	/* level 3 */

	RB_TEST(6, "meta_set_addmeds", ep)

	/* Inform the mediator hosts of the new information */
	for (i = 0; i < max_meds; i++) {
		if (sd->sd_med.n_lst[i].a_cnt == 0)
			continue;

		/* medr contains new mediator node list */
		if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp, &medr, ep))
			goto rollback;
	}

	RB_TEST(7, "meta_set_addmeds", ep)

	RB_PREEMPT;
	rb_level = 4;	/* level 4 */

	RB_TEST(8, "meta_set_addmeds", ep)

	/* In MN diskset, mediator list updated in clnt_updmeds call */
	if (dd != NULL) {
		if (!(MD_MNSET_DESC(sd))) {
			if (metaioctl(MD_MED_SET_LST, &mp, &mp.med_mde,
			    NULL) != 0) {
				(void) mdstealerror(ep, &mp.med_mde);
				goto rollback;
			}
		}

		/*
		 * If only 50% mddbs available, mediator will be
		 * golden by this ioctl on a traditional diskset.
		 *
		 * On a MN diskset, this only happens if the mediator
		 * add operation is executed on the master node.
		 * If a slave node is adding the mediator, the mediator
		 * won't be marked golden until the next mddb change.
		 */
		(void) memset(&mup, '\0', sizeof (mddb_med_upd_parm_t));
		mup.med_setno = sp->setno;
		/* Best effort: a failure here is deliberately ignored */
		if (metaioctl(MD_MED_UPD_MED, &mup, &mup.med_mde, NULL) != 0)
			mdclrerror(&mup.med_mde);
	}

out:
	if (suspend1_flag) {
		/*
		 * Unlock diskset by resuming messages across the diskset.
		 * Just resume all classes so that resume is the same whether
		 * just one class was locked or all classes were locked.
		 */
		nd = sd->sd_nodelist;
		/* All nodes are guaranteed to be ALIVE */
		while (nd) {
			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
				if (rval == 0)
					(void) mdstealerror(ep, &xep);
				rval = -1;
				mde_perror(ep, dgettext(TEXT_DOMAIN,
				    "Unable to resume rpc.mdcommd.\n"));
			}
			nd = nd->nd_next;
		}
		meta_ping_mnset(sp->setno);
	}
	if (lock_flag) {
		cl_sk = cl_get_setkey(sp->setno, sp->setname);
		if (MD_MNSET_DESC(sd)) {
			nd = sd->sd_nodelist;
			while (nd) {
				/* All nodes are guaranteed to be ALIVE */
				if (clnt_unlock_set(nd->nd_nodename,
				    cl_sk, &xep)) {
					if (rval == 0)
						(void) mdstealerror(ep, &xep);
					rval = -1;
				}
				nd = nd->nd_next;
			}
		} else  {
			for (i = 0; i < MD_MAXSIDES; i++) {
				/* Skip empty slots */
				if (sd->sd_nodes[i][0] == '\0')
					continue;

				if (clnt_unlock_set(sd->sd_nodes[i],
				    cl_sk, &xep)) {
					if (rval == 0)
						(void) mdstealerror(ep, &xep);
					rval = -1;
				}
			}
		}
		cl_set_setkey(NULL);
	}

	metafreedrivedesc(&dd);

	if (MD_MNSET_DESC(sd)) {
		/* release signals back to what they were on entry */
		if (procsigs(FALSE, &oldsigs, &xep) < 0)
			mdclrerror(&xep);
	} else {
		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
	}

	return (rval);

rollback:
	/* all signals already blocked for MN diskset */
	if (!(MD_MNSET_DESC(sd))) {
		if (procsigs(TRUE, &oldsigs, &xep) < 0)
			mdclrerror(&xep);
	}

	rval = -1;

	/*
	 * level 4
	 * In MN diskset, mediator list updated in clnt_updmeds call
	 */
	if (rb_level > 3 && (dd != NULL) && (!(MD_MNSET_DESC(sd)))) {
		(void) memset(&mp, '\0', sizeof (mddb_med_parm_t));
		mp.med_setno = sp->setno;
		(void) meta_h2hi(&rb_t, &mp.med, &xep);
		mdclrerror(&xep);
		(void) meta_med_hnm2ip(&mp.med, &xep);
		mdclrerror(&xep);
		(void) metaioctl(MD_MED_SET_LST, &mp, &mp.med_mde, NULL);
	}

	/* level 3 */
	if (rb_level > 2) {
		for (i = 0; i < max_meds; i++) {
			if (sd->sd_med.n_lst[i].a_cnt == 0)
				continue;

			/*
			 * rb_medr contains the rollback mediator node list.
			 * Send the rollback mediator information to the
			 * new mediator node list.  If a node had this RPC
			 * called, but its node is not in the mediator node
			 * list, rpc.metamedd will delete the mediator
			 * record on that node.
			 */
			if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp,
			    &rb_medr, &xep))
				mdclrerror(&xep);
		}
	}

	/* level 2 */
	if (rb_level > 1) {
		metafreedrivedesc(&dd);
	}

	/* level 1 */
	if (rb_level > 0) {
		/* Delete mediator information from all hosts in the set */
		if (MD_MNSET_DESC(sd)) {
			nd = sd->sd_nodelist;
			while (nd) {
				/* All nodes are guaranteed to be ALIVE */
				if (clnt_updmeds(nd->nd_nodename, sp, &rb_t,
				    &xep))
					mdclrerror(&xep);
				nd = nd->nd_next;
			}
		} else  {
			for (i = 0; i < MD_MAXSIDES; i++) {
				/* Skip empty slots */
				if (sd->sd_nodes[i][0] == '\0')
					continue;

				if (clnt_updmeds(sd->sd_nodes[i], sp, &rb_t,
				    &xep))
					mdclrerror(&xep);
			}
		}
	}

	/* level 0 */
	if (suspend1_flag) {
		/*
		 * Unlock diskset by resuming messages across the diskset.
		 * Just resume all classes so that resume is the same whether
		 * just one class was locked or all classes were locked.
		 */
		nd = sd->sd_nodelist;
		/* All nodes are guaranteed to be ALIVE */
		while (nd) {
			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
				/*
				 * Report the resume failure itself (held in
				 * xep) before discarding it; ep keeps the
				 * error that triggered the rollback.
				 */
				mde_perror(&xep, dgettext(TEXT_DOMAIN,
				    "Unable to resume rpc.mdcommd.\n"));
				mdclrerror(&xep);
			}
			nd = nd->nd_next;
		}
		meta_ping_mnset(sp->setno);
	}
	if (lock_flag) {
		cl_sk = cl_get_setkey(sp->setno, sp->setname);
		if (MD_MNSET_DESC(sd)) {
			nd = sd->sd_nodelist;
			while (nd) {
				/* All nodes are guaranteed to be ALIVE */
				if (clnt_unlock_set(nd->nd_nodename,
				    cl_sk, &xep)) {
					mdclrerror(&xep);
				}
				nd = nd->nd_next;
			}
		} else  {
			for (i = 0; i < MD_MAXSIDES; i++) {
				/* Skip empty slots */
				if (sd->sd_nodes[i][0] == '\0')
					continue;

				if (clnt_unlock_set(sd->sd_nodes[i],
				    cl_sk, &xep)) {
					mdclrerror(&xep);
				}
			}
		}
		cl_set_setkey(NULL);
	}

	/* release signals back to what they were on entry */
	if (procsigs(FALSE, &oldsigs, &xep) < 0)
		mdclrerror(&xep);

	if (!(MD_MNSET_DESC(sd))) {
		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
	}

	return (rval);
}

/*
 * meta_set_deletemeds - remove mediator hosts from a diskset.
 *
 *	sp	 diskset to modify
 *	node_c	 number of strings in node_v
 *	node_v	 node names of the mediator hosts to remove; aliases
 *		 (comma separated lists) are rejected
 *	forceflg when non-zero, lock/update/unlock failures on nodes
 *		 other than this one, and RPC failures talking to the
 *		 mediator hosts, are cleared and skipped instead of
 *		 aborting the operation
 *	ep	 error return
 *
 * Returns 0 on success; otherwise a non-zero value with ep set.
 *
 * After validation the current mediator list and mediator record are
 * saved (rb_t / rb_medr) and the update is applied in steps, with
 * rb_level recording how far it got:
 *
 *	level 1	new mediator list pushed to the hosts of the set
 *	level 2	drive descriptors fetched
 *	level 3	kernel mediator list set
 *	level 4	old mediator hosts sent the new mediator record
 *
 * A failure in any step jumps to "rollback", which undoes the steps in
 * reverse order.  Lock/suspend failures before level 1 go to "out".
 */
int
meta_set_deletemeds(
	mdsetname_t		*sp,
	int			node_c,
	char			**node_v,
	int			forceflg,
	md_error_t		*ep
)
{
	md_set_desc		*sd = NULL;
	md_drive_desc		*dd = NULL;
	mddb_med_parm_t		mp;
	md_h_arr_t		rb_t;
	med_rec_t		medr;
	med_rec_t		rb_medr;
	int			i, j;
	char			**n_l = NULL;
	int			n_c = 0;
	sigset_t		oldsigs;
	md_setkey_t		*cl_sk;
	int			rb_level = 0;
	md_error_t		xep = mdnullerror;
	int			rval = 0;
	int			max_meds;
	md_mnnode_desc		*nd;
	int			suspend1_flag = 0;
	int			lock_flag = 0;

	mdclrerror(ep);

	if ((sd = metaget_setdesc(sp, ep)) == NULL)
		return (-1);

	/* Make sure we own the set */
	if (meta_check_ownership(sp, ep) != 0)
		return (-1);

	/* Only plain node names are accepted, no alias lists */
	for (i = 0; i < node_c; i++)
		if (strchr(node_v[i], ',') != NULL)
			return (mderror(ep, MDE_ONLYNODENAME, node_v[i]));

	if (nodesuniq(sp, node_c, node_v, ep))
		return (-1);

	if ((max_meds = get_max_meds(ep)) == 0)
		return (-1);

	/*
	 * The mediator information (which is part of the set record) is
	 * stored in the local mddbs of each node in the diskset.
	 * Each node's rpc.metad daemon reads in the set
	 * records from that node's local mddb and caches them
	 * internally. Any process needing diskset information contacts its
	 * local rpc.metad to get this information.  Since each node in the
	 * diskset is independently reading the set information from its local
	 * mddb, the set records in the local mddbs must stay
	 * in-sync, so that all nodes have a consistent view of the diskset.
	 *
	 * For a multinode diskset, explicitly verify that all nodes in the
	 * diskset are ALIVE (i.e. are in the API membership list).  Otherwise,
	 * fail this operation since all nodes must be ALIVE in order to delete
	 * the mediator information from the set record in their local mddb.
	 * If a panic of this node leaves the local mddbs set records
	 * out-of-sync, the reconfig cycle will fix the local mddbs and
	 * force them back into synchronization.
	 */
	if (MD_MNSET_DESC(sd)) {
		nd = sd->sd_nodelist;
		while (nd) {
			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				(void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
					sp->setno,
					nd->nd_nodename, NULL, sp->setname);
				return (-1);
			}
			nd = nd->nd_next;
		}
	}

	if (sd->sd_med.n_cnt == 0)
		return (mderror(ep, MDE_NOMED, NULL));

	/* Make a list of nodes to check */
	for (i = 0; i < max_meds; i++)
		if (sd->sd_med.n_lst[i].a_cnt > 0)
			n_c = add_lst(&n_l, sd->sd_med.n_lst[i].a_nm[0]);

	/* Every requested node must currently be a mediator */
	for (i = 0; i < node_c; i++) {
		if (! strinlst(node_v[i], n_c, n_l)) {
			(void) del_lst(&n_l);
			return (mddserror(ep, MDE_DS_ISNOTMED, sp->setno,
			    node_v[i], NULL, sp->setname));
		}
	}

	(void) del_lst(&n_l);

	/* Save a copy of the current mediator information */
	rb_t = sd->sd_med;			/* structure assignment */

	/* Setup the mediator record for rollback */
	(void) memset(&rb_medr, '\0', sizeof (med_rec_t));
	rb_medr.med_rec_mag = MED_REC_MAGIC;
	rb_medr.med_rec_rev = MED_REC_REV;
	rb_medr.med_rec_fl  = 0;
	rb_medr.med_rec_sn  = sp->setno;
	(void) strcpy(rb_medr.med_rec_snm, sp->setname);
	if (MD_MNSET_DESC(sd)) {
		/*
		 * In MN diskset, use a generic nodename, multiowner, in the
		 * mediator record which allows any node to access mediator
		 * information.  MN diskset reconfig cycle forces consistent
		 * view of set/node/drive/mediator information across all nodes
		 * in the MN diskset.  This allows the relaxation of
		 * node name checking in rpc.metamedd for MN disksets.
		 *
		 * In the traditional diskset, only a node that is in the
		 * mediator record's diskset nodelist can access mediator
		 * data.
		 */
		(void) strlcpy(rb_medr.med_rec_nodes[0], MED_MN_CALLER,
		    MD_MAX_NODENAME_PLUS_1);
	} else {
		for (i = 0; i < MD_MAXSIDES; i++)
			(void) strcpy(rb_medr.med_rec_nodes[i],
				sd->sd_nodes[i]);
	}
	rb_medr.med_rec_meds = sd->sd_med;	/* structure assignment */
	(void) memset(&rb_medr.med_rec_data, '\0', sizeof (med_data_t));
	rb_medr.med_rec_foff = 0;
	crcgen(&rb_medr, &rb_medr.med_rec_cks, sizeof (med_rec_t), NULL);

	/* Delete the mediators requested from the set */
	for (i = 0; i < node_c; i++) {
		for (j = 0; j < max_meds; j++) {
			if (sd->sd_med.n_lst[j].a_cnt == 0)
				continue;
			if (strcmp(node_v[i],
			    sd->sd_med.n_lst[j].a_nm[0]) != 0)
				continue;
			SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE,
			    SVM_TAG_MEDIATOR, sp->setno, j);
			(void) memset(&sd->sd_med.n_lst[j], '\0',
			    sizeof (md_h_t));
			sd->sd_med.n_cnt--;
			break;
		}
	}

	/* Build the new mediator record from the updated set record */
	medr = rb_medr;				/* structure assignment */
	medr.med_rec_meds = sd->sd_med;		/* structure assignment */
	crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL);

	/* END CHECK CODE */

	/* Lock the set on current set members */
	if (MD_MNSET_DESC(sd)) {
		/* Make sure we are blocking all signals */
		if (procsigs(TRUE, &oldsigs, &xep) < 0)
			mdclrerror(&xep);
		/*
		 * Lock the set on current set members.
		 * lock_set and SUSPEND are used to protect against
		 * other metaset commands running on the other nodes.
		 */
		nd = sd->sd_nodelist;
		while (nd) {
			/* All nodes are guaranteed to be ALIVE */
			if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
				if (forceflg && strcmp(mynode(),
				    nd->nd_nodename) != 0) {
					mdclrerror(ep);
					nd = nd->nd_next;
					continue;
				}
				rval = -1;
				goto out;
			}
			lock_flag = 1;
			nd = nd->nd_next;
		}
		/*
		 * Lock out other meta* commands by suspending
		 * class 1 messages across the diskset.
		 */
		nd = sd->sd_nodelist;
		/* All nodes are guaranteed to be ALIVE */
		while (nd) {
			if (clnt_mdcommdctl(nd->nd_nodename,
			    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
			    MD_MSCF_NO_FLAGS, ep)) {
				rval = -1;
				goto out;
			}
			suspend1_flag = 1;
			nd = nd->nd_next;
		}
	} else  {
		md_rb_sig_handling_on();
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
				if (forceflg &&
				    strcmp(mynode(), sd->sd_nodes[i]) != 0) {
					mdclrerror(ep);
					continue;
				}
				rval = -1;
				goto out;
			}
			lock_flag = 1;
		}
	}

	RB_TEST(1, "meta_set_deletemeds", ep)

	RB_PREEMPT;
	rb_level = 1;	/* level 1 */

	RB_TEST(2, "meta_set_deletemeds", ep)

	/* Update the mediator information on all hosts in the set */
	if (MD_MNSET_DESC(sd)) {
		nd = sd->sd_nodelist;
		while (nd) {
			/* All nodes are guaranteed to be ALIVE */
			if (clnt_updmeds(nd->nd_nodename, sp, &sd->sd_med,
			    ep)) {
				if (forceflg && strcmp(mynode(),
				    nd->nd_nodename) != 0) {
					mdclrerror(ep);
					/*
					 * Step past the failed node; without
					 * this the loop would retry the same
					 * node forever.
					 */
					nd = nd->nd_next;
					continue;
				}
				goto rollback;
			}
			nd = nd->nd_next;
		}
	} else  {
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			if (clnt_updmeds(sd->sd_nodes[i], sp, &sd->sd_med,
			    ep)) {
				if (forceflg && strcmp(mynode(),
				    sd->sd_nodes[i]) != 0) {
					mdclrerror(ep);
					continue;
				}
				goto rollback;
			}
		}
	}

	RB_TEST(3, "meta_set_deletemeds", ep)

	RB_PREEMPT;
	rb_level = 2;	/* level 2 */

	/* Test points are numbered sequentially, as in meta_set_addmeds */
	RB_TEST(4, "meta_set_deletemeds", ep)

	/* A NULL result with no error in ep is not treated as a failure */
	if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
	    ep)) == NULL) {
		if (! mdisok(ep))
			goto rollback;
	}

	RB_TEST(5, "meta_set_deletemeds", ep)

	RB_PREEMPT;
	rb_level = 3;	/* level 3 */

	RB_TEST(6, "meta_set_deletemeds", ep)

	if (dd != NULL) {
		/*
		 * Set up the parameters to the call to update the
		 * kernel mediator list
		 */
		(void) memset(&mp, '\0', sizeof (mddb_med_parm_t));
		mp.med_setno = sp->setno;
		if (meta_h2hi(&sd->sd_med, &mp.med, ep))
			goto rollback;

		/* Resolve the IP addresses for the host list */
		if (meta_med_hnm2ip(&mp.med, ep))
			goto rollback;

		if (metaioctl(MD_MED_SET_LST, &mp, &mp.med_mde, NULL) != 0) {
			(void) mdstealerror(ep, &mp.med_mde);
			goto rollback;
		}
	}

	RB_TEST(7, "meta_set_deletemeds", ep)

	RB_PREEMPT;
	rb_level = 4;	/* level 4 */

	RB_TEST(8, "meta_set_deletemeds", ep)

	/* Inform the mediator hosts of the new status */
	for (i = 0; i < max_meds; i++) {
		if (rb_t.n_lst[i].a_cnt == 0)
			continue;

		/*
		 * medr contains the new mediator node list.
		 * Send the new mediator information to the
		 * new mediator node list.  If a node had this RPC
		 * called, but its node is no longer in the new mediator
		 * node list, rpc.metamedd will delete the mediator
		 * record on that node.
		 */
		if (clnt_med_upd_rec(&rb_t.n_lst[i], sp, &medr, ep)) {
			if ((forceflg && mdanyrpcerror(ep)) ||
			    mdisrpcerror(ep, RPC_PROGNOTREGISTERED)) {
				mdclrerror(ep);
				continue;
			}
			goto rollback;
		}
	}

out:
	if (dd)
		metafreedrivedesc(&dd);

	if (suspend1_flag) {
		/*
		 * Unlock diskset by resuming messages across the diskset.
		 * Just resume all classes so that resume is the same whether
		 * just one class was locked or all classes were locked.
		 */
		nd = sd->sd_nodelist;
		/* All nodes are guaranteed to be ALIVE */
		while (nd) {
			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
				if (rval == 0)
					(void) mdstealerror(ep, &xep);
				rval = -1;
				mde_perror(ep, dgettext(TEXT_DOMAIN,
				    "Unable to resume rpc.mdcommd.\n"));
			}
			nd = nd->nd_next;
		}
		meta_ping_mnset(sp->setno);
	}

	cl_sk = cl_get_setkey(sp->setno, sp->setname);
	if (lock_flag) {
		if (MD_MNSET_DESC(sd)) {
			nd = sd->sd_nodelist;
			while (nd) {
				/* All nodes are guaranteed to be ALIVE */
				if (clnt_unlock_set(nd->nd_nodename,
				    cl_sk, &xep)) {
					if (forceflg &&
					    strcmp(mynode(),
					    nd->nd_nodename) != 0) {
						/*
						 * The unlock error lives in
						 * xep; drop it there so the
						 * caller's ep is left alone,
						 * and move on to the next
						 * node.
						 */
						mdclrerror(&xep);
						nd = nd->nd_next;
						continue;
					}
					if (rval == 0)
						(void) mdstealerror(ep, &xep);
					rval = -1;
				}
				nd = nd->nd_next;
			}
		} else {
			for (i = 0; i < MD_MAXSIDES; i++) {
				/* Skip empty slots */
				if (sd->sd_nodes[i][0] == '\0')
					continue;

				if (clnt_unlock_set(sd->sd_nodes[i],
				    cl_sk, &xep)) {
					if (forceflg &&
					    strcmp(mynode(),
					    sd->sd_nodes[i]) != 0) {
						/* error is in xep, not ep */
						mdclrerror(&xep);
						continue;
					}
					if (rval == 0)
						(void) mdstealerror(ep, &xep);
					rval = -1;
				}
			}
		}
	}
	cl_set_setkey(NULL);

	if (MD_MNSET_DESC(sd)) {
		/* release signals back to what they were on entry */
		if (procsigs(FALSE, &oldsigs, &xep) < 0)
			mdclrerror(&xep);
	} else {
		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
	}

	return (rval);

rollback:
	/* all signals already blocked for MN diskset */
	if (!(MD_MNSET_DESC(sd))) {
		if (procsigs(TRUE, &oldsigs, &xep) < 0)
			mdclrerror(&xep);
	}

	rval = -1;

	(void) del_lst(&n_l);

	/*
	 * level 4
	 * rb_level never exceeds 4, so the test must be "> 3" for this
	 * step to run when the mediator host update itself failed.
	 */
	if (rb_level > 3) {
		for (i = 0; i < max_meds; i++) {
			if (rb_t.n_lst[i].a_cnt == 0)
				continue;

			/*
			 * rb_medr contains the rollback mediator node list.
			 * Send the rollback mediator information to the
			 * new mediator node list.  This will recreate the
			 * mediator record on all nodes where the mediator
			 * record had been removed.
			 */
			if (clnt_med_upd_rec(&rb_t.n_lst[i], sp, &rb_medr,
			    &xep))
				mdclrerror(&xep);
		}
	}

	/* level 3 */
	if (rb_level > 2 && dd != NULL) {
		(void) memset(&mp, '\0', sizeof (mddb_med_parm_t));
		mp.med_setno = sp->setno;
		(void) meta_h2hi(&rb_t, &mp.med, &xep);
		mdclrerror(&xep);
		(void) meta_med_hnm2ip(&mp.med, &xep);
		mdclrerror(&xep);
		(void) metaioctl(MD_MED_SET_LST, &mp, &mp.med_mde, NULL);
	}

	/* level 2 */
	if (rb_level > 1) {
		metafreedrivedesc(&dd);
	}

	/* level 1 */
	if (rb_level > 0) {
		/* Restore the saved mediator list on all hosts in the set */
		if (MD_MNSET_DESC(sd)) {
			nd = sd->sd_nodelist;
			while (nd) {
				/* All nodes are guaranteed to be ALIVE */
				if (clnt_updmeds(nd->nd_nodename, sp, &rb_t,
				    &xep))
					mdclrerror(&xep);
				nd = nd->nd_next;
			}
		} else  {
			for (i = 0; i < MD_MAXSIDES; i++) {
				/* Skip empty slots */
				if (sd->sd_nodes[i][0] == '\0')
					continue;

				if (clnt_updmeds(sd->sd_nodes[i], sp, &rb_t,
				    &xep))
					mdclrerror(&xep);
			}
		}
	}

	/* level 0 */
	cl_sk = cl_get_setkey(sp->setno, sp->setname);
	/* Unlock the set */
	/* Don't test lock flag since guaranteed to be set if in rollback */
	if (MD_MNSET_DESC(sd)) {
		/*
		 * Unlock diskset by resuming messages across the diskset.
		 * Just resume all classes so that resume is the same whether
		 * just one class was locked or all classes were locked.
		 */
		if (suspend1_flag) {
			/* All nodes are guaranteed to be ALIVE */
			nd = sd->sd_nodelist;
			while (nd) {
				if (clnt_mdcommdctl(nd->nd_nodename,
				    COMMDCTL_RESUME, sp, MD_MSG_CLASS0,
				    MD_MSCF_NO_FLAGS, &xep)) {
				    mde_perror(&xep, dgettext(TEXT_DOMAIN,
					"Unable to resume rpc.mdcommd.\n"));
				    mdclrerror(&xep);
				}
				nd = nd->nd_next;
			}
			meta_ping_mnset(sp->setno);
		}
		nd = sd->sd_nodelist;
		/* All nodes are guaranteed to be ALIVE */
		while (nd) {
			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
				mdclrerror(&xep);
			nd = nd->nd_next;
		}
	} else  {
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
				mdclrerror(&xep);
		}
	}
	cl_set_setkey(NULL);

	/* release signals back to what they were on entry */
	if (procsigs(FALSE, &oldsigs, &xep) < 0)
		mdclrerror(&xep);

	if (!(MD_MNSET_DESC(sd))) {
		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
	}

	return (rval);
}