OpenSolaris_b135/lib/udapl/udapl_tavor/common/dapl_ia_open.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved.
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


/*
 *
 * MODULE: dapl_ia_open.c
 *
 * PURPOSE: Interface Adapter management
 * Description: Interfaces in this file are completely described in
 *		the DAPL 1.1 API, Chapter 6, section 2
 *
 * $Id: dapl_ia_open.c,v 1.30 2003/07/31 14:04:17 jlentini Exp $
 */

#include "dapl.h"
#include "dapl_provider.h"
#include "dapl_evd_util.h"
#include "dapl_hca_util.h"
#include "dapl_ia_util.h"
#include "dapl_adapter_util.h"
#include <sys/systeminfo.h>
#include <libdevinfo.h>
#include <string.h>


/*
 * LOCAL PROTOTYPES
 */
#if defined(IBHOSTS_NAMING)
void dapli_assign_hca_ip_address(
	DAPL_HCA		*hca_ptr,
	char 			*device_name);
#endif /* IBHOSTS_NAMING */

static void dapli_hca_cleanup(DAPL_HCA *hca_ptr, DAT_BOOLEAN dec_ref);

/*
 * dapl_ro_disallowed
 *
 * Decide whether RO (relaxed ordering) has to be disabled on this system.
 *
 * Returns B_TRUE when relaxed ordering must NOT be used, which is the case
 * when either
 *   - the platform name reported by sysinfo(SI_PLATFORM) is one of the
 *     entries in the table of platforms that are not RO capable, or
 *   - the first "daplt" device node carries the
 *     "pci-relaxed-ordering-disallowed" property (the lookup reports 0 or
 *     1 values; 0 presumably means a value-less boolean property --
 *     confirm against di_prop_lookup_ints(3DEVINFO)).
 *
 * Returns B_FALSE otherwise, including when the device tree snapshot
 * cannot be taken or no "daplt" node exists.
 *
 * The function takes no arguments and has no out parameters.
 */
static boolean_t
dapl_ro_disallowed(void)
{
	static const char * const non_ro_capable_platforms[] = {
		"i86pc",
		"i86xpv",
		"SUNW,Sun-Fire-V215",
		"SUNW,Sun-Fire-V245",
		"SUNW,Sun-Fire-V445",
		"SUNW,Sun-Fire-T1000",
		"SUNW,Sun-Fire-T200",
		"SUNW,Sun-Blade-T6300",
		"SUNW,Sun-Blade-T6320",
		"SUNW,SPARC-Enterprise-T1000",
		"SUNW,SPARC-Enterprise-T2000",
		"SUNW,SPARC-Enterprise-T5120",
		"SUNW,SPARC-Enterprise-T5220",
		NULL
	};
	static const char *ro_disallowed_property =
	    "pci-relaxed-ordering-disallowed";
	char platform[256 + 1];
	const char * const *entry;
	di_node_t root_node, node;
	boolean_t ro_disallowed = B_FALSE;
	int prop_storage;
	int *prop_values = &prop_storage;
	int ret;

	/* First consult the static table of known non-RO platforms. */
	ret = sysinfo(SI_PLATFORM, platform, sizeof (platform));
	if ((ret != -1) && (ret <= sizeof (platform))) {
		for (entry = non_ro_capable_platforms; *entry != NULL;
		    entry++) {
			if (strcmp(platform, *entry) == 0)
				return (B_TRUE);
		}
	}

	/*
	 * Otherwise look for the property on the driver node.  Only the
	 * FIRST daplt node is examined; it is assumed that there can only
	 * be one such node.
	 */
	root_node = di_init("/", DINFOSUBTREE | DINFOPROP);
	if (root_node == DI_NODE_NIL)
		return (B_FALSE);

	node = di_drv_first_node("daplt", root_node);
	if (node != DI_NODE_NIL) {
		ret = di_prop_lookup_ints(DDI_DEV_T_ANY, node,
		    ro_disallowed_property, &prop_values);
		/* Any other result means the property is not set. */
		if (ret == 0 || ret == 1)
			ro_disallowed = B_TRUE;
	}

	di_fini(root_node);

	return (ro_disallowed);
}

/*
 * dapl_ia_open
 *
 * DAPL Requirements Version xxx, 6.2.1.1
 *
 * Open a provider and return a handle. The handle enables the user
 * to invoke operations on this provider.
 *
 * The dat_ia_open  call is actually part of the DAT registration module.
 * That function maps the DAT_NAME parameter of dat_ia_open to a DAT_PROVIDER,
 * and calls this function.
 *
 * Input:
 *	name			provider name, looked up in the provider list
 *	async_evd_qlen		queue length used when an async EVD has to
 *				be created here; must be > 0 in that case
 *	async_evd_handle_ptr	points at an existing async EVD handle to
 *				reuse, or at a NULL handle to have one created
 *	ro_aware_client		whether the client is relaxed-ordering aware;
 *				a non-aware client is rejected when
 *				hca_ptr->hermon_resize_cq is set and relaxed
 *				ordering is not disallowed on this system
 *
 * Output:
 *	async_evd_handle_ptr	set to the async EVD used by the new IA
 *	ia_handle_ptr		set to the new IA (DAT_HANDLE_NULL on failure
 *				once the arguments have been validated)
 *
 * Return Values:
 * 	DAT_SUCCESS
 * 	DAT_INSUFFICIENT_RESOURCES
 * 	DAT_INVALID_PARAMETER
 * 	DAT_INVALID_HANDLE
 * 	DAT_NAME_NOT_FOUND	(returned by dat registry if necessary)
 *	or whatever error the HCA open/query or EVD creation calls return
 */
DAT_RETURN
dapl_ia_open(
	IN	const DAT_NAME_PTR	name,
	IN	DAT_COUNT		async_evd_qlen,
	INOUT	DAT_EVD_HANDLE		*async_evd_handle_ptr,
	OUT	DAT_IA_HANDLE		*ia_handle_ptr,
	IN	boolean_t		ro_aware_client)
{
	DAT_RETURN	dat_status;
	DAT_PROVIDER	*provider;
	DAPL_HCA	*hca_ptr;
	DAPL_IA		*ia_ptr;
	DAPL_EVD	*evd_ptr;
	boolean_t	ro_disallowed;
	boolean_t	hca_opened_here;

	dat_status = DAT_SUCCESS;
	hca_ptr = NULL;
	ia_ptr = NULL;
	hca_opened_here = B_FALSE;

	dapl_dbg_log(DAPL_DBG_TYPE_API,
	    "dapl_ia_open(%s, %d, %p, %p, %d)\n",
	    name,
	    async_evd_qlen,
	    async_evd_handle_ptr,
	    ia_handle_ptr,
	    ro_aware_client);

	dat_status = dapl_provider_list_search(name, &provider);
	if (DAT_SUCCESS != dat_status) {
		dapl_dbg_log(DAPL_DBG_TYPE_API,
		    "dapl_ia_open: dapl_provider_list_search(\"%s\") returned "
		    "%d\n",
		    name,
		    dat_status);

		dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG1);
		goto bail;
	}

	/* ia_handle_ptr and async_evd_handle_ptr cannot be NULL */
	if (ia_handle_ptr == NULL) {
		dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG4);
		goto bail;
	}
	if (async_evd_handle_ptr == NULL) {
		dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG3);
		goto bail;
	}

	/* initialize the caller's OUT param */
	*ia_handle_ptr = DAT_HANDLE_NULL;

	/* get the hca_ptr */
	hca_ptr = (DAPL_HCA *)provider->extension;

	/*
	 * Open the HCA if it has not been done before.
	 */
	dapl_os_lock(&hca_ptr->lock);
	if (hca_ptr->ib_hca_handle == IB_INVALID_HANDLE) {
		/* register with the HW */
		dat_status = dapls_ib_open_hca(hca_ptr,
		    &hca_ptr->ib_hca_handle);

		if (dat_status != DAT_SUCCESS) {
			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
			    "dapls_ib_open_hca failed %d\n", dat_status);
			dapl_os_unlock(&hca_ptr->lock);
			goto bail;
		}

		/*
		 * Remember that this call did the open, so that later
		 * failure paths know whether the HCA is theirs to close.
		 */
		hca_opened_here = B_TRUE;

		/* create a cq domain for this HCA */
		dat_status = dapls_ib_cqd_create(hca_ptr);

		if (dat_status != DAT_SUCCESS) {
			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
			    "ERR: Cannot allocate CQD: err %x\n", dat_status);
			dapli_hca_cleanup(hca_ptr, DAT_FALSE);
			dapl_os_unlock(&hca_ptr->lock);
			goto bail;
		}
		/*
		 * Obtain the IP address associated with this name and HCA.
		 */

#ifdef IBHOSTS_NAMING
		dapli_assign_hca_ip_address(hca_ptr, name);
#endif /* IBHOSTS_NAMING */

		/*
		 * Obtain IA attributes from the HCA to limit certain
		 * operations.
		 * If using DAPL_ATS naming, ib_query_hca will also set the ip
		 * address.
		 */
		dat_status = dapls_ib_query_hca(hca_ptr,
		    &hca_ptr->ia_attr,
		    NULL,
		    &hca_ptr->hca_address, NULL);
		if (dat_status != DAT_SUCCESS) {
			dapli_hca_cleanup(hca_ptr, DAT_FALSE);
			dapl_os_unlock(&hca_ptr->lock);
			goto bail;
		}
	}

	/* is the IA going to use the ConnectX? */
	if (hca_ptr->hermon_resize_cq != 0) {
		/*
		 * We are running with a ConnectX.
		 * Determine whether platform is RO capable.
		 * If platform support RO and client does not
		 * support RO and we are not disabling RO, reject the open.
		 */
		ro_disallowed = dapl_ro_disallowed();

		if (! ro_aware_client && ! ro_disallowed) {
			dapl_dbg_log(DAPL_DBG_TYPE_API,
			    "dapl_ia_open: failing ro_disallowed %d "
			    "ro_aware_client %d \n",
			    ro_disallowed, ro_aware_client);

			dat_status = DAT_ERROR(DAT_INVALID_PARAMETER,
			    DAT_INVALID_RO_COOKIE);
			/*
			 * Only tear the HCA down if this call opened it.
			 * An HCA that was already open on entry may be in
			 * use by other IAs, and the lock has been held
			 * continuously since the open check above, so
			 * rejecting this client must leave it untouched.
			 */
			if (hca_opened_here)
				dapli_hca_cleanup(hca_ptr, DAT_FALSE);
			dapl_os_unlock(&hca_ptr->lock);
			goto bail;
		}
	} else {
		/* We are not running with a Connect X */
		ro_disallowed = B_TRUE;
	}


	/* Take a reference on the hca_handle */
	dapl_os_atomic_inc(&hca_ptr->handle_ref_count);
	dapl_os_unlock(&hca_ptr->lock);

	/* Allocate and initialize ia structure */
	ia_ptr = dapl_ia_alloc(provider, hca_ptr);
	if (!ia_ptr) {
		/*
		 * NOTE(review): this closes the HCA unconditionally after
		 * dropping our reference.  If other IAs hold references at
		 * this point the close looks premature -- confirm against
		 * the handle_ref_count handling in the IA release path.
		 */
		dapl_os_lock(&hca_ptr->lock);
		dapli_hca_cleanup(hca_ptr, DAT_TRUE);
		dapl_os_unlock(&hca_ptr->lock);
		dat_status = DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
		    DAT_RESOURCE_MEMORY);
		goto bail;
	}

	/*
	 * Note when we should be disabling relaxed ordering.
	 * If the property indicates that we should not use relaxed ordering
	 * we remember that fact.  If the platform is supposed to be
	 * non relaxed ordering capable, we disable relaxed ordering as
	 * well, just in case the property or the list indicating that
	 * this platform is not relaxed ordering capable is mistaken.
	 */
	if (ro_disallowed)
		ia_ptr->dapl_flags |= DAPL_DISABLE_RO;

	/*
	 * we need an async EVD for this IA
	 * use the one passed in (if non-NULL) or create one
	 */

	evd_ptr = (DAPL_EVD *) *async_evd_handle_ptr;
	if (evd_ptr) {
		if (DAPL_BAD_HANDLE(evd_ptr, DAPL_MAGIC_EVD) ||
		    ! (evd_ptr->evd_flags & DAT_EVD_ASYNC_FLAG)) {
			dat_status = DAT_ERROR(DAT_INVALID_HANDLE,
			    DAT_INVALID_HANDLE_EVD_ASYNC);
			goto bail;
		}
		/*
		 * InfiniBand allows only 1 asychronous event handler per HCA
		 * (see InfiniBand Spec, release 1.1, vol I, section 11.5.2,
		 *  page 559).
		 *
		 * We only need to make sure that this EVD's CQ belongs to
		 * the same HCA as is being opened.
		 */

		if (evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle !=
		    hca_ptr->ib_hca_handle) {
			dat_status = DAT_ERROR(DAT_INVALID_HANDLE,
			    DAT_INVALID_HANDLE_EVD_ASYNC);
			goto bail;
		}

		ia_ptr->cleanup_async_error_evd = DAT_FALSE;
		ia_ptr->async_error_evd = evd_ptr;
	} else {
		/*
		 * Verify we have >0 length, and let the provider check the
		 * size
		 */
		if (async_evd_qlen <= 0) {
			dat_status = DAT_ERROR(DAT_INVALID_PARAMETER,
			    DAT_INVALID_ARG2);
			goto bail;
		}
		dat_status = dapls_evd_internal_create(ia_ptr,
		    NULL,	/* CNO ptr */
		    async_evd_qlen,
		    DAT_EVD_ASYNC_FLAG,
		    &evd_ptr);
		if (dat_status != DAT_SUCCESS) {
			goto bail;
		}

		dapl_os_atomic_inc(&evd_ptr->evd_ref_count);

		dapl_os_lock(&hca_ptr->lock);
		if (hca_ptr->async_evd != (DAPL_EVD *) 0) {
			/*
			 * The async EVD for this HCA has already been
			 * assigned.  Strictly, assigning another one is an
			 * error, but we need to somehow allow multiple IAs
			 * off of the same HCA.  The right way to do this
			 * is by dispatching events off the HCA to the
			 * appropriate IA, but we aren't there yet.  So for
			 * now we create the EVD but don't connect it to
			 * anything.
			 */
			dapl_os_unlock(&hca_ptr->lock);
		} else {
			hca_ptr->async_evd = evd_ptr;
			dapl_os_unlock(&hca_ptr->lock);

			/*
			 * Register the handlers associated with the async EVD.
			 */
			dat_status = dapls_ia_setup_callbacks(ia_ptr, evd_ptr);
			if (dat_status != DAT_SUCCESS) {
				/* Assign the EVD so it gets cleaned up */
				ia_ptr->cleanup_async_error_evd = DAT_TRUE;
				ia_ptr->async_error_evd = evd_ptr;
				goto bail;
			}
		}

		ia_ptr->cleanup_async_error_evd = DAT_TRUE;
		ia_ptr->async_error_evd = evd_ptr;
	}

	dat_status = DAT_SUCCESS;
	*ia_handle_ptr = ia_ptr;
	*async_evd_handle_ptr = evd_ptr;

bail:
	if (dat_status != DAT_SUCCESS) {
		if (ia_ptr) {
			/* This will release the async EVD if needed.  */
			(void) dapl_ia_close(ia_ptr, DAT_CLOSE_ABRUPT_FLAG);
		}
	}

	dapl_dbg_log(DAPL_DBG_TYPE_RTN,
	    "dapl_ia_open () returns 0x%x\n",
	    dat_status);

	return (dat_status);
}

/*
 * dapli_hca_cleanup
 *
 * Undo a partially completed HCA open so that the error paths in
 * dapl_ia_open() stay short.  Callers in this file invoke it with
 * hca_ptr->lock held.
 *
 * Input:
 *	hca_ptr		HCA whose IB handle is closed and then reset to
 *			IB_INVALID_HANDLE
 *	dec_ref		DAT_TRUE to also drop one count from
 *			hca_ptr->handle_ref_count; any other value leaves
 *			the count alone
 *
 * Returns:
 *	none (the result of dapls_ib_close_hca() is discarded)
 */
void
dapli_hca_cleanup(
	DAPL_HCA	*hca_ptr,
	DAT_BOOLEAN	dec_ref)
{
	/* Close first, then mark the handle unusable for later opens. */
	(void) dapls_ib_close_hca(hca_ptr->ib_hca_handle);
	hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;

	if (dec_ref != DAT_TRUE)
		return;

	dapl_os_atomic_dec(&hca_ptr->handle_ref_count);
}

#if defined(IBHOSTS_NAMING)

char *dapli_get_adapter_num(
	char 			*device_name);

void dapli_setup_dummy_addr(
	IN  DAPL_HCA		*hca_ptr,
	IN  char		*hca_name);
/*
 * dapli_assign_hca_ip_address
 *
 * Obtain the IP address of the passed in name, which represents a
 * port on the hca. There are three methods here to obtain the
 * appropriate IP address, each with their own shortcoming:
 * 1) IPOIB_NAMING. Requires the implementation of the IPoIB
 *    interface defined in include/dapl/ipoib_names.h. This is
 *    not the recommended interface as IPoIB is limited at
 *    the point we need to obtain an IP address on the
 *    passive side of a connection. The code supporting this
 *    implementation has been removed.
 *
 * 2) IBHOSTS. An entry exists in DNS and in the /etc/dapl/ibhosts
 *    file. The immediate drawback here is that we must dictate
 *    how to name the interface, which is a stated DAPL non-goal.
 *    In the broader perspective, this method requires us to xmit
 *    the IP address in the private data of a connection, which has
 *    other fun problems. This is the default method and is known to
 *    work, but it has problems.
 *
 * 3) Obtain the IP address from the driver, which has registered
 *    the address with the SA for retrieval.
 *
 * This routine implements method 2.  When the host name cannot be
 * obtained, the generated name does not fit the local buffer, or the
 * lookup fails, a dummy address is installed instead (see
 * dapli_setup_dummy_addr()).
 *
 * Input:
 *	hca_ptr			Pointer to HCA structure; its hca_address
 *				field is filled in
 *	device_name		Name of device as reported by the provider
 *
 * Output:
 * 	none
 *
 * Returns:
 * 	none
 */
void
dapli_assign_hca_ip_address(
	DAPL_HCA		*hca_ptr,
	char 			*device_name)
{
	char		*adapter_num;
#define	NAMELEN	128
	struct addrinfo	*addr;
	char 		hostname[NAMELEN];
	char		*str;
	size_t		addr_len;
	int		rc;

	/*
	 * Obtain the IP address of the adapter. This is a simple
	 * scheme that creates a name that must appear available to
	 * DNS, e.g. it must be in the local site DNS or in the local
	 * /etc/hosts file, etc.
	 *
	 *	<hostname>-ib<index>
	 *
	 * This scheme obviously doesn't work with adapters from
	 * multiple vendors, but will suffice in common installations.
	 */

	rc = gethostname(hostname, NAMELEN);
	if (rc != 0) {
		/* The buffer contents are undefined; do not parse them. */
		hostname[0] = '\0';
		dapli_setup_dummy_addr(hca_ptr, hostname);
		return;
	}
	/* A truncated host name is not guaranteed to be terminated. */
	hostname[NAMELEN - 1] = '\0';

	/*
	 * Strip off domain info if it exists (e.g. mynode.mydomain.com)
	 */
	for (str = hostname; *str && *str != '.'; ) {
		str++;
	}
	if (*str == '.') {
		*str = '\0';
	}

	/*
	 * Make sure "<hostname>-ib<index>" plus its terminator fits before
	 * appending; the index comes from the caller-supplied device name
	 * and is not bounded.
	 */
	adapter_num = dapli_get_adapter_num(device_name);
	if (strlen(hostname) + strlen("-ib") + strlen(adapter_num) >=
	    NAMELEN) {
		dapli_setup_dummy_addr(hca_ptr, hostname);
		return;
	}
	dapl_os_strcat(hostname, "-ib");
	dapl_os_strcat(hostname, adapter_num);

	rc = dapls_osd_getaddrinfo(hostname, &addr);

	if (rc != 0 || addr == NULL || addr->ai_addr == NULL) {
		/* Not registered in DNS, provide a dummy value */
		dapli_setup_dummy_addr(hca_ptr, hostname);
		return;
	}

	/*
	 * hca_address is defined as a DAT_SOCK_ADDR6 whereas ai_addr
	 * is a sockaddr whose real size is ai_addrlen.  Copy only what
	 * the lookup returned (never more than the destination holds)
	 * and leave the remainder zeroed.
	 */
	addr_len = (size_t)addr->ai_addrlen;
	if (addr_len > sizeof (DAT_SOCK_ADDR6))
		addr_len = sizeof (DAT_SOCK_ADDR6);
	(void) memset((void *)&hca_ptr->hca_address, 0,
	    sizeof (DAT_SOCK_ADDR6));
	(void) dapl_os_memcpy((void *)&hca_ptr->hca_address,
	    (void *)(addr->ai_addr), addr_len);

	/*
	 * NOTE(review): the list returned through addr is not released
	 * here.  Presumably it should be handed to the free routine that
	 * pairs with dapls_osd_getaddrinfo() -- confirm and add.
	 */
}


/*
 * dapli_setup_dummy_addr
 *
 * Install a placeholder local address for the HCA and log a warning.
 * Things are not going to work too well if this happens.
 * We call this routine if:
 *  - remote host adapter name is not in DNS
 *  - IPoIB implementation is not correctly set up
 *  - Similar nonsense.
 *
 * Only the address family (AF_INET) and the IPv4 address field of
 * hca_ptr->hca_address are written; the constant 0x01020304 is stored
 * as-is, without any byte-order conversion.
 *
 * Input:
 *      hca_ptr			HCA whose hca_address is overwritten
 *	rhost_name		Name of remote adapter (used in the log
 *				message only)
 *
 * Output:
 * 	none
 *
 * Returns:
 * 	none
 */
void
dapli_setup_dummy_addr(
	IN  DAPL_HCA		*hca_ptr,
	IN  char		*rhost_name)
{
	struct sockaddr_in	*dummy;

	/* View the HCA address storage as an IPv4 socket address. */
	dummy = (struct sockaddr_in *)&hca_ptr->hca_address;

	/* Not registered in DNS, provide a dummy value */
	dapl_dbg_log(DAPL_DBG_TYPE_ERR, "WARNING: <%s> not registered in DNS,"
	    " using dummy IP value\n", rhost_name);

	dummy->sin_addr.s_addr = 0x01020304;
	dummy->sin_family = AF_INET;
}


/*
 * dapli_get_adapter_num
 *
 * Given a device name, return a string of the device number.
 *
 * The result is the tail of device_name starting at its FIRST decimal
 * digit (so "ib1a" yields "1a").  Names that contain no digit, and a
 * NULL name, yield the string "0".
 *
 * Input:
 *	device_name		Name of device as reported by the provider
 *
 * Output:
 * 	none
 *
 * Returns:
 * 	char * to string number; either a pointer into device_name or a
 *	pointer to a static "0" string.  The caller must not free or
 *	modify it.
 */
char *
dapli_get_adapter_num(
	char 		*device_name)
{
	static char	*zero = "0";
	char		*p;

	if (device_name == NULL) {
		return (zero);
	}

	/*
	 * Optimistically simple algorithm: the device number is taken to
	 * be everything from the first digit onward. Devices whose name
	 * holds no digit are by default "0".
	 */

	for (p = device_name; *p; p++) {
		/*
		 * isdigit() is only defined for values representable as
		 * unsigned char (or EOF), so convert before the call.
		 */
		if (isdigit((unsigned char)*p)) {
			return (p);
		}
	}

	return (zero);
}
#endif /* IBHOSTS_NAMING */


/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 *  tab-width: 8
 * End:
 */