OpenSolaris_b135/uts/i86pc/io/immu_intrmap.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Portions Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */


#include <sys/apic.h>
#include <vm/hat_i86.h>
#include <sys/sysmacros.h>
#include <sys/smp_impldefs.h>
#include <sys/immu.h>


typedef struct intrmap_private {
	immu_t		*ir_immu;
	uint16_t	ir_idx;
	uint32_t	ir_sid_svt_sq;
} intrmap_private_t;

#define	INTRMAP_PRIVATE(airq) ((intrmap_private_t *)airq->airq_intrmap_private)
#define	AIRQ_PRIVATE(airq) (airq->airq_intrmap_private)

/* interrupt remapping table entry */
typedef struct intrmap_rte {
	uint64_t	lo;
	uint64_t	hi;
} intrmap_rte_t;

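/*
 * IRTE layout as encoded by IRTE_LOW/IRTE_HIGH below: the low 64 bits
 * carry P (bit 0), FPD (bit 1), DM (bit 2), RH (bit 3), TM (bit 4),
 * the delivery mode starting at bit 5, the vector in bits 23:16 and the
 * destination id starting at bit 32.  The high 64 bits carry the
 * sid_svt_sq word built in get_sid(): SID in bits 15:0, SQ in bits 17:16
 * and SVT in bits 19:18.
 */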
#define	IRTE_HIGH(sid_svt_sq) (sid_svt_sq)
#define	IRTE_LOW(dst, vector, dlm, tm, rh, dm, fpd, p)	\
	    (((uint64_t)(dst) << 32) |  \
	    ((uint64_t)(vector) << 16) | \
	    ((uint64_t)(dlm) << 5) | \
	    ((uint64_t)(tm) << 4) | \
	    ((uint64_t)(rh) << 3) | \
	    ((uint64_t)(dm) << 2) | \
	    ((uint64_t)(fpd) << 1) | \
	    (p))

typedef enum {
	SVT_NO_VERIFY = 0, 	/* no verification */
	SVT_ALL_VERIFY,		/* using sid and sq to verify */
	SVT_BUS_VERIFY,		/* verify #startbus and #endbus */
	SVT_RSVD
} intrmap_svt_t;

typedef enum {
	SQ_VERIFY_ALL = 0,	/* verify all 16 bits */
	SQ_VERIFY_IGR_1,	/* ignore bit 3 */
	SQ_VERIFY_IGR_2,	/* ignore bit 2-3 */
	SQ_VERIFY_IGR_3		/* ignore bit 1-3 */
} intrmap_sq_t;

/*
 * S field of the Interrupt Remapping Table Address Register.
 * The size of the interrupt remapping table is 1 << (intrmap_irta_s + 1)
 * entries.
 */
static uint_t intrmap_irta_s = INTRMAP_MAX_IRTA_SIZE;
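/*
 * For example, if INTRMAP_MAX_IRTA_SIZE is 0xf, the table holds
 * 1 << 16 = 65536 entries; each entry is an intrmap_rte_t (two uint64_t,
 * 16 bytes), i.e. a 1MB remapping table.
 */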

/*
 * If true, arrange to suppress broadcast EOI by setting edge-triggered mode
 * even for level-triggered interrupts in the interrupt-remapping engine.
 * If false, broadcast EOI can still be suppressed if the CPU supports the
 * APIC_SVR_SUPPRESS_BROADCAST_EOI bit.  In both cases, the IOAPIC is still
 * programmed with the correct trigger mode, and pcplusmp must send an EOI
 * to the IOAPIC by writing to the IOAPIC's EOI register to make up for the
 * missing broadcast EOI.
 */
static int intrmap_suppress_brdcst_eoi = 0;

/*
 * whether to verify the source-id of interrupt requests
 */
static int intrmap_enable_sid_verify = 0;

/* fault types for DVMA remapping */
static char *immu_dvma_faults[] = {
	"Reserved",
	"The present field in root-entry is Clear",
	"The present field in context-entry is Clear",
	"Hardware detected invalid programming of a context-entry",
	"The DMA request attempted to access an address beyond max support",
	"The Write field in a page-table entry is Clear when DMA write",
	"The Read field in a page-table entry is Clear when DMA read",
	"Access the next level page table resulted in error",
	"Access the root-entry table resulted in error",
	"Access the context-entry table resulted in error",
	"Reserved field not initialized to zero in a present root-entry",
	"Reserved field not initialized to zero in a present context-entry",
	"Reserved field not initialized to zero in a present page-table entry",
	"DMA blocked due to the Translation Type field in context-entry",
	"Incorrect fault event reason number",
};
#define	DVMA_MAX_FAULTS \
	((sizeof (immu_dvma_faults) / (sizeof (char *))) - 1)

/* fault types for interrupt remapping */
static char *immu_intrmap_faults[] = {
	"reserved field set in IRTE",
	"interrupt_index exceed the intr-remap table size",
	"present field in IRTE is clear",
	"hardware access intr-remap table address resulted in error",
	"reserved field set in IRTE, include various conditional",
	"hardware blocked an interrupt request in Compatibility format",
	"remappable interrupt request blocked due to verification failure"
};
#define	INTRMAP_MAX_FAULTS \
	((sizeof (immu_intrmap_faults) / (sizeof (char *))) - 1)

/* Function prototypes */
static int immu_intrmap_init(int apic_mode);
static void immu_intrmap_switchon(int suppress_brdcst_eoi);
static void immu_intrmap_alloc(apic_irq_t *irq_ptr);
static void immu_intrmap_map(apic_irq_t *irq_ptr, void *intrmap_data);
static void immu_intrmap_free(apic_irq_t *irq_ptr);
static void immu_intrmap_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt);
static void immu_intrmap_msi(apic_irq_t *irq_ptr, msi_regs_t *mregs);

static struct apic_intrmap_ops intrmap_ops = {
	immu_intrmap_init,
	immu_intrmap_switchon,
	immu_intrmap_alloc,
	immu_intrmap_map,
	immu_intrmap_free,
	immu_intrmap_rdt,
	immu_intrmap_msi,
};

/* apic mode, APIC/X2APIC */
static int intrmap_apic_mode = LOCAL_APIC;


/*
 * helper functions
 */
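/*
 * bitset_find_free() scans for a clear bit starting at 'post' and wraps
 * around to the beginning of the bitset, returning INTRMAP_IDX_FULL when
 * every entry is in use.
 */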
static uint_t
bitset_find_free(bitset_t *b, uint_t post)
{
	uint_t	i;
	uint_t	cap = bitset_capacity(b);

	if (post == cap)
		post = 0;

	ASSERT(post < cap);

	for (i = post; i < cap; i++) {
		if (!bitset_in_set(b, i))
			return (i);
	}

	for (i = 0; i < post; i++) {
		if (!bitset_in_set(b, i))
			return (i);
	}

	return (INTRMAP_IDX_FULL);	/* no free index */
}

/*
 * helper function to find 'count' contiguous free
 * interrupt remapping table entries
 */
static uint_t
bitset_find_multi_free(bitset_t *b, uint_t post, uint_t count)
{
	uint_t  i, j;
	uint_t	cap = bitset_capacity(b);

	if (post == INTRMAP_IDX_FULL) {
		return (INTRMAP_IDX_FULL);
	}

	if (count > cap)
		return (INTRMAP_IDX_FULL);

	ASSERT(post < cap);

	for (i = post; (i + count) <= cap; i++) {
		for (j = 0; j < count; j++) {
			if (bitset_in_set(b, (i + j))) {
				i = i + j;
				break;
			}
			if (j == count - 1)
				return (i);
		}
	}

	for (i = 0; (i < post) && ((i + count) <= cap); i++) {
		for (j = 0; j < count; j++) {
			if (bitset_in_set(b, (i + j))) {
				i = i + j;
				break;
			}
			if (j == count - 1)
				return (i);
		}
	}

	return (INTRMAP_IDX_FULL);	/* no free index */
}

/* alloc one interrupt remapping table entry */
static int
alloc_tbl_entry(intrmap_t *intrmap)
{
	uint32_t idx;

	for (;;) {
		mutex_enter(&intrmap->intrmap_lock);
		idx = intrmap->intrmap_free;
		if (idx != INTRMAP_IDX_FULL) {
			bitset_add(&intrmap->intrmap_map, idx);
			intrmap->intrmap_free =
			    bitset_find_free(&intrmap->intrmap_map, idx + 1);
			mutex_exit(&intrmap->intrmap_lock);
			break;
		}

		/* no free intr entry, use compatible format intr */
		mutex_exit(&intrmap->intrmap_lock);

		if (intrmap_apic_mode != LOCAL_X2APIC) {
			break;
		}

		/*
		 * Compatibility-format interrupts are not allowed
		 * in x2apic mode; wait for an entry to free up.
		 */
		delay(IMMU_ALLOC_RESOURCE_DELAY);
	}

	return (idx);
}

/* alloc 'cnt' contiguous interrupt remapping table entries */
static int
alloc_tbl_multi_entries(intrmap_t *intrmap, uint_t cnt)
{
	uint_t idx, pos, i;

	for (;;) {
		mutex_enter(&intrmap->intrmap_lock);
		pos = intrmap->intrmap_free;
		idx = bitset_find_multi_free(&intrmap->intrmap_map, pos, cnt);

		if (idx != INTRMAP_IDX_FULL) {
			if (idx <= pos && pos < (idx + cnt)) {
				intrmap->intrmap_free = bitset_find_free(
				    &intrmap->intrmap_map, idx + cnt);
			}
			for (i = 0; i < cnt; i++) {
				bitset_add(&intrmap->intrmap_map, idx + i);
			}
			mutex_exit(&intrmap->intrmap_lock);
			break;
		}

		mutex_exit(&intrmap->intrmap_lock);

		if (intrmap_apic_mode != LOCAL_X2APIC) {
			break;
		}

		/* x2apic mode does not allow compatibility-format interrupts */
		delay(IMMU_ALLOC_RESOURCE_DELAY);
	}

	return (idx);
}

/* init interrupt remapping table */
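/*
 * The remapping table handed to hardware must be a single physically
 * contiguous, page-aligned buffer, hence the DMA attributes below with
 * an sgllen of 1 and MMU_PAGESIZE alignment; the resulting physical
 * address is later programmed into the IRTA register by intrmap_enable().
 */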
static int
init_unit(immu_t *immu)
{
	intrmap_t *intrmap;
	size_t size;

	ddi_dma_attr_t intrmap_dma_attr = {
		DMA_ATTR_V0,
		0U,
		0xffffffffU,
		0xffffffffU,
		MMU_PAGESIZE,	/* page aligned */
		0x1,
		0x1,
		0xffffffffU,
		0xffffffffU,
		1,
		4,
		0
	};

	ddi_device_acc_attr_t intrmap_acc_attr = {
		DDI_DEVICE_ATTR_V0,
		DDI_NEVERSWAP_ACC,
		DDI_STRICTORDER_ACC
	};

	if (intrmap_apic_mode == LOCAL_X2APIC) {
		if (!IMMU_ECAP_GET_EIM(immu->immu_regs_excap)) {
			return (DDI_FAILURE);
		}
	}

	if (intrmap_irta_s > INTRMAP_MAX_IRTA_SIZE) {
		intrmap_irta_s = INTRMAP_MAX_IRTA_SIZE;
	}

	intrmap = kmem_zalloc(sizeof (intrmap_t), KM_SLEEP);

	if (ddi_dma_alloc_handle(immu->immu_dip,
	    &intrmap_dma_attr,
	    DDI_DMA_SLEEP,
	    NULL,
	    &(intrmap->intrmap_dma_hdl)) != DDI_SUCCESS) {
		kmem_free(intrmap, sizeof (intrmap_t));
		return (DDI_FAILURE);
	}

	intrmap->intrmap_size = 1 << (intrmap_irta_s + 1);
	size = intrmap->intrmap_size * INTRMAP_RTE_SIZE;
	if (ddi_dma_mem_alloc(intrmap->intrmap_dma_hdl,
	    size,
	    &intrmap_acc_attr,
	    DDI_DMA_CONSISTENT | IOMEM_DATA_UNCACHED,
	    DDI_DMA_SLEEP,
	    NULL,
	    &(intrmap->intrmap_vaddr),
	    &size,
	    &(intrmap->intrmap_acc_hdl)) != DDI_SUCCESS) {
		ddi_dma_free_handle(&(intrmap->intrmap_dma_hdl));
		kmem_free(intrmap, sizeof (intrmap_t));
		return (DDI_FAILURE);
	}

	ASSERT(!((uintptr_t)intrmap->intrmap_vaddr & MMU_PAGEOFFSET));
	bzero(intrmap->intrmap_vaddr, size);
	intrmap->intrmap_paddr = pfn_to_pa(
	    hat_getpfnum(kas.a_hat, intrmap->intrmap_vaddr));

	mutex_init(&(intrmap->intrmap_lock), NULL, MUTEX_DRIVER, NULL);
	bitset_init(&intrmap->intrmap_map);
	bitset_resize(&intrmap->intrmap_map, intrmap->intrmap_size);
	intrmap->intrmap_free = 0;

	immu->immu_intrmap = intrmap;

	return (DDI_SUCCESS);
}

static void
get_immu(apic_irq_t *irq_ptr)
{
	immu_t	*immu = NULL;

	ASSERT(INTRMAP_PRIVATE(irq_ptr)->ir_immu == NULL);

	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
		immu = immu_dmar_ioapic_immu(irq_ptr->airq_ioapicindex);
	} else {
		if (irq_ptr->airq_dip != NULL) {
			immu = immu_dmar_get_immu(irq_ptr->airq_dip);
		}
	}

	if (immu && (immu->immu_intrmap_running == B_TRUE)) {
		INTRMAP_PRIVATE(irq_ptr)->ir_immu = immu;
	}
}

static int
get_top_pcibridge(dev_info_t *dip, void *arg)
{
	dev_info_t **topdipp = arg;
	immu_devi_t *immu_devi;

	mutex_enter(&(DEVI(dip)->devi_lock));
	immu_devi = DEVI(dip)->devi_iommu;
	mutex_exit(&(DEVI(dip)->devi_lock));

	if (immu_devi == NULL || immu_devi->imd_pcib_type == IMMU_PCIB_BAD ||
	    immu_devi->imd_pcib_type == IMMU_PCIB_ENDPOINT) {
		return (DDI_WALK_CONTINUE);
	}

	*topdipp = dip;

	return (DDI_WALK_CONTINUE);
}

static dev_info_t *
intrmap_top_pcibridge(dev_info_t *rdip)
{
	dev_info_t *top_pcibridge = NULL;

	if (immu_walk_ancestor(rdip, NULL, get_top_pcibridge,
	    &top_pcibridge, NULL, 0) != DDI_SUCCESS) {
		return (NULL);
	}

	return (top_pcibridge);
}

/* function to get interrupt request source id */
static void
get_sid(apic_irq_t *irq_ptr)
{
	dev_info_t	*dip, *pdip;
	immu_devi_t	*immu_devi;
	uint16_t	sid;
	uchar_t		svt, sq;

	if (!intrmap_enable_sid_verify) {
		return;
	}

	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
		/* for interrupt through I/O APIC */
		sid = immu_dmar_ioapic_sid(irq_ptr->airq_ioapicindex);
		svt = SVT_ALL_VERIFY;
		sq = SQ_VERIFY_ALL;
	} else {
		/* MSI/MSI-X interrupt */
		dip = irq_ptr->airq_dip;
		ASSERT(dip);
		pdip = intrmap_top_pcibridge(dip);
		ASSERT(pdip);
		immu_devi = DEVI(pdip)->devi_iommu;
		ASSERT(immu_devi);
		if (immu_devi->imd_pcib_type == IMMU_PCIB_PCIE_PCI) {
			/* device behind pcie to pci bridge */
			sid = (immu_devi->imd_bus << 8) | immu_devi->imd_sec;
			svt = SVT_BUS_VERIFY;
			sq = SQ_VERIFY_ALL;
		} else {
			/* pcie device or device behind pci to pci bridge */
			sid = (immu_devi->imd_bus << 8) |
			    immu_devi->imd_devfunc;
			svt = SVT_ALL_VERIFY;
			sq = SQ_VERIFY_ALL;
		}
	}

	INTRMAP_PRIVATE(irq_ptr)->ir_sid_svt_sq =
	    sid | (svt << 18) | (sq << 16);
}

static void
intrmap_enable(immu_t *immu)
{
	intrmap_t *intrmap;
	uint64_t irta_reg;

	intrmap = immu->immu_intrmap;

	irta_reg = intrmap->intrmap_paddr | intrmap_irta_s;
	if (intrmap_apic_mode == LOCAL_X2APIC) {
		irta_reg |= (0x1 << 11);
	}

	immu_regs_intrmap_enable(immu, irta_reg);
}

/* ####################################################################### */

/*
 * immu_intr_handler()
 * 	the fault event handler for a single immu unit
 */
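/*
 * Each fault recording register is 16 bytes at
 * (fault_reg_offset + index * 16).  The upper 64 bits carry the F bit,
 * fault reason, fault type and source-id; the lower 64 bits carry the
 * faulting page address (DVMA faults) or the interrupt index in bits
 * 63:48 (interrupt-remapping faults).  Writing bit 31 of the last 32-bit
 * word (offset + 12) clears the record.
 */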
int
immu_intr_handler(immu_t *immu)
{
	uint32_t status;
	int index, fault_reg_offset;
	int max_fault_index;
	boolean_t found_fault;
	dev_info_t *idip;

	mutex_enter(&(immu->immu_intr_lock));
	mutex_enter(&(immu->immu_regs_lock));

	/* read the fault status */
	status = immu_regs_get32(immu, IMMU_REG_FAULT_STS);

	idip = immu->immu_dip;
	ASSERT(idip);

	/* check if we have a pending fault for this immu unit */
	if ((status & IMMU_FAULT_STS_PPF) == 0) {
		mutex_exit(&(immu->immu_regs_lock));
		mutex_exit(&(immu->immu_intr_lock));
		return (DDI_INTR_UNCLAIMED);
	}

	/*
	 * handle all primary pending faults
	 */
	index = IMMU_FAULT_GET_INDEX(status);
	max_fault_index =  IMMU_CAP_GET_NFR(immu->immu_regs_cap) - 1;
	fault_reg_offset = IMMU_CAP_GET_FRO(immu->immu_regs_cap);

	found_fault = B_FALSE;
	_NOTE(CONSTCOND)
	while (1) {
		uint64_t val;
		uint8_t fault_reason;
		uint8_t fault_type;
		uint16_t sid;
		uint64_t pg_addr;
		uint64_t idx;

		/* read the higher 64bits */
		val = immu_regs_get64(immu, fault_reg_offset + index * 16 + 8);

		/* check if this fault register has pending fault */
		if (!IMMU_FRR_GET_F(val)) {
			break;
		}

		found_fault = B_TRUE;

		/* get the fault reason, fault type and sid */
		fault_reason = IMMU_FRR_GET_FR(val);
		fault_type = IMMU_FRR_GET_FT(val);
		sid = IMMU_FRR_GET_SID(val);

		/* read the first 64bits */
		val = immu_regs_get64(immu, fault_reg_offset + index * 16);
		pg_addr = val & IMMU_PAGEMASK;
		idx = val >> 48;

		/* clear the fault */
		immu_regs_put32(immu, fault_reg_offset + index * 16 + 12,
		    (((uint32_t)1) << 31));

		/* report the fault info */
		if (fault_reason < 0x20) {
			/* immu-remapping fault */
			ddi_err(DER_WARN, idip,
			    "generated a fault event when translating DMA %s\n"
			    "\t on address 0x%" PRIx64 " for PCI(%d, %d, %d), "
			    "the reason is:\n\t %s",
			    fault_type ? "read" : "write", pg_addr,
			    (sid >> 8) & 0xff, (sid >> 3) & 0x1f, sid & 0x7,
			    immu_dvma_faults[MIN(fault_reason,
			    DVMA_MAX_FAULTS)]);
		} else if (fault_reason < 0x27) {
			/* intr-remapping fault */
			ddi_err(DER_WARN, idip,
			    "generated a fault event when translating "
			    "interrupt request\n"
			    "\t on index 0x%" PRIx64 " for PCI(%d, %d, %d), "
			    "the reason is:\n\t %s",
			    idx,
			    (sid >> 8) & 0xff, (sid >> 3) & 0x1f, sid & 0x7,
			    immu_intrmap_faults[MIN((fault_reason - 0x20),
			    INTRMAP_MAX_FAULTS)]);
		} else {
			ddi_err(DER_WARN, idip, "Unknown fault reason: 0x%x",
			    fault_reason);
		}

		index++;
		if (index > max_fault_index)
			index = 0;
	}

	/* Clear the fault */
	if (!found_fault) {
		ddi_err(DER_MODE, idip,
		    "Fault register set but no fault present");
	}
	immu_regs_put32(immu, IMMU_REG_FAULT_STS, 1);
	mutex_exit(&(immu->immu_regs_lock));
	mutex_exit(&(immu->immu_intr_lock));
	return (DDI_INTR_CLAIMED);
}
/* ######################################################################### */

/*
 * Interrupt remap entry points
 */

/* initialize interrupt remapping */
static int
immu_intrmap_init(int apic_mode)
{
	immu_t *immu;
	int error = DDI_FAILURE;

	if (immu_intrmap_enable == B_FALSE) {
		return (DDI_SUCCESS);
	}

	intrmap_apic_mode = apic_mode;

	immu = list_head(&immu_list);
	for (; immu; immu = list_next(&immu_list, immu)) {
		if ((immu->immu_intrmap_running == B_TRUE) &&
		    IMMU_ECAP_GET_IR(immu->immu_regs_excap)) {
			if (init_unit(immu) == DDI_SUCCESS) {
				error = DDI_SUCCESS;
			}
		}
	}

	/*
	 * If interrupt remapping could not be initialized on any
	 * IOMMU unit, return failure.
	 */
	return (error);
}



/* enable interrupt remapping */
static void
immu_intrmap_switchon(int suppress_brdcst_eoi)
{
	immu_t *immu;


	intrmap_suppress_brdcst_eoi = suppress_brdcst_eoi;

	immu = list_head(&immu_list);
	for (; immu; immu = list_next(&immu_list, immu)) {
		if (immu->immu_intrmap_setup == B_TRUE) {
			intrmap_enable(immu);
		}
	}
}

/* alloc remapping entry for the interrupt */
static void
immu_intrmap_alloc(apic_irq_t *irq_ptr)
{
	immu_t	*immu;
	intrmap_t *intrmap;
	uint32_t		idx, cnt, i;
	uint_t			vector, irqno;
	uint32_t		sid_svt_sq;

	if (AIRQ_PRIVATE(irq_ptr) == INTRMAP_DISABLE ||
	    AIRQ_PRIVATE(irq_ptr) != NULL) {
		return;
	}

	AIRQ_PRIVATE(irq_ptr) =
	    kmem_zalloc(sizeof (intrmap_private_t), KM_SLEEP);

	get_immu(irq_ptr);

	immu = INTRMAP_PRIVATE(irq_ptr)->ir_immu;
	if (immu == NULL) {
		goto intrmap_disable;
	}

	intrmap = immu->immu_intrmap;

	if (irq_ptr->airq_mps_intr_index == MSI_INDEX) {
		cnt = irq_ptr->airq_intin_no;
	} else {
		cnt = 1;
	}

	if (cnt == 1) {
		idx = alloc_tbl_entry(intrmap);
	} else {
		idx = alloc_tbl_multi_entries(intrmap, cnt);
	}

	if (idx == INTRMAP_IDX_FULL) {
		goto intrmap_disable;
	}

	INTRMAP_PRIVATE(irq_ptr)->ir_idx = idx;

	get_sid(irq_ptr);

	if (cnt == 1) {
		if (IMMU_CAP_GET_CM(immu->immu_regs_cap)) {
			immu_qinv_intr_one_cache(immu, idx);
		} else {
			immu_regs_wbf_flush(immu);
		}
		return;
	}

	sid_svt_sq = INTRMAP_PRIVATE(irq_ptr)->ir_sid_svt_sq;

	vector = irq_ptr->airq_vector;

	for (i = 1; i < cnt; i++) {
		irqno = apic_vector_to_irq[vector + i];
		irq_ptr = apic_irq_table[irqno];

		ASSERT(irq_ptr);

		AIRQ_PRIVATE(irq_ptr) =
		    kmem_zalloc(sizeof (intrmap_private_t), KM_SLEEP);

		INTRMAP_PRIVATE(irq_ptr)->ir_immu = immu;
		INTRMAP_PRIVATE(irq_ptr)->ir_sid_svt_sq = sid_svt_sq;
		INTRMAP_PRIVATE(irq_ptr)->ir_idx = idx + i;
	}

	if (IMMU_CAP_GET_CM(immu->immu_regs_cap)) {
		immu_qinv_intr_caches(immu, idx, cnt);
	} else {
		immu_regs_wbf_flush(immu);
	}

	return;

intrmap_disable:
	kmem_free(AIRQ_PRIVATE(irq_ptr), sizeof (intrmap_private_t));
	AIRQ_PRIVATE(irq_ptr) = INTRMAP_DISABLE;
}


/* remapping the interrupt */
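/*
 * Build the interrupt remapping table entry (or a block of entries for
 * multi-MSI) from the fields captured in the RDT or MSI registers, copy
 * it into the table and invalidate the IOMMU's interrupt entry cache for
 * the affected indexes.
 */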
static void
immu_intrmap_map(apic_irq_t *irq_ptr, void *intrmap_data)
{
	immu_t	*immu;
	intrmap_t	*intrmap;
	ioapic_rdt_t	*irdt = (ioapic_rdt_t *)intrmap_data;
	msi_regs_t	*mregs = (msi_regs_t *)intrmap_data;
	intrmap_rte_t	irte;
	uint_t		idx, i, cnt;
	uint32_t	dst, sid_svt_sq;
	uchar_t		vector, dlm, tm, rh, dm;

	if (AIRQ_PRIVATE(irq_ptr) == INTRMAP_DISABLE) {
		return;
	}

	if (irq_ptr->airq_mps_intr_index == MSI_INDEX) {
		cnt = irq_ptr->airq_intin_no;
	} else {
		cnt = 1;
	}

	idx = INTRMAP_PRIVATE(irq_ptr)->ir_idx;
	immu = INTRMAP_PRIVATE(irq_ptr)->ir_immu;
	intrmap = immu->immu_intrmap;
	sid_svt_sq = INTRMAP_PRIVATE(irq_ptr)->ir_sid_svt_sq;
	vector = irq_ptr->airq_vector;

	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
		dm = RDT_DM(irdt->ir_lo);
		rh = 0;
		tm = RDT_TM(irdt->ir_lo);
		dlm = RDT_DLM(irdt->ir_lo);
		dst = irdt->ir_hi;

		/*
		 * Mark the IRTE's TM as Edge to suppress broadcast EOI.
		 */
		if (intrmap_suppress_brdcst_eoi) {
			tm = TRIGGER_MODE_EDGE;
		}
	} else {
		dm = MSI_ADDR_DM_PHYSICAL;
		rh = MSI_ADDR_RH_FIXED;
		tm = TRIGGER_MODE_EDGE;
		dlm = 0;
		dst = mregs->mr_addr;
	}

	if (intrmap_apic_mode == LOCAL_APIC)
		dst = (dst & 0xFF) << 8;

	if (cnt == 1) {
		irte.lo = IRTE_LOW(dst, vector, dlm, tm, rh, dm, 0, 1);
		irte.hi = IRTE_HIGH(sid_svt_sq);

		/* set interrupt remapping table entry */
		bcopy(&irte, intrmap->intrmap_vaddr +
		    idx * INTRMAP_RTE_SIZE,
		    INTRMAP_RTE_SIZE);

		immu_qinv_intr_one_cache(immu, idx);

	} else {
		vector = irq_ptr->airq_vector;
		for (i = 0; i < cnt; i++) {
			irte.lo = IRTE_LOW(dst, vector, dlm, tm, rh, dm, 0, 1);
			irte.hi = IRTE_HIGH(sid_svt_sq);

			/* set interrupt remapping table entry */
			bcopy(&irte, intrmap->intrmap_vaddr +
			    idx * INTRMAP_RTE_SIZE,
			    INTRMAP_RTE_SIZE);
			vector++;
			idx++;
		}

		/* invalidate the cached entries starting at the base index */
		immu_qinv_intr_caches(immu, idx - cnt, cnt);
	}
}

/* free the remapping entry */
static void
immu_intrmap_free(apic_irq_t *irq_ptr)
{
	immu_t *immu;
	intrmap_t *intrmap;
	uint32_t idx;

	if (AIRQ_PRIVATE(irq_ptr) == INTRMAP_DISABLE) {
		AIRQ_PRIVATE(irq_ptr) = NULL;
		return;
	}

	immu = INTRMAP_PRIVATE(irq_ptr)->ir_immu;
	intrmap = immu->immu_intrmap;
	idx = INTRMAP_PRIVATE(irq_ptr)->ir_idx;

	bzero(intrmap->intrmap_vaddr + idx * INTRMAP_RTE_SIZE,
	    INTRMAP_RTE_SIZE);

	immu_qinv_intr_one_cache(immu, idx);

	mutex_enter(&intrmap->intrmap_lock);
	bitset_del(&intrmap->intrmap_map, idx);
	if (intrmap->intrmap_free == INTRMAP_IDX_FULL) {
		intrmap->intrmap_free = idx;
	}
	mutex_exit(&intrmap->intrmap_lock);

	kmem_free(AIRQ_PRIVATE(irq_ptr), sizeof (intrmap_private_t));
	AIRQ_PRIVATE(irq_ptr) = NULL;
}

/* record the ioapic rdt entry */
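/*
 * The RDT entry is rewritten in remappable format: the interrupt index
 * goes into ir_hi at INTRMAP_IOAPIC_IDX_SHIFT together with the format
 * bit, while index bit 15, the trigger mode, polarity and vector go into
 * ir_lo.  Without a remapping entry, ir_hi is simply shifted into the
 * usual APIC destination position.
 */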
static void
immu_intrmap_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt)
{
	uint32_t rdt_entry, tm, pol, idx, vector;

	rdt_entry = irdt->ir_lo;

	if (INTRMAP_PRIVATE(irq_ptr) != NULL) {
		idx = INTRMAP_PRIVATE(irq_ptr)->ir_idx;
		tm = RDT_TM(rdt_entry);
		pol = RDT_POL(rdt_entry);
		vector = irq_ptr->airq_vector;
		irdt->ir_lo = (tm << INTRMAP_IOAPIC_TM_SHIFT) |
		    (pol << INTRMAP_IOAPIC_POL_SHIFT) |
		    ((idx >> 15) << INTRMAP_IOAPIC_IDX15_SHIFT) |
		    vector;
		irdt->ir_hi = (idx << INTRMAP_IOAPIC_IDX_SHIFT) |
		    (1 << INTRMAP_IOAPIC_FORMAT_SHIFT);
	} else {
		irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
	}
}

/* record the msi interrupt structure */
/*ARGSUSED*/
static void
immu_intrmap_msi(apic_irq_t *irq_ptr, msi_regs_t *mregs)
{
	uint_t	idx;

	if (INTRMAP_PRIVATE(irq_ptr) != NULL) {
		idx = INTRMAP_PRIVATE(irq_ptr)->ir_idx;

		mregs->mr_data = 0;
		mregs->mr_addr = MSI_ADDR_HDR |
		    ((idx & 0x7fff) << INTRMAP_MSI_IDX_SHIFT) |
		    (1 << INTRMAP_MSI_FORMAT_SHIFT) |
		    (1 << INTRMAP_MSI_SHV_SHIFT) |
		    ((idx >> 15) << INTRMAP_MSI_IDX15_SHIFT);
	} else {
		mregs->mr_addr = MSI_ADDR_HDR |
		    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
		    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
		    (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
		mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
		    mregs->mr_data;
	}
}

/* ######################################################################### */
/*
 * Functions exported by immu_intr.c
 */
void
immu_intrmap_setup(list_t *listp)
{
	immu_t *immu;

	/*
	 * Check if ACPI DMAR tables say that
	 * interrupt remapping is supported
	 */
	if (immu_dmar_intrmap_supported() == B_FALSE) {
		return;
	}

	/*
	 * Check if interrupt remapping is disabled.
	 */
	if (immu_intrmap_enable == B_FALSE) {
		return;
	}

	psm_vt_ops = &intrmap_ops;

	immu = list_head(listp);
	for (; immu; immu = list_next(listp, immu)) {
		mutex_init(&(immu->immu_intrmap_lock), NULL,
		    MUTEX_DEFAULT, NULL);
		mutex_enter(&(immu->immu_intrmap_lock));
		immu->immu_intrmap_setup = B_TRUE;
		mutex_exit(&(immu->immu_intrmap_lock));
	}
}

void
immu_intrmap_startup(immu_t *immu)
{
	/* no hardware programming here; just mark the unit as running */
	mutex_enter(&(immu->immu_intrmap_lock));
	if (immu->immu_intrmap_setup == B_TRUE) {
		immu->immu_intrmap_running = B_TRUE;
	}
	mutex_exit(&(immu->immu_intrmap_lock));
}

/*
 * Register the fault-event interrupt handler for an Intel IOMMU
 * (i.e. DMAR) unit
 */
void
immu_intr_register(immu_t *immu)
{
	int irq, vect;
	char intr_handler_name[IMMU_MAXNAMELEN];
	uint32_t msi_data;
	uint32_t uaddr;
	uint32_t msi_addr;

	msi_addr = (MSI_ADDR_HDR |
	    ((apic_cpus[0].aci_local_id & 0xFF) << MSI_ADDR_DEST_SHIFT) |
	    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT));

	if (intrmap_apic_mode == LOCAL_X2APIC) {
		uaddr = (apic_cpus[0].aci_local_id & 0xFFFFFF00);
	} else {
		uaddr = 0;
	}

	/* Don't need to hold immu_intr_lock since we are in boot */
	irq = psm_get_ipivect(IMMU_INTR_IPL, -1);
	vect = apic_irq_table[irq]->airq_vector;
	msi_data = ((MSI_DATA_DELIVERY_FIXED <<
	    MSI_DATA_DELIVERY_SHIFT) | vect);

	(void) snprintf(intr_handler_name, sizeof (intr_handler_name),
	    "%s-intr-handler", immu->immu_name);

	(void) add_avintr((void *)NULL, IMMU_INTR_IPL,
	    (avfunc)(immu_intr_handler), intr_handler_name, irq,
	    (caddr_t)immu, NULL, NULL, NULL);

	immu_regs_intr_enable(immu, msi_addr, msi_data, uaddr);

	(void) immu_intr_handler(immu);
}