OpenSolaris_b135/uts/sun4/os/trap.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


#include <sys/mmu.h>
#include <sys/systm.h>
#include <sys/trap.h>
#include <sys/machtrap.h>
#include <sys/vtrace.h>
#include <sys/prsystm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/fpu/fpusystm.h>
#include <sys/tnf.h>
#include <sys/tnf_probe.h>
#include <sys/simulate.h>
#include <sys/ftrace.h>
#include <sys/ontrap.h>
#include <sys/kcpc.h>
#include <sys/kobj.h>
#include <sys/procfs.h>
#include <sys/sun4asi.h>
#include <sys/sdt.h>
#include <sys/fpras.h>
#include <sys/contract/process_impl.h>

#ifdef  TRAPTRACE
#include <sys/traptrace.h>
#endif

int tudebug = 0;
static int tudebugbpt = 0;
static int tudebugfpe = 0;

static int alignfaults = 0;

#if defined(TRAPDEBUG) || defined(lint)
static int lodebug = 0;
#else
#define	lodebug	0
#endif /* defined(TRAPDEBUG) || defined(lint) */


int vis1_partial_support(struct regs *rp, k_siginfo_t *siginfo, uint_t *fault);
#pragma weak vis1_partial_support

void showregs(unsigned, struct regs *, caddr_t, uint_t);
#pragma weak showregs

void trap_async_hwerr(void);
#pragma weak trap_async_hwerr

void trap_async_berr_bto(int, struct regs *);
#pragma weak trap_async_berr_bto

static enum seg_rw get_accesstype(struct regs *);
static int nfload(struct regs *, int *);
static int swap_nc(struct regs *, int);
static int ldstub_nc(struct regs *, int);
void	trap_cleanup(struct regs *, uint_t, k_siginfo_t *, int);
void	trap_rtt(void);

static int
die(unsigned type, struct regs *rp, caddr_t addr, uint_t mmu_fsr)
{
	struct panic_trap_info ti;

#ifdef TRAPTRACE
	TRAPTRACE_FREEZE;
#endif

	ti.trap_regs = rp;
	ti.trap_type = type;
	ti.trap_addr = addr;
	ti.trap_mmu_fsr = mmu_fsr;

	curthread->t_panic_trap = &ti;

	if (type == T_DATA_MMU_MISS && addr < (caddr_t)KERNELBASE) {
		panic("BAD TRAP: type=%x rp=%p addr=%p mmu_fsr=%x "
		    "occurred in module \"%s\" due to %s",
		    type, (void *)rp, (void *)addr, mmu_fsr,
		    mod_containing_pc((caddr_t)rp->r_pc),
		    addr < (caddr_t)PAGESIZE ?
		    "a NULL pointer dereference" :
		    "an illegal access to a user address");
	} else {
		panic("BAD TRAP: type=%x rp=%p addr=%p mmu_fsr=%x",
		    type, (void *)rp, (void *)addr, mmu_fsr);
	}

	return (0);	/* avoid optimization of restore in call's delay slot */
}

#if defined(SF_ERRATA_23) || defined(SF_ERRATA_30) /* call ... illegal-insn */
int	ill_calls;
#endif

/*
 * Currently, the only PREFETCH/PREFETCHA instructions which cause traps
 * are the "strong" prefetches (fcn=20-23).  But we check for all flavors of
 * PREFETCH, in case some future variant also causes a DATA_MMU_MISS.
 */
#define	IS_PREFETCH(i)	(((i) & 0xc1780000) == 0xc1680000)

#define	IS_FLUSH(i)	(((i) & 0xc1f80000) == 0x81d80000)
#define	IS_SWAP(i)	(((i) & 0xc1f80000) == 0xc0780000)
#define	IS_LDSTUB(i)	(((i) & 0xc1f80000) == 0xc0680000)
#define	IS_FLOAT(i)	(((i) & 0x1000000) != 0)
#define	IS_STORE(i)	(((i) >> 21) & 1)
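
/*
 * A sketch of how these masks pick apart SPARC V9 format-3 opcodes
 * (the V9 instruction formats are authoritative): bits 31:30 are op,
 * bits 24:19 are op3, and, within the op 3 load/store space, bit 24
 * (op3's high bit) selects the floating-point group -- hence IS_FLOAT.
 * IS_PREFETCH masks out bit 23, the only op3 bit that differs between
 * PREFETCH (op3 0x2d) and PREFETCHA (op3 0x3d), so both flavors match.
 * IS_FLUSH matches op 2/op3 0x3b (FLUSH), IS_SWAP op 3/op3 0x0f
 * (SWAP), and IS_LDSTUB op 3/op3 0x0d (LDSTUB).  IS_STORE keys off
 * instruction bit 21, which is set for the store and store-like
 * atomic op3 encodings.
 */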

/*
 * Called from the trap handler when a processor trap occurs.
 */
/*VARARGS2*/
void
trap(struct regs *rp, caddr_t addr, uint32_t type, uint32_t mmu_fsr)
{
	proc_t *p = ttoproc(curthread);
	klwp_id_t lwp = ttolwp(curthread);
	struct machpcb *mpcb = NULL;
	k_siginfo_t siginfo;
	uint_t op3, fault = 0;
	int stepped = 0;
	greg_t oldpc;
	int mstate;
	char *badaddr;
	faultcode_t res;
	enum fault_type fault_type;
	enum seg_rw rw;
	uintptr_t lofault;
	int instr;
	int iskernel;
	int watchcode;
	int watchpage;
	extern faultcode_t pagefault(caddr_t, enum fault_type,
	    enum seg_rw, int);
#ifdef sun4v
	extern boolean_t tick_stick_emulation_active;
#endif	/* sun4v */

	CPU_STATS_ADDQ(CPU, sys, trap, 1);

#ifdef SF_ERRATA_23 /* call causes illegal-insn */
	ASSERT((curthread->t_schedflag & TS_DONT_SWAP) ||
	    (type == T_UNIMP_INSTR));
#else
	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
#endif /* SF_ERRATA_23 */

	if (USERMODE(rp->r_tstate) || (type & T_USER)) {
		/*
		 * Set lwp_state before trying to acquire any
		 * adaptive lock
		 */
		ASSERT(lwp != NULL);
		lwp->lwp_state = LWP_SYS;
		/*
		 * Set up the current cred to use during this trap. u_cred
		 * no longer exists.  t_cred is used instead.
		 * The current process credential applies to the thread for
		 * the entire trap.  If trapping from the kernel, this
		 * should already be set up.
		 */
		if (curthread->t_cred != p->p_cred) {
			cred_t *oldcred = curthread->t_cred;
			/*
			 * DTrace accesses t_cred in probe context.  t_cred
			 * must always be either NULL, or point to a valid,
			 * allocated cred structure.
			 */
			curthread->t_cred = crgetcred();
			crfree(oldcred);
		}
		type |= T_USER;
		ASSERT((type == (T_SYS_RTT_PAGE | T_USER)) ||
		    (type == (T_SYS_RTT_ALIGN | T_USER)) ||
		    lwp->lwp_regs == rp);
		mpcb = lwptompcb(lwp);
		switch (type) {
		case T_WIN_OVERFLOW + T_USER:
		case T_WIN_UNDERFLOW + T_USER:
		case T_SYS_RTT_PAGE + T_USER:
		case T_DATA_MMU_MISS + T_USER:
			mstate = LMS_DFAULT;
			break;
		case T_INSTR_MMU_MISS + T_USER:
			mstate = LMS_TFAULT;
			break;
		default:
			mstate = LMS_TRAP;
			break;
		}
		/* Kernel probe */
		TNF_PROBE_1(thread_state, "thread", /* CSTYLED */,
		    tnf_microstate, state, (char)mstate);
		mstate = new_mstate(curthread, mstate);
		siginfo.si_signo = 0;
		stepped =
		    lwp->lwp_pcb.pcb_step != STEP_NONE &&
		    ((oldpc = rp->r_pc), prundostep()) &&
		    mmu_btop((uintptr_t)addr) == mmu_btop((uintptr_t)oldpc);
		/* this assignment must not precede call to prundostep() */
		oldpc = rp->r_pc;
	}

	TRACE_1(TR_FAC_TRAP, TR_C_TRAP_HANDLER_ENTER,
	    "C_trap_handler_enter:type %x", type);

#ifdef	F_DEFERRED
	/*
	 * Take any pending floating point exceptions now.
	 * If the floating point unit has an exception to handle,
	 * just return to user-level to let the signal handler run.
	 * The instruction that got us to trap() will be reexecuted on
	 * return from the signal handler and we will trap to here again.
	 * This is necessary to disambiguate simultaneous traps which
	 * happen when a floating-point exception is pending and a
	 * machine fault is incurred.
	 */
	if (type & T_USER) {
		/*
		 * FP_TRAPPED is set only by sendsig() when it copies
		 * out the floating-point queue for the signal handler.
		 * It is set there so we can test it here and in syscall().
		 */
		mpcb->mpcb_flags &= ~FP_TRAPPED;
		syncfpu();
		if (mpcb->mpcb_flags & FP_TRAPPED) {
			/*
			 * trap() has been called recursively and may
			 * have stopped the process, so do single step
			 * support for /proc.
			 */
			mpcb->mpcb_flags &= ~FP_TRAPPED;
			goto out;
		}
	}
#endif
	switch (type) {
		case T_DATA_MMU_MISS:
		case T_INSTR_MMU_MISS + T_USER:
		case T_DATA_MMU_MISS + T_USER:
		case T_DATA_PROT + T_USER:
		case T_AST + T_USER:
		case T_SYS_RTT_PAGE + T_USER:
		case T_FLUSH_PCB + T_USER:
		case T_FLUSHW + T_USER:
			break;

		default:
			FTRACE_3("trap(): type=0x%lx, regs=0x%lx, addr=0x%lx",
			    (ulong_t)type, (ulong_t)rp, (ulong_t)addr);
			break;
	}

	switch (type) {

	default:
		/*
		 * Check for user software trap.
		 */
		if (type & T_USER) {
			if (tudebug)
				showregs(type, rp, (caddr_t)0, 0);
			if ((type & ~T_USER) >= T_SOFTWARE_TRAP) {
				bzero(&siginfo, sizeof (siginfo));
				siginfo.si_signo = SIGILL;
				siginfo.si_code  = ILL_ILLTRP;
				siginfo.si_addr  = (caddr_t)rp->r_pc;
				siginfo.si_trapno = type & ~T_USER;
				fault = FLTILL;
				break;
			}
		}
		addr = (caddr_t)rp->r_pc;
		(void) die(type, rp, addr, 0);
		/*NOTREACHED*/

	case T_ALIGNMENT:	/* supv alignment error */
		if (nfload(rp, NULL))
			goto cleanup;

		if (curthread->t_lofault) {
			if (lodebug) {
				showregs(type, rp, addr, 0);
				traceback((caddr_t)rp->r_sp);
			}
			rp->r_g1 = EFAULT;
			rp->r_pc = curthread->t_lofault;
			rp->r_npc = rp->r_pc + 4;
			goto cleanup;
		}
		(void) die(type, rp, addr, 0);
		/*NOTREACHED*/

	case T_INSTR_EXCEPTION:		/* sys instruction access exception */
		addr = (caddr_t)rp->r_pc;
		(void) die(type, rp, addr, mmu_fsr);
		/*NOTREACHED*/

	case T_INSTR_MMU_MISS:		/* sys instruction mmu miss */
		addr = (caddr_t)rp->r_pc;
		(void) die(type, rp, addr, 0);
		/*NOTREACHED*/

	case T_DATA_EXCEPTION:		/* system data access exception */
		switch (X_FAULT_TYPE(mmu_fsr)) {
		case FT_RANGE:
			/*
			 * This happens when we attempt to dereference an
			 * address in the address hole.  If t_ontrap is set,
			 * then break and fall through to T_DATA_MMU_MISS /
			 * T_DATA_PROT case below.  If lofault is set, then
			 * honour it (perhaps the user gave us a bogus
			 * address in the hole to copyin from or copyout to?)
			 */

			if (curthread->t_ontrap != NULL)
				break;

			addr = (caddr_t)((uintptr_t)addr & TAGACC_VADDR_MASK);
			if (curthread->t_lofault) {
				if (lodebug) {
					showregs(type, rp, addr, 0);
					traceback((caddr_t)rp->r_sp);
				}
				rp->r_g1 = EFAULT;
				rp->r_pc = curthread->t_lofault;
				rp->r_npc = rp->r_pc + 4;
				goto cleanup;
			}
			(void) die(type, rp, addr, mmu_fsr);
			/*NOTREACHED*/

		case FT_PRIV:
			/*
			 * This can happen if we access ASI_USER from a kernel
			 * thread.  To support pxfs, we need to honor lofault if
			 * we're doing a copyin/copyout from a kernel thread.
			 */

			if (nfload(rp, NULL))
				goto cleanup;
			addr = (caddr_t)((uintptr_t)addr & TAGACC_VADDR_MASK);
			if (curthread->t_lofault) {
				if (lodebug) {
					showregs(type, rp, addr, 0);
					traceback((caddr_t)rp->r_sp);
				}
				rp->r_g1 = EFAULT;
				rp->r_pc = curthread->t_lofault;
				rp->r_npc = rp->r_pc + 4;
				goto cleanup;
			}
			(void) die(type, rp, addr, mmu_fsr);
			/*NOTREACHED*/

		default:
			if (nfload(rp, NULL))
				goto cleanup;
			addr = (caddr_t)((uintptr_t)addr & TAGACC_VADDR_MASK);
			(void) die(type, rp, addr, mmu_fsr);
			/*NOTREACHED*/

		case FT_NFO:
			break;
		}
		/* fall into ... */

	case T_DATA_MMU_MISS:		/* system data mmu miss */
	case T_DATA_PROT:		/* system data protection fault */
		if (nfload(rp, &instr))
			goto cleanup;

		/*
		 * If we're under on_trap() protection (see <sys/ontrap.h>),
		 * set ot_trap and return from the trap to the trampoline.
		 */
		if (curthread->t_ontrap != NULL) {
			on_trap_data_t *otp = curthread->t_ontrap;

			TRACE_0(TR_FAC_TRAP, TR_C_TRAP_HANDLER_EXIT,
			    "C_trap_handler_exit");
			TRACE_0(TR_FAC_TRAP, TR_TRAP_END, "trap_end");

			if (otp->ot_prot & OT_DATA_ACCESS) {
				otp->ot_trap |= OT_DATA_ACCESS;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				goto cleanup;
			}
		}
		lofault = curthread->t_lofault;
		curthread->t_lofault = 0;

		mstate = new_mstate(curthread, LMS_KFAULT);

		switch (type) {
		case T_DATA_PROT:
			fault_type = F_PROT;
			rw = S_WRITE;
			break;
		case T_INSTR_MMU_MISS:
			fault_type = F_INVAL;
			rw = S_EXEC;
			break;
		case T_DATA_MMU_MISS:
		case T_DATA_EXCEPTION:
			/*
			 * The hardware doesn't update the sfsr on mmu
			 * misses so it is not easy to find out whether
			 * the access was a read or a write so we need
			 * to decode the actual instruction.
			 */
			fault_type = F_INVAL;
			rw = get_accesstype(rp);
			break;
		default:
			cmn_err(CE_PANIC, "trap: unknown type %x", type);
			break;
		}
		/*
		 * We determine if access was done to kernel or user
		 * address space.  The addr passed into trap is really the
		 * tag access register.
		 */
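		/*
		 * Tag access layout (a sketch): the low bits covered by
		 * TAGACC_CTX_MASK hold the context number, and the
		 * remaining upper bits hold the faulting virtual address,
		 * so a context of KCONTEXT marks a kernel access.
		 */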
		iskernel = (((uintptr_t)addr & TAGACC_CTX_MASK) == KCONTEXT);
		addr = (caddr_t)((uintptr_t)addr & TAGACC_VADDR_MASK);

		res = pagefault(addr, fault_type, rw, iskernel);
		if (!iskernel && res == FC_NOMAP &&
		    addr < p->p_usrstack && grow(addr))
			res = 0;

		(void) new_mstate(curthread, mstate);

		/*
		 * Restore lofault.  If we resolved the fault, exit.
		 * If we didn't and lofault wasn't set, die.
		 */
		curthread->t_lofault = lofault;

		if (res == 0)
			goto cleanup;

		if (IS_PREFETCH(instr)) {
			/* skip prefetch instructions in kernel-land */
			rp->r_pc = rp->r_npc;
			rp->r_npc += 4;
			goto cleanup;
		}

		if ((lofault == 0 || lodebug) &&
		    (calc_memaddr(rp, &badaddr) == SIMU_SUCCESS))
			addr = badaddr;
		if (lofault == 0)
			(void) die(type, rp, addr, 0);
		/*
		 * Cannot resolve fault.  Return to lofault.
		 */
		if (lodebug) {
			showregs(type, rp, addr, 0);
			traceback((caddr_t)rp->r_sp);
		}
		if (FC_CODE(res) == FC_OBJERR)
			res = FC_ERRNO(res);
		else
			res = EFAULT;
		rp->r_g1 = res;
		rp->r_pc = curthread->t_lofault;
		rp->r_npc = curthread->t_lofault + 4;
		goto cleanup;

	case T_INSTR_EXCEPTION + T_USER: /* user insn access exception */
		bzero(&siginfo, sizeof (siginfo));
		siginfo.si_addr = (caddr_t)rp->r_pc;
		siginfo.si_signo = SIGSEGV;
		siginfo.si_code = X_FAULT_TYPE(mmu_fsr) == FT_PRIV ?
		    SEGV_ACCERR : SEGV_MAPERR;
		fault = FLTBOUNDS;
		break;

	case T_WIN_OVERFLOW + T_USER:	/* window overflow in ??? */
	case T_WIN_UNDERFLOW + T_USER:	/* window underflow in ??? */
	case T_SYS_RTT_PAGE + T_USER:	/* window underflow in user_rtt */
	case T_INSTR_MMU_MISS + T_USER:	/* user instruction mmu miss */
	case T_DATA_MMU_MISS + T_USER:	/* user data mmu miss */
	case T_DATA_PROT + T_USER:	/* user data protection fault */
		switch (type) {
		case T_INSTR_MMU_MISS + T_USER:
			addr = (caddr_t)rp->r_pc;
			fault_type = F_INVAL;
			rw = S_EXEC;
			break;

		case T_DATA_MMU_MISS + T_USER:
			addr = (caddr_t)((uintptr_t)addr & TAGACC_VADDR_MASK);
			fault_type = F_INVAL;
			/*
			 * The hardware doesn't update the sfsr on mmu misses
			 * so it is not easy to find out whether the access
			 * was a read or a write so we need to decode the
			 * actual instruction.  XXX BUGLY HW
			 */
			rw = get_accesstype(rp);
			break;

		case T_DATA_PROT + T_USER:
			addr = (caddr_t)((uintptr_t)addr & TAGACC_VADDR_MASK);
			fault_type = F_PROT;
			rw = S_WRITE;
			break;

		case T_WIN_OVERFLOW + T_USER:
			addr = (caddr_t)((uintptr_t)addr & TAGACC_VADDR_MASK);
			fault_type = F_INVAL;
			rw = S_WRITE;
			break;

		case T_WIN_UNDERFLOW + T_USER:
		case T_SYS_RTT_PAGE + T_USER:
			addr = (caddr_t)((uintptr_t)addr & TAGACC_VADDR_MASK);
			fault_type = F_INVAL;
			rw = S_READ;
			break;

		default:
			cmn_err(CE_PANIC, "trap: unknown type %x", type);
			break;
		}

		/*
		 * If we are single stepping do not call pagefault
		 */
		if (stepped) {
			res = FC_NOMAP;
		} else {
			caddr_t vaddr = addr;
			size_t sz;
			int ta;

			ASSERT(!(curthread->t_flag & T_WATCHPT));
			watchpage = (pr_watch_active(p) &&
			    type != T_WIN_OVERFLOW + T_USER &&
			    type != T_WIN_UNDERFLOW + T_USER &&
			    type != T_SYS_RTT_PAGE + T_USER &&
			    pr_is_watchpage(addr, rw));

			if (!watchpage ||
			    (sz = instr_size(rp, &vaddr, rw)) <= 0)
				/* EMPTY */;
			else if ((watchcode = pr_is_watchpoint(&vaddr, &ta,
			    sz, NULL, rw)) != 0) {
				if (ta) {
					do_watch_step(vaddr, sz, rw,
					    watchcode, rp->r_pc);
					fault_type = F_INVAL;
				} else {
					bzero(&siginfo,	sizeof (siginfo));
					siginfo.si_signo = SIGTRAP;
					siginfo.si_code = watchcode;
					siginfo.si_addr = vaddr;
					siginfo.si_trapafter = 0;
					siginfo.si_pc = (caddr_t)rp->r_pc;
					fault = FLTWATCH;
					break;
				}
			} else {
				if (rw != S_EXEC &&
				    pr_watch_emul(rp, vaddr, rw))
					goto out;
				do_watch_step(vaddr, sz, rw, 0, 0);
				fault_type = F_INVAL;
			}

			if (pr_watch_active(p) &&
			    (type == T_WIN_OVERFLOW + T_USER ||
			    type == T_WIN_UNDERFLOW + T_USER ||
			    type == T_SYS_RTT_PAGE + T_USER)) {
				int dotwo = (type == T_WIN_UNDERFLOW + T_USER);
				if (copy_return_window(dotwo))
					goto out;
				fault_type = F_INVAL;
			}

			res = pagefault(addr, fault_type, rw, 0);

			/*
			 * If pagefault() succeeded, we're done.
			 * Otherwise, try to grow the stack automatically.
			 */
			if (res == 0 ||
			    (res == FC_NOMAP &&
			    type != T_INSTR_MMU_MISS + T_USER &&
			    addr < p->p_usrstack &&
			    grow(addr))) {
				int ismem = prismember(&p->p_fltmask, FLTPAGE);

				/*
				 * instr_size() is used to get the exact
				 * address of the fault, instead of the
				 * page of the fault. Unfortunately it is
				 * very slow, and this is an important
				 * code path. Don't call it unless
				 * correctness is needed. ie. if FLTPAGE
				 * is set, or we're profiling.
				 */

				if (curthread->t_rprof != NULL || ismem)
					(void) instr_size(rp, &addr, rw);

				lwp->lwp_lastfault = FLTPAGE;
				lwp->lwp_lastfaddr = addr;

				if (ismem) {
					bzero(&siginfo, sizeof (siginfo));
					siginfo.si_addr = addr;
					(void) stop_on_fault(FLTPAGE, &siginfo);
				}
				goto out;
			}

			if (type != (T_INSTR_MMU_MISS + T_USER)) {
				/*
				 * check for non-faulting loads, also
				 * fetch the instruction to check for
				 * flush
				 */
				if (nfload(rp, &instr))
					goto out;

				/* skip userland prefetch instructions */
				if (IS_PREFETCH(instr)) {
					rp->r_pc = rp->r_npc;
					rp->r_npc += 4;
					goto out;
					/*NOTREACHED*/
				}

				/*
				 * check if the instruction was a
				 * flush.  ABI allows users to specify
				 * an illegal address on the flush
				 * instruction so we simply return in
				 * this case.
				 *
				 * NB: the hardware should set a bit
				 * indicating this trap was caused by
				 * a flush instruction.  Instruction
				 * decoding is bugly!
				 */
				if (IS_FLUSH(instr)) {
					/* skip the flush instruction */
					rp->r_pc = rp->r_npc;
					rp->r_npc += 4;
					goto out;
					/*NOTREACHED*/
				}
			} else if (res == FC_PROT) {
				report_stack_exec(p, addr);
			}

			if (tudebug)
				showregs(type, rp, addr, 0);
		}

		/*
		 * In the case where both pagefault and grow fail,
		 * set the code to the value provided by pagefault.
		 */
		(void) instr_size(rp, &addr, rw);
		bzero(&siginfo, sizeof (siginfo));
		siginfo.si_addr = addr;
		if (FC_CODE(res) == FC_OBJERR) {
			siginfo.si_errno = FC_ERRNO(res);
			if (siginfo.si_errno != EINTR) {
				siginfo.si_signo = SIGBUS;
				siginfo.si_code = BUS_OBJERR;
				fault = FLTACCESS;
			}
		} else { /* FC_NOMAP || FC_PROT */
			siginfo.si_signo = SIGSEGV;
			siginfo.si_code = (res == FC_NOMAP) ?
			    SEGV_MAPERR : SEGV_ACCERR;
			fault = FLTBOUNDS;
		}
		/*
		 * If this is the culmination of a single-step,
		 * reset the addr, code, signal and fault to
		 * indicate a hardware trace trap.
		 */
		if (stepped) {
			pcb_t *pcb = &lwp->lwp_pcb;

			siginfo.si_signo = 0;
			fault = 0;
			if (pcb->pcb_step == STEP_WASACTIVE) {
				pcb->pcb_step = STEP_NONE;
				pcb->pcb_tracepc = NULL;
				oldpc = rp->r_pc - 4;
			}
			/*
			 * If both NORMAL_STEP and WATCH_STEP are in
			 * effect, give precedence to WATCH_STEP.
			 * One or the other must be set at this point.
			 */
			ASSERT(pcb->pcb_flags & (NORMAL_STEP|WATCH_STEP));
			if ((fault = undo_watch_step(&siginfo)) == 0 &&
			    (pcb->pcb_flags & NORMAL_STEP)) {
				siginfo.si_signo = SIGTRAP;
				siginfo.si_code = TRAP_TRACE;
				siginfo.si_addr = (caddr_t)rp->r_pc;
				fault = FLTTRACE;
			}
			pcb->pcb_flags &= ~(NORMAL_STEP|WATCH_STEP);
		}
		break;

	case T_DATA_EXCEPTION + T_USER:	/* user data access exception */

		if (&vis1_partial_support != NULL) {
			bzero(&siginfo, sizeof (siginfo));
			if (vis1_partial_support(rp,
			    &siginfo, &fault) == 0)
				goto out;
		}

		if (nfload(rp, &instr))
			goto out;
		if (IS_FLUSH(instr)) {
			/* skip the flush instruction */
			rp->r_pc = rp->r_npc;
			rp->r_npc += 4;
			goto out;
			/*NOTREACHED*/
		}
		bzero(&siginfo, sizeof (siginfo));
		siginfo.si_addr = addr;
		switch (X_FAULT_TYPE(mmu_fsr)) {
		case FT_ATOMIC_NC:
			if ((IS_SWAP(instr) && swap_nc(rp, instr)) ||
			    (IS_LDSTUB(instr) && ldstub_nc(rp, instr))) {
				/* skip the atomic */
				rp->r_pc = rp->r_npc;
				rp->r_npc += 4;
				goto out;
			}
			/* fall into ... */
		case FT_PRIV:
			siginfo.si_signo = SIGSEGV;
			siginfo.si_code = SEGV_ACCERR;
			fault = FLTBOUNDS;
			break;
		case FT_SPEC_LD:
		case FT_ILL_ALT:
			siginfo.si_signo = SIGILL;
			siginfo.si_code = ILL_ILLADR;
			fault = FLTILL;
			break;
		default:
			siginfo.si_signo = SIGSEGV;
			siginfo.si_code = SEGV_MAPERR;
			fault = FLTBOUNDS;
			break;
		}
		break;

	case T_SYS_RTT_ALIGN + T_USER:	/* user alignment error */
	case T_ALIGNMENT + T_USER:	/* user alignment error */
		if (tudebug)
			showregs(type, rp, addr, 0);
		/*
		 * If the user has to do unaligned references,
		 * the ugly stuff gets done here.
		 */
		alignfaults++;
		if (&vis1_partial_support != NULL) {
			bzero(&siginfo, sizeof (siginfo));
			if (vis1_partial_support(rp,
			    &siginfo, &fault) == 0)
				goto out;
		}

		bzero(&siginfo, sizeof (siginfo));
		if (type == T_SYS_RTT_ALIGN + T_USER) {
			if (nfload(rp, NULL))
				goto out;
			/*
			 * Can't do unaligned stack access
			 */
			siginfo.si_signo = SIGBUS;
			siginfo.si_code = BUS_ADRALN;
			siginfo.si_addr = addr;
			fault = FLTACCESS;
			break;
		}

		/*
		 * Try to fix alignment before non-faulting load test.
		 */
		if (p->p_fixalignment) {
			if (do_unaligned(rp, &badaddr) == SIMU_SUCCESS) {
				rp->r_pc = rp->r_npc;
				rp->r_npc += 4;
				goto out;
			}
			if (nfload(rp, NULL))
				goto out;
			siginfo.si_signo = SIGSEGV;
			siginfo.si_code = SEGV_MAPERR;
			siginfo.si_addr = badaddr;
			fault = FLTBOUNDS;
		} else {
			if (nfload(rp, NULL))
				goto out;
			siginfo.si_signo = SIGBUS;
			siginfo.si_code = BUS_ADRALN;
			if (rp->r_pc & 3) {	/* offending address, if pc */
				siginfo.si_addr = (caddr_t)rp->r_pc;
			} else {
				if (calc_memaddr(rp, &badaddr) == SIMU_UNALIGN)
					siginfo.si_addr = badaddr;
				else
					siginfo.si_addr = (caddr_t)rp->r_pc;
			}
			fault = FLTACCESS;
		}
		break;

	case T_PRIV_INSTR + T_USER:	/* privileged instruction fault */
		if (tudebug)
			showregs(type, rp, (caddr_t)0, 0);

		bzero(&siginfo, sizeof (siginfo));
#ifdef	sun4v
		/*
		 * If this instruction fault is a non-privileged %tick
		 * or %stick trap, and %tick/%stick user emulation is
		 * enabled as a result of an OS suspend, then simulate
		 * the register read. We rely on simulate_rdtick to fail
		 * if the instruction is not a %tick or %stick read,
		 * causing us to fall through to the normal privileged
		 * instruction handling.
		 */
		if (tick_stick_emulation_active &&
		    (X_FAULT_TYPE(mmu_fsr) == FT_NEW_PRVACT) &&
		    simulate_rdtick(rp) == SIMU_SUCCESS) {
			/* skip the successfully simulated instruction */
			rp->r_pc = rp->r_npc;
			rp->r_npc += 4;
			goto out;
		}
#endif
		siginfo.si_signo = SIGILL;
		siginfo.si_code = ILL_PRVOPC;
		siginfo.si_addr = (caddr_t)rp->r_pc;
		fault = FLTILL;
		break;

	case T_UNIMP_INSTR:		/* priv illegal instruction fault */
		if (fpras_implemented) {
			/*
			 * Call fpras_chktrap indicating that
			 * we've come from a trap handler and pass
			 * the regs.  That function may choose to panic
			 * (in which case it won't return) or it may
			 * determine that a reboot is desired.  In the
			 * latter case it must alter pc/npc to skip
			 * the illegal instruction and continue at
			 * a controlled address.
			 */
			if (&fpras_chktrap) {
				if (fpras_chktrap(rp))
					goto cleanup;
			}
		}
#if defined(SF_ERRATA_23) || defined(SF_ERRATA_30) /* call ... illegal-insn */
		instr = *(int *)rp->r_pc;
		if ((instr & 0xc0000000) == 0x40000000) {
			long pc;

			rp->r_o7 = (long long)rp->r_pc;
			pc = rp->r_pc + ((instr & 0x3fffffff) << 2);
			rp->r_pc = rp->r_npc;
			rp->r_npc = pc;
			ill_calls++;
			goto cleanup;
		}
#endif /* SF_ERRATA_23 || SF_ERRATA_30 */
		/*
		 * It's not an fpras failure and it's not SF_ERRATA_23 - die
		 */
		addr = (caddr_t)rp->r_pc;
		(void) die(type, rp, addr, 0);
		/*NOTREACHED*/

	case T_UNIMP_INSTR + T_USER:	/* illegal instruction fault */
#if defined(SF_ERRATA_23) || defined(SF_ERRATA_30) /* call ... illegal-insn */
		instr = fetch_user_instr((caddr_t)rp->r_pc);
		if ((instr & 0xc0000000) == 0x40000000) {
			long pc;

			rp->r_o7 = (long long)rp->r_pc;
			pc = rp->r_pc + ((instr & 0x3fffffff) << 2);
			rp->r_pc = rp->r_npc;
			rp->r_npc = pc;
			ill_calls++;
			goto out;
		}
#endif /* SF_ERRATA_23 || SF_ERRATA_30 */
		if (tudebug)
			showregs(type, rp, (caddr_t)0, 0);
		bzero(&siginfo, sizeof (siginfo));
		/*
		 * Try to simulate the instruction.
		 */
		switch (simulate_unimp(rp, &badaddr)) {
		case SIMU_RETRY:
			goto out;	/* regs are already set up */
			/*NOTREACHED*/

		case SIMU_SUCCESS:
			/* skip the successfully simulated instruction */
			rp->r_pc = rp->r_npc;
			rp->r_npc += 4;
			goto out;
			/*NOTREACHED*/

		case SIMU_FAULT:
			siginfo.si_signo = SIGSEGV;
			siginfo.si_code = SEGV_MAPERR;
			siginfo.si_addr = badaddr;
			fault = FLTBOUNDS;
			break;

		case SIMU_DZERO:
			siginfo.si_signo = SIGFPE;
			siginfo.si_code = FPE_INTDIV;
			siginfo.si_addr = (caddr_t)rp->r_pc;
			fault = FLTIZDIV;
			break;

		case SIMU_UNALIGN:
			siginfo.si_signo = SIGBUS;
			siginfo.si_code = BUS_ADRALN;
			siginfo.si_addr = badaddr;
			fault = FLTACCESS;
			break;

		case SIMU_ILLEGAL:
		default:
			siginfo.si_signo = SIGILL;
			op3 = (instr >> 19) & 0x3F;
			if (IS_FLOAT(instr) &&
			    ((op3 == IOP_V8_STQFA) || (op3 == IOP_V8_STDFA)))
				siginfo.si_code = ILL_ILLADR;
			else
				siginfo.si_code = ILL_ILLOPC;
			siginfo.si_addr = (caddr_t)rp->r_pc;
			fault = FLTILL;
			break;
		}
		break;

	case T_UNIMP_LDD + T_USER:
	case T_UNIMP_STD + T_USER:
		if (tudebug)
			showregs(type, rp, (caddr_t)0, 0);
		switch (simulate_lddstd(rp, &badaddr)) {
		case SIMU_SUCCESS:
			/* skip the successfully simulated instruction */
			rp->r_pc = rp->r_npc;
			rp->r_npc += 4;
			goto out;
			/*NOTREACHED*/

		case SIMU_FAULT:
			if (nfload(rp, NULL))
				goto out;
			siginfo.si_signo = SIGSEGV;
			siginfo.si_code = SEGV_MAPERR;
			siginfo.si_addr = badaddr;
			fault = FLTBOUNDS;
			break;

		case SIMU_UNALIGN:
			if (nfload(rp, NULL))
				goto out;
			siginfo.si_signo = SIGBUS;
			siginfo.si_code = BUS_ADRALN;
			siginfo.si_addr = badaddr;
			fault = FLTACCESS;
			break;

		case SIMU_ILLEGAL:
		default:
			siginfo.si_signo = SIGILL;
			siginfo.si_code = ILL_ILLOPC;
			siginfo.si_addr = (caddr_t)rp->r_pc;
			fault = FLTILL;
			break;
		}
		break;

	case T_UNIMP_LDD:
	case T_UNIMP_STD:
		if (simulate_lddstd(rp, &badaddr) == SIMU_SUCCESS) {
			/* skip the successfully simulated instruction */
			rp->r_pc = rp->r_npc;
			rp->r_npc += 4;
			goto cleanup;
			/*NOTREACHED*/
		}
		/*
		 * A third party driver executed an {LDD,STD,LDDA,STDA}
		 * that we couldn't simulate.
		 */
		if (nfload(rp, NULL))
			goto cleanup;

		if (curthread->t_lofault) {
			if (lodebug) {
				showregs(type, rp, addr, 0);
				traceback((caddr_t)rp->r_sp);
			}
			rp->r_g1 = EFAULT;
			rp->r_pc = curthread->t_lofault;
			rp->r_npc = rp->r_pc + 4;
			goto cleanup;
		}
		(void) die(type, rp, addr, 0);
		/*NOTREACHED*/

	case T_IDIV0 + T_USER:		/* integer divide by zero */
	case T_DIV0 + T_USER:		/* integer divide by zero */
		if (tudebug && tudebugfpe)
			showregs(type, rp, (caddr_t)0, 0);
		bzero(&siginfo, sizeof (siginfo));
		siginfo.si_signo = SIGFPE;
		siginfo.si_code = FPE_INTDIV;
		siginfo.si_addr = (caddr_t)rp->r_pc;
		fault = FLTIZDIV;
		break;

	case T_INT_OVERFLOW + T_USER:	/* integer overflow */
		if (tudebug && tudebugfpe)
			showregs(type, rp, (caddr_t)0, 0);
		bzero(&siginfo, sizeof (siginfo));
		siginfo.si_signo = SIGFPE;
		siginfo.si_code  = FPE_INTOVF;
		siginfo.si_addr  = (caddr_t)rp->r_pc;
		fault = FLTIOVF;
		break;

	case T_BREAKPOINT + T_USER:	/* breakpoint trap (t 1) */
		if (tudebug && tudebugbpt)
			showregs(type, rp, (caddr_t)0, 0);
		bzero(&siginfo, sizeof (siginfo));
		siginfo.si_signo = SIGTRAP;
		siginfo.si_code = TRAP_BRKPT;
		siginfo.si_addr = (caddr_t)rp->r_pc;
		fault = FLTBPT;
		break;

	case T_TAG_OVERFLOW + T_USER:	/* tag overflow (taddcctv, tsubcctv) */
		if (tudebug)
			showregs(type, rp, (caddr_t)0, 0);
		bzero(&siginfo, sizeof (siginfo));
		siginfo.si_signo = SIGEMT;
		siginfo.si_code = EMT_TAGOVF;
		siginfo.si_addr = (caddr_t)rp->r_pc;
		fault = FLTACCESS;
		break;

	case T_FLUSH_PCB + T_USER:	/* finish user window overflow */
	case T_FLUSHW + T_USER:		/* finish user window flush */
		/*
		 * This trap is entered from sys_rtt in locore.s when,
		 * upon return to user it is found that there are user
		 * windows in pcb_wbuf.  This happens because they could
		 * not be saved on the user stack, either because it
		 * wasn't resident or because it was misaligned.
		 */
	{
		int error;
		caddr_t sp;

		error = flush_user_windows_to_stack(&sp);
		/*
		 * Possible errors:
		 *	error copying out
		 *	unaligned stack pointer
		 * The first is given to us as the return value
		 * from flush_user_windows_to_stack().  The second
		 * results in residual windows in the pcb.
		 */
		if (error != 0) {
			/*
			 * EINTR comes from a signal during copyout;
			 * we should not post another signal.
			 */
			if (error != EINTR) {
				/*
				 * Zap the process with a SIGSEGV - process
				 * may be managing its own stack growth by
				 * taking SIGSEGVs on a different signal stack.
				 */
				bzero(&siginfo, sizeof (siginfo));
				siginfo.si_signo = SIGSEGV;
				siginfo.si_code  = SEGV_MAPERR;
				siginfo.si_addr  = sp;
				fault = FLTBOUNDS;
			}
			break;
		} else if (mpcb->mpcb_wbcnt) {
			bzero(&siginfo, sizeof (siginfo));
			siginfo.si_signo = SIGILL;
			siginfo.si_code  = ILL_BADSTK;
			siginfo.si_addr  = (caddr_t)rp->r_pc;
			fault = FLTILL;
			break;
		}
	}

		/*
		 * T_FLUSHW is used when handling a ta 0x3 -- the old flush
		 * window trap -- which is implemented by executing the
		 * flushw instruction. The flushw can trap if any of the
		 * stack pages are not writable for whatever reason. In this
		 * case only, we advance the pc to the next instruction so
		 * that the user thread doesn't needlessly execute the trap
		 * again. Normally this wouldn't be a problem -- we'll
		 * usually only end up here if this is the first touch to a
		 * stack page -- since the second execution won't trap, but
		 * if there's a watchpoint on the stack page the user thread
		 * would spin, continuously executing the trap instruction.
		 */
		if (type == T_FLUSHW + T_USER) {
			rp->r_pc = rp->r_npc;
			rp->r_npc += 4;
		}
		goto out;

	case T_AST + T_USER:		/* profiling or resched pseudo trap */
		if (lwp->lwp_pcb.pcb_flags & CPC_OVERFLOW) {
			lwp->lwp_pcb.pcb_flags &= ~CPC_OVERFLOW;
			if (kcpc_overflow_ast()) {
				/*
				 * Signal performance counter overflow
				 */
				if (tudebug)
					showregs(type, rp, (caddr_t)0, 0);
				bzero(&siginfo, sizeof (siginfo));
				siginfo.si_signo = SIGEMT;
				siginfo.si_code = EMT_CPCOVF;
				siginfo.si_addr = (caddr_t)rp->r_pc;
				/* for trap_cleanup(), below */
				oldpc = rp->r_pc - 4;
				fault = FLTCPCOVF;
			}
		}

		/*
		 * The CPC_OVERFLOW check above may already have populated
		 * siginfo and set fault, so the checks below must not
		 * touch these and the functions they call must use
		 * trapsig() directly.
		 */

		if (lwp->lwp_pcb.pcb_flags & ASYNC_HWERR) {
			lwp->lwp_pcb.pcb_flags &= ~ASYNC_HWERR;
			trap_async_hwerr();
		}

		if (lwp->lwp_pcb.pcb_flags & ASYNC_BERR) {
			lwp->lwp_pcb.pcb_flags &= ~ASYNC_BERR;
			trap_async_berr_bto(ASYNC_BERR, rp);
		}

		if (lwp->lwp_pcb.pcb_flags & ASYNC_BTO) {
			lwp->lwp_pcb.pcb_flags &= ~ASYNC_BTO;
			trap_async_berr_bto(ASYNC_BTO, rp);
		}

		break;
	}

	if (fault) {
		/* We took a fault so abort single step. */
		lwp->lwp_pcb.pcb_flags &= ~(NORMAL_STEP|WATCH_STEP);
	}
	trap_cleanup(rp, fault, &siginfo, oldpc == rp->r_pc);

out:	/* We can't get here from a system trap */
	ASSERT(type & T_USER);
	trap_rtt();
	(void) new_mstate(curthread, mstate);
	/* Kernel probe */
	TNF_PROBE_1(thread_state, "thread", /* CSTYLED */,
		tnf_microstate, state, LMS_USER);

	TRACE_0(TR_FAC_TRAP, TR_C_TRAP_HANDLER_EXIT, "C_trap_handler_exit");
	return;

cleanup:	/* system traps end up here */
	ASSERT(!(type & T_USER));

	TRACE_0(TR_FAC_TRAP, TR_C_TRAP_HANDLER_EXIT, "C_trap_handler_exit");
}

void
trap_cleanup(
	struct regs *rp,
	uint_t fault,
	k_siginfo_t *sip,
	int restartable)
{
	extern void aio_cleanup();
	proc_t *p = ttoproc(curthread);
	klwp_id_t lwp = ttolwp(curthread);

	if (fault) {
		/*
		 * Remember the fault and fault address
		 * for real-time (SIGPROF) profiling.
		 */
		lwp->lwp_lastfault = fault;
		lwp->lwp_lastfaddr = sip->si_addr;

		DTRACE_PROC2(fault, int, fault, ksiginfo_t *, sip);

		/*
		 * If a debugger has declared this fault to be an
		 * event of interest, stop the lwp.  Otherwise just
		 * deliver the associated signal.
		 */
		if (sip->si_signo != SIGKILL &&
		    prismember(&p->p_fltmask, fault) &&
		    stop_on_fault(fault, sip) == 0)
			sip->si_signo = 0;
	}

	if (sip->si_signo)
		trapsig(sip, restartable);

	if (lwp->lwp_oweupc)
		profil_tick(rp->r_pc);

	if (curthread->t_astflag | curthread->t_sig_check) {
		/*
		 * Turn off the AST flag before checking all the conditions that
		 * may have caused an AST.  This flag is on whenever a signal or
		 * unusual condition should be handled after the next trap or
		 * syscall.
		 */
		astoff(curthread);
		curthread->t_sig_check = 0;

		/*
		 * The following check is legal for the following reasons:
		 *	1) The thread we are checking is ourselves, so there
		 *	   is no way the proc can go away.
		 *	2) The only time we need to be protected by the
		 *	   lock is if the binding is changed.
		 *
		 *	Note we will still take the lock and check the binding
		 *	if the condition was true without the lock held.  This
		 *	prevents lock contention among threads owned by the
		 *	same proc.
		 */

		if (curthread->t_proc_flag & TP_CHANGEBIND) {
			mutex_enter(&p->p_lock);
			if (curthread->t_proc_flag & TP_CHANGEBIND) {
				timer_lwpbind();
				curthread->t_proc_flag &= ~TP_CHANGEBIND;
			}
			mutex_exit(&p->p_lock);
		}

		/*
		 * For kaio requests that are on the per-process poll queue,
		 * aiop->aio_pollq, their AIO_POLL bit is set, and the kernel
		 * should copy out their result_t to user memory.  By copying
		 * out the result_t, the user can poll on memory waiting
		 * for the kaio request to complete.
		 */
		if (p->p_aio)
			aio_cleanup(0);

		/*
		 * If this LWP was asked to hold, call holdlwp(), which will
		 * stop.  holdlwps() sets this up and calls pokelwps() which
		 * sets the AST flag.
		 *
		 * Also check TP_EXITLWP, since this is used by fresh new LWPs
		 * through lwp_rtt().  That flag is set if the lwp_create(2)
		 * syscall failed after creating the LWP.
		 */
		if (ISHOLD(p))
			holdlwp();

		/*
		 * All code that sets signals and makes ISSIG evaluate true must
		 * set t_astflag afterwards.
		 */
		if (ISSIG_PENDING(curthread, lwp, p)) {
			if (issig(FORREAL))
				psig();
			curthread->t_sig_check = 1;
		}

		if (curthread->t_rprof != NULL) {
			realsigprof(0, 0, 0);
			curthread->t_sig_check = 1;
		}
	}
}

/*
 * Called from fp_traps when a floating point trap occurs.
 * Note that the T_DATA_EXCEPTION case does not use X_FAULT_TYPE(mmu_fsr),
 * because mmu_fsr (now changed to code) is always 0.
 * Note that the T_UNIMP_INSTR case does not call simulate_unimp(),
 * because the simulator only simulates multiply and divide instructions,
 * which would not cause floating point traps in the first place.
 * XXX - Supervisor mode floating point traps?
 */
void
fpu_trap(struct regs *rp, caddr_t addr, uint32_t type, uint32_t code)
{
	proc_t *p = ttoproc(curthread);
	klwp_id_t lwp = ttolwp(curthread);
	k_siginfo_t siginfo;
	uint_t op3, fault = 0;
	int mstate;
	char *badaddr;
	kfpu_t *fp;
	struct fpq *pfpq;
	uint32_t inst;
	utrap_handler_t *utrapp;

	CPU_STATS_ADDQ(CPU, sys, trap, 1);

	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	if (USERMODE(rp->r_tstate)) {
		/*
		 * Set lwp_state before trying to acquire any
		 * adaptive lock
		 */
		ASSERT(lwp != NULL);
		lwp->lwp_state = LWP_SYS;
		/*
		 * Set up the current cred to use during this trap. u_cred
		 * no longer exists.  t_cred is used instead.
		 * The current process credential applies to the thread for
		 * the entire trap.  If trapping from the kernel, this
		 * should already be set up.
		 */
		if (curthread->t_cred != p->p_cred) {
			cred_t *oldcred = curthread->t_cred;
			/*
			 * DTrace accesses t_cred in probe context.  t_cred
			 * must always be either NULL, or point to a valid,
			 * allocated cred structure.
			 */
			curthread->t_cred = crgetcred();
			crfree(oldcred);
		}
		ASSERT(lwp->lwp_regs == rp);
		mstate = new_mstate(curthread, LMS_TRAP);
		siginfo.si_signo = 0;
		type |= T_USER;
	}

	TRACE_1(TR_FAC_TRAP, TR_C_TRAP_HANDLER_ENTER,
	    "C_fpu_trap_handler_enter:type %x", type);

	if (tudebug && tudebugfpe)
		showregs(type, rp, addr, 0);

	bzero(&siginfo, sizeof (siginfo));
	siginfo.si_code = code;
	siginfo.si_addr = addr;

	switch (type) {

	case T_FP_EXCEPTION_IEEE + T_USER:	/* FPU arithmetic exception */
		/*
		 * FPU arithmetic exception - fake up a fpq if we
		 *	came here directly from _fp_ieee_exception,
		 *	which is indicated by a zero fpu_qcnt.
		 */
		fp = lwptofpu(curthread->t_lwp);
		utrapp = curthread->t_procp->p_utraps;
		if (fp->fpu_qcnt == 0) {
			inst = fetch_user_instr((caddr_t)rp->r_pc);
			lwp->lwp_state = LWP_SYS;
			pfpq = &fp->fpu_q->FQu.fpq;
			pfpq->fpq_addr = (uint32_t *)rp->r_pc;
			pfpq->fpq_instr = inst;
			fp->fpu_qcnt = 1;
			fp->fpu_q_entrysize = sizeof (struct fpq);
#ifdef SF_V9_TABLE_28
			/*
			 * Spitfire and blackbird followed the SPARC V9 manual
			 * paragraph 3 of section 5.1.7.9 FSR_current_exception
			 * (cexc) for setting fsr.cexc bits on underflow and
			 * overflow traps when the fsr.tem.inexact bit is set,
			 * instead of following Table 28. Bugid 1263234.
			 */
			{
				extern int spitfire_bb_fsr_bug;

				if (spitfire_bb_fsr_bug &&
				    (fp->fpu_fsr & FSR_TEM_NX)) {
					if (((fp->fpu_fsr & FSR_TEM_OF) == 0) &&
					    (fp->fpu_fsr & FSR_CEXC_OF)) {
						fp->fpu_fsr &= ~FSR_CEXC_OF;
						fp->fpu_fsr |= FSR_CEXC_NX;
						_fp_write_pfsr(&fp->fpu_fsr);
						siginfo.si_code = FPE_FLTRES;
					}
					if (((fp->fpu_fsr & FSR_TEM_UF) == 0) &&
					    (fp->fpu_fsr & FSR_CEXC_UF)) {
						fp->fpu_fsr &= ~FSR_CEXC_UF;
						fp->fpu_fsr |= FSR_CEXC_NX;
						_fp_write_pfsr(&fp->fpu_fsr);
						siginfo.si_code = FPE_FLTRES;
					}
				}
			}
#endif /* SF_V9_TABLE_28 */
			rp->r_pc = rp->r_npc;
			rp->r_npc += 4;
		} else if (utrapp && utrapp[UT_FP_EXCEPTION_IEEE_754]) {
			/*
			 * The user had a trap handler installed.  Jump to
			 * the trap handler instead of signalling the process.
			 */
			rp->r_pc = (long)utrapp[UT_FP_EXCEPTION_IEEE_754];
			rp->r_npc = rp->r_pc + 4;
			break;
		}
		siginfo.si_signo = SIGFPE;
		fault = FLTFPE;
		break;

	case T_DATA_EXCEPTION + T_USER:		/* user data access exception */
		siginfo.si_signo = SIGSEGV;
		fault = FLTBOUNDS;
		break;

	case T_LDDF_ALIGN + T_USER: /* 64 bit user lddfa alignment error */
	case T_STDF_ALIGN + T_USER: /* 64 bit user stdfa alignment error */
		alignfaults++;
		lwp->lwp_state = LWP_SYS;
		if (&vis1_partial_support != NULL) {
			bzero(&siginfo, sizeof (siginfo));
			if (vis1_partial_support(rp,
			    &siginfo, &fault) == 0)
				goto out;
		}
		if (do_unaligned(rp, &badaddr) == SIMU_SUCCESS) {
			rp->r_pc = rp->r_npc;
			rp->r_npc += 4;
			goto out;
		}
		fp = lwptofpu(curthread->t_lwp);
		fp->fpu_qcnt = 0;
		siginfo.si_signo = SIGSEGV;
		siginfo.si_code = SEGV_MAPERR;
		siginfo.si_addr = badaddr;
		fault = FLTBOUNDS;
		break;

	case T_ALIGNMENT + T_USER:		/* user alignment error */
		/*
		 * If the user has to do unaligned references,
		 * the ugly stuff gets done here.
		 * Only handles vanilla loads and stores.
		 */
		alignfaults++;
		if (p->p_fixalignment) {
			if (do_unaligned(rp, &badaddr) == SIMU_SUCCESS) {
				rp->r_pc = rp->r_npc;
				rp->r_npc += 4;
				goto out;
			}
			siginfo.si_signo = SIGSEGV;
			siginfo.si_code = SEGV_MAPERR;
			siginfo.si_addr = badaddr;
			fault = FLTBOUNDS;
		} else {
			siginfo.si_signo = SIGBUS;
			siginfo.si_code = BUS_ADRALN;
			if (rp->r_pc & 3) {	/* offending address, if pc */
				siginfo.si_addr = (caddr_t)rp->r_pc;
			} else {
				if (calc_memaddr(rp, &badaddr) == SIMU_UNALIGN)
					siginfo.si_addr = badaddr;
				else
					siginfo.si_addr = (caddr_t)rp->r_pc;
			}
			fault = FLTACCESS;
		}
		break;

	case T_UNIMP_INSTR + T_USER:		/* illegal instruction fault */
		siginfo.si_signo = SIGILL;
		inst = fetch_user_instr((caddr_t)rp->r_pc);
		op3 = (inst >> 19) & 0x3F;
		if ((op3 == IOP_V8_STQFA) || (op3 == IOP_V8_STDFA))
			siginfo.si_code = ILL_ILLADR;
		else
			siginfo.si_code = ILL_ILLTRP;
		fault = FLTILL;
		break;

	default:
		(void) die(type, rp, addr, 0);
		/*NOTREACHED*/
	}

	/*
	 * We can't get here from a system trap
	 * Never restart any instruction which got here from an fp trap.
	 */
	ASSERT(type & T_USER);

	trap_cleanup(rp, fault, &siginfo, 0);
out:
	trap_rtt();
	(void) new_mstate(curthread, mstate);
}

void
trap_rtt(void)
{
	klwp_id_t lwp = ttolwp(curthread);

	/*
	 * Restore register window if a debugger modified it.
	 * Set up to perform a single-step if a debugger requested it.
	 */
	if (lwp->lwp_pcb.pcb_xregstat != XREGNONE)
		xregrestore(lwp, 0);

	/*
	 * Set state to LWP_USER here so preempt won't give us a kernel
	 * priority if it occurs after this point.  Call CL_TRAPRET() to
	 * restore the user-level priority.
	 *
	 * It is important that no locks (other than spinlocks) be entered
	 * after this point before returning to user mode (unless lwp_state
	 * is set back to LWP_SYS).
	 */
	lwp->lwp_state = LWP_USER;
	if (curthread->t_trapret) {
		curthread->t_trapret = 0;
		thread_lock(curthread);
		CL_TRAPRET(curthread);
		thread_unlock(curthread);
	}
	if (CPU->cpu_runrun || curthread->t_schedflag & TS_ANYWAITQ)
		preempt();
	prunstop();
	if (lwp->lwp_pcb.pcb_step != STEP_NONE)
		prdostep();

	TRACE_0(TR_FAC_TRAP, TR_C_TRAP_HANDLER_EXIT, "C_trap_handler_exit");
}

#define	IS_LDASI(o)	\
	((o) == (uint32_t)0xC0C00000 || (o) == (uint32_t)0xC0800000 ||	\
	(o) == (uint32_t)0xC1800000)
#define	IS_IMM_ASI(i)	(((i) & 0x2000) == 0)
#define	IS_ASINF(a)	(((a) & 0xF6) == 0x82)
#define	IS_LDDA(i)	(((i) & 0xC1F80000) == 0xC0980000)
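
/*
 * A sketch of the decode done by nfload() below: IS_LDASI matches the
 * alternate-space load groups (op 3, with the top four op3 bits
 * selecting the unsigned, signed, or floating-point LDxA encodings).
 * IS_IMM_ASI tests the i bit (bit 13): when it is clear the ASI comes
 * from the instruction's immediate field, otherwise from the %asi
 * value held in TSTATE.  IS_ASINF matches the four non-faulting ASIs
 * (primary/secondary, big- and little-endian: 0x82, 0x83, 0x8a, 0x8b),
 * which differ only in the bits the 0xF6 mask ignores.
 */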

static int
nfload(struct regs *rp, int *instrp)
{
	uint_t	instr, asi, op3, rd;
	size_t	len;
	struct as *as;
	caddr_t addr;
	uint64_t zero;
	extern int segnf_create();

	if (USERMODE(rp->r_tstate))
		instr = fetch_user_instr((caddr_t)rp->r_pc);
	else
		instr = *(int *)rp->r_pc;

	if (instrp)
		*instrp = instr;

	op3 = (uint_t)(instr & 0xC1E00000);
	if (!IS_LDASI(op3))
		return (0);
	if (IS_IMM_ASI(instr))
		asi = (instr & 0x1FE0) >> 5;
	else
		asi = (uint_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
		    TSTATE_ASI_MASK);
	if (!IS_ASINF(asi))
		return (0);
	if (calc_memaddr(rp, &addr) == SIMU_SUCCESS) {
		len = 1;
		as = USERMODE(rp->r_tstate) ? ttoproc(curthread)->p_as : &kas;
		as_rangelock(as);
		if (as_gap(as, len, &addr, &len, 0, addr) == 0)
			(void) as_map(as, addr, len, segnf_create, NULL);
		as_rangeunlock(as);
	}
	zero = 0;
	rd = (instr >> 25) & 0x1f;
	if (IS_FLOAT(instr)) {
		uint_t dbflg = ((instr >> 19) & 3) == 3;

		if (dbflg) {		/* clever v9 reg encoding */
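			/*
			 * V9 doubles the FP register file; for double-
			 * precision registers the low bit of the 5-bit rd
			 * field supplies bit 5 of the register number
			 * (making %d32-%d62 reachable).  Unscramble that
			 * into a 0-31 double-register index.
			 */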
			if (rd & 1)
				rd = (rd & 0x1e) | 0x20;
			rd >>= 1;
		}
		if (fpu_exists) {
			if (!(_fp_read_fprs() & FPRS_FEF))
				fp_enable();

			if (dbflg)
				_fp_write_pdreg(&zero, rd);
			else
				_fp_write_pfreg((uint_t *)&zero, rd);
		} else {
			kfpu_t *fp = lwptofpu(curthread->t_lwp);

			if (!fp->fpu_en)
				fp_enable();

			if (dbflg)
				fp->fpu_fr.fpu_dregs[rd] = zero;
			else
				fp->fpu_fr.fpu_regs[rd] = 0;
		}
	} else {
		(void) putreg(&zero, rp, rd, &addr);
		if (IS_LDDA(instr))
			(void) putreg(&zero, rp, rd + 1, &addr);
	}
	rp->r_pc = rp->r_npc;
	rp->r_npc += 4;
	return (1);
}

kmutex_t atomic_nc_mutex;

/*
 * The following couple of routines are for userland drivers which
 * do atomics to noncached addresses.  This sort of worked on previous
 * platforms -- the operation really wasn't atomic, but it didn't generate
 * a trap as sun4u systems do.
 */
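/*
 * Note that atomic_nc_mutex only serializes these emulations against
 * one another; a store from another thread or CPU can still land
 * between the load and store below, so the emulation is best-effort
 * rather than truly atomic.
 */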
static int
swap_nc(struct regs *rp, int instr)
{
	uint64_t rdata, mdata;
	caddr_t addr, badaddr;
	uint_t tmp, rd;

	(void) flush_user_windows_to_stack(NULL);
	rd = (instr >> 25) & 0x1f;
	if (calc_memaddr(rp, &addr) != SIMU_SUCCESS)
		return (0);
	if (getreg(rp, rd, &rdata, &badaddr))
		return (0);
	mutex_enter(&atomic_nc_mutex);
	if (fuword32(addr, &tmp) == -1) {
		mutex_exit(&atomic_nc_mutex);
		return (0);
	}
	mdata = (u_longlong_t)tmp;
	if (suword32(addr, (uint32_t)rdata) == -1) {
		mutex_exit(&atomic_nc_mutex);
		return (0);
	}
	(void) putreg(&mdata, rp, rd, &badaddr);
	mutex_exit(&atomic_nc_mutex);
	return (1);
}

static int
ldstub_nc(struct regs *rp, int instr)
{
	uint64_t mdata;
	caddr_t addr, badaddr;
	uint_t rd;
	uint8_t tmp;

	(void) flush_user_windows_to_stack(NULL);
	rd = (instr >> 25) & 0x1f;
	if (calc_memaddr(rp, &addr) != SIMU_SUCCESS)
		return (0);
	mutex_enter(&atomic_nc_mutex);
	if (fuword8(addr, &tmp) == -1) {
		mutex_exit(&atomic_nc_mutex);
		return (0);
	}
	mdata = (u_longlong_t)tmp;
	if (suword8(addr, (uint8_t)0xff) == -1) {
		mutex_exit(&atomic_nc_mutex);
		return (0);
	}
	(void) putreg(&mdata, rp, rd, &badaddr);
	mutex_exit(&atomic_nc_mutex);
	return (1);
}

/*
 * This function helps instr_size() determine the operand size.
 * It is called for the extended ldda/stda asi's.
 */
int
extended_asi_size(int asi)
{
	switch (asi) {
	case ASI_PST8_P:
	case ASI_PST8_S:
	case ASI_PST16_P:
	case ASI_PST16_S:
	case ASI_PST32_P:
	case ASI_PST32_S:
	case ASI_PST8_PL:
	case ASI_PST8_SL:
	case ASI_PST16_PL:
	case ASI_PST16_SL:
	case ASI_PST32_PL:
	case ASI_PST32_SL:
		return (8);
	case ASI_FL8_P:
	case ASI_FL8_S:
	case ASI_FL8_PL:
	case ASI_FL8_SL:
		return (1);
	case ASI_FL16_P:
	case ASI_FL16_S:
	case ASI_FL16_PL:
	case ASI_FL16_SL:
		return (2);
	case ASI_BLK_P:
	case ASI_BLK_S:
	case ASI_BLK_PL:
	case ASI_BLK_SL:
	case ASI_BLK_COMMIT_P:
	case ASI_BLK_COMMIT_S:
		return (64);
	}

	return (0);
}

/*
 * Patch non-zero to disable preemption of threads in the kernel.
 */
int IGNORE_KERNEL_PREEMPTION = 0;	/* XXX - delete this someday */

struct kpreempt_cnts {	/* kernel preemption statistics */
	int	kpc_idle;	/* executing idle thread */
	int	kpc_intr;	/* executing interrupt thread */
	int	kpc_clock;	/* executing clock thread */
	int	kpc_blocked;	/* thread has blocked preemption (t_preempt) */
	int	kpc_notonproc;	/* thread is surrendering processor */
	int	kpc_inswtch;	/* thread has ratified scheduling decision */
	int	kpc_prilevel;	/* processor interrupt level is too high */
	int	kpc_apreempt;	/* asynchronous preemption */
	int	kpc_spreempt;	/* synchronous preemption */
}	kpreempt_cnts;

/*
 * kernel preemption: forced rescheduling
 *	preempt the running kernel thread.
 */
void
kpreempt(int asyncspl)
{
	if (IGNORE_KERNEL_PREEMPTION) {
		aston(CPU->cpu_dispthread);
		return;
	}
	/*
	 * Check that conditions are right for kernel preemption
	 */
	do {
		if (curthread->t_preempt) {
			/*
			 * Either a privileged thread (idle, panic,
			 * interrupt) or one that will check for preemption
			 * when t_preempt is lowered.
			 *
			 * We need to specifically handle the case where
			 * the thread is in the middle of swtch (resume has
			 * been called) and has its t_preempt set
			 * [idle thread and a thread which is in kpreempt
			 * already] and then a high priority thread is
			 * available in the local dispatch queue.
			 * In this case the resumed thread needs to take a
			 * trap so that it can call kpreempt.  We achieve
			 * this by using siron().
			 * How do we detect this condition: the idle thread
			 * is running and is in the midst of resume when
			 * curthread->t_pri == -1 &&
			 * CPU->cpu_dispthread != CPU->cpu_thread.
			 * We need to ensure that this happens only at high
			 * pil: resume is called at high pil and only in
			 * resume_from_idle is the pil changed.
			 */
			if (curthread->t_pri < 0) {
				kpreempt_cnts.kpc_idle++;
				if (CPU->cpu_dispthread != CPU->cpu_thread)
					siron();
			} else if (curthread->t_flag & T_INTR_THREAD) {
				kpreempt_cnts.kpc_intr++;
				if (curthread->t_pil == CLOCK_LEVEL)
					kpreempt_cnts.kpc_clock++;
			} else {
				kpreempt_cnts.kpc_blocked++;
				if (CPU->cpu_dispthread != CPU->cpu_thread)
					siron();
			}
			aston(CPU->cpu_dispthread);
			return;
		}
		if (curthread->t_state != TS_ONPROC ||
		    curthread->t_disp_queue != CPU->cpu_disp) {
			/* this thread will be calling swtch() shortly */
			kpreempt_cnts.kpc_notonproc++;
			if (CPU->cpu_thread != CPU->cpu_dispthread) {
				/* already in swtch(), force another */
				kpreempt_cnts.kpc_inswtch++;
				siron();
			}
			return;
		}

		if (((asyncspl != KPREEMPT_SYNC) ? spltoipl(asyncspl) :
		    getpil()) >= DISP_LEVEL) {
			/*
			 * We can't preempt this thread if it is at
			 * a PIL >= DISP_LEVEL since it may be holding
			 * a spin lock (like sched_lock).
			 */
			siron();	/* check back later */
			kpreempt_cnts.kpc_prilevel++;
			return;
		}

		/*
		 * block preemption so we don't have multiple preemptions
		 * pending on the interrupt stack
		 */
		curthread->t_preempt++;
		if (asyncspl != KPREEMPT_SYNC) {
			splx(asyncspl);
			kpreempt_cnts.kpc_apreempt++;
		} else
			kpreempt_cnts.kpc_spreempt++;

		preempt();
		curthread->t_preempt--;
	} while (CPU->cpu_kprunrun);
}

static enum seg_rw
get_accesstype(struct regs *rp)
{
	uint32_t instr;

	if (USERMODE(rp->r_tstate))
		instr = fetch_user_instr((caddr_t)rp->r_pc);
	else
		instr = *(uint32_t *)rp->r_pc;

	if (IS_FLUSH(instr))
		return (S_OTHER);

	if (IS_STORE(instr))
		return (S_WRITE);
	else
		return (S_READ);
}

/*
 * Handle an asynchronous hardware error.
 * The policy is currently to send a hardware error contract event to
 * the process's process contract and to kill the process.  Eventually
 * we may want to instead send a special signal whose default
 * disposition is to generate the contract event.
 */
void
trap_async_hwerr(void)
{
	k_siginfo_t si;
	proc_t *p = ttoproc(curthread);
	extern void print_msg_hwerr(ctid_t ct_id, proc_t *p);

	errorq_drain(ue_queue); /* flush pending async error messages */

	print_msg_hwerr(p->p_ct_process->conp_contract.ct_id, p);

	contract_process_hwerr(p->p_ct_process, p);

	bzero(&si, sizeof (k_siginfo_t));
	si.si_signo = SIGKILL;
	si.si_code = SI_NOINFO;
	trapsig(&si, 1);
}

/*
 * Handle bus error and bus timeout for a user process by sending SIGBUS
 * The type is either ASYNC_BERR or ASYNC_BTO.
 */
void
trap_async_berr_bto(int type, struct regs *rp)
{
	k_siginfo_t si;

	errorq_drain(ue_queue); /* flush pending async error messages */
	bzero(&si, sizeof (k_siginfo_t));

	si.si_signo = SIGBUS;
	si.si_code = (type == ASYNC_BERR ? BUS_OBJERR : BUS_ADRERR);
	si.si_addr = (caddr_t)rp->r_pc; /* AFAR unavailable - future RFE */
	si.si_errno = ENXIO;

	trapsig(&si, 1);
}