OpenSolaris_b135/uts/sun4u/cpu/spitfire_asm.s

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#if !defined(lint)
#include "assym.h"
#endif	/* lint */

#include <sys/asm_linkage.h>
#include <sys/mmu.h>
#include <vm/hat_sfmmu.h>
#include <sys/machparam.h>
#include <sys/machcpuvar.h>
#include <sys/machthread.h>
#include <sys/privregs.h>
#include <sys/asm_linkage.h>
#include <sys/machasi.h>
#include <sys/trap.h>
#include <sys/spitregs.h>
#include <sys/xc_impl.h>
#include <sys/intreg.h>
#include <sys/async.h>

#ifdef TRAPTRACE
#include <sys/traptrace.h>
#endif /* TRAPTRACE */

#ifndef	lint

/* BEGIN CSTYLED */
#define	DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)			\
	ldxa	[%g0]ASI_LSU, tmp1					;\
	btst	LSU_DC, tmp1		/* is dcache enabled? */	;\
	bz,pn	%icc, 1f						;\
	sethi	%hi(dcache_linesize), tmp1				;\
	ld	[tmp1 + %lo(dcache_linesize)], tmp1			;\
	sethi	%hi(dflush_type), tmp2					;\
	ld	[tmp2 + %lo(dflush_type)], tmp2				;\
	cmp	tmp2, FLUSHPAGE_TYPE					;\
	be,pt	%icc, 2f						;\
	sllx	arg1, SF_DC_VBIT_SHIFT, arg1	/* tag to compare */	;\
	sethi	%hi(dcache_size), tmp3					;\
	ld	[tmp3 + %lo(dcache_size)], tmp3				;\
	cmp	tmp2, FLUSHMATCH_TYPE					;\
	be,pt	%icc, 3f						;\
	nop								;\
	/*								\
	 * flushtype = FLUSHALL_TYPE, flush the whole thing		\
	 * tmp3 = cache size						\
	 * tmp1 = cache line size					\
	 */								\
	sub	tmp3, tmp1, tmp2					;\
4:									\
	stxa	%g0, [tmp2]ASI_DC_TAG					;\
	membar	#Sync							;\
	cmp	%g0, tmp2						;\
	bne,pt	%icc, 4b						;\
	sub	tmp2, tmp1, tmp2					;\
	ba,pt	%icc, 1f						;\
	nop								;\
	/*								\
	 * flushtype = FLUSHPAGE_TYPE					\
	 * arg1 = tag to compare against				\
	 * arg2 = virtual color						\
	 * tmp1 = cache line size					\
	 * tmp2 = tag from cache					\
	 * tmp3 = counter						\
	 */								\
2:									\
	set	MMU_PAGESIZE, tmp3					;\
	sllx	arg2, MMU_PAGESHIFT, arg2  /* color to dcache page */	;\
	sub	tmp3, tmp1, tmp3					;\
4:									\
	ldxa	[arg2 + tmp3]ASI_DC_TAG, tmp2	/* read tag */		;\
	btst	SF_DC_VBIT_MASK, tmp2					;\
	bz,pn	%icc, 5f	  /* branch if no valid sub-blocks */	;\
	andn	tmp2, SF_DC_VBIT_MASK, tmp2	/* clear out v bits */	;\
	cmp	tmp2, arg1						;\
	bne,pn	%icc, 5f			/* br if tag miss */	;\
	nop								;\
	stxa	%g0, [arg2 + tmp3]ASI_DC_TAG				;\
	membar	#Sync							;\
5:									\
	cmp	%g0, tmp3						;\
	bnz,pt	%icc, 4b		/* branch if not done */	;\
	sub	tmp3, tmp1, tmp3					;\
	ba,pt	%icc, 1f						;\
	nop								;\
	/*								\
	 * flushtype = FLUSHMATCH_TYPE					\
	 * arg1 = tag to compare against				\
	 * tmp1 = cache line size					\
	 * tmp3 = cache size						\
	 * arg2 = counter						\
	 * tmp2 = cache tag						\
	 */								\
3:									\
	sub	tmp3, tmp1, arg2					;\
4:									\
	ldxa	[arg2]ASI_DC_TAG, tmp2		/* read tag */		;\
	btst	SF_DC_VBIT_MASK, tmp2					;\
	bz,pn	%icc, 5f		/* br if no valid sub-blocks */	;\
	andn	tmp2, SF_DC_VBIT_MASK, tmp2	/* clear out v bits */	;\
	cmp	tmp2, arg1						;\
	bne,pn	%icc, 5f		/* branch if tag miss */	;\
	nop								;\
	stxa	%g0, [arg2]ASI_DC_TAG					;\
	membar	#Sync							;\
5:									\
	cmp	%g0, arg2						;\
	bne,pt	%icc, 4b		/* branch if not done */	;\
	sub	arg2, tmp1, arg2					;\
1:

/*
 * macro that flushes all dcache lines of a given virtual color (one page)
 */
#define	DCACHE_FLUSHCOLOR(arg, tmp1, tmp2)				\
	ldxa	[%g0]ASI_LSU, tmp1;					\
	btst	LSU_DC, tmp1;		/* is dcache enabled? */	\
	bz,pn	%icc, 1f;						\
	sethi	%hi(dcache_linesize), tmp1;				\
	ld	[tmp1 + %lo(dcache_linesize)], tmp1;			\
	set	MMU_PAGESIZE, tmp2;					\
	/*								\
	 * arg = virtual color						\
	 * tmp2 = page size						\
	 * tmp1 = cache line size					\
	 */								\
	sllx	arg, MMU_PAGESHIFT, arg; /* color to dcache page */	\
	sub	tmp2, tmp1, tmp2;					\
2:									\
	stxa	%g0, [arg + tmp2]ASI_DC_TAG;				\
	membar	#Sync;							\
	cmp	%g0, tmp2;						\
	bne,pt	%icc, 2b;						\
	sub	tmp2, tmp1, tmp2;					\
1:

/*
 * macro that flushes the entire dcache
 */
#define	DCACHE_FLUSHALL(size, linesize, tmp)				\
	ldxa	[%g0]ASI_LSU, tmp;					\
	btst	LSU_DC, tmp;		/* is dcache enabled? */	\
	bz,pn	%icc, 1f;						\
									\
	sub	size, linesize, tmp;					\
2:									\
	stxa	%g0, [tmp]ASI_DC_TAG;					\
	membar	#Sync;							\
	cmp	%g0, tmp;						\
	bne,pt	%icc, 2b;						\
	sub	tmp, linesize, tmp;					\
1:

/*
 * macro that flushes the entire icache
 */
#define	ICACHE_FLUSHALL(size, linesize, tmp)				\
	ldxa	[%g0]ASI_LSU, tmp;					\
	btst	LSU_IC, tmp;						\
	bz,pn	%icc, 1f;						\
									\
	sub	size, linesize, tmp;					\
2:									\
	stxa	%g0, [tmp]ASI_IC_TAG;					\
	membar	#Sync;							\
	cmp	%g0, tmp;						\
	bne,pt	%icc, 2b;						\
	sub	tmp, linesize, tmp;					\
1:

#ifdef SF_ERRATA_32
#define SF_WORKAROUND(tmp1, tmp2)                               \
        sethi   %hi(FLUSH_ADDR), tmp2                           ;\
        set     MMU_PCONTEXT, tmp1                              ;\
        stxa    %g0, [tmp1]ASI_DMMU                             ;\
        flush   tmp2                                            ;
#else
#define SF_WORKAROUND(tmp1, tmp2)
#endif /* SF_ERRATA_32 */

/*
 * arg1 = vaddr
 * arg2 = ctxnum
 *      - disable interrupts and clear address mask
 *        to access 64 bit physaddr
 *      - Blow out the TLB, flush user page.
 *        . use secondary context.
 */
#define VTAG_FLUSHUPAGE(lbl, arg1, arg2, tmp1, tmp2, tmp3, tmp4) \
        rdpr    %pstate, tmp1                                   ;\
        andn    tmp1, PSTATE_IE, tmp2				;\
        wrpr    tmp2, 0, %pstate                                ;\
        sethi   %hi(FLUSH_ADDR), tmp2                           ;\
        set     MMU_SCONTEXT, tmp3                              ;\
        ldxa    [tmp3]ASI_DMMU, tmp4                            ;\
        or      DEMAP_SECOND | DEMAP_PAGE_TYPE, arg1, arg1      ;\
        cmp     tmp4, arg2                                      ;\
        be,a,pt %icc, lbl/**/4                                  ;\
          nop                                                   ;\
        stxa    arg2, [tmp3]ASI_DMMU                            ;\
lbl/**/4:                                                       ;\
        stxa    %g0, [arg1]ASI_DTLB_DEMAP                       ;\
        stxa    %g0, [arg1]ASI_ITLB_DEMAP                       ;\
        flush   tmp2                                            ;\
        be,a,pt %icc, lbl/**/5                                  ;\
          nop                                                   ;\
        stxa    tmp4, [tmp3]ASI_DMMU                            ;\
        flush   tmp2                                            ;\
lbl/**/5:                                                       ;\
        wrpr    %g0, tmp1, %pstate
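
/*
 * C-level model of the demap sequence above (illustrative sketch; the
 * helper names are hypothetical, not kernel functions):
 *
 *	pstate = intr_disable();		// keep sec-ctx reg stable
 *	old = read_secondary_ctx();
 *	if (old != ctxnum)
 *		write_secondary_ctx(ctxnum);
 *	demap_page(vaddr | DEMAP_SECOND | DEMAP_PAGE_TYPE);	// D & I TLBs
 *	if (old != ctxnum)
 *		write_secondary_ctx(old);	// restore previous ctxnum
 *	intr_restore(pstate);
 */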

	
/*
 * macro that flushes all the user entries in dtlb
 * arg1 = dtlb entries
 *	- Before first compare:
 *              tmp4 = tte
 *              tmp5 = vaddr
 *              tmp6 = ctxnum
 */
#define DTLB_FLUSH_UNLOCKED_UCTXS(lbl, arg1, tmp1, tmp2, tmp3, \
                                tmp4, tmp5, tmp6) \
lbl/**/0:                                                       ;\
        sllx    arg1, 3, tmp3                                   ;\
        SF_WORKAROUND(tmp1, tmp2)                               ;\
        ldxa    [tmp3]ASI_DTLB_ACCESS, tmp4                     ;\
        srlx    tmp4, 6, tmp4                                   ;\
        andcc   tmp4, 1, %g0                                    ;\
        bnz,pn  %xcc, lbl/**/1                                  ;\
        srlx    tmp4, 57, tmp4                                  ;\
        andcc   tmp4, 1, %g0                                    ;\
        beq,pn  %xcc, lbl/**/1                                  ;\
          nop                                                   ;\
        set     TAGREAD_CTX_MASK, tmp1                          ;\
        ldxa    [tmp3]ASI_DTLB_TAGREAD, tmp2                    ;\
        and     tmp2, tmp1, tmp6                                ;\
        andn    tmp2, tmp1, tmp5                                ;\
	set	KCONTEXT, tmp4					;\
	cmp	tmp6, tmp4					;\
	be	lbl/**/1					;\
	  nop							;\
        VTAG_FLUSHUPAGE(VD/**/lbl, tmp5, tmp6, tmp1, tmp2, tmp3, tmp4) ;\
lbl/**/1:                                                       ;\
        brgz,pt arg1, lbl/**/0                                  ;\
          sub     arg1, 1, arg1
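
/*
 * Shape of the per-entry loop in C (illustrative sketch;
 * tlb_rd_access()/tlb_rd_tag() are hypothetical stand-ins for the
 * diagnostic ldxa accesses):
 *
 *	for (int i = nentries - 1; i >= 0; i--) {
 *		uint64_t tte = tlb_rd_access(i << 3);
 *		if ((tte >> 6) & 1)		// locked entry: leave it
 *			continue;
 *		if (((tte >> 63) & 1) == 0)	// invalid entry: skip it
 *			continue;
 *		uint64_t tag = tlb_rd_tag(i << 3);
 *		uint64_t ctx = tag & TAGREAD_CTX_MASK;
 *		if (ctx == KCONTEXT)		// never flush kernel mappings
 *			continue;
 *		VTAG_FLUSHUPAGE(tag & ~TAGREAD_CTX_MASK, ctx);
 *	}
 */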


/*
 * macro that flushes all the user entries in itlb	
 * arg1 = itlb entries
 *      - Before first compare:
 *              tmp4 = tte
 *              tmp5 = vaddr
 *              tmp6 = ctxnum
 */
#define ITLB_FLUSH_UNLOCKED_UCTXS(lbl, arg1, tmp1, tmp2, tmp3, \
                                tmp4, tmp5, tmp6) \
lbl/**/0:                                                       ;\
        sllx    arg1, 3, tmp3                                   ;\
        SF_WORKAROUND(tmp1, tmp2)                               ;\
        ldxa    [tmp3]ASI_ITLB_ACCESS, tmp4                     ;\
        srlx    tmp4, 6, tmp4                                   ;\
        andcc   tmp4, 1, %g0                                    ;\
        bnz,pn  %xcc, lbl/**/1                                  ;\
        srlx    tmp4, 57, tmp4                                  ;\
        andcc   tmp4, 1, %g0                                    ;\
        beq,pn  %xcc, lbl/**/1                                  ;\
          nop                                                   ;\
        set     TAGREAD_CTX_MASK, tmp1                          ;\
        ldxa    [tmp3]ASI_ITLB_TAGREAD, tmp2                    ;\
        and     tmp2, tmp1, tmp6                                ;\
        andn    tmp2, tmp1, tmp5                                ;\
	set	KCONTEXT, tmp4					;\
	cmp	tmp6, tmp4					;\
	be	lbl/**/1					;\
	  nop							;\
        VTAG_FLUSHUPAGE(VI/**/lbl, tmp5, tmp6, tmp1, tmp2, tmp3, tmp4) ;\
lbl/**/1:                                                       ;\
        brgz,pt arg1, lbl/**/0                                  ;\
        sub     arg1, 1, arg1


	
/*
 * Macro for computing an address at a given offset from the 'cpu_private'
 * ptr. The 'cpu_private' ptr is kept in the machcpu structure.
 * r_or_s:	Register or symbol giving the offset from the 'cpu_private' ptr.
 * scr1:	Scratch, ptr is returned in this register.
 * scr2:	Scratch
 */
#define GET_CPU_PRIVATE_PTR(r_or_s, scr1, scr2, label)		\
	CPU_ADDR(scr1, scr2);						\
	ldn	[scr1 + CPU_PRIVATE], scr1; 				\
	cmp	scr1, 0; 						\
	be	label;							\
	 nop; 								\
	add	scr1, r_or_s, scr1;  					\

#ifdef HUMMINGBIRD
/*
 * UltraSPARC-IIe processor supports both 4-way set associative and
 * direct map E$. For performance reasons, we flush E$ by placing it
 * in direct map mode for data load/store and restore the state after
 * we are done flushing it. Keep interrupts off while flushing in this
 * manner.
 *
 * We flush the entire ecache by starting at one end and loading each
 * successive ecache line for the 2*ecache-size range. We have to repeat
 * the flush operation to guarantee that the entire ecache has been
 * flushed.
 *
 * For flushing a specific physical address, we start at the aliased
 * address and load at set-size stride, wrapping around at 2*ecache-size
 * boundary and skipping the physical address being flushed. It takes
 * 10 loads to guarantee that the physical address has been flushed.
 */

#define	HB_ECACHE_FLUSH_CNT	2
#define	HB_PHYS_FLUSH_CNT	10	/* #loads to flush specific paddr */
#endif /* HUMMINGBIRD */
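
/*
 * C sketch (illustrative only) of the paddr-targeted flush loop used
 * by scrubphys()/clearphys()/flushecacheline() below; load_phys() is a
 * hypothetical stand-in for an ldxa through ASI_MEM:
 *
 *	uint64_t off = (paddr ^ ecache_size) & (2 * ecache_size - 1);
 *	for (int n = HB_PHYS_FLUSH_CNT; n > 0; n--) {
 *		(void) load_phys(ecache_flushaddr + off);
 *		do {		// advance one set, wrap at 2*E$-size, and
 *			off = (off + ec_set_size) &
 *			    (2 * ecache_size - 1);
 *		} while (off == paddr - ecache_flushaddr);	// skip paddr
 *	}
 */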

/* END CSTYLED */

#endif	/* !lint */

/*
 * Spitfire MMU and Cache operations.
 */

#if defined(lint)

/*ARGSUSED*/
void
vtag_flushpage(caddr_t vaddr, uint64_t sfmmup)
{}

/*ARGSUSED*/
void
vtag_flushall(void)
{}
	
/*ARGSUSED*/
void
vtag_flushall_uctxs(void)
{}
		
/*ARGSUSED*/
void
vtag_flushpage_tl1(uint64_t vaddr, uint64_t sfmmup)
{}

/*ARGSUSED*/
void
vtag_flush_pgcnt_tl1(uint64_t vaddr, uint64_t sfmmup_pgcnt)
{}

/*ARGSUSED*/
void
vtag_flushall_tl1(uint64_t dummy1, uint64_t dummy2)
{}

/*ARGSUSED*/
void
vac_flushpage(pfn_t pfnum, int vcolor)
{}

/*ARGSUSED*/
void
vac_flushpage_tl1(uint64_t pfnum, uint64_t vcolor)
{}

/*ARGSUSED*/
void
init_mondo(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
{}

/*ARGSUSED*/
void
init_mondo_nocheck(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
{}

/*ARGSUSED*/
void
flush_instr_mem(caddr_t vaddr, size_t len)
{}

/*ARGSUSED*/
void
flush_ecache(uint64_t physaddr, size_t size, size_t linesize)
{}

/*ARGSUSED*/
void
get_ecache_dtag(uint32_t ecache_idx, uint64_t *ecache_data,
		uint64_t *ecache_tag, uint64_t *oafsr, uint64_t *acc_afsr)
{}

/* ARGSUSED */
uint64_t
get_ecache_tag(uint32_t id, uint64_t *nafsr, uint64_t *acc_afsr)
{
	return ((uint64_t)0);
}

/* ARGSUSED */
uint64_t
check_ecache_line(uint32_t id, uint64_t *acc_afsr)
{
	return ((uint64_t)0);
}

/*ARGSUSED*/
void
kdi_flush_idcache(int dcache_size, int dcache_lsize,
    int icache_size, int icache_lsize)
{}

#else	/* lint */

	ENTRY_NP(vtag_flushpage)
	/*
	 * flush page from the tlb
	 *
	 * %o0 = vaddr
	 * %o1 = sfmmup
	 */
	rdpr	%pstate, %o5
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o5, sfdi_label1, %g1)
#endif /* DEBUG */
	/*
	 * disable ints
	 */
	andn	%o5, PSTATE_IE, %o4
	wrpr	%o4, 0, %pstate

	/*
	 * Then, blow out the tlb
	 * Interrupts are disabled to prevent the secondary ctx register
	 * from changing underneath us.
	 */
	sethi   %hi(ksfmmup), %o3
        ldx     [%o3 + %lo(ksfmmup)], %o3
        cmp     %o3, %o1
        bne,pt   %xcc, 1f			! if not kernel as, go to 1
	  sethi	%hi(FLUSH_ADDR), %o3
	/*
	 * For KCONTEXT demaps use primary. type = page implicitly
	 */
	stxa	%g0, [%o0]ASI_DTLB_DEMAP	/* dmmu flush for KCONTEXT */
	stxa	%g0, [%o0]ASI_ITLB_DEMAP	/* immu flush for KCONTEXT */
	flush	%o3
	b	5f
	  nop
1:
	/*
	 * User demap.  We need to set the secondary context properly.
	 * %o0 = vaddr
	 * %o1 = sfmmup
	 * %o3 = FLUSH_ADDR
	 */
	SFMMU_CPU_CNUM(%o1, %g1, %g2)	/* %g1 = sfmmu cnum on this CPU */
	
	set	MMU_SCONTEXT, %o4
	ldxa	[%o4]ASI_DMMU, %o2		/* rd old ctxnum */
	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %o0, %o0
	cmp	%o2, %g1
	be,pt	%icc, 4f
	  nop
	stxa	%g1, [%o4]ASI_DMMU		/* wr new ctxnum */
4:
	stxa	%g0, [%o0]ASI_DTLB_DEMAP
	stxa	%g0, [%o0]ASI_ITLB_DEMAP
	flush	%o3
	be,pt	%icc, 5f
	  nop
	stxa	%o2, [%o4]ASI_DMMU		/* restore old ctxnum */
	flush	%o3
5:
	retl
	  wrpr	%g0, %o5, %pstate		/* enable interrupts */
	SET_SIZE(vtag_flushpage)
	
        .seg    ".text"
.flushallmsg:
        .asciz  "sfmmu_asm: unimplemented flush operation"

        ENTRY_NP(vtag_flushall)
        sethi   %hi(.flushallmsg), %o0
        call    panic
          or    %o0, %lo(.flushallmsg), %o0
        SET_SIZE(vtag_flushall)

	ENTRY_NP(vtag_flushall_uctxs)
	/*
	 * flush entire DTLB/ITLB.
	 */
	CPU_INDEX(%g1, %g2)
	mulx	%g1, CPU_NODE_SIZE, %g1
	set	cpunodes, %g2
	add	%g1, %g2, %g1
	lduh	[%g1 + ITLB_SIZE], %g2		! %g2 = # entries in ITLB
	lduh	[%g1 + DTLB_SIZE], %g1		! %g1 = # entries in DTLB
	sub	%g2, 1, %g2			! %g2 = # entries in ITLB - 1
	sub	%g1, 1, %g1			! %g1 = # entries in DTLB - 1

        !
        ! Flush itlb's
        !
        ITLB_FLUSH_UNLOCKED_UCTXS(I, %g2, %g3, %g4, %o2, %o3, %o4, %o5)

	!
        ! Flush dtlb's
        !
        DTLB_FLUSH_UNLOCKED_UCTXS(D, %g1, %g3, %g4, %o2, %o3, %o4, %o5)

	membar  #Sync
	retl
	  nop
	
	SET_SIZE(vtag_flushall_uctxs)

	ENTRY_NP(vtag_flushpage_tl1)
	/*
	 * x-trap to flush page from tlb and tsb
	 *
	 * %g1 = vaddr, zero-extended on 32-bit kernel
	 * %g2 = sfmmup
	 *
	 * assumes TSBE_TAG = 0
	 */
	srln	%g1, MMU_PAGESHIFT, %g1
	slln	%g1, MMU_PAGESHIFT, %g1			/* g1 = vaddr */
	
	SFMMU_CPU_CNUM(%g2, %g3, %g4)   /* %g3 = sfmmu cnum on this CPU */

	/* We need to set the secondary context properly. */
	set	MMU_SCONTEXT, %g4
	ldxa	[%g4]ASI_DMMU, %g5		/* rd old ctxnum */
	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %g1, %g1
	stxa	%g3, [%g4]ASI_DMMU		/* wr new ctxnum */
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	stxa	%g5, [%g4]ASI_DMMU		/* restore old ctxnum */
	membar #Sync
	retry
	SET_SIZE(vtag_flushpage_tl1)

	ENTRY_NP(vtag_flush_pgcnt_tl1)
	/*
	 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
	 *
	 * %g1 = vaddr, zero-extended on 32-bit kernel
	 * %g2 = <sfmmup58 | pgcnt6>
	 *
	 * NOTE: this handler relies on the fact that no
	 *	interrupts or traps can occur during the loop
	 *	issuing the TLB_DEMAP operations. It is assumed
	 *	that interrupts are disabled and this code is
	 *	fetching from the kernel locked text address.
	 *
	 * assumes TSBE_TAG = 0
	 */
	srln	%g1, MMU_PAGESHIFT, %g1
	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %g1, %g1
	
	set	SFMMU_PGCNT_MASK, %g4
	and	%g4, %g2, %g3			/* g3 = pgcnt - 1 */
	add	%g3, 1, %g3			/* g3 = pgcnt */

	andn	%g2, SFMMU_PGCNT_MASK, %g2	/* g2 = sfmmup */

	SFMMU_CPU_CNUM(%g2, %g5, %g6)   ! %g5 = sfmmu cnum on this CPU

	/* We need to set the secondary context properly. */
	set	MMU_SCONTEXT, %g4
	ldxa	[%g4]ASI_DMMU, %g6		/* read old ctxnum */
	stxa	%g5, [%g4]ASI_DMMU		/* write new ctxnum */

	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
	sethi	 %hi(FLUSH_ADDR), %g5
1:
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	flush	%g5
	deccc	%g3				/* decr pgcnt */
	bnz,pt	%icc,1b
	  add	%g1, %g2, %g1			/* go to nextpage */

	stxa	%g6, [%g4]ASI_DMMU		/* restore old ctxnum */
	membar #Sync
	retry
	SET_SIZE(vtag_flush_pgcnt_tl1)

	! Not implemented on US1/US2
	ENTRY_NP(vtag_flushall_tl1)
	retry
	SET_SIZE(vtag_flushall_tl1)

/*
 * vac_flushpage(pfnum, color)
 *	Flush one 8k page of the D-$ whose physical page number is pfnum.
 *	Algorithm:
 *		The spitfire dcache is a 16k, direct-mapped, virtually indexed,
 *		physically tagged cache.  Given the pfnum we read all cache
 *		lines for the corresponding page in the cache (determined by
 *		the color).  Each cache line is compared with
 *		the tag created from the pfnum. If the tags match we flush
 *		the line.
 */
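/*
 * Illustrative C model of the FLUSHPAGE_TYPE case below (not part of
 * the build; dc_tag_read()/dc_tag_write() are hypothetical stand-ins
 * for the ASI_DC_TAG diagnostic accesses):
 *
 *	uint64_t tag  = (uint64_t)pfnum << SF_DC_VBIT_SHIFT;
 *	uint64_t base = (uint64_t)vcolor << MMU_PAGESHIFT;
 *	for (int64_t off = MMU_PAGESIZE - dcache_linesize; off >= 0;
 *	    off -= dcache_linesize) {
 *		uint64_t t = dc_tag_read(base + off);
 *		if ((t & SF_DC_VBIT_MASK) != 0 &&	// valid sub-block(s)
 *		    (t & ~SF_DC_VBIT_MASK) == tag)	// physical tag match
 *			dc_tag_write(base + off, 0);	// invalidate the line
 *	}
 */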
	.seg	".data"
	.align	8
	.global	dflush_type
dflush_type:
	.word	FLUSHPAGE_TYPE
	.seg	".text"

	ENTRY(vac_flushpage)
	/*
	 * flush page from the d$
	 *
	 * %o0 = pfnum, %o1 = color
	 */
	DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
	retl
	nop
	SET_SIZE(vac_flushpage)

	ENTRY_NP(vac_flushpage_tl1)
	/*
	 * x-trap to flush page from the d$
	 *
	 * %g1 = pfnum, %g2 = color
	 */
	DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
	retry
	SET_SIZE(vac_flushpage_tl1)

	ENTRY(vac_flushcolor)
	/*
	 * %o0 = vcolor
	 */
	DCACHE_FLUSHCOLOR(%o0, %o1, %o2)
	retl
	  nop
	SET_SIZE(vac_flushcolor)

	ENTRY(vac_flushcolor_tl1)
	/*
	 * %g1 = vcolor
	 */
	DCACHE_FLUSHCOLOR(%g1, %g2, %g3)
	retry
	SET_SIZE(vac_flushcolor_tl1)


	.global _dispatch_status_busy
_dispatch_status_busy:
	.asciz	"ASI_INTR_DISPATCH_STATUS error: busy"
	.align	4

/*
 * Determine whether or not the IDSR is busy.
 * Entry: no arguments
 * Returns: 1 if busy, 0 otherwise
 */
	ENTRY(idsr_busy)
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
	clr	%o0
	btst	IDSR_BUSY, %g1
	bnz,a,pt %xcc, 1f			! if busy, the annulled delay
	mov	1, %o0				!   slot sets %o0 to 1
1:
	retl
	nop
	SET_SIZE(idsr_busy)
	
/*
 * Setup interrupt dispatch data registers
 * Entry:
 *	%o0 - function or inumber to call
 *	%o1, %o2 - arguments (2 uint64_t's)
 */
	.seg	".text"

	ENTRY(init_mondo)
#ifdef DEBUG
	!
	! IDSR should not be busy at the moment
	!
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
	btst	IDSR_BUSY, %g1
	bz,pt	%xcc, 1f
	nop

	sethi	%hi(_dispatch_status_busy), %o0
	call	panic
	or	%o0, %lo(_dispatch_status_busy), %o0
#endif /* DEBUG */

	ALTENTRY(init_mondo_nocheck)
	!
	! interrupt vector dispatch data reg 0
	!
1:
	mov	IDDR_0, %g1
	mov	IDDR_1, %g2
	mov	IDDR_2, %g3
	stxa	%o0, [%g1]ASI_INTR_DISPATCH

	!
	! interrupt vector dispatch data reg 1
	!
	stxa	%o1, [%g2]ASI_INTR_DISPATCH

	!
	! interrupt vector dispatch data reg 2
	!
	stxa	%o2, [%g3]ASI_INTR_DISPATCH

	retl
	membar	#Sync			! allowed to be in the delay slot
	SET_SIZE(init_mondo)

/*
 * Ship mondo to upaid
 */
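/*
 * The dispatch register address computed below, expressed in C
 * (illustrative):
 *
 *	idcr = ((uint64_t)upaid << IDCR_PID_SHIFT) | IDCR_OFFSET;
 */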
	ENTRY_NP(shipit)
	sll	%o0, IDCR_PID_SHIFT, %g1	! IDCR<18:14> = upa id
	or	%g1, IDCR_OFFSET, %g1		! IDCR<13:0> = 0x70
	stxa	%g0, [%g1]ASI_INTR_DISPATCH	! interrupt vector dispatch
#if defined(SF_ERRATA_54)
	membar	#Sync				! store must occur before load
	mov	0x20, %g3			! UDBH Control Register Read
	ldxa	[%g3]ASI_SDB_INTR_R, %g0
#endif
	retl
	membar	#Sync
	SET_SIZE(shipit)


/*
 * flush_instr_mem:
 *	Flush a portion of the I-$ starting at vaddr
 * 	%o0 vaddr
 *	%o1 bytes to be flushed
 */
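/*
 * Equivalent C-level loop (illustrative sketch; doflush() stands in
 * for the SPARC "flush" instruction):
 *
 *	do {
 *		doflush(vaddr);
 *		vaddr += ICACHE_FLUSHSZ;	// 0x20 bytes per flush
 *		len -= ICACHE_FLUSHSZ;
 *	} while ((ssize_t)len > 0);
 */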

	ENTRY(flush_instr_mem)
	membar	#StoreStore				! Ensure the stores
							! are globally visible
1:
	flush	%o0
	subcc	%o1, ICACHE_FLUSHSZ, %o1		! bytes = bytes-0x20
	bgu,pt	%ncc, 1b
	add	%o0, ICACHE_FLUSHSZ, %o0		! vaddr = vaddr+0x20

	retl
	nop
	SET_SIZE(flush_instr_mem)

/*
 * flush_ecache:
 * Flush the entire e$ using displacement flush by reading through a
 * physically contiguous area. We use mmu bypass asi (ASI_MEM) while
 * reading this physical address range so that data doesn't go to d$.
 * incoming arguments:
 *	%o0 - 64 bit physical address
 *	%o1 - size of address range to read
 *	%o2 - ecache linesize
 */
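/*
 * The non-Hummingbird displacement flush below, as a C sketch
 * (load_phys() is a hypothetical stand-in for an ldxa through ASI_MEM):
 *
 *	for (int64_t off = size - linesize; off >= 0; off -= linesize)
 *		(void) load_phys(physaddr + off);	// displace one e$ line
 */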
	ENTRY(flush_ecache)
#ifndef HUMMINGBIRD
	b	2f
	  nop
1:
	ldxa	[%o0 + %o1]ASI_MEM, %g0	! start reading from physaddr + size
2:
	subcc	%o1, %o2, %o1
	bcc,a,pt %ncc, 1b
	  nop

#else /* HUMMINGBIRD */
	/*
	 * UltraSPARC-IIe processor supports both 4-way set associative
	 * and direct map E$. For performance reasons, we flush E$ by
	 * placing it in direct map mode for data load/store and restore
	 * the state after we are done flushing it. It takes 2 iterations
	 * to guarantee that the entire ecache has been flushed.
	 *
	 * Keep the interrupts disabled while flushing E$ in this manner.
	 */
	rdpr	%pstate, %g4		! current pstate (restored later)
	andn	%g4, PSTATE_IE, %g5
	wrpr	%g0, %g5, %pstate	! disable interrupts

	! Place E$ in direct map mode for data access
	or	%g0, 1, %g5
	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
	or	%g1, %g5, %g5
	membar	#Sync
	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
	membar	#Sync

	! flush entire ecache HB_ECACHE_FLUSH_CNT times
	mov	HB_ECACHE_FLUSH_CNT-1, %g5
2:
	sub	%o1, %o2, %g3		! start from last entry
1:
	ldxa	[%o0 + %g3]ASI_MEM, %g0	! start reading from physaddr + size
	subcc	%g3, %o2, %g3
	bgeu,a,pt %ncc, 1b
	  nop
	brgz,a,pt %g5, 2b
	  dec	%g5

	membar	#Sync
	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config reg
	membar	#Sync
	wrpr	%g0, %g4, %pstate	! restore earlier pstate
#endif /* HUMMINGBIRD */

	retl
	nop
	SET_SIZE(flush_ecache)

/*
 * void kdi_flush_idcache(int dcache_size, int dcache_linesize,
 *			int icache_size, int icache_linesize)
 */
	ENTRY(kdi_flush_idcache)
	DCACHE_FLUSHALL(%o0, %o1, %g1)
	ICACHE_FLUSHALL(%o2, %o3, %g1)
	membar	#Sync
	retl
	nop
	SET_SIZE(kdi_flush_idcache)
	

/*
 * void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
 * 			uint64_t *oafsr, uint64_t *acc_afsr)
 *
 * Get ecache data and tag.  The ecache_idx argument is assumed to be aligned
 * on a 64-byte boundary.  The corresponding AFSR value is also read for each
 * 8-byte ecache datum obtained. The ecache_data argument is assumed to point
 * to an array of 16 uint64_t's (e$ data & AFSR values).  The data and tag
 * reads must be atomic to be meaningful.  We will be executing at PIL 15
 * and will disable IE, so nothing can occur between the two reads.  We also
 * assume that the execution of this code does not interfere with what we are
 * reading - not really possible, but we'll live with it for now.
 * We also pass the old AFSR value before clearing it, and caller will take
 * appropriate actions if the important bits are non-zero. 
 *
 * If the caller wishes to track the AFSR in cases where the CP bit is
 * set, an address should be passed in for acc_afsr.  Otherwise, this
 * argument may be null.
 *
 * Register Usage:
 * i0: In: 32-bit e$ index
 * i1: In: addr of e$ data
 * i2: In: addr of e$ tag
 * i3: In: addr of old afsr
 * i4: In: addr of accumulated afsr - may be null
 */
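/*
 * Shape of the routine in C (illustrative sketch; ec_tag_read(),
 * ec_data_read(), read_afsr() and clear_afsr() are hypothetical
 * stand-ins for the diagnostic ASI accesses):
 *
 *	*oafsr = read_afsr();			// AFSR before the tag read
 *	*ecache_tag = ec_tag_read(ecache_idx);
 *	for (int i = 0; i < 8; i++) {
 *		clear_afsr();
 *		ecache_data[2 * i] = ec_data_read(ecache_idx + 8 * i);
 *		ecache_data[2 * i + 1] = read_afsr();	// AFSR for that datum
 *	}
 */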
	ENTRY(get_ecache_dtag)
	save	%sp, -SA(MINFRAME), %sp
	or	%g0, 1, %l4
	sllx	%l4, 39, %l4	! set bit 39 for e$ data access
	or	%i0, %l4, %g6	! %g6 = e$ addr for data read
	sllx	%l4, 1, %l4	! set bit 40 for e$ tag access
	or	%i0, %l4, %l4	! %l4 = e$ addr for tag read

	rdpr    %pstate, %i5
	andn    %i5, PSTATE_IE | PSTATE_AM, %i0
	wrpr    %i0, %g0, %pstate       ! clear IE, AM bits

	ldxa    [%g0]ASI_ESTATE_ERR, %g1
	stxa    %g0, [%g0]ASI_ESTATE_ERR        ! disable errors
	membar  #Sync

	ldxa	[%g0]ASI_AFSR, %i0      ! grab the old-afsr before tag read
	stx     %i0, [%i3]		! write back the old-afsr

	ldxa    [%l4]ASI_EC_R, %g0      ! read tag into E$ tag reg
	ldxa    [%g0]ASI_EC_DIAG, %i0   ! read tag from E$ tag reg
	stx     %i0, [%i2]              ! write back tag result

	clr	%i2			! loop count

	brz	%i4, 1f			! acc_afsr == NULL?
	  ldxa	[%g0]ASI_AFSR, %i0      ! grab the old-afsr before clearing
	srlx	%i0, P_AFSR_CP_SHIFT, %l0
	btst	1, %l0
	bz	1f
	  nop
	ldx	[%i4], %g4
	or	%g4, %i0, %g4		! aggregate AFSR in cpu private
	stx	%g4, [%i4]
1:
	stxa    %i0, [%g0]ASI_AFSR	! clear AFSR
	membar  #Sync
	ldxa    [%g6]ASI_EC_R, %i0      ! read the 8byte E$data
	stx     %i0, [%i1]              ! save the E$data
	add     %g6, 8, %g6
	add     %i1, 8, %i1
	ldxa    [%g0]ASI_AFSR, %i0      ! read AFSR for this 16byte read
	srlx	%i0, P_AFSR_CP_SHIFT, %l0
	btst	1, %l0
	bz	2f
	  stx     %i0, [%i1]		! save the AFSR

	brz	%i4, 2f			! acc_afsr == NULL?
	  nop
	ldx	[%i4], %g4
	or	%g4, %i0, %g4		! aggregate AFSR in cpu private
	stx	%g4, [%i4]
2:
	add     %i2, 8, %i2
	cmp     %i2, 64
	bl,a    1b
	  add     %i1, 8, %i1
	stxa    %i0, [%g0]ASI_AFSR              ! clear AFSR
	membar  #Sync
	stxa    %g1, [%g0]ASI_ESTATE_ERR        ! restore error enable
	membar  #Sync
	wrpr    %g0, %i5, %pstate
	ret
	  restore
	SET_SIZE(get_ecache_dtag)
#endif /* lint */

#if defined(lint)
/*
 * The ce_err function handles trap type 0x63 (corrected_ECC_error) at tl=0.
 * Steps: 1. Get AFSR  2. Get AFAR <40:4>  3. Get datapath error status
 *	  4. Clear datapath error bit(s)  5. Clear AFSR error bit
 *	  6. Package data in %g2 and %g3  7. Call cpu_ce_error via sys_trap
 * %g3: [ 52:43 UDB lower | 42:33 UDB upper | 32:0 afsr ] - arg #3/arg #1
 * %g2: [ 40:4 afar ] - sys_trap->have_win: arg #4/arg #2
 */
void
ce_err(void)
{}
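
/*
 * The %g3 packing described above, as a C expression (illustrative;
 * udb_high/udb_low are the raw UDB error register reads):
 *
 *	g3 = (udb_low << 43) | (udb_high << 33) | afsr;
 *
 * The shifts place the UDB bits at <52:43> and <42:33> respectively,
 * above the 33 bits of AFSR contents.
 */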

void
ce_err_tl1(void)
{}


/*
 * The async_err function handles trap types 0x0A (instruction_access_error)
 * and 0x32 (data_access_error) at TL = 0 and TL > 0.  When we branch here,
 * %g5 will have the trap type (with 0x200 set if we're at TL > 0).
 *
 * Steps: 1. Get AFSR  2. Get AFAR <40:4>  3. If not a UE error, skip the UDB
 *	  registers.  4. Else get and clear datapath error bit(s)  5. Clear
 *	  AFSR error bits  6. Package data in %g2 and %g3  7. Disable all cpu
 *	  errors, because the trap is likely to be fatal  8. Call
 *	  cpu_async_error via sys_trap
 *
 * %g3: [ 63:53 tt | 52:43 UDB_L | 42:33 UDB_U | 32:0 afsr ] - arg #3/arg #1
 * %g2: [ 40:4 afar ] - sys_trap->have_win: arg #4/arg #2
 */
void
async_err(void)
{}

/*
 * The clr_datapath function clears any error bits set in the UDB regs.
 */
void
clr_datapath(void)
{}

/*
 * The get_udb_errors() function gets the current value of the
 * Datapath Error Registers.
 */
/*ARGSUSED*/
void
get_udb_errors(uint64_t *udbh, uint64_t *udbl)
{
	*udbh = 0;
	*udbl = 0;
}

#else 	/* lint */

	ENTRY_NP(ce_err)
	ldxa	[%g0]ASI_AFSR, %g3	! save afsr in g3

	!
	! Check for a UE... From Kevin.Normoyle:
	! We try to switch to the trap for the UE, but since that's
	! a hardware pipeline, we might get to the CE trap before we
	! can switch. The UDB and AFSR registers will have both the
	! UE and CE bits set but the UDB syndrome and the AFAR will be
	! for the UE.
	!
	or	%g0, 1, %g1		! put 1 in g1
	sllx	%g1, 21, %g1		! shift left to <21> afsr UE
	andcc	%g1, %g3, %g0		! check for UE in afsr
	bnz	async_err		! handle the UE, not the CE
	  or	%g0, 0x63, %g5		! pass along the CE ttype
	!
	! Disable further CE traps to avoid recursion (stack overflow)
	! and staying above XCALL_PIL for extended periods.
	!
	ldxa	[%g0]ASI_ESTATE_ERR, %g2
	andn	%g2, 0x1, %g2		! clear bit 0 - CEEN
	stxa	%g2, [%g0]ASI_ESTATE_ERR
	membar	#Sync			! required
	!
	! handle the CE
	ldxa	[%g0]ASI_AFAR, %g2	! save afar in g2

	set	P_DER_H, %g4		! put P_DER_H in g4
	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into g5
	or	%g0, 1, %g6		! put 1 in g6
	sllx	%g6, 8, %g6		! shift g6 to <8> sdb CE
	andcc	%g5, %g6, %g1		! check for CE in upper half
	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
	or	%g3, %g5, %g3		! or with afsr bits
	bz,a	1f			! no error, goto 1f
	  nop
	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg error bit
	membar	#Sync			! membar sync required
1:
	set	P_DER_L, %g4		! put P_DER_L in g4
	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
	andcc	%g5, %g6, %g1		! check for CE in lower half
	sllx	%g5, 43, %g5		! shift upper bits to <52:43>
	or	%g3, %g5, %g3		! or with afsr bits
	bz,a	2f			! no error, goto 2f
	  nop
	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg error bit
	membar	#Sync			! membar sync required
2:
	or	%g0, 1, %g4		! put 1 in g4
	sllx	%g4, 20, %g4		! shift left to <20> afsr CE
	stxa	%g4, [%g0]ASI_AFSR	! use g4 to clear afsr CE error
	membar	#Sync			! membar sync required

	set	cpu_ce_error, %g1	! put *cpu_ce_error() in g1
	rdpr	%pil, %g6		! read pil into %g6
	subcc	%g6, PIL_15, %g0
	movneg	%icc, PIL_14, %g4	! run at pil 14 unless already at 15
	sethi	%hi(sys_trap), %g5
	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
	  movge	%icc, PIL_15, %g4	! already at pil 15
	SET_SIZE(ce_err)

	ENTRY_NP(ce_err_tl1)
#ifndef	TRAPTRACE
	ldxa	[%g0]ASI_AFSR, %g7
	stxa	%g7, [%g0]ASI_AFSR
	membar	#Sync
	retry
#else
	set	ce_trap_tl1, %g1
	sethi	%hi(dis_err_panic1), %g4
	jmp	%g4 + %lo(dis_err_panic1)
	nop
#endif
	SET_SIZE(ce_err_tl1)

#ifdef	TRAPTRACE
.celevel1msg:
	.asciz	"Softerror with trap tracing at tl1: AFAR 0x%08x.%08x AFSR 0x%08x.%08x";

	ENTRY_NP(ce_trap_tl1)
	! upper 32 bits of AFSR already in o3
	mov	%o4, %o0		! save AFAR upper 32 bits
	mov	%o2, %o4		! lower 32 bits of AFSR
	mov	%o1, %o2		! lower 32 bits of AFAR
	mov	%o0, %o1		! upper 32 bits of AFAR
	set	.celevel1msg, %o0
	call	panic
	nop
	SET_SIZE(ce_trap_tl1)
#endif

	!
	! async_err is the assembly glue code to get us from the actual trap
	! into the CPU module's C error handler.  Note that we also branch
	! here from ce_err() above.
	!
	ENTRY_NP(async_err)
	stxa	%g0, [%g0]ASI_ESTATE_ERR ! disable ecc and other cpu errors
	membar	#Sync			! membar sync required

	ldxa	[%g0]ASI_AFSR, %g3	! save afsr in g3
	ldxa	[%g0]ASI_AFAR, %g2	! save afar in g2

	sllx	%g5, 53, %g5		! move ttype to <63:53>
	or	%g3, %g5, %g3		! or to afsr in g3

	or	%g0, 1, %g1		! put 1 in g1
	sllx	%g1, 21, %g1		! shift left to <21> afsr UE
	andcc	%g1, %g3, %g0		! check for UE in afsr
	bz,a,pn %icc, 2f		! if !UE skip sdb read/clear
	  nop

	set	P_DER_H, %g4		! put P_DER_H in g4
	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into g5
	or	%g0, 1, %g6		! put 1 in g6
	sllx	%g6, 9, %g6		! shift g6 to <9> sdb UE
	andcc	%g5, %g6, %g1		! check for UE in upper half
	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
	or	%g3, %g5, %g3		! or with afsr bits
	bz,a	1f			! no error, goto 1f
	  nop
	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg UE error bit
	membar	#Sync			! membar sync required
1:
	set	P_DER_L, %g4		! put P_DER_L in g4
	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
	andcc	%g5, %g6, %g1		! check for UE in lower half
	sllx	%g5, 43, %g5		! shift upper bits to <52:43>
	or	%g3, %g5, %g3		! or with afsr bits
	bz,a	2f			! no error, goto 2f
	  nop
	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg UE error bit
	membar	#Sync			! membar sync required
2:
	stxa	%g3, [%g0]ASI_AFSR	! clear all the sticky bits
	membar	#Sync			! membar sync required

	RESET_USER_RTT_REGS(%g4, %g5, async_err_resetskip)
async_err_resetskip:

	set	cpu_async_error, %g1	! put cpu_async_error in g1
	sethi	%hi(sys_trap), %g5
	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
	  or	%g0, PIL_15, %g4	! run at pil 15
	SET_SIZE(async_err)

	ENTRY_NP(dis_err_panic1)
	stxa	%g0, [%g0]ASI_ESTATE_ERR ! disable all error traps
	membar	#Sync
	! sys_trap destination routine is in %g1
	ldxa	[%g0]ASI_AFAR, %g2	! read afar
	ldxa	[%g0]ASI_AFSR, %g3	! read afsr
	set	P_DER_H, %g4		! put P_DER_H in g4
	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into g5
	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
	or	%g3, %g5, %g3		! or with afsr bits
	set	P_DER_L, %g4		! put P_DER_L in g4
	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
	sllx	%g5, 43, %g5		! shift upper bits to <52:43>
	or	%g3, %g5, %g3		! or with afsr bits

	RESET_USER_RTT_REGS(%g4, %g5, dis_err_panic1_resetskip)
dis_err_panic1_resetskip:

	sethi	%hi(sys_trap), %g5
	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
	  sub	%g0, 1, %g4
	SET_SIZE(dis_err_panic1)

	ENTRY(clr_datapath)
	set	P_DER_H, %o4			! put P_DER_H in o4
	ldxa	[%o4]ASI_SDB_INTR_R, %o5	! read sdb upper half into o5
	or	%g0, 0x3, %o2			! put 0x3 in o2
	sllx	%o2, 8, %o2			! shift o2 to <9:8> sdb
	andcc	%o5, %o2, %o1			! check for UE,CE in upper half
	bz,a	1f				! no error, goto 1f
	  nop
	stxa	%o1, [%o4]ASI_SDB_INTR_W	! clear sdb reg UE,CE error bits
	membar	#Sync				! membar sync required
1:
	set	P_DER_L, %o4			! put P_DER_L in o4
	ldxa	[%o4]ASI_SDB_INTR_R, %o5	! read sdb lower half into o5
	andcc	%o5, %o2, %o1			! check for UE,CE in lower half
	bz,a	2f				! no error, goto 2f
	  nop
	stxa	%o1, [%o4]ASI_SDB_INTR_W	! clear sdb reg UE,CE error bits
	membar	#Sync
2:
	retl
	  nop
	SET_SIZE(clr_datapath)

	ENTRY(get_udb_errors)
	set	P_DER_H, %o3
	ldxa	[%o3]ASI_SDB_INTR_R, %o2
	stx	%o2, [%o0]
	set	P_DER_L, %o3
	ldxa	[%o3]ASI_SDB_INTR_R, %o2
	retl
	  stx	%o2, [%o1]
	SET_SIZE(get_udb_errors)

#endif /* lint */

#if defined(lint)
/*
 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
 * tte, the virtual address, and the ctxnum of the specified tlb entry.  They
 * should only be used in places where you have no choice but to look at the
 * tlb itself.
 *
 * Note: These two routines are required by the Estar "cpr" loadable module.
 */
/*ARGSUSED*/
void
itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
{}

/*ARGSUSED*/
void
dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
{}
#else 	/* lint */
/*
 * NB - In Spitfire cpus, when reading a tte from the hardware, we
 * need to clear [42-41] because the general definitions in pte.h
 * define the PA to be [42-13] whereas Spitfire really uses [40-13].
 * When cloning these routines for other cpus the "andn" below is not
 * necessary.
 */
	ENTRY_NP(itlb_rd_entry)
	sllx	%o0, 3, %o0
#if defined(SF_ERRATA_32)
	sethi	%hi(FLUSH_ADDR), %g2
	set	MMU_PCONTEXT, %g1
	stxa	%g0, [%g1]ASI_DMMU			! KCONTEXT
	flush	%g2
#endif
	ldxa	[%o0]ASI_ITLB_ACCESS, %g1
	set	TTE_SPITFIRE_PFNHI_CLEAR, %g2		! spitfire only
	sllx	%g2, TTE_SPITFIRE_PFNHI_SHIFT, %g2	! see comment above
	andn	%g1, %g2, %g1				! for details
	stx	%g1, [%o1]
	ldxa	[%o0]ASI_ITLB_TAGREAD, %g2
	set	TAGREAD_CTX_MASK, %o4
	andn	%g2, %o4, %o5
	retl
	  stx	%o5, [%o2]
	SET_SIZE(itlb_rd_entry)

	ENTRY_NP(dtlb_rd_entry)
	sllx	%o0, 3, %o0
#if defined(SF_ERRATA_32)
	sethi	%hi(FLUSH_ADDR), %g2
	set	MMU_PCONTEXT, %g1
	stxa	%g0, [%g1]ASI_DMMU			! KCONTEXT
	flush	%g2
#endif
	ldxa	[%o0]ASI_DTLB_ACCESS, %g1
	set	TTE_SPITFIRE_PFNHI_CLEAR, %g2		! spitfire only
	sllx	%g2, TTE_SPITFIRE_PFNHI_SHIFT, %g2	! see comment above
	andn	%g1, %g2, %g1				! itlb_rd_entry
	stx	%g1, [%o1]
	ldxa	[%o0]ASI_DTLB_TAGREAD, %g2
	set	TAGREAD_CTX_MASK, %o4
	andn	%g2, %o4, %o5
	retl
	  stx	%o5, [%o2]
	SET_SIZE(dtlb_rd_entry)
#endif /* lint */

#if defined(lint)

/*
 * routines to get and set the LSU register
 */
uint64_t
get_lsu(void)
{
	return ((uint64_t)0);
}

/*ARGSUSED*/
void
set_lsu(uint64_t lsu)
{}

#else /* lint */

	ENTRY(set_lsu)
	stxa	%o0, [%g0]ASI_LSU		! store to LSU
	retl
	membar	#Sync
	SET_SIZE(set_lsu)

	ENTRY(get_lsu)
	retl
	ldxa	[%g0]ASI_LSU, %o0		! load LSU
	SET_SIZE(get_lsu)

#endif /* lint */

#ifndef lint
	/*
	 * Clear the NPT (non-privileged trap) bit in the %tick
	 * register. In an effort to make the change in the
	 * tick counter as consistent as possible, we disable
	 * all interrupts while we're changing the registers. We also
	 * ensure that the read and write instructions are in the same
	 * line in the instruction cache.
	 */
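	/*
	 * C-level sketch of the sequence below.  Note that wrpr with two
	 * register operands writes rs1 XOR rs2, which is how the NPT bit
	 * (bit 63) gets cleared without disturbing the count:
	 *
	 *	pstate = intr_disable();
	 *	tick = rd_tick();
	 *	if (tick & (1ULL << 63))		// NPT set?
	 *		wr_tick(tick ^ (1ULL << 63));	// clear only bit 63
	 *	intr_restore(pstate);
	 */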
	ENTRY_NP(cpu_clearticknpt)
	rdpr	%pstate, %g1		/* save processor state */
	andn	%g1, PSTATE_IE, %g3	/* turn off */
	wrpr	%g0, %g3, %pstate	/*   interrupts */
	rdpr	%tick, %g2		/* get tick register */
	brgez,pn %g2, 1f		/* if NPT bit off, we're done */
	mov	1, %g3			/* create mask */
	sllx	%g3, 63, %g3		/*   for NPT bit */
	ba,a,pt	%xcc, 2f
	.align	64			/* Align to I$ boundary */
2:
	rdpr	%tick, %g2		/* get tick register */
	wrpr	%g3, %g2, %tick		/* write tick register, */
					/*   clearing NPT bit   */
#if defined(BB_ERRATA_1)
	rdpr	%tick, %g0		/* read (s)tick (BB_ERRATA_1) */
#endif
1:
	jmp	%g4 + 4
	wrpr	%g0, %g1, %pstate	/* restore processor state */
	SET_SIZE(cpu_clearticknpt)

	/*
	 * get_ecache_tag()
	 * Register Usage:
	 * %o0: In: 32-bit E$ index
	 *      Out: 64-bit E$ tag value
	 * %o1: In: address to store the 64-bit AFSR value after clearing
	 *	sticky bits
	 * %o2: In: address of cpu private afsr storage
	 */
	ENTRY(get_ecache_tag)
	or	%g0, 1, %o4
	sllx	%o4, 40, %o4			! set bit 40 for e$ tag access
	or	%o0, %o4, %o4			! %o4 = e$ addr for tag read
	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE | PSTATE_AM, %o0
	wrpr	%o0, %g0, %pstate		! clear IE, AM bits

	ldxa	[%g0]ASI_ESTATE_ERR, %g1
	stxa	%g0, [%g0]ASI_ESTATE_ERR	! Turn off Error enable
	membar	#Sync

	ldxa	[%g0]ASI_AFSR, %o0
	srlx	%o0, P_AFSR_CP_SHIFT, %o3
	btst	1, %o3
	bz	1f
	  nop
	ldx	[%o2], %g4
	or	%g4, %o0, %g4			! aggregate AFSR in cpu private
	stx	%g4, [%o2]
1:
	stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
	membar  #Sync

	ldxa	[%o4]ASI_EC_R, %g0
	ldxa	[%g0]ASI_EC_DIAG, %o0		! read tag from e$ tag reg

	ldxa	[%g0]ASI_AFSR, %o3
	srlx	%o3, P_AFSR_CP_SHIFT, %o4
	btst	1, %o4
	bz	2f
	  stx	%o3, [%o1]			! AFSR after sticky clear
	ldx	[%o2], %g4
	or	%g4, %o3, %g4			! aggregate AFSR in cpu private
	stx	%g4, [%o2]
2:
	membar	#Sync

	stxa	%g1, [%g0]ASI_ESTATE_ERR	! Turn error enable back on
	membar	#Sync
	retl
	wrpr	%g0, %o5, %pstate
	SET_SIZE(get_ecache_tag)

	/*
	 * check_ecache_line()
	 * Register Usage:
	 * %o0: In: 32-bit E$ index
	 *      Out: 64-bit accumulated AFSR
	 * %o1: In: address of cpu private afsr storage
	 */
	ENTRY(check_ecache_line)
	or	%g0, 1, %o4
	sllx	%o4, 39, %o4			! set bit 39 for e$ data access
	or	%o0, %o4, %o4		 	! %o4 = e$ addr for data read

	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE | PSTATE_AM, %o0
	wrpr	%o0, %g0, %pstate		! clear IE, AM bits

	ldxa	[%g0]ASI_ESTATE_ERR, %g1
	stxa	%g0, [%g0]ASI_ESTATE_ERR 	! Turn off Error enable
	membar	#Sync

	ldxa 	[%g0]ASI_AFSR, %o0
	srlx	%o0, P_AFSR_CP_SHIFT, %o2
	btst	1, %o2
	bz	1f
	  clr	%o2				! loop count
	ldx	[%o1], %o3
	or	%o3, %o0, %o3			! aggregate AFSR in cpu private
	stx	%o3, [%o1]
1: 
	stxa    %o0, [%g0]ASI_AFSR              ! clear AFSR
	membar	#Sync

2:
	ldxa	[%o4]ASI_EC_R, %g0		! Read the E$ data 8bytes each
	add	%o2, 1, %o2
	cmp	%o2, 8
	bl,a 	2b
	  add	%o4, 8, %o4

	membar	#Sync
	ldxa	[%g0]ASI_AFSR, %o0		! read accumulated AFSR
	srlx	%o0, P_AFSR_CP_SHIFT, %o2
	btst	1, %o2
	bz	3f
	  nop
	ldx	[%o1], %o3
	or	%o3, %o0, %o3			! aggregate AFSR in cpu private
	stx	%o3, [%o1]
3:
	stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
	membar	#Sync
	stxa	%g1, [%g0]ASI_ESTATE_ERR	! Turn error enable back on
	membar	#Sync
	retl
	wrpr	%g0, %o5, %pstate
	SET_SIZE(check_ecache_line)
#endif /* lint */

#if defined(lint)
uint64_t
read_and_clear_afsr()
{
	return ((uint64_t)0);
}
#else	/* lint */
	ENTRY(read_and_clear_afsr)
	ldxa	[%g0]ASI_AFSR, %o0
	retl
	  stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
	SET_SIZE(read_and_clear_afsr)
#endif	/* lint */

#if defined(lint)
/* ARGSUSED */
void
scrubphys(uint64_t paddr, int ecache_size)
{
}

#else	/* lint */

/*
 * scrubphys - Pass in the aligned physical memory address that you want
 * to scrub, along with the ecache size.
 *
 *	1) Displacement flush the E$ line corresponding to %addr.
 *	   The first ldxa guarantees that the %addr is no longer in
 *	   M, O, or E (it goes to I, or to S if an instruction fetch also
 *	   happens).
 *	2) "Write" the data using a CAS %addr,%g0,%g0.
 *	   The casxa guarantees a transition from I to M or S to M.
 *	3) Displacement flush the E$ line corresponding to %addr.
 *	   The second ldxa pushes the M line out of the ecache, into the
 *	   writeback buffers, on the way to memory.
 *	4) The "membar #Sync" pushes the cache line out of the writeback
 *	   buffers onto the bus, on the way to dram finally.
 *
 * This is a modified version of the algorithm suggested by Gary Lauterbach.
 * In theory the CAS %addr,%g0,%g0 is supposed to mark the addr's cache line
 * as modified, but then we found out that for spitfire, if it misses in the
 * E$ it will probably install as an M, but if it hits in the E$, then it
 * will stay E, if the store doesn't happen. So the first displacement flush
 * should ensure that the CAS will miss in the E$.  Arrgh.
 */
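/*
 * Minimal C sketch of the non-Hummingbird sequence (load_phys() and
 * casxa_phys() are hypothetical stand-ins for the ASI_MEM accesses):
 *
 *	uint64_t alias = (paddr ^ ecache_size) & (2 * ecache_size - 1);
 *	pstate = intr_disable();
 *	(void) load_phys(ecache_flushaddr + alias);	// M/O/E -> I (or S)
 *	casxa_phys(paddr, 0, 0);			// I -> M, data intact
 *	(void) load_phys(ecache_flushaddr + alias);	// push the M line out
 *	intr_restore(pstate);
 *	membar_sync();			// drain writeback buffers to memory
 */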

	ENTRY(scrubphys)
	or	%o1, %g0, %o2	! put ecache size in %o2
#ifndef HUMMINGBIRD
	xor	%o0, %o2, %o1	! calculate alias address
	add	%o2, %o2, %o3	! 2 * ecachesize in case
				! addr == ecache_flushaddr
	sub	%o3, 1, %o3	! -1 == mask
	and	%o1, %o3, %o1	! and with xor'd address
	set	ecache_flushaddr, %o3
	ldx	[%o3], %o3

	rdpr	%pstate, %o4
	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
	wrpr	%o5, %g0, %pstate	! clear IE, AM bits

	ldxa	[%o1 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias
	casxa	[%o0]ASI_MEM, %g0, %g0
	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias

#else /* HUMMINGBIRD */
	/*
	 * UltraSPARC-IIe processor supports both 4-way set associative
	 * and direct map E$. We need to reconfigure E$ to direct map
	 * mode for data load/store before displacement flush. Also, we
	 * need to flush all 4 sets of the E$ to ensure that the physaddr
	 * has been flushed. Keep the interrupts disabled while flushing
	 * E$ in this manner.
	 *
	 * For flushing a specific physical address, we start at the
	 * aliased address and load at set-size stride, wrapping around
	 * at 2*ecache-size boundary and skipping fault physical address.
	 * It takes 10 loads to guarantee that the physical address has
	 * been flushed.
	 *
	 * Usage:
	 *	%o0	physaddr
	 *	%o5	physaddr - ecache_flushaddr
	 *	%g1	UPA config (restored later)
	 *	%g2	E$ set size
	 *	%g3	E$ flush address range mask (i.e. 2 * E$ -1)
	 *	%g4	#loads to flush phys address
	 *	%g5	temp 
	 */

	sethi	%hi(ecache_associativity), %g5
	ld	[%g5 + %lo(ecache_associativity)], %g5
	udivx	%o2, %g5, %g2	! set size (i.e. ecache_size/#sets)
	xor	%o0, %o2, %o1	! calculate alias address
	add	%o2, %o2, %g3	! 2 * ecachesize in case
				! addr == ecache_flushaddr
	sub	%g3, 1, %g3	! 2 * ecachesize -1 == mask
	and	%o1, %g3, %o1	! and with xor'd address
	sethi	%hi(ecache_flushaddr), %o3
	ldx	[%o3 + %lo(ecache_flushaddr)], %o3

	rdpr	%pstate, %o4
	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
	wrpr	%o5, %g0, %pstate	! clear IE, AM bits

	! Place E$ in direct map mode for data access
	or	%g0, 1, %g5
	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
	or	%g1, %g5, %g5
	membar	#Sync
	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
	membar	#Sync

	! Displace cache line from each set of E$ starting at the
	! aliased address, at set-size stride, wrapping at 2*ecache_size
	! and skipping load from physaddr. We need 10 loads to flush the
	! physaddr from E$.
	mov	HB_PHYS_FLUSH_CNT-1, %g4 ! #loads to flush phys addr
	sub	%o0, %o3, %o5		! physaddr - ecache_flushaddr
	or	%o1, %g0, %g5		! starting aliased offset
2:
	ldxa	[%g5 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1:
	add	%g5, %g2, %g5		! calculate offset in next set
	and	%g5, %g3, %g5		! force offset within aliased range
	cmp	%g5, %o5		! skip loads from physaddr
	be,pn %ncc, 1b
	  nop
	brgz,pt	%g4, 2b
	  dec	%g4

	casxa	[%o0]ASI_MEM, %g0, %g0

	! Flush %o0 from ecache again.
	! Need single displacement flush at offset %o1 this time as
	! the E$ is already in direct map mode.
	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias

	membar	#Sync
	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
	membar	#Sync
#endif /* HUMMINGBIRD */
	wrpr	%g0, %o4, %pstate	! restore earlier pstate register value

	retl
	membar	#Sync			! move the data out of the load buffer
	SET_SIZE(scrubphys)

#endif	/* lint */

#if defined(lint)

/*
 * clearphys - Pass in the aligned physical memory address that you want
 * to push out of the ecache as a zero-filled 64 byte block.
 * Since this routine does not bypass the ecache, it is possible that
 * it could generate a UE error while trying to clear a bad line.
 * This routine clears and restores the error enable flag.
 * TBD - Hummingbird may need similar protection
 */
/* ARGSUSED */
void
clearphys(uint64_t paddr, int ecache_size, int ecache_linesize)
{
}

#else	/* lint */

	ENTRY(clearphys)
	or	%o2, %g0, %o3	! ecache linesize
	or	%o1, %g0, %o2	! ecache size
#ifndef HUMMINGBIRD
	or	%o3, %g0, %o4	! save ecache linesize
	xor	%o0, %o2, %o1	! calculate alias address
	add	%o2, %o2, %o3	! 2 * ecachesize
	sub	%o3, 1, %o3	! -1 == mask
	and	%o1, %o3, %o1	! and with xor'd address
	set	ecache_flushaddr, %o3
	ldx	[%o3], %o3
	or	%o4, %g0, %o2	! saved ecache linesize

	rdpr	%pstate, %o4
	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
	wrpr	%o5, %g0, %pstate	! clear IE, AM bits

	ldxa	[%g0]ASI_ESTATE_ERR, %g1
	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
	membar	#Sync

	! need to put zeros in the cache line before displacing it

	sub	%o2, 8, %o2	! get offset of last double word in ecache line
1:
	stxa	%g0, [%o0 + %o2]ASI_MEM	! put zeros in the ecache line
	sub	%o2, 8, %o2
	brgez,a,pt %o2, 1b
	nop
	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
	casxa	[%o0]ASI_MEM, %g0, %g0
	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias

	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
	membar	#Sync

#else /* HUMMINGBIRD... */
	/*
	 * UltraSPARC-IIe processor supports both 4-way set associative
	 * and direct map E$. We need to reconfigure E$ to direct map
	 * mode for data load/store before displacement flush. Also, we
	 * need to flush all 4 sets of the E$ to ensure that the physaddr
	 * has been flushed. Keep the interrupts disabled while flushing
	 * E$ in this manner.
	 *
	 * For flushing a specific physical address, we start at the
	 * aliased address and load at set-size stride, wrapping around
	 * at 2*ecache-size boundary and skipping fault physical address.
	 * It takes 10 loads to guarantee that the physical address has
	 * been flushed.
	 *
	 * Usage:
	 *	%o0	physaddr
	 *	%o5	physaddr - ecache_flushaddr
	 *	%g1	UPA config (restored later)
	 *	%g2	E$ set size
	 *	%g3	E$ flush address range mask (i.e. 2 * E$ -1)
	 *	%g4	#loads to flush phys address
	 *	%g5	temp 
	 */

	or	%o3, %g0, %o4	! save ecache linesize
	sethi	%hi(ecache_associativity), %g5
	ld	[%g5 + %lo(ecache_associativity)], %g5
	udivx	%o2, %g5, %g2	! set size (i.e. ecache_size/#sets)

	xor	%o0, %o2, %o1	! calculate alias address
	add	%o2, %o2, %g3	! 2 * ecachesize
	sub	%g3, 1, %g3	! 2 * ecachesize -1 == mask
	and	%o1, %g3, %o1	! and with xor'd address
	sethi	%hi(ecache_flushaddr), %o3
	ldx	[%o3 +%lo(ecache_flushaddr)], %o3
	or	%o4, %g0, %o2	! saved ecache linesize

	rdpr	%pstate, %o4
	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
	wrpr	%o5, %g0, %pstate	! clear IE, AM bits

	! Place E$ in direct map mode for data access
	or	%g0, 1, %g5
	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
	or	%g1, %g5, %g5
	membar	#Sync
	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
	membar	#Sync

	! need to put zeros in the cache line before displacing it

	sub	%o2, 8, %o2	! get offset of last double word in ecache line
1:
	stxa	%g0, [%o0 + %o2]ASI_MEM	! put zeros in the ecache line
	sub	%o2, 8, %o2
	brgez,a,pt %o2, 1b
	nop

	! Displace cache line from each set of E$ starting at the
	! aliased address, at set-size stride, wrapping at 2*ecache_size
	! and skipping load from physaddr. We need 10 loads to flush the
	! physaddr from E$.
	mov	HB_PHYS_FLUSH_CNT-1, %g4 ! #loads to flush phys addr
	sub	%o0, %o3, %o5		! physaddr - ecache_flushaddr
	or	%o1, %g0, %g5		! starting offset
2:
	ldxa	[%g5 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
3:
	add	%g5, %g2, %g5		! calculate offset in next set
	and	%g5, %g3, %g5		! force offset within aliased range
	cmp	%g5, %o5		! skip loads from physaddr
	be,pn %ncc, 3b
	  nop
	brgz,pt	%g4, 2b
	  dec	%g4

	casxa	[%o0]ASI_MEM, %g0, %g0

	! Flush %o0 from ecache again.
	! Need single displacement flush at offset %o1 this time as
	! the E$ is already in direct map mode.
	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias

	membar	#Sync
	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
	membar	#Sync
#endif /* HUMMINGBIRD... */

	retl
	wrpr	%g0, %o4, %pstate	! restore earlier pstate register value
	SET_SIZE(clearphys)

#endif	/* lint */

#if defined(lint)
/* ARGSUSED */
void
flushecacheline(uint64_t paddr, int ecache_size)
{
}

#else	/* lint */
/*
 * flushecacheline - This is a simpler version of scrubphys
 * which simply does a displacement flush of the line in
 * question. This routine is mainly used in handling async
 * errors where we want to get rid of a bad line in ecache.
 * Note that if the line is modified and it has suffered
 * data corruption - we are guaranteed that the hw will write
 * a UE back to mark the page poisoned.
 */
        ENTRY(flushecacheline)
        or      %o1, %g0, %o2   ! put ecache size in %o2
#ifndef HUMMINGBIRD
        xor     %o0, %o2, %o1   ! calculate alias address
        add     %o2, %o2, %o3   ! 2 * ecachesize in case
                                ! addr == ecache_flushaddr
        sub     %o3, 1, %o3     ! -1 == mask
        and     %o1, %o3, %o1   ! and with xor'd address
        set     ecache_flushaddr, %o3
        ldx     [%o3], %o3

        rdpr    %pstate, %o4
        andn    %o4, PSTATE_IE | PSTATE_AM, %o5
        wrpr    %o5, %g0, %pstate       ! clear IE, AM bits

	ldxa	[%g0]ASI_ESTATE_ERR, %g1
	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
	membar	#Sync

        ldxa    [%o1 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias
	membar	#Sync
	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
        membar  #Sync                   
#else /* HUMMINGBIRD */
	/*
	 * UltraSPARC-IIe processor supports both 4-way set associative
	 * and direct map E$. We need to reconfigure E$ to direct map
	 * mode for data load/store before displacement flush. Also, we
	 * need to flush all 4 sets of the E$ to ensure that the physaddr
	 * has been flushed. Keep the interrupts disabled while flushing
	 * E$ in this manner.
	 *
	 * For flushing a specific physical address, we start at the
	 * aliased address and load at set-size stride, wrapping around
	 * at 2*ecache-size boundary and skipping fault physical address.
	 * It takes 10 loads to guarantee that the physical address has
	 * been flushed.
	 *
	 * Usage:
	 *	%o0	physaddr
	 *	%o5	physaddr - ecache_flushaddr
	 *	%g1	error enable register
	 *	%g2	E$ set size
	 *	%g3	E$ flush address range mask (i.e. 2 * E$ -1)
	 *	%g4	UPA config (restored later)
	 *	%g5	temp 
	 */

	sethi	%hi(ecache_associativity), %g5
	ld	[%g5 + %lo(ecache_associativity)], %g5
	udivx	%o2, %g5, %g2	! set size (i.e. ecache_size/#sets)
	xor	%o0, %o2, %o1	! calculate alias address
	add	%o2, %o2, %g3	! 2 * ecachesize in case
				! addr == ecache_flushaddr
	sub	%g3, 1, %g3	! 2 * ecachesize -1 == mask
	and	%o1, %g3, %o1	! and with xor'd address
	sethi	%hi(ecache_flushaddr), %o3
	ldx	[%o3 + %lo(ecache_flushaddr)], %o3

	rdpr	%pstate, %o4
	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
	wrpr	%o5, %g0, %pstate	! clear IE, AM bits

	! Place E$ in direct map mode for data access
	or	%g0, 1, %g5
	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
	ldxa	[%g0]ASI_UPA_CONFIG, %g4 ! current UPA config (restored later)
	or	%g4, %g5, %g5
	membar	#Sync
	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
	membar	#Sync

	ldxa	[%g0]ASI_ESTATE_ERR, %g1
	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
	membar	#Sync

	! Displace cache line from each set of E$ starting at the
	! aliased address, at set-size stride, wrapping at 2*ecache_size
	! and skipping load from physaddr. We need 10 loads to flush the
	! physaddr from E$.
	mov	HB_PHYS_FLUSH_CNT-1, %g5 ! #loads to flush physaddr
	sub	%o0, %o3, %o5		! physaddr - ecache_flushaddr
2:
	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
3:
	add	%o1, %g2, %o1		! calculate offset in next set
	and	%o1, %g3, %o1		! force offset within aliased range
	cmp	%o1, %o5		! skip loads from physaddr
	be,pn %ncc, 3b
	  nop
	brgz,pt	%g5, 2b
	  dec	%g5
	
	membar	#Sync
	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
        membar  #Sync                   

	stxa	%g4, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
	membar	#Sync
#endif /* HUMMINGBIRD */
        retl
        wrpr    %g0, %o4, %pstate       
        SET_SIZE(flushecacheline)

#endif	/* lint */

#if defined(lint)
/* ARGSUSED */
void
ecache_scrubreq_tl1(uint64_t inum, uint64_t dummy)
{
}

#else	/* lint */
/*
 * ecache_scrubreq_tl1 is the crosstrap handler called at ecache_calls_a_sec Hz
 * from the clock CPU.  It atomically increments the outstanding request
 * counter and, if there was not already an outstanding request,
 * branches to setsoftint_tl1 to enqueue an intr_vec for the given inum.
 */

	! Register usage:
	!
	! Arguments:
	! %g1 - inum
	!
	! Internal:
	! %g2, %g3, %g5 - scratch
	! %g4 - ptr. to spitfire_scrub_misc ec_scrub_outstanding.
	! %g6 - setsoftint_tl1 address
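
	/*
	 * C-level shape of the handler (illustrative sketch; it runs at
	 * TL=1, so the plain increment cannot be preempted on this CPU):
	 *
	 *	char *p = CPU_PRIVATE(CPU);
	 *	if (p != NULL) {
	 *		uint32_t *cnt = (uint32_t *)(p + SFPR_SCRUB_MISC +
	 *		    EC_SCRUB_OUTSTANDING);
	 *		if ((*cnt)++ == 0)	// first outstanding request?
	 *			setsoftint_tl1(inum);	// enqueue the intr_vec
	 *	}
	 */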

	ENTRY_NP(ecache_scrubreq_tl1)
	set	SFPR_SCRUB_MISC + EC_SCRUB_OUTSTANDING, %g2
	GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
	ld	[%g4], %g2		! cpu's ec_scrub_outstanding.
	set	setsoftint_tl1, %g6
	!
	! no need to use atomic instructions for the following
	! increment - we're at tl1
	!
	add	%g2, 0x1, %g3
	brnz,pn	%g2, 1f			! no need to enqueue more intr_vec
	  st	%g3, [%g4]		! delay - store incremented counter
	jmp	%g6			! setsoftint_tl1(%g1) - queue intr_vec
	  nop
	! not reached
1:
	retry
	SET_SIZE(ecache_scrubreq_tl1)

#endif	/* lint */

#if defined(lint)
/*ARGSUSED*/
void
write_ec_tag_parity(uint32_t id)
{}
#else /* lint */

	/*
         * write_ec_tag_parity(), which zeroes the ecache tag,
         * marks the state as invalid and writes good parity to the tag.
         * Input %o0 = 32 bit E$ index
         */
        ENTRY(write_ec_tag_parity)
        or      %g0, 1, %o4
        sllx    %o4, 39, %o4                    ! set bit 39 for e$ tag access
        or      %o0, %o4, %o4                 ! %o4 = ecache addr for tag write

        rdpr    %pstate, %o5
        andn    %o5, PSTATE_IE | PSTATE_AM, %o1
        wrpr    %o1, %g0, %pstate               ! clear IE, AM bits

        ldxa    [%g0]ASI_ESTATE_ERR, %g1
        stxa    %g0, [%g0]ASI_ESTATE_ERR        ! Turn off Error enable
        membar  #Sync

        ba      1f
         nop
	/*
         * Align on the ecache boundary in order to force the
         * critical code section onto the same ecache line.
         */
         .align 64

1:
        set     S_EC_PARITY, %o3         	! clear tag, state invalid
        sllx    %o3, S_ECPAR_SHIFT, %o3   	! and with good tag parity
        stxa    %o3, [%g0]ASI_EC_DIAG           ! update with the above info
        stxa    %g0, [%o4]ASI_EC_W
        membar  #Sync

        stxa    %g1, [%g0]ASI_ESTATE_ERR        ! Turn error enable back on
        membar  #Sync
        retl
        wrpr    %g0, %o5, %pstate
        SET_SIZE(write_ec_tag_parity)

#endif /* lint */

#if defined(lint)
/*ARGSUSED*/
void
write_hb_ec_tag_parity(uint32_t id)
{}
#else /* lint */

	/*
         * write_hb_ec_tag_parity(), which zeroes the ecache tag,
         * marks the state as invalid and writes good parity to the tag.
         * Input %o0 = 32 bit E$ index
         */
        ENTRY(write_hb_ec_tag_parity)
        or      %g0, 1, %o4
        sllx    %o4, 39, %o4                    ! set bit 39 for e$ tag access
        or      %o0, %o4, %o4               ! %o4 = ecache addr for tag write

        rdpr    %pstate, %o5
        andn    %o5, PSTATE_IE | PSTATE_AM, %o1
        wrpr    %o1, %g0, %pstate               ! clear IE, AM bits

        ldxa    [%g0]ASI_ESTATE_ERR, %g1
        stxa    %g0, [%g0]ASI_ESTATE_ERR        ! Turn off Error enable
        membar  #Sync

        ba      1f
         nop
	/*
         * Align on the ecache boundary in order to force the
         * critical code section onto the same ecache line.
         */
         .align 64
1: 
#ifdef HUMMINGBIRD
        set     HB_EC_PARITY, %o3         	! clear tag, state invalid
        sllx    %o3, HB_ECPAR_SHIFT, %o3   	! and with good tag parity
#else /* !HUMMINGBIRD */
        set     SB_EC_PARITY, %o3         	! clear tag, state invalid
        sllx    %o3, SB_ECPAR_SHIFT, %o3   	! and with good tag parity
#endif /* !HUMMINGBIRD */

        stxa    %o3, [%g0]ASI_EC_DIAG           ! update with the above info
        stxa    %g0, [%o4]ASI_EC_W
        membar  #Sync

        stxa    %g1, [%g0]ASI_ESTATE_ERR        ! Turn error enable back on
        membar  #Sync
        retl
        wrpr    %g0, %o5, %pstate
        SET_SIZE(write_hb_ec_tag_parity)

#endif /* lint */

#define	VIS_BLOCKSIZE		64

#if defined(lint)

/*ARGSUSED*/
int
dtrace_blksuword32(uintptr_t addr, uint32_t *data, int tryagain)
{ return (0); }

#else

	ENTRY(dtrace_blksuword32)
	save	%sp, -SA(MINFRAME + 4), %sp

	rdpr	%pstate, %l1
	andn	%l1, PSTATE_IE, %l2		! disable interrupts to
	wrpr	%g0, %l2, %pstate		! protect our FPU diddling

	rd	%fprs, %l0
	andcc	%l0, FPRS_FEF, %g0
	bz,a,pt	%xcc, 1f			! if the fpu is disabled
	wr	%g0, FPRS_FEF, %fprs		! ... enable the fpu

	st	%f0, [%fp + STACK_BIAS - 4]	! save %f0 to the stack
1:
	set	0f, %l5
        /*
         * We're about to write a block full of either total garbage
         * (not kernel data, don't worry) or user floating-point data
         * (so it only _looks_ like garbage).
         */
	ld	[%i1], %f0			! modify the block
	membar	#Sync
	stn	%l5, [THREAD_REG + T_LOFAULT]	! set up the lofault handler
	stda	%d0, [%i0]ASI_BLK_COMMIT_S	! store the modified block
	membar	#Sync
	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler

	bz,a,pt	%xcc, 1f
	wr	%g0, %l0, %fprs			! restore %fprs

	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
1:

	wrpr	%g0, %l1, %pstate		! restore interrupts

	ret
	restore	%g0, %g0, %o0

0:
	membar	#Sync
	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler

	bz,a,pt	%xcc, 1f
	wr	%g0, %l0, %fprs			! restore %fprs

	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
1:

	wrpr	%g0, %l1, %pstate		! restore interrupts

	/*
	 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
	 * which deals with watchpoints. Otherwise, just return -1.
	 */
	brnz,pt	%i2, 1f
	nop
	ret
	restore	%g0, -1, %o0
1:
	call	dtrace_blksuword32_err
	restore

	SET_SIZE(dtrace_blksuword32)

#endif /* lint */