NetBSD-5.0.2/sys/arch/hppa/hppa/fpu.c
/* $NetBSD: fpu.c,v 1.16 2008/08/28 08:45:26 skrll Exp $ */
/*
* Copyright (c) 2002 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Matthew Fredette.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* FPU handling for NetBSD/hppa.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.16 2008/08/28 08:45:26 skrll Exp $");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/user.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/frame.h>
#include <machine/reg.h>
#include <hppa/hppa/machdep.h>
#include "../spmath/float.h"
#include "../spmath/fpudispatch.h"
/*
 * Some macros representing opcodes.  These are complete instruction
 * words: OPCODE_NOP is what hppa_fpu_bootstrap() stores into
 * hppa_fpu_nop0/hppa_fpu_nop1 when a hardware FPU is present, and
 * OPCODE_COPR_0_0 is the copr,0,0 instruction that it hands to
 * decode_0c() to obtain the version when only the emulator is available.
 */
#define OPCODE_NOP 0x08000240
#define OPCODE_COPR_0_0 0x30000000
/*
 * Some macros representing fields in load/store opcodes, as tested by
 * hppa_fpu_ls() against the trapping instruction word.
 *
 * OPCODE_CMPLT_S and OPCODE_CMPLT_M are the two completer bits and
 * OPCODE_CMPLT is the mask that covers both of them; the _SM, _MB and
 * _MA names are particular combinations of those two bits.
 *
 * NOTE(review): the PA-RISC 1.1 short-displacement completer table
 * appears to define ,MA as a=0,m=1 and ,MB as a=1,m=1, which is the
 * reverse of OPCODE_CMPLT_MB/OPCODE_CMPLT_MA below - confirm.
 */
#define OPCODE_CMPLT_S 0x00002000
#define OPCODE_CMPLT_M 0x00000020
#define OPCODE_CMPLT_SM (OPCODE_CMPLT_S | OPCODE_CMPLT_M)
#define OPCODE_CMPLT_MB OPCODE_CMPLT_M
#define OPCODE_CMPLT_MA (OPCODE_CMPLT_S | OPCODE_CMPLT_M)
#define OPCODE_CMPLT (OPCODE_CMPLT_S | OPCODE_CMPLT_M)
/* Set for a doubleword (8-byte) transfer, clear for a word (4-byte) one. */
#define OPCODE_DOUBLE 0x08000000
/* Set for a store (FPU register to memory), clear for a load. */
#define OPCODE_STORE 0x00000200
/*
 * Selects between the index-register and 5-bit-immediate address forms.
 *
 * NOTE(review): known short-displacement encodings such as
 * "fstd %fr22,-16(%sp)" (0x2fc11216) have this bit set, so the bit may
 * really mean "not indexed" - confirm against the instruction formats.
 */
#define OPCODE_INDEXED 0x00001000
/*
 * This is nonzero iff we're using a hardware FPU.  It is set by
 * hppa_fpu_bootstrap() from the HPPA_FPUS bits of its argument.
 */
int fpu_present;
/*
 * If we have any FPU, this is its version: the first word produced
 * by the copr,0,0 instruction in hppa_fpu_bootstrap().
 */
u_int fpu_version;
/*
 * The number of times we have had to switch the FPU context.
 * This file only resets it to zero; it is not incremented here.
 */
u_int fpu_csw;
/*
 * The U-space physical address of the proc in the FPU, or zero.
 * hppa_fpu_flush() compares it against the trapframe's tf_cr30 to
 * decide whether a given LWP's state is the one in the FPU.
 */
paddr_t fpu_cur_uspace;
/*
 * In locore.S, this swaps states in and out of the FPU.
 * hppa_fpu_flush() calls it with a NULL second argument to save
 * state without loading a replacement.
 */
void hppa_fpu_swap(struct pcb *, struct pcb *);
#ifdef FPEMUL
/*
 * Given a trapframe and a general register number, the
 * FRAME_REG macro returns a pointer to that general
 * register. The _frame_reg_positions array is a lookup
 * table, since the general registers aren't in order
 * in a trapframe.
 *
 * Register 0 has no slot in the trapframe, so for reg == 0
 * FRAME_REG yields the address of its third argument instead;
 * the caller supplies an lvalue (hppa_fpu_ls() passes a local
 * initialized to zero).  For any other register the result is
 * &tf_t1 plus the table entry, i.e. the table holds offsets
 * counted in elements relative to the tf_t1 member.
 *
 * NB: this more or less assumes that all members of
 * struct trapframe are u_ints.
 */
#define FRAME_REG(f, reg, r0) \
((reg) == 0 ? (&r0) : ((&(f)->tf_t1) + _frame_reg_positions[reg]))
/*
 * Distance, in elements, from tf_t1 to member f of struct trapframe,
 * computed by subtracting member addresses taken off a null pointer.
 */
#define _FRAME_POSITION(f) \
((&((struct trapframe *) 0)->f) - (&((struct trapframe *) 0)->tf_t1))
/* Indexed by general register number, r0 through r31. */
const int _frame_reg_positions[32] = {
-1, /* r0 - placeholder; FRAME_REG never looks this entry up */
_FRAME_POSITION(tf_r1),
_FRAME_POSITION(tf_rp), /* r2 */
_FRAME_POSITION(tf_r3),
_FRAME_POSITION(tf_r4),
_FRAME_POSITION(tf_r5),
_FRAME_POSITION(tf_r6),
_FRAME_POSITION(tf_r7),
_FRAME_POSITION(tf_r8),
_FRAME_POSITION(tf_r9),
_FRAME_POSITION(tf_r10),
_FRAME_POSITION(tf_r11),
_FRAME_POSITION(tf_r12),
_FRAME_POSITION(tf_r13),
_FRAME_POSITION(tf_r14),
_FRAME_POSITION(tf_r15),
_FRAME_POSITION(tf_r16),
_FRAME_POSITION(tf_r17),
_FRAME_POSITION(tf_r18),
_FRAME_POSITION(tf_t4), /* r19 */
_FRAME_POSITION(tf_t3), /* r20 */
_FRAME_POSITION(tf_t2), /* r21 */
_FRAME_POSITION(tf_t1), /* r22 */
_FRAME_POSITION(tf_arg3), /* r23 */
_FRAME_POSITION(tf_arg2), /* r24 */
_FRAME_POSITION(tf_arg1), /* r25 */
_FRAME_POSITION(tf_arg0), /* r26 */
_FRAME_POSITION(tf_dp), /* r27 */
_FRAME_POSITION(tf_ret0), /* r28 */
_FRAME_POSITION(tf_ret1), /* r29 */
_FRAME_POSITION(tf_sp), /* r30 */
_FRAME_POSITION(tf_r31),
};
#endif /* FPEMUL */
/*
 * Bootstraps the FPU.
 *
 * ccr_enable is the coprocessor enable mask; a hardware FPU is
 * considered present only when every HPPA_FPUS bit is set in it.
 * On return fpu_present and fpu_version have been set.  When a
 * hardware FPU is present, the two patch points in locore.S have
 * been turned into nops, no process is recorded as owning the FPU,
 * and the FPU bits have been cleared in the CCR.
 */
void
hppa_fpu_bootstrap(u_int ccr_enable)
{
	/*
	 * Both arrays are the targets of doubleword FPU stores
	 * (fstds) below, so force doubleword alignment instead of
	 * relying on where the compiler places a u_int32_t array.
	 */
	u_int32_t junk[2] __attribute__((__aligned__(8)));
	/*
	 * Zeroed so that fpu_version is well defined even when there
	 * is neither a hardware FPU nor (without FPEMUL) an emulator
	 * to fill it in.
	 */
	u_int32_t vers[2] __attribute__((__aligned__(8))) = { 0, 0 };
	extern u_int hppa_fpu_nop0;
	extern u_int hppa_fpu_nop1;

	/* See if we have a present and functioning hardware FPU. */
	fpu_present = (ccr_enable & HPPA_FPUS) == HPPA_FPUS;

	/* Initialize the FPU and get its version. */
	if (fpu_present) {
		/*
		 * To somewhat optimize the emulation
		 * assist trap handling and context
		 * switching (to save them from having
		 * to always load and check fpu_present),
		 * there are two instructions in locore.S
		 * that are replaced with nops when
		 * there is a hardware FPU.
		 */
		hppa_fpu_nop0 = OPCODE_NOP;
		hppa_fpu_nop1 = OPCODE_NOP;
		/* Flush the caches so the patched instructions take effect. */
		fcacheall();

		/*
		 * The PA-RISC 1.1 Architecture manual is
		 * pretty clear that the copr,0,0 must be
		 * wrapped in double word stores of fr0,
		 * otherwise its operation is undefined.
		 * The first store lands in junk, the one
		 * after the copr,0,0 lands in vers.
		 */
		__asm volatile(
		" ldo %0, %%r22 \n"
		" fstds %%fr0, 0(%%r22) \n"
		" ldo %1, %%r22 \n"
		" copr,0,0 \n"
		" fstds %%fr0, 0(%%r22) \n"
		: "=m" (junk), "=m" (vers) : : "r22");

		/*
		 * We track what process has the FPU,
		 * and how many times we have to swap
		 * in and out.  Now mark that no process
		 * has the FPU, and disable it, so the
		 * first time it gets used the process'
		 * state gets swapped in.
		 */
		fpu_csw = 0;
		fpu_cur_uspace = 0;
		mtctl(ccr_enable & (CCR_MASK ^ HPPA_FPUS), CR_CCR);
	}
#ifdef FPEMUL
	else {
		/*
		 * XXX This is a hack - to avoid
		 * having to set up the emulator so
		 * it can work for one instruction for
		 * proc0, we dispatch the copr,0,0 opcode
		 * into the emulator directly.
		 */
		decode_0c(OPCODE_COPR_0_0, 0, 0, vers);
	}
#endif /* FPEMUL */

	fpu_version = vers[0];
}
/*
 * Write an LWP's floating-point state back to its PCB, but only
 * when a hardware FPU exists and that LWP is the one whose state
 * is currently loaded in it.  Afterwards the FPU is recorded as
 * having no owner.  In every other case this does nothing.
 */
void
hppa_fpu_flush(struct lwp *l)
{
	struct trapframe *regs = l->l_md.md_regs;

	/*
	 * Ownership is decided by comparing the recorded U-space
	 * address with this LWP's cr30; zero means "nobody".
	 */
	if (fpu_present && fpu_cur_uspace != 0 &&
	    fpu_cur_uspace == regs->tf_cr30) {
		/* Save only: there is no state to load in its place. */
		hppa_fpu_swap(&l->l_addr->u_pcb, NULL);
		fpu_cur_uspace = 0;
	}
}
#ifdef FPEMUL
/*
* This emulates a coprocessor load/store instruction.
*/
static int hppa_fpu_ls(struct trapframe *, struct lwp *);
static int
hppa_fpu_ls(struct trapframe *frame, struct lwp *l)
{
u_int inst, inst_b, inst_x, inst_s, inst_t;
int log2size;
u_int *base;
u_int offset, index, im5;
void *fpreg;
u_int r0 = 0;
int error;
/*
* Get the instruction that we're emulating,
* and break it down. Using HP bit notation,
* b is a five-bit field starting at bit 10,
* x is a five-bit field starting at bit 15,
* s is a two-bit field starting at bit 17,
* and t is a five-bit field starting at bit 31.
*/
inst = frame->tf_iir;
__asm volatile(
" extru %4, 10, 5, %1 \n"
" extru %4, 15, 5, %2 \n"
" extru %4, 17, 2, %3 \n"
" extru %4, 31, 5, %4 \n"
: "=r" (inst_b), "=r" (inst_x), "=r" (inst_s), "=r" (inst_t)
: "r" (inst));
/*
* The space must be the user's space, else we
* segfault.
*/
if (inst_s != l->l_addr->u_pcb.pcb_space)
return (EFAULT);
/* See whether or not this is a doubleword load/store. */
log2size = (inst & OPCODE_DOUBLE) ? 3 : 2;
/* Get the floating point register. */
fpreg = ((char *)l->l_addr->u_pcb.pcb_fpregs) + (inst_t << log2size);
/* Get the base register. */
base = FRAME_REG(frame, inst_b, r0);
/* Dispatch on whether or not this is an indexed load/store. */
if (inst & OPCODE_INDEXED) {
/* Get the index register value. */
index = *FRAME_REG(frame, inst_x, r0);
/* Dispatch on the completer. */
switch (inst & OPCODE_CMPLT) {
case OPCODE_CMPLT_S:
offset = *base + (index << log2size);
break;
case OPCODE_CMPLT_M:
offset = *base;
*base = *base + index;
break;
case OPCODE_CMPLT_SM:
offset = *base;
*base = *base + (index << log2size);
break;
default:
offset = *base + index;
break;
}
} else {
/* Do a low_sign_ext(x, 5). */
im5 = inst_x >> 1;
if (inst_x & 1)
im5 |= 0xfffffff0;
/* Dispatch on the completer. */
switch (inst & OPCODE_CMPLT) {
case OPCODE_CMPLT_MB:
offset = *base + im5;
*base = *base + im5;
break;
case OPCODE_CMPLT_MA:
offset = *base;
*base = *base + im5;
break;
default:
offset = *base + im5;
break;
}
}
/*
* The offset we calculated must be the same as the
* offset in the IOR.
*/
KASSERT(offset == frame->tf_ior);
/* Perform the load or store. */
error = (inst & OPCODE_STORE) ?
copyout(fpreg, (void *) offset, 1 << log2size) :
copyin((const void *) offset, fpreg, 1 << log2size);
fdcache(HPPA_SID_KERNEL, (vaddr_t)fpreg,
sizeof(l->l_addr->u_pcb.pcb_fpregs));
return error;
}
/*
 * Emulates the floating-point instruction `inst' on behalf of LWP `l'.
 *
 * Any state the LWP has in a hardware FPU is flushed to its PCB first,
 * because the emulation operates on the register image saved there.
 * Coprocessor loads and stores (opcodes 0x09 and 0x0b) are handed to
 * hppa_fpu_ls(), and a failure there is delivered as SIGSEGV.  Every
 * other opcode goes to the matching decode_*() routine; a nonzero
 * exception result from it is delivered as SIGILL (unimplemented) or
 * SIGFPE, and the register image is then flushed from the data cache.
 */
void
hppa_fpu_emulate(struct trapframe *frame, struct lwp *l, u_int inst)
{
	u_int major, fpclass, subop;
	u_int *regs;
	int exc;
	ksiginfo_t ksi;

	/* We need the LWP's state in memory, not in the FPU. */
	hppa_fpu_flush(l);

	/*
	 * Break the instruction down.  In HP bit notation the major
	 * opcode is the six-bit field ending at bit 5 and the class
	 * is the two-bit field ending at bit 22.  The sub-opcode is
	 * the two-bit field ending at bit 16 for class 1, and the
	 * three-bit field ending at bit 18 for every other class.
	 */
	major = (inst >> (31 - 5)) & 0x3f;
	fpclass = (inst >> (31 - 22)) & 0x3;
	subop = (fpclass == 1) ?
	    ((inst >> (31 - 16)) & 3) :
	    ((inst >> (31 - 18)) & 7);

	/* The register image the decoders operate on. */
	regs = (u_int *) l->l_addr->u_pcb.pcb_fpregs;

	/* Loads and stores are handled entirely by hppa_fpu_ls(). */
	if (major == 0x09 || major == 0x0b) {
		if (hppa_fpu_ls(frame, l) != 0) {
			KSI_INIT_TRAP(&ksi);
			ksi.ksi_signo = SIGSEGV;
			ksi.ksi_code = SEGV_MAPERR;
			ksi.ksi_trap = T_DTLBMISS;
			ksi.ksi_addr = (void *)frame->tf_iioq_head;
			trapsignal(l, &ksi);
		}
		return;
	}

	/* Everything else is an arithmetic-style operation. */
	if (major == 0x0c)
		exc = decode_0c(inst, fpclass, subop, regs);
	else if (major == 0x0e)
		exc = decode_0e(inst, fpclass, subop, regs);
	else if (major == 0x06)
		exc = decode_06(inst, regs);
	else if (major == 0x26)
		exc = decode_26(inst, regs);
	else
		exc = UNIMPLEMENTEDEXCEPTION;

	if (exc != 0) {
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_trap = T_EMULATION;
		ksi.ksi_addr = (void *)frame->tf_iioq_head;

		if (exc & UNIMPLEMENTEDEXCEPTION) {
			ksi.ksi_signo = SIGILL;
			ksi.ksi_code = ILL_COPROC;
		} else {
			ksi.ksi_signo = SIGFPE;
			/*
			 * Report the first matching condition; if none
			 * matches, the code is left as initialized.
			 */
			if (exc & INVALIDEXCEPTION)
				ksi.ksi_code = FPE_FLTINV;
			else if (exc & DIVISIONBYZEROEXCEPTION)
				ksi.ksi_code = FPE_FLTDIV;
			else if (exc & OVERFLOWEXCEPTION)
				ksi.ksi_code = FPE_FLTOVF;
			else if (exc & UNDERFLOWEXCEPTION)
				ksi.ksi_code = FPE_FLTUND;
			else if (exc & INEXACTEXCEPTION)
				ksi.ksi_code = FPE_FLTRES;
		}

		trapsignal(l, &ksi);
	}

	fdcache(HPPA_SID_KERNEL, (vaddr_t)regs,
	    sizeof(l->l_addr->u_pcb.pcb_fpregs));
}
#endif /* FPEMUL */