FreeBSD-5.3/sys/alpha/alpha/ieee_float.c

Compare this file to the similar file:
Show the results in this format:

/*-
 * Copyright (c) 1998 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * An implementation of IEEE 754 floating point arithmetic supporting
 * multiply, divide, addition, subtraction and conversion to and from
 * integer.  Probably not the fastest floating point code in the world
 * but it should be pretty accurate.
 *
 * A special thanks to John Polstra for pointing out some problems
 * with an earlier version of this code and for educating me as to the
 * correct use of sticky bits.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/sys/alpha/alpha/ieee_float.c,v 1.10 2004/05/06 09:36:11 das Exp $");

#include <sys/types.h>
#ifdef TEST
#include "../include/fpu.h"
#include "ieee_float.h"
#else
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/sysent.h>
#include <sys/proc.h>
#include <machine/fpu.h>
#include <alpha/alpha/ieee_float.h>
#endif

/*
 * The number of fraction bits in a T format float.
 */
#define T_FRACBITS	52

/*
 * The number of fraction bits in a S format float.
 */
#define S_FRACBITS	23

/*
 * Mask the fraction part of a float to contain only those bits which
 * should be in single precision number.
 */
#define S_FRACMASK	((1ULL << 52) - (1ULL << 29))

/*
 * The number of extra zero bits we shift into the fraction part
 * to gain accuracy.  Two guard bits and one sticky bit are required
 * to ensure accurate rounding.
 */
#define FRAC_SHIFT	3

/*
 * Values for 1.0 and 2.0 fractions (including the extra FRAC_SHIFT
 * bits).
 */
#define ONE		(1ULL << (T_FRACBITS + FRAC_SHIFT))
#define TWO		(ONE + ONE)

/*
 * The maximum and minimum values for S and T format exponents.
 */
#define T_MAXEXP	0x3ff
#define T_MINEXP	-0x3fe
#define S_MAXEXP	0x7f
#define S_MINEXP	-0x7e

/*
 * Exponent values in registers are biased by adding this value.
 */
#define BIAS_EXP	0x3ff

/*
 * Exponent value for INF and NaN.
 */
#define NAN_EXP		0x7ff

/*
 * If this bit is set in the fraction part of a NaN, then the number
 * is a quiet NaN, i.e. no traps are generated.
 */
#define QNAN_BIT	(1ULL << 51)

/*
 * Return true if the number is any kind of NaN.
 */
static __inline int
isNaN(fp_register_t f)
{
	return f.t.exponent == NAN_EXP && f.t.fraction != 0;
}

/*
 * Return true if the number is a quiet NaN.
 */
static __inline int
isQNaN(fp_register_t f)
{
	return f.t.exponent == NAN_EXP && (f.t.fraction & QNAN_BIT);
}

/*
 * Return true if the number is a signalling NaN.
 */
static __inline int
isSNaN(fp_register_t f)
{
	return isNaN(f) && !isQNaN(f);
}

/*
 * Return true if the number is +/- INF.
 */
static __inline int
isINF(fp_register_t f)
{
	return f.t.exponent == NAN_EXP && f.t.fraction == 0;
}

/*
 * Return true if the number is +/- 0.
 */
static __inline int
isZERO(fp_register_t f)
{
	return f.t.exponent == 0 && f.t.fraction == 0;
}

/*
 * Return true if the number is denormalised.
 */
static __inline int
isDENORM(fp_register_t f)
{
	return f.t.exponent == 0 && f.t.fraction != 0;
}

/*
 * Extract the exponent part of a float register.  If the exponent is
 * zero, the number may be denormalised (if the fraction is nonzero).
 * If so, return the minimum exponent for the source datatype.
 */
static __inline int
getexp(fp_register_t f, int src)
{
	int minexp[] = { S_MINEXP, 0, T_MINEXP, 0 };
	if (f.t.exponent == 0) {
		if (f.t.fraction)
			return minexp[src];
		else
			return 0;
	}
	return f.t.exponent - BIAS_EXP;
}

/*
 * Extract the fraction part of a float register, shift it up a bit
 * to give extra accuracy and add in the implicit 1 bit.  Must be
 * careful to handle denormalised numbers and zero correctly.
 */
static __inline u_int64_t
getfrac(fp_register_t f)
{
	if (f.t.exponent == 0)
		return f.t.fraction << FRAC_SHIFT;
	else
		return (f.t.fraction << FRAC_SHIFT) | ONE;
}

/*
 * Make a float (in register format) from a sign, exponent and
 * fraction, normalising and rounding as necessary.
 * Return the float and set *status if any traps are generated.
 */
static fp_register_t
makefloat(int sign, int exp, u_int64_t frac,
	  int src, int rnd,
	  u_int64_t control, u_int64_t *status)
{
	fp_register_t f;
	int minexp = 0, maxexp = 0, alpha = 0;
	u_int64_t epsilon = 0, max = 0;

 	if (frac == 0) {
		f.t.sign = sign;
		f.t.exponent = 0;
		f.t.fraction = 0;
		return f;
	}

	if (frac >= TWO) {
		/*
		 * Fraction is >= 2.0.
		 * Shift the fraction down, preserving the 'sticky'
		 * bit.
		 */
		while (frac >= TWO) {
			frac = (frac >> 1) | (frac & 1);
			exp++;
		}
	} else if (frac < ONE) {
		/*
		 * Fraction is < 1.0. Shift it up.
		 */
		while (frac < ONE) {
			frac = (frac << 1) | (frac & 1);
			exp--;
		}
	}

	switch (src) {
	case S_FORMAT:
		minexp = S_MINEXP;
		maxexp = S_MAXEXP;
		alpha = 0xc0;
		epsilon = (1ULL << (T_FRACBITS - S_FRACBITS + FRAC_SHIFT));
		max = TWO - epsilon;
		break;

	case T_FORMAT:
		minexp = T_MINEXP;
		maxexp = T_MAXEXP;
		alpha = 0x600;
		epsilon = (1ULL << FRAC_SHIFT);
		max = TWO - epsilon;
		break;
	}
		
	/*
	 * Handle underflow before rounding so that denormalised
	 * numbers are rounded correctly.
	 */
	if (exp < minexp) {
		*status |= FPCR_INE;
		if (control & IEEE_TRAP_ENABLE_UNF) {
			*status |= FPCR_UNF;
			exp += alpha;
		} else {
			/* denormalise */
			while (exp < minexp) {
				exp++;
				frac = (frac >> 1) | (frac & 1);
			}
			exp = minexp - 1;
		}
	}

	/*
	 * Round the fraction according to the rounding mode.
	 */
	if (frac & (epsilon - 1)) {
		u_int64_t fraclo, frachi;
		u_int64_t difflo, diffhi;

		fraclo = frac & max;
		frachi = fraclo + epsilon;
		switch (rnd) {
		case ROUND_CHOP:
			frac = fraclo;
			break;
		case ROUND_MINUS_INF:
			if (f.t.sign)
				frac = frachi;
			else
				frac = fraclo;
			break;
		case ROUND_NORMAL:
			difflo = frac - fraclo;
			diffhi = frachi - frac;
			if (difflo < diffhi)
				frac = fraclo;
			else if (diffhi < difflo)
				frac = frachi;
			else
				/* round to even */
				if (fraclo & epsilon)
					frac = frachi;
				else
					frac = fraclo;
			break;
		case ROUND_PLUS_INF:
			if (f.t.sign)
				frac = fraclo;
			else
				frac = frachi;
			break;
		}

		if (frac == 0)
			*status |= FPCR_UNF;

		/*
		 * Rounding up may take us to TWO if
		 * fraclo == (TWO - epsilon).  Also If fraclo has been
		 * denormalised to (ONE - epsilon) then there is a
		 * possibility that we will round to ONE exactly.
		 */
		if (frac >= TWO) {
			frac = (frac >> 1) & ~(epsilon - 1);
			exp++;
		} else if (exp == minexp - 1 && frac == ONE) {
			/* Renormalise to ONE * 2^minexp */
			exp = minexp;
		}

		*status |= FPCR_INE;
	}

	/*
	 * Check for overflow and round to the correct INF as needed.
	 */
	if (exp > maxexp) {
		*status |= FPCR_OVF | FPCR_INE;
		if (control & IEEE_TRAP_ENABLE_OVF) {
			exp -= alpha;
		} else {
			switch (rnd) {
			case ROUND_CHOP:
				exp = maxexp;
				frac = max;
				break;
			case ROUND_MINUS_INF:
				if (sign) {
					exp = maxexp + 1; /* INF */
					frac = 0;
				} else {
					exp = maxexp;
					frac = max;
				}
				break;
			case ROUND_NORMAL:
				exp = maxexp + 1; /* INF */
				frac = 0;
				break;
			case ROUND_PLUS_INF:
				if (sign) {
					exp = maxexp;
					frac = max;
				} else {
					exp = maxexp + 1; /* INF */
					frac = 0;
				}
				break;
			}
		}
	}

	f.t.sign = sign;
	if (exp > maxexp)	/* NaN, INF */
		f.t.exponent = NAN_EXP;
	else if (exp < minexp)	/* denorm, zero */
		f.t.exponent = 0;
	else
		f.t.exponent = exp + BIAS_EXP;
	f.t.fraction = (frac & ~ONE) >> FRAC_SHIFT;
	return f;
}

/*
 * Return the canonical quiet NaN in register format.
 */
static fp_register_t
makeQNaN(void)
{
	fp_register_t f;
	f.t.sign = 0;
	f.t.exponent = NAN_EXP;
	f.t.fraction = QNAN_BIT;
	return f;
}

/*
 * Return +/- INF.
 */
static fp_register_t
makeINF(int sign)
{
	fp_register_t f;
	f.t.sign = sign;
	f.t.exponent = NAN_EXP;
	f.t.fraction = 0;
	return f;
}

/*
 * Return +/- 0.
 */
static fp_register_t
makeZERO(int sign)
{
	fp_register_t f;
	f.t.sign = sign;
	f.t.exponent = 0;
	f.t.fraction = 0;
	return f;
}

fp_register_t
ieee_add(fp_register_t fa, fp_register_t fb,
	 int src, int rnd,
	 u_int64_t control, u_int64_t *status)
{
	int shift;
	int expa, expb, exp;
	u_int64_t fraca, fracb, frac;
	int sign, sticky;

	/* First handle NaNs */
	if (isNaN(fa) || isNaN(fb)) {
		fp_register_t result;

		/* Instructions Descriptions (I) section 4.7.10.4 */
		if (isQNaN(fb))
			result = fb;
		else if (isSNaN(fb)) {
			result = fb;
			result.t.fraction |= QNAN_BIT;
		} else if (isQNaN(fa))
			result = fa;
		else if (isSNaN(fa)) {
			result = fa;
			result.t.fraction |= QNAN_BIT;
		}
		
		/* If either operand is a signalling NaN, trap. */
		if (isSNaN(fa) || isSNaN(fb))
			*status |= FPCR_INV;

		return result;
	}

	/* Handle +/- INF */
	if (isINF(fa))
		if (isINF(fb))
			if (fa.t.sign != fb.t.sign) {
				/* If adding -INF to +INF, generate a trap. */
				*status |= FPCR_INV;
				return makeQNaN();
			} else
				return fa;
		else
			return fa;
	else if (isINF(fb))
		return fb;

	/*
	 * Unpack the registers.
	 */
	expa = getexp(fa, src);
	expb = getexp(fb, src);
	fraca = getfrac(fa);
	fracb = getfrac(fb);
	shift = expa - expb;
	if (shift < 0) {
		shift = -shift;
		exp = expb;
		sticky = (fraca & ((1ULL << shift) - 1)) != 0;
		if (shift >= 64)
			fraca = sticky;
		else
			fraca = (fraca >> shift) | sticky;
	} else if (shift > 0) {
		exp = expa;
		sticky = (fracb & ((1ULL << shift) - 1)) != 0;
		if (shift >= 64)
			fracb = sticky;
		else
			fracb = (fracb >> shift) | sticky;
	} else
		exp = expa;
	if (fa.t.sign) fraca = -fraca;
	if (fb.t.sign) fracb = -fracb;
	frac = fraca + fracb;
	if (frac >> 63) {
		sign = 1;
		frac = -frac;
	} else
		sign = 0;

	/* -0 + -0 = -0 */
	if (fa.t.exponent == 0 && fa.t.fraction == 0
	    && fb.t.exponent == 0 && fb.t.fraction == 0)
		sign = fa.t.sign && fb.t.sign;

	return makefloat(sign, exp, frac, src, rnd, control, status);
}

fp_register_t
ieee_sub(fp_register_t fa, fp_register_t fb,
	 int src, int rnd,
	 u_int64_t control, u_int64_t *status)
{
	fb.t.sign = !fb.t.sign;
	return ieee_add(fa, fb, src, rnd, control, status);
}

typedef struct {
	u_int64_t lo;
	u_int64_t hi;
} u_int128_t;

#define SRL128(x, b)				\
do {						\
	x.lo >>= b;				\
	x.lo |= x.hi << (64 - b);		\
	x.hi >>= b;				\
} while (0)

#define SLL128(x, b)				\
do {						\
	if (b >= 64) {				\
		x.hi = x.lo << (b - 64);	\
		x.lo = 0;			\
	} else {				\
		x.hi <<= b;			\
		x.hi |= x.lo >> (64 - b);	\
		x.lo <<= b;			\
	}					\
} while (0)

#define SUB128(a, b)				\
do {						\
	int borrow = a.lo < b.lo;		\
	a.lo = a.lo - b.lo;			\
	a.hi = a.hi - b.hi - borrow;		\
} while (0)

#define LESS128(a, b) (a.hi < b.hi || (a.hi == b.hi && a.lo < b.lo))

fp_register_t
ieee_mul(fp_register_t fa, fp_register_t fb,
	 int src, int rnd,
	 u_int64_t control, u_int64_t *status)
{
	int expa, expb, exp;
	u_int64_t fraca, fracb, tmp;
	u_int128_t frac;
	int sign;

	/* First handle NaNs */
	if (isNaN(fa) || isNaN(fb)) {
		fp_register_t result;

		/* Instructions Descriptions (I) section 4.7.10.4 */
		if (isQNaN(fb))
			result = fb;
		else if (isSNaN(fb)) {
			result = fb;
			result.t.fraction |= QNAN_BIT;
		} else if (isQNaN(fa))
			result = fa;
		else if (isSNaN(fa)) {
			result = fa;
			result.t.fraction |= QNAN_BIT;
		}

		/* If either operand is a signalling NaN, trap. */
		if (isSNaN(fa) || isSNaN(fb))
			*status |= FPCR_INV;

		return result;
	}

	/* Handle INF and 0 */
	if ((isINF(fa) && isZERO(fb)) || (isZERO(fa) && isINF(fb))) {
		/* INF * 0 = NaN */
		*status |= FPCR_INV;
		return makeQNaN();
	} else
		/* If either is INF or zero, get the sign right */
		if (isINF(fa) || isINF(fb))
			return makeINF(fa.t.sign ^ fb.t.sign);
		else if (isZERO(fa) || isZERO(fb))
			return makeZERO(fa.t.sign ^ fb.t.sign);

	/*
	 * Unpack the registers.
	 */
	expa = getexp(fa, src);
	expb = getexp(fb, src);
	fraca = getfrac(fa);
	fracb = getfrac(fb);
	sign = fa.t.sign ^ fb.t.sign;

#define LO32(x)	((x) & ((1ULL << 32) - 1))
#define HI32(x)	((x) >> 32)

	/*
	 * Calculate the 128bit result of multiplying fraca and fracb.
	 */
	frac.lo = fraca * fracb;
#ifdef __alpha__
	/*
	 * The alpha has a handy instruction to find the high word.
	 */
	__asm__ __volatile__ ("umulh %1,%2,%0"
			      : "=r"(tmp)
			      : "r"(fraca), "r"(fracb));
	frac.hi = tmp;
#else
	/*
	 * Do the multiply longhand otherwise.
	 */
	frac.hi = HI32(LO32(fraca) * HI32(fracb)
		       + HI32(fraca) * LO32(fracb)
		       + HI32(LO32(fraca) * LO32(fracb)))
		+ HI32(fraca) * HI32(fracb);
#endif
	exp = expa + expb - (T_FRACBITS + FRAC_SHIFT);

	while (frac.hi > 0) {
		int sticky;
		exp++;
		sticky = frac.lo & 1;
		SRL128(frac, 1);
		frac.lo |= sticky;
	}

	return makefloat(sign, exp, frac.lo, src, rnd, control, status);
}

static u_int128_t
divide_128(u_int128_t a, u_int128_t b)
{
	u_int128_t result;
	u_int64_t bit;
	int i;

	/*
	 * Make a couple of assumptions on the numbers passed in.  The
	 * value in 'a' will have bits set in the upper 64 bits only
	 * and the number in 'b' will have zeros in the upper 64 bits.
	 * Also, 'b' will not be zero.
	 */
#ifdef TEST
	if (a.hi == 0 || b.hi != 0 || b.lo == 0)
		abort();
#endif

	/*
	 * Find out how many bits of zeros are at the beginning of the divisor.
	 */
	i = 64;
	bit = 1ULL << 63;
	while (i < 127) {
		if (b.lo & bit)
			break;
		i++;
		bit >>= 1;
	}

	/*
	 * Find out how much to shift the divisor so that its msb
	 * matches the msb of the dividend.
	 */
	bit = 1ULL << 63;
	while (i) {
		if (a.hi & bit)
			break;
		i--;
		bit >>= 1;
	}
	
	result.lo = 0;
	result.hi = 0;
	SLL128(b, i);

	/*
	 * Calculate the result in two parts to avoid keeping a 128bit
	 * value for the result bit.
	 */
	if (i >= 64) {
		bit = 1ULL << (i - 64);
		while (bit) {
			if (!LESS128(a, b)) {
				result.hi |= bit;
				SUB128(a, b);
				if (!a.lo && !a.hi)
					return result;
			}
			bit >>= 1;
			SRL128(b, 1);
		}
		i = 63;
	}
	bit = 1ULL << i;
	while (bit) {
		if (!LESS128(a, b)) {
			result.lo |= bit;
			SUB128(a, b);
			if (!a.lo && !a.hi)
				return result;
		}
		bit >>= 1;
		SRL128(b, 1);
	}

	return result;
}

fp_register_t
ieee_div(fp_register_t fa, fp_register_t fb,
	 int src, int rnd,
	 u_int64_t control, u_int64_t *status)
{
	int expa, expb, exp;
	u_int128_t fraca, fracb, frac;
	int sign;

	/* First handle NaNs, INFs and ZEROs */
	if (isNaN(fa) || isNaN(fb)) {
		fp_register_t result;

		/* Instructions Descriptions (I) section 4.7.10.4 */
		if (isQNaN(fb))
			result = fb;
		else if (isSNaN(fb)) {
			result = fb;
			result.t.fraction |= QNAN_BIT;
		} else if (isQNaN(fa))
			result = fa;
		else if (isSNaN(fa)) {
			result = fa;
			result.t.fraction |= QNAN_BIT;
		}
		
		/* If either operand is a signalling NaN, trap. */
		if (isSNaN(fa) || isSNaN(fb))
			*status |= FPCR_INV;

		return result;
	}

	/* Handle INF and 0 */
	if (isINF(fa) && isINF(fb)) {
		*status |= FPCR_INV;
		return makeQNaN();
	} else if (isZERO(fb))
		if (isZERO(fa)) {
			*status |= FPCR_INV;
			return makeQNaN();
		} else {
			*status |= FPCR_DZE;
			return makeINF(fa.t.sign ^ fb.t.sign);
		}
	else if (isZERO(fa))
		return makeZERO(fa.t.sign ^ fb.t.sign);

	/*
	 * Unpack the registers.
	 */
	expa = getexp(fa, src);
	expb = getexp(fb, src);
	fraca.hi = getfrac(fa);
	fraca.lo = 0;
	fracb.lo = getfrac(fb);
	fracb.hi = 0;
	sign = fa.t.sign ^ fb.t.sign;

	frac = divide_128(fraca, fracb);

	exp = expa - expb - (64 - T_FRACBITS - FRAC_SHIFT);
	while (frac.hi > 0) {
		int sticky;
		exp++;
		sticky = frac.lo & 1;
		SRL128(frac, 1);
		frac.lo |= sticky;
	}
	frac.lo |= 1;

	return makefloat(sign, exp, frac.lo, src, rnd, control, status);
}

#define IEEE_TRUE 0x4000000000000000ULL
#define IEEE_FALSE 0

fp_register_t
ieee_cmpun(fp_register_t fa, fp_register_t fb, u_int64_t *status)
{
	fp_register_t result;
	if (isNaN(fa) || isNaN(fb)) {
		if (isSNaN(fa) || isSNaN(fb))
			*status |= FPCR_INV;
		result.q = IEEE_TRUE;
	} else
		result.q = IEEE_FALSE;

	return result;
}

fp_register_t
ieee_cmpeq(fp_register_t fa, fp_register_t fb, u_int64_t *status)
{
	fp_register_t result;
	if (isNaN(fa) || isNaN(fb)) {
		if (isSNaN(fa) || isSNaN(fb))
			*status |= FPCR_INV;
		result.q = IEEE_FALSE;
	} else {
		if (isZERO(fa) && isZERO(fb))
			result.q = IEEE_TRUE;
		else if (fa.q == fb.q)
			result.q = IEEE_TRUE;
		else
			result.q = IEEE_FALSE;
	}

	return result;
}

fp_register_t
ieee_cmplt(fp_register_t fa, fp_register_t fb, u_int64_t *status)
{
	fp_register_t result;
	if (isNaN(fa) || isNaN(fb)) {
		if (isSNaN(fa) || isSNaN(fb))
			*status |= FPCR_INV;
		result.q = IEEE_FALSE;
	} else {
		if (isZERO(fa) && isZERO(fb))
			result.q = IEEE_FALSE;
		else if (fa.t.sign) {
			/* fa is negative */
			if (!fb.t.sign)
				/* fb is positive, return true */
				result.q = IEEE_TRUE;
			else if (fa.t.exponent > fb.t.exponent)
				/* fa has a larger exponent, return true */
				result.q = IEEE_TRUE;
			else if (fa.t.exponent == fb.t.exponent
				 && fa.t.fraction > fb.t.fraction)
				/* compare fractions */
				result.q = IEEE_TRUE;
			else
				result.q = IEEE_FALSE;
		} else {
			/* fa is positive */
			if (fb.t.sign)
				/* fb is negative, return false */
				result.q = IEEE_FALSE;
			else if (fb.t.exponent > fa.t.exponent)
				/* fb has a larger exponent, return true */
				result.q = IEEE_TRUE;
			else if (fa.t.exponent == fb.t.exponent
				 && fa.t.fraction < fb.t.fraction)
				/* compare fractions */
				result.q = IEEE_TRUE;
			else
				result.q = IEEE_FALSE;
		}
	}

	return result;
}

fp_register_t
ieee_cmple(fp_register_t fa, fp_register_t fb, u_int64_t *status)
{
	fp_register_t result;
	if (isNaN(fa) || isNaN(fb)) {
		if (isSNaN(fa) || isSNaN(fb))
			*status |= FPCR_INV;
		result.q = IEEE_FALSE;
	} else {
		if (isZERO(fa) && isZERO(fb))
			result.q = IEEE_TRUE;
		else if (fa.t.sign) {
			/* fa is negative */
			if (!fb.t.sign)
				/* fb is positive, return true */
				result.q = IEEE_TRUE;
			else if (fa.t.exponent > fb.t.exponent)
				/* fa has a larger exponent, return true */
				result.q = IEEE_TRUE;
			else if (fa.t.exponent == fb.t.exponent
				 && fa.t.fraction >= fb.t.fraction)
				/* compare fractions */
				result.q = IEEE_TRUE;
			else
				result.q = IEEE_FALSE;
		} else {
			/* fa is positive */
			if (fb.t.sign)
				/* fb is negative, return false */
				result.q = IEEE_FALSE;
			else if (fb.t.exponent > fa.t.exponent)
				/* fb has a larger exponent, return true */
				result.q = IEEE_TRUE;
			else if (fa.t.exponent == fb.t.exponent
				 && fa.t.fraction <= fb.t.fraction)
				/* compare fractions */
				result.q = IEEE_TRUE;
			else
				result.q = IEEE_FALSE;
		}
	}

	return result;
}

fp_register_t
ieee_convert_S_T(fp_register_t f, int rnd,
		 u_int64_t control, u_int64_t *status)
{
	/*
	 * Handle exceptional values.
	 */
	if (isNaN(f)) {
		/* Instructions Descriptions (I) section 4.7.10.1 */
		f.t.fraction |= QNAN_BIT;
		*status |= FPCR_INV;
	}
	if (isQNaN(f) || isINF(f))
		return f;

	/*
	 * If the number is a denormalised float, renormalise.
	 */
	if (isDENORM(f))
		return makefloat(f.t.sign,
				 getexp(f, S_FORMAT),
				 getfrac(f),
				 T_FORMAT, rnd, control, status);
	else
		return f;
}

fp_register_t
ieee_convert_T_S(fp_register_t f, int rnd,
		 u_int64_t control, u_int64_t *status)
{
	/*
	 * Handle exceptional values.
	 */
	if (isNaN(f)) {
		/* Instructions Descriptions (I) section 4.7.10.1 */
		f.t.fraction |= QNAN_BIT;
		f.t.fraction &= ~S_FRACMASK;
		*status |= FPCR_INV;
	}
	if (isQNaN(f) || isINF(f))
		return f;

	return makefloat(f.t.sign,
			 getexp(f, T_FORMAT),
			 getfrac(f),
			 S_FORMAT, rnd, control, status);
}

fp_register_t
ieee_convert_Q_S(fp_register_t f, int rnd,
		 u_int64_t control, u_int64_t *status)
{
	u_int64_t frac = f.q;
	int sign, exponent;

	if (frac >> 63) {
		sign = 1;
		frac = -frac;
	} else
		sign = 0;
	
	/*
	 * We shift up one bit to leave the sticky bit clear.  This is
	 * possible unless frac == (1<<63), in which case the sticky
	 * bit is already clear.
	 */
	exponent = T_FRACBITS + FRAC_SHIFT;
	if (frac < (1ULL << 63)) {
		frac <<= 1;
		exponent--;
	}

	return makefloat(sign, exponent, frac, S_FORMAT, rnd,
			 control, status);
}

fp_register_t
ieee_convert_Q_T(fp_register_t f, int rnd,
		 u_int64_t control, u_int64_t *status)
{
	u_int64_t frac = f.q;
	int sign, exponent;

	if (frac >> 63) {
		sign = 1;
		frac = -frac;
	} else
		sign = 0;
	
	/*
	 * We shift up one bit to leave the sticky bit clear.  This is
	 * possible unless frac == (1<<63), in which case the sticky
	 * bit is already clear.
	 */
	exponent = T_FRACBITS + FRAC_SHIFT;
	if (frac < (1ULL << 63)) {
		frac <<= 1;
		exponent--;
	}
	
	return makefloat(sign, exponent, frac, T_FORMAT, rnd,
			 control, status);
}

fp_register_t
ieee_convert_T_Q(fp_register_t f, int rnd,
		 u_int64_t control, u_int64_t *status)
{
	u_int64_t frac;
	int exp;

	/*
	 * Handle exceptional values.
	 */
	if (isNaN(f)) {
		/* Instructions Descriptions (I) section 4.7.10.1 */
		if (isSNaN(f))
			*status |= FPCR_INV;
		f.q = 0;
		return f;
	}
	if (isINF(f)) {
		/* Instructions Descriptions (I) section 4.7.10.1 */
		*status |= FPCR_INV;
		f.q = 0;
		return f;
	}

	exp = getexp(f, T_FORMAT) - (T_FRACBITS + FRAC_SHIFT);
	frac = getfrac(f);

	if (exp > 0) {
		if (exp > 64 || frac >= (1 << (64 - exp)))
			*status |= FPCR_IOV | FPCR_INE;
		if (exp < 64)
			frac <<= exp;
		else
			frac = 0;
	} else if (exp < 0) {
		u_int64_t mask;
		u_int64_t fraclo, frachi;
		u_int64_t diffhi, difflo;
		exp = -exp;
		if (exp > 64) {
			fraclo = 0;
			diffhi = 0;
			difflo = 0;
			if (frac) {
				frachi = 1;
				*status |= FPCR_INE;
			} else
				frachi = 0;
		} else if (exp == 64) {
			fraclo = 0;
			if (frac) {
				frachi = 1;
				difflo = frac;
				diffhi = -frac;
				*status |= FPCR_INE;
			} else {
				frachi = 0;
				difflo = 0;
				diffhi = 0;
			}
		} else {
			mask = (1 << exp) - 1;
			fraclo = frac >> exp;
			if (frac & mask) {
				frachi = fraclo + 1;
				difflo = frac - (fraclo << exp);
				diffhi = (frachi << exp) - frac;
				*status |= FPCR_INE;
			} else {
				frachi = fraclo;
				difflo = 0;
				diffhi = 0;
			}
		}
		switch (rnd) {
		case ROUND_CHOP:
			frac = fraclo;
			break;
		case ROUND_MINUS_INF:
			if (f.t.sign)
				frac = frachi;
			else
				frac = fraclo;
			break;
		case ROUND_NORMAL:
#if 0
			/*
			 * Round to nearest.
			 */
			if (difflo < diffhi)
				frac = fraclo;
			else if (diffhi > difflo)
				frac = frachi;
			else if (fraclo & 1)
				frac = frachi;
			else
				frac = fraclo;
#else
			/*
			 * Round to zero.
			 */
			frac = fraclo;
#endif
			break;
		case ROUND_PLUS_INF:
			if (f.t.sign)
				frac = fraclo;
			else
				frac = frachi;
			break;
		}
	}

	if (f.t.sign) {
		if (frac > (1ULL << 63))
			*status |= FPCR_IOV | FPCR_INE;
		frac = -frac;
	} else {
		if (frac > (1ULL << 63) - 1)
			*status |= FPCR_IOV | FPCR_INE;
	}
	
	f.q = frac;
	return f;
}

fp_register_t
ieee_convert_S_Q(fp_register_t f, int rnd,
		 u_int64_t control, u_int64_t *status)
{
	f = ieee_convert_S_T(f, rnd, control, status);
	return ieee_convert_T_Q(f, rnd, control, status);
}

#ifndef _KERNEL

#include <stdio.h>
#include <math.h>
#include <stdlib.h>

union value {
	double d;
	fp_register_t r;
};


static double
random_double()
{
	union value a;
	int exp;
	
	a.r.t.fraction = ((long long)random() & (1ULL << 20) - 1) << 32
		| random();
	exp = random() & 0x7ff;
#if 1
	if (exp == 0)
		exp = 1;	/* no denorms */
	else if (exp == 0x7ff)
		exp = 0x7fe;	/* no NaNs and INFs */
#endif

	a.r.t.exponent = exp;
	a.r.t.sign = random() & 1;
	return a.d;
}

static float
random_float()
{
	union value a;
	int exp;
	
	a.r.t.fraction = ((long)random() & (1ULL << 23) - 1) << 29;
	exp = random() & 0xff;
#if 1
	if (exp == 0)
		exp = 1;	/* no denorms */
	else if (exp == 0xff)
		exp = 0xfe;	/* no NaNs and INFs */
#endif

	/* map exponent from S to T format */
	if (exp == 255)
		a.r.t.exponent = 0x7ff;
	else if (exp & 0x80)
		a.r.t.exponent = 0x400 + (exp & 0x7f);
	else if (exp)
		a.r.t.exponent = 0x380 + exp;
	else
		a.r.t.exponent = 0;
	a.r.t.sign = random() & 1;

	return a.d;
}

/*
 * Ignore epsilon errors
 */
int
equal_T(union value a, union value b)
{
	if (isZERO(a.r) && isZERO(b.r))
		return 1;
	if (a.r.t.sign != b.r.t.sign)
		return 0;
	if (a.r.t.exponent != b.r.t.exponent)
		return 0;

	return a.r.t.fraction == b.r.t.fraction;
}

int
equal_S(union value a, union value b)
{
	int64_t epsilon = 1ULL << 29;

	if (isZERO(a.r) && isZERO(b.r))
		return 1;
	if (a.r.t.sign != b.r.t.sign)
		return 0;
	if (a.r.t.exponent != b.r.t.exponent)
		return 0;

	return ((a.r.t.fraction & ~(epsilon-1))
		== (b.r.t.fraction & ~(epsilon-1)));
}

#define ITER 1000000

static void
test_double_add()
{
	union value a, b, c, x;
	u_int64_t status = 0;
	int i;

	for (i = 0; i < ITER; i++) {
		a.d = random_double();
		b.d = random_double();
		status = 0;
		c.r = ieee_add(a.r, b.r, T_FORMAT, ROUND_NORMAL,
			       0, &status);
		/* ignore NaN and INF */
		if (isNaN(c.r) || isINF(c.r) || isDENORM(c.r))
			continue;
		x.d = a.d + b.d;
		if (!equal_T(c, x)) {
			printf("bad double add, %g + %g = %g (should be %g)\n",
			       a.d, b.d, c.d, x.d);
			c.r = ieee_add(a.r, b.r, T_FORMAT, ROUND_NORMAL,
				       0, &status);
		}
	}
}

static void
test_single_add()
{
	union value a, b, c, x, t;
	float xf;
	u_int64_t status = 0;
	int i;

	for (i = 0; i < ITER; i++) {
#if 0
		if (i == 0) {
			a.r.q = 0xb33acf292ca49700ULL;
			b.r.q = 0xcad3191058a693aeULL;
		}
#endif
		a.d = random_float();
		b.d = random_float();
		status = 0;
		c.r = ieee_add(a.r, b.r, S_FORMAT, ROUND_NORMAL,
			       0, &status);
		/* ignore NaN and INF */
		if (isNaN(c.r) || isINF(c.r) || isDENORM(c.r))
			continue;
		xf = a.d + b.d;
		x.d = xf;
		t.r = ieee_convert_S_T(c.r, ROUND_NORMAL, 0, &status);
		if (!equal_S(t, x)) {
			printf("bad single add, %g + %g = %g (should be %g)\n",
			       a.d, b.d, t.d, x.d);
			c.r = ieee_add(a.r, b.r, S_FORMAT, ROUND_NORMAL,
				       0, &status);
		}
	}
}

static void
test_double_mul()
{
	union value a, b, c, x;
	u_int64_t status = 0;
	int i;

	for (i = 0; i < ITER; i++) {
		a.d = random_double();
		b.d = random_double();
		status = 0;
		c.r = ieee_mul(a.r, b.r, T_FORMAT, ROUND_NORMAL,
			       0, &status);
		/* ignore NaN and INF */
		if (isNaN(c.r) || isINF(c.r) || isDENORM(c.r))
			continue;
		x.d = a.d * b.d;
		if (!equal_T(c, x)) {
			printf("bad double mul, %g * %g = %g (should be %g)\n",
			       a.d, b.d, c.d, x.d);
			c.r = ieee_mul(a.r, b.r, T_FORMAT, ROUND_NORMAL,
				       0, &status);
		}
	}
}

static void
test_single_mul()
{
	union value a, b, c, x, t;
	float xf;
	u_int64_t status = 0;
	int i;

	for (i = 0; i < ITER; i++) {
		a.d = random_double();
		b.d = random_double();
		status = 0;
		c.r = ieee_mul(a.r, b.r, S_FORMAT, ROUND_NORMAL,
			       0, &status);
		/* ignore NaN and INF */
		if (isNaN(c.r) || isINF(c.r) || isDENORM(c.r))
			continue;
		xf = a.d * b.d;
		x.d = xf;
		t.r = ieee_convert_S_T(c.r, ROUND_NORMAL, 0, &status);
		if (!equal_S(t, x)) {
			printf("bad single mul, %g * %g = %g (should be %g)\n",
			       a.d, b.d, t.d, x.d);
			c.r = ieee_mul(a.r, b.r, T_FORMAT, ROUND_NORMAL,
				       0, &status);
		}
	}
}

static void
test_double_div()
{
	union value a, b, c, x;
	u_int64_t status = 0;
	int i;

	for (i = 0; i < ITER; i++) {
		a.d = random_double();
		b.d = random_double();
		status = 0;
		c.r = ieee_div(a.r, b.r, T_FORMAT, ROUND_NORMAL,
			       0, &status);
		/* ignore NaN and INF */
		if (isNaN(c.r) || isINF(c.r) || isDENORM(c.r))
			continue;
		x.d = a.d / b.d;
		if (!equal_T(c, x) && !isZERO(x.r)) {
			printf("bad double div, %g / %g = %g (should be %g)\n",
			       a.d, b.d, c.d, x.d);
			c.r = ieee_div(a.r, b.r, T_FORMAT, ROUND_NORMAL,
				       0, &status);
		}
	}
}

static void
test_single_div()
{
	union value a, b, c, x, t;
	float xf;
	u_int64_t status = 0;
	int i;

	for (i = 0; i < ITER; i++) {
		a.d = random_double();
		b.d = random_double();
		status = 0;
		c.r = ieee_div(a.r, b.r, S_FORMAT, ROUND_NORMAL,
			       0, &status);
		/* ignore NaN and INF */
		if (isNaN(c.r) || isINF(c.r) || isDENORM(c.r))
			continue;
		xf = a.d / b.d;
		x.d = xf;
		t.r = ieee_convert_S_T(c.r, ROUND_NORMAL, 0, &status);
		if (!equal_S(t, x)) {
			printf("bad single div, %g / %g = %g (should be %g)\n",
			       a.d, b.d, t.d, x.d);
			c.r = ieee_mul(a.r, b.r, T_FORMAT, ROUND_NORMAL,
				       0, &status);
		}
	}
}

static void
test_convert_int_to_double()
{
	union value a, c, x;
	u_int64_t status = 0;
	int i;

	for (i = 0; i < ITER; i++) {
		a.r.q = (u_int64_t)random() << 32
			| random();
		status = 0;
		c.r = ieee_convert_Q_T(a.r, ROUND_NORMAL, 0, &status);
		/* ignore NaN and INF */
		if (isNaN(c.r) || isINF(c.r))
			continue;
		x.d = (double) a.r.q;
		if (c.d != x.d) {
			printf("bad convert double, (double)%qx = %g (should be %g)\n",
			       a.r.q, c.d, x.d);
			c.r = ieee_convert_Q_T(a.r, ROUND_NORMAL, 0, &status);
		}
	}
}

static void
test_convert_int_to_single()
{
	union value a, c, x, t;
	float xf;
	u_int64_t status = 0;
	int i;

	for (i = 0; i < ITER; i++) {
		a.r.q = (unsigned long long)random() << 32
			| random();
		status = 0;
		c.r = ieee_convert_Q_S(a.r, ROUND_NORMAL, 0, &status);
		/* ignore NaN and INF */
		if (isNaN(c.r) || isINF(c.r))
			continue;
		xf = (float) a.r.q;
		x.d = xf;
		t.r = ieee_convert_S_T(c.r, ROUND_NORMAL, 0, &status);
		if (t.d != x.d) {
			printf("bad convert single, (double)%qx = %g (should be %g)\n",
			       a.r.q, c.d, x.d);
			c.r = ieee_convert_Q_S(a.r, ROUND_NORMAL, 0, &status);
		}
	}
}

static void
test_convert_double_to_int()
{
	union value a, c;
	u_int64_t status = 0;
	int i;

	for (i = 0; i < ITER; i++) {
		a.d = random_double();
		status = 0;
		c.r = ieee_convert_T_Q(a.r, ROUND_NORMAL, 0, &status);
		if ((int)c.r.q != (int)a.d) {
			printf("bad convert double, (int)%g = %d (should be %d)\n",
			       a.d, (int)c.r.q, (int)a.d);
			c.r = ieee_convert_T_Q(a.r, ROUND_NORMAL, 0, &status);
		}
	}
}

int
main(int argc, char* argv[])
{
	srandom(0);

	test_double_div();
	test_single_div();
	test_double_add();
	test_single_add();
	test_double_mul();
	test_single_mul();
	test_convert_int_to_double();
	test_convert_int_to_single();
#if 0
	/* x86 generates SIGFPE on overflows. */
	test_convert_double_to_int();
#endif

	return 0;
}

#endif