/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * This file is mostly a result of compiling the mont_mulf.c file to generate an * assembly output and then hand-editing that output to replace the * compiler-generated loop for the 512-bit case (nlen == 16) in the * mont_mulf_noconv routine with a hand-crafted version. This file also * has big_savefp() and big_restorefp() routines added by hand. 
*/
#include <sys/asm_linkage.h>
#include <sys/trap.h>
#include <sys/stack.h>
#include <sys/privregs.h>
#include <sys/regset.h>
#include <sys/vis.h>
#include <sys/machthread.h>
#include <sys/machtrap.h>
#include <sys/machsig.h>
#if defined(lint) || defined(__lint)
#include <sys/types.h>
/*
 * Lint-only C stubs.  Each one gives lint a C definition with the same
 * signature as the routine of the same name and does nothing; the code
 * that actually runs is the assembly in the #else branch below.
 */
/* ARGSUSED */
uint64_t
double2uint64_t(double* d)
{
	return (0ULL);
}
/* ARGSUSED */
void
conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
{
}
/* ARGSUSED */
void
conv_i32_to_d32(double *d32, uint32_t *i32, int len)
{
}
/* ARGSUSED */
void
conv_i32_to_d16(double *d16, uint32_t *i32, int len)
{
}
/* ARGSUSED */
void
mont_mulf_noconv(uint32_t *result, double *dm1, double *dm2, double *dt,
    double *dn, uint32_t *nint, int nlen, double dn0)
{
}
#else /* lint || __lint */
.section ".text",#alloc,#execinstr
.file "mont_mulf.c"
.section ".bss",#alloc,#write
Bbss.bss:
.section ".data",#alloc,#write
Ddata.data:
.section ".rodata",#alloc
!
! CONSTANT POOL
!
Drodata.rodata:
.global TwoTo16
.align 8
/*
 * Global double-precision constants.  Each one is emitted as two 32-bit
 * words; read as a single big-endian IEEE-754 double (first word is the
 * most-significant half) they give the values of the static const
 * definitions shown in the interleaved mont_mulf.c listing.
 */
!
! CONSTANT POOL
!
.global TwoTo16
/* 1089470464 = 0x40F00000: high word of 65536.0 (2^16) */
TwoTo16:
.word 1089470464
.word 0
.type TwoTo16,#object
.size TwoTo16,8
.global TwoToMinus16
!
! CONSTANT POOL
!
.global TwoToMinus16
/* 1055916032 = 0x3EF00000: high word of 1.0/65536.0 (2^-16) */
TwoToMinus16:
.word 1055916032
.word 0
.type TwoToMinus16,#object
.size TwoToMinus16,8
.global Zero
!
! CONSTANT POOL
!
.global Zero
/* All-zero bit pattern: the double 0.0 */
Zero:
.word 0
.word 0
.type Zero,#object
.size Zero,8
.global TwoTo32
!
! CONSTANT POOL
!
.global TwoTo32
/* 1106247680 = 0x41F00000: high word of 65536.0 * 65536.0 (2^32) */
TwoTo32:
.word 1106247680
.word 0
.type TwoTo32,#object
.size TwoTo32,8
.global TwoToMinus32
!
! CONSTANT POOL
!
.global TwoToMinus32
/* 1039138816 = 0x3DF00000: high word of 1.0 / (65536.0 * 65536.0) (2^-32) */
TwoToMinus32:
.word 1039138816
.word 0
.type TwoToMinus32,#object
.size TwoToMinus32,8
.section ".text",#alloc,#execinstr
/* 000000 0 */ .register %g3,#scratch
/* 000000 */ .register %g2,#scratch
/* 000000 0 */ .align 32
! FILE mont_mulf.c
! 1 !/*
! 2 ! * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
! 3 ! * Use is subject to license terms.
! 4 ! */
! 6 !#pragma ident "@(#)mont_mulf.c 1.2 01/09/24 SMI"
! 9 !/*
! 10 !
* If compiled without -DRF_INLINE_MACROS then needs -lm at link time
! 11 ! * If compiled with -DRF_INLINE_MACROS then needs conv.il at compile time
! 12 ! * (i.e. cc <compileer_flags> -DRF_INLINE_MACROS conv.il mont_mulf.c )
! 13 ! */
! 15 !#include <sys/types.h>
! 16 !#include <math.h>
! 18 !static const double TwoTo16 = 65536.0;
! 19 !static const double TwoToMinus16 = 1.0/65536.0;
! 20 !static const double Zero = 0.0;
! 21 !static const double TwoTo32 = 65536.0 * 65536.0;
! 22 !static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0);
! 24 !#ifdef RF_INLINE_MACROS
! 26 !double upper32(double);
! 27 !double lower32(double, double);
! 28 !double mod(double, double, double);
! 30 !#else
! 32 !static double
! 33 !upper32(double x)
! 34 !{
! 35 ! return (floor(x * TwoToMinus32));
! 36 !}
! 39 !/* ARGSUSED */
! 40 !static double
! 41 !lower32(double x, double y)
! 42 !{
! 43 ! return (x - TwoTo32 * floor(x * TwoToMinus32));
! 44 !}
! 46 !static double
! 47 !mod(double x, double oneoverm, double m)
! 48 !{
! 49 ! return (x - m * floor(x * oneoverm));
! 50 !}
! 52 !#endif
! 55 !static void
! 56 !cleanup(double *dt, int from, int tlen)
! 57 !{
/*
 * cleanup(dt, from, tlen) -- file-local leaf routine (no .global, no
 * save); arguments arrive in %o0 (dt), %o1 (from) and %o2 (tlen).
 *
 * Following the interleaved C listing, it walks dt[2*from .. 2*tlen-1]
 * two doubles per iteration.  Each element is replaced by its low 32
 * bits plus the upper 32 bits of the element two slots earlier; the
 * even-index carry (tmp) and odd-index carry (tmp1) are kept apart.
 * Nothing is done when 2*from >= 2*tlen.
 *
 * Registers: %g3 = i, %g5 = 2*tlen, %g2 = 2*tlen - 1 (loop bound),
 * %g1 = address of dt[i], %f8 = Zero, %f18 = tmp, %f16 = tmp1,
 * %f10 = x, %f12 = x1.
 */
!
! SUBROUTINE cleanup
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
cleanup:
/* 000000 57 */ sra %o1,0,%o4
/* 0x0004 */ sra %o2,0,%o5
! 58 ! int i;
! 59 ! double tmp, tmp1, x, x1;
! 61 ! tmp = tmp1 = Zero;
/* 0x0008 61 */ sll %o5,1,%g5
! 63 ! for (i = 2 * from; i < 2 * tlen; i += 2) {
/* 0x000c 63 */ sll %o4,1,%g3
/* 0x0010 */ cmp %g3,%g5
/* 0x0014 */ bge,pn %icc,.L77000188
/* 0x0018 0 */ sethi %hi(Zero),%o3
.L77000197:
/* 0x001c 63 */ ldd [%o3+%lo(Zero)],%f8
/* 0x0020 */ sra %g3,0,%o1
/* 0x0024 */ sub %g5,1,%g2
/* 0x0028 */ sllx %o1,3,%g4
! 64 ! x = dt[i];
/* 0x002c 64 */ ldd [%g4+%o0],%f10
/* 0x0030 63 */ add %g4,%o0,%g1
/* 0x0034 */ fmovd %f8,%f18
/* 0x0038 */ fmovd %f8,%f16
! 65 ! x1 = dt[i + 1];
! 66 ! dt[i] = lower32(x, Zero) + tmp;
/*
 * Loop body.  lower32() and upper32() from the listing are done with
 * conversions instead of floor():
 *   - fdtox turns the double into a 64-bit integer in an FP register pair;
 *   - fmovs from %f8 (a zero word) overwrites the first 32-bit half of
 *     that pair, and fxtod converts what is left back: lower32;
 *   - fitod converts the first 32-bit half of an untouched copy of the
 *     integer (%f4 for x, %f6 for x1): upper32.
 * NOTE(review): this reads the even-numbered register of a pair as the
 * most-significant word -- confirm against the SPARC V9 manual.
 * The closing ble,a reloads dt[i] for the next pass in its delay slot.
 */
.L900000110:
/* 0x003c 66 */ fdtox %f10,%f0
/* 0x0040 65 */ ldd [%g1+8],%f12
! 67 ! dt[i + 1] = lower32(x1, Zero) + tmp1;
! 68 ! tmp = upper32(x);
! 69 ! tmp1 = upper32(x1);
/* 0x0044 69 */ add %g3,2,%g3
/* 0x0048 */ cmp %g3,%g2
/* 0x004c 67 */ fdtox %f12,%f2
/* 0x0050 68 */ fmovd %f0,%f4
/* 0x0054 66 */ fmovs %f8,%f0
/* 0x0058 67 */ fmovs %f8,%f2
/* 0x005c 66 */ fxtod %f0,%f0
/* 0x0060 67 */ fxtod %f2,%f2
/* 0x0064 69 */ fdtox %f12,%f6
/* 0x0068 66 */ faddd %f0,%f18,%f10
/* 0x006c */ std %f10,[%g1]
/* 0x0070 67 */ faddd %f2,%f16,%f14
/* 0x0074 */ std %f14,[%g1+8]
/* 0x0078 68 */ fitod %f4,%f18
/* 0x007c 69 */ add %g1,16,%g1
/* 0x0080 */ fitod %f6,%f16
/* 0x0084 */ ble,a,pt %icc,.L900000110
/* 0x0088 64 */ ldd [%g1],%f10
.L77000188:
/* 0x008c 69 */ retl ! Result =
/* 0x0090 */ nop
/* 0x0094 0 */ .type cleanup,2
/* 0x0094 0 */ .size cleanup,(.-cleanup)
.section ".text",#alloc,#execinstr
/* 000000 0 */ .align 8
/* 000000 */ .skip 24
/* 0x0018 */ .align 32
! 70 ! }
! 71 !}
! 75 !#ifdef _KERNEL
! 76 !/*
! 77 ! * This only works if 0 <= d < 2^53
! 78 ! */
! 79 !uint64_t
! 80 !double2uint64_t(double* d)
! 81 !{
! 82 ! uint64_t x;
! 83 ! uint64_t exp;
! 84 ! uint64_t man;
! 86 ! x = *((uint64_t *)d);
/*
 * double2uint64_t(d) -- global leaf routine; %o0 = d on entry, result is
 * returned in %o0.  This is the _KERNEL variant from the listing: the
 * double is converted with integer instructions only, by loading its raw
 * 64 bits and separating exponent and mantissa.  The listing states that
 * it is valid only for 0 <= d < 2^53.  An all-zero bit pattern returns 0
 * straight away.
 */
!
! SUBROUTINE double2uint64_t
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
.global double2uint64_t
double2uint64_t:
/* 000000 86 */ ldx [%o0],%o2
! 87 ! if (x == 0) {
/* 0x0004 87 */ cmp %o2,0
/* 0x0008 */ bne,pn %xcc,.L900000206
/* 0x000c 94 */ sethi %hi(0xfff00000),%o5
.L77000202:
/* 0x0010 94 */ retl ! Result = %o0
! 88 ! return (0ULL);
/* 0x0014 88 */ or %g0,0,%o0
! 89 ! }
! 90 ! exp = (x >> 52) - 1023;
! 91 ! man = (x & 0xfffffffffffffULL) | 0x10000000000000ULL;
! 92 ! x = man >> (52 - exp);
! 94 ! return (x);
/*
 * Non-zero path.
 *   %o4 = 0xfff00000 << 32         sign and exponent bits
 *   %o3 = ~%o4                     mantissa mask 0x000fffffffffffff
 *   %g4 = 0x40000000 << 22 = 2^52  implicit leading one
 *   %o5 = man = (x & %o3) | %g4
 *   %g1 = (1023 - (x >> 52)) + 52  which equals 52 - exp
 * The final shift man >> %g1 sits in the delay slot of the retl below.
 */
.L900000206:
/* 0x0018 94 */ sllx %o5,32,%o4
/* 0x001c */ srlx %o2,52,%o0
/* 0x0020 */ sethi %hi(0x40000000),%o1
/* 0x0024 */ or %g0,1023,%g5
/* 0x0028 */ sllx %o1,22,%g4
/* 0x002c */ xor %o4,-1,%o3
/* 0x0030 */ sub %g5,%o0,%g3
/* 0x0034 */ and %o2,%o3,%g2
/* 0x0038 */ or %g2,%g4,%o5
/* 0x003c */ add %g3,52,%g1
/* 0x0040 */ retl !
Result = %o0
/* 0x0044 */ srlx %o5,%g1,%o0
/* 0x0048 0 */ .type double2uint64_t,2
/* 0x0048 0 */ .size double2uint64_t,(.-double2uint64_t)
.section ".text",#alloc,#execinstr
/* 000000 0 */ .align 8
/* 000000 */ .skip 24
/* 0x0018 */ .align 32
! 95 !}
! 96 !#else
! 97 !/*
! 98 ! * This only works if 0 <= d < 2^63
! 99 ! */
! 100 !uint64_t
! 101 !double2uint64_t(double* d)
! 102 !{
! 103 ! return ((int64_t)(*d));
! 104 !}
! 105 !#endif
! 107 !/* ARGSUSED */
! 108 !void
! 109 !conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
! 110 !{
/*
 * conv_d16_to_i32(i32, d16, tmp, ilen) -- after the save: %i0 = i32,
 * %i1 = d16, %i3 = ilen.  The tmp argument (%i2) is never read; the
 * register is reused for b.
 *
 * Following the listing, d16[] is consumed in pairs (a = d16[2i],
 * b = d16[2i+1]).  Output word i is the low 32 bits of
 * t1 + (a & 0xffffffff) + ((b & 0xffff) << 16), and the remaining high
 * parts of a, b and t1 carry into the next word.
 *
 * The double2uint64_t() calls shown in the listing are expanded inline:
 * each double is loaded as raw bits with ldx and converted by the same
 * exponent/mantissa extraction, including the early-out that yields 0
 * for an all-zero bit pattern.
 */
!
! SUBROUTINE conv_d16_to_i32
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
.global conv_d16_to_i32
conv_d16_to_i32:
/* 000000 110 */ save %sp,-176,%sp
! 111 ! int i;
! 112 ! int64_t t, t1, /* using int64_t and not uint64_t */
! 113 ! a, b, c, d; /* because more efficient code is */
! 114 ! /* generated this way, and there */
! 115 ! /* is no overflow */
! 116 ! t1 = 0;
! 117 ! a = double2uint64_t(&(d16[0]));
/* 0x0004 117 */ ldx [%i1],%o0
/* 0x0008 118 */ ldx [%i1+8],%i2
/* 0x000c 117 */ cmp %o0,0
/* 0x0010 */ bne,pn %xcc,.L77000216
/* 0x0014 */ or %g0,0,%i4
.L77000215:
/* 0x0018 117 */ ba .L900000316
/* 0x001c 118 */ cmp %i2,0
/* Inline conversion of d16[0]; the result a ends up in %i4. */
.L77000216:
/* 0x0020 117 */ srlx %o0,52,%o5
/* 0x0024 */ sethi %hi(0xfff00000),%i4
/* 0x0028 */ sllx %i4,32,%o2
/* 0x002c */ sethi %hi(0x40000000),%o7
/* 0x0030 */ sllx %o7,22,%o3
/* 0x0034 */ or %g0,1023,%o4
/* 0x0038 */ xor %o2,-1,%g5
/* 0x003c */ sub %o4,%o5,%l0
/* 0x0040 */ and %o0,%g5,%o1
/* 0x0044 */ add %l0,52,%l1
/* 0x0048 */ or %o1,%o3,%g4
! 118 ! b = double2uint64_t(&(d16[1]));
/* 0x004c 118 */ cmp %i2,0
/* 0x0050 117 */ srlx %g4,%l1,%i4
.L900000316:
/* 0x0054 118 */ bne,pn %xcc,.L77000222
/* 0x0058 134 */ sub %i3,1,%l3
.L77000221:
/* 0x005c 118 */ or %g0,0,%i2
/* 0x0060 */ ba .L900000315
/* 0x0064 116 */ or %g0,0,%o3
/* Inline conversion of d16[1]; b ends up in %i2, and t1 (%o3) is cleared. */
.L77000222:
/* 0x0068 118 */ srlx %i2,52,%l6
/* 0x006c */ sethi %hi(0xfff00000),%g4
/* 0x0070 */ sllx %g4,32,%i5
/* 0x0074 */ sethi %hi(0x40000000),%l5
/* 0x0078 */ xor %i5,-1,%l4
/* 0x007c */ or %g0,1023,%l2
/* 0x0080 */ and %i2,%l4,%l7
/* 0x0084 */ sllx %l5,22,%i2
/* 0x0088 */ sub %l2,%l6,%g1
/* 0x008c */ or %l7,%i2,%g3
/* 0x0090 */ add %g1,52,%g2
/* 0x0094 116 */ or %g0,0,%o3
/* 0x0098 118 */ srlx %g3,%g2,%i2
! 119 ! for (i = 0; i < ilen - 1; i++) {
.L900000315:
/* 0x009c 119 */ cmp %l3,0
/* 0x00a0 */ ble,pn %icc,.L77000210
/* 0x00a4 */ or %g0,0,%l4
/*
 * Loop invariants set up here:
 *   %l7 = ~(0xfff00000 << 32)   mantissa mask
 *   %l3 = 0x40000000 << 22      2^52, the implicit leading one
 *   %l6 = 0xffffffff            all ones, cut to 32 bits by srl with 0
 *   %l2 = 0xfc00 + 1023         0xffff
 *   %l5 = ilen - 2              last value of i that runs the loop
 *   %g2 = 2                     index 2*i + 2 of the next c
 *   %g1 = i32                   output pointer for i32[i]
 * %l4 = i was cleared in the delay slot above.
 */
.L77000245:
/* 0x00a8 118 */ sethi %hi(0xfff00000),%l7
/* 0x00ac */ or %g0,-1,%l6
/* 0x00b0 */ sllx %l7,32,%l3
/* 0x00b4 */ srl %l6,0,%l6
/* 0x00b8 */ sethi %hi(0x40000000),%l1
/* 0x00bc */ sethi %hi(0xfc00),%l2
/* 0x00c0 */ xor %l3,-1,%l7
/* 0x00c4 */ sllx %l1,22,%l3
/* 0x00c8 */ sub %i3,2,%l5
/* 0x00cc */ add %l2,1023,%l2
/* 0x00d0 */ or %g0,2,%g2
/* 0x00d4 */ or %g0,%i0,%g1
! 120 ! c = double2uint64_t(&(d16[2 * i + 2]));
/*
 * Loop body: a is in %i4, b in %i2, t1 in %o3.  The next pair is loaded
 * and converted inline (c into %l1, d into %o2) while the current word
 * is assembled in %o7 and its low 32 bits are stored to i32[i].
 */
.L77000208:
/* 0x00d8 120 */ sra %g2,0,%g3
/* 0x00dc 123 */ add %g2,1,%o2
/* 0x00e0 120 */ sllx %g3,3,%i3
! 121 ! t1 += a & 0xffffffff;
! 122 ! t = (a >> 32);
! 123 ! d = double2uint64_t(&(d16[2 * i + 3]));
/* 0x00e4 123 */ sra %o2,0,%g5
/* 0x00e8 120 */ ldx [%i1+%i3],%o5
/* 0x00ec 123 */ sllx %g5,3,%o0
/* 0x00f0 121 */ and %i4,%l6,%g4
/* 0x00f4 123 */ ldx [%i1+%o0],%i3
/* 0x00f8 120 */ cmp %o5,0
/* 0x00fc */ bne,pn %xcc,.L77000228
/* 0x0100 124 */ and %i2,%l2,%i5
.L77000227:
/* 0x0104 120 */ or %g0,0,%l1
/* 0x0108 */ ba .L900000314
/* 0x010c 121 */ add %o3,%g4,%o0
.L77000228:
/* 0x0110 120 */ srlx %o5,52,%o7
/* 0x0114 */ and %o5,%l7,%o5
/* 0x0118 */ or %g0,52,%l0
/* 0x011c */ sub %o7,1023,%o4
/* 0x0120 */ or %o5,%l3,%l1
/* 0x0124 */ sub %l0,%o4,%o1
/* 0x0128 */ srlx %l1,%o1,%l1
/* 0x012c 121 */ add %o3,%g4,%o0
.L900000314:
/* 0x0130 122 */ srax %i4,32,%g3
/* 0x0134 123 */ cmp %i3,0
/* 0x0138 */ bne,pn %xcc,.L77000234
/* 0x013c 124 */ sllx %i5,16,%g5
.L77000233:
/* 0x0140 123 */ or %g0,0,%o2
/* 0x0144 */ ba .L900000313
/* 0x0148 124 */ add %o0,%g5,%o7
.L77000234:
/* 0x014c 123 */ srlx %i3,52,%o2
/* 0x0150 */ and %i3,%l7,%i4
/* 0x0154 */ sub %o2,1023,%o1
/* 0x0158 */ or %g0,52,%g4
/* 0x015c */ sub %g4,%o1,%i5
/* 0x0160 */ or %i4,%l3,%i3
/* 0x0164 */ srlx %i3,%i5,%o2
! 124 ! t1 += (b & 0xffff) << 16;
/* 0x0168 124 */ add %o0,%g5,%o7
! 125 ! t += (b >> 16) + (t1 >> 32);
.L900000313:
/* 0x016c 125 */ srax %i2,16,%l0
/* 0x0170 */ srax %o7,32,%o4
/* 0x0174 */ add %l0,%o4,%o3
! 126 ! i32[i] = t1 & 0xffffffff;
! 127 ! t1 = t;
! 128 ! a = c;
! 129 ! b = d;
/* 0x0178 129 */ add %l4,1,%l4
/* 0x017c 126 */ and %o7,%l6,%o5
/* 0x0180 125 */ add %g3,%o3,%o3
/* 0x0184 126 */ st %o5,[%g1]
/* 0x0188 128 */ or %g0,%l1,%i4
/* 0x018c 129 */ or %g0,%o2,%i2
/* 0x0190 */ add %g2,2,%g2
/* 0x0194 */ cmp %l4,%l5
/* 0x0198 */ ble,pt %icc,.L77000208
/* 0x019c */ add %g1,4,%g1
! 130 ! }
! 131 ! t1 += a & 0xffffffff;
! 132 ! t = (a >> 32);
! 133 ! t1 += (b & 0xffff) << 16;
! 134 ! i32[i] = t1 & 0xffffffff;
/*
 * Final word: i32[i] = t1 + a + ((b & 0xffff) << 16).  Here a is added
 * without the 0xffffffff mask used in the listing; only the low 32 bits
 * reach memory through the 32-bit st, so the stored value is the same.
 */
.L77000210:
/* 0x01a0 134 */ sra %l4,0,%l4
/* 0x01a4 */ sethi %hi(0xfc00),%i1
/* 0x01a8 */ add %o3,%i4,%l2
/* 0x01ac */ add %i1,1023,%i5
/* 0x01b0 */ and %i2,%i5,%l5
/* 0x01b4 */ sllx %l4,2,%i2
/* 0x01b8 */ sllx %l5,16,%l6
/* 0x01bc */ add %l2,%l6,%l7
/* 0x01c0 */ st %l7,[%i0+%i2]
/* 0x01c4 129 */ ret ! Result =
/* 0x01c8 */ restore %g0,%g0,%g0
/* 0x01cc 0 */ .type conv_d16_to_i32,2
/* 0x01cc 0 */ .size conv_d16_to_i32,(.-conv_d16_to_i32)
.section ".text",#alloc,#execinstr
/* 000000 0 */ .align 8
/*
 * Constants for conv_i32_to_d32.  1127219200 = 0x43300000 is the high
 * word of the double 2^52.  The first two words form 2^52 itself; the
 * third word is a separate copy of that high word, loaded on its own so
 * a 32-bit integer can be placed next to it in an FP register pair.
 */
!
! CONSTANT POOL
!
___const_seg_900000401:
/* 000000 0 */ .word 1127219200,0
/* 0x0008 */ .word 1127219200
/* 0x000c 0 */ .type ___const_seg_900000401,1
/* 0x000c 0 */ .size ___const_seg_900000401,(.-___const_seg_900000401)
/* 0x000c 0 */ .align 8
/* 0x0010 */ .skip 24
/* 0x0028 */ .align 32
! 135 !}
! 138 !void
! 139 !conv_i32_to_d32(double *d32, uint32_t *i32, int len)
! 140 !{
/*
 * conv_i32_to_d32(d32, i32, len) -- global leaf routine; %o0 = d32,
 * %o1 = i32, %o2 = len.  Per the listing it sets d32[i] = (double)i32[i]
 * for 0 <= i < len.  It returns at once when len <= 0; otherwise
 * %o3 = len - 1 and %o2 becomes the d32 write pointer.
 */
!
! SUBROUTINE conv_i32_to_d32
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
.global conv_i32_to_d32
conv_i32_to_d32:
/* 000000 140 */ orcc %g0,%o2,%o2
! 141 ! int i;
! 143 !#pragma pipeloop(0)
! 144 ! for (i = 0; i < len; i++)
/* 0x0004 144 */ ble,pn %icc,.L77000254
/* 0x0008 */ sub %o2,1,%o3
.L77000263:
/* 0x000c 140 */ or %g0,%o0,%o2
! 145 !
d32[i] = (double)(i32[i]);
/*
 * Body of conv_i32_to_d32.  On arrival here %o1 = i32 read pointer,
 * %o2 = d32 write pointer and %o3 = len - 1; %g5 becomes i.
 *
 * Conversion method: the word at ___const_seg_900000401+8 (0x43300000,
 * the high word of 2^52) goes into the even register of an FP pair and
 * the 32-bit integer from i32[] into the odd one, giving the double
 * 2^52 + n.  Subtracting 2^52 (the double at ___const_seg_900000401,
 * held in %f16) leaves n as a double.
 *
 * Control flow:
 *   len < 10      -> straight to the one-element loop at .L900000409
 *   otherwise     -> .L900000405 stores eight doubles per iteration,
 *                    with prefetches ahead of both pointers;
 *                    .L900000408 stores the two conversions still in
 *                    flight; the one-element loop handles what is left.
 */
/* 0x0010 145 */ add %o3,1,%o5
/* 0x0014 144 */ or %g0,0,%g5
/* 0x0018 145 */ cmp %o5,10
/* 0x001c */ bl,pn %icc,.L77000261
/* 0x0020 */ sethi %hi(___const_seg_900000401),%g4
.L900000407:
/* 0x0024 145 */ prefetch [%o1],0
/* 0x0028 */ prefetch [%o0],22
/* 0x002c */ sethi %hi(___const_seg_900000401+8),%o4
/* 0x0030 */ or %g0,%o0,%o2
/* 0x0034 */ prefetch [%o1+64],0
/* 0x0038 */ add %o1,8,%o0
/* 0x003c */ sub %o3,7,%o5
/* 0x0040 */ prefetch [%o2+64],22
/* 0x0044 */ or %g0,2,%g5
/* 0x0048 */ prefetch [%o2+128],22
/* 0x004c */ prefetch [%o2+192],22
/* 0x0050 */ prefetch [%o1+128],0
/* 0x0054 */ ld [%o4+%lo(___const_seg_900000401+8)],%f2
/* 0x0058 */ ldd [%g4+%lo(___const_seg_900000401)],%f16
/* 0x005c */ fmovs %f2,%f0
/* 0x0060 */ prefetch [%o2+256],22
/* 0x0064 */ prefetch [%o2+320],22
/* 0x0068 */ ld [%o1],%f3
/* 0x006c */ prefetch [%o1+192],0
/* 0x0070 */ ld [%o1+4],%f1
/* Unrolled loop: %o0 walks i32[] (32 bytes per pass), %o2 walks d32[] (64 bytes per pass). */
.L900000405:
/* 0x0074 145 */ prefetch [%o0+188],0
/* 0x0078 */ fsubd %f2,%f16,%f22
/* 0x007c */ add %g5,8,%g5
/* 0x0080 */ add %o0,32,%o0
/* 0x0084 */ ld [%o4+%lo(___const_seg_900000401+8)],%f4
/* 0x0088 */ std %f22,[%o2]
/* 0x008c */ cmp %g5,%o5
/* 0x0090 */ ld [%o0-32],%f5
/* 0x0094 */ fsubd %f0,%f16,%f24
/* 0x0098 */ add %o2,64,%o2
/* 0x009c */ fmovs %f4,%f0
/* 0x00a0 */ std %f24,[%o2-56]
/* 0x00a4 */ ld [%o0-28],%f1
/* 0x00a8 */ fsubd %f4,%f16,%f26
/* 0x00ac */ fmovs %f0,%f6
/* 0x00b0 */ prefetch [%o2+312],22
/* 0x00b4 */ std %f26,[%o2-48]
/* 0x00b8 */ ld [%o0-24],%f7
/* 0x00bc */ fsubd %f0,%f16,%f28
/* 0x00c0 */ fmovs %f6,%f8
/* 0x00c4 */ std %f28,[%o2-40]
/* 0x00c8 */ ld [%o0-20],%f9
/* 0x00cc */ fsubd %f6,%f16,%f30
/* 0x00d0 */ fmovs %f8,%f10
/* 0x00d4 */ std %f30,[%o2-32]
/* 0x00d8 */ ld [%o0-16],%f11
/* 0x00dc */ prefetch [%o2+344],22
/* 0x00e0 */ fsubd %f8,%f16,%f48
/* 0x00e4 */ fmovs %f10,%f12
/* 0x00e8 */ std %f48,[%o2-24]
/* 0x00ec */ ld [%o0-12],%f13
/* 0x00f0 */ fsubd %f10,%f16,%f50
/* 0x00f4 */ fmovs %f12,%f2
/* 0x00f8 */ std %f50,[%o2-16]
/* 0x00fc */ ld [%o0-8],%f3
/* 0x0100 */ fsubd %f12,%f16,%f52
/* 0x0104 */ fmovs %f2,%f0
/* 0x0108 */ std %f52,[%o2-8]
/* 0x010c */ ble,pt %icc,.L900000405
/* 0x0110 */ ld [%o0-4],%f1
/* Drain: the two values already loaded into %f2/%f3 and %f0/%f1 are converted and stored. */
.L900000408:
/* 0x0114 145 */ fsubd %f2,%f16,%f18
/* 0x0118 */ add %o2,16,%o2
/* 0x011c */ cmp %g5,%o3
/* 0x0120 */ std %f18,[%o2-16]
/* 0x0124 */ fsubd %f0,%f16,%f20
/* 0x0128 */ or %g0,%o0,%o1
/* 0x012c */ bg,pn %icc,.L77000254
/* 0x0130 */ std %f20,[%o2-8]
/* One element per pass; runs while i <= len - 1. */
.L77000261:
/* 0x0134 145 */ ld [%o1],%f15
.L900000409:
/* 0x0138 145 */ sethi %hi(___const_seg_900000401+8),%o4
/* 0x013c */ ldd [%g4+%lo(___const_seg_900000401)],%f16
/* 0x0140 */ add %g5,1,%g5
/* 0x0144 */ ld [%o4+%lo(___const_seg_900000401+8)],%f14
/* 0x0148 */ add %o1,4,%o1
/* 0x014c */ cmp %g5,%o3
/* 0x0150 */ fsubd %f14,%f16,%f54
/* 0x0154 */ std %f54,[%o2]
/* 0x0158 */ add %o2,8,%o2
/* 0x015c */ ble,a,pt %icc,.L900000409
/* 0x0160 */ ld [%o1],%f15
.L77000254:
/* 0x0164 145 */ retl ! Result =
/* 0x0168 */ nop
/* 0x016c 0 */ .type conv_i32_to_d32,2
/* 0x016c 0 */ .size conv_i32_to_d32,(.-conv_i32_to_d32)
.section ".text",#alloc,#execinstr
/* 000000 0 */ .align 8
/*
 * Constants for conv_i32_to_d16.  1127219200 = 0x43300000: words 0-1
 * form the double 2^52 and word 2 repeats its high word.
 */
!
! CONSTANT POOL
!
___const_seg_900000501:
/* 000000 0 */ .word 1127219200,0
/* 0x0008 */ .word 1127219200
/* 0x000c 0 */ .type ___const_seg_900000501,1
/* 0x000c 0 */ .size ___const_seg_900000501,(.-___const_seg_900000501)
/* 0x000c 0 */ .align 8
/* 0x0010 */ .skip 24
/* 0x0028 */ .align 32
! 146 !}
! 149 !void
! 150 !conv_i32_to_d16(double *d16, uint32_t *i32, int len)
! 151 !{
/*
 * conv_i32_to_d16(d16, i32, len) -- after the save: %i0 = d16,
 * %i1 = i32, %i2 = len.  Per the listing each input word a = i32[i] is
 * split into d16[2*i] = (double)(a & 0xffff) and
 * d16[2*i + 1] = (double)(a >> 16).  It returns at once when len <= 0;
 * otherwise %l6 = len - 1.  Only the prologue appears at this point;
 * the loops follow.
 */
!
! SUBROUTINE conv_i32_to_d16
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
.global conv_i32_to_d16
conv_i32_to_d16:
/* 000000 151 */ save %sp,-368,%sp
/* 0x0004 */ orcc %g0,%i2,%i2
! 152 ! int i;
! 153 ! uint32_t a;
! 155 !#pragma pipeloop(0)
! 156 ! for (i = 0; i < len; i++) {
/* 0x0008 156 */ ble,pn %icc,.L77000272
/* 0x000c */ sub %i2,1,%l6
.L77000281:
/* 0x0010 156 */ sethi %hi(0xfc00),%i3
! 157 !
a = i32[i];
/*
 * Body of conv_i32_to_d16.  Set up here: %i4 = 0xffff (the 0xfc00
 * loaded just before this point, plus 1023), %l7 = i32 read pointer,
 * %i2 = d16 base, %i5 = i, %i3 = 2*i (index into d16), %l6 = len - 1.
 *
 * Conversion method: each 16-bit half is produced in an integer
 * register (and with %i4, or srl by 16), stored to a scratch slot at
 * [%sp+...], and reloaded into the odd register of an FP pair whose
 * even register holds the word at ___const_seg_900000501+8 (0x43300000,
 * the high word of 2^52).  Subtracting 2^52 (%f20) leaves the half as a
 * double.
 *
 * Control flow:
 *   len < 4    -> straight to the one-word loop at .L900000510
 *   otherwise  -> .L900000506 consumes two input words and stores four
 *                 doubles per iteration; .L900000509 stores the four
 *                 doubles still held in the scratch slots; the one-word
 *                 loop handles what is left.
 */
/* 0x0014 157 */ or %g0,%i2,%l1
/* 0x0018 156 */ add %i3,1023,%i4
/* 0x001c 157 */ cmp %i2,4
/* 0x0020 151 */ or %g0,%i1,%l7
/* 0x0024 */ or %g0,%i0,%i2
/* 0x0028 156 */ or %g0,0,%i5
/* 0x002c */ or %g0,0,%i3
/* 0x0030 157 */ bl,pn %icc,.L77000279
/* 0x0034 0 */ sethi %hi(___const_seg_900000501),%i1
.L900000508:
/* 0x0038 157 */ prefetch [%i0+8],22
/* 0x003c */ prefetch [%i0+72],22
/* 0x0040 */ or %g0,%i0,%l2
! 158 ! d16[2 * i] = (double)(a & 0xffff);
/* 0x0044 158 */ sethi %hi(___const_seg_900000501+8),%l1
/* 0x0048 157 */ prefetch [%i0+136],22
/* 0x004c */ sub %l6,1,%i0
/* 0x0050 */ or %g0,0,%i3
/* 0x0054 */ prefetch [%i2+200],22
/* 0x0058 */ or %g0,2,%i5
/* 0x005c */ prefetch [%i2+264],22
/* 0x0060 */ prefetch [%i2+328],22
/* 0x0064 */ prefetch [%i2+392],22
/* 0x0068 */ ld [%l7],%l3
/* 0x006c */ ld [%l7+4],%l4
/* 0x0070 158 */ ldd [%i1+%lo(___const_seg_900000501)],%f20
! 159 ! d16[2 * i + 1] = (double)(a >> 16);
/* 0x0074 159 */ srl %l3,16,%o1
/* 0x0078 158 */ and %l3,%i4,%o3
/* 0x007c */ st %o3,[%sp+2335]
/* 0x0080 159 */ srl %l4,16,%g4
/* 0x0084 158 */ and %l4,%i4,%o0
/* 0x0088 */ st %o0,[%sp+2303]
/* 0x008c 159 */ add %l7,8,%l7
/* 0x0090 */ st %o1,[%sp+2271]
/* 0x0094 */ st %g4,[%sp+2239]
/* 0x0098 157 */ prefetch [%i2+456],22
/* 0x009c */ prefetch [%i2+520],22
/* Unrolled loop: %l7 advances 8 bytes (two words), %l2 advances 32 bytes (four doubles) per pass. */
.L900000506:
/* 0x00a0 157 */ prefetch [%l2+536],22
/* 0x00a4 159 */ add %i5,2,%i5
/* 0x00a8 157 */ add %l2,32,%l2
/* 0x00ac */ ld [%l7],%g2
/* 0x00b0 159 */ cmp %i5,%i0
/* 0x00b4 */ add %l7,8,%l7
/* 0x00b8 158 */ ld [%sp+2335],%f9
/* 0x00bc 159 */ add %i3,4,%i3
/* 0x00c0 158 */ ld [%l1+%lo(___const_seg_900000501+8)],%f8
/* 0x00c4 159 */ ld [%sp+2271],%f11
/* 0x00c8 158 */ and %g2,%i4,%g3
/* 0x00cc 159 */ fmovs %f8,%f10
/* 0x00d0 158 */ st %g3,[%sp+2335]
/* 0x00d4 */ fsubd %f8,%f20,%f28
/* 0x00d8 */ std %f28,[%l2-32]
/* 0x00dc 159 */ srl %g2,16,%g1
/* 0x00e0 */ st %g1,[%sp+2271]
/* 0x00e4 */ fsubd %f10,%f20,%f30
/* 0x00e8 */ std %f30,[%l2-24]
/* 0x00ec 157 */ ld [%l7-4],%l0
/* 0x00f0 158 */ ld [%sp+2303],%f13
/* 0x00f4 */ ld [%l1+%lo(___const_seg_900000501+8)],%f12
/* 0x00f8 159 */ ld [%sp+2239],%f15
/* 0x00fc 158 */ and %l0,%i4,%l5
/* 0x0100 159 */ fmovs %f12,%f14
/* 0x0104 158 */ st %l5,[%sp+2303]
/* 0x0108 */ fsubd %f12,%f20,%f44
/* 0x010c */ std %f44,[%l2-16]
/* 0x0110 159 */ srl %l0,16,%o5
/* 0x0114 */ st %o5,[%sp+2239]
/* 0x0118 */ fsubd %f14,%f20,%f46
/* 0x011c */ ble,pt %icc,.L900000506
/* 0x0120 */ std %f46,[%l2-8]
/*
 * Drain: converts and stores the four halves left in the scratch slots.
 * The last fsubd writes its result into %f20, overwriting the 2^52
 * constant; the one-word loop below reloads %f20 on every pass.
 */
.L900000509:
/* 0x0124 158 */ ld [%l1+%lo(___const_seg_900000501+8)],%f0
/* 0x0128 159 */ cmp %i5,%l6
/* 0x012c */ add %i3,4,%i3
/* 0x0130 158 */ ld [%sp+2335],%f1
/* 0x0134 */ ld [%sp+2303],%f5
/* 0x0138 159 */ fmovs %f0,%f2
/* 0x013c */ ld [%sp+2271],%f3
/* 0x0140 158 */ fmovs %f0,%f4
/* 0x0144 159 */ ld [%sp+2239],%f7
/* 0x0148 */ fmovs %f0,%f6
/* 0x014c 158 */ fsubd %f0,%f20,%f22
/* 0x0150 */ std %f22,[%l2]
/* 0x0154 159 */ fsubd %f2,%f20,%f24
/* 0x0158 */ std %f24,[%l2+8]
/* 0x015c 158 */ fsubd %f4,%f20,%f26
/* 0x0160 */ std %f26,[%l2+16]
/* 0x0164 159 */ fsubd %f6,%f20,%f20
/* 0x0168 */ bg,pn %icc,.L77000272
/* 0x016c */ std %f20,[%l2+24]
/* One input word per pass, indexed stores to d16[%i3] and d16[%i3 + 1]; runs while i <= len - 1. */
.L77000279:
/* 0x0170 157 */ ld [%l7],%l2
.L900000510:
/* 0x0174 158 */ and %l2,%i4,%o4
/* 0x0178 */ st %o4,[%sp+2399]
/* 0x017c 159 */ srl %l2,16,%o2
/* 0x0180 */ st %o2,[%sp+2367]
/* 0x0184 158 */ sethi %hi(___const_seg_900000501+8),%l1
/* 0x0188 */ sra %i3,0,%i0
/* 0x018c */ ld [%l1+%lo(___const_seg_900000501+8)],%f16
/* 0x0190 */ sllx %i0,3,%o1
/* 0x0194 159 */ add %i3,1,%o3
/* 0x0198 158 */ ldd [%i1+%lo(___const_seg_900000501)],%f20
/* 0x019c 159 */ sra %o3,0,%l3
/* 0x01a0 */ add %i5,1,%i5
/* 0x01a4 158 */ ld [%sp+2399],%f17
/* 0x01a8 159 */ sllx %l3,3,%o0
/* 0x01ac */ add %l7,4,%l7
/* 0x01b0 */ fmovs %f16,%f18
/* 0x01b4 */ cmp %i5,%l6
/* 0x01b8 */ add %i3,2,%i3
/* 0x01bc 158 */ fsubd %f16,%f20,%f48
/* 0x01c0 */ std %f48,[%i2+%o1]
/* 0x01c4 159 */ ld [%sp+2367],%f19
/* 0x01c8 */ fsubd %f18,%f20,%f50
/* 0x01cc */ std %f50,[%i2+%o0]
/* 0x01d0 */ ble,a,pt %icc,.L900000510
/* 0x01d4 157 */ ld [%l7],%l2
.L77000272:
/* 0x01d8 159 */ ret ! Result =
/* 0x01dc */ restore %g0,%g0,%g0
/* 0x01e0 0 */ .type conv_i32_to_d16,2
/* 0x01e0 0 */ .size conv_i32_to_d16,(.-conv_i32_to_d16)
.section ".text",#alloc,#execinstr
/* 000000 0 */ .align 8
/*
 * 1127219200 = 0x43300000: words 0-1 form the double 2^52 and word 2
 * repeats its high word.  This copy is the one referenced by the
 * conv_i32_to_d32_and_d16 code further down.
 */
!
! CONSTANT POOL
!
___const_seg_900000601:
/* 000000 0 */ .word 1127219200,0
/* 0x0008 */ .word 1127219200
/* 0x000c 0 */ .type ___const_seg_900000601,1
/* 0x000c 0 */ .size ___const_seg_900000601,(.-___const_seg_900000601)
/* 0x000c 0 */ .align 8
/* 0x0010 */ .skip 24
/* 0x0028 */ .align 32
! 160 ! }
! 161 !}
! 163 !#ifdef RF_INLINE_MACROS
! 165 !void
! 166 !i16_to_d16_and_d32x4(const double *, /* 1/(2^16) */
! 167 ! const double *, /* 2^16 */
! 168 ! const double *, /* 0 */
! 169 ! double *, /* result16 */
! 170 ! double *, /* result32 */
! 171 ! float *); /* source - should be unsigned int* */
! 172 ! /* converted to float* */
! 174 !#else
! 177 !/* ARGSUSED */
! 178 !static void
! 179 !i16_to_d16_and_d32x4(const double *dummy1, /* 1/(2^16) */
! 180 ! const double *dummy2, /* 2^16 */
! 181 ! const double *dummy3, /* 0 */
! 182 ! double *result16,
! 183 ! double *result32,
! 184 ! float *src) /* source - should be unsigned int* */
! 185 ! /* converted to float* */
! 186 !{
! 187 ! uint32_t *i32;
! 188 ! uint32_t a, b, c, d;
! 190 ! i32 = (uint32_t *)src;
! 191 ! a = i32[0];
! 192 ! b = i32[1];
! 193 ! c = i32[2];
! 194 ! d = i32[3];
! 195 ! result16[0] = (double)(a & 0xffff);
! 196 ! result16[1] = (double)(a >> 16);
! 197 ! result32[0] = (double)a;
! 198 ! result16[2] = (double)(b & 0xffff);
! 199 ! result16[3] = (double)(b >> 16);
! 200 ! result32[1] = (double)b;
! 201 ! result16[4] = (double)(c & 0xffff);
! 202 ! result16[5] = (double)(c >> 16);
! 203 ! result32[2] = (double)c;
! 204 ! result16[6] = (double)(d & 0xffff);
! 205 ! result16[7] = (double)(d >> 16);
! 206 ! result32[3] = (double)d;
! 207 !}
! 209 !#endif
! 212 !void
! 213 !conv_i32_to_d32_and_d16(double *d32, double *d16, uint32_t *i32, int len)
! 214 !{
!
!
SUBROUTINE conv_i32_to_d32_and_d16 ! ! OFFSET SOURCE LINE LABEL INSTRUCTION .global conv_i32_to_d32_and_d16 conv_i32_to_d32_and_d16: /* 000000 214 */ save %sp,-368,%sp ! 215 ! int i; ! 216 ! uint32_t a; ! 218 !#pragma pipeloop(0) ! 219 ! for (i = 0; i < len - 3; i += 4) { ! 220 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero, ! 221 ! &(d16[2*i]), &(d32[i]), ! 222 ! (float *)(&(i32[i]))); ! 223 ! } ! 224 ! for (; i < len; i++) { ! 225 ! a = i32[i]; ! 226 ! d32[i] = (double)(i32[i]); ! 227 ! d16[2 * i] = (double)(a & 0xffff); ! 228 ! d16[2 * i + 1] = (double)(a >> 16); /* 0x0004 228 */ sub %i3,3,%i4 /* 0x0008 219 */ cmp %i4,0 /* 0x000c */ ble,pn %icc,.L77000289 /* 0x0010 */ or %g0,0,%i5 .L77000306: /* 0x0014 222 */ sethi %hi(Zero),%g3 /* 0x0018 */ sethi %hi(TwoToMinus16),%g2 /* 0x001c */ sethi %hi(TwoTo16),%o5 /* 0x0020 */ ldd [%g3+%lo(Zero)],%f2 /* 0x0024 219 */ sub %i3,4,%o4 /* 0x0028 */ or %g0,0,%o3 /* 0x002c */ or %g0,%i0,%l6 /* 0x0030 */ or %g0,%i2,%l5 .L900000615: /* 0x0034 222 */ fmovd %f2,%f26 /* 0x0038 */ ld [%l5],%f27 /* 0x003c */ sra %o3,0,%o0 /* 0x0040 */ add %i5,4,%i5 /* 0x0044 */ fmovd %f2,%f28 /* 0x0048 */ ld [%l5+4],%f29 /* 0x004c */ sllx %o0,3,%g5 /* 0x0050 */ cmp %i5,%o4 /* 0x0054 */ fmovd %f2,%f30 /* 0x0058 */ ld [%l5+8],%f31 /* 0x005c */ add %i1,%g5,%g4 /* 0x0060 */ add %o3,8,%o3 /* 0x0064 */ ld [%l5+12],%f3 /* 0x0068 */ fxtod %f26,%f26 /* 0x006c */ ldd [%g2+%lo(TwoToMinus16)],%f32 /* 0x0070 */ fxtod %f28,%f28 /* 0x0074 */ add %l5,16,%l5 /* 0x0078 */ fxtod %f30,%f30 /* 0x007c */ ldd [%o5+%lo(TwoTo16)],%f34 /* 0x0080 */ fxtod %f2,%f2 /* 0x0084 */ std %f2,[%l6+24] /* 0x0088 */ fmuld %f32,%f26,%f36 /* 0x008c */ std %f26,[%l6] /* 0x0090 */ fmuld %f32,%f28,%f38 /* 0x0094 */ std %f28,[%l6+8] /* 0x0098 */ fmuld %f32,%f30,%f40 /* 0x009c */ std %f30,[%l6+16] /* 0x00a0 */ fmuld %f32,%f2,%f42 /* 0x00a4 */ add %l6,32,%l6 /* 0x00a8 */ fdtox %f36,%f36 /* 0x00ac */ fdtox %f38,%f38 /* 0x00b0 */ fdtox %f40,%f40 /* 0x00b4 */ fdtox %f42,%f42 /* 0x00b8 */ fxtod 
%f36,%f36 /* 0x00bc */ std %f36,[%g4+8] /* 0x00c0 */ fxtod %f38,%f38 /* 0x00c4 */ std %f38,[%g4+24] /* 0x00c8 */ fxtod %f40,%f40 /* 0x00cc */ std %f40,[%g4+40] /* 0x00d0 */ fxtod %f42,%f42 /* 0x00d4 */ std %f42,[%g4+56] /* 0x00d8 */ fmuld %f36,%f34,%f36 /* 0x00dc */ fmuld %f38,%f34,%f38 /* 0x00e0 */ fmuld %f40,%f34,%f40 /* 0x00e4 */ fmuld %f42,%f34,%f42 /* 0x00e8 */ fsubd %f26,%f36,%f36 /* 0x00ec */ std %f36,[%i1+%g5] /* 0x00f0 */ fsubd %f28,%f38,%f38 /* 0x00f4 */ std %f38,[%g4+16] /* 0x00f8 */ fsubd %f30,%f40,%f40 /* 0x00fc */ std %f40,[%g4+32] /* 0x0100 */ fsubd %f2,%f42,%f42 /* 0x0104 */ std %f42,[%g4+48] /* 0x0108 */ ble,a,pt %icc,.L900000615 /* 0x010c */ ldd [%g3+%lo(Zero)],%f2 .L77000289: /* 0x0110 224 */ cmp %i5,%i3 /* 0x0114 */ bge,pn %icc,.L77000294 /* 0x0118 */ sethi %hi(0xfc00),%l0 .L77000307: /* 0x011c 224 */ sra %i5,0,%l2 /* 0x0120 */ sll %i5,1,%i4 /* 0x0124 */ sllx %l2,3,%l1 /* 0x0128 */ sllx %l2,2,%o1 /* 0x012c 225 */ sub %i3,%i5,%l3 /* 0x0130 224 */ add %l0,1023,%l0 /* 0x0134 */ add %l1,%i0,%l1 /* 0x0138 */ add %o1,%i2,%i2 /* 0x013c 225 */ cmp %l3,5 /* 0x0140 */ bl,pn %icc,.L77000291 /* 0x0144 0 */ sethi %hi(___const_seg_900000601),%l7 .L900000612: /* 0x0148 225 */ prefetch [%l1],22 /* 0x014c */ prefetch [%l1+64],22 /* 0x0150 */ sra %i4,0,%l6 /* 0x0154 226 */ sethi %hi(___const_seg_900000601+8),%l2 /* 0x0158 225 */ prefetch [%l1+128],22 /* 0x015c */ add %l6,-2,%l5 /* 0x0160 */ sub %i3,3,%i0 /* 0x0164 */ prefetch [%l1+192],22 /* 0x0168 */ sllx %l5,3,%o4 /* 0x016c 228 */ add %i5,1,%i5 /* 0x0170 225 */ add %i1,%o4,%o3 /* 0x0174 */ or %g0,%i3,%g1 /* 0x0178 */ ld [%i2],%l4 /* 0x017c */ prefetch [%o3+16],22 /* 0x0180 */ add %o3,16,%l3 /* 0x0184 228 */ add %i2,4,%i2 /* 0x0188 225 */ prefetch [%o3+80],22 /* 0x018c 228 */ srl %l4,16,%o1 /* 0x0190 227 */ and %l4,%l0,%o0 /* 0x0194 225 */ prefetch [%o3+144],22 /* 0x0198 228 */ st %o1,[%sp+2271] /* 0x019c 227 */ st %o0,[%sp+2239] /* 0x01a0 226 */ ldd [%l7+%lo(___const_seg_900000601)],%f32 /* 0x01a4 228 */ ld 
[%l2+%lo(___const_seg_900000601+8)],%f0 /* 0x01a8 225 */ prefetch [%o3+208],22 /* 0x01ac */ prefetch [%o3+272],22 /* 0x01b0 */ prefetch [%o3+336],22 .L900000610: /* 0x01b4 225 */ prefetch [%l1+192],22 /* 0x01b8 228 */ add %i5,4,%i5 /* 0x01bc 225 */ add %l3,64,%l3 /* 0x01c0 227 */ ld [%l2+%lo(___const_seg_900000601+8)],%f8 /* 0x01c4 228 */ cmp %i5,%i0 /* 0x01c8 225 */ ld [%i2],%g5 /* 0x01cc 228 */ add %i2,16,%i2 /* 0x01d0 */ add %l1,32,%l1 /* 0x01d4 */ add %i4,8,%i4 /* 0x01d8 226 */ ld [%i2-20],%f7 /* 0x01dc 228 */ srl %g5,16,%i3 /* 0x01e0 226 */ fmovs %f8,%f6 /* 0x01e4 228 */ st %i3,[%sp+2335] /* 0x01e8 227 */ and %g5,%l0,%g4 /* 0x01ec */ st %g4,[%sp+2303] /* 0x01f0 226 */ fsubd %f6,%f32,%f40 /* 0x01f4 227 */ ld [%sp+2239],%f9 /* 0x01f8 228 */ ld [%sp+2271],%f1 /* 0x01fc */ fmovs %f8,%f12 /* 0x0200 226 */ std %f40,[%l1-32] /* 0x0204 227 */ fsubd %f8,%f32,%f42 /* 0x0208 */ std %f42,[%l3-64] /* 0x020c 228 */ fsubd %f0,%f32,%f44 /* 0x0210 */ std %f44,[%l3-56] /* 0x0214 227 */ fmovs %f12,%f10 /* 0x0218 225 */ ld [%i2-12],%g2 /* 0x021c 226 */ ld [%i2-16],%f1 /* 0x0220 228 */ srl %g2,16,%g3 /* 0x0224 226 */ fmovs %f12,%f0 /* 0x0228 225 */ prefetch [%l3+320],22 /* 0x022c 228 */ st %g3,[%sp+2271] /* 0x0230 227 */ and %g2,%l0,%l6 /* 0x0234 */ st %l6,[%sp+2239] /* 0x0238 226 */ fsubd %f0,%f32,%f46 /* 0x023c 227 */ ld [%sp+2303],%f11 /* 0x0240 228 */ ld [%sp+2335],%f13 /* 0x0244 */ fmovs %f12,%f18 /* 0x0248 226 */ std %f46,[%l1-24] /* 0x024c 227 */ fsubd %f10,%f32,%f48 /* 0x0250 */ std %f48,[%l3-48] /* 0x0254 228 */ fsubd %f12,%f32,%f50 /* 0x0258 */ std %f50,[%l3-40] /* 0x025c 227 */ fmovs %f18,%f16 /* 0x0260 225 */ ld [%i2-8],%o5 /* 0x0264 226 */ ld [%i2-12],%f15 /* 0x0268 228 */ srl %o5,16,%l5 /* 0x026c 226 */ fmovs %f18,%f14 /* 0x0270 228 */ st %l5,[%sp+2335] /* 0x0274 227 */ and %o5,%l0,%o4 /* 0x0278 */ st %o4,[%sp+2303] /* 0x027c 226 */ fsubd %f14,%f32,%f52 /* 0x0280 227 */ ld [%sp+2239],%f17 /* 0x0284 228 */ ld [%sp+2271],%f19 /* 0x0288 225 */ prefetch [%l3+352],22 /* 
0x028c 228 */ fmovs %f18,%f24 /* 0x0290 226 */ std %f52,[%l1-16] /* 0x0294 227 */ fsubd %f16,%f32,%f54 /* 0x0298 */ std %f54,[%l3-32] /* 0x029c 228 */ fsubd %f18,%f32,%f56 /* 0x02a0 */ std %f56,[%l3-24] /* 0x02a4 227 */ fmovs %f24,%f22 /* 0x02a8 225 */ ld [%i2-4],%l4 /* 0x02ac 226 */ ld [%i2-8],%f21 /* 0x02b0 228 */ srl %l4,16,%o3 /* 0x02b4 226 */ fmovs %f24,%f20 /* 0x02b8 228 */ st %o3,[%sp+2271] /* 0x02bc 227 */ and %l4,%l0,%o2 /* 0x02c0 */ st %o2,[%sp+2239] /* 0x02c4 226 */ fsubd %f20,%f32,%f58 /* 0x02c8 227 */ ld [%sp+2303],%f23 /* 0x02cc 228 */ ld [%sp+2335],%f25 /* 0x02d0 */ fmovs %f24,%f0 /* 0x02d4 226 */ std %f58,[%l1-8] /* 0x02d8 227 */ fsubd %f22,%f32,%f60 /* 0x02dc */ std %f60,[%l3-16] /* 0x02e0 228 */ fsubd %f24,%f32,%f62 /* 0x02e4 */ bl,pt %icc,.L900000610 /* 0x02e8 */ std %f62,[%l3-8] .L900000613: /* 0x02ec 227 */ ld [%l2+%lo(___const_seg_900000601+8)],%f4 /* 0x02f0 228 */ add %l1,8,%l1 /* 0x02f4 */ cmp %i5,%g1 /* 0x02f8 226 */ ld [%i2-4],%f3 /* 0x02fc 225 */ or %g0,%g1,%i3 /* 0x0300 228 */ add %i4,2,%i4 /* 0x0304 227 */ ld [%sp+2239],%f5 /* 0x0308 226 */ fmovs %f4,%f2 /* 0x030c 228 */ ld [%sp+2271],%f1 /* 0x0310 226 */ fsubd %f2,%f32,%f34 /* 0x0314 */ std %f34,[%l1-8] /* 0x0318 227 */ fsubd %f4,%f32,%f36 /* 0x031c */ std %f36,[%l3] /* 0x0320 228 */ fsubd %f0,%f32,%f38 /* 0x0324 */ bge,pn %icc,.L77000294 /* 0x0328 */ std %f38,[%l3+8] .L77000291: /* 0x032c 225 */ ld [%i2],%o2 .L900000614: /* 0x0330 226 */ ldd [%l7+%lo(___const_seg_900000601)],%f32 /* 0x0334 228 */ srl %o2,16,%l3 /* 0x0338 227 */ sra %i4,0,%i0 /* 0x033c 228 */ st %l3,[%sp+2367] /* 0x0340 227 */ and %o2,%l0,%g1 /* 0x0344 226 */ sethi %hi(___const_seg_900000601+8),%l2 /* 0x0348 227 */ st %g1,[%sp+2399] /* 0x034c */ sllx %i0,3,%o0 /* 0x0350 228 */ add %i4,1,%l4 /* 0x0354 226 */ ld [%l2+%lo(___const_seg_900000601+8)],%f4 /* 0x0358 228 */ sra %l4,0,%o1 /* 0x035c */ add %i5,1,%i5 /* 0x0360 226 */ ld [%i2],%f5 /* 0x0364 228 */ sllx %o1,3,%g5 /* 0x0368 */ cmp %i5,%i3 /* 0x036c */ ld 
[%sp+2367],%f9 /* 0x0370 */ add %i2,4,%i2 /* 0x0374 */ add %i4,2,%i4 /* 0x0378 227 */ fmovs %f4,%f6 /* 0x037c 226 */ fsubd %f4,%f32,%f44 /* 0x0380 */ std %f44,[%l1] /* 0x0384 227 */ ld [%sp+2399],%f7 /* 0x0388 228 */ fmovs %f6,%f8 /* 0x038c */ add %l1,8,%l1 /* 0x0390 */ fsubd %f8,%f32,%f48 /* 0x0394 227 */ fsubd %f6,%f32,%f46 /* 0x0398 */ std %f46,[%i1+%o0] /* 0x039c 228 */ std %f48,[%i1+%g5] /* 0x03a0 */ bl,a,pt %icc,.L900000614 /* 0x03a4 225 */ ld [%i2],%o2 .L77000294: /* 0x03a8 222 */ ret ! Result = /* 0x03ac */ restore %g0,%g0,%g0 /* 0x03b0 0 */ .type conv_i32_to_d32_and_d16,2 /* 0x03b0 0 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16) .section ".text",#alloc,#execinstr /* 000000 0 */ .align 32 ! 229 ! } ! 230 !} ! 232 !extern long long c1, c2, c3, c4; ! 234 !static void ! 235 !adjust_montf_result(uint32_t *i32, uint32_t *nint, int len) ! 236 !{ ! ! SUBROUTINE adjust_montf_result ! ! OFFSET SOURCE LINE LABEL INSTRUCTION /* adjust_montf_result: leaf routine (no save; returns with retl). Inputs as used below: %o0 = i32, %o1 = nint, %o2 = len (sign-extended into %g2). When i32[len] is nonzero, or when i32[0..len-1] is not below nint[0..len-1] comparing 32-bit words from index len-1 downward, nint is subtracted from i32 in place across len words with the borrow carried in a 64-bit accumulator; otherwise i32 is left unchanged. */ adjust_montf_result: /* 000000 236 */ sra %o2,0,%g2 /* 0x0004 */ or %g0,%o0,%o4 ! 237 ! int64_t acc; ! 238 ! int i; ! 240 ! if (i32[len] > 0) { /* 0x0008 240 */ sllx %g2,2,%g3 /* 0x000c */ ld [%o0+%g3],%o0 /* 0x0010 */ cmp %o0,0 /* 0x0014 */ bleu,pn %icc,.L77000316 /* 0x0018 236 */ or %g0,%o1,%o5 ! 241 ! i = -1; .L77000315: /* i32[len] != 0: skip the compare loop; %g3 = len - 1 is the bound later tested by the subtract loops */ /* 0x001c 241 */ sub %g2,1,%g3 /* 0x0020 */ ba .L900000712 /* 0x0024 249 */ cmp %g2,0 ! 242 ! } else { ! 243 ! for (i = len - 1; i >= 0; i--) { .L77000316: /* i32[len] == 0: %o3 = i = len - 1; with len <= 0 go straight to .L77000340 */ /* 0x0028 243 */ subcc %g2,1,%g3 /* 0x002c */ bneg,pn %icc,.L77000340 /* 0x0030 */ or %g0,%g3,%o3 .L77000348: /* 0x0034 243 */ sra %g3,0,%o1 /* 0x0038 */ sllx %o1,2,%g1 ! 244 ! 
if (i32[i] != nint[i]) break; /* 0x003c 244 */ ld [%g1+%o5],%g4 /* 0x0040 243 */ add %g1,%o4,%o2 /* 0x0044 */ add %g1,%o5,%o1 .L900000713: /* compare loop: %o2 walks i32 and %o1 walks nint downward while the words match */ /* 0x0048 244 */ ld [%o2],%o0 /* 0x004c */ cmp %o0,%g4 /* 0x0050 */ bne,pn %icc,.L77000324 /* 0x0054 */ sub %o2,4,%o2 .L77000320: /* 0x0058 244 */ sub %o1,4,%o1 /* 0x005c */ subcc %o3,1,%o3 /* 0x0060 */ bpos,a,pt %icc,.L900000713 /* 0x0064 */ ld [%o1],%g4 .L900000706: /* every word matched (i ran below 0): take the subtract path */ /* 0x0068 244 */ ba .L900000712 /* 0x006c 249 */ cmp %g2,0 .L77000324: /* mismatch at index %o3: return unchanged when i32[i] <= nint[i] (unsigned compare) */ /* 0x0070 244 */ sra %o3,0,%o0 /* 0x0074 */ sllx %o0,2,%g1 /* 0x0078 */ ld [%o5+%g1],%o3 /* 0x007c */ ld [%o4+%g1],%g5 /* 0x0080 */ cmp %g5,%o3 /* 0x0084 */ bleu,pt %icc,.L77000332 /* 0x0088 */ nop ! 245 ! } ! 246 ! } ! 247 ! if ((i < 0) || (i32[i] > nint[i])) { ! 248 ! acc = 0; ! 249 ! for (i = 0; i < len; i++) { .L77000340: /* 0x008c 249 */ cmp %g2,0 .L900000712: /* subtract path entry; the condition codes hold len compared with 0, and len <= 0 returns */ /* 0x0090 249 */ ble,pn %icc,.L77000332 /* 0x0094 250 */ or %g0,%g2,%o3 .L77000347: /* i = 0 and acc = 0; len < 10 uses only the one-word loop at .L77000341 */ /* 0x0098 249 */ or %g0,0,%o0 ! 250 ! acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]); /* 0x009c 250 */ cmp %o3,10 /* 0x00a0 */ bl,pn %icc,.L77000341 /* 0x00a4 249 */ or %g0,0,%g2 .L900000709: /* len >= 10: issue prefetches and handle word 0 ahead of the unrolled loop */ /* 0x00a8 250 */ prefetch [%o4],22 /* 0x00ac */ prefetch [%o4+64],22 ! 251 ! i32[i] = acc & 0xffffffff; ! 252 ! 
acc = acc >> 32; /* 0x00b0 252 */ add %o5,4,%o1 /* 0x00b4 */ add %o4,8,%o2 /* 0x00b8 250 */ prefetch [%o4+128],22 /* 0x00bc */ sub %o3,8,%o5 /* 0x00c0 */ or %g0,2,%o0 /* 0x00c4 */ prefetch [%o4+192],22 /* 0x00c8 */ prefetch [%o4+256],22 /* 0x00cc */ prefetch [%o4+320],22 /* 0x00d0 */ prefetch [%o4+384],22 /* 0x00d4 */ ld [%o2-4],%g5 /* 0x00d8 */ prefetch [%o2+440],22 /* 0x00dc */ prefetch [%o2+504],22 /* 0x00e0 */ ld [%o4],%g4 /* 0x00e4 */ ld [%o1-4],%o4 /* 0x00e8 */ sub %g4,%o4,%o3 /* 0x00ec 251 */ st %o3,[%o2-8] /* 0x00f0 252 */ srax %o3,32,%g4 .L900000707: /* unrolled loop: eight words per pass, repeated while %o0 <= len - 8 (kept in %o5); the next i32 word is preloaded into %g5 */ /* 0x00f4 252 */ add %o0,8,%o0 /* 0x00f8 */ add %o2,32,%o2 /* 0x00fc 250 */ ld [%o1],%g1 /* 0x0100 */ prefetch [%o2+496],22 /* 0x0104 252 */ cmp %o0,%o5 /* 0x0108 */ add %o1,32,%o1 /* 0x010c 250 */ sub %g5,%g1,%g5 /* 0x0110 */ add %g5,%g4,%o4 /* 0x0114 */ ld [%o2-32],%g4 /* 0x0118 251 */ st %o4,[%o2-36] /* 0x011c 252 */ srax %o4,32,%g1 /* 0x0120 250 */ ld [%o1-28],%o3 /* 0x0124 */ sub %g4,%o3,%g2 /* 0x0128 */ add %g2,%g1,%g5 /* 0x012c */ ld [%o2-28],%o3 /* 0x0130 251 */ st %g5,[%o2-32] /* 0x0134 252 */ srax %g5,32,%g4 /* 0x0138 250 */ ld [%o1-24],%o4 /* 0x013c */ sub %o3,%o4,%g1 /* 0x0140 */ add %g1,%g4,%g2 /* 0x0144 */ ld [%o2-24],%o3 /* 0x0148 251 */ st %g2,[%o2-28] /* 0x014c 252 */ srax %g2,32,%g5 /* 0x0150 250 */ ld [%o1-20],%o4 /* 0x0154 */ sub %o3,%o4,%g4 /* 0x0158 */ add %g4,%g5,%g1 /* 0x015c */ ld [%o2-20],%o4 /* 0x0160 251 */ st %g1,[%o2-24] /* 0x0164 252 */ srax %g1,32,%o3 /* 0x0168 250 */ ld [%o1-16],%g2 /* 0x016c */ sub %o4,%g2,%g5 /* 0x0170 */ add %g5,%o3,%g1 /* 0x0174 */ ld [%o2-16],%g4 /* 0x0178 251 */ st %g1,[%o2-20] /* 0x017c 252 */ srax %g1,32,%o4 /* 0x0180 250 */ ld [%o1-12],%g2 /* 0x0184 */ sub %g4,%g2,%o3 /* 0x0188 */ add %o3,%o4,%g5 /* 0x018c */ ld [%o2-12],%g2 /* 0x0190 251 */ st %g5,[%o2-16] /* 0x0194 252 */ srax %g5,32,%g4 /* 0x0198 250 */ ld [%o1-8],%g1 /* 0x019c */ sub %g2,%g1,%o4 /* 0x01a0 */ add %o4,%g4,%o3 /* 0x01a4 */ ld [%o2-8],%g2 /* 0x01a8 251 */ st %o3,[%o2-12] /* 0x01ac 252 
*/ srax %o3,32,%g5 /* 0x01b0 250 */ ld [%o1-4],%g1 /* 0x01b4 */ sub %g2,%g1,%g4 /* 0x01b8 */ add %g4,%g5,%o4 /* 0x01bc */ ld [%o2-4],%g5 /* 0x01c0 251 */ st %o4,[%o2-8] /* 0x01c4 252 */ ble,pt %icc,.L900000707 /* 0x01c8 */ srax %o4,32,%g4 .L900000710: /* finish the word preloaded in %g5, rebuild %o4/%o5 as the i32/nint cursors, and return when %o0 > len - 1 */ /* 0x01cc 250 */ ld [%o1],%o3 /* 0x01d0 252 */ add %o1,4,%o5 /* 0x01d4 250 */ or %g0,%o2,%o4 /* 0x01d8 252 */ cmp %o0,%g3 /* 0x01dc 250 */ sub %g5,%o3,%g2 /* 0x01e0 */ add %g2,%g4,%g1 /* 0x01e4 251 */ st %g1,[%o2-4] /* 0x01e8 252 */ bg,pn %icc,.L77000332 /* 0x01ec */ srax %g1,32,%g2 .L77000341: /* one word per pass: %g2 holds the running borrow (acc >> 32); loops while %o0 <= %g3 */ /* 0x01f0 250 */ ld [%o4],%g5 .L900000711: /* 0x01f4 250 */ ld [%o5],%o2 /* 0x01f8 */ add %g2,%g5,%g4 /* 0x01fc 252 */ add %o0,1,%o0 /* 0x0200 */ cmp %o0,%g3 /* 0x0204 */ add %o5,4,%o5 /* 0x0208 250 */ sub %g4,%o2,%o1 /* 0x020c 251 */ st %o1,[%o4] /* 0x0210 252 */ srax %o1,32,%g2 /* 0x0214 */ add %o4,4,%o4 /* 0x0218 */ ble,a,pt %icc,.L900000711 /* 0x021c 250 */ ld [%o4],%g5 .L77000332: /* common return */ /* 0x0220 252 */ retl ! Result = /* 0x0224 */ nop /* 0x0228 0 */ .type adjust_montf_result,2 /* 0x0228 0 */ .size adjust_montf_result,(.-adjust_montf_result) .section ".text",#alloc,#execinstr /* 000000 0 */ .align 32 ! 253 ! } ! 254 ! } ! 255 !} ! 257 !/************* ! 258 !static void ! 259 !adjust_montf_result_bad(uint32_t *i32, uint32_t *nint, int len) ! 260 !{ ! 261 ! int64_t acc; ! 262 ! int i; ! 264 ! c4++; ! 265 ! ! 266 ! if (i32[len] > 0) { ! 267 ! i = -1; ! 268 ! c1++; ! 269 ! } else { ! 270 ! for (i = len - 1; i >= 0; i++) { ! 271 ! if (i32[i] != nint[i]) break; ! 272 ! c2++; ! 273 ! } ! 274 ! } ! 275 ! if ((i < 0) || (i32[i] > nint[i])) { ! 276 ! c3++; ! 277 ! acc = 0; ! 278 ! for (i = 0; i < len; i++) { ! 279 ! acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]); ! 280 ! i32[i] = acc & 0xffffffff; ! 281 ! acc = acc >> 32; ! 282 ! } ! 283 ! } ! 284 !} ! 285 !uint32_t saveresult[1000]; ! 286 !void printarray(char *name, uint32_t *arr, int len) ! 287 !{ ! 288 ! int i, j; ! 289 ! uint64_t tmp; ! 291 ! 
printf("uint64_t %s[%d] =\n{\n",name,(len+1)/2); ! 292 ! for(i=j=0; i<len; i+=2,j+=2){ ! 293 ! if(j == 6){ ! 294 ! printf("\n"); ! 295 ! j=0; ! 296 ! } ! 297 ! tmp = (((uint64_t)arr[i])<<32) | ((uint64_t)arr[i+1]); ! 298 ! printf("0x%016llx",tmp); ! 299 ! if((i/2)!=(((len+1)/2)-1))printf(","); ! 300 ! if(j!=4)printf(" "); ! 301 ! } ! 302 ! if(j!=0) printf("\n"); ! 303 ! printf("};\n"); ! 304 !} ! 305 !**************/ ! 308 !/* ! 309 ! * the lengths of the input arrays should be at least the following: ! 310 ! * result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] ! 311 ! * all of them should be different from one another ! 312 ! */ ! 313 !void mont_mulf_noconv(uint32_t *result, ! 314 ! double *dm1, double *dm2, double *dt, ! 315 ! double *dn, uint32_t *nint, ! 316 ! int nlen, double dn0) ! 317 !{ ! ! SUBROUTINE mont_mulf_noconv ! ! OFFSET SOURCE LINE LABEL INSTRUCTION .global mont_mulf_noconv mont_mulf_noconv: /* 000000 317 */ save %sp,-176,%sp /* 0x0004 */ ldx [%fp+2223],%g1 /* 0x0008 0 */ sethi %hi(Zero),%l5 /* 0x000c 317 */ or %g0,%i2,%l0 ! 318 ! int i, j, jj; ! 319 ! double digit, m2j, a, b; ! 320 ! double *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; ! 322 ! pdm1 = &(dm1[0]); ! 323 ! pdm2 = &(dm2[0]); ! 324 ! pdn = &(dn[0]); ! 325 ! pdm2[2 * nlen] = Zero; /* 0x0010 325 */ ldd [%l5+%lo(Zero)],%f0 /* 0x0014 317 */ or %g0,%i0,%i2 /* 0x0018 325 */ sll %g1,1,%o3 ! 327 ! if (nlen != 16) { /* 0x001c 327 */ cmp %g1,16 /* 0x0020 325 */ sra %o3,0,%i0 /* 0x0024 */ sllx %i0,3,%o0 /* 0x0028 317 */ or %g0,%i5,%i0 /* 0x002c 327 */ bne,pn %icc,.L77000476 /* 0x0030 325 */ std %f0,[%l0+%o0] .L77000488: /* 0x0034 0 */ sethi %hi(TwoToMinus16),%o2 /* 0x0038 0 */ sethi %hi(TwoTo16),%l3 ! 328 ! for (i = 0; i < 4 * nlen + 2; i++) ! 329 ! dt[i] = Zero; ! 330 ! a = dt[0] = pdm1[0] * pdm2[0]; ! 331 ! digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16); ! 333 ! pdtj = &(dt[0]); ! 334 ! for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) { ! 335 ! m2j = pdm2[j]; ! 
336 ! a = pdtj[0] + pdn[0] * digit; ! 337 ! b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16; ! 338 ! pdtj[1] = b; ! 340 !#pragma pipeloop(0) ! 341 ! for (i = 1; i < nlen; i++) { ! 342 ! pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit; ! 343 ! } ! 344 ! if (jj == 15) { ! 345 ! cleanup(dt, j / 2 + 1, 2 * nlen + 1); ! 346 ! jj = 0; ! 347 ! } ! 349 ! digit = mod(lower32(b, Zero) * dn0, ! 350 ! TwoToMinus16, TwoTo16); ! 351 ! } ! 352 ! } else { ! 353 ! a = dt[0] = pdm1[0] * pdm2[0]; /* 0x003c 353 */ ldd [%i1],%f40 ! 355 ! dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] = ! 356 ! dt[59] = dt[58] = dt[57] = dt[56] = dt[55] = ! 357 ! dt[54] = dt[53] = dt[52] = dt[51] = dt[50] = ! 358 ! dt[49] = dt[48] = dt[47] = dt[46] = dt[45] = ! 359 ! dt[44] = dt[43] = dt[42] = dt[41] = dt[40] = ! 360 ! dt[39] = dt[38] = dt[37] = dt[36] = dt[35] = ! 361 ! dt[34] = dt[33] = dt[32] = dt[31] = dt[30] = ! 362 ! dt[29] = dt[28] = dt[27] = dt[26] = dt[25] = ! 363 ! dt[24] = dt[23] = dt[22] = dt[21] = dt[20] = ! 364 ! dt[19] = dt[18] = dt[17] = dt[16] = dt[15] = ! 365 ! dt[14] = dt[13] = dt[12] = dt[11] = dt[10] = ! 366 ! dt[9] = dt[8] = dt[7] = dt[6] = dt[5] = dt[4] = ! 367 ! dt[3] = dt[2] = dt[1] = Zero; ! 369 ! pdn_0 = pdn[0]; ! 370 ! pdm1_0 = pdm1[0]; ! 372 ! digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16); ! 373 ! pdtj = &(dt[0]); /* 0x0040 373 */ or %g0,%i3,%o3 ! 375 ! 
for (j = 0; j < 32; j++, pdtj++) { /* 0x0044 375 */ or %g0,0,%l1 /* 0x0048 353 */ ldd [%l0],%f42 /* 0x004c 372 */ ldd [%o2+%lo(TwoToMinus16)],%f44 /* 0x0050 */ ldd [%l3+%lo(TwoTo16)],%f46 /* 0x0054 367 */ std %f0,[%i3+8] /* 0x0058 353 */ fmuld %f40,%f42,%f38 /* 0x005c */ std %f38,[%i3] /* 0x0060 367 */ std %f0,[%i3+16] /* 0x0064 */ std %f0,[%i3+24] /* 0x0068 */ std %f0,[%i3+32] /* 0x006c 372 */ fdtox %f38,%f4 /* 0x0070 367 */ std %f0,[%i3+40] /* 0x0074 */ std %f0,[%i3+48] /* 0x0078 */ std %f0,[%i3+56] /* 0x007c 372 */ fmovs %f0,%f4 /* 0x0080 367 */ std %f0,[%i3+64] /* 0x0084 */ std %f0,[%i3+72] /* 0x0088 372 */ fxtod %f4,%f52 /* 0x008c 367 */ std %f0,[%i3+80] /* 0x0090 */ std %f0,[%i3+88] /* 0x0094 */ std %f0,[%i3+96] /* 0x0098 */ std %f0,[%i3+104] /* 0x009c 372 */ fmuld %f52,%f14,%f60 /* 0x00a0 367 */ std %f0,[%i3+112] /* 0x00a4 */ std %f0,[%i3+120] /* 0x00a8 */ std %f0,[%i3+128] /* 0x00ac */ std %f0,[%i3+136] /* 0x00b0 372 */ fmuld %f60,%f44,%f62 /* 0x00b4 367 */ std %f0,[%i3+144] /* 0x00b8 */ std %f0,[%i3+152] /* 0x00bc */ std %f0,[%i3+160] /* 0x00c0 */ std %f0,[%i3+168] /* 0x00c4 372 */ fdtox %f62,%f32 /* 0x00c8 367 */ std %f0,[%i3+176] /* 0x00cc */ std %f0,[%i3+184] /* 0x00d0 */ std %f0,[%i3+192] /* 0x00d4 */ std %f0,[%i3+200] /* 0x00d8 372 */ fxtod %f32,%f50 /* 0x00dc 367 */ std %f0,[%i3+208] /* 0x00e0 */ std %f0,[%i3+216] /* 0x00e4 */ std %f0,[%i3+224] /* 0x00e8 */ std %f0,[%i3+232] /* 0x00ec 372 */ fmuld %f50,%f46,%f34 /* 0x00f0 367 */ std %f0,[%i3+240] /* 0x00f4 */ std %f0,[%i3+248] /* 0x00f8 */ std %f0,[%i3+256] /* 0x00fc */ std %f0,[%i3+264] /* 0x0100 372 */ fsubd %f60,%f34,%f40 /* 0x0104 367 */ std %f0,[%i3+272] /* 0x0108 */ std %f0,[%i3+280] /* 0x010c */ std %f0,[%i3+288] /* 0x0110 */ std %f0,[%i3+296] /* 0x0114 */ std %f0,[%i3+304] /* 0x0118 */ std %f0,[%i3+312] /* 0x011c */ std %f0,[%i3+320] /* 0x0120 */ std %f0,[%i3+328] /* 0x0124 */ std %f0,[%i3+336] /* 0x0128 */ std %f0,[%i3+344] /* 0x012c */ std %f0,[%i3+352] /* 0x0130 */ std %f0,[%i3+360] /* 
0x0134 */ std %f0,[%i3+368] /* 0x0138 375 */ sub %g1,1,%l3 /* 0x013c */ add %i3,8,%o7 /* 0x0140 367 */ std %f0,[%i3+376] /* 0x0144 */ std %f0,[%i3+384] /* 0x0148 */ std %f0,[%i3+392] /* 0x014c */ std %f0,[%i3+400] /* 0x0150 */ std %f0,[%i3+408] /* 0x0154 */ std %f0,[%i3+416] /* 0x0158 */ std %f0,[%i3+424] /* 0x015c */ std %f0,[%i3+432] /* 0x0160 */ std %f0,[%i3+440] /* 0x0164 */ std %f0,[%i3+448] /* 0x0168 */ std %f0,[%i3+456] /* 0x016c */ std %f0,[%i3+464] /* 0x0170 */ std %f0,[%i3+472] /* 0x0174 */ std %f0,[%i3+480] /* 0x0178 */ std %f0,[%i3+488] /* 0x017c */ std %f0,[%i3+496] /* 0x0180 */ std %f0,[%i3+504] /* 0x0184 */ std %f0,[%i3+512] /* 0x0188 */ std %f0,[%i3+520] !BEGIN HAND CODED PART ! cheetah schedule, no even-odd trick add %i3,%g0,%o5 fmovd %f40,%f0 fmovd %f14,%f2 fmovd %f44,%f8 sethi %hi(TwoTo32),%l5 fmovd %f46,%f10 sethi %hi(TwoToMinus32),%g5 ldd [%i3],%f6 ldd [%l0],%f4 ldd [%i1],%f40 ldd [%i1+8],%f42 ldd [%i1+16],%f52 ldd [%i1+48],%f54 ldd [%i1+56],%f36 ldd [%i1+64],%f56 ldd [%i1+104],%f48 ldd [%i1+112],%f58 ldd [%i4],%f44 ldd [%i4+8],%f46 ldd [%i4+104],%f50 ldd [%i4+112],%f60 .L99999999: !1 ldd [%i1+24],%f20 fmuld %f0,%f44,%f12 !2 ldd [%i4+24],%f22 fmuld %f42,%f4,%f16 !3 ldd [%i1+40],%f24 fmuld %f46,%f0,%f18 !4 ldd [%i4+40],%f26 fmuld %f20,%f4,%f20 !5 ldd [%l0+8],%f38 faddd %f12,%f6,%f12 fmuld %f22,%f0,%f22 !6 add %l0,8,%l0 ldd [%i4+56],%f30 fmuld %f24,%f4,%f24 !7 ldd [%i1+72],%f32 faddd %f16,%f18,%f16 fmuld %f26,%f0,%f26 !8 ldd [%i3+16],%f18 fmuld %f40,%f38,%f14 !9 ldd [%i4+72],%f34 faddd %f20,%f22,%f20 fmuld %f8,%f12,%f12 !10 ldd [%i3+48],%f22 fmuld %f36,%f4,%f28 !11 ldd [%i3+8],%f6 faddd %f16,%f18,%f16 fmuld %f30,%f0,%f30 !12 std %f16,[%i3+16] faddd %f24,%f26,%f24 fmuld %f32,%f4,%f32 !13 ldd [%i3+80],%f26 faddd %f12,%f14,%f12 fmuld %f34,%f0,%f34 !14 ldd [%i1+88],%f16 faddd %f20,%f22,%f20 !15 ldd [%i4+88],%f18 faddd %f28,%f30,%f28 !16 ldd [%i3+112],%f30 faddd %f32,%f34,%f32 !17 ldd [%i3+144],%f34 faddd %f12,%f6,%f6 fmuld %f16,%f4,%f16 !18 std 
%f20,[%i3+48] faddd %f24,%f26,%f24 fmuld %f18,%f0,%f18 !19 std %f24,[%i3+80] faddd %f28,%f30,%f28 fmuld %f48,%f4,%f20 !20 std %f28,[%i3+112] faddd %f32,%f34,%f32 fmuld %f50,%f0,%f22 !21 ldd [%i1+120],%f24 fdtox %f6,%f12 !22 std %f32,[%i3+144] faddd %f16,%f18,%f16 !23 ldd [%i4+120],%f26 !24 ldd [%i3+176],%f18 faddd %f20,%f22,%f20 fmuld %f24,%f4,%f24 !25 ldd [%i4+16],%f30 fmovs %f11,%f12 !26 ldd [%i1+32],%f32 fmuld %f26,%f0,%f26 !27 ldd [%i4+32],%f34 fmuld %f52,%f4,%f28 !28 ldd [%i3+208],%f22 faddd %f16,%f18,%f16 fmuld %f30,%f0,%f30 !29 std %f16,[%i3+176] fxtod %f12,%f12 fmuld %f32,%f4,%f32 !30 ldd [%i4+48],%f18 faddd %f24,%f26,%f24 fmuld %f34,%f0,%f34 !31 ldd [%i3+240],%f26 faddd %f20,%f22,%f20 !32 std %f20,[%i3+208] faddd %f28,%f30,%f28 fmuld %f54,%f4,%f16 !33 ldd [%i3+32],%f30 fmuld %f12,%f2,%f14 !34 ldd [%i4+64],%f22 faddd %f32,%f34,%f32 fmuld %f18,%f0,%f18 !35 ldd [%i3+64],%f34 faddd %f24,%f26,%f24 !36 std %f24,[%i3+240] faddd %f28,%f30,%f28 fmuld %f56,%f4,%f20 !37 std %f28,[%i3+32] fmuld %f14,%f8,%f12 !38 ldd [%i1+80],%f24 faddd %f32,%f34,%f34 ! yes, tmp52! fmuld %f22,%f0,%f22 !39 ldd [%i4+80],%f26 faddd %f16,%f18,%f16 !40 ldd [%i1+96],%f28 fmuld %f58,%f4,%f32 !41 ldd [%i4+96],%f30 fdtox %f12,%f12 fmuld %f24,%f4,%f24 !42 std %f34,[%i3+64] ! yes, tmp52! faddd %f20,%f22,%f20 fmuld %f26,%f0,%f26 !43 ldd [%i3+96],%f18 fmuld %f28,%f4,%f28 !44 ldd [%i3+128],%f22 fmovd %f38,%f4 fmuld %f30,%f0,%f30 !45 fxtod %f12,%f12 fmuld %f60,%f0,%f34 !46 add %i3,8,%i3 faddd %f24,%f26,%f24 !47 ldd [%i3+160-8],%f26 faddd %f16,%f18,%f16 !48 std %f16,[%i3+96-8] faddd %f28,%f30,%f28 !49 ldd [%i3+192-8],%f30 faddd %f32,%f34,%f32 fmuld %f12,%f10,%f12 !50 ldd [%i3+224-8],%f34 faddd %f20,%f22,%f20 !51 std %f20,[%i3+128-8] faddd %f24,%f26,%f24 !52 add %l1,1,%l1 std %f24,[%i3+160-8] faddd %f28,%f30,%f28 !53 cmp %l1,15 std %f28,[%i3+192-8] fsubd %f14,%f12,%f0 !54 faddd %f32,%f34,%f32 ble,pt %icc,.L99999999 std %f32,[%i3+224-8] ! ldd [%g5+%lo(TwoToMinus32)],%f8 ! ldd [%i3+8],%f16 ! 
ldd [%i3+16],%f20 ! fmuld %f8,%f16,%f18 ldd [%i3+24],%f24 ! fmuld %f8,%f20,%f22 ldd [%i3+32],%f28 ! fmuld %f8,%f24,%f26 ldd [%l5+%lo(TwoTo32)],%f10 ! fmuld %f8,%f28,%f30 ! fdtox %f18,%f18 ! fdtox %f22,%f22 ! fdtox %f26,%f26 ldd [%i3+40],%f32 ! fdtox %f30,%f30 ldd [%i3+48],%f56 ! fxtod %f18,%f18 fmuld %f8,%f32,%f34 ldd [%i3+56],%f36 ! fxtod %f22,%f22 fmuld %f8,%f56,%f58 ldd [%i3+64],%f38 ! fxtod %f26,%f26 fmuld %f8,%f36,%f60 ! fxtod %f30,%f30 fmuld %f8,%f38,%f62 ! fdtox %f34,%f34 fmuld %f10,%f18,%f40 ! fdtox %f58,%f58 fmuld %f10,%f22,%f42 ! fdtox %f60,%f60 fmuld %f10,%f26,%f44 ! fdtox %f62,%f62 fmuld %f10,%f30,%f46 ! fxtod %f34,%f34 ! fxtod %f58,%f58 ! fxtod %f60,%f60 ! fxtod %f62,%f62 ! fsubd %f16,%f40,%f40 fmuld %f10,%f34,%f48 ! fsubd %f20,%f42,%f42 fmuld %f10,%f58,%f50 ! fsubd %f24,%f44,%f44 fmuld %f10,%f60,%f52 ! fsubd %f28,%f46,%f46 fmuld %f10,%f62,%f54 ! std %f40,[%i3+8] ! std %f42,[%i3+16] ! faddd %f18,%f44,%f44 std %f44,[%i3+24] ! faddd %f22,%f46,%f46 std %f46,[%i3+32] ! fsubd %f32,%f48,%f48 ldd [%i3+64+8],%f16 ! fsubd %f56,%f50,%f50 ldd [%i3+64+16],%f20 ! fsubd %f36,%f52,%f52 ldd [%i3+64+24],%f24 ! fsubd %f38,%f54,%f54 ldd [%i3+64+32],%f28 ! faddd %f26,%f48,%f48 fmuld %f8,%f16,%f18 std %f48,[%i3+40] ! faddd %f30,%f50,%f50 fmuld %f8,%f20,%f22 std %f50,[%i3+48] ! faddd %f34,%f52,%f52 fmuld %f8,%f24,%f26 std %f52,[%i3+56] ! faddd %f58,%f54,%f54 fmuld %f8,%f28,%f30 std %f54,[%i3+64] ! fdtox %f18,%f18 ! fdtox %f22,%f22 ! fdtox %f26,%f26 ldd [%i3+64+40],%f32 ! fdtox %f30,%f30 ldd [%i3+64+48],%f56 ! fxtod %f18,%f18 fmuld %f8,%f32,%f34 ldd [%i3+64+56],%f36 ! fxtod %f22,%f22 fmuld %f8,%f56,%f58 ldd [%i3+64+64],%f38 ! fxtod %f26,%f26 fmuld %f8,%f36,%f12 ! fxtod %f30,%f30 fmuld %f8,%f38,%f14 ! fdtox %f34,%f34 fmuld %f10,%f18,%f40 ! fdtox %f58,%f58 fmuld %f10,%f22,%f42 ! fdtox %f12,%f12 fmuld %f10,%f26,%f44 ! fdtox %f14,%f14 fmuld %f10,%f30,%f46 ! fxtod %f34,%f34 ! fxtod %f58,%f58 ! fxtod %f12,%f12 ! fxtod %f14,%f14 ! fsubd %f16,%f40,%f40 fmuld %f10,%f34,%f48 ! 
fsubd %f20,%f42,%f42 fmuld %f10,%f58,%f50 ! fsubd %f24,%f44,%f44 fmuld %f10,%f12,%f52 ! fsubd %f28,%f46,%f46 fmuld %f10,%f14,%f54 ! faddd %f60,%f40,%f40 std %f40,[%i3+64+8] ! faddd %f62,%f42,%f42 std %f42,[%i3+64+16] ! faddd %f18,%f44,%f44 std %f44,[%i3+64+24] ! faddd %f22,%f46,%f46 std %f46,[%i3+64+32] ! fsubd %f32,%f48,%f48 ldd [%i3+64+64+8],%f16 ! fsubd %f56,%f50,%f50 ldd [%i3+64+64+16],%f20 ! fsubd %f36,%f52,%f52 ldd [%i3+64+64+24],%f24 ! fsubd %f38,%f54,%f54 ldd [%i3+64+64+32],%f28 ! faddd %f26,%f48,%f48 fmuld %f8,%f16,%f18 std %f48,[%i3+64+40] ! faddd %f30,%f50,%f50 fmuld %f8,%f20,%f22 std %f50,[%i3+64+48] ! faddd %f34,%f52,%f52 fmuld %f8,%f24,%f26 std %f52,[%i3+64+56] ! faddd %f58,%f54,%f54 fmuld %f8,%f28,%f30 std %f54,[%i3+64+64] ! fdtox %f18,%f18 ! fdtox %f22,%f22 ! fdtox %f26,%f26 ldd [%i3+64+64+40],%f32 ! fdtox %f30,%f30 ldd [%i3+64+64+48],%f56 ! fxtod %f18,%f18 fmuld %f8,%f32,%f34 ldd [%i3+64+64+56],%f36 ! fxtod %f22,%f22 fmuld %f8,%f56,%f58 ldd [%i3+64+64+64],%f38 ! fxtod %f26,%f26 fmuld %f8,%f36,%f60 ! fxtod %f30,%f30 fmuld %f8,%f38,%f62 ! fdtox %f34,%f34 fmuld %f10,%f18,%f40 ! fdtox %f58,%f58 fmuld %f10,%f22,%f42 ! fdtox %f60,%f60 fmuld %f10,%f26,%f44 ! fdtox %f62,%f62 fmuld %f10,%f30,%f46 ! fxtod %f34,%f34 ! fxtod %f58,%f58 ! fxtod %f60,%f60 ! fxtod %f62,%f62 ! fsubd %f16,%f40,%f40 fmuld %f10,%f34,%f48 ! fsubd %f20,%f42,%f42 fmuld %f10,%f58,%f50 ! fsubd %f24,%f44,%f44 fmuld %f10,%f60,%f52 ! fsubd %f28,%f46,%f46 fmuld %f10,%f62,%f54 ! faddd %f12,%f40,%f40 std %f40,[%i3+64+64+8] ! faddd %f14,%f42,%f42 std %f42,[%i3+64+64+16] ! faddd %f18,%f44,%f44 std %f44,[%i3+64+64+24] ! faddd %f22,%f46,%f46 std %f46,[%i3+64+64+32] ! fsubd %f32,%f48,%f48 ldd [%i3+64+64+64+8],%f16 ! fsubd %f56,%f50,%f50 ldd [%i3+64+64+64+16],%f20 ! fsubd %f36,%f52,%f52 ldd [%i3+64+64+64+24],%f24 ! fsubd %f38,%f54,%f54 ldd [%i3+64+64+64+32],%f28 ! faddd %f26,%f48,%f48 fmuld %f8,%f16,%f18 std %f48,[%i3+64+64+40] ! faddd %f30,%f50,%f50 fmuld %f8,%f20,%f22 std %f50,[%i3+64+64+48] ! 
faddd %f34,%f52,%f52 fmuld %f8,%f24,%f26 std %f52,[%i3+64+64+56] ! faddd %f58,%f54,%f54 fmuld %f8,%f28,%f30 std %f54,[%i3+64+64+64] ! fdtox %f18,%f18 ! fdtox %f22,%f22 ! fdtox %f26,%f26 ldd [%i3+64+64+64+40],%f32 ! fdtox %f30,%f30 ldd [%i3+64+64+64+48],%f56 ! fxtod %f18,%f18 fmuld %f8,%f32,%f34 ldd [%i3+64+64+64+56],%f36 ! fxtod %f22,%f22 fmuld %f8,%f56,%f58 ldd [%i3+64+64+64+64],%f38 ! fxtod %f26,%f26 fmuld %f8,%f36,%f12 ! fxtod %f30,%f30 fmuld %f8,%f38,%f14 ! fdtox %f34,%f34 fmuld %f10,%f18,%f40 ! fdtox %f58,%f58 fmuld %f10,%f22,%f42 ! fdtox %f12,%f12 fmuld %f10,%f26,%f44 ! fdtox %f14,%f14 fmuld %f10,%f30,%f46 ! sethi %hi(TwoToMinus16),%g5 fxtod %f34,%f34 ! sethi %hi(TwoTo16),%l5 fxtod %f58,%f58 ! fxtod %f12,%f12 ! fxtod %f14,%f14 ! fsubd %f16,%f40,%f16 fmuld %f10,%f34,%f48 ldd [%g5+%lo(TwoToMinus16)],%f8 ! fsubd %f20,%f42,%f20 fmuld %f10,%f58,%f50 ldd [%i1],%f40 ! should be %f40 ! fsubd %f24,%f44,%f24 fmuld %f10,%f12,%f52 ldd [%i1+8],%f42 ! should be %f42 ! fsubd %f28,%f46,%f28 fmuld %f10,%f14,%f54 ldd [%i4],%f44 ! should be %f44 ! faddd %f60,%f16,%f16 std %f16,[%i3+64+64+64+8] ! faddd %f62,%f20,%f20 std %f20,[%i3+64+64+64+16] ! faddd %f18,%f24,%f24 std %f24,[%i3+64+64+64+24] ! faddd %f22,%f28,%f28 std %f28,[%i3+64+64+64+32] ! fsubd %f32,%f48,%f32 ldd [%i4+8],%f46 ! should be %f46 ! fsubd %f56,%f50,%f56 ldd [%i1+104],%f48 ! should be %f48 ! fsubd %f36,%f52,%f36 ldd [%i4+104],%f50 ! should be %f50 ! fsubd %f38,%f54,%f38 ldd [%i1+16],%f52 ! should be %f52 ! faddd %f26,%f32,%f32 std %f32,[%i3+64+64+64+40] ! faddd %f30,%f56,%f56 std %f56,[%i3+64+64+64+48] ! faddd %f34,%f36,%f36 std %f36,[%i3+64+64+64+56] ! faddd %f58,%f38,%f38 std %f38,[%i3+64+64+64+64] ! std %f12,[%i3+64+64+64+64+8] ! std %f14,[%i3+64+64+64+64+16] ! 
ldd [%l5+%lo(TwoTo16)],%f10 ldd [%i1+48],%f54 ldd [%i1+56],%f36 ldd [%i1+64],%f56 ldd [%i1+112],%f58 ldd [%i4+104],%f50 ldd [%i4+112],%f60 .L99999998: !1 ldd [%i1+24],%f20 fmuld %f0,%f44,%f12 !2 ldd [%i4+24],%f22 fmuld %f42,%f4,%f16 !3 ldd [%i1+40],%f24 fmuld %f46,%f0,%f18 !4 ldd [%i4+40],%f26 fmuld %f20,%f4,%f20 !5 ldd [%l0+8],%f38 faddd %f12,%f6,%f12 fmuld %f22,%f0,%f22 !6 add %l0,8,%l0 ldd [%i4+56],%f30 fmuld %f24,%f4,%f24 !7 ldd [%i1+72],%f32 faddd %f16,%f18,%f16 fmuld %f26,%f0,%f26 !8 ldd [%i3+16],%f18 fmuld %f40,%f38,%f14 !9 ldd [%i4+72],%f34 faddd %f20,%f22,%f20 fmuld %f8,%f12,%f12 !10 ldd [%i3+48],%f22 fmuld %f36,%f4,%f28 !11 ldd [%i3+8],%f6 faddd %f16,%f18,%f16 fmuld %f30,%f0,%f30 !12 std %f16,[%i3+16] faddd %f24,%f26,%f24 fmuld %f32,%f4,%f32 !13 ldd [%i3+80],%f26 faddd %f12,%f14,%f12 fmuld %f34,%f0,%f34 !14 ldd [%i1+88],%f16 faddd %f20,%f22,%f20 !15 ldd [%i4+88],%f18 faddd %f28,%f30,%f28 !16 ldd [%i3+112],%f30 faddd %f32,%f34,%f32 !17 ldd [%i3+144],%f34 faddd %f12,%f6,%f6 fmuld %f16,%f4,%f16 !18 std %f20,[%i3+48] faddd %f24,%f26,%f24 fmuld %f18,%f0,%f18 !19 std %f24,[%i3+80] faddd %f28,%f30,%f28 fmuld %f48,%f4,%f20 !20 std %f28,[%i3+112] faddd %f32,%f34,%f32 fmuld %f50,%f0,%f22 !21 ldd [%i1+120],%f24 fdtox %f6,%f12 !22 std %f32,[%i3+144] faddd %f16,%f18,%f16 !23 ldd [%i4+120],%f26 !24 ldd [%i3+176],%f18 faddd %f20,%f22,%f20 fmuld %f24,%f4,%f24 !25 ldd [%i4+16],%f30 fmovs %f11,%f12 !26 ldd [%i1+32],%f32 fmuld %f26,%f0,%f26 !27 ldd [%i4+32],%f34 fmuld %f52,%f4,%f28 !28 ldd [%i3+208],%f22 faddd %f16,%f18,%f16 fmuld %f30,%f0,%f30 !29 std %f16,[%i3+176] fxtod %f12,%f12 fmuld %f32,%f4,%f32 !30 ldd [%i4+48],%f18 faddd %f24,%f26,%f24 fmuld %f34,%f0,%f34 !31 ldd [%i3+240],%f26 faddd %f20,%f22,%f20 !32 std %f20,[%i3+208] faddd %f28,%f30,%f28 fmuld %f54,%f4,%f16 !33 ldd [%i3+32],%f30 fmuld %f12,%f2,%f14 !34 ldd [%i4+64],%f22 faddd %f32,%f34,%f32 fmuld %f18,%f0,%f18 !35 ldd [%i3+64],%f34 faddd %f24,%f26,%f24 !36 std %f24,[%i3+240] faddd %f28,%f30,%f28 fmuld 
%f56,%f4,%f20 !37 std %f28,[%i3+32] fmuld %f14,%f8,%f12 !38 ldd [%i1+80],%f24 faddd %f32,%f34,%f34 ! yes, tmp52! fmuld %f22,%f0,%f22 !39 ldd [%i4+80],%f26 faddd %f16,%f18,%f16 !40 ldd [%i1+96],%f28 fmuld %f58,%f4,%f32 !41 ldd [%i4+96],%f30 fdtox %f12,%f12 fmuld %f24,%f4,%f24 !42 std %f34,[%i3+64] ! yes, tmp52! faddd %f20,%f22,%f20 fmuld %f26,%f0,%f26 !43 ldd [%i3+96],%f18 fmuld %f28,%f4,%f28 !44 ldd [%i3+128],%f22 fmovd %f38,%f4 fmuld %f30,%f0,%f30 !45 fxtod %f12,%f12 fmuld %f60,%f0,%f34 !46 add %i3,8,%i3 faddd %f24,%f26,%f24 !47 ldd [%i3+160-8],%f26 faddd %f16,%f18,%f16 !48 std %f16,[%i3+96-8] faddd %f28,%f30,%f28 !49 ldd [%i3+192-8],%f30 faddd %f32,%f34,%f32 fmuld %f12,%f10,%f12 !50 ldd [%i3+224-8],%f34 faddd %f20,%f22,%f20 !51 std %f20,[%i3+128-8] faddd %f24,%f26,%f24 !52 add %l1,1,%l1 std %f24,[%i3+160-8] faddd %f28,%f30,%f28 !53 cmp %l1,31 std %f28,[%i3+192-8] fsubd %f14,%f12,%f0 !54 faddd %f32,%f34,%f32 ble,pt %icc,.L99999998 std %f32,[%i3+224-8] !55 std %f6,[%i3] add %o5,%g0,%i3 !END HAND CODED PART .L900000828: /* 0x03e4 405 */ ba .L900000852 /* 0x03e8 409 */ ldx [%i3+%o0],%l1 ! 406 ! } ! 407 ! } ! 409 ! conv_d16_to_i32(result, dt + 2 * nlen, (int64_t *)dt, nlen + 1); ! 411 !/*for(i=0;i<nlen+1;i++) saveresult[i]=result[i];*/ ! 413 ! 
adjust_montf_result(result, nint, nlen); .L77000476: /* 0x03ec 413 */ sll %g1,2,%l3 /* 0x03f0 0 */ sethi %hi(TwoTo16),%g5 /* 0x03f4 413 */ add %l3,2,%l2 /* 0x03f8 328 */ cmp %l2,0 /* 0x03fc */ ble,pn %icc,.L77000482 /* 0x0400 0 */ sethi %hi(TwoToMinus16),%o2 .L77000514: /* 0x0404 329 */ add %l3,2,%l2 /* 0x0408 328 */ add %l3,1,%o4 /* 0x040c */ or %g0,0,%l3 /* 0x0410 329 */ cmp %l2,8 /* 0x0414 */ bl,pn %icc,.L77000477 /* 0x0418 328 */ or %g0,%i3,%l1 .L900000831: /* 0x041c 329 */ prefetch [%i3],22 /* 0x0420 */ sub %o4,7,%l4 /* 0x0424 */ or %g0,0,%l3 /* 0x0428 */ or %g0,%i3,%l1 .L900000829: /* 0x042c 329 */ prefetch [%l1+528],22 /* 0x0430 */ std %f0,[%l1] /* 0x0434 */ add %l3,8,%l3 /* 0x0438 */ add %l1,64,%l1 /* 0x043c */ std %f0,[%l1-56] /* 0x0440 */ cmp %l3,%l4 /* 0x0444 */ std %f0,[%l1-48] /* 0x0448 */ std %f0,[%l1-40] /* 0x044c */ prefetch [%l1+496],22 /* 0x0450 */ std %f0,[%l1-32] /* 0x0454 */ std %f0,[%l1-24] /* 0x0458 */ std %f0,[%l1-16] /* 0x045c */ ble,pt %icc,.L900000829 /* 0x0460 */ std %f0,[%l1-8] .L900000832: /* 0x0464 329 */ cmp %l3,%o4 /* 0x0468 */ bg,pn %icc,.L77000482 /* 0x046c */ nop .L77000477: /* 0x0470 329 */ add %l3,1,%l3 .L900000851: /* 0x0474 329 */ std %f0,[%l1] /* 0x0478 */ cmp %l3,%o4 /* 0x047c */ add %l1,8,%l1 /* 0x0480 */ ble,pt %icc,.L900000851 /* 0x0484 */ add %l3,1,%l3 .L77000482: /* 0x0488 330 */ ldd [%i1],%f40 /* 0x048c 334 */ cmp %o3,0 /* 0x0490 */ sub %g1,1,%l3 /* 0x0494 330 */ ldd [%l0],%f42 /* 0x0498 331 */ ldd [%o2+%lo(TwoToMinus16)],%f36 /* 0x049c */ ldd [%g5+%lo(TwoTo16)],%f38 /* 0x04a0 330 */ fmuld %f40,%f42,%f52 /* 0x04a4 331 */ fdtox %f52,%f8 /* 0x04a8 */ fmovs %f0,%f8 /* 0x04ac */ fxtod %f8,%f62 /* 0x04b0 */ fmuld %f62,%f14,%f60 /* 0x04b4 */ fmuld %f60,%f36,%f32 /* 0x04b8 */ fdtox %f32,%f50 /* 0x04bc */ fxtod %f50,%f34 /* 0x04c0 */ fmuld %f34,%f38,%f46 /* 0x04c4 */ fsubd %f60,%f46,%f40 /* 0x04c8 334 */ ble,pn %icc,.L77000378 /* 0x04cc 330 */ std %f52,[%i3] .L77000509: /* 0x04d0 345 */ add %o3,1,%g5 /* 0x04d4 */ sll 
%g5,1,%o2 /* 0x04d8 */ or %g0,0,%l1 /* 0x04dc 337 */ ldd [%i4],%f42 /* 0x04e0 345 */ sub %o3,1,%o3 /* 0x04e4 */ or %g0,0,%o5 /* 0x04e8 */ or %g0,%i3,%l2 /* 0x04ec */ add %i4,8,%o1 /* 0x04f0 */ add %i1,8,%g5 .L900000848: /* 0x04f4 337 */ fmuld %f40,%f42,%f34 /* 0x04f8 */ ldd [%l0+8],%f32 /* 0x04fc 341 */ cmp %g1,1 /* 0x0500 337 */ ldd [%i1],%f50 /* 0x0504 */ ldd [%l2],%f46 /* 0x0508 */ ldd [%l2+8],%f44 /* 0x050c */ fmuld %f50,%f32,%f60 /* 0x0510 335 */ ldd [%l0],%f42 /* 0x0514 337 */ faddd %f46,%f34,%f48 /* 0x0518 */ faddd %f44,%f60,%f58 /* 0x051c */ fmuld %f36,%f48,%f54 /* 0x0520 */ faddd %f58,%f54,%f34 /* 0x0524 341 */ ble,pn %icc,.L77000368 /* 0x0528 338 */ std %f34,[%l2+8] .L77000507: /* 0x052c 341 */ or %g0,1,%l5 /* 0x0530 */ or %g0,2,%l4 /* 0x0534 */ or %g0,%g5,%g4 /* 0x0538 342 */ cmp %l3,12 /* 0x053c */ bl,pn %icc,.L77000481 /* 0x0540 341 */ or %g0,%o1,%g3 .L900000839: /* 0x0544 342 */ prefetch [%i1+8],0 /* 0x0548 */ prefetch [%i1+72],0 /* 0x054c */ add %i4,40,%l6 /* 0x0550 */ add %i1,40,%l7 /* 0x0554 */ prefetch [%l2+16],0 /* 0x0558 */ or %g0,%l2,%o7 /* 0x055c */ sub %l3,7,%i5 /* 0x0560 */ prefetch [%l2+80],0 /* 0x0564 */ add %l2,80,%g2 /* 0x0568 */ or %g0,2,%l4 /* 0x056c */ prefetch [%i1+136],0 /* 0x0570 */ or %g0,5,%l5 /* 0x0574 */ prefetch [%i1+200],0 /* 0x0578 */ prefetch [%l2+144],0 /* 0x057c */ ldd [%i4+8],%f52 /* 0x0580 */ ldd [%i4+16],%f44 /* 0x0584 */ ldd [%i4+24],%f56 /* 0x0588 */ fmuld %f40,%f52,%f48 /* 0x058c */ fmuld %f40,%f44,%f46 /* 0x0590 */ fmuld %f40,%f56,%f44 /* 0x0594 */ ldd [%l2+48],%f56 /* 0x0598 */ prefetch [%l2+208],0 /* 0x059c */ prefetch [%l2+272],0 /* 0x05a0 */ prefetch [%l2+336],0 /* 0x05a4 */ prefetch [%l2+400],0 /* 0x05a8 */ ldd [%i1+8],%f32 /* 0x05ac */ ldd [%i1+16],%f60 /* 0x05b0 */ ldd [%i1+24],%f50 /* 0x05b4 */ fmuld %f42,%f32,%f62 /* 0x05b8 */ ldd [%i1+32],%f32 /* 0x05bc */ fmuld %f42,%f60,%f58 /* 0x05c0 */ ldd [%l2+16],%f52 /* 0x05c4 */ ldd [%l2+32],%f54 /* 0x05c8 */ faddd %f62,%f48,%f60 /* 0x05cc */ fmuld %f42,%f50,%f48 
/* 0x05d0 */ faddd %f58,%f46,%f62 /* 0x05d4 */ ldd [%i4+32],%f46 /* 0x05d8 */ ldd [%l2+64],%f58 .L900000837: /* 0x05dc 342 */ prefetch [%l7+192],0 /* 0x05e0 */ fmuld %f40,%f46,%f46 /* 0x05e4 */ faddd %f60,%f52,%f60 /* 0x05e8 */ ldd [%l6],%f52 /* 0x05ec */ std %f60,[%g2-64] /* 0x05f0 */ fmuld %f42,%f32,%f50 /* 0x05f4 */ add %l5,8,%l5 /* 0x05f8 */ ldd [%l7],%f60 /* 0x05fc */ faddd %f48,%f44,%f48 /* 0x0600 */ cmp %l5,%i5 /* 0x0604 */ ldd [%g2],%f32 /* 0x0608 */ add %g2,128,%g2 /* 0x060c */ prefetch [%g2+256],0 /* 0x0610 */ fmuld %f40,%f52,%f52 /* 0x0614 */ faddd %f62,%f54,%f44 /* 0x0618 */ ldd [%l6+8],%f54 /* 0x061c */ std %f44,[%g2-176] /* 0x0620 */ fmuld %f42,%f60,%f44 /* 0x0624 */ add %l6,64,%l6 /* 0x0628 */ ldd [%l7+8],%f60 /* 0x062c */ faddd %f50,%f46,%f50 /* 0x0630 */ add %l7,64,%l7 /* 0x0634 */ add %l4,16,%l4 /* 0x0638 */ ldd [%g2-112],%f46 /* 0x063c */ fmuld %f40,%f54,%f54 /* 0x0640 */ faddd %f48,%f56,%f62 /* 0x0644 */ ldd [%l6-48],%f56 /* 0x0648 */ std %f62,[%g2-160] /* 0x064c */ fmuld %f42,%f60,%f48 /* 0x0650 */ ldd [%l7-48],%f60 /* 0x0654 */ faddd %f44,%f52,%f52 /* 0x0658 */ ldd [%g2-96],%f30 /* 0x065c */ prefetch [%g2+288],0 /* 0x0660 */ fmuld %f40,%f56,%f56 /* 0x0664 */ faddd %f50,%f58,%f62 /* 0x0668 */ ldd [%l6-40],%f58 /* 0x066c */ std %f62,[%g2-144] /* 0x0670 */ fmuld %f42,%f60,%f50 /* 0x0674 */ ldd [%l7-40],%f62 /* 0x0678 */ faddd %f48,%f54,%f54 /* 0x067c */ ldd [%g2-80],%f28 /* 0x0680 */ prefetch [%l7+160],0 /* 0x0684 */ fmuld %f40,%f58,%f48 /* 0x0688 */ faddd %f52,%f32,%f44 /* 0x068c */ ldd [%l6-32],%f58 /* 0x0690 */ std %f44,[%g2-128] /* 0x0694 */ fmuld %f42,%f62,%f44 /* 0x0698 */ ldd [%l7-32],%f60 /* 0x069c */ faddd %f50,%f56,%f56 /* 0x06a0 */ ldd [%g2-64],%f52 /* 0x06a4 */ prefetch [%g2+320],0 /* 0x06a8 */ fmuld %f40,%f58,%f50 /* 0x06ac */ faddd %f54,%f46,%f32 /* 0x06b0 */ ldd [%l6-24],%f62 /* 0x06b4 */ std %f32,[%g2-112] /* 0x06b8 */ fmuld %f42,%f60,%f46 /* 0x06bc */ ldd [%l7-24],%f60 /* 0x06c0 */ faddd %f44,%f48,%f48 /* 0x06c4 */ ldd 
[%g2-48],%f54 /* 0x06c8 */ fmuld %f40,%f62,%f26 /* 0x06cc */ faddd %f56,%f30,%f32 /* 0x06d0 */ ldd [%l6-16],%f58 /* 0x06d4 */ std %f32,[%g2-96] /* 0x06d8 */ fmuld %f42,%f60,%f30 /* 0x06dc */ ldd [%l7-16],%f32 /* 0x06e0 */ faddd %f46,%f50,%f60 /* 0x06e4 */ ldd [%g2-32],%f56 /* 0x06e8 */ prefetch [%g2+352],0 /* 0x06ec */ fmuld %f40,%f58,%f44 /* 0x06f0 */ faddd %f48,%f28,%f62 /* 0x06f4 */ ldd [%l6-8],%f46 /* 0x06f8 */ std %f62,[%g2-80] /* 0x06fc */ fmuld %f42,%f32,%f48 /* 0x0700 */ ldd [%l7-8],%f32 /* 0x0704 */ faddd %f30,%f26,%f62 /* 0x0708 */ ble,pt %icc,.L900000837 /* 0x070c */ ldd [%g2-16],%f58 .L900000840: /* 0x0710 342 */ fmuld %f40,%f46,%f46 /* 0x0714 */ faddd %f62,%f54,%f62 /* 0x0718 */ std %f62,[%g2-48] /* 0x071c */ cmp %l5,%l3 /* 0x0720 */ fmuld %f42,%f32,%f50 /* 0x0724 */ faddd %f48,%f44,%f48 /* 0x0728 */ or %g0,%l7,%g4 /* 0x072c */ or %g0,%l6,%g3 /* 0x0730 */ faddd %f60,%f52,%f60 /* 0x0734 */ std %f60,[%g2-64] /* 0x0738 */ or %g0,%o7,%l2 /* 0x073c */ add %l4,8,%l4 /* 0x0740 */ faddd %f50,%f46,%f54 /* 0x0744 */ faddd %f48,%f56,%f56 /* 0x0748 */ std %f56,[%g2-32] /* 0x074c */ faddd %f54,%f58,%f58 /* 0x0750 */ bg,pn %icc,.L77000368 /* 0x0754 */ std %f58,[%g2-16] .L77000481: /* 0x0758 342 */ ldd [%g4],%f44 .L900000850: /* 0x075c 342 */ ldd [%g3],%f48 /* 0x0760 */ fmuld %f42,%f44,%f58 /* 0x0764 */ sra %l4,0,%l7 /* 0x0768 */ add %l5,1,%l5 /* 0x076c */ sllx %l7,3,%g2 /* 0x0770 */ add %g4,8,%g4 /* 0x0774 */ ldd [%l2+%g2],%f56 /* 0x0778 */ cmp %l5,%l3 /* 0x077c */ add %l4,2,%l4 /* 0x0780 */ fmuld %f40,%f48,%f54 /* 0x0784 */ add %g3,8,%g3 /* 0x0788 */ faddd %f58,%f54,%f52 /* 0x078c */ faddd %f52,%f56,%f62 /* 0x0790 */ std %f62,[%l2+%g2] /* 0x0794 */ ble,a,pt %icc,.L900000850 /* 0x0798 */ ldd [%g4],%f44 .L77000368: /* 0x079c 344 */ cmp %o5,15 /* 0x07a0 */ bne,pn %icc,.L77000483 /* 0x07a4 345 */ srl %l1,31,%g4 .L77000478: /* 0x07a8 345 */ add %l1,%g4,%l4 /* 0x07ac */ sra %l4,1,%o7 /* 0x07b0 */ add %o7,1,%o4 /* 0x07b4 */ sll %o4,1,%l6 /* 0x07b8 */ cmp %l6,%o2 /* 0x07bc 
*/ bge,pn %icc,.L77000392 /* 0x07c0 */ fmovd %f0,%f42 .L77000508: /* 0x07c4 345 */ sra %l6,0,%l4 /* 0x07c8 */ sllx %l4,3,%g2 /* 0x07cc */ fmovd %f0,%f32 /* 0x07d0 */ sub %o2,1,%l5 /* 0x07d4 */ ldd [%g2+%i3],%f40 /* 0x07d8 */ add %g2,%i3,%g3 .L900000849: /* 0x07dc 345 */ fdtox %f40,%f10 /* 0x07e0 */ ldd [%g3+8],%f52 /* 0x07e4 */ add %l6,2,%l6 /* 0x07e8 */ cmp %l6,%l5 /* 0x07ec */ fdtox %f52,%f2 /* 0x07f0 */ fmovd %f10,%f30 /* 0x07f4 */ fmovs %f0,%f10 /* 0x07f8 */ fmovs %f0,%f2 /* 0x07fc */ fxtod %f10,%f10 /* 0x0800 */ fxtod %f2,%f2 /* 0x0804 */ fdtox %f52,%f28 /* 0x0808 */ faddd %f10,%f32,%f56 /* 0x080c */ std %f56,[%g3] /* 0x0810 */ faddd %f2,%f42,%f62 /* 0x0814 */ std %f62,[%g3+8] /* 0x0818 */ fitod %f30,%f32 /* 0x081c */ add %g3,16,%g3 /* 0x0820 */ fitod %f28,%f42 /* 0x0824 */ ble,a,pt %icc,.L900000849 /* 0x0828 */ ldd [%g3],%f40 .L77000392: /* 0x082c 346 */ or %g0,0,%o5 .L77000483: /* 0x0830 350 */ fdtox %f34,%f6 /* 0x0834 */ add %l1,1,%l1 /* 0x0838 */ cmp %l1,%o3 /* 0x083c */ add %o5,1,%o5 /* 0x0840 */ add %l2,8,%l2 /* 0x0844 */ add %l0,8,%l0 /* 0x0848 */ fmovs %f0,%f6 /* 0x084c */ fxtod %f6,%f46 /* 0x0850 */ fmuld %f46,%f14,%f56 /* 0x0854 */ fmuld %f56,%f36,%f44 /* 0x0858 */ fdtox %f44,%f48 /* 0x085c */ fxtod %f48,%f58 /* 0x0860 */ fmuld %f58,%f38,%f54 /* 0x0864 */ fsubd %f56,%f54,%f40 /* 0x0868 */ ble,a,pt %icc,.L900000848 /* 0x086c 337 */ ldd [%i4],%f42 .L77000378: /* 0x0870 409 */ ldx [%i3+%o0],%l1 .L900000852: /* 0x0874 409 */ add %i3,%o0,%l4 /* 0x0878 */ ldx [%l4+8],%i1 /* 0x087c */ cmp %l1,0 /* 0x0880 */ bne,pn %xcc,.L77000403 /* 0x0884 */ or %g0,0,%g5 .L77000402: /* 0x0888 409 */ or %g0,0,%i3 /* 0x088c */ ba .L900000847 /* 0x0890 */ cmp %i1,0 .L77000403: /* 0x0894 409 */ srlx %l1,52,%o5 /* 0x0898 */ sethi %hi(0xfff00000),%i3 /* 0x089c */ sllx %i3,32,%o2 /* 0x08a0 */ sethi %hi(0x40000000),%o0 /* 0x08a4 */ sllx %o0,22,%o4 /* 0x08a8 */ or %g0,1023,%l0 /* 0x08ac */ xor %o2,-1,%o3 /* 0x08b0 */ sub %l0,%o5,%o7 /* 0x08b4 */ and %l1,%o3,%l1 /* 0x08b8 */ add 
%o7,52,%i4 /* 0x08bc */ or %l1,%o4,%o1 /* 0x08c0 */ cmp %i1,0 /* 0x08c4 */ srlx %o1,%i4,%i3 .L900000847: /* 0x08c8 409 */ bne,pn %xcc,.L77000409 /* 0x08cc */ or %g0,0,%o7 .L77000408: /* 0x08d0 409 */ ba .L900000846 /* 0x08d4 350 */ cmp %g1,0 .L77000409: /* 0x08d8 409 */ srlx %i1,52,%l2 /* 0x08dc */ sethi %hi(0xfff00000),%o7 /* 0x08e0 */ sllx %o7,32,%i4 /* 0x08e4 */ sethi %hi(0x40000000),%i5 /* 0x08e8 */ sllx %i5,22,%l6 /* 0x08ec */ or %g0,1023,%l5 /* 0x08f0 */ xor %i4,-1,%o1 /* 0x08f4 */ sub %l5,%l2,%g2 /* 0x08f8 */ and %i1,%o1,%l7 /* 0x08fc */ add %g2,52,%g3 /* 0x0900 */ or %l7,%l6,%g4 /* 0x0904 350 */ cmp %g1,0 /* 0x0908 409 */ srlx %g4,%g3,%o7 .L900000846: /* 0x090c 350 */ ble,pn %icc,.L77000397 /* 0x0910 */ or %g0,0,%l5 .L77000510: /* 0x0914 409 */ sethi %hi(0xfff00000),%g4 /* 0x0918 */ sllx %g4,32,%o0 /* 0x091c 0 */ or %g0,-1,%i5 /* 0x0920 409 */ srl %i5,0,%l7 /* 0x0924 */ sethi %hi(0x40000000),%i1 /* 0x0928 */ sllx %i1,22,%l6 /* 0x092c */ sethi %hi(0xfc00),%i4 /* 0x0930 */ xor %o0,-1,%g2 /* 0x0934 */ add %i4,1023,%l2 /* 0x0938 */ or %g0,2,%g4 /* 0x093c */ or %g0,%i2,%g3 .L77000395: /* 0x0940 409 */ sra %g4,0,%o2 /* 0x0944 */ add %g4,1,%o3 /* 0x0948 */ sllx %o2,3,%o0 /* 0x094c */ sra %o3,0,%o5 /* 0x0950 */ ldx [%l4+%o0],%o4 /* 0x0954 */ sllx %o5,3,%l0 /* 0x0958 */ and %i3,%l7,%o1 /* 0x095c */ ldx [%l4+%l0],%i4 /* 0x0960 */ cmp %o4,0 /* 0x0964 */ bne,pn %xcc,.L77000415 /* 0x0968 350 */ and %o7,%l2,%i5 .L77000414: /* 0x096c 409 */ or %g0,0,%l1 /* 0x0970 */ ba .L900000845 /* 0x0974 */ add %g5,%o1,%i1 .L77000415: /* 0x0978 409 */ srlx %o4,52,%o3 /* 0x097c */ and %o4,%g2,%l1 /* 0x0980 */ or %g0,52,%o0 /* 0x0984 */ sub %o3,1023,%l0 /* 0x0988 */ or %l1,%l6,%o4 /* 0x098c */ sub %o0,%l0,%o5 /* 0x0990 */ srlx %o4,%o5,%l1 /* 0x0994 */ add %g5,%o1,%i1 .L900000845: /* 0x0998 409 */ srax %i3,32,%g5 /* 0x099c */ cmp %i4,0 /* 0x09a0 */ bne,pn %xcc,.L77000421 /* 0x09a4 350 */ sllx %i5,16,%o2 .L77000420: /* 0x09a8 409 */ or %g0,0,%o4 /* 0x09ac */ ba .L900000844 /* 0x09b0 350 */ 
add %i1,%o2,%o5 .L77000421: /* 0x09b4 409 */ srlx %i4,52,%o4 /* 0x09b8 */ or %g0,52,%o0 /* 0x09bc */ sub %o4,1023,%o3 /* 0x09c0 */ and %i4,%g2,%i3 /* 0x09c4 */ or %i3,%l6,%o5 /* 0x09c8 */ sub %o0,%o3,%l0 /* 0x09cc */ srlx %o5,%l0,%o4 /* 0x09d0 350 */ add %i1,%o2,%o5 .L900000844: /* 0x09d4 350 */ srax %o7,16,%i4 /* 0x09d8 */ srax %o5,32,%i5 /* 0x09dc */ add %i4,%i5,%o1 /* 0x09e0 */ add %l5,1,%l5 /* 0x09e4 */ and %o5,%l7,%i1 /* 0x09e8 */ add %g5,%o1,%g5 /* 0x09ec */ st %i1,[%g3] /* 0x09f0 */ or %g0,%l1,%i3 /* 0x09f4 */ or %g0,%o4,%o7 /* 0x09f8 */ add %g4,2,%g4 /* 0x09fc */ cmp %l5,%l3 /* 0x0a00 */ ble,pt %icc,.L77000395 /* 0x0a04 */ add %g3,4,%g3 .L77000397: /* 0x0a08 409 */ sethi %hi(0xfc00),%l4 /* 0x0a0c */ sra %l5,0,%i5 /* 0x0a10 */ add %l4,1023,%i1 /* 0x0a14 */ add %g5,%i3,%l5 /* 0x0a18 */ and %o7,%i1,%g5 /* 0x0a1c */ sllx %g5,16,%l2 /* 0x0a20 */ sllx %i5,2,%l7 /* 0x0a24 413 */ sra %g1,0,%g2 /* 0x0a28 409 */ add %l5,%l2,%l6 /* 0x0a2c */ st %l6,[%i2+%l7] /* 0x0a30 413 */ sllx %g2,2,%g3 /* 0x0a34 */ ld [%i2+%g3],%g4 /* 0x0a38 */ cmp %g4,0 /* 0x0a3c */ bgu,pn %icc,.L77000486 /* 0x0a40 */ cmp %l3,0 .L77000427: /* 0x0a44 413 */ bl,pn %icc,.L77000486 /* 0x0a48 */ or %g0,%l3,%i5 .L77000512: /* 0x0a4c 413 */ sra %l3,0,%o5 /* 0x0a50 */ sllx %o5,2,%l7 /* 0x0a54 */ ld [%l7+%i0],%o5 /* 0x0a58 */ add %l7,%i2,%o1 /* 0x0a5c */ add %l7,%i0,%i4 .L900000843: /* 0x0a60 413 */ ld [%o1],%i1 /* 0x0a64 */ cmp %i1,%o5 /* 0x0a68 */ bne,pn %icc,.L77000435 /* 0x0a6c */ sub %o1,4,%o1 .L77000431: /* 0x0a70 413 */ sub %i4,4,%i4 /* 0x0a74 */ subcc %i5,1,%i5 /* 0x0a78 */ bpos,a,pt %icc,.L900000843 /* 0x0a7c */ ld [%i4],%o5 .L900000827: /* 0x0a80 413 */ ba .L900000842 /* 0x0a84 350 */ cmp %g1,0 .L77000435: /* 0x0a88 413 */ sra %i5,0,%o0 /* 0x0a8c */ sllx %o0,2,%l1 /* 0x0a90 */ ld [%i0+%l1],%i3 /* 0x0a94 */ ld [%i2+%l1],%l0 /* 0x0a98 */ cmp %l0,%i3 /* 0x0a9c */ bleu,pt %icc,.L77000379 /* 0x0aa0 */ nop .L77000486: /* 0x0aa4 350 */ cmp %g1,0 .L900000842: /* 0x0aa8 350 */ ble,pn %icc,.L77000379 /* 
0x0aac */ add %l3,1,%g3 .L77000511: /* 0x0ab0 350 */ or %g0,0,%l5 /* 0x0ab4 */ cmp %g3,10 /* 0x0ab8 */ bl,pn %icc,.L77000487 /* 0x0abc */ or %g0,0,%g1 .L900000835: /* 0x0ac0 350 */ prefetch [%i2],22 /* 0x0ac4 */ add %i0,4,%l2 /* 0x0ac8 */ prefetch [%i2+64],22 /* 0x0acc */ add %i2,8,%o5 /* 0x0ad0 */ sub %l3,7,%i0 /* 0x0ad4 */ prefetch [%i2+128],22 /* 0x0ad8 */ or %g0,2,%l5 /* 0x0adc */ prefetch [%i2+192],22 /* 0x0ae0 */ prefetch [%i2+256],22 /* 0x0ae4 */ prefetch [%i2+320],22 /* 0x0ae8 */ prefetch [%i2+384],22 /* 0x0aec */ ld [%l2-4],%l7 /* 0x0af0 */ ld [%o5-4],%l6 /* 0x0af4 */ prefetch [%o5+440],22 /* 0x0af8 */ prefetch [%o5+504],22 /* 0x0afc */ ld [%i2],%i2 /* 0x0b00 */ sub %i2,%l7,%g3 /* 0x0b04 */ st %g3,[%o5-8] /* 0x0b08 */ srax %g3,32,%l7 .L900000833: /* 0x0b0c 350 */ add %l5,8,%l5 /* 0x0b10 */ add %o5,32,%o5 /* 0x0b14 */ ld [%l2],%i5 /* 0x0b18 */ prefetch [%o5+496],22 /* 0x0b1c */ cmp %l5,%i0 /* 0x0b20 */ add %l2,32,%l2 /* 0x0b24 */ sub %l6,%i5,%g5 /* 0x0b28 */ add %g5,%l7,%o0 /* 0x0b2c */ ld [%o5-32],%l4 /* 0x0b30 */ st %o0,[%o5-36] /* 0x0b34 */ srax %o0,32,%i3 /* 0x0b38 */ ld [%l2-28],%i1 /* 0x0b3c */ sub %l4,%i1,%i4 /* 0x0b40 */ add %i4,%i3,%o1 /* 0x0b44 */ ld [%o5-28],%o3 /* 0x0b48 */ st %o1,[%o5-32] /* 0x0b4c */ srax %o1,32,%l1 /* 0x0b50 */ ld [%l2-24],%o2 /* 0x0b54 */ sub %o3,%o2,%g2 /* 0x0b58 */ add %g2,%l1,%o7 /* 0x0b5c */ ld [%o5-24],%l0 /* 0x0b60 */ st %o7,[%o5-28] /* 0x0b64 */ srax %o7,32,%l6 /* 0x0b68 */ ld [%l2-20],%o4 /* 0x0b6c */ sub %l0,%o4,%g1 /* 0x0b70 */ add %g1,%l6,%l7 /* 0x0b74 */ ld [%o5-20],%i2 /* 0x0b78 */ st %l7,[%o5-24] /* 0x0b7c */ srax %l7,32,%g4 /* 0x0b80 */ ld [%l2-16],%g3 /* 0x0b84 */ sub %i2,%g3,%i5 /* 0x0b88 */ add %i5,%g4,%g5 /* 0x0b8c */ ld [%o5-16],%i1 /* 0x0b90 */ st %g5,[%o5-20] /* 0x0b94 */ srax %g5,32,%l4 /* 0x0b98 */ ld [%l2-12],%o0 /* 0x0b9c */ sub %i1,%o0,%i3 /* 0x0ba0 */ add %i3,%l4,%i4 /* 0x0ba4 */ ld [%o5-12],%o2 /* 0x0ba8 */ st %i4,[%o5-16] /* 0x0bac */ srax %i4,32,%o3 /* 0x0bb0 */ ld [%l2-8],%o1 /* 0x0bb4 */ sub 
%o2,%o1,%l1 /* 0x0bb8 */ add %l1,%o3,%g2 /* 0x0bbc */ ld [%o5-8],%o4 /* 0x0bc0 */ st %g2,[%o5-12] /* 0x0bc4 */ srax %g2,32,%l0 /* 0x0bc8 */ ld [%l2-4],%o7 /* 0x0bcc */ sub %o4,%o7,%l6 /* 0x0bd0 */ add %l6,%l0,%g1 /* 0x0bd4 */ ld [%o5-4],%l6 /* 0x0bd8 */ st %g1,[%o5-8] /* 0x0bdc */ ble,pt %icc,.L900000833 /* 0x0be0 */ srax %g1,32,%l7 .L900000836: /* 0x0be4 350 */ ld [%l2],%l0 /* 0x0be8 */ add %l2,4,%i0 /* 0x0bec */ or %g0,%o5,%i2 /* 0x0bf0 */ cmp %l5,%l3 /* 0x0bf4 */ sub %l6,%l0,%l6 /* 0x0bf8 */ add %l6,%l7,%g1 /* 0x0bfc */ st %g1,[%o5-4] /* 0x0c00 */ bg,pn %icc,.L77000379 /* 0x0c04 */ srax %g1,32,%g1 .L77000487: /* 0x0c08 350 */ ld [%i2],%o4 .L900000841: /* 0x0c0c 350 */ ld [%i0],%i3 /* 0x0c10 */ add %g1,%o4,%l0 /* 0x0c14 */ add %l5,1,%l5 /* 0x0c18 */ cmp %l5,%l3 /* 0x0c1c */ add %i0,4,%i0 /* 0x0c20 */ sub %l0,%i3,%l6 /* 0x0c24 */ st %l6,[%i2] /* 0x0c28 */ srax %l6,32,%g1 /* 0x0c2c */ add %i2,4,%i2 /* 0x0c30 */ ble,a,pt %icc,.L900000841 /* 0x0c34 */ ld [%i2],%o4 .L77000379: /* 0x0c38 405 */ ret ! Result = /* 0x0c3c */ restore %g0,%g0,%g0 /* 0x0c40 0 */ .type mont_mulf_noconv,2 /* 0x0c40 0 */ .size mont_mulf_noconv,(.-mont_mulf_noconv) ! Begin Disassembling Debug Info .xstabs ".stab.index","V=10.0;DBG_GEN=4.14.14;cd;backend;Xa;O;R=Sun C 5.5 Patch 112760-07 2004/02/03",60,0,0,0 .xstabs ".stab.index","/workspace/ferenc/algorithms/bignum/unified/mont_mulf; /ws/onnv-tools/SUNWspro/SOS8/prod/bin/cc -D_KERNEL -DRF_INLINE_MACROS -fast -xarch=v9 -xO5 -xstrconst -xdepend -Xa -xchip=ultra3 -xcode=abs32 -Wc,-Qrm-Qd -Wc,-Qrm-Qf -Wc,-assembly -V -W0,-xp -c conv_v9.il -o mont_mulf.o mont_mulf.c",52,0,0,0 ! End Disassembling Debug Info ! Begin Disassembling Ident .ident "cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (NO SOURCE LINE) .ident "@(#)mont_mulf.c\t1.2\t01/09/24 SMI" ! (/tmp/acompAAApja4Fx:8) .ident "@(#)types.h\t1.74\t03/08/07 SMI" ! (/tmp/acompAAApja4Fx:9) .ident "@(#)isa_defs.h\t1.20\t99/05/04 SMI" ! 
(/tmp/acompAAApja4Fx:10) .ident "@(#)feature_tests.h\t1.18\t99/07/26 SMI" ! (/tmp/acompAAApja4Fx:11) .ident "@(#)machtypes.h\t1.13\t99/05/04 SMI" ! (/tmp/acompAAApja4Fx:12) .ident "@(#)inttypes.h\t1.2\t98/01/16 SMI" ! (/tmp/acompAAApja4Fx:13) .ident "@(#)int_types.h\t1.6\t97/08/20 SMI" ! (/tmp/acompAAApja4Fx:14) .ident "@(#)int_limits.h\t1.6\t99/08/06 SMI" ! (/tmp/acompAAApja4Fx:15) .ident "@(#)int_const.h\t1.2\t96/07/08 SMI" ! (/tmp/acompAAApja4Fx:16) .ident "@(#)int_fmtio.h\t1.2\t96/07/08 SMI" ! (/tmp/acompAAApja4Fx:17) .ident "@(#)types32.h\t1.4\t98/02/13 SMI" ! (/tmp/acompAAApja4Fx:18) .ident "@(#)select.h\t1.17\t01/08/15 SMI" ! (/tmp/acompAAApja4Fx:19) .ident "@(#)math.h\t2.11\t00/09/07 SMI" ! (/tmp/acompAAApja4Fx:20) .ident "@(#)math_iso.h\t1.2\t00/09/07 SMI" ! (/tmp/acompAAApja4Fx:21) .ident "@(#)floatingpoint.h\t2.5\t99/06/22 SMI" ! (/tmp/acompAAApja4Fx:22) .ident "@(#)stdio_tag.h\t1.3\t98/04/20 SMI" ! (/tmp/acompAAApja4Fx:23) .ident "@(#)ieeefp.h\t2.8 99/10/29" ! (/tmp/acompAAApja4Fx:24) .ident "acomp: Sun C 5.5 Patch 112760-07 2004/02/03" ! (/tmp/acompAAApja4Fx:57) .ident "iropt: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (/tmp/acompAAApja4Fx:58) .ident "cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (NO SOURCE LINE) ! 
End Disassembling Ident

/*
 * FZERO: clear every double-precision FP register, %f0 through %f62.
 *
 * %f0 and %f2 are zeroed with fzero; each remaining even-numbered register
 * is then written with either %f0 + %f2 or %f0 * %f2, both of which are
 * zero.  big_restorefp() uses this so that no intermediate values are left
 * behind in the FP register file.
 *
 * NOTE(review): the alternation of faddd and fmuld presumably lets the FP
 * adder and the FP multiplier work in parallel -- confirm; the values
 * written are the same either way.
 */
#define	FZERO				\
	fzero	%f0			;\
	fzero	%f2			;\
	faddd	%f0, %f2, %f4		;\
	fmuld	%f0, %f2, %f6		;\
	faddd	%f0, %f2, %f8		;\
	fmuld	%f0, %f2, %f10		;\
	faddd	%f0, %f2, %f12		;\
	fmuld	%f0, %f2, %f14		;\
	faddd	%f0, %f2, %f16		;\
	fmuld	%f0, %f2, %f18		;\
	faddd	%f0, %f2, %f20		;\
	fmuld	%f0, %f2, %f22		;\
	faddd	%f0, %f2, %f24		;\
	fmuld	%f0, %f2, %f26		;\
	faddd	%f0, %f2, %f28		;\
	fmuld	%f0, %f2, %f30		;\
	faddd	%f0, %f2, %f32		;\
	fmuld	%f0, %f2, %f34		;\
	faddd	%f0, %f2, %f36		;\
	fmuld	%f0, %f2, %f38		;\
	faddd	%f0, %f2, %f40		;\
	fmuld	%f0, %f2, %f42		;\
	faddd	%f0, %f2, %f44		;\
	fmuld	%f0, %f2, %f46		;\
	faddd	%f0, %f2, %f48		;\
	fmuld	%f0, %f2, %f50		;\
	faddd	%f0, %f2, %f52		;\
	fmuld	%f0, %f2, %f54		;\
	faddd	%f0, %f2, %f56		;\
	fmuld	%f0, %f2, %f58		;\
	faddd	%f0, %f2, %f60		;\
	fmuld	%f0, %f2, %f62

#include "assym.h"

/*
 * big_savefp: record the current FP state in the save area addressed by %o0.
 *
 * The current %fprs is always stored at offset FPU_FPRS and %fsr at offset
 * FPU_FSR.  The FP registers themselves are stored (BSTORE_FPREGS) only when
 * FPRS_FEF was already set on entry; when it was clear, %fprs is instead
 * written with FPRS_FEF and no registers are saved.
 *
 * In the routine below, we check/set FPRS_FEF bit since
 * we don't want to take a fp_disabled trap. We need not
 * check/set PSTATE_PEF bit as it is done early during boot.
 *
 * Registers used: %o2 holds the %fprs value; %o4 is handed to BSTORE_FPREGS.
 * NOTE(review): BSTORE_FPREGS, FPU_FPRS and FPU_FSR are defined outside this
 * chunk (presumably in sys/machthread.h and assym.h); the layout of the save
 * area and the use made of %o4 should be confirmed there.
 */
	ENTRY(big_savefp)
	/* Remember the caller's %fprs so big_restorefp() can test it later. */
	rd	%fprs, %o2
	st	%o2, [%o0 + FPU_FPRS]
	andcc	%o2, FPRS_FEF, %g0	! is FPRS_FEF set?
	bnz,a,pt %icc, .fregs_save	! yes, go to save
	/* Delay slot; it is a nop, so the annul bit above has no visible effect. */
	nop

	/*
	 * FPRS_FEF was clear: nothing is saved from the register file.  The
	 * bit is set before %fsr is stored, in line with the fp_disabled
	 * concern described in the header comment.
	 */
	wr	%g0, FPRS_FEF, %fprs	! else, set the bit
	stx	%fsr, [%o0 + FPU_FSR]	! store %fsr
	retl
	nop
.fregs_save:
	/* FPRS_FEF was already set: store the FP registers, then %fsr. */
	BSTORE_FPREGS(%o0, %o4)
	stx	%fsr, [%o0 + FPU_FSR]	! store %fsr
	retl
	nop
	SET_SIZE(big_savefp)

/*
 * big_restorefp: undo big_savefp() using the save area addressed by %o0.
 *
 * %fsr is reloaded from offset FPU_FSR first.  The saved %fprs (offset
 * FPU_FPRS) then selects one of two paths:
 *   - FPRS_FEF set in the saved value: reload the FP registers with
 *     BLOAD_FPREGS and write the saved %fprs value back.
 *   - FPRS_FEF clear in the saved value: zero all FP registers with FZERO
 *     and write 0 to %fprs (not the saved value).
 *
 * Registers used: %o1 holds the saved %fprs; %o2 is handed to BLOAD_FPREGS.
 * NOTE(review): on the FZERO path the faddd/fmuld instructions execute after
 * %fsr has been reloaded; confirm that any %fsr fields they update do not
 * matter to the caller.
 */
	ENTRY(big_restorefp)
	ldx	[%o0 + FPU_FSR], %fsr	! restore %fsr
	ld	[%o0 + FPU_FPRS], %o1
	andcc	%o1, FPRS_FEF, %g0	! is FPRS_FEF set in saved %fprs?
	bnz,pt	%icc, .fregs_restore	! yes, go to restore
	/* Delay slot. */
	nop

	/* Nothing was saved for this caller: scrub the registers instead. */
	FZERO				! zero out to avoid leaks
	wr	%g0, 0, %fprs
	retl
	nop
.fregs_restore:
	/* Reload the FP registers stored by big_savefp(), then the saved %fprs. */
	BLOAD_FPREGS(%o0, %o2)
	wr	%o1, 0, %fprs
	retl
	nop
	SET_SIZE(big_restorefp)

#endif /* lint || __lint */