# 4.4BSD/usr/src/contrib/calc-1.26.4/zmul.c

/*
* Copyright (c) 1993 David I. Bell
* Permission is granted to use, distribute, or modify this source,
* provided that this copyright notice remains intact.
*
* Faster than usual multiplying and squaring routines.
* The algorithm used is the reasonably simple one from Knuth, volume 2,
* section 4.3.3.  These recursive routines are of speed O(N^1.585)
* instead of O(N^2).  The usual multiplication and (almost usual) squaring
* algorithms are used for small numbers.  On a 386 with its compiler, the
* two algorithms are equal in speed at about 100 decimal digits.
*/

#include <stdio.h>
#include "math.h"

LEN _mul2_ = MUL_ALG2;		/* size of number to use multiply algorithm 2 */
LEN _sq2_ = SQ_ALG2;		/* size of number to use square algorithm 2 */

#if 0
static char *abortmsg = "Calculation aborted";
static char *memmsg = "Not enough memory";
#endif

static HALF *tempbuf;		/* temporary buffer for multiply and square */

static LEN domul proto((HALF *v1, LEN size1, HALF *v2, LEN size2, HALF *ans));
static LEN dosquare proto((HALF *vp, LEN size, HALF *ans));

/*
* Multiply two numbers using the following formula recursively:
*	(A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D
* where S is a power of 2^16, and so multiplies by it are shifts, and
* A,B,C,D are the left and right halfs of the numbers to be multiplied.
*/
void
zmul(z1, z2, res)
ZVALUE z1, z2;		/* numbers to multiply */
ZVALUE *res;		/* result of multiplication */
{
LEN len;		/* size of array */

if (iszero(z1) || iszero(z2)) {
*res = _zero_;
return;
}
if (isunit(z1)) {
zcopy(z2, res);
res->sign = (z1.sign != z2.sign);
return;
}
if (isunit(z2)) {
zcopy(z1, res);
res->sign = (z1.sign != z2.sign);
return;
}

/*
* Allocate a temporary buffer for the recursion levels to use.
* An array needs to be allocated large enough for all of the
* temporary results to fit in.  This size is about twice the size
* of the largest original number, since each recursion level uses
* the size of its given number, and whose size is 1/2 the size of
* the previous level.  The sum of the infinite series is 2.
* Add some extra words because of rounding when dividing by 2
* and also because of the extra word that each multiply needs.
*/
len = z1.len;
if (len < z2.len)
len = z2.len;
len = 2 * len + 64;
tempbuf = zalloctemp(len);

res->sign = (z1.sign != z2.sign);
res->v = alloc(z1.len + z2.len + 1);
res->len = domul(z1.v, z1.len, z2.v, z2.len, res->v);
}

/*
* Recursive routine to multiply two numbers by splitting them up into
* two numbers of half the size, and using the results of multiplying the
* subpieces.  The result is placed in the indicated array, which must be
* large enough for the result plus one extra word (size1 + size2 + 1).
* Returns the actual size of the result with leading zeroes stripped.
* This also uses a temporary array which must be twice as large as
* one more than the size of the number at the top level recursive call.
*/
static LEN
domul(v1, size1, v2, size2, ans)
HALF *v1;		/* first number */
LEN size1;		/* size of first number */
HALF *v2;		/* second number */
LEN size2;		/* size of second number */
HALF *ans;		/* location for result */
{
LEN shift;		/* amount numbers are shifted by */
LEN sizeA;		/* size of left half of first number */
LEN sizeB;		/* size of right half of first number */
LEN sizeC;		/* size of left half of second number */
LEN sizeD;		/* size of right half of second number */
LEN sizeAB;		/* size of subtraction of A and B */
LEN sizeDC;		/* size of subtraction of D and C */
LEN sizeABDC;		/* size of product of above two results */
LEN subsize;		/* size of difference of halfs */
LEN copysize;		/* size of number left to copy */
LEN sizetotal;		/* total size of product */
LEN len;		/* temporary length */
HALF *baseA;		/* base of left half of first number */
HALF *baseB;		/* base of right half of first number */
HALF *baseC;		/* base of left half of second number */
HALF *baseD;		/* base of right half of second number */
HALF *baseAB;		/* base of result of subtraction of A and B */
HALF *baseDC;		/* base of result of subtraction of D and C */
HALF *baseABDC;		/* base of product of above two results */
HALF *baseAC;		/* base of product of A and C */
HALF *baseBD;		/* base of product of B and D */
FULL carry;		/* carry digit for small multiplications */
FULL carryACBD;		/* carry from addition of A*C and B*D */
FULL digit;		/* single digit multiplying by */
HALF *temp;		/* base for temporary calculations */
BOOL neg;		/* whether imtermediate term is negative */
register HALF *hd, *h1, *h2;	/* for inner loops */
SIUNION sival;		/* for addition of digits */

/*
* Trim the numbers of leading zeroes and initialize the
* estimated size of the result.
*/
hd = &v1[size1 - 1];
while ((*hd == 0) && (size1 > 1)) {
hd--;
size1--;
}
hd = &v2[size2 - 1];
while ((*hd == 0) && (size2 > 1)) {
hd--;
size2--;
}
sizetotal = size1 + size2;

/*
* First check for zero answer.
*/
if (((size1 == 1) && (*v1 == 0)) || ((size2 == 1) && (*v2 == 0))) {
*ans = 0;
return 1;
}

/*
* Exchange the two numbers if necessary to make the number of
* digits of the first number be greater than or equal to the
* second number.
*/
if (size1 < size2) {
len = size1; size1 = size2; size2 = len;
hd = v1; v1 = v2; v2 = hd;
}

/*
* If the smaller number has only a few digits, then calculate
* the result in the normal manner in order to avoid the overhead
* of the recursion for small numbers.  The number of digits where
* the algorithm changes is settable from 2 to maxint.
*/
if (size2 < _mul2_) {
/*
* First clear the top part of the result, and then multiply
* by the lowest digit to get the first partial sum.  Later
* products will then add into this result.
*/
hd = &ans[size1];
len = size2;
while (len--)
*hd++ = 0;

digit = *v2++;
h1 = v1;
hd = ans;
carry = 0;
len = size1;
while (len >= 4) {	/* expand the loop some */
len -= 4;
sival.ivalue = ((FULL) *h1++) * digit + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
sival.ivalue = ((FULL) *h1++) * digit + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
sival.ivalue = ((FULL) *h1++) * digit + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
sival.ivalue = ((FULL) *h1++) * digit + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (len--) {
sival.ivalue = ((FULL) *h1++) * digit + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
*hd = (HALF)carry;

/*
* Now multiply by the remaining digits of the second number,
* adding each product into the final result.
*/
h2 = ans;
while (--size2 > 0) {
digit = *v2++;
h1 = v1;
hd = ++h2;
if (digit == 0)
continue;
carry = 0;
len = size1;
while (len >= 4) {	/* expand the loop some */
len -= 4;
sival.ivalue = ((FULL) *h1++) * digit
+ ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
sival.ivalue = ((FULL) *h1++) * digit
+ ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
sival.ivalue = ((FULL) *h1++) * digit
+ ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
sival.ivalue = ((FULL) *h1++) * digit
+ ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (len--) {
sival.ivalue = ((FULL) *h1++) * digit
+ ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
}

/*
* Now return the true size of the number.
*/
len = sizetotal;
hd = &ans[len - 1];
while ((*hd == 0) && (len > 1)) {
hd--;
len--;
}
return len;
}

/*
* Need to multiply by a large number.
* Allocate temporary space for calculations, and calculate the
* value for the shift.  The shift value is 1/2 the size of the
* larger (first) number (rounded up).  The amount of temporary
* space needed is twice the size of the shift, plus one more word
* for the multiply to use.
*/
shift = (size1 + 1) / 2;
temp = tempbuf;
tempbuf += (2 * shift) + 1;

/*
* Determine the sizes and locations of all the numbers.
* The value of sizeC can be negative, and this is checked later.
* The value of sizeD is limited by the full size of the number.
*/
baseA = v1 + shift;
baseB = v1;
baseC = v2 + shift;
baseD = v2;
baseAB = ans;
baseDC = ans + shift;
baseAC = ans + shift * 2;
baseBD = ans;

sizeA = size1 - shift;
sizeC = size2 - shift;

sizeB = shift;
hd = &baseB[shift - 1];
while ((*hd == 0) && (sizeB > 1)) {
hd--;
sizeB--;
}

sizeD = shift;
if (sizeD > size2)
sizeD = size2;
hd = &baseD[sizeD - 1];
while ((*hd == 0) && (sizeD > 1)) {
hd--;
sizeD--;
}

/*
* If the smaller number has a high half of zero, then calculate
* the result by breaking up the first number into two numbers
* and combining the results using the obvious formula:
*	(A*S+B) * D = (A*D)*S + B*D
*/
if (sizeC <= 0) {
len = domul(baseB, sizeB, baseD, sizeD, ans);
hd = &ans[len];
len = sizetotal - len;
while (len--)
*hd++ = 0;

/*
* Add the second number into the first number, shifted
* over at the correct position.
*/
len = domul(baseA, sizeA, baseD, sizeD, temp);
h1 = temp;
hd = ans + shift;
carry = 0;
while (len--) {
sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}

/*
* Determine the final size of the number and return it.
*/
len = sizetotal;
hd = &ans[len - 1];
while ((*hd == 0) && (len > 1)) {
hd--;
len--;
}
tempbuf = temp;
return len;
}

/*
* Now we know that the high halfs of the numbers are nonzero,
* so we can use the complete formula.
*	(A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D.
* The steps are done in the following order:
*	A-B
*	D-C
*	(A-B)*(D-C)
*	S^2*A*C + B*D
*	(S^2+S)*A*C + (S+1)*B*D				(*)
*	(S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D
*
* Note: step (*) above can produce a result which is larger than
* the final product will be, and this is where the extra word
* needed in the product comes from.  After the final subtraction is
* done, the result fits in the expected size.  Using the extra word
* is easier than suppressing the carries and borrows everywhere.
*
* Begin by forming the product (A-B)*(D-C) into a temporary
* location that we save until the final step.  Do each subtraction
* at positions 0 and S.  Be very careful about the relative sizes
* of the numbers since this result can be negative.  For the first
* step calculate the absolute difference of A and B into a temporary
* location at position 0 of the result.  Negate the sign if A is
* smaller than B.
*/
neg = FALSE;
if (sizeA == sizeB) {
len = sizeA;
h1 = &baseA[len - 1];
h2 = &baseB[len - 1];
while ((len > 1) && (*h1 == *h2)) {
len--;
h1--;
h2--;
}
}
if ((sizeA > sizeB) || ((sizeA == sizeB) && (*h1 > *h2)))
{
h1 = baseA;
h2 = baseB;
sizeAB = sizeA;
subsize = sizeB;
} else {
neg = !neg;
h1 = baseB;
h2 = baseA;
sizeAB = sizeB;
subsize = sizeA;
}
copysize = sizeAB - subsize;

hd = baseAB;
carry = 0;
while (subsize--) {
sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}
while (copysize--) {
sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}

hd = &baseAB[sizeAB - 1];
while ((*hd == 0) && (sizeAB > 1)) {
hd--;
sizeAB--;
}

/*
* This completes the calculation of abs(A-B).  For the next step
* calculate the absolute difference of D and C into a temporary
* location at position S of the result.  Negate the sign if C is
* larger than D.
*/
if (sizeC == sizeD) {
len = sizeC;
h1 = &baseC[len - 1];
h2 = &baseD[len - 1];
while ((len > 1) && (*h1 == *h2)) {
len--;
h1--;
h2--;
}
}
if ((sizeC > sizeD) || ((sizeC == sizeD) && (*h1 > *h2)))
{
neg = !neg;
h1 = baseC;
h2 = baseD;
sizeDC = sizeC;
subsize = sizeD;
} else {
h1 = baseD;
h2 = baseC;
sizeDC = sizeD;
subsize = sizeC;
}
copysize = sizeDC - subsize;

hd = baseDC;
carry = 0;
while (subsize--) {
sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}
while (copysize--) {
sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}
hd = &baseDC[sizeDC - 1];
while ((*hd == 0) && (sizeDC > 1)) {
hd--;
sizeDC--;
}

/*
* This completes the calculation of abs(D-C).  Now multiply
* together abs(A-B) and abs(D-C) into a temporary location,
* which is preserved until the final steps.
*/
baseABDC = temp;
sizeABDC = domul(baseAB, sizeAB, baseDC, sizeDC, baseABDC);

/*
* Now calculate B*D and A*C into one of their two final locations.
* Make sure the high order digits of the products are zeroed since
* since the size of the high order words might be smaller than
* the shift size.  Do B*D first since the multiplies use one more
* word than the size of the product.  Also zero the final extra
* word in the result for possible carries to use.
*/
len = domul(baseB, sizeB, baseD, sizeD, baseBD);
hd = &baseBD[len];
len = shift * 2 - len;
while (len--)
*hd++ = 0;

len = domul(baseA, sizeA, baseC, sizeC, baseAC);
hd = &baseAC[len];
len = sizetotal - shift * 2 - len + 1;
while (len--)
*hd++ = 0;

/*
* Now add in A*C and B*D into themselves at the other shifted
* position that they need.  This addition is tricky in order to
* make sure that the two additions cannot interfere with each other.
* Therefore we first add in the top half of B*D and the lower half
* of A*C.  The sources and destinations of these two additions
* overlap, and so the same answer results from the two additions,
* thus only two pointers suffice for both additions.  Keep the
* final carry from these additions for later use since we cannot
* afford to change the top half of A*C yet.
*/
h1 = baseBD + shift;
h2 = baseAC;
carryACBD = 0;
len = shift;
while (len--) {
sival.ivalue = ((FULL) *h1) + ((FULL) *h2) + carryACBD;
*h1++ = sival.silow;
*h2++ = sival.silow;
carryACBD = sival.sihigh;
}

/*
* Now add in the bottom half of B*D and the top half of A*C.
* These additions are straightforward, except that A*C should
* be done first because of possible carries from B*D, and the
* top half of A*C might not exist.  Add in one of the carries
* from the previous addition while we are at it.
*/
h1 = baseAC + shift;
hd = baseAC;
carry = carryACBD;
len = sizetotal - 3 * shift;
while (len--) {
sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}

h1 = baseBD;
hd = baseBD + shift;
carry = 0;
len = shift;
while (len--) {
sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}

/*
* Now finally add in the other delayed carry from the
*/
hd = baseAC + shift;
while (carryACBD) {
sival.ivalue = ((FULL) *hd) + carryACBD;
*hd++ = sival.silow;
carryACBD = sival.sihigh;
}

/*
* Now finally add or subtract (A-B)*(D-C) into the final result at
* the correct position (S), according to whether it is positive or
* negative.  When subtracting, the answer cannot go negative.
*/
h1 = baseABDC;
hd = ans + shift;
carry = 0;
len = sizeABDC;
if (neg) {
while (len--) {
sival.ivalue = BASE1 - ((FULL) *hd) +
((FULL) *h1++) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = BASE1 - ((FULL) *hd) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}
} else {
while (len--) {
sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
}

/*
* Finally determine the size of the final result and return that.
*/
len = sizetotal;
hd = &ans[len - 1];
while ((*hd == 0) && (len > 1)) {
hd--;
len--;
}
tempbuf = temp;
return len;
}

/*
* Square a number by using the following formula recursively:
*	(A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2
* where S is a power of 2^16, and so multiplies by it are shifts,
* and A and B are the left and right halfs of the number to square.
*/
void
zsquare(z, res)
ZVALUE z, *res;
{
LEN len;

if (iszero(z)) {
*res = _zero_;
return;
}
if (isunit(z)) {
*res = _one_;
return;
}

/*
* Allocate a temporary array if necessary for the recursion to use.
* The array needs to be allocated large enough for all of the
* temporary results to fit in.  This size is about 3 times the
* size of the original number, since each recursion level uses 3/2
* of the size of its given number, and whose size is 1/2 the size
* of the previous level.  The sum of the infinite series is 3.
* Allocate some extra words for rounding up the sizes.
*/
len = 3 * z.len + 32;
tempbuf = zalloctemp(len);

res->sign = 0;
res->v = alloc(z.len * 2);
res->len = dosquare(z.v, z.len, res->v);
}

/*
* Recursive routine to square a number by splitting it up into two numbers
* of half the size, and using the results of squaring the subpieces.
* The result is placed in the indicated array, which must be large
* enough for the result (size * 2).  Returns the size of the result.
* This uses a temporary array which must be 3 times as large as the
* size of the number at the top level recursive call.
*/
static LEN
dosquare(vp, size, ans)
HALF *vp;		/* value to be squared */
LEN size;		/* length of value to square */
HALF *ans;		/* location for result */
{
LEN shift;		/* amount numbers are shifted by */
LEN sizeA;		/* size of left half of number to square */
LEN sizeB;		/* size of right half of number to square */
LEN sizeAA;		/* size of square of left half */
LEN sizeBB;		/* size of square of right half */
LEN sizeAABB;		/* size of sum of squares of A and B */
LEN sizeAB;		/* size of difference of A and B */
LEN sizeABAB;		/* size of square of difference of A and B */
LEN subsize;		/* size of difference of halfs */
LEN copysize;		/* size of number left to copy */
LEN sumsize;		/* size of sum */
LEN sizetotal;		/* total size of square */
LEN len;		/* temporary length */
LEN len1;		/* another temporary length */
FULL carry;		/* carry digit for small multiplications */
FULL digit;		/* single digit multiplying by */
HALF *temp;		/* base for temporary calculations */
HALF *baseA;		/* base of left half of number */
HALF *baseB;		/* base of right half of number */
HALF *baseAA;		/* base of square of left half of number */
HALF *baseBB;		/* base of square of right half of number */
HALF *baseAABB;		/* base of sum of squares of A and B */
HALF *baseAB;		/* base of difference of A and B */
HALF *baseABAB;		/* base of square of difference of A and B */
register HALF *hd, *h1, *h2, *h3;	/* for inner loops */
SIUNION sival;		/* for addition of digits */

/*
* First trim the number of leading zeroes.
*/
hd = &vp[size - 1];
while ((*hd == 0) && (size > 1)) {
size--;
hd--;
}
sizetotal = size + size;

/*
* If the number has only a small number of digits, then use the
* (almost) normal multiplication method.  Multiply each halfword
* only by those halfwards further on in the number.  Missed terms
* will then be the same pairs of products repeated, and the squares
* of each halfword.  The first case is handled by doubling the
* result.  The second case is handled explicitly.  The number of
* digits where the algorithm changes is settable from 2 to maxint.
*/
if (size < _sq2_) {
hd = ans;
len = sizetotal;
while (len--)
*hd++ = 0;

h2 = vp;
hd = ans + 1;
for (len = size; len--; hd += 2) {
digit = (FULL) *h2++;
if (digit == 0)
continue;
h3 = h2;
h1 = hd;
carry = 0;
len1 = len;
while (len1 >= 4) {	/* expand the loop some */
len1 -= 4;
sival.ivalue = (digit * ((FULL) *h3++))
+ ((FULL) *h1) + carry;
*h1++ = sival.silow;
sival.ivalue = (digit * ((FULL) *h3++))
+ ((FULL) *h1) + ((FULL) sival.sihigh);
*h1++ = sival.silow;
sival.ivalue = (digit * ((FULL) *h3++))
+ ((FULL) *h1) + ((FULL) sival.sihigh);
*h1++ = sival.silow;
sival.ivalue = (digit * ((FULL) *h3++))
+ ((FULL) *h1) + ((FULL) sival.sihigh);
*h1++ = sival.silow;
carry = sival.sihigh;
}
while (len1--) {
sival.ivalue = (digit * ((FULL) *h3++))
+ ((FULL) *h1) + carry;
*h1++ = sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = ((FULL) *h1) + carry;
*h1++ = sival.silow;
carry = sival.sihigh;
}
}

/*
* Now double the result.
* There is no final carry to worry about because we
* handle all digits of the result which must fit.
*/
carry = 0;
hd = ans;
len = sizetotal;
while (len--) {
digit = ((FULL) *hd);
sival.ivalue = digit + digit + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}

/*
* Now add in the squares of each halfword.
*/
carry = 0;
hd = ans;
h3 = vp;
len = size;
while (len--) {
digit = ((FULL) *h3++);
sival.ivalue = digit * digit + ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
sival.ivalue = ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}

/*
* Finally return the size of the result.
*/
len = sizetotal;
hd = &ans[len - 1];
while ((*hd == 0) && (len > 1)) {
len--;
hd--;
}
return len;
}

/*
* The number to be squared is large.
* Allocate temporary space and determine the sizes and
* positions of the values to be calculated.
*/
temp = tempbuf;
tempbuf += (3 * (size + 1) / 2);

sizeA = size / 2;
sizeB = size - sizeA;
shift = sizeB;
baseA = vp + sizeB;
baseB = vp;
baseAA = &ans[shift * 2];
baseBB = ans;
baseAABB = temp;
baseAB = temp;
baseABAB = &temp[shift];

/*
* Trim the second number of leading zeroes.
*/
hd = &baseB[sizeB - 1];
while ((*hd == 0) && (sizeB > 1)) {
sizeB--;
hd--;
}

/*
* Now to proceed to calculate the result using the formula.
*	(A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2.
* The steps are done in the following order:
*	S^2*A^2 + B^2
*	A^2 + B^2
*	(S^2+S)*A^2 + (S+1)*B^2
*	(A-B)^2
*	(S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2.
*
* Begin by forming the squares of two the halfs concatenated
* together in the final result location.  Make sure that the
* highest words of the results are zero.
*/
sizeBB = dosquare(baseB, sizeB, baseBB);
hd = &baseBB[sizeBB];
len = shift * 2 - sizeBB;
while (len--)
*hd++ = 0;

sizeAA = dosquare(baseA, sizeA, baseAA);
hd = &baseAA[sizeAA];
len = sizetotal - shift * 2 - sizeAA;
while (len--)
*hd++ = 0;

/*
* Sum the two squares into a temporary location.
*/
if (sizeAA >= sizeBB) {
h1 = baseAA;
h2 = baseBB;
sizeAABB = sizeAA;
sumsize = sizeBB;
} else {
h1 = baseBB;
h2 = baseAA;
sizeAABB = sizeBB;
sumsize = sizeAA;
}
copysize = sizeAABB - sumsize;

hd = baseAABB;
carry = 0;
while (sumsize--) {
sival.ivalue = ((FULL) *h1++) + ((FULL) *h2++) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (copysize--) {
sival.ivalue = ((FULL) *h1++) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
if (carry) {
*hd = (HALF)carry;
sizeAABB++;
}

/*
* Add the sum back into the previously calculated squares
* shifted over to the proper location.
*/
h1 = baseAABB;
hd = ans + shift;
carry = 0;
len = sizeAABB;
while (len--) {
sival.ivalue = ((FULL) *hd) + ((FULL) *h1++) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}

/*
* Calculate the absolute value of the difference of the two halfs
* into a temporary location.
*/
if (sizeA == sizeB) {
len = sizeA;
h1 = &baseA[len - 1];
h2 = &baseB[len - 1];
while ((len > 1) && (*h1 == *h2)) {
len--;
h1--;
h2--;
}
}
if ((sizeA > sizeB) || ((sizeA == sizeB) && (*h1 > *h2)))
{
h1 = baseA;
h2 = baseB;
sizeAB = sizeA;
subsize = sizeB;
} else {
h1 = baseB;
h2 = baseA;
sizeAB = sizeB;
subsize = sizeA;
}
copysize = sizeAB - subsize;

hd = baseAB;
carry = 0;
while (subsize--) {
sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}
while (copysize--) {
sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}

hd = &baseAB[sizeAB - 1];
while ((*hd == 0) && (sizeAB > 1)) {
sizeAB--;
hd--;
}

/*
* Now square the number into another temporary location,
* and subtract that from the final result.
*/
sizeABAB = dosquare(baseAB, sizeAB, baseABAB);

h1 = baseABAB;
hd = ans + shift;
carry = 0;
while (sizeABAB--) {
sival.ivalue = BASE1 - ((FULL) *hd) + ((FULL) *h1++) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = BASE1 - ((FULL) *hd) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}

/*
* Return the size of the result.
*/
len = sizetotal;
hd = &ans[len - 1];
while ((*hd == 0) && (len > 1)) {
len--;
hd--;
}
tempbuf = temp;
return len;
}

/*
* Return a pointer to a buffer to be used for holding a temporary number.
* The buffer will be at least as large as the specified number of HALFs,
* and remains valid until the next call to this routine.  The buffer cannot
* be freed by the caller.  There is only one temporary buffer, and so as to
* avoid possible conflicts this is only used by the lowest level routines
* such as divide, multiply, and square.
*/
HALF *
zalloctemp(len)
LEN len;		/* required number of HALFs in buffer */
{
HALF *hp;
static LEN buflen;	/* current length of temp buffer */
static HALF *bufptr;	/* pointer to current temp buffer */

if (len <= buflen)
return bufptr;

/*
* We need to grow the temporary buffer.
* First free any existing buffer, and then allocate the new one.
* While we are at it, make the new buffer bigger than necessary
* in order to reduce the number of reallocations.
*/
len += 100;
if (buflen) {
buflen = 0;
free(bufptr);
}
hp = (HALF *) malloc(len * sizeof(HALF));
if (hp == NULL)
error("No memory for temp buffer");
bufptr = hp;
buflen = len;
return hp;
}

/* END CODE */