home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Usenet 1994 October
/
usenetsourcesnewsgroupsinfomagicoctober1994disk2.iso
/
unix
/
volume26
/
calc
/
part11
/
zmul.c
Wrap
C/C++ Source or Header
|
1992-05-09
|
27KB
|
1,040 lines
/*
* Copyright (c) 1992 David I. Bell
* Permission is granted to use, distribute, or modify this source,
* provided that this copyright notice remains intact.
*
* Faster than usual multiplying and squaring routines.
* The algorithm used is the reasonably simple one from Knuth, volume 2,
* section 4.3.3. These recursive routines are of speed O(N^1.585)
* instead of O(N^2). The usual multiplication and (almost usual) squaring
* algorithms are used for small numbers. On a 386 with its compiler, the
* two algorithms are equal in speed at about 100 decimal digits.
*/
#include <stdio.h>
#include "math.h"
LEN _mul2_ = MUL_ALG2; /* size of number to use multiply algorithm 2 */
LEN _sq2_ = SQ_ALG2; /* size of number to use square algorithm 2 */
#if 0
static char *abortmsg = "Calculation aborted";
static char *memmsg = "Not enough memory";
#endif
static HALF *tempbuf; /* temporary buffer for multiply and square */
static LEN domul proto((HALF *v1, LEN size1, HALF *v2, LEN size2, HALF *ans));
static LEN dosquare proto((HALF *vp, LEN size, HALF *ans));
/*
* Multiply two numbers using the following formula recursively:
* (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D
* where S is a power of 2^16, and so multiplies by it are shifts, and
* A,B,C,D are the left and right halfs of the numbers to be multiplied.
*/
void
zmul(z1, z2, res)
ZVALUE z1, z2; /* numbers to multiply */
ZVALUE *res; /* result of multiplication */
{
LEN len; /* size of array */
if (iszero(z1) || iszero(z2)) {
*res = _zero_;
return;
}
if (isunit(z1)) {
zcopy(z2, res);
res->sign = (z1.sign != z2.sign);
return;
}
if (isunit(z2)) {
zcopy(z1, res);
res->sign = (z1.sign != z2.sign);
return;
}
/*
* Allocate a temporary buffer for the recursion levels to use.
* An array needs to be allocated large enough for all of the
* temporary results to fit in. This size is about twice the size
* of the largest original number, since each recursion level uses
* the size of its given number, and whose size is 1/2 the size of
* the previous level. The sum of the infinite series is 2.
* Add some extra words because of rounding when dividing by 2
* and also because of the extra word that each multiply needs.
*/
len = z1.len;
if (len < z2.len)
len = z2.len;
len = 2 * len + 64;
tempbuf = zalloctemp(len);
res->sign = (z1.sign != z2.sign);
res->v = alloc(z1.len + z2.len + 1);
res->len = domul(z1.v, z1.len, z2.v, z2.len, res->v);
}
/*
* Recursive routine to multiply two numbers by splitting them up into
* two numbers of half the size, and using the results of multiplying the
* subpieces. The result is placed in the indicated array, which must be
* large enough for the result plus one extra word (size1 + size2 + 1).
* Returns the actual size of the result with leading zeroes stripped.
* This also uses a temporary array which must be twice as large as
* one more than the size of the number at the top level recursive call.
*/
static LEN
domul(v1, size1, v2, size2, ans)
HALF *v1; /* first number */
LEN size1; /* size of first number */
HALF *v2; /* second number */
LEN size2; /* size of second number */
HALF *ans; /* location for result */
{
LEN shift; /* amount numbers are shifted by */
LEN sizeA; /* size of left half of first number */
LEN sizeB; /* size of right half of first number */
LEN sizeC; /* size of left half of second number */
LEN sizeD; /* size of right half of second number */
LEN sizeAB; /* size of subtraction of A and B */
LEN sizeDC; /* size of subtraction of D and C */
LEN sizeABDC; /* size of product of above two results */
LEN subsize; /* size of difference of halfs */
LEN copysize; /* size of number left to copy */
LEN sizetotal; /* total size of product */
LEN len; /* temporary length */
HALF *baseA; /* base of left half of first number */
HALF *baseB; /* base of right half of first number */
HALF *baseC; /* base of left half of second number */
HALF *baseD; /* base of right half of second number */
HALF *baseAB; /* base of result of subtraction of A and B */
HALF *baseDC; /* base of result of subtraction of D and C */
HALF *baseABDC; /* base of product of above two results */
HALF *baseAC; /* base of product of A and C */
HALF *baseBD; /* base of product of B and D */
FULL carry; /* carry digit for small multiplications */
FULL carryACBD; /* carry from addition of A*C and B*D */
FULL digit; /* single digit multiplying by */
HALF *temp; /* base for temporary calculations */
BOOL neg; /* whether imtermediate term is negative */
register HALF *hd, *h1, *h2; /* for inner loops */
SIUNION sival; /* for addition of digits */
/*
* Trim the numbers of leading zeroes and initialize the
* estimated size of the result.
*/
hd = &v1[size1 - 1];
while ((*hd == 0) && (size1 > 1)) {
hd--;
size1--;
}
hd = &v2[size2 - 1];
while ((*hd == 0) && (size2 > 1)) {
hd--;
size2--;
}
sizetotal = size1 + size2;
/*
* First check for zero answer.
*/
if (((size1 == 1) && (*v1 == 0)) || ((size2 == 1) && (*v2 == 0))) {
*ans = 0;
return 1;
}
/*
* Exchange the two numbers if necessary to make the number of
* digits of the first number be greater than or equal to the
* second number.
*/
if (size1 < size2) {
len = size1; size1 = size2; size2 = len;
hd = v1; v1 = v2; v2 = hd;
}
/*
* If the smaller number has only a few digits, then calculate
* the result in the normal manner in order to avoid the overhead
* of the recursion for small numbers. The number of digits where
* the algorithm changes is settable from 2 to maxint.
*/
if (size2 < _mul2_) {
/*
* First clear the top part of the result, and then multiply
* by the lowest digit to get the first partial sum. Later
* products will then add into this result.
*/
hd = &ans[size1];
len = size2;
while (len--)
*hd++ = 0;
digit = *v2++;
h1 = v1;
hd = ans;
carry = 0;
len = size1;
while (len >= 4) { /* expand the loop some */
len -= 4;
sival.ivalue = ((FULL) *h1++) * digit + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
sival.ivalue = ((FULL) *h1++) * digit + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
sival.ivalue = ((FULL) *h1++) * digit + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
sival.ivalue = ((FULL) *h1++) * digit + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (len--) {
sival.ivalue = ((FULL) *h1++) * digit + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
*hd = (HALF)carry;
/*
* Now multiply by the remaining digits of the second number,
* adding each product into the final result.
*/
h2 = ans;
while (--size2 > 0) {
digit = *v2++;
h1 = v1;
hd = ++h2;
if (digit == 0)
continue;
carry = 0;
len = size1;
while (len >= 4) { /* expand the loop some */
len -= 4;
sival.ivalue = ((FULL) *h1++) * digit
+ ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
sival.ivalue = ((FULL) *h1++) * digit
+ ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
sival.ivalue = ((FULL) *h1++) * digit
+ ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
sival.ivalue = ((FULL) *h1++) * digit
+ ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (len--) {
sival.ivalue = ((FULL) *h1++) * digit
+ ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
}
/*
* Now return the true size of the number.
*/
len = sizetotal;
hd = &ans[len - 1];
while ((*hd == 0) && (len > 1)) {
hd--;
len--;
}
return len;
}
/*
* Need to multiply by a large number.
* Allocate temporary space for calculations, and calculate the
* value for the shift. The shift value is 1/2 the size of the
* larger (first) number (rounded up). The amount of temporary
* space needed is twice the size of the shift, plus one more word
* for the multiply to use.
*/
shift = (size1 + 1) / 2;
temp = tempbuf;
tempbuf += (2 * shift) + 1;
/*
* Determine the sizes and locations of all the numbers.
* The value of sizeC can be negative, and this is checked later.
* The value of sizeD is limited by the full size of the number.
*/
baseA = v1 + shift;
baseB = v1;
baseC = v2 + shift;
baseD = v2;
baseAB = ans;
baseDC = ans + shift;
baseAC = ans + shift * 2;
baseBD = ans;
sizeA = size1 - shift;
sizeC = size2 - shift;
sizeB = shift;
hd = &baseB[shift - 1];
while ((*hd == 0) && (sizeB > 1)) {
hd--;
sizeB--;
}
sizeD = shift;
if (sizeD > size2)
sizeD = size2;
hd = &baseD[sizeD - 1];
while ((*hd == 0) && (sizeD > 1)) {
hd--;
sizeD--;
}
/*
* If the smaller number has a high half of zero, then calculate
* the result by breaking up the first number into two numbers
* and combining the results using the obvious formula:
* (A*S+B) * D = (A*D)*S + B*D
*/
if (sizeC <= 0) {
len = domul(baseB, sizeB, baseD, sizeD, ans);
hd = &ans[len];
len = sizetotal - len;
while (len--)
*hd++ = 0;
/*
* Add the second number into the first number, shifted
* over at the correct position.
*/
len = domul(baseA, sizeA, baseD, sizeD, temp);
h1 = temp;
hd = ans + shift;
carry = 0;
while (len--) {
sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
/*
* Determine the final size of the number and return it.
*/
len = sizetotal;
hd = &ans[len - 1];
while ((*hd == 0) && (len > 1)) {
hd--;
len--;
}
tempbuf = temp;
return len;
}
/*
* Now we know that the high halfs of the numbers are nonzero,
* so we can use the complete formula.
* (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D.
* The steps are done in the following order:
* A-B
* D-C
* (A-B)*(D-C)
* S^2*A*C + B*D
* (S^2+S)*A*C + (S+1)*B*D (*)
* (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D
*
* Note: step (*) above can produce a result which is larger than
* the final product will be, and this is where the extra word
* needed in the product comes from. After the final subtraction is
* done, the result fits in the expected size. Using the extra word
* is easier than suppressing the carries and borrows everywhere.
*
* Begin by forming the product (A-B)*(D-C) into a temporary
* location that we save until the final step. Do each subtraction
* at positions 0 and S. Be very careful about the relative sizes
* of the numbers since this result can be negative. For the first
* step calculate the absolute difference of A and B into a temporary
* location at position 0 of the result. Negate the sign if A is
* smaller than B.
*/
neg = FALSE;
if (sizeA == sizeB) {
len = sizeA;
h1 = &baseA[len - 1];
h2 = &baseB[len - 1];
while ((len > 1) && (*h1 == *h2)) {
len--;
h1--;
h2--;
}
}
if ((sizeA > sizeB) || ((sizeA == sizeB) && (*h1 > *h2)))
{
h1 = baseA;
h2 = baseB;
sizeAB = sizeA;
subsize = sizeB;
} else {
neg = !neg;
h1 = baseB;
h2 = baseA;
sizeAB = sizeB;
subsize = sizeA;
}
copysize = sizeAB - subsize;
hd = baseAB;
carry = 0;
while (subsize--) {
sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}
while (copysize--) {
sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}
hd = &baseAB[sizeAB - 1];
while ((*hd == 0) && (sizeAB > 1)) {
hd--;
sizeAB--;
}
/*
* This completes the calculation of abs(A-B). For the next step
* calculate the absolute difference of D and C into a temporary
* location at position S of the result. Negate the sign if C is
* larger than D.
*/
if (sizeC == sizeD) {
len = sizeC;
h1 = &baseC[len - 1];
h2 = &baseD[len - 1];
while ((len > 1) && (*h1 == *h2)) {
len--;
h1--;
h2--;
}
}
if ((sizeC > sizeD) || ((sizeC == sizeD) && (*h1 > *h2)))
{
neg = !neg;
h1 = baseC;
h2 = baseD;
sizeDC = sizeC;
subsize = sizeD;
} else {
h1 = baseD;
h2 = baseC;
sizeDC = sizeD;
subsize = sizeC;
}
copysize = sizeDC - subsize;
hd = baseDC;
carry = 0;
while (subsize--) {
sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}
while (copysize--) {
sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}
hd = &baseDC[sizeDC - 1];
while ((*hd == 0) && (sizeDC > 1)) {
hd--;
sizeDC--;
}
/*
* This completes the calculation of abs(D-C). Now multiply
* together abs(A-B) and abs(D-C) into a temporary location,
* which is preserved until the final steps.
*/
baseABDC = temp;
sizeABDC = domul(baseAB, sizeAB, baseDC, sizeDC, baseABDC);
/*
* Now calculate B*D and A*C into one of their two final locations.
* Make sure the high order digits of the products are zeroed since
* this initializes the final result. Be careful about this zeroing
* since the size of the high order words might be smaller than
* the shift size. Do B*D first since the multiplies use one more
* word than the size of the product. Also zero the final extra
* word in the result for possible carries to use.
*/
len = domul(baseB, sizeB, baseD, sizeD, baseBD);
hd = &baseBD[len];
len = shift * 2 - len;
while (len--)
*hd++ = 0;
len = domul(baseA, sizeA, baseC, sizeC, baseAC);
hd = &baseAC[len];
len = sizetotal - shift * 2 - len + 1;
while (len--)
*hd++ = 0;
/*
* Now add in A*C and B*D into themselves at the other shifted
* position that they need. This addition is tricky in order to
* make sure that the two additions cannot interfere with each other.
* Therefore we first add in the top half of B*D and the lower half
* of A*C. The sources and destinations of these two additions
* overlap, and so the same answer results from the two additions,
* thus only two pointers suffice for both additions. Keep the
* final carry from these additions for later use since we cannot
* afford to change the top half of A*C yet.
*/
h1 = baseBD + shift;
h2 = baseAC;
carryACBD = 0;
len = shift;
while (len--) {
sival.ivalue = ((FULL) *h1) + ((FULL) *h2) + carryACBD;
*h1++ = sival.silow;
*h2++ = sival.silow;
carryACBD = sival.sihigh;
}
/*
* Now add in the bottom half of B*D and the top half of A*C.
* These additions are straightforward, except that A*C should
* be done first because of possible carries from B*D, and the
* top half of A*C might not exist. Add in one of the carries
* from the previous addition while we are at it.
*/
h1 = baseAC + shift;
hd = baseAC;
carry = carryACBD;
len = sizetotal - 3 * shift;
while (len--) {
sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
h1 = baseBD;
hd = baseBD + shift;
carry = 0;
len = shift;
while (len--) {
sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
/*
* Now finally add in the other delayed carry from the
* above addition.
*/
hd = baseAC + shift;
while (carryACBD) {
sival.ivalue = ((FULL) *hd) + carryACBD;
*hd++ = sival.silow;
carryACBD = sival.sihigh;
}
/*
* Now finally add or subtract (A-B)*(D-C) into the final result at
* the correct position (S), according to whether it is positive or
* negative. When subtracting, the answer cannot go negative.
*/
h1 = baseABDC;
hd = ans + shift;
carry = 0;
len = sizeABDC;
if (neg) {
while (len--) {
sival.ivalue = BASE1 - ((FULL) *hd) +
((FULL) *h1++) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = BASE1 - ((FULL) *hd) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}
} else {
while (len--) {
sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
}
/*
* Finally determine the size of the final result and return that.
*/
len = sizetotal;
hd = &ans[len - 1];
while ((*hd == 0) && (len > 1)) {
hd--;
len--;
}
tempbuf = temp;
return len;
}
/*
* Square a number by using the following formula recursively:
* (A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2
* where S is a power of 2^16, and so multiplies by it are shifts,
* and A and B are the left and right halfs of the number to square.
*/
void
zsquare(z, res)
ZVALUE z, *res;
{
LEN len;
if (iszero(z)) {
*res = _zero_;
return;
}
if (isunit(z)) {
*res = _one_;
return;
}
/*
* Allocate a temporary array if necessary for the recursion to use.
* The array needs to be allocated large enough for all of the
* temporary results to fit in. This size is about 3 times the
* size of the original number, since each recursion level uses 3/2
* of the size of its given number, and whose size is 1/2 the size
* of the previous level. The sum of the infinite series is 3.
* Allocate some extra words for rounding up the sizes.
*/
len = 3 * z.len + 32;
tempbuf = zalloctemp(len);
res->sign = 0;
res->v = alloc(z.len * 2);
res->len = dosquare(z.v, z.len, res->v);
}
/*
* Recursive routine to square a number by splitting it up into two numbers
* of half the size, and using the results of squaring the subpieces.
* The result is placed in the indicated array, which must be large
* enough for the result (size * 2). Returns the size of the result.
* This uses a temporary array which must be 3 times as large as the
* size of the number at the top level recursive call.
*/
static LEN
dosquare(vp, size, ans)
HALF *vp; /* value to be squared */
LEN size; /* length of value to square */
HALF *ans; /* location for result */
{
LEN shift; /* amount numbers are shifted by */
LEN sizeA; /* size of left half of number to square */
LEN sizeB; /* size of right half of number to square */
LEN sizeAA; /* size of square of left half */
LEN sizeBB; /* size of square of right half */
LEN sizeAABB; /* size of sum of squares of A and B */
LEN sizeAB; /* size of difference of A and B */
LEN sizeABAB; /* size of square of difference of A and B */
LEN subsize; /* size of difference of halfs */
LEN copysize; /* size of number left to copy */
LEN sumsize; /* size of sum */
LEN sizetotal; /* total size of square */
LEN len; /* temporary length */
LEN len1; /* another temporary length */
FULL carry; /* carry digit for small multiplications */
FULL digit; /* single digit multiplying by */
HALF *temp; /* base for temporary calculations */
HALF *baseA; /* base of left half of number */
HALF *baseB; /* base of right half of number */
HALF *baseAA; /* base of square of left half of number */
HALF *baseBB; /* base of square of right half of number */
HALF *baseAABB; /* base of sum of squares of A and B */
HALF *baseAB; /* base of difference of A and B */
HALF *baseABAB; /* base of square of difference of A and B */
register HALF *hd, *h1, *h2, *h3; /* for inner loops */
SIUNION sival; /* for addition of digits */
/*
* First trim the number of leading zeroes.
*/
hd = &vp[size - 1];
while ((*hd == 0) && (size > 1)) {
size--;
hd--;
}
sizetotal = size + size;
/*
* If the number has only a small number of digits, then use the
* (almost) normal multiplication method. Multiply each halfword
* only by those halfwards further on in the number. Missed terms
* will then be the same pairs of products repeated, and the squares
* of each halfword. The first case is handled by doubling the
* result. The second case is handled explicitly. The number of
* digits where the algorithm changes is settable from 2 to maxint.
*/
if (size < _sq2_) {
hd = ans;
len = sizetotal;
while (len--)
*hd++ = 0;
h2 = vp;
hd = ans + 1;
for (len = size; len--; hd += 2) {
digit = (FULL) *h2++;
if (digit == 0)
continue;
h3 = h2;
h1 = hd;
carry = 0;
len1 = len;
while (len1 >= 4) { /* expand the loop some */
len1 -= 4;
sival.ivalue = (digit * ((FULL) *h3++))
+ ((FULL) *h1) + carry;
*h1++ = sival.silow;
sival.ivalue = (digit * ((FULL) *h3++))
+ ((FULL) *h1) + ((FULL) sival.sihigh);
*h1++ = sival.silow;
sival.ivalue = (digit * ((FULL) *h3++))
+ ((FULL) *h1) + ((FULL) sival.sihigh);
*h1++ = sival.silow;
sival.ivalue = (digit * ((FULL) *h3++))
+ ((FULL) *h1) + ((FULL) sival.sihigh);
*h1++ = sival.silow;
carry = sival.sihigh;
}
while (len1--) {
sival.ivalue = (digit * ((FULL) *h3++))
+ ((FULL) *h1) + carry;
*h1++ = sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = ((FULL) *h1) + carry;
*h1++ = sival.silow;
carry = sival.sihigh;
}
}
/*
* Now double the result.
* There is no final carry to worry about because we
* handle all digits of the result which must fit.
*/
carry = 0;
hd = ans;
len = sizetotal;
while (len--) {
digit = ((FULL) *hd);
sival.ivalue = digit + digit + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
/*
* Now add in the squares of each halfword.
*/
carry = 0;
hd = ans;
h3 = vp;
len = size;
while (len--) {
digit = ((FULL) *h3++);
sival.ivalue = digit * digit + ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
sival.ivalue = ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
/*
* Finally return the size of the result.
*/
len = sizetotal;
hd = &ans[len - 1];
while ((*hd == 0) && (len > 1)) {
len--;
hd--;
}
return len;
}
/*
* The number to be squared is large.
* Allocate temporary space and determine the sizes and
* positions of the values to be calculated.
*/
temp = tempbuf;
tempbuf += (3 * (size + 1) / 2);
sizeA = size / 2;
sizeB = size - sizeA;
shift = sizeB;
baseA = vp + sizeB;
baseB = vp;
baseAA = &ans[shift * 2];
baseBB = ans;
baseAABB = temp;
baseAB = temp;
baseABAB = &temp[shift];
/*
* Trim the second number of leading zeroes.
*/
hd = &baseB[sizeB - 1];
while ((*hd == 0) && (sizeB > 1)) {
sizeB--;
hd--;
}
/*
* Now to proceed to calculate the result using the formula.
* (A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2.
* The steps are done in the following order:
* S^2*A^2 + B^2
* A^2 + B^2
* (S^2+S)*A^2 + (S+1)*B^2
* (A-B)^2
* (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2.
*
* Begin by forming the squares of two the halfs concatenated
* together in the final result location. Make sure that the
* highest words of the results are zero.
*/
sizeBB = dosquare(baseB, sizeB, baseBB);
hd = &baseBB[sizeBB];
len = shift * 2 - sizeBB;
while (len--)
*hd++ = 0;
sizeAA = dosquare(baseA, sizeA, baseAA);
hd = &baseAA[sizeAA];
len = sizetotal - shift * 2 - sizeAA;
while (len--)
*hd++ = 0;
/*
* Sum the two squares into a temporary location.
*/
if (sizeAA >= sizeBB) {
h1 = baseAA;
h2 = baseBB;
sizeAABB = sizeAA;
sumsize = sizeBB;
} else {
h1 = baseBB;
h2 = baseAA;
sizeAABB = sizeBB;
sumsize = sizeAA;
}
copysize = sizeAABB - sumsize;
hd = baseAABB;
carry = 0;
while (sumsize--) {
sival.ivalue = ((FULL) *h1++) + ((FULL) *h2++) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (copysize--) {
sival.ivalue = ((FULL) *h1++) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
if (carry) {
*hd = (HALF)carry;
sizeAABB++;
}
/*
* Add the sum back into the previously calculated squares
* shifted over to the proper location.
*/
h1 = baseAABB;
hd = ans + shift;
carry = 0;
len = sizeAABB;
while (len--) {
sival.ivalue = ((FULL) *hd) + ((FULL) *h1++) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = ((FULL) *hd) + carry;
*hd++ = sival.silow;
carry = sival.sihigh;
}
/*
* Calculate the absolute value of the difference of the two halfs
* into a temporary location.
*/
if (sizeA == sizeB) {
len = sizeA;
h1 = &baseA[len - 1];
h2 = &baseB[len - 1];
while ((len > 1) && (*h1 == *h2)) {
len--;
h1--;
h2--;
}
}
if ((sizeA > sizeB) || ((sizeA == sizeB) && (*h1 > *h2)))
{
h1 = baseA;
h2 = baseB;
sizeAB = sizeA;
subsize = sizeB;
} else {
h1 = baseB;
h2 = baseA;
sizeAB = sizeB;
subsize = sizeA;
}
copysize = sizeAB - subsize;
hd = baseAB;
carry = 0;
while (subsize--) {
sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}
while (copysize--) {
sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}
hd = &baseAB[sizeAB - 1];
while ((*hd == 0) && (sizeAB > 1)) {
sizeAB--;
hd--;
}
/*
* Now square the number into another temporary location,
* and subtract that from the final result.
*/
sizeABAB = dosquare(baseAB, sizeAB, baseABAB);
h1 = baseABAB;
hd = ans + shift;
carry = 0;
while (sizeABAB--) {
sival.ivalue = BASE1 - ((FULL) *hd) + ((FULL) *h1++) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}
while (carry) {
sival.ivalue = BASE1 - ((FULL) *hd) + carry;
*hd++ = BASE1 - sival.silow;
carry = sival.sihigh;
}
/*
* Return the size of the result.
*/
len = sizetotal;
hd = &ans[len - 1];
while ((*hd == 0) && (len > 1)) {
len--;
hd--;
}
tempbuf = temp;
return len;
}
/*
* Return a pointer to a buffer to be used for holding a temporary number.
* The buffer will be at least as large as the specified number of HALFs,
* and remains valid until the next call to this routine. The buffer cannot
* be freed by the caller. There is only one temporary buffer, and so as to
* avoid possible conflicts this is only used by the lowest level routines
* such as divide, multiply, and square.
*/
HALF *
zalloctemp(len)
LEN len; /* required number of HALFs in buffer */
{
HALF *hp;
static LEN buflen; /* current length of temp buffer */
static HALF *bufptr; /* pointer to current temp buffer */
if (len <= buflen)
return bufptr;
/*
* We need to grow the temporary buffer.
* First free any existing buffer, and then allocate the new one.
* While we are at it, make the new buffer bigger than necessary
* in order to reduce the number of reallocations.
*/
len += 100;
if (buflen) {
buflen = 0;
free(bufptr);
}
hp = (HALF *) malloc(len * sizeof(HALF));
if (hp == NULL)
error("No memory for temp buffer");
bufptr = hp;
buflen = len;
return hp;
}
/* END CODE */