home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Usenet 1994 October
/
usenetsourcesnewsgroupsinfomagicoctober1994disk2.iso
/
unix
/
volume27
/
calc-2.9.0
/
part13
< prev
next >
Wrap
Text File
|
1993-12-07
|
60KB
|
2,213 lines
Newsgroups: comp.sources.unix
From: dbell@canb.auug.org.au (David I. Bell)
Subject: v27i140: calc-2.9.0 - arbitrary precision C-like programmable calculator, Part13/19
References: <1.755316719.21314@gw.home.vix.com>
Sender: unix-sources-moderator@gw.home.vix.com
Approved: vixie@gw.home.vix.com
Submitted-By: dbell@canb.auug.org.au (David I. Bell)
Posting-Number: Volume 27, Issue 140
Archive-Name: calc-2.9.0/part13
#!/bin/sh
# this is part 13 of a multipart archive
# do not concatenate these parts, unpack them in order with /bin/sh
# file calc2.9.0/zmath.h continued
#
CurArch=13
if test ! -r s2_seq_.tmp
then echo "Please unpack part 1 first!"
exit 1; fi
( read Scheck
if test "$Scheck" != $CurArch
then echo "Please unpack part $Scheck next!"
exit 1;
else exit 0; fi
) < s2_seq_.tmp || exit 1
echo "x - Continuing file calc2.9.0/zmath.h"
sed 's/^X//' << 'SHAR_EOF' >> calc2.9.0/zmath.h
X#endif
Xextern void zminmod MATH_PROTO((ZVALUE z1, ZVALUE z2, ZVALUE *res));
Xextern BOOL zcmpmod MATH_PROTO((ZVALUE z1, ZVALUE z2, ZVALUE z3));
X
X
X/*
X * These functions are for internal use only.
X */
Xextern void ztrim MATH_PROTO((ZVALUE *z));
Xextern void zshiftr MATH_PROTO((ZVALUE z, long n));
Xextern void zshiftl MATH_PROTO((ZVALUE z, long n));
Xextern HALF *zalloctemp MATH_PROTO((LEN len));
Xextern void initmasks MATH_PROTO((void));
X
X
X/*
X * Modulo arithmetic definitions.
X * Structure holding state of REDC initialization.
X * Multiple instances of this structure can be used allowing
X * calculations with more than one modulus at the same time.
X * Len of zero means the structure is not initialized.
X */
Xtypedef struct {
X LEN len; /* number of words in binary modulus */
X ZVALUE mod; /* modulus REDC is computing with */
X ZVALUE inv; /* inverse of modulus in binary modulus */
X ZVALUE one; /* REDC format for the number 1 */
X} REDC;
X
Xextern REDC *zredcalloc MATH_PROTO((ZVALUE z1));
Xextern void zredcfree MATH_PROTO((REDC *rp));
Xextern void zredcencode MATH_PROTO((REDC *rp, ZVALUE z1, ZVALUE *res));
Xextern void zredcdecode MATH_PROTO((REDC *rp, ZVALUE z1, ZVALUE *res));
Xextern void zredcmul MATH_PROTO((REDC *rp, ZVALUE z1, ZVALUE z2, ZVALUE *res));
Xextern void zredcsquare MATH_PROTO((REDC *rp, ZVALUE z1, ZVALUE *res));
Xextern void zredcpower MATH_PROTO((REDC *rp, ZVALUE z1, ZVALUE z2, ZVALUE *res));
X
X
X/*
X * macro expansions to speed this thing up
X */
X#define ziseven(z) (!(*(z).v & 01))
X#define zisodd(z) (*(z).v & 01)
X#define ziszero(z) ((*(z).v == 0) && ((z).len == 1))
X#define zisneg(z) ((z).sign)
X#define zispos(z) (((z).sign == 0) && (*(z).v || ((z).len > 1)))
X#define zisunit(z) ((*(z).v == 1) && ((z).len == 1))
X#define zisone(z) ((*(z).v == 1) && ((z).len == 1) && !(z).sign)
X#define zisnegone(z) ((*(z).v == 1) && ((z).len == 1) && (z).sign)
X#define zistwo(z) ((*(z).v == 2) && ((z).len == 1) && !(z).sign)
X#define zisleone(z) ((*(z).v <= 1) && ((z).len == 1))
X#define zistiny(z) ((z).len == 1)
X#define zissmall(z) (((z).len < 2) || (((z).len == 2) && (((short)(z).v[1]) >= 0)))
X#define zisbig(z) (((z).len > 2) || (((z).len == 2) && (((short)(z).v[1]) < 0)))
X
X#define z1tol(z) ((long)((z).v[0]))
X#define z2tol(z) (((long)((z).v[0])) + \
X (((long)((z).v[1] & MAXHALF)) << BASEB))
X#define zclearval(z) memset((z).v, 0, (z).len * sizeof(HALF))
X#define zcopyval(z1,z2) memcpy((z2).v, (z1).v, (z1).len * sizeof(HALF))
X#define zquicktrim(z) {if (((z).len > 1) && ((z).v[(z).len-1] == 0)) \
X (z).len--;}
X#define zfree(z) freeh((z).v)
X
X
X/*
X * Output modes for numeric displays.
X */
X#define MODE_DEFAULT 0
X#define MODE_FRAC 1
X#define MODE_INT 2
X#define MODE_REAL 3
X#define MODE_EXP 4
X#define MODE_HEX 5
X#define MODE_OCTAL 6
X#define MODE_BINARY 7
X#define MODE_MAX 7
X
X#define MODE_INITIAL MODE_REAL
X
X
X/*
X * Output routines for either FILE handles or strings.
X */
Xextern void math_chr MATH_PROTO((int ch));
Xextern void math_str MATH_PROTO((char *str));
Xextern void math_fill MATH_PROTO((char *str, long width));
Xextern void math_flush MATH_PROTO((void));
Xextern void math_divertio MATH_PROTO((void));
Xextern void math_cleardiversions MATH_PROTO((void));
Xextern void math_setfp MATH_PROTO((FILE *fp));
Xextern char *math_getdivertedio MATH_PROTO((void));
Xextern int math_setmode MATH_PROTO((int mode));
Xextern long math_setdigits MATH_PROTO((long digits));
X
X
X#ifdef VARARGS
Xextern void math_fmt();
X#else
Xextern void math_fmt MATH_PROTO((char *, ...));
X#endif
X
X
X/*
X * The error routine.
X */
X#ifdef VARARGS
Xextern void math_error();
X#else
Xextern void math_error MATH_PROTO((char *, ...));
X#endif
X
X
X/*
X * constants used often by the arithmetic routines
X */
Xextern HALF _zeroval_[], _oneval_[], _twoval_[], _tenval_[];
Xextern ZVALUE _zero_, _one_, _ten_;
X
Xextern BOOL _math_abort_; /* nonzero to abort calculations */
Xextern ZVALUE _tenpowers_[32]; /* table of 10^2^n */
Xextern int _outmode_; /* current output mode */
Xextern LEN _mul2_; /* size of number to use multiply algorithm 2 */
Xextern LEN _sq2_; /* size of number to use square algorithm 2 */
Xextern LEN _pow2_; /* size of modulus to use REDC for powers */
Xextern LEN _redc2_; /* size of modulus to use REDC algorithm 2 */
Xextern HALF *bitmask; /* bit rotation, norm 0 */
X
X#endif
X
X/* END CODE */
SHAR_EOF
echo "File calc2.9.0/zmath.h is complete"
chmod 0644 calc2.9.0/zmath.h || echo "restore of calc2.9.0/zmath.h fails"
set `wc -c calc2.9.0/zmath.h`;Sum=$1
if test "$Sum" != "11814"
then echo original size 11814, current size $Sum;fi
echo "x - extracting calc2.9.0/zmod.c (Text)"
sed 's/^X//' << 'SHAR_EOF' > calc2.9.0/zmod.c &&
X/*
X * Copyright (c) 1993 David I. Bell
X * Permission is granted to use, distribute, or modify this source,
X * provided that this copyright notice remains intact.
X *
X * Routines to do modulo arithmetic both normally and also using the REDC
X * algorithm given by Peter L. Montgomery in Mathematics of Computation,
X * volume 44, number 170 (April, 1985). For multiple multiplies using
X * the same large modulus, the REDC algorithm avoids the usual division
X * by the modulus, instead replacing it with two multiplies or else a
X * special algorithm. When these two multiplies or the special algorithm
X * are faster then the division, then the REDC algorithm is better.
X */
X
X#include "zmath.h"
X
X
X#define POWBITS 4 /* bits for power chunks (must divide BASEB) */
X#define POWNUMS (1<<POWBITS) /* number of powers needed in table */
X
X
XLEN _pow2_ = POW_ALG2; /* modulo size to use REDC for powers */
XLEN _redc2_ = REDC_ALG2; /* modulo size to use second REDC algorithm */
X
Xstatic REDC *powermodredc = NULL; /* REDC info for raising to power */
X
X#if 0
Xextern void zaddmod MATH_PROTO((ZVALUE z1, ZVALUE z2, ZVALUE z3, ZVALUE *res));
Xextern void znegmod MATH_PROTO((ZVALUE z1, ZVALUE z2, ZVALUE *res));
X
X/*
X * Multiply two numbers together and then mod the result with a third number.
X * The two numbers to be multiplied can be negative or out of modulo range.
X * The result will be in the range 0 to the modulus - 1.
X */
Xvoid
Xzmulmod(z1, z2, z3, res)
X ZVALUE z1; /* first number to be multiplied */
X ZVALUE z2; /* second number to be multiplied */
X ZVALUE z3; /* number to take mod with */
X ZVALUE *res; /* result */
X{
X ZVALUE tmp;
X FULL prod;
X FULL digit;
X BOOL neg;
X
X if (ziszero(z3) || zisneg(z3))
X math_error("Mod of non-positive integer");
X if (ziszero(z1) || ziszero(z2) || zisunit(z3)) {
X *res = _zero_;
X return;
X }
X
X /*
X * If the modulus is a single digit number, then do the result
X * cheaply. Check especially for a small power of two.
X */
X if (zistiny(z3)) {
X neg = (z1.sign != z2.sign);
X digit = z3.v[0];
X if ((digit & -digit) == digit) { /* NEEDS 2'S COMP */
X prod = ((FULL) z1.v[0]) * ((FULL) z2.v[0]);
X prod &= (digit - 1);
X } else {
X z1.sign = 0;
X z2.sign = 0;
X prod = (FULL) zmodi(z1, (long) digit);
X prod *= (FULL) zmodi(z2, (long) digit);
X prod %= digit;
X }
X if (neg && prod)
X prod = digit - prod;
X itoz((long) prod, res);
X return;
X }
X
X /*
X * The modulus is more than one digit.
X * Actually do the multiply and divide if necessary.
X */
X zmul(z1, z2, &tmp);
X if (zispos(tmp) && ((tmp.len < z3.len) || ((tmp.len == z3.len) &&
X (tmp.v[tmp.len-1] < z2.v[z3.len-1]))))
X {
X *res = tmp;
X return;
X }
X zmod(tmp, z3, res);
X zfree(tmp);
X}
X
X
X/*
X * Square a number and then mod the result with a second number.
X * The number to be squared can be negative or out of modulo range.
X * The result will be in the range 0 to the modulus - 1.
X */
Xvoid
Xzsquaremod(z1, z2, res)
X ZVALUE z1; /* number to be squared */
X ZVALUE z2; /* number to take mod with */
X ZVALUE *res; /* result */
X{
X ZVALUE tmp;
X FULL prod;
X FULL digit;
X
X if (ziszero(z2) || zisneg(z2))
X math_error("Mod of non-positive integer");
X if (ziszero(z1) || zisunit(z2)) {
X *res = _zero_;
X return;
X }
X
X /*
X * If the modulus is a single digit number, then do the result
X * cheaply. Check especially for a small power of two.
X */
X if (zistiny(z2)) {
X digit = z2.v[0];
X if ((digit & -digit) == digit) { /* NEEDS 2'S COMP */
X prod = (FULL) z1.v[0];
X prod = (prod * prod) & (digit - 1);
X } else {
X z1.sign = 0;
X prod = (FULL) zmodi(z1, (long) digit);
X prod = (prod * prod) % digit;
X }
X itoz((long) prod, res);
X return;
X }
X
X /*
X * The modulus is more than one digit.
X * Actually do the square and divide if necessary.
X */
X zsquare(z1, &tmp);
X if ((tmp.len < z2.len) ||
X ((tmp.len == z2.len) && (tmp.v[tmp.len-1] < z2.v[z2.len-1]))) {
X *res = tmp;
X return;
X }
X zmod(tmp, z2, res);
X zfree(tmp);
X}
X
X
X/*
X * Add two numbers together and then mod the result with a third number.
X * The two numbers to be added can be negative or out of modulo range.
X * The result will be in the range 0 to the modulus - 1.
X */
Xstatic void
Xzaddmod(z1, z2, z3, res)
X ZVALUE z1; /* first number to be added */
X ZVALUE z2; /* second number to be added */
X ZVALUE z3; /* number to take mod with */
X ZVALUE *res; /* result */
X{
X ZVALUE tmp;
X FULL sumdigit;
X FULL moddigit;
X
X if (ziszero(z3) || zisneg(z3))
X math_error("Mod of non-positive integer");
X if ((ziszero(z1) && ziszero(z2)) || zisunit(z3)) {
X *res = _zero_;
X return;
X }
X if (zistwo(z2)) {
X if ((z1.v[0] + z2.v[0]) & 0x1)
X *res = _one_;
X else
X *res = _zero_;
X return;
X }
X zadd(z1, z2, &tmp);
X if (zisneg(tmp) || (tmp.len > z3.len)) {
X zmod(tmp, z3, res);
X zfree(tmp);
X return;
X }
X sumdigit = tmp.v[tmp.len - 1];
X moddigit = z3.v[z3.len - 1];
X if ((tmp.len < z3.len) || (sumdigit < moddigit)) {
X *res = tmp;
X return;
X }
X if (sumdigit < 2 * moddigit) {
X zsub(tmp, z3, res);
X zfree(tmp);
X return;
X }
X zmod(tmp, z2, res);
X zfree(tmp);
X}
X
X
X/*
X * Subtract two numbers together and then mod the result with a third number.
X * The two numbers to be subtract can be negative or out of modulo range.
X * The result will be in the range 0 to the modulus - 1.
X */
Xvoid
Xzsubmod(z1, z2, z3, res)
X ZVALUE z1; /* number to be subtracted from */
X ZVALUE z2; /* number to be subtracted */
X ZVALUE z3; /* number to take mod with */
X ZVALUE *res; /* result */
X{
X if (ziszero(z3) || zisneg(z3))
X math_error("Mod of non-positive integer");
X if (ziszero(z2)) {
X zmod(z1, z3, res);
X return;
X }
X if (ziszero(z1)) {
X znegmod(z2, z3, res);
X return;
X }
X if ((z1.sign == z2.sign) && (z1.len == z2.len) &&
X (z1.v[0] == z2.v[0]) && (zcmp(z1, z2) == 0)) {
X *res = _zero_;
X return;
X }
X z2.sign = !z2.sign;
X zaddmod(z1, z2, z3, res);
X}
X
X
X/*
X * Calculate the negative of a number modulo another number.
X * The number to be negated can be negative or out of modulo range.
X * The result will be in the range 0 to the modulus - 1.
X */
Xstatic void
Xznegmod(z1, z2, res)
X ZVALUE z1; /* number to take negative of */
X ZVALUE z2; /* number to take mod with */
X ZVALUE *res; /* result */
X{
X int sign;
X int cv;
X
X if (ziszero(z2) || zisneg(z2))
X math_error("Mod of non-positive integer");
X if (ziszero(z1) || zisunit(z2)) {
X *res = _zero_;
X return;
X }
X if (zistwo(z2)) {
X if (z1.v[0] & 0x1)
X *res = _one_;
X else
X *res = _zero_;
X return;
X }
X
X /*
X * If the absolute value of the number is within the modulo range,
X * then the result is just a copy or a subtraction. Otherwise go
X * ahead and negate and reduce the result.
X */
X sign = z1.sign;
X z1.sign = 0;
X cv = zrel(z1, z2);
X if (cv == 0) {
X *res = _zero_;
X return;
X }
X if (cv < 0) {
X if (sign)
X zcopy(z1, res);
X else
X zsub(z2, z1, res);
X return;
X }
X z1.sign = !sign;
X zmod(z1, z2, res);
X}
X#endif
X
X
X/*
X * Calculate the number congruent to the given number whose absolute
X * value is minimal. The number to be reduced can be negative or out of
X * modulo range. The result will be within the range -int((modulus-1)/2)
X * to int(modulus/2) inclusive. For example, for modulus 7, numbers are
X * reduced to the range [-3, 3], and for modulus 8, numbers are reduced to
X * the range [-3, 4].
X */
Xvoid
Xzminmod(z1, z2, res)
X ZVALUE z1; /* number to find minimum congruence of */
X ZVALUE z2; /* number to take mod with */
X ZVALUE *res; /* result */
X{
X ZVALUE tmp1, tmp2;
X int sign;
X int cv;
X
X if (ziszero(z2) || zisneg(z2))
X math_error("Mod of non-positive integer");
X if (ziszero(z1) || zisunit(z2)) {
X *res = _zero_;
X return;
X }
X if (zistwo(z2)) {
X if (zisodd(z1))
X *res = _one_;
X else
X *res = _zero_;
X return;
X }
X
X /*
X * Do a quick check to see if the number is very small compared
X * to the modulus. If so, then the result is obvious.
X */
X if (z1.len < z2.len - 1) {
X zcopy(z1, res);
X return;
X }
X
X /*
X * Now make sure the input number is within the modulo range.
X * If not, then reduce it to be within range and make the
X * quick check again.
X */
X sign = z1.sign;
X z1.sign = 0;
X cv = zrel(z1, z2);
X if (cv == 0) {
X *res = _zero_;
X return;
X }
X tmp1 = z1;
X if (cv > 0) {
X z1.sign = (BOOL)sign;
X zmod(z1, z2, &tmp1);
X if (tmp1.len < z2.len - 1) {
X *res = tmp1;
X return;
X }
X sign = 0;
X }
X
X /*
X * Now calculate the difference of the modulus and the absolute
X * value of the original number. Compare the original number with
X * the difference, and return the one with the smallest absolute
X * value, with the correct sign. If the two values are equal, then
X * return the positive result.
X */
X zsub(z2, tmp1, &tmp2);
X cv = zrel(tmp1, tmp2);
X if (cv < 0) {
X zfree(tmp2);
X tmp1.sign = (BOOL)sign;
X if (tmp1.v == z1.v)
X zcopy(tmp1, res);
X else
X *res = tmp1;
X } else {
X if (cv)
X tmp2.sign = !sign;
X if (tmp1.v != z1.v)
X zfree(tmp1);
X *res = tmp2;
X }
X}
X
X
X/*
X * Compare two numbers for equality modulo a third number.
X * The two numbers to be compared can be negative or out of modulo range.
X * Returns TRUE if the numbers are not congruent, and FALSE if they are
X * congruent.
X */
XBOOL
Xzcmpmod(z1, z2, z3)
X ZVALUE z1; /* first number to be compared */
X ZVALUE z2; /* second number to be compared */
X ZVALUE z3; /* modulus */
X{
X ZVALUE tmp1, tmp2, tmp3;
X FULL digit;
X LEN len;
X int cv;
X
X if (zisneg(z3) || ziszero(z3))
X math_error("Non-positive modulus in zcmpmod");
X if (zistwo(z3))
X return (((z1.v[0] + z2.v[0]) & 0x1) != 0);
X
X /*
X * If the two numbers are equal, then their mods are equal.
X */
X if ((z1.sign == z2.sign) && (z1.len == z2.len) &&
X (z1.v[0] == z2.v[0]) && (zcmp(z1, z2) == 0))
X return FALSE;
X
X /*
X * If both numbers are negative, then we can make them positive.
X */
X if (zisneg(z1) && zisneg(z2)) {
X z1.sign = 0;
X z2.sign = 0;
X }
X
X /*
X * For small negative numbers, make them positive before comparing.
X * In any case, the resulting numbers are in tmp1 and tmp2.
X */
X tmp1 = z1;
X tmp2 = z2;
X len = z3.len;
X digit = z3.v[len - 1];
X
X if (zisneg(z1) && ((z1.len < len) ||
X ((z1.len == len) && (z1.v[z1.len - 1] < digit))))
X zadd(z1, z3, &tmp1);
X
X if (zisneg(z2) && ((z2.len < len) ||
X ((z2.len == len) && (z2.v[z2.len - 1] < digit))))
X zadd(z2, z3, &tmp2);
X
X /*
X * Now compare the two numbers for equality.
X * If they are equal we are all done.
X */
X if (zcmp(tmp1, tmp2) == 0) {
X if (tmp1.v != z1.v)
X zfree(tmp1);
X if (tmp2.v != z2.v)
X zfree(tmp2);
X return FALSE;
X }
X
X /*
X * They are not identical. Now if both numbers are positive
X * and less than the modulus, then they are definitely not equal.
X */
X if ((tmp1.sign == tmp2.sign) &&
X ((tmp1.len < len) || (zrel(tmp1, z3) < 0)) &&
X ((tmp2.len < len) || (zrel(tmp2, z3) < 0)))
X {
X if (tmp1.v != z1.v)
X zfree(tmp1);
X if (tmp2.v != z2.v)
X zfree(tmp2);
X return TRUE;
X }
X
X /*
X * Either one of the numbers is negative or is large.
X * So do the standard thing and subtract the two numbers.
X * Then they are equal if the result is 0 (mod z3).
X */
X zsub(tmp1, tmp2, &tmp3);
X if (tmp1.v != z1.v)
X zfree(tmp1);
X if (tmp2.v != z2.v)
X zfree(tmp2);
X
X /*
X * Compare the result with the modulus to see if it is equal to
X * or less than the modulus. If so, we know the mod result.
X */
X tmp3.sign = 0;
X cv = zrel(tmp3, z3);
X if (cv == 0) {
X zfree(tmp3);
X return FALSE;
X }
X if (cv < 0) {
X zfree(tmp3);
X return TRUE;
X }
X
X /*
X * We are forced to actually do the division.
X * The numbers are congruent if the result is zero.
X */
X zmod(tmp3, z3, &tmp1);
X zfree(tmp3);
X if (ziszero(tmp1)) {
X zfree(tmp1);
X return FALSE;
X } else {
X zfree(tmp1);
X return TRUE;
X }
X}
X
X
X/*
X * Compute the result of raising one number to a power modulo another number.
X * That is, this computes: a^b (modulo c).
X * This calculates the result by examining the power POWBITS bits at a time,
X * using a small table of POWNUMS low powers to calculate powers for those bits,
X * and repeated squaring and multiplying by the partial powers to generate
X * the complete power. If the power being raised to is high enough, then
X * this uses the REDC algorithm to avoid doing many divisions. When using
X * REDC, multiple calls to this routine using the same modulus will be
X * slightly faster.
X */
Xvoid
Xzpowermod(z1, z2, z3, res)
X ZVALUE z1, z2, z3, *res;
X{
X HALF *hp; /* pointer to current word of the power */
X REDC *rp; /* REDC information to be used */
X ZVALUE *pp; /* pointer to low power table */
X ZVALUE ans, temp; /* calculation values */
X ZVALUE modpow; /* current small power */
X ZVALUE lowpowers[POWNUMS]; /* low powers */
X int sign; /* original sign of number */
X int curshift; /* shift value for word of power */
X HALF curhalf; /* current word of power */
X unsigned int curpow; /* current low power */
X unsigned int curbit; /* current bit of low power */
X int i;
X
X if (zisneg(z3) || ziszero(z3))
X math_error("Non-positive modulus in zpowermod");
X if (zisneg(z2))
X math_error("Negative power in zpowermod");
X
X sign = z1.sign;
X z1.sign = 0;
X
X /*
X * Check easy cases first.
X */
X if (ziszero(z1) || zisunit(z3)) { /* 0^x or mod 1 */
X *res = _zero_;
X return;
X }
X if (zistwo(z3)) { /* mod 2 */
X if (zisodd(z1))
X *res = _one_;
X else
X *res = _zero_;
X return;
X }
X if (zisunit(z1) && (!sign || ziseven(z2))) {
X /* 1^x or (-1)^(2x) */
X *res = _one_;
X return;
X }
X
X /*
X * Normalize the number being raised to be non-negative and to lie
X * within the modulo range. Then check for zero or one specially.
X */
X zmod(z1, z3, &temp);
X if (ziszero(temp)) {
X zfree(temp);
X *res = _zero_;
X return;
X }
X z1 = temp;
X if (sign) {
X zsub(z3, z1, &temp);
X zfree(z1);
X z1 = temp;
X }
X if (zisunit(z1)) {
X zfree(z1);
X *res = _one_;
X return;
X }
X
X /*
X * If the modulus is odd, large enough, is not one less than an
X * exact power of two, and if the power is large enough, then use
X * the REDC algorithm. The size where this is done is configurable.
X */
X if ((z2.len > 1) && (z3.len >= _pow2_) && zisodd(z3)
X && !zisallbits(z3))
X {
X if (powermodredc && zcmp(powermodredc->mod, z3)) {
X zredcfree(powermodredc);
X powermodredc = NULL;
X }
X if (powermodredc == NULL)
X powermodredc = zredcalloc(z3);
X rp = powermodredc;
X zredcencode(rp, z1, &temp);
X zredcpower(rp, temp, z2, &z1);
X zfree(temp);
X zredcdecode(rp, z1, res);
X zfree(z1);
X return;
X }
X
X /*
X * Modulus or power is small enough to perform the power raising
X * directly. Initialize the table of powers.
X */
X for (pp = &lowpowers[2]; pp < &lowpowers[POWNUMS]; pp++)
X pp->len = 0;
X lowpowers[0] = _one_;
X lowpowers[1] = z1;
X ans = _one_;
X
X hp = &z2.v[z2.len - 1];
X curhalf = *hp;
X curshift = BASEB - POWBITS;
X while (curshift && ((curhalf >> curshift) == 0))
X curshift -= POWBITS;
X
X /*
X * Calculate the result by examining the power POWBITS bits at a time,
X * and use the table of low powers at each iteration.
X */
X for (;;) {
X curpow = (curhalf >> curshift) & (POWNUMS - 1);
X pp = &lowpowers[curpow];
X
X /*
X * If the small power is not yet saved in the table, then
X * calculate it and remember it in the table for future use.
X */
X if (pp->len == 0) {
X if (curpow & 0x1)
X zcopy(z1, &modpow);
X else
X modpow = _one_;
X
X for (curbit = 0x2; curbit <= curpow; curbit *= 2) {
X pp = &lowpowers[curbit];
X if (pp->len == 0) {
X zsquare(lowpowers[curbit/2], &temp);
X zmod(temp, z3, pp);
X zfree(temp);
X }
X if (curbit & curpow) {
X zmul(*pp, modpow, &temp);
X zfree(modpow);
X zmod(temp, z3, &modpow);
X zfree(temp);
X }
X }
X pp = &lowpowers[curpow];
X *pp = modpow;
X }
X
X /*
X * If the power is nonzero, then accumulate the small power
X * into the result.
X */
X if (curpow) {
X zmul(ans, *pp, &temp);
X zfree(ans);
X zmod(temp, z3, &ans);
X zfree(temp);
X }
X
X /*
X * Select the next POWBITS bits of the power, if there is
X * any more to generate.
X */
X curshift -= POWBITS;
X if (curshift < 0) {
X if (hp-- == z2.v)
X break;
X curhalf = *hp;
X curshift = BASEB - POWBITS;
X }
X
X /*
X * Square the result POWBITS times to make room for the next
X * chunk of bits.
X */
X for (i = 0; i < POWBITS; i++) {
X zsquare(ans, &temp);
X zfree(ans);
X zmod(temp, z3, &ans);
X zfree(temp);
X }
X }
X
X for (pp = &lowpowers[2]; pp < &lowpowers[POWNUMS]; pp++) {
X if (pp->len)
X freeh(pp->v);
X }
X *res = ans;
X}
X
X
X/*
X * Initialize the REDC algorithm for a particular modulus,
X * returning a pointer to a structure that is used for other
X * REDC calls. An error is generated if the structure cannot
X * be allocated. The modulus must be odd and positive.
X */
XREDC *
Xzredcalloc(z1)
X ZVALUE z1; /* modulus to initialize for */
X{
X REDC *rp; /* REDC information */
X ZVALUE tmp;
X long bit;
X
X if (ziseven(z1) || zisneg(z1))
X math_error("REDC requires positive odd modulus");
X
X rp = (REDC *) malloc(sizeof(REDC));
X if (rp == NULL)
X math_error("Cannot allocate REDC structure");
X
X /*
X * Round up the binary modulus to the next power of two
X * which is at a word boundary. Then the shift and modulo
X * operations mod the binary modulus can be done very cheaply.
X * Calculate the REDC format for the number 1 for future use.
X */
X bit = zhighbit(z1) + 1;
X if (bit % BASEB)
X bit += (BASEB - (bit % BASEB));
X zcopy(z1, &rp->mod);
X zbitvalue(bit, &tmp);
X z1.sign = 1;
X (void) zmodinv(z1, tmp, &rp->inv);
X zmod(tmp, rp->mod, &rp->one);
X zfree(tmp);
X rp->len = bit / BASEB;
X return rp;
X}
X
X
X/*
X * Free any numbers associated with the specified REDC structure,
X * and then the REDC structure itself.
X */
Xvoid
Xzredcfree(rp)
X REDC *rp; /* REDC information to be cleared */
X{
X zfree(rp->mod);
X zfree(rp->inv);
X zfree(rp->one);
X free(rp);
X}
X
X
X/*
X * Convert a normal number into the specified REDC format.
X * The number to be converted can be negative or out of modulo range.
X * The resulting number can be used for multiplying, adding, subtracting,
X * or comparing with any other such converted numbers, as if the numbers
X * were being calculated modulo the number which initialized the REDC
X * information. When the final value is unconverted, the result is the
X * same as if the usual operations were done with the original numbers.
X */
Xvoid
Xzredcencode(rp, z1, res)
X REDC *rp; /* REDC information */
X ZVALUE z1; /* number to be converted */
X ZVALUE *res; /* returned converted number */
X{
X ZVALUE tmp1, tmp2;
X
X /*
X * Handle the cases 0, 1, -1, and 2 specially since these are
X * easy to calculate. Zero transforms to zero, and the others
X * can be obtained from the precomputed REDC format for 1 since
X * addition and subtraction act normally for REDC format numbers.
X */
X if (ziszero(z1)) {
X *res = _zero_;
X return;
X }
X if (zisone(z1)) {
X zcopy(rp->one, res);
X return;
X }
X if (zisunit(z1)) {
X zsub(rp->mod, rp->one, res);
X return;
X }
X if (zistwo(z1)) {
X zadd(rp->one, rp->one, &tmp1);
X if (zrel(tmp1, rp->mod) < 0) {
X *res = tmp1;
X return;
X }
X zsub(tmp1, rp->mod, res);
X zfree(tmp1);
X return;
X }
X
X /*
X * Not a trivial number to convert, so do the full transformation.
X * Convert negative numbers to positive numbers before converting.
X */
X tmp1.len = 0;
X if (zisneg(z1)) {
X zmod(z1, rp->mod, &tmp1);
X z1 = tmp1;
X }
X zshift(z1, rp->len * BASEB, &tmp2);
X if (tmp1.len)
X zfree(tmp1);
X zmod(tmp2, rp->mod, res);
X zfree(tmp2);
X}
X
X
X/*
X * The REDC algorithm used to convert numbers out of REDC format and also
X * used after multiplication of two REDC numbers. Using this routine
X * avoids any divides, replacing the divide by two multiplications.
X * If the numbers are very large, then these two multiplies will be
X * quicker than the divide, since dividing is harder than multiplying.
X */
Xvoid
Xzredcdecode(rp, z1, res)
X REDC *rp; /* REDC information */
X ZVALUE z1; /* number to be transformed */
X ZVALUE *res; /* returned transformed number */
X{
X ZVALUE tmp1, tmp2; /* temporaries */
X HALF *hp; /* saved pointer to tmp2 value */
X
X if (zisneg(z1))
X math_error("Negative number for zredc");
X
X /*
X * Check first for the special values for 0 and 1 that are easy.
X */
X if (ziszero(z1)) {
X *res = _zero_;
X return;
X }
X if ((z1.len == rp->one.len) && (z1.v[0] == rp->one.v[0]) &&
X (zcmp(z1, rp->one) == 0)) {
X *res = _one_;
X return;
X }
X
X /*
X * First calculate the following:
X * tmp2 = ((z1 % 2^bitnum) * inv) % 2^bitnum.
X * The mod operations can be done with no work since the bit
X * number was selected as a multiple of the word size. Just
X * reduce the sizes of the numbers as required.
X */
X tmp1 = z1;
X if (tmp1.len > rp->len)
X tmp1.len = rp->len;
X zmul(tmp1, rp->inv, &tmp2);
X if (tmp2.len > rp->len)
X tmp2.len = rp->len;
X
X /*
X * Next calculate the following:
X * res = (z1 + tmp2 * modulus) / 2^bitnum
X * The division by a power of 2 is always exact, and requires no
X * work. Just adjust the address and length of the number to do
X * the divide, but save the original pointer for freeing later.
X */
X zmul(tmp2, rp->mod, &tmp1);
X zfree(tmp2);
X zadd(z1, tmp1, &tmp2);
X zfree(tmp1);
X hp = tmp2.v;
X if (tmp2.len <= rp->len) {
X freeh(hp);
X *res = _zero_;
X return;
X }
X tmp2.v += rp->len;
X tmp2.len -= rp->len;
X
X /*
X * Finally do a final modulo by a simple subtraction if necessary.
X * This is all that is needed because the previous calculation is
X * guaranteed to always be less than twice the modulus.
X */
X if (zrel(tmp2, rp->mod) < 0)
X zcopy(tmp2, res);
X else
X zsub(tmp2, rp->mod, res);
X freeh(hp);
X}
X
X
X/*
X * Multiply two numbers in REDC format together producing a result also
X * in REDC format. If the result is converted back to a normal number,
X * then the result is the same as the modulo'd multiplication of the
X * original numbers before they were converted to REDC format. This
X * calculation is done in one of two ways, depending on the size of the
X * modulus. For large numbers, the REDC definition is used directly
X * which involves three multiplies overall. For small numbers, a
X * complicated routine is used which does the indicated multiplication
X * and the REDC algorithm at the same time to produce the result.
X */
Xvoid
Xzredcmul(rp, z1, z2, res)
X REDC *rp; /* REDC information */
X ZVALUE z1; /* first REDC number to be multiplied */
X ZVALUE z2; /* second REDC number to be multiplied */
X ZVALUE *res; /* resulting REDC number */
X{
X FULL mulb;
X FULL muln;
X HALF *h1;
X HALF *h2;
X HALF *h3;
X HALF *hd;
X HALF Ninv;
X HALF topdigit;
X LEN modlen;
X LEN len;
X LEN len2;
X SIUNION sival1;
X SIUNION sival2;
X SIUNION sival3;
X SIUNION carry;
X ZVALUE tmp;
X
X if (zisneg(z1) || (z1.len > rp->mod.len) ||
X zisneg(z2) || (z2.len > rp->mod.len))
X math_error("Negative or too large number in zredcmul");
X
X /*
X * Check for special values which we easily know the answer.
X */
X if (ziszero(z1) || ziszero(z2)) {
X *res = _zero_;
X return;
X }
X
X if ((z1.len == rp->one.len) && (z1.v[0] == rp->one.v[0]) &&
X (zcmp(z1, rp->one) == 0)) {
X zcopy(z2, res);
X return;
X }
X
X if ((z2.len == rp->one.len) && (z2.v[0] == rp->one.v[0]) &&
X (zcmp(z2, rp->one) == 0)) {
X zcopy(z1, res);
X return;
X }
X
X /*
X * If the size of the modulus is large, then just do the multiply,
X * followed by the two multiplies contained in the REDC routine.
X * This will be quicker than directly doing the REDC calculation
X * because of the O(N^1.585) speed of the multiplies. The size
X * of the number which this is done is configurable.
X */
X if (rp->mod.len >= _redc2_) {
X zmul(z1, z2, &tmp);
X zredcdecode(rp, tmp, res);
X zfree(tmp);
X return;
X }
X
X /*
X * The number is small enough to calculate by doing the O(N^2) REDC
X * algorithm directly. This algorithm performs the multiplication and
X * the reduction at the same time. Notice the obscure facts that
X * only the lowest word of the inverse value is used, and that
X * there is no shifting of the partial products as there is in a
X * normal multiply.
X */
X modlen = rp->mod.len;
X Ninv = rp->inv.v[0];
X
X /*
X * Allocate the result and clear it.
X * The size of the result will be equal to or smaller than
X * the modulus size.
X */
X res->sign = 0;
X res->len = modlen;
X res->v = alloc(modlen);
X
X hd = res->v;
X len = modlen;
X while (len--)
X *hd++ = 0;
X
X /*
X * Do this outermost loop over all the digits of z1.
X */
X h1 = z1.v;
X len = z1.len;
X while (len--) {
X /*
X * Start off with the next digit of z1, the first
X * digit of z2, and the first digit of the modulus.
X */
X mulb = (FULL) *h1++;
X h2 = z2.v;
X h3 = rp->mod.v;
X hd = res->v;
X sival1.ivalue = mulb * ((FULL) *h2++) + ((FULL) *hd++);
X muln = ((HALF) (sival1.silow * Ninv));
X sival2.ivalue = muln * ((FULL) *h3++);
X sival3.ivalue = ((FULL) sival1.silow) + ((FULL) sival2.silow);
X carry.ivalue = ((FULL) sival1.sihigh) + ((FULL) sival2.sihigh)
X + ((FULL) sival3.sihigh);
X
X /*
X * Do this innermost loop for each digit of z2, except
X * for the first digit which was just done above.
X */
X len2 = z2.len;
X while (--len2 > 0) {
X sival1.ivalue = mulb * ((FULL) *h2++);
X sival2.ivalue = muln * ((FULL) *h3++);
X sival3.ivalue = ((FULL) sival1.silow)
X + ((FULL) sival2.silow)
X + ((FULL) *hd)
X + ((FULL) carry.silow);
X carry.ivalue = ((FULL) sival1.sihigh)
X + ((FULL) sival2.sihigh)
X + ((FULL) sival3.sihigh)
X + ((FULL) carry.sihigh);
X
X hd[-1] = sival3.silow;
X hd++;
X }
X
X /*
X * Now continue the loop as necessary so the total number
X * of interations is equal to the size of the modulus.
X * This acts as if the innermost loop was repeated for
X * high digits of z2 that are zero.
X */
X len2 = modlen - z2.len;
X while (len2--) {
X sival2.ivalue = muln * ((FULL) *h3++);
X sival3.ivalue = ((FULL) sival2.silow)
X + ((FULL) *hd)
X + ((FULL) carry.silow);
X carry.ivalue = ((FULL) sival2.sihigh)
X + ((FULL) sival3.sihigh)
X + ((FULL) carry.sihigh);
X
X hd[-1] = sival3.silow;
X hd++;
X }
X
X res->v[modlen - 1] = carry.silow;
X topdigit = carry.sihigh;
X }
X
X /*
X * Now continue the loop as necessary so the total number
X * of interations is equal to the size of the modulus.
X * This acts as if the outermost loop was repeated for high
X * digits of z1 that are zero.
X */
X len = modlen - z1.len;
X while (len--) {
X /*
X * Start off with the first digit of the modulus.
X */
X h3 = rp->mod.v;
X hd = res->v;
X muln = ((HALF) (*hd * Ninv));
X sival2.ivalue = muln * ((FULL) *h3++);
X sival3.ivalue = ((FULL) *hd++) + ((FULL) sival2.silow);
X carry.ivalue = ((FULL) sival2.sihigh) + ((FULL) sival3.sihigh);
X
X /*
X * Do this innermost loop for each digit of the modulus,
X * except for the first digit which was just done above.
X */
X len2 = modlen;
X while (--len2 > 0) {
X sival2.ivalue = muln * ((FULL) *h3++);
X sival3.ivalue = ((FULL) sival2.silow)
X + ((FULL) *hd)
X + ((FULL) carry.silow);
X carry.ivalue = ((FULL) sival2.sihigh)
X + ((FULL) sival3.sihigh)
X + ((FULL) carry.sihigh);
X
X hd[-1] = sival3.silow;
X hd++;
X }
X res->v[modlen - 1] = carry.silow;
X topdigit = carry.sihigh;
X }
X
X /*
X * Determine the true size of the result, taking the top digit of
X * the current result into account. The top digit is not stored in
X * the number because it is temporary and would become zero anyway
X * after the final subtraction is done.
X */
X if (topdigit == 0) {
X len = modlen;
X hd = &res->v[len - 1];
X while ((*hd == 0) && (len > 1)) {
X hd--;
X len--;
X }
X res->len = len;
X }
X
X /*
X * Compare the result with the modulus.
X * If it is less than the modulus, then the calculation is complete.
X */
X if ((topdigit == 0) && ((len < modlen)
X || (res->v[len - 1] < rp->mod.v[len - 1])
X || (zrel(*res, rp->mod) < 0)))
X return;
X
X /*
X * Do a subtraction to reduce the result to a value less than
X * the modulus. The REDC algorithm guarantees that a single subtract
X * is all that is needed. Ignore any borrowing from the possible
X * highest word of the current result because that would affect
X * only the top digit value that was not stored and would become
X * zero anyway.
X */
X carry.ivalue = 0;
X h1 = rp->mod.v;
X hd = res->v;
X len = modlen;
X while (len--) {
X carry.ivalue = BASE1 - ((FULL) *hd) + ((FULL) *h1++)
X + ((FULL) carry.silow);
X *hd++ = BASE1 - carry.silow;
X carry.silow = carry.sihigh;
X }
X
X /*
X * Now finally recompute the size of the result.
X */
X len = modlen;
X hd = &res->v[len - 1];
X while ((*hd == 0) && (len > 1)) {
X hd--;
X len--;
X }
X res->len = len;
X}
X
X
X/*
X * Square a number in REDC format producing a result also in REDC format.
X */
Xvoid
Xzredcsquare(rp, z1, res)
X REDC *rp; /* REDC information */
X ZVALUE z1; /* REDC number to be squared */
X ZVALUE *res; /* resulting REDC number */
X{
X ZVALUE tmp;
X
X if (zisneg(z1))
X math_error("Negative number in zredcsquare");
X if (ziszero(z1)) {
X *res = _zero_;
X return;
X }
X if ((z1.len == rp->one.len) && (z1.v[0] == rp->one.v[0]) &&
X (zcmp(z1, rp->one) == 0)) {
X zcopy(z1, res);
X return;
X }
X
X /*
X * If the modulus is small enough, then call the multiply
X * routine to produce the result. Otherwise call the O(N^1.585)
X * routines to get the answer.
X */
X if (rp->mod.len < _redc2_) {
X zredcmul(rp, z1, z1, res);
X return;
X }
X zsquare(z1, &tmp);
X zredcdecode(rp, tmp, res);
X zfree(tmp);
X}
X
X
X/*
X * Compute the result of raising a REDC format number to a power.
X * The result is within the range 0 to the modulus - 1.
X * This calculates the result by examining the power POWBITS bits at a time,
X * using a small table of POWNUMS low powers to calculate powers for those bits,
X * and repeated squaring and multiplying by the partial powers to generate
X * the complete power.
X */
Xvoid
Xzredcpower(rp, z1, z2, res)
X REDC *rp; /* REDC information */
X ZVALUE z1; /* REDC number to be raised */
X ZVALUE z2; /* normal number to raise number to */
X ZVALUE *res; /* result */
X{
X HALF *hp; /* pointer to current word of the power */
X ZVALUE *pp; /* pointer to low power table */
X ZVALUE ans, temp; /* calculation values */
X ZVALUE modpow; /* current small power */
X ZVALUE lowpowers[POWNUMS]; /* low powers */
X int curshift; /* shift value for word of power */
X HALF curhalf; /* current word of power */
X unsigned int curpow; /* current low power */
X unsigned int curbit; /* current bit of low power */
X int i;
X
X if (zisneg(z1))
X math_error("Negative number in zredcpower");
X if (zisneg(z2))
X math_error("Negative power in zredcpower");
X
X /*
X * Check for zero or the REDC format for one.
X */
X if (ziszero(z1) || zisunit(rp->mod)) {
X *res = _zero_;
X return;
X }
X if (zcmp(z1, rp->one) == 0) {
X zcopy(rp->one, res);
X return;
X }
X
X /*
X * See if the number being raised is the REDC format for -1.
X * If so, then the answer is the REDC format for one or minus one.
X * To do this check, calculate the REDC format for -1.
X */
X if (((HALF)(z1.v[0] + rp->one.v[0])) == rp->mod.v[0]) {
X zsub(rp->mod, rp->one, &temp);
X if (zcmp(z1, temp) == 0) {
X if (zisodd(z2)) {
X *res = temp;
X return;
X }
X zfree(temp);
X zcopy(rp->one, res);
X return;
X }
X zfree(temp);
X }
X
X for (pp = &lowpowers[2]; pp < &lowpowers[POWNUMS]; pp++)
X pp->len = 0;
X zcopy(rp->one, &lowpowers[0]);
X zcopy(z1, &lowpowers[1]);
X zcopy(rp->one, &ans);
X
X hp = &z2.v[z2.len - 1];
X curhalf = *hp;
X curshift = BASEB - POWBITS;
X while (curshift && ((curhalf >> curshift) == 0))
X curshift -= POWBITS;
X
X /*
X * Calculate the result by examining the power POWBITS bits at a time,
X * and use the table of low powers at each iteration.
X */
X for (;;) {
X curpow = (curhalf >> curshift) & (POWNUMS - 1);
X pp = &lowpowers[curpow];
X
X /*
X * If the small power is not yet saved in the table, then
X * calculate it and remember it in the table for future use.
X */
X if (pp->len == 0) {
X if (curpow & 0x1)
X zcopy(z1, &modpow);
X else
X zcopy(rp->one, &modpow);
X
X for (curbit = 0x2; curbit <= curpow; curbit *= 2) {
X pp = &lowpowers[curbit];
X if (pp->len == 0)
X zredcsquare(rp, lowpowers[curbit/2],
X pp);
X if (curbit & curpow) {
X zredcmul(rp, *pp, modpow, &temp);
X zfree(modpow);
X modpow = temp;
X }
X }
X pp = &lowpowers[curpow];
X *pp = modpow;
X }
X
X /*
X * If the power is nonzero, then accumulate the small power
X * into the result.
X */
X if (curpow) {
X zredcmul(rp, ans, *pp, &temp);
X zfree(ans);
X ans = temp;
X }
X
X /*
X * Select the next POWBITS bits of the power, if there is
X * any more to generate.
X */
X curshift -= POWBITS;
X if (curshift < 0) {
X if (hp-- == z2.v)
X break;
X curhalf = *hp;
X curshift = BASEB - POWBITS;
X }
X
X /*
X * Square the result POWBITS times to make room for the next
X * chunk of bits.
X */
X for (i = 0; i < POWBITS; i++) {
X zredcsquare(rp, ans, &temp);
X zfree(ans);
X ans = temp;
X }
X }
X
X for (pp = lowpowers; pp < &lowpowers[POWNUMS]; pp++) {
X if (pp->len)
X freeh(pp->v);
X }
X *res = ans;
X}
X
X/* END CODE */
SHAR_EOF
chmod 0644 calc2.9.0/zmod.c || echo "restore of calc2.9.0/zmod.c fails"
set `wc -c calc2.9.0/zmod.c`;Sum=$1
if test "$Sum" != "32540"
then echo original size 32540, current size $Sum;fi
echo "x - extracting calc2.9.0/zmul.c (Text)"
sed 's/^X//' << 'SHAR_EOF' > calc2.9.0/zmul.c &&
X/*
X * Copyright (c) 1993 David I. Bell
X * Permission is granted to use, distribute, or modify this source,
X * provided that this copyright notice remains intact.
X *
X * Faster than usual multiplying and squaring routines.
X * The algorithm used is the reasonably simple one from Knuth, volume 2,
X * section 4.3.3. These recursive routines are of speed O(N^1.585)
X * instead of O(N^2). The usual multiplication and (almost usual) squaring
X * algorithms are used for small numbers. On a 386 with its compiler, the
X * two algorithms are equal in speed at about 100 decimal digits.
X */
X
X#include "zmath.h"
X
X
XLEN _mul2_ = MUL_ALG2; /* size of number to use multiply algorithm 2 */
XLEN _sq2_ = SQ_ALG2; /* size of number to use square algorithm 2 */
X
X
Xstatic HALF *tempbuf; /* temporary buffer for multiply and square */
X
Xstatic LEN domul MATH_PROTO((HALF *v1, LEN size1, HALF *v2, LEN size2, HALF *ans));
Xstatic LEN dosquare MATH_PROTO((HALF *vp, LEN size, HALF *ans));
X
X
X/*
X * Multiply two numbers using the following formula recursively:
X * (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D
X * where S is a power of 2^16, and so multiplies by it are shifts, and
X * A,B,C,D are the left and right halfs of the numbers to be multiplied.
X */
Xvoid
Xzmul(z1, z2, res)
X ZVALUE z1, z2; /* numbers to multiply */
X ZVALUE *res; /* result of multiplication */
X{
X LEN len; /* size of array */
X
X if (ziszero(z1) || ziszero(z2)) {
X *res = _zero_;
X return;
X }
X if (zisunit(z1)) {
X zcopy(z2, res);
X res->sign = (z1.sign != z2.sign);
X return;
X }
X if (zisunit(z2)) {
X zcopy(z1, res);
X res->sign = (z1.sign != z2.sign);
X return;
X }
X
X /*
X * Allocate a temporary buffer for the recursion levels to use.
X * An array needs to be allocated large enough for all of the
X * temporary results to fit in. This size is about twice the size
X * of the largest original number, since each recursion level uses
X * the size of its given number, and whose size is 1/2 the size of
X * the previous level. The sum of the infinite series is 2.
X * Add some extra words because of rounding when dividing by 2
X * and also because of the extra word that each multiply needs.
X */
X len = z1.len;
X if (len < z2.len)
X len = z2.len;
X len = 2 * len + 64;
X tempbuf = zalloctemp(len);
X
X res->sign = (z1.sign != z2.sign);
X res->v = alloc(z1.len + z2.len + 1);
X res->len = domul(z1.v, z1.len, z2.v, z2.len, res->v);
X}
X
X
X/*
X * Recursive routine to multiply two numbers by splitting them up into
X * two numbers of half the size, and using the results of multiplying the
X * subpieces. The result is placed in the indicated array, which must be
X * large enough for the result plus one extra word (size1 + size2 + 1).
X * Returns the actual size of the result with leading zeroes stripped.
X * This also uses a temporary array which must be twice as large as
X * one more than the size of the number at the top level recursive call.
X */
Xstatic LEN
Xdomul(v1, size1, v2, size2, ans)
X HALF *v1; /* first number */
X LEN size1; /* size of first number */
X HALF *v2; /* second number */
X LEN size2; /* size of second number */
X HALF *ans; /* location for result */
X{
X LEN shift; /* amount numbers are shifted by */
X LEN sizeA; /* size of left half of first number */
X LEN sizeB; /* size of right half of first number */
X LEN sizeC; /* size of left half of second number */
X LEN sizeD; /* size of right half of second number */
X LEN sizeAB; /* size of subtraction of A and B */
X LEN sizeDC; /* size of subtraction of D and C */
X LEN sizeABDC; /* size of product of above two results */
X LEN subsize; /* size of difference of halfs */
X LEN copysize; /* size of number left to copy */
X LEN sizetotal; /* total size of product */
X LEN len; /* temporary length */
X HALF *baseA; /* base of left half of first number */
X HALF *baseB; /* base of right half of first number */
X HALF *baseC; /* base of left half of second number */
X HALF *baseD; /* base of right half of second number */
X HALF *baseAB; /* base of result of subtraction of A and B */
X HALF *baseDC; /* base of result of subtraction of D and C */
X HALF *baseABDC; /* base of product of above two results */
X HALF *baseAC; /* base of product of A and C */
X HALF *baseBD; /* base of product of B and D */
X FULL carry; /* carry digit for small multiplications */
X FULL carryACBD; /* carry from addition of A*C and B*D */
X FULL digit; /* single digit multiplying by */
X HALF *temp; /* base for temporary calculations */
X BOOL neg; /* whether imtermediate term is negative */
X register HALF *hd, *h1, *h2; /* for inner loops */
X SIUNION sival; /* for addition of digits */
X
X /*
X * Trim the numbers of leading zeroes and initialize the
X * estimated size of the result.
X */
X hd = &v1[size1 - 1];
X while ((*hd == 0) && (size1 > 1)) {
X hd--;
X size1--;
X }
X hd = &v2[size2 - 1];
X while ((*hd == 0) && (size2 > 1)) {
X hd--;
X size2--;
X }
X sizetotal = size1 + size2;
X
X /*
X * First check for zero answer.
X */
X if (((size1 == 1) && (*v1 == 0)) || ((size2 == 1) && (*v2 == 0))) {
X *ans = 0;
X return 1;
X }
X
X /*
X * Exchange the two numbers if necessary to make the number of
X * digits of the first number be greater than or equal to the
X * second number.
X */
X if (size1 < size2) {
X len = size1; size1 = size2; size2 = len;
X hd = v1; v1 = v2; v2 = hd;
X }
X
X /*
X * If the smaller number has only a few digits, then calculate
X * the result in the normal manner in order to avoid the overhead
X * of the recursion for small numbers. The number of digits where
X * the algorithm changes is settable from 2 to maxint.
X */
X if (size2 < _mul2_) {
X /*
X * First clear the top part of the result, and then multiply
X * by the lowest digit to get the first partial sum. Later
X * products will then add into this result.
X */
X hd = &ans[size1];
X len = size2;
X while (len--)
X *hd++ = 0;
X
X digit = *v2++;
X h1 = v1;
X hd = ans;
X carry = 0;
X len = size1;
X while (len >= 4) { /* expand the loop some */
X len -= 4;
X sival.ivalue = ((FULL) *h1++) * digit + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X sival.ivalue = ((FULL) *h1++) * digit + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X sival.ivalue = ((FULL) *h1++) * digit + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X sival.ivalue = ((FULL) *h1++) * digit + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X }
X while (len--) {
X sival.ivalue = ((FULL) *h1++) * digit + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X }
X *hd = (HALF)carry;
X
X /*
X * Now multiply by the remaining digits of the second number,
X * adding each product into the final result.
X */
X h2 = ans;
X while (--size2 > 0) {
X digit = *v2++;
X h1 = v1;
X hd = ++h2;
X if (digit == 0)
X continue;
X carry = 0;
X len = size1;
X while (len >= 4) { /* expand the loop some */
X len -= 4;
X sival.ivalue = ((FULL) *h1++) * digit
X + ((FULL) *hd) + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X sival.ivalue = ((FULL) *h1++) * digit
X + ((FULL) *hd) + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X sival.ivalue = ((FULL) *h1++) * digit
X + ((FULL) *hd) + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X sival.ivalue = ((FULL) *h1++) * digit
X + ((FULL) *hd) + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X }
X while (len--) {
X sival.ivalue = ((FULL) *h1++) * digit
X + ((FULL) *hd) + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X }
X while (carry) {
X sival.ivalue = ((FULL) *hd) + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X }
X }
X
X /*
X * Now return the true size of the number.
X */
X len = sizetotal;
X hd = &ans[len - 1];
X while ((*hd == 0) && (len > 1)) {
X hd--;
X len--;
X }
X return len;
X }
X
X /*
X * Need to multiply by a large number.
X * Allocate temporary space for calculations, and calculate the
X * value for the shift. The shift value is 1/2 the size of the
X * larger (first) number (rounded up). The amount of temporary
X * space needed is twice the size of the shift, plus one more word
X * for the multiply to use.
X */
X shift = (size1 + 1) / 2;
X temp = tempbuf;
X tempbuf += (2 * shift) + 1;
X
X /*
X * Determine the sizes and locations of all the numbers.
X * The value of sizeC can be negative, and this is checked later.
X * The value of sizeD is limited by the full size of the number.
X */
X baseA = v1 + shift;
X baseB = v1;
X baseC = v2 + shift;
X baseD = v2;
X baseAB = ans;
X baseDC = ans + shift;
X baseAC = ans + shift * 2;
X baseBD = ans;
X
X sizeA = size1 - shift;
X sizeC = size2 - shift;
X
X sizeB = shift;
X hd = &baseB[shift - 1];
X while ((*hd == 0) && (sizeB > 1)) {
X hd--;
X sizeB--;
X }
X
X sizeD = shift;
X if (sizeD > size2)
X sizeD = size2;
X hd = &baseD[sizeD - 1];
X while ((*hd == 0) && (sizeD > 1)) {
X hd--;
X sizeD--;
X }
X
X /*
X * If the smaller number has a high half of zero, then calculate
X * the result by breaking up the first number into two numbers
X * and combining the results using the obvious formula:
X * (A*S+B) * D = (A*D)*S + B*D
X */
X if (sizeC <= 0) {
X len = domul(baseB, sizeB, baseD, sizeD, ans);
X hd = &ans[len];
X len = sizetotal - len;
X while (len--)
X *hd++ = 0;
X
X /*
X * Add the second number into the first number, shifted
X * over at the correct position.
X */
X len = domul(baseA, sizeA, baseD, sizeD, temp);
X h1 = temp;
X hd = ans + shift;
X carry = 0;
X while (len--) {
X sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X }
X while (carry) {
X sival.ivalue = ((FULL) *hd) + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X }
X
X /*
X * Determine the final size of the number and return it.
X */
X len = sizetotal;
X hd = &ans[len - 1];
X while ((*hd == 0) && (len > 1)) {
X hd--;
X len--;
X }
X tempbuf = temp;
X return len;
X }
X
X /*
X * Now we know that the high halfs of the numbers are nonzero,
X * so we can use the complete formula.
X * (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D.
X * The steps are done in the following order:
X * A-B
X * D-C
X * (A-B)*(D-C)
X * S^2*A*C + B*D
X * (S^2+S)*A*C + (S+1)*B*D (*)
X * (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D
X *
X * Note: step (*) above can produce a result which is larger than
X * the final product will be, and this is where the extra word
X * needed in the product comes from. After the final subtraction is
X * done, the result fits in the expected size. Using the extra word
X * is easier than suppressing the carries and borrows everywhere.
X *
X * Begin by forming the product (A-B)*(D-C) into a temporary
X * location that we save until the final step. Do each subtraction
X * at positions 0 and S. Be very careful about the relative sizes
X * of the numbers since this result can be negative. For the first
X * step calculate the absolute difference of A and B into a temporary
X * location at position 0 of the result. Negate the sign if A is
X * smaller than B.
X */
X neg = FALSE;
X if (sizeA == sizeB) {
X len = sizeA;
X h1 = &baseA[len - 1];
X h2 = &baseB[len - 1];
X while ((len > 1) && (*h1 == *h2)) {
X len--;
X h1--;
X h2--;
X }
X }
X if ((sizeA > sizeB) || ((sizeA == sizeB) && (*h1 > *h2)))
X {
X h1 = baseA;
X h2 = baseB;
X sizeAB = sizeA;
X subsize = sizeB;
X } else {
X neg = !neg;
X h1 = baseB;
X h2 = baseA;
X sizeAB = sizeB;
X subsize = sizeA;
X }
X copysize = sizeAB - subsize;
X
X hd = baseAB;
X carry = 0;
X while (subsize--) {
X sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;
X *hd++ = BASE1 - sival.silow;
X carry = sival.sihigh;
X }
X while (copysize--) {
X sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;
X *hd++ = BASE1 - sival.silow;
X carry = sival.sihigh;
X }
X
X hd = &baseAB[sizeAB - 1];
X while ((*hd == 0) && (sizeAB > 1)) {
X hd--;
X sizeAB--;
X }
X
X /*
X * This completes the calculation of abs(A-B). For the next step
X * calculate the absolute difference of D and C into a temporary
X * location at position S of the result. Negate the sign if C is
X * larger than D.
X */
X if (sizeC == sizeD) {
X len = sizeC;
X h1 = &baseC[len - 1];
X h2 = &baseD[len - 1];
X while ((len > 1) && (*h1 == *h2)) {
X len--;
X h1--;
X h2--;
X }
X }
X if ((sizeC > sizeD) || ((sizeC == sizeD) && (*h1 > *h2)))
X {
X neg = !neg;
X h1 = baseC;
X h2 = baseD;
X sizeDC = sizeC;
X subsize = sizeD;
X } else {
X h1 = baseD;
X h2 = baseC;
X sizeDC = sizeD;
X subsize = sizeC;
X }
X copysize = sizeDC - subsize;
X
X hd = baseDC;
X carry = 0;
X while (subsize--) {
X sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;
X *hd++ = BASE1 - sival.silow;
X carry = sival.sihigh;
X }
X while (copysize--) {
X sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;
X *hd++ = BASE1 - sival.silow;
X carry = sival.sihigh;
X }
X hd = &baseDC[sizeDC - 1];
X while ((*hd == 0) && (sizeDC > 1)) {
X hd--;
X sizeDC--;
X }
X
X /*
X * This completes the calculation of abs(D-C). Now multiply
X * together abs(A-B) and abs(D-C) into a temporary location,
X * which is preserved until the final steps.
X */
X baseABDC = temp;
X sizeABDC = domul(baseAB, sizeAB, baseDC, sizeDC, baseABDC);
X
X /*
X * Now calculate B*D and A*C into one of their two final locations.
X * Make sure the high order digits of the products are zeroed since
X * this initializes the final result. Be careful about this zeroing
X * since the size of the high order words might be smaller than
X * the shift size. Do B*D first since the multiplies use one more
X * word than the size of the product. Also zero the final extra
X * word in the result for possible carries to use.
X */
X len = domul(baseB, sizeB, baseD, sizeD, baseBD);
X hd = &baseBD[len];
X len = shift * 2 - len;
X while (len--)
X *hd++ = 0;
X
X len = domul(baseA, sizeA, baseC, sizeC, baseAC);
X hd = &baseAC[len];
X len = sizetotal - shift * 2 - len + 1;
X while (len--)
X *hd++ = 0;
X
X /*
X * Now add in A*C and B*D into themselves at the other shifted
X * position that they need. This addition is tricky in order to
X * make sure that the two additions cannot interfere with each other.
X * Therefore we first add in the top half of B*D and the lower half
X * of A*C. The sources and destinations of these two additions
X * overlap, and so the same answer results from the two additions,
X * thus only two pointers suffice for both additions. Keep the
X * final carry from these additions for later use since we cannot
X * afford to change the top half of A*C yet.
X */
X h1 = baseBD + shift;
X h2 = baseAC;
X carryACBD = 0;
X len = shift;
X while (len--) {
X sival.ivalue = ((FULL) *h1) + ((FULL) *h2) + carryACBD;
X *h1++ = sival.silow;
X *h2++ = sival.silow;
X carryACBD = sival.sihigh;
X }
X
X /*
X * Now add in the bottom half of B*D and the top half of A*C.
X * These additions are straightforward, except that A*C should
X * be done first because of possible carries from B*D, and the
X * top half of A*C might not exist. Add in one of the carries
X * from the previous addition while we are at it.
X */
X h1 = baseAC + shift;
X hd = baseAC;
X carry = carryACBD;
X len = sizetotal - 3 * shift;
X while (len--) {
X sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X }
X while (carry) {
X sival.ivalue = ((FULL) *hd) + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X }
X
X h1 = baseBD;
X hd = baseBD + shift;
X carry = 0;
X len = shift;
X while (len--) {
X sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X }
X while (carry) {
X sival.ivalue = ((FULL) *hd) + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X }
X
X /*
X * Now finally add in the other delayed carry from the
X * above addition.
X */
X hd = baseAC + shift;
X while (carryACBD) {
X sival.ivalue = ((FULL) *hd) + carryACBD;
X *hd++ = sival.silow;
X carryACBD = sival.sihigh;
X }
X
X /*
X * Now finally add or subtract (A-B)*(D-C) into the final result at
X * the correct position (S), according to whether it is positive or
X * negative. When subtracting, the answer cannot go negative.
X */
X h1 = baseABDC;
X hd = ans + shift;
X carry = 0;
X len = sizeABDC;
X if (neg) {
X while (len--) {
X sival.ivalue = BASE1 - ((FULL) *hd) +
X ((FULL) *h1++) + carry;
X *hd++ = BASE1 - sival.silow;
X carry = sival.sihigh;
X }
X while (carry) {
X sival.ivalue = BASE1 - ((FULL) *hd) + carry;
X *hd++ = BASE1 - sival.silow;
X carry = sival.sihigh;
X }
X } else {
X while (len--) {
X sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X }
X while (carry) {
X sival.ivalue = ((FULL) *hd) + carry;
X *hd++ = sival.silow;
X carry = sival.sihigh;
X }
X }
X
X /*
X * Finally determine the size of the final result and return that.
X */
X len = sizetotal;
X hd = &ans[len - 1];
X while ((*hd == 0) && (len > 1)) {
X hd--;
X len--;
X }
X tempbuf = temp;
X return len;
X}
X
X
X/*
X * Square a number by using the following formula recursively:
X * (A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2
X * where S is a power of 2^16, and so multiplies by it are shifts,
X * and A and B are the left and right halfs of the number to square.
X */
Xvoid
Xzsquare(z, res)
X ZVALUE z, *res;
X{
X LEN len;
X
X if (ziszero(z)) {
X *res = _zero_;
X return;
X }
X if (zisunit(z)) {
X *res = _one_;
X return;
X }
X
X /*
X * Allocate a temporary array if necessary for the recursion to use.
X * The array needs to be allocated large enough for all of the
X * temporary results to fit in. This size is about 3 times the
X * size of the original number, since each recursion level uses 3/2
X * of the size of its given number, and whose size is 1/2 the size
X * of the previous level. The sum of the infinite series is 3.
X * Allocate some extra words for rounding up the sizes.
X */
X len = 3 * z.len + 32;
X tempbuf = zalloctemp(len);
X
X res->sign = 0;
X res->v = alloc(z.len * 2);
X res->len = dosquare(z.v, z.len, res->v);
X}
X
X
X/*
X * Recursive routine to square a number by splitting it up into two numbers
X * of half the size, and using the results of squaring the subpieces.
X * The result is placed in the indicated array, which must be large
X * enough for the result (size * 2). Returns the size of the result.
X * This uses a temporary array which must be 3 times as large as the
X * size of the number at the top level recursive call.
X */
Xstatic LEN
Xdosquare(vp, size, ans)
X HALF *vp; /* value to be squared */
X LEN size; /* length of value to square */
X HALF *ans; /* location for result */
X{
X LEN shift; /* amount numbers are shifted by */
X LEN sizeA; /* size of left half of number to square */
X LEN sizeB; /* size of right half of number to square */
X LEN sizeAA; /* size of square of left half */
X LEN sizeBB; /* size of square of right half */
X LEN sizeAABB; /* size of sum of squares of A and B */
X LEN sizeAB; /* size of difference of A and B */
X LEN sizeABAB; /* size of square of difference of A and B */
X LEN subsize; /* size of difference of halfs */
X LEN copysize; /* size of number left to copy */
X LEN sumsize; /* size of sum */
X LEN sizetotal; /* total size of square */
X LEN len; /* temporary length */
X LEN len1; /* another temporary length */
X FULL carry; /* carry digit for small multiplications */
X FULL digit; /* single digit multiplying by */
X HALF *temp; /* base for temporary calculations */
X HALF *baseA; /* base of left half of number */
X HALF *baseB; /* base of right half of number */
X HALF *baseAA; /* base of square of left half of number */
X HALF *baseBB; /* base of square of right half of number */
X HALF *baseAABB; /* base of sum of squares of A and B */
X HALF *baseAB; /* base of difference of A and B */
X HALF *baseABAB; /* base of square of difference of A and B */
X register HALF *hd, *h1, *h2, *h3; /* for inner loops */
X SIUNION sival; /* for addition of digits */
X
X /*
X * First trim the number of leading zeroes.
SHAR_EOF
echo "End of part 13"
echo "File calc2.9.0/zmul.c is continued in part 14"
echo "14" > s2_seq_.tmp
exit 0