Usenet 1994 October

home *** CD-ROM | disk | FTP | other *** search

/ Usenet 1994 October / usenetsourcesnewsgroupsinfomagicoctober1994disk2.iso / unix / volume27 / calc-2.9.0 / part13 < prev next >

Wrap

Text File | 1993-12-07 | 60KB | 2,213 lines

Newsgroups: comp.sources.unix From: dbell@canb.auug.org.au (David I. Bell) Subject: v27i140: calc-2.9.0 - arbitrary precision C-like programmable calculator, Part13/19 References: <1.755316719.21314@gw.home.vix.com> Sender: unix-sources-moderator@gw.home.vix.com Approved: vixie@gw.home.vix.com Submitted-By: dbell@canb.auug.org.au (David I. Bell) Posting-Number: Volume 27, Issue 140 Archive-Name: calc-2.9.0/part13 #!/bin/sh # this is part 13 of a multipart archive # do not concatenate these parts, unpack them in order with /bin/sh # file calc2.9.0/zmath.h continued # CurArch=13 if test ! -r s2_seq_.tmp then echo "Please unpack part 1 first!" exit 1; fi ( read Scheck if test "$Scheck" != $CurArch then echo "Please unpack part $Scheck next!" exit 1; else exit 0; fi ) < s2_seq_.tmp || exit 1 echo "x - Continuing file calc2.9.0/zmath.h" sed 's/^X//' << 'SHAR_EOF' >> calc2.9.0/zmath.h X#endif Xextern void zminmod MATH_PROTO((ZVALUE z1, ZVALUE z2, ZVALUE *res)); Xextern BOOL zcmpmod MATH_PROTO((ZVALUE z1, ZVALUE z2, ZVALUE z3)); X X X/* X * These functions are for internal use only. X */ Xextern void ztrim MATH_PROTO((ZVALUE *z)); Xextern void zshiftr MATH_PROTO((ZVALUE z, long n)); Xextern void zshiftl MATH_PROTO((ZVALUE z, long n)); Xextern HALF *zalloctemp MATH_PROTO((LEN len)); Xextern void initmasks MATH_PROTO((void)); X X X/* X * Modulo arithmetic definitions. X * Structure holding state of REDC initialization. X * Multiple instances of this structure can be used allowing X * calculations with more than one modulus at the same time. X * Len of zero means the structure is not initialized. X */ Xtypedef struct { X LEN len; /* number of words in binary modulus */ X ZVALUE mod; /* modulus REDC is computing with */ X ZVALUE inv; /* inverse of modulus in binary modulus */ X ZVALUE one; /* REDC format for the number 1 */ X} REDC; X Xextern REDC *zredcalloc MATH_PROTO((ZVALUE z1)); Xextern void zredcfree MATH_PROTO((REDC *rp)); Xextern void zredcencode MATH_PROTO((REDC *rp, ZVALUE z1, ZVALUE *res)); Xextern void zredcdecode MATH_PROTO((REDC *rp, ZVALUE z1, ZVALUE *res)); Xextern void zredcmul MATH_PROTO((REDC *rp, ZVALUE z1, ZVALUE z2, ZVALUE *res)); Xextern void zredcsquare MATH_PROTO((REDC *rp, ZVALUE z1, ZVALUE *res)); Xextern void zredcpower MATH_PROTO((REDC *rp, ZVALUE z1, ZVALUE z2, ZVALUE *res)); X X X/* X * macro expansions to speed this thing up X */ X#define ziseven(z) (!(*(z).v & 01)) X#define zisodd(z) (*(z).v & 01) X#define ziszero(z) ((*(z).v == 0) && ((z).len == 1)) X#define zisneg(z) ((z).sign) X#define zispos(z) (((z).sign == 0) && (*(z).v || ((z).len > 1))) X#define zisunit(z) ((*(z).v == 1) && ((z).len == 1)) X#define zisone(z) ((*(z).v == 1) && ((z).len == 1) && !(z).sign) X#define zisnegone(z) ((*(z).v == 1) && ((z).len == 1) && (z).sign) X#define zistwo(z) ((*(z).v == 2) && ((z).len == 1) && !(z).sign) X#define zisleone(z) ((*(z).v <= 1) && ((z).len == 1)) X#define zistiny(z) ((z).len == 1) X#define zissmall(z) (((z).len < 2) || (((z).len == 2) && (((short)(z).v[1]) >= 0))) X#define zisbig(z) (((z).len > 2) || (((z).len == 2) && (((short)(z).v[1]) < 0))) X X#define z1tol(z) ((long)((z).v[0])) X#define z2tol(z) (((long)((z).v[0])) + \ X (((long)((z).v[1] & MAXHALF)) << BASEB)) X#define zclearval(z) memset((z).v, 0, (z).len * sizeof(HALF)) X#define zcopyval(z1,z2) memcpy((z2).v, (z1).v, (z1).len * sizeof(HALF)) X#define zquicktrim(z) {if (((z).len > 1) && ((z).v[(z).len-1] == 0)) \ X (z).len--;} X#define zfree(z) freeh((z).v) X X X/* X * Output modes for numeric displays. X */ X#define MODE_DEFAULT 0 X#define MODE_FRAC 1 X#define MODE_INT 2 X#define MODE_REAL 3 X#define MODE_EXP 4 X#define MODE_HEX 5 X#define MODE_OCTAL 6 X#define MODE_BINARY 7 X#define MODE_MAX 7 X X#define MODE_INITIAL MODE_REAL X X X/* X * Output routines for either FILE handles or strings. X */ Xextern void math_chr MATH_PROTO((int ch)); Xextern void math_str MATH_PROTO((char *str)); Xextern void math_fill MATH_PROTO((char *str, long width)); Xextern void math_flush MATH_PROTO((void)); Xextern void math_divertio MATH_PROTO((void)); Xextern void math_cleardiversions MATH_PROTO((void)); Xextern void math_setfp MATH_PROTO((FILE *fp)); Xextern char *math_getdivertedio MATH_PROTO((void)); Xextern int math_setmode MATH_PROTO((int mode)); Xextern long math_setdigits MATH_PROTO((long digits)); X X X#ifdef VARARGS Xextern void math_fmt(); X#else Xextern void math_fmt MATH_PROTO((char *, ...)); X#endif X X X/* X * The error routine. X */ X#ifdef VARARGS Xextern void math_error(); X#else Xextern void math_error MATH_PROTO((char *, ...)); X#endif X X X/* X * constants used often by the arithmetic routines X */ Xextern HALF _zeroval_[], _oneval_[], _twoval_[], _tenval_[]; Xextern ZVALUE _zero_, _one_, _ten_; X Xextern BOOL _math_abort_; /* nonzero to abort calculations */ Xextern ZVALUE _tenpowers_[32]; /* table of 10^2^n */ Xextern int _outmode_; /* current output mode */ Xextern LEN _mul2_; /* size of number to use multiply algorithm 2 */ Xextern LEN _sq2_; /* size of number to use square algorithm 2 */ Xextern LEN _pow2_; /* size of modulus to use REDC for powers */ Xextern LEN _redc2_; /* size of modulus to use REDC algorithm 2 */ Xextern HALF *bitmask; /* bit rotation, norm 0 */ X X#endif X X/* END CODE */ SHAR_EOF echo "File calc2.9.0/zmath.h is complete" chmod 0644 calc2.9.0/zmath.h || echo "restore of calc2.9.0/zmath.h fails" set `wc -c calc2.9.0/zmath.h`;Sum=$1 if test "$Sum" != "11814" then echo original size 11814, current size $Sum;fi echo "x - extracting calc2.9.0/zmod.c (Text)" sed 's/^X//' << 'SHAR_EOF' > calc2.9.0/zmod.c && X/* X * Copyright (c) 1993 David I. Bell X * Permission is granted to use, distribute, or modify this source, X * provided that this copyright notice remains intact. X * X * Routines to do modulo arithmetic both normally and also using the REDC X * algorithm given by Peter L. Montgomery in Mathematics of Computation, X * volume 44, number 170 (April, 1985). For multiple multiplies using X * the same large modulus, the REDC algorithm avoids the usual division X * by the modulus, instead replacing it with two multiplies or else a X * special algorithm. When these two multiplies or the special algorithm X * are faster then the division, then the REDC algorithm is better. X */ X X#include "zmath.h" X X X#define POWBITS 4 /* bits for power chunks (must divide BASEB) */ X#define POWNUMS (1<<POWBITS) /* number of powers needed in table */ X X XLEN _pow2_ = POW_ALG2; /* modulo size to use REDC for powers */ XLEN _redc2_ = REDC_ALG2; /* modulo size to use second REDC algorithm */ X Xstatic REDC *powermodredc = NULL; /* REDC info for raising to power */ X X#if 0 Xextern void zaddmod MATH_PROTO((ZVALUE z1, ZVALUE z2, ZVALUE z3, ZVALUE *res)); Xextern void znegmod MATH_PROTO((ZVALUE z1, ZVALUE z2, ZVALUE *res)); X X/* X * Multiply two numbers together and then mod the result with a third number. X * The two numbers to be multiplied can be negative or out of modulo range. X * The result will be in the range 0 to the modulus - 1. X */ Xvoid Xzmulmod(z1, z2, z3, res) X ZVALUE z1; /* first number to be multiplied */ X ZVALUE z2; /* second number to be multiplied */ X ZVALUE z3; /* number to take mod with */ X ZVALUE *res; /* result */ X{ X ZVALUE tmp; X FULL prod; X FULL digit; X BOOL neg; X X if (ziszero(z3) || zisneg(z3)) X math_error("Mod of non-positive integer"); X if (ziszero(z1) || ziszero(z2) || zisunit(z3)) { X *res = _zero_; X return; X } X X /* X * If the modulus is a single digit number, then do the result X * cheaply. Check especially for a small power of two. X */ X if (zistiny(z3)) { X neg = (z1.sign != z2.sign); X digit = z3.v[0]; X if ((digit & -digit) == digit) { /* NEEDS 2'S COMP */ X prod = ((FULL) z1.v[0]) * ((FULL) z2.v[0]); X prod &= (digit - 1); X } else { X z1.sign = 0; X z2.sign = 0; X prod = (FULL) zmodi(z1, (long) digit); X prod *= (FULL) zmodi(z2, (long) digit); X prod %= digit; X } X if (neg && prod) X prod = digit - prod; X itoz((long) prod, res); X return; X } X X /* X * The modulus is more than one digit. X * Actually do the multiply and divide if necessary. X */ X zmul(z1, z2, &tmp); X if (zispos(tmp) && ((tmp.len < z3.len) || ((tmp.len == z3.len) && X (tmp.v[tmp.len-1] < z2.v[z3.len-1])))) X { X *res = tmp; X return; X } X zmod(tmp, z3, res); X zfree(tmp); X} X X X/* X * Square a number and then mod the result with a second number. X * The number to be squared can be negative or out of modulo range. X * The result will be in the range 0 to the modulus - 1. X */ Xvoid Xzsquaremod(z1, z2, res) X ZVALUE z1; /* number to be squared */ X ZVALUE z2; /* number to take mod with */ X ZVALUE *res; /* result */ X{ X ZVALUE tmp; X FULL prod; X FULL digit; X X if (ziszero(z2) || zisneg(z2)) X math_error("Mod of non-positive integer"); X if (ziszero(z1) || zisunit(z2)) { X *res = _zero_; X return; X } X X /* X * If the modulus is a single digit number, then do the result X * cheaply. Check especially for a small power of two. X */ X if (zistiny(z2)) { X digit = z2.v[0]; X if ((digit & -digit) == digit) { /* NEEDS 2'S COMP */ X prod = (FULL) z1.v[0]; X prod = (prod * prod) & (digit - 1); X } else { X z1.sign = 0; X prod = (FULL) zmodi(z1, (long) digit); X prod = (prod * prod) % digit; X } X itoz((long) prod, res); X return; X } X X /* X * The modulus is more than one digit. X * Actually do the square and divide if necessary. X */ X zsquare(z1, &tmp); X if ((tmp.len < z2.len) || X ((tmp.len == z2.len) && (tmp.v[tmp.len-1] < z2.v[z2.len-1]))) { X *res = tmp; X return; X } X zmod(tmp, z2, res); X zfree(tmp); X} X X X/* X * Add two numbers together and then mod the result with a third number. X * The two numbers to be added can be negative or out of modulo range. X * The result will be in the range 0 to the modulus - 1. X */ Xstatic void Xzaddmod(z1, z2, z3, res) X ZVALUE z1; /* first number to be added */ X ZVALUE z2; /* second number to be added */ X ZVALUE z3; /* number to take mod with */ X ZVALUE *res; /* result */ X{ X ZVALUE tmp; X FULL sumdigit; X FULL moddigit; X X if (ziszero(z3) || zisneg(z3)) X math_error("Mod of non-positive integer"); X if ((ziszero(z1) && ziszero(z2)) || zisunit(z3)) { X *res = _zero_; X return; X } X if (zistwo(z2)) { X if ((z1.v[0] + z2.v[0]) & 0x1) X *res = _one_; X else X *res = _zero_; X return; X } X zadd(z1, z2, &tmp); X if (zisneg(tmp) || (tmp.len > z3.len)) { X zmod(tmp, z3, res); X zfree(tmp); X return; X } X sumdigit = tmp.v[tmp.len - 1]; X moddigit = z3.v[z3.len - 1]; X if ((tmp.len < z3.len) || (sumdigit < moddigit)) { X *res = tmp; X return; X } X if (sumdigit < 2 * moddigit) { X zsub(tmp, z3, res); X zfree(tmp); X return; X } X zmod(tmp, z2, res); X zfree(tmp); X} X X X/* X * Subtract two numbers together and then mod the result with a third number. X * The two numbers to be subtract can be negative or out of modulo range. X * The result will be in the range 0 to the modulus - 1. X */ Xvoid Xzsubmod(z1, z2, z3, res) X ZVALUE z1; /* number to be subtracted from */ X ZVALUE z2; /* number to be subtracted */ X ZVALUE z3; /* number to take mod with */ X ZVALUE *res; /* result */ X{ X if (ziszero(z3) || zisneg(z3)) X math_error("Mod of non-positive integer"); X if (ziszero(z2)) { X zmod(z1, z3, res); X return; X } X if (ziszero(z1)) { X znegmod(z2, z3, res); X return; X } X if ((z1.sign == z2.sign) && (z1.len == z2.len) && X (z1.v[0] == z2.v[0]) && (zcmp(z1, z2) == 0)) { X *res = _zero_; X return; X } X z2.sign = !z2.sign; X zaddmod(z1, z2, z3, res); X} X X X/* X * Calculate the negative of a number modulo another number. X * The number to be negated can be negative or out of modulo range. X * The result will be in the range 0 to the modulus - 1. X */ Xstatic void Xznegmod(z1, z2, res) X ZVALUE z1; /* number to take negative of */ X ZVALUE z2; /* number to take mod with */ X ZVALUE *res; /* result */ X{ X int sign; X int cv; X X if (ziszero(z2) || zisneg(z2)) X math_error("Mod of non-positive integer"); X if (ziszero(z1) || zisunit(z2)) { X *res = _zero_; X return; X } X if (zistwo(z2)) { X if (z1.v[0] & 0x1) X *res = _one_; X else X *res = _zero_; X return; X } X X /* X * If the absolute value of the number is within the modulo range, X * then the result is just a copy or a subtraction. Otherwise go X * ahead and negate and reduce the result. X */ X sign = z1.sign; X z1.sign = 0; X cv = zrel(z1, z2); X if (cv == 0) { X *res = _zero_; X return; X } X if (cv < 0) { X if (sign) X zcopy(z1, res); X else X zsub(z2, z1, res); X return; X } X z1.sign = !sign; X zmod(z1, z2, res); X} X#endif X X X/* X * Calculate the number congruent to the given number whose absolute X * value is minimal. The number to be reduced can be negative or out of X * modulo range. The result will be within the range -int((modulus-1)/2) X * to int(modulus/2) inclusive. For example, for modulus 7, numbers are X * reduced to the range [-3, 3], and for modulus 8, numbers are reduced to X * the range [-3, 4]. X */ Xvoid Xzminmod(z1, z2, res) X ZVALUE z1; /* number to find minimum congruence of */ X ZVALUE z2; /* number to take mod with */ X ZVALUE *res; /* result */ X{ X ZVALUE tmp1, tmp2; X int sign; X int cv; X X if (ziszero(z2) || zisneg(z2)) X math_error("Mod of non-positive integer"); X if (ziszero(z1) || zisunit(z2)) { X *res = _zero_; X return; X } X if (zistwo(z2)) { X if (zisodd(z1)) X *res = _one_; X else X *res = _zero_; X return; X } X X /* X * Do a quick check to see if the number is very small compared X * to the modulus. If so, then the result is obvious. X */ X if (z1.len < z2.len - 1) { X zcopy(z1, res); X return; X } X X /* X * Now make sure the input number is within the modulo range. X * If not, then reduce it to be within range and make the X * quick check again. X */ X sign = z1.sign; X z1.sign = 0; X cv = zrel(z1, z2); X if (cv == 0) { X *res = _zero_; X return; X } X tmp1 = z1; X if (cv > 0) { X z1.sign = (BOOL)sign; X zmod(z1, z2, &tmp1); X if (tmp1.len < z2.len - 1) { X *res = tmp1; X return; X } X sign = 0; X } X X /* X * Now calculate the difference of the modulus and the absolute X * value of the original number. Compare the original number with X * the difference, and return the one with the smallest absolute X * value, with the correct sign. If the two values are equal, then X * return the positive result. X */ X zsub(z2, tmp1, &tmp2); X cv = zrel(tmp1, tmp2); X if (cv < 0) { X zfree(tmp2); X tmp1.sign = (BOOL)sign; X if (tmp1.v == z1.v) X zcopy(tmp1, res); X else X *res = tmp1; X } else { X if (cv) X tmp2.sign = !sign; X if (tmp1.v != z1.v) X zfree(tmp1); X *res = tmp2; X } X} X X X/* X * Compare two numbers for equality modulo a third number. X * The two numbers to be compared can be negative or out of modulo range. X * Returns TRUE if the numbers are not congruent, and FALSE if they are X * congruent. X */ XBOOL Xzcmpmod(z1, z2, z3) X ZVALUE z1; /* first number to be compared */ X ZVALUE z2; /* second number to be compared */ X ZVALUE z3; /* modulus */ X{ X ZVALUE tmp1, tmp2, tmp3; X FULL digit; X LEN len; X int cv; X X if (zisneg(z3) || ziszero(z3)) X math_error("Non-positive modulus in zcmpmod"); X if (zistwo(z3)) X return (((z1.v[0] + z2.v[0]) & 0x1) != 0); X X /* X * If the two numbers are equal, then their mods are equal. X */ X if ((z1.sign == z2.sign) && (z1.len == z2.len) && X (z1.v[0] == z2.v[0]) && (zcmp(z1, z2) == 0)) X return FALSE; X X /* X * If both numbers are negative, then we can make them positive. X */ X if (zisneg(z1) && zisneg(z2)) { X z1.sign = 0; X z2.sign = 0; X } X X /* X * For small negative numbers, make them positive before comparing. X * In any case, the resulting numbers are in tmp1 and tmp2. X */ X tmp1 = z1; X tmp2 = z2; X len = z3.len; X digit = z3.v[len - 1]; X X if (zisneg(z1) && ((z1.len < len) || X ((z1.len == len) && (z1.v[z1.len - 1] < digit)))) X zadd(z1, z3, &tmp1); X X if (zisneg(z2) && ((z2.len < len) || X ((z2.len == len) && (z2.v[z2.len - 1] < digit)))) X zadd(z2, z3, &tmp2); X X /* X * Now compare the two numbers for equality. X * If they are equal we are all done. X */ X if (zcmp(tmp1, tmp2) == 0) { X if (tmp1.v != z1.v) X zfree(tmp1); X if (tmp2.v != z2.v) X zfree(tmp2); X return FALSE; X } X X /* X * They are not identical. Now if both numbers are positive X * and less than the modulus, then they are definitely not equal. X */ X if ((tmp1.sign == tmp2.sign) && X ((tmp1.len < len) || (zrel(tmp1, z3) < 0)) && X ((tmp2.len < len) || (zrel(tmp2, z3) < 0))) X { X if (tmp1.v != z1.v) X zfree(tmp1); X if (tmp2.v != z2.v) X zfree(tmp2); X return TRUE; X } X X /* X * Either one of the numbers is negative or is large. X * So do the standard thing and subtract the two numbers. X * Then they are equal if the result is 0 (mod z3). X */ X zsub(tmp1, tmp2, &tmp3); X if (tmp1.v != z1.v) X zfree(tmp1); X if (tmp2.v != z2.v) X zfree(tmp2); X X /* X * Compare the result with the modulus to see if it is equal to X * or less than the modulus. If so, we know the mod result. X */ X tmp3.sign = 0; X cv = zrel(tmp3, z3); X if (cv == 0) { X zfree(tmp3); X return FALSE; X } X if (cv < 0) { X zfree(tmp3); X return TRUE; X } X X /* X * We are forced to actually do the division. X * The numbers are congruent if the result is zero. X */ X zmod(tmp3, z3, &tmp1); X zfree(tmp3); X if (ziszero(tmp1)) { X zfree(tmp1); X return FALSE; X } else { X zfree(tmp1); X return TRUE; X } X} X X X/* X * Compute the result of raising one number to a power modulo another number. X * That is, this computes: a^b (modulo c). X * This calculates the result by examining the power POWBITS bits at a time, X * using a small table of POWNUMS low powers to calculate powers for those bits, X * and repeated squaring and multiplying by the partial powers to generate X * the complete power. If the power being raised to is high enough, then X * this uses the REDC algorithm to avoid doing many divisions. When using X * REDC, multiple calls to this routine using the same modulus will be X * slightly faster. X */ Xvoid Xzpowermod(z1, z2, z3, res) X ZVALUE z1, z2, z3, *res; X{ X HALF *hp; /* pointer to current word of the power */ X REDC *rp; /* REDC information to be used */ X ZVALUE *pp; /* pointer to low power table */ X ZVALUE ans, temp; /* calculation values */ X ZVALUE modpow; /* current small power */ X ZVALUE lowpowers[POWNUMS]; /* low powers */ X int sign; /* original sign of number */ X int curshift; /* shift value for word of power */ X HALF curhalf; /* current word of power */ X unsigned int curpow; /* current low power */ X unsigned int curbit; /* current bit of low power */ X int i; X X if (zisneg(z3) || ziszero(z3)) X math_error("Non-positive modulus in zpowermod"); X if (zisneg(z2)) X math_error("Negative power in zpowermod"); X X sign = z1.sign; X z1.sign = 0; X X /* X * Check easy cases first. X */ X if (ziszero(z1) || zisunit(z3)) { /* 0^x or mod 1 */ X *res = _zero_; X return; X } X if (zistwo(z3)) { /* mod 2 */ X if (zisodd(z1)) X *res = _one_; X else X *res = _zero_; X return; X } X if (zisunit(z1) && (!sign || ziseven(z2))) { X /* 1^x or (-1)^(2x) */ X *res = _one_; X return; X } X X /* X * Normalize the number being raised to be non-negative and to lie X * within the modulo range. Then check for zero or one specially. X */ X zmod(z1, z3, &temp); X if (ziszero(temp)) { X zfree(temp); X *res = _zero_; X return; X } X z1 = temp; X if (sign) { X zsub(z3, z1, &temp); X zfree(z1); X z1 = temp; X } X if (zisunit(z1)) { X zfree(z1); X *res = _one_; X return; X } X X /* X * If the modulus is odd, large enough, is not one less than an X * exact power of two, and if the power is large enough, then use X * the REDC algorithm. The size where this is done is configurable. X */ X if ((z2.len > 1) && (z3.len >= _pow2_) && zisodd(z3) X && !zisallbits(z3)) X { X if (powermodredc && zcmp(powermodredc->mod, z3)) { X zredcfree(powermodredc); X powermodredc = NULL; X } X if (powermodredc == NULL) X powermodredc = zredcalloc(z3); X rp = powermodredc; X zredcencode(rp, z1, &temp); X zredcpower(rp, temp, z2, &z1); X zfree(temp); X zredcdecode(rp, z1, res); X zfree(z1); X return; X } X X /* X * Modulus or power is small enough to perform the power raising X * directly. Initialize the table of powers. X */ X for (pp = &lowpowers[2]; pp < &lowpowers[POWNUMS]; pp++) X pp->len = 0; X lowpowers[0] = _one_; X lowpowers[1] = z1; X ans = _one_; X X hp = &z2.v[z2.len - 1]; X curhalf = *hp; X curshift = BASEB - POWBITS; X while (curshift && ((curhalf >> curshift) == 0)) X curshift -= POWBITS; X X /* X * Calculate the result by examining the power POWBITS bits at a time, X * and use the table of low powers at each iteration. X */ X for (;;) { X curpow = (curhalf >> curshift) & (POWNUMS - 1); X pp = &lowpowers[curpow]; X X /* X * If the small power is not yet saved in the table, then X * calculate it and remember it in the table for future use. X */ X if (pp->len == 0) { X if (curpow & 0x1) X zcopy(z1, &modpow); X else X modpow = _one_; X X for (curbit = 0x2; curbit <= curpow; curbit *= 2) { X pp = &lowpowers[curbit]; X if (pp->len == 0) { X zsquare(lowpowers[curbit/2], &temp); X zmod(temp, z3, pp); X zfree(temp); X } X if (curbit & curpow) { X zmul(*pp, modpow, &temp); X zfree(modpow); X zmod(temp, z3, &modpow); X zfree(temp); X } X } X pp = &lowpowers[curpow]; X *pp = modpow; X } X X /* X * If the power is nonzero, then accumulate the small power X * into the result. X */ X if (curpow) { X zmul(ans, *pp, &temp); X zfree(ans); X zmod(temp, z3, &ans); X zfree(temp); X } X X /* X * Select the next POWBITS bits of the power, if there is X * any more to generate. X */ X curshift -= POWBITS; X if (curshift < 0) { X if (hp-- == z2.v) X break; X curhalf = *hp; X curshift = BASEB - POWBITS; X } X X /* X * Square the result POWBITS times to make room for the next X * chunk of bits. X */ X for (i = 0; i < POWBITS; i++) { X zsquare(ans, &temp); X zfree(ans); X zmod(temp, z3, &ans); X zfree(temp); X } X } X X for (pp = &lowpowers[2]; pp < &lowpowers[POWNUMS]; pp++) { X if (pp->len) X freeh(pp->v); X } X *res = ans; X} X X X/* X * Initialize the REDC algorithm for a particular modulus, X * returning a pointer to a structure that is used for other X * REDC calls. An error is generated if the structure cannot X * be allocated. The modulus must be odd and positive. X */ XREDC * Xzredcalloc(z1) X ZVALUE z1; /* modulus to initialize for */ X{ X REDC *rp; /* REDC information */ X ZVALUE tmp; X long bit; X X if (ziseven(z1) || zisneg(z1)) X math_error("REDC requires positive odd modulus"); X X rp = (REDC *) malloc(sizeof(REDC)); X if (rp == NULL) X math_error("Cannot allocate REDC structure"); X X /* X * Round up the binary modulus to the next power of two X * which is at a word boundary. Then the shift and modulo X * operations mod the binary modulus can be done very cheaply. X * Calculate the REDC format for the number 1 for future use. X */ X bit = zhighbit(z1) + 1; X if (bit % BASEB) X bit += (BASEB - (bit % BASEB)); X zcopy(z1, &rp->mod); X zbitvalue(bit, &tmp); X z1.sign = 1; X (void) zmodinv(z1, tmp, &rp->inv); X zmod(tmp, rp->mod, &rp->one); X zfree(tmp); X rp->len = bit / BASEB; X return rp; X} X X X/* X * Free any numbers associated with the specified REDC structure, X * and then the REDC structure itself. X */ Xvoid Xzredcfree(rp) X REDC *rp; /* REDC information to be cleared */ X{ X zfree(rp->mod); X zfree(rp->inv); X zfree(rp->one); X free(rp); X} X X X/* X * Convert a normal number into the specified REDC format. X * The number to be converted can be negative or out of modulo range. X * The resulting number can be used for multiplying, adding, subtracting, X * or comparing with any other such converted numbers, as if the numbers X * were being calculated modulo the number which initialized the REDC X * information. When the final value is unconverted, the result is the X * same as if the usual operations were done with the original numbers. X */ Xvoid Xzredcencode(rp, z1, res) X REDC *rp; /* REDC information */ X ZVALUE z1; /* number to be converted */ X ZVALUE *res; /* returned converted number */ X{ X ZVALUE tmp1, tmp2; X X /* X * Handle the cases 0, 1, -1, and 2 specially since these are X * easy to calculate. Zero transforms to zero, and the others X * can be obtained from the precomputed REDC format for 1 since X * addition and subtraction act normally for REDC format numbers. X */ X if (ziszero(z1)) { X *res = _zero_; X return; X } X if (zisone(z1)) { X zcopy(rp->one, res); X return; X } X if (zisunit(z1)) { X zsub(rp->mod, rp->one, res); X return; X } X if (zistwo(z1)) { X zadd(rp->one, rp->one, &tmp1); X if (zrel(tmp1, rp->mod) < 0) { X *res = tmp1; X return; X } X zsub(tmp1, rp->mod, res); X zfree(tmp1); X return; X } X X /* X * Not a trivial number to convert, so do the full transformation. X * Convert negative numbers to positive numbers before converting. X */ X tmp1.len = 0; X if (zisneg(z1)) { X zmod(z1, rp->mod, &tmp1); X z1 = tmp1; X } X zshift(z1, rp->len * BASEB, &tmp2); X if (tmp1.len) X zfree(tmp1); X zmod(tmp2, rp->mod, res); X zfree(tmp2); X} X X X/* X * The REDC algorithm used to convert numbers out of REDC format and also X * used after multiplication of two REDC numbers. Using this routine X * avoids any divides, replacing the divide by two multiplications. X * If the numbers are very large, then these two multiplies will be X * quicker than the divide, since dividing is harder than multiplying. X */ Xvoid Xzredcdecode(rp, z1, res) X REDC *rp; /* REDC information */ X ZVALUE z1; /* number to be transformed */ X ZVALUE *res; /* returned transformed number */ X{ X ZVALUE tmp1, tmp2; /* temporaries */ X HALF *hp; /* saved pointer to tmp2 value */ X X if (zisneg(z1)) X math_error("Negative number for zredc"); X X /* X * Check first for the special values for 0 and 1 that are easy. X */ X if (ziszero(z1)) { X *res = _zero_; X return; X } X if ((z1.len == rp->one.len) && (z1.v[0] == rp->one.v[0]) && X (zcmp(z1, rp->one) == 0)) { X *res = _one_; X return; X } X X /* X * First calculate the following: X * tmp2 = ((z1 % 2^bitnum) * inv) % 2^bitnum. X * The mod operations can be done with no work since the bit X * number was selected as a multiple of the word size. Just X * reduce the sizes of the numbers as required. X */ X tmp1 = z1; X if (tmp1.len > rp->len) X tmp1.len = rp->len; X zmul(tmp1, rp->inv, &tmp2); X if (tmp2.len > rp->len) X tmp2.len = rp->len; X X /* X * Next calculate the following: X * res = (z1 + tmp2 * modulus) / 2^bitnum X * The division by a power of 2 is always exact, and requires no X * work. Just adjust the address and length of the number to do X * the divide, but save the original pointer for freeing later. X */ X zmul(tmp2, rp->mod, &tmp1); X zfree(tmp2); X zadd(z1, tmp1, &tmp2); X zfree(tmp1); X hp = tmp2.v; X if (tmp2.len <= rp->len) { X freeh(hp); X *res = _zero_; X return; X } X tmp2.v += rp->len; X tmp2.len -= rp->len; X X /* X * Finally do a final modulo by a simple subtraction if necessary. X * This is all that is needed because the previous calculation is X * guaranteed to always be less than twice the modulus. X */ X if (zrel(tmp2, rp->mod) < 0) X zcopy(tmp2, res); X else X zsub(tmp2, rp->mod, res); X freeh(hp); X} X X X/* X * Multiply two numbers in REDC format together producing a result also X * in REDC format. If the result is converted back to a normal number, X * then the result is the same as the modulo'd multiplication of the X * original numbers before they were converted to REDC format. This X * calculation is done in one of two ways, depending on the size of the X * modulus. For large numbers, the REDC definition is used directly X * which involves three multiplies overall. For small numbers, a X * complicated routine is used which does the indicated multiplication X * and the REDC algorithm at the same time to produce the result. X */ Xvoid Xzredcmul(rp, z1, z2, res) X REDC *rp; /* REDC information */ X ZVALUE z1; /* first REDC number to be multiplied */ X ZVALUE z2; /* second REDC number to be multiplied */ X ZVALUE *res; /* resulting REDC number */ X{ X FULL mulb; X FULL muln; X HALF *h1; X HALF *h2; X HALF *h3; X HALF *hd; X HALF Ninv; X HALF topdigit; X LEN modlen; X LEN len; X LEN len2; X SIUNION sival1; X SIUNION sival2; X SIUNION sival3; X SIUNION carry; X ZVALUE tmp; X X if (zisneg(z1) || (z1.len > rp->mod.len) || X zisneg(z2) || (z2.len > rp->mod.len)) X math_error("Negative or too large number in zredcmul"); X X /* X * Check for special values which we easily know the answer. X */ X if (ziszero(z1) || ziszero(z2)) { X *res = _zero_; X return; X } X X if ((z1.len == rp->one.len) && (z1.v[0] == rp->one.v[0]) && X (zcmp(z1, rp->one) == 0)) { X zcopy(z2, res); X return; X } X X if ((z2.len == rp->one.len) && (z2.v[0] == rp->one.v[0]) && X (zcmp(z2, rp->one) == 0)) { X zcopy(z1, res); X return; X } X X /* X * If the size of the modulus is large, then just do the multiply, X * followed by the two multiplies contained in the REDC routine. X * This will be quicker than directly doing the REDC calculation X * because of the O(N^1.585) speed of the multiplies. The size X * of the number which this is done is configurable. X */ X if (rp->mod.len >= _redc2_) { X zmul(z1, z2, &tmp); X zredcdecode(rp, tmp, res); X zfree(tmp); X return; X } X X /* X * The number is small enough to calculate by doing the O(N^2) REDC X * algorithm directly. This algorithm performs the multiplication and X * the reduction at the same time. Notice the obscure facts that X * only the lowest word of the inverse value is used, and that X * there is no shifting of the partial products as there is in a X * normal multiply. X */ X modlen = rp->mod.len; X Ninv = rp->inv.v[0]; X X /* X * Allocate the result and clear it. X * The size of the result will be equal to or smaller than X * the modulus size. X */ X res->sign = 0; X res->len = modlen; X res->v = alloc(modlen); X X hd = res->v; X len = modlen; X while (len--) X *hd++ = 0; X X /* X * Do this outermost loop over all the digits of z1. X */ X h1 = z1.v; X len = z1.len; X while (len--) { X /* X * Start off with the next digit of z1, the first X * digit of z2, and the first digit of the modulus. X */ X mulb = (FULL) *h1++; X h2 = z2.v; X h3 = rp->mod.v; X hd = res->v; X sival1.ivalue = mulb * ((FULL) *h2++) + ((FULL) *hd++); X muln = ((HALF) (sival1.silow * Ninv)); X sival2.ivalue = muln * ((FULL) *h3++); X sival3.ivalue = ((FULL) sival1.silow) + ((FULL) sival2.silow); X carry.ivalue = ((FULL) sival1.sihigh) + ((FULL) sival2.sihigh) X + ((FULL) sival3.sihigh); X X /* X * Do this innermost loop for each digit of z2, except X * for the first digit which was just done above. X */ X len2 = z2.len; X while (--len2 > 0) { X sival1.ivalue = mulb * ((FULL) *h2++); X sival2.ivalue = muln * ((FULL) *h3++); X sival3.ivalue = ((FULL) sival1.silow) X + ((FULL) sival2.silow) X + ((FULL) *hd) X + ((FULL) carry.silow); X carry.ivalue = ((FULL) sival1.sihigh) X + ((FULL) sival2.sihigh) X + ((FULL) sival3.sihigh) X + ((FULL) carry.sihigh); X X hd[-1] = sival3.silow; X hd++; X } X X /* X * Now continue the loop as necessary so the total number X * of interations is equal to the size of the modulus. X * This acts as if the innermost loop was repeated for X * high digits of z2 that are zero. X */ X len2 = modlen - z2.len; X while (len2--) { X sival2.ivalue = muln * ((FULL) *h3++); X sival3.ivalue = ((FULL) sival2.silow) X + ((FULL) *hd) X + ((FULL) carry.silow); X carry.ivalue = ((FULL) sival2.sihigh) X + ((FULL) sival3.sihigh) X + ((FULL) carry.sihigh); X X hd[-1] = sival3.silow; X hd++; X } X X res->v[modlen - 1] = carry.silow; X topdigit = carry.sihigh; X } X X /* X * Now continue the loop as necessary so the total number X * of interations is equal to the size of the modulus. X * This acts as if the outermost loop was repeated for high X * digits of z1 that are zero. X */ X len = modlen - z1.len; X while (len--) { X /* X * Start off with the first digit of the modulus. X */ X h3 = rp->mod.v; X hd = res->v; X muln = ((HALF) (*hd * Ninv)); X sival2.ivalue = muln * ((FULL) *h3++); X sival3.ivalue = ((FULL) *hd++) + ((FULL) sival2.silow); X carry.ivalue = ((FULL) sival2.sihigh) + ((FULL) sival3.sihigh); X X /* X * Do this innermost loop for each digit of the modulus, X * except for the first digit which was just done above. X */ X len2 = modlen; X while (--len2 > 0) { X sival2.ivalue = muln * ((FULL) *h3++); X sival3.ivalue = ((FULL) sival2.silow) X + ((FULL) *hd) X + ((FULL) carry.silow); X carry.ivalue = ((FULL) sival2.sihigh) X + ((FULL) sival3.sihigh) X + ((FULL) carry.sihigh); X X hd[-1] = sival3.silow; X hd++; X } X res->v[modlen - 1] = carry.silow; X topdigit = carry.sihigh; X } X X /* X * Determine the true size of the result, taking the top digit of X * the current result into account. The top digit is not stored in X * the number because it is temporary and would become zero anyway X * after the final subtraction is done. X */ X if (topdigit == 0) { X len = modlen; X hd = &res->v[len - 1]; X while ((*hd == 0) && (len > 1)) { X hd--; X len--; X } X res->len = len; X } X X /* X * Compare the result with the modulus. X * If it is less than the modulus, then the calculation is complete. X */ X if ((topdigit == 0) && ((len < modlen) X || (res->v[len - 1] < rp->mod.v[len - 1]) X || (zrel(*res, rp->mod) < 0))) X return; X X /* X * Do a subtraction to reduce the result to a value less than X * the modulus. The REDC algorithm guarantees that a single subtract X * is all that is needed. Ignore any borrowing from the possible X * highest word of the current result because that would affect X * only the top digit value that was not stored and would become X * zero anyway. X */ X carry.ivalue = 0; X h1 = rp->mod.v; X hd = res->v; X len = modlen; X while (len--) { X carry.ivalue = BASE1 - ((FULL) *hd) + ((FULL) *h1++) X + ((FULL) carry.silow); X *hd++ = BASE1 - carry.silow; X carry.silow = carry.sihigh; X } X X /* X * Now finally recompute the size of the result. X */ X len = modlen; X hd = &res->v[len - 1]; X while ((*hd == 0) && (len > 1)) { X hd--; X len--; X } X res->len = len; X} X X X/* X * Square a number in REDC format producing a result also in REDC format. X */ Xvoid Xzredcsquare(rp, z1, res) X REDC *rp; /* REDC information */ X ZVALUE z1; /* REDC number to be squared */ X ZVALUE *res; /* resulting REDC number */ X{ X ZVALUE tmp; X X if (zisneg(z1)) X math_error("Negative number in zredcsquare"); X if (ziszero(z1)) { X *res = _zero_; X return; X } X if ((z1.len == rp->one.len) && (z1.v[0] == rp->one.v[0]) && X (zcmp(z1, rp->one) == 0)) { X zcopy(z1, res); X return; X } X X /* X * If the modulus is small enough, then call the multiply X * routine to produce the result. Otherwise call the O(N^1.585) X * routines to get the answer. X */ X if (rp->mod.len < _redc2_) { X zredcmul(rp, z1, z1, res); X return; X } X zsquare(z1, &tmp); X zredcdecode(rp, tmp, res); X zfree(tmp); X} X X X/* X * Compute the result of raising a REDC format number to a power. X * The result is within the range 0 to the modulus - 1. X * This calculates the result by examining the power POWBITS bits at a time, X * using a small table of POWNUMS low powers to calculate powers for those bits, X * and repeated squaring and multiplying by the partial powers to generate X * the complete power. X */ Xvoid Xzredcpower(rp, z1, z2, res) X REDC *rp; /* REDC information */ X ZVALUE z1; /* REDC number to be raised */ X ZVALUE z2; /* normal number to raise number to */ X ZVALUE *res; /* result */ X{ X HALF *hp; /* pointer to current word of the power */ X ZVALUE *pp; /* pointer to low power table */ X ZVALUE ans, temp; /* calculation values */ X ZVALUE modpow; /* current small power */ X ZVALUE lowpowers[POWNUMS]; /* low powers */ X int curshift; /* shift value for word of power */ X HALF curhalf; /* current word of power */ X unsigned int curpow; /* current low power */ X unsigned int curbit; /* current bit of low power */ X int i; X X if (zisneg(z1)) X math_error("Negative number in zredcpower"); X if (zisneg(z2)) X math_error("Negative power in zredcpower"); X X /* X * Check for zero or the REDC format for one. X */ X if (ziszero(z1) || zisunit(rp->mod)) { X *res = _zero_; X return; X } X if (zcmp(z1, rp->one) == 0) { X zcopy(rp->one, res); X return; X } X X /* X * See if the number being raised is the REDC format for -1. X * If so, then the answer is the REDC format for one or minus one. X * To do this check, calculate the REDC format for -1. X */ X if (((HALF)(z1.v[0] + rp->one.v[0])) == rp->mod.v[0]) { X zsub(rp->mod, rp->one, &temp); X if (zcmp(z1, temp) == 0) { X if (zisodd(z2)) { X *res = temp; X return; X } X zfree(temp); X zcopy(rp->one, res); X return; X } X zfree(temp); X } X X for (pp = &lowpowers[2]; pp < &lowpowers[POWNUMS]; pp++) X pp->len = 0; X zcopy(rp->one, &lowpowers[0]); X zcopy(z1, &lowpowers[1]); X zcopy(rp->one, &ans); X X hp = &z2.v[z2.len - 1]; X curhalf = *hp; X curshift = BASEB - POWBITS; X while (curshift && ((curhalf >> curshift) == 0)) X curshift -= POWBITS; X X /* X * Calculate the result by examining the power POWBITS bits at a time, X * and use the table of low powers at each iteration. X */ X for (;;) { X curpow = (curhalf >> curshift) & (POWNUMS - 1); X pp = &lowpowers[curpow]; X X /* X * If the small power is not yet saved in the table, then X * calculate it and remember it in the table for future use. X */ X if (pp->len == 0) { X if (curpow & 0x1) X zcopy(z1, &modpow); X else X zcopy(rp->one, &modpow); X X for (curbit = 0x2; curbit <= curpow; curbit *= 2) { X pp = &lowpowers[curbit]; X if (pp->len == 0) X zredcsquare(rp, lowpowers[curbit/2], X pp); X if (curbit & curpow) { X zredcmul(rp, *pp, modpow, &temp); X zfree(modpow); X modpow = temp; X } X } X pp = &lowpowers[curpow]; X *pp = modpow; X } X X /* X * If the power is nonzero, then accumulate the small power X * into the result. X */ X if (curpow) { X zredcmul(rp, ans, *pp, &temp); X zfree(ans); X ans = temp; X } X X /* X * Select the next POWBITS bits of the power, if there is X * any more to generate. X */ X curshift -= POWBITS; X if (curshift < 0) { X if (hp-- == z2.v) X break; X curhalf = *hp; X curshift = BASEB - POWBITS; X } X X /* X * Square the result POWBITS times to make room for the next X * chunk of bits. X */ X for (i = 0; i < POWBITS; i++) { X zredcsquare(rp, ans, &temp); X zfree(ans); X ans = temp; X } X } X X for (pp = lowpowers; pp < &lowpowers[POWNUMS]; pp++) { X if (pp->len) X freeh(pp->v); X } X *res = ans; X} X X/* END CODE */ SHAR_EOF chmod 0644 calc2.9.0/zmod.c || echo "restore of calc2.9.0/zmod.c fails" set `wc -c calc2.9.0/zmod.c`;Sum=$1 if test "$Sum" != "32540" then echo original size 32540, current size $Sum;fi echo "x - extracting calc2.9.0/zmul.c (Text)" sed 's/^X//' << 'SHAR_EOF' > calc2.9.0/zmul.c && X/* X * Copyright (c) 1993 David I. Bell X * Permission is granted to use, distribute, or modify this source, X * provided that this copyright notice remains intact. X * X * Faster than usual multiplying and squaring routines. X * The algorithm used is the reasonably simple one from Knuth, volume 2, X * section 4.3.3. These recursive routines are of speed O(N^1.585) X * instead of O(N^2). The usual multiplication and (almost usual) squaring X * algorithms are used for small numbers. On a 386 with its compiler, the X * two algorithms are equal in speed at about 100 decimal digits. X */ X X#include "zmath.h" X X XLEN _mul2_ = MUL_ALG2; /* size of number to use multiply algorithm 2 */ XLEN _sq2_ = SQ_ALG2; /* size of number to use square algorithm 2 */ X X Xstatic HALF *tempbuf; /* temporary buffer for multiply and square */ X Xstatic LEN domul MATH_PROTO((HALF *v1, LEN size1, HALF *v2, LEN size2, HALF *ans)); Xstatic LEN dosquare MATH_PROTO((HALF *vp, LEN size, HALF *ans)); X X X/* X * Multiply two numbers using the following formula recursively: X * (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D X * where S is a power of 2^16, and so multiplies by it are shifts, and X * A,B,C,D are the left and right halfs of the numbers to be multiplied. X */ Xvoid Xzmul(z1, z2, res) X ZVALUE z1, z2; /* numbers to multiply */ X ZVALUE *res; /* result of multiplication */ X{ X LEN len; /* size of array */ X X if (ziszero(z1) || ziszero(z2)) { X *res = _zero_; X return; X } X if (zisunit(z1)) { X zcopy(z2, res); X res->sign = (z1.sign != z2.sign); X return; X } X if (zisunit(z2)) { X zcopy(z1, res); X res->sign = (z1.sign != z2.sign); X return; X } X X /* X * Allocate a temporary buffer for the recursion levels to use. X * An array needs to be allocated large enough for all of the X * temporary results to fit in. This size is about twice the size X * of the largest original number, since each recursion level uses X * the size of its given number, and whose size is 1/2 the size of X * the previous level. The sum of the infinite series is 2. X * Add some extra words because of rounding when dividing by 2 X * and also because of the extra word that each multiply needs. X */ X len = z1.len; X if (len < z2.len) X len = z2.len; X len = 2 * len + 64; X tempbuf = zalloctemp(len); X X res->sign = (z1.sign != z2.sign); X res->v = alloc(z1.len + z2.len + 1); X res->len = domul(z1.v, z1.len, z2.v, z2.len, res->v); X} X X X/* X * Recursive routine to multiply two numbers by splitting them up into X * two numbers of half the size, and using the results of multiplying the X * subpieces. The result is placed in the indicated array, which must be X * large enough for the result plus one extra word (size1 + size2 + 1). X * Returns the actual size of the result with leading zeroes stripped. X * This also uses a temporary array which must be twice as large as X * one more than the size of the number at the top level recursive call. X */ Xstatic LEN Xdomul(v1, size1, v2, size2, ans) X HALF *v1; /* first number */ X LEN size1; /* size of first number */ X HALF *v2; /* second number */ X LEN size2; /* size of second number */ X HALF *ans; /* location for result */ X{ X LEN shift; /* amount numbers are shifted by */ X LEN sizeA; /* size of left half of first number */ X LEN sizeB; /* size of right half of first number */ X LEN sizeC; /* size of left half of second number */ X LEN sizeD; /* size of right half of second number */ X LEN sizeAB; /* size of subtraction of A and B */ X LEN sizeDC; /* size of subtraction of D and C */ X LEN sizeABDC; /* size of product of above two results */ X LEN subsize; /* size of difference of halfs */ X LEN copysize; /* size of number left to copy */ X LEN sizetotal; /* total size of product */ X LEN len; /* temporary length */ X HALF *baseA; /* base of left half of first number */ X HALF *baseB; /* base of right half of first number */ X HALF *baseC; /* base of left half of second number */ X HALF *baseD; /* base of right half of second number */ X HALF *baseAB; /* base of result of subtraction of A and B */ X HALF *baseDC; /* base of result of subtraction of D and C */ X HALF *baseABDC; /* base of product of above two results */ X HALF *baseAC; /* base of product of A and C */ X HALF *baseBD; /* base of product of B and D */ X FULL carry; /* carry digit for small multiplications */ X FULL carryACBD; /* carry from addition of A*C and B*D */ X FULL digit; /* single digit multiplying by */ X HALF *temp; /* base for temporary calculations */ X BOOL neg; /* whether imtermediate term is negative */ X register HALF *hd, *h1, *h2; /* for inner loops */ X SIUNION sival; /* for addition of digits */ X X /* X * Trim the numbers of leading zeroes and initialize the X * estimated size of the result. X */ X hd = &v1[size1 - 1]; X while ((*hd == 0) && (size1 > 1)) { X hd--; X size1--; X } X hd = &v2[size2 - 1]; X while ((*hd == 0) && (size2 > 1)) { X hd--; X size2--; X } X sizetotal = size1 + size2; X X /* X * First check for zero answer. X */ X if (((size1 == 1) && (*v1 == 0)) || ((size2 == 1) && (*v2 == 0))) { X *ans = 0; X return 1; X } X X /* X * Exchange the two numbers if necessary to make the number of X * digits of the first number be greater than or equal to the X * second number. X */ X if (size1 < size2) { X len = size1; size1 = size2; size2 = len; X hd = v1; v1 = v2; v2 = hd; X } X X /* X * If the smaller number has only a few digits, then calculate X * the result in the normal manner in order to avoid the overhead X * of the recursion for small numbers. The number of digits where X * the algorithm changes is settable from 2 to maxint. X */ X if (size2 < _mul2_) { X /* X * First clear the top part of the result, and then multiply X * by the lowest digit to get the first partial sum. Later X * products will then add into this result. X */ X hd = &ans[size1]; X len = size2; X while (len--) X *hd++ = 0; X X digit = *v2++; X h1 = v1; X hd = ans; X carry = 0; X len = size1; X while (len >= 4) { /* expand the loop some */ X len -= 4; X sival.ivalue = ((FULL) *h1++) * digit + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X sival.ivalue = ((FULL) *h1++) * digit + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X sival.ivalue = ((FULL) *h1++) * digit + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X sival.ivalue = ((FULL) *h1++) * digit + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X while (len--) { X sival.ivalue = ((FULL) *h1++) * digit + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X *hd = (HALF)carry; X X /* X * Now multiply by the remaining digits of the second number, X * adding each product into the final result. X */ X h2 = ans; X while (--size2 > 0) { X digit = *v2++; X h1 = v1; X hd = ++h2; X if (digit == 0) X continue; X carry = 0; X len = size1; X while (len >= 4) { /* expand the loop some */ X len -= 4; X sival.ivalue = ((FULL) *h1++) * digit X + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X sival.ivalue = ((FULL) *h1++) * digit X + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X sival.ivalue = ((FULL) *h1++) * digit X + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X sival.ivalue = ((FULL) *h1++) * digit X + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X while (len--) { X sival.ivalue = ((FULL) *h1++) * digit X + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X while (carry) { X sival.ivalue = ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X } X X /* X * Now return the true size of the number. X */ X len = sizetotal; X hd = &ans[len - 1]; X while ((*hd == 0) && (len > 1)) { X hd--; X len--; X } X return len; X } X X /* X * Need to multiply by a large number. X * Allocate temporary space for calculations, and calculate the X * value for the shift. The shift value is 1/2 the size of the X * larger (first) number (rounded up). The amount of temporary X * space needed is twice the size of the shift, plus one more word X * for the multiply to use. X */ X shift = (size1 + 1) / 2; X temp = tempbuf; X tempbuf += (2 * shift) + 1; X X /* X * Determine the sizes and locations of all the numbers. X * The value of sizeC can be negative, and this is checked later. X * The value of sizeD is limited by the full size of the number. X */ X baseA = v1 + shift; X baseB = v1; X baseC = v2 + shift; X baseD = v2; X baseAB = ans; X baseDC = ans + shift; X baseAC = ans + shift * 2; X baseBD = ans; X X sizeA = size1 - shift; X sizeC = size2 - shift; X X sizeB = shift; X hd = &baseB[shift - 1]; X while ((*hd == 0) && (sizeB > 1)) { X hd--; X sizeB--; X } X X sizeD = shift; X if (sizeD > size2) X sizeD = size2; X hd = &baseD[sizeD - 1]; X while ((*hd == 0) && (sizeD > 1)) { X hd--; X sizeD--; X } X X /* X * If the smaller number has a high half of zero, then calculate X * the result by breaking up the first number into two numbers X * and combining the results using the obvious formula: X * (A*S+B) * D = (A*D)*S + B*D X */ X if (sizeC <= 0) { X len = domul(baseB, sizeB, baseD, sizeD, ans); X hd = &ans[len]; X len = sizetotal - len; X while (len--) X *hd++ = 0; X X /* X * Add the second number into the first number, shifted X * over at the correct position. X */ X len = domul(baseA, sizeA, baseD, sizeD, temp); X h1 = temp; X hd = ans + shift; X carry = 0; X while (len--) { X sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X while (carry) { X sival.ivalue = ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X X /* X * Determine the final size of the number and return it. X */ X len = sizetotal; X hd = &ans[len - 1]; X while ((*hd == 0) && (len > 1)) { X hd--; X len--; X } X tempbuf = temp; X return len; X } X X /* X * Now we know that the high halfs of the numbers are nonzero, X * so we can use the complete formula. X * (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D. X * The steps are done in the following order: X * A-B X * D-C X * (A-B)*(D-C) X * S^2*A*C + B*D X * (S^2+S)*A*C + (S+1)*B*D (*) X * (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D X * X * Note: step (*) above can produce a result which is larger than X * the final product will be, and this is where the extra word X * needed in the product comes from. After the final subtraction is X * done, the result fits in the expected size. Using the extra word X * is easier than suppressing the carries and borrows everywhere. X * X * Begin by forming the product (A-B)*(D-C) into a temporary X * location that we save until the final step. Do each subtraction X * at positions 0 and S. Be very careful about the relative sizes X * of the numbers since this result can be negative. For the first X * step calculate the absolute difference of A and B into a temporary X * location at position 0 of the result. Negate the sign if A is X * smaller than B. X */ X neg = FALSE; X if (sizeA == sizeB) { X len = sizeA; X h1 = &baseA[len - 1]; X h2 = &baseB[len - 1]; X while ((len > 1) && (*h1 == *h2)) { X len--; X h1--; X h2--; X } X } X if ((sizeA > sizeB) || ((sizeA == sizeB) && (*h1 > *h2))) X { X h1 = baseA; X h2 = baseB; X sizeAB = sizeA; X subsize = sizeB; X } else { X neg = !neg; X h1 = baseB; X h2 = baseA; X sizeAB = sizeB; X subsize = sizeA; X } X copysize = sizeAB - subsize; X X hd = baseAB; X carry = 0; X while (subsize--) { X sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry; X *hd++ = BASE1 - sival.silow; X carry = sival.sihigh; X } X while (copysize--) { X sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry; X *hd++ = BASE1 - sival.silow; X carry = sival.sihigh; X } X X hd = &baseAB[sizeAB - 1]; X while ((*hd == 0) && (sizeAB > 1)) { X hd--; X sizeAB--; X } X X /* X * This completes the calculation of abs(A-B). For the next step X * calculate the absolute difference of D and C into a temporary X * location at position S of the result. Negate the sign if C is X * larger than D. X */ X if (sizeC == sizeD) { X len = sizeC; X h1 = &baseC[len - 1]; X h2 = &baseD[len - 1]; X while ((len > 1) && (*h1 == *h2)) { X len--; X h1--; X h2--; X } X } X if ((sizeC > sizeD) || ((sizeC == sizeD) && (*h1 > *h2))) X { X neg = !neg; X h1 = baseC; X h2 = baseD; X sizeDC = sizeC; X subsize = sizeD; X } else { X h1 = baseD; X h2 = baseC; X sizeDC = sizeD; X subsize = sizeC; X } X copysize = sizeDC - subsize; X X hd = baseDC; X carry = 0; X while (subsize--) { X sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry; X *hd++ = BASE1 - sival.silow; X carry = sival.sihigh; X } X while (copysize--) { X sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry; X *hd++ = BASE1 - sival.silow; X carry = sival.sihigh; X } X hd = &baseDC[sizeDC - 1]; X while ((*hd == 0) && (sizeDC > 1)) { X hd--; X sizeDC--; X } X X /* X * This completes the calculation of abs(D-C). Now multiply X * together abs(A-B) and abs(D-C) into a temporary location, X * which is preserved until the final steps. X */ X baseABDC = temp; X sizeABDC = domul(baseAB, sizeAB, baseDC, sizeDC, baseABDC); X X /* X * Now calculate B*D and A*C into one of their two final locations. X * Make sure the high order digits of the products are zeroed since X * this initializes the final result. Be careful about this zeroing X * since the size of the high order words might be smaller than X * the shift size. Do B*D first since the multiplies use one more X * word than the size of the product. Also zero the final extra X * word in the result for possible carries to use. X */ X len = domul(baseB, sizeB, baseD, sizeD, baseBD); X hd = &baseBD[len]; X len = shift * 2 - len; X while (len--) X *hd++ = 0; X X len = domul(baseA, sizeA, baseC, sizeC, baseAC); X hd = &baseAC[len]; X len = sizetotal - shift * 2 - len + 1; X while (len--) X *hd++ = 0; X X /* X * Now add in A*C and B*D into themselves at the other shifted X * position that they need. This addition is tricky in order to X * make sure that the two additions cannot interfere with each other. X * Therefore we first add in the top half of B*D and the lower half X * of A*C. The sources and destinations of these two additions X * overlap, and so the same answer results from the two additions, X * thus only two pointers suffice for both additions. Keep the X * final carry from these additions for later use since we cannot X * afford to change the top half of A*C yet. X */ X h1 = baseBD + shift; X h2 = baseAC; X carryACBD = 0; X len = shift; X while (len--) { X sival.ivalue = ((FULL) *h1) + ((FULL) *h2) + carryACBD; X *h1++ = sival.silow; X *h2++ = sival.silow; X carryACBD = sival.sihigh; X } X X /* X * Now add in the bottom half of B*D and the top half of A*C. X * These additions are straightforward, except that A*C should X * be done first because of possible carries from B*D, and the X * top half of A*C might not exist. Add in one of the carries X * from the previous addition while we are at it. X */ X h1 = baseAC + shift; X hd = baseAC; X carry = carryACBD; X len = sizetotal - 3 * shift; X while (len--) { X sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X while (carry) { X sival.ivalue = ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X X h1 = baseBD; X hd = baseBD + shift; X carry = 0; X len = shift; X while (len--) { X sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X while (carry) { X sival.ivalue = ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X X /* X * Now finally add in the other delayed carry from the X * above addition. X */ X hd = baseAC + shift; X while (carryACBD) { X sival.ivalue = ((FULL) *hd) + carryACBD; X *hd++ = sival.silow; X carryACBD = sival.sihigh; X } X X /* X * Now finally add or subtract (A-B)*(D-C) into the final result at X * the correct position (S), according to whether it is positive or X * negative. When subtracting, the answer cannot go negative. X */ X h1 = baseABDC; X hd = ans + shift; X carry = 0; X len = sizeABDC; X if (neg) { X while (len--) { X sival.ivalue = BASE1 - ((FULL) *hd) + X ((FULL) *h1++) + carry; X *hd++ = BASE1 - sival.silow; X carry = sival.sihigh; X } X while (carry) { X sival.ivalue = BASE1 - ((FULL) *hd) + carry; X *hd++ = BASE1 - sival.silow; X carry = sival.sihigh; X } X } else { X while (len--) { X sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X while (carry) { X sival.ivalue = ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X } X X /* X * Finally determine the size of the final result and return that. X */ X len = sizetotal; X hd = &ans[len - 1]; X while ((*hd == 0) && (len > 1)) { X hd--; X len--; X } X tempbuf = temp; X return len; X} X X X/* X * Square a number by using the following formula recursively: X * (A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2 X * where S is a power of 2^16, and so multiplies by it are shifts, X * and A and B are the left and right halfs of the number to square. X */ Xvoid Xzsquare(z, res) X ZVALUE z, *res; X{ X LEN len; X X if (ziszero(z)) { X *res = _zero_; X return; X } X if (zisunit(z)) { X *res = _one_; X return; X } X X /* X * Allocate a temporary array if necessary for the recursion to use. X * The array needs to be allocated large enough for all of the X * temporary results to fit in. This size is about 3 times the X * size of the original number, since each recursion level uses 3/2 X * of the size of its given number, and whose size is 1/2 the size X * of the previous level. The sum of the infinite series is 3. X * Allocate some extra words for rounding up the sizes. X */ X len = 3 * z.len + 32; X tempbuf = zalloctemp(len); X X res->sign = 0; X res->v = alloc(z.len * 2); X res->len = dosquare(z.v, z.len, res->v); X} X X X/* X * Recursive routine to square a number by splitting it up into two numbers X * of half the size, and using the results of squaring the subpieces. X * The result is placed in the indicated array, which must be large X * enough for the result (size * 2). Returns the size of the result. X * This uses a temporary array which must be 3 times as large as the X * size of the number at the top level recursive call. X */ Xstatic LEN Xdosquare(vp, size, ans) X HALF *vp; /* value to be squared */ X LEN size; /* length of value to square */ X HALF *ans; /* location for result */ X{ X LEN shift; /* amount numbers are shifted by */ X LEN sizeA; /* size of left half of number to square */ X LEN sizeB; /* size of right half of number to square */ X LEN sizeAA; /* size of square of left half */ X LEN sizeBB; /* size of square of right half */ X LEN sizeAABB; /* size of sum of squares of A and B */ X LEN sizeAB; /* size of difference of A and B */ X LEN sizeABAB; /* size of square of difference of A and B */ X LEN subsize; /* size of difference of halfs */ X LEN copysize; /* size of number left to copy */ X LEN sumsize; /* size of sum */ X LEN sizetotal; /* total size of square */ X LEN len; /* temporary length */ X LEN len1; /* another temporary length */ X FULL carry; /* carry digit for small multiplications */ X FULL digit; /* single digit multiplying by */ X HALF *temp; /* base for temporary calculations */ X HALF *baseA; /* base of left half of number */ X HALF *baseB; /* base of right half of number */ X HALF *baseAA; /* base of square of left half of number */ X HALF *baseBB; /* base of square of right half of number */ X HALF *baseAABB; /* base of sum of squares of A and B */ X HALF *baseAB; /* base of difference of A and B */ X HALF *baseABAB; /* base of square of difference of A and B */ X register HALF *hd, *h1, *h2, *h3; /* for inner loops */ X SIUNION sival; /* for addition of digits */ X X /* X * First trim the number of leading zeroes. SHAR_EOF echo "End of part 13" echo "File calc2.9.0/zmul.c is continued in part 14" echo "14" > s2_seq_.tmp exit 0