Fujiology Archive

home *** CD-ROM | disk | FTP | other *** search

/ Fujiology Archive / fujiology_archive_v1_0.iso / !FALCON / LINEOUT / OUT.ZIP / SOURCE.ZIP / MATH.I < prev next >

Wrap

Text File | 2002-11-03 | 17KB | 740 lines

; Math libs. all stuff in here, sin/cos/tan/exp/atan/random,etc. ;======= equates =========================================================== * >WARNING< for these equ's: when using a new 'sintbllen' you must * recalculate 'cos1' and 'sin1'! sintbllen: equ 2048 * MUST BE A EXPONENTIAL VALUE OF 2! sin1: equ 13176774 * sin(2π/2048)*2^32 cos1: equ 4294947083 * cos(2π/2048)*2^32 Math.EXP_ITERATIONS: = 10 ; This is really awful.. To avoid distortion near angles of +-pi/4, you ; need at least 20 iterations! Math.SATAN: = 1 ; fast but 'Sneaky ATAN' Math.ATAN_ITERATIONS: = 40 Math.RAD2DEG: = sintbllen*5000/31415 ;degs/2pi Calc_NextRandom: MACRO move.l random,d0 move.l d0,d1 mulu.w d0,d0 eor.l d1,d0 addq.l #7,d0 move.l d0,random ENDM getNextRandom: Calc_NextRandom rts * Very fast and accurate squareroot algorithm. * Quite lengthy, though: 66 bytes. * INPUT: d1.l: value to calculate the squareroot of (integer) * OUTPUT: d0.l: squareroot of value (16.16 fixed point) Math.sqrt: CALC_ATARISQRT: moveq #1,d2 ror.l #2,d2 moveq #$F,d3 .loop1: cmp.l d2,d1 bgt.s .endloop1 add.l d1,d1 lsr.l #1,d2 dbf d3,.loop1 moveq #0,d0 bra.s .is_null .endloop1: sub.l d2,d1 move.l d2,d0 lsr.l #1,d2 .loop2: lsr.l #1,d2 add.l d2,d0 cmp.l d0,d1 bgt.s .endloop2 sub.l d2,d0 add.l d1,d1 dbf d3,.loop2 bra.s .end .endloop2: sub.l d0,d1 add.l d2,d0 add.l d1,d1 dbf d3,.loop2 .end: add.l d0,d0 addi.l #$00008000,d0 .is_null: rts *========================================================================== * Sinewave table generator. * By EarX/~fUn~, 10-5-1998 * 68020 or higher is required! *========================================================================== * Macro that returns the modulo of a given angle. * INPUT: angle: type: data-register (word) or RAM (word) Do_SinModulo: MACRO angle andi.w #sintbllen-1,\1 ENDM * Macro that returns sine & cosine of a given angle. * PRECONDITION: INIT_SINETABLE has been called! * INPUT: base: type: address-register or address or relative address * inpreg: type: data-register or address-register (lower word) * contains: angle (0=0°, sintbllen=360°) * OUTPUT: sinreg: type: data-register (long) or address-register * contains: sine value (signed: -32768 to 32767) * cosreg: type: data-register (long) or address-register * contains: cosine value (signed: -32768 to 32767) Get_SinCos: MACRO base,inpreg,sinreg,cosreg movem.w (\1,\2.w*4),\3/\4 ENDM * Macro that returns sine of a given angle. * PRECONDITION: INIT_SINETABLE has been called! * INPUT: base: type: address-register or address or relative address * inpreg: type: data-register or address-register (lower word) * contains: angle (0=0°, sintbllen=360°) * OUTPUT: sinreg: type: data-register (long) or address-register * contains: sine value (signed: -32768 to 32767) Get_Sin: MACRO base,inpreg,sinreg move.w (\1,\2.w*4),\3 ENDM * Macro that returns cosine of a given angle. * PRECONDITION: INIT_SINETABLE has been called! * INPUT: base: type: address-register or address or relative address * inpreg: type: data-register or address-register (lower word) * contains: angle (0=0°, sintbllen=360°) * OUTPUT: cosreg: type: data-register (long) or address-register * contains: cosine value (signed: -32768 to 32767) Get_Cos: MACRO base,inpreg,cosreg move.w 2(\1,\2.w*4),\3 ENDM * Creates the a combined sine and cosine table for quick fetching. * Macro is exactly 96 bytes in length :-) * INPUT: a0: address of sine_tbl Init_SineTable: MACRO moveq #$ffffffff,d0 * /cos(0)=1 lsr.l #1,d0 * \(=$7fffffff) moveq #0,d1 * sin(0)=0 move.l #sin1,d6 move.w #sintbllen/4-1,d7 .genlop: swap d0 * Get high-word of cosa swap d1 * Get high-word of sina move.w d1,2+(sintbllen)*3(a0) * Copy sina in cos-4th quadrant move.w d0,sintbllen*1(a0) * Copy cosa in sin-2nd quadrant sub.w d1,2+(sintbllen)*1(a0) * Copy -sina in cos-2nd quadrant sub.w d0,sintbllen*3(a0) * Copy -cosa in sin-4th quadrant sub.w d0,2+(sintbllen)*2(a0) * Copy -cosa in cos-3rd quadrant sub.w d1,sintbllen*2(a0) * Copy -sina in sin-3rd quadrant move.w d1,(a0)+ * Save sina (16 bit signed value) in first quadrant move.w d0,(a0)+ * Save cosa (16 bit signed value) in first quadrant swap d0 * Change cosa back to fixedpoint swap d1 * Change sina back to fixedpoint move.l d1,d4 * / Backup sina move.l d0,d5 * | and cosa move.l d1,d2 * | for use in move.l d0,d3 * \ multiplications. mulu.l d6,d3:d1 * d3:=sin1*sina mulu.l #cos1,d2:d0 * d2:=cos1*cosa mulu.l d6,d1:d5 * d0:=sin1*cosa mulu.l #cos1,d0:d4 * d1:=cos1*sina sub.l d3,d2 * d2:=(cos1*cosa)-(sin1*sina) add.l d0,d1 * sina:=(sin1*cosa)+(cos1*sina) move.l d2,d0 * cosa:=(cos1*cosa)-(sin1*sina) dbra d7,.genlop ENDM * Creates a tangens table by using the sine/cosine table. * INPUT: sincos_tbl: address register * OUTPUT: tan_tbl: address register Init_TanTable: MACRO sincos_tbl,tan_tbl move.w #sintbllen-1,d7 .gentanloop: movem.w (/1)+,d0-d1 move.w d0,d0 beq.s .notandiv lsl.l #8,d1 divs.w d0,d1 .notandiv: move.w d1,(/2)+ dbra d7,.gentanloop ENDM ;------------------------------------------------------------------------------ ; exp(x) using Taylor series: ; ; exp(x)=1+x+(1/2!)x^2+(1/3!)x^3+.. ; exp(x)=Sum[ n : 0<=n : x^n/n! ] ; ;------------------------------------------------------------------------------ ; Proof of correctness by variation of constant N: ; ; exp(x,N)= Sum[ n : 0<=n<=N : x^n/n! ] ; ; exp(x,N+1)= exp(x,N) + x^(N+1)/(N+1)! ; ; x x^N ; = exp(x,N) + ----- * ----- ; N+1 N! ; ; => { T.n = x^n/n! } ; ;------------------------------------------------------------------------------ ; Main invariants: ; ; exp(x,0)=T.0=1 ; T.(N+1)=x/(N+1)*T.n ; exp(x,N+1) = exp(x,N) + x/(N+1)*T.n ; ;------------------------------------------------------------------------------ ; Unsigned 16b fractional implementation. Inputs and outputs fixedpoint. ; Reasonably accurate, at least: when |x|>1 => |x|<#iterations*2!! ; AND when numbers can be extremely large (even more than doubleprec!). ; |x|<=1 always works fast and accurate. ; Needs 020 or higher.. ; ; Better idea is to precalc e and 1/e and put them in tables. ; Then only power function is needed. This is simple cos of the short ; fractional taylor approx and the highly optimisable multiplier with ; precalced tables!! Maybe next time.. ; ; INPUT: ; d0.l=x (16:16) ; OUTPUT: ; d1.l=exp(x) (16:16) Math.exp: ; todo: power function and table stuff! ; Good fractional exp function. Even works with few iterations. Math.fracExp: IFNE 1 bsr Fix32.toFloat ; d0.l=x (f) moveq #1,d7 move.l #$7FFFFF00,d6 ; d6.l=T.0=1.0 (f) move.l d0,d5 ; d5.l=x (f) movea.l d6,a0 ; a0=exp(x,0) (f) .loop: move.l d6,d1 ; d1.l=T.n (f) move.l d5,d0 ; d0.l=x (f) ; d0.l=x (f), d1.l=T.n (f) bsr Float.mul move.l d0,d6 ; d6.l=x*T.n (f) move.l d7,d0 bsr Int.toFloat move.l d0,d1 move.l d6,d0 ; d0.l=x*T.n (f), d1.l=n+1 (f) bsr Float.div ; d0.l=T.(n+1)=x/(n+1)*T.n (f) move.l d0,d6 ; Store T.(n+1) move.l a0,d1 ; d1.l=exp(x,n) bsr Float.add movea.l d0,a0 ; d5.l=exp(x,n+1) addq.w #1,d7 ; n:=n+1 cmpi.w #Math.EXP_ITERATIONS,d7 blt.s .loop bsr Float.toFix32 ; d0.l=exp(x) (16:16) ELSE moveq #1,d7 move.l #$00010000,d1 move.l d1,d2 .loop: muls.l d0,d3:d2 move.w d3,d2 swap d2 ; d2.l=x*T.n (16:16) divs.l d7,d2 ; d2.l=T.(n+1)=x/(N+1)*T.n add.l d2,d1 ; d1.l=exp(x,n+1)=exp(x,n)+T.(n+1) addq.w #1,d7 cmpi.w #Math.EXP_ITERATIONS,d7 blt.s .loop ENDC rts ;------------------------------------------------------------------------------ ; Definition of arctan(x) using Taylor series: ; ; arctan(x)=x-x^3/3+x^5/5-..., (-1<x<+1) ; ; arctan(x)= Sum[ n : 0<=n : (-1)^n*x^(1+2n)/(1+2n) ], (-1<x<+1) ; ;------------------------------------------------------------------------------ ; Proof of correctness by variation of constant N: ; ; arctan(x,N)= Sum[ n : 0<=n<=N : (-1)^n*x^(1+2n)/(1+2n) ] ; ; arctan(x,N+1)= arctan(x,N) + (-1)^(N+1)*x^(1+2(N+1))/(1+2(N+1)) ; ; p.n=1+2n ; p.0=1 ; p.(n+1)=p.n+2 ; ; T.n=x^p.n ; T.0=x^p.0=x^1=x ; T.(n+1)=x^p.(n+1)=x^(p.n+2)=x^p.n*x*x=T.n*x*x ; ;------------------------------------------------------------------------------ ; Main invariants: ; ; arctan(x,0)= (-1)^0*x^1/1= T.0= x ; ; arctan(x,N+1)= arctan(x,N) + (-1)^(N+1)*T.N*x*x/(p.N+2) ;------------------------------------------------------------------------------ ; Unsigned 16b fractional implementation. Inputs and outputs fixedpoint. ; Reasonably accurate and works on old 68000. ; INPUT: ; d0.l=x (16:16) (-256.0<x<+256.0) ; OUTPUT: ; d1.l=arctan(x) (radians) (16:16) Frac.arctan: move.l d0,-(sp) bpl.s .sign_okay neg.l d0 .sign_okay: cmpi.l #$00010000,d0 bne.s .calc_quadrant ; Output pi/4 (arctan(1)=pi/4). move.l #31415<<14/10000,d1 tst.l (sp)+ bpl.s .sign_okay2 neg.l d1 .sign_okay2: rts .calc_quadrant: bgt.s .adjust bsr.s .calc tst.l (sp)+ bpl.s .end_sign neg.l d1 .end_sign: rts .adjust: move.l #$01000000,d1 lsr.l #8,d0 divu.w d0,d1 move.w d1,d0 ; d0.w=1/x (frac) ; moveq #$FFFFFFFF,d1 ; divu.l d0,d1 ; move.w d1,d0 bsr.s .calc move.l #31415<<15/10000,d2 sub.l d1,d2 move.l d2,d1 ; d1.l=pi/2-arctan(1/x) tst.l (sp)+ bpl.s .end_sign2 neg.l d1 .end_sign2: rts ; d0.l=x (16:16, 0<=x<1) ; OUTPUT: ; d1.l=arctan(x) (16:16, non-negative) .calc: IFNE Math.SATAN ; atan(a) =~ x/(1+0.28*x*x) move.w d0,d2 mulu.w d2,d2 swap d2 ; d2.w=x*x (frac) mulu.w #65536*28/100,d2 ; d2.l=0.28*x*x (frac) move.w #1,d2 swap d2 ; d2.l=1+0.28*x*x (16:16) swap d0 ; d0.l=x<<16 (16:16) divu.l d2,d0 ; d0.l=x/(1+0.28*x*x) move.l d0,d1 ELSE clr.l d1 move.w d0,d1 ; d1.l=arctan(x,n)={n=0}=x (frac) moveq #1,d2 ; d2.l=p.n=2n+1 move.w d0,d3 ; d3.l=T.n={n=0}=x (frac) mulu.w d0,d0 swap d0 ; d0.w=x*x (frac) moveq #Math.ATAN_ITERATIONS/2-1,d6 ; d6.w=counter .loop: ; Calculate p.(n+1). addq.w #2,d2 ; d2.l=p.n+2=p.(n+1) ; Calculate T.(n+1). mulu.w d0,d3 ; d3.l=T.(n+1)=x*x*T.n (16:16) swap d3 ; d3.w=T.(n+1)=x*x*T.n (frac) clr.l d5 move.w d3,d5 divu.w d2,d5 ; d5.w=x*x*T.n/(p.n+2) (frac) ; Calculate arctan(x,n+1). sub.w d5,d1 ; Calculate p.(n+1). addq.w #2,d2 ; d2.l=p.n+2=p.(n+1) ; Calculate T.(n+1). mulu.w d0,d3 ; d3.l=T.(n+1)=x*x*T.n (16:16) swap d3 ; d3.w=T.(n+1)=x*x*T.n (frac) clr.l d5 move.w d3,d5 divu.w d2,d5 ; d5.w=x*x*T.n/(p.n+2) (frac) ; Calculate arctan(x,n+1). add.w d5,d1 dbra d6,.loop ENDC rts ; Uses two input system.. ; INPUT: ; d0.l=x (int) ; d1.l=y (int) Frac.atan2: clr.l d3 cmp.l d0,d1 bhi.s .sorted bne.s .go_on moveq #$FFFFFFFF,d0 bra.s .a_calced .go_on: exg d0,d1 not.l d3 .sorted:swap d0 divu.w d1,d0 ; d0.w=a=x/y (frac) .a_calced: ; atan(a) =~ a/(1+0.28*a*a) move.w d0,d2 mulu.w d2,d2 swap d2 ; d2.w=a*a (frac) mulu.w #65536*28/100,d2 ; d2.l=0.28*a*a (frac) move.w #1,d2 swap d2 ; d2.l=1+0.28*a*a (16:16) swap d0 ; d0.l=a<<16 (16:16) divu.l d2,d0 ; d0.l=a/(1+0.28*a*a) move.l d0,d1 tst.l d3 beq.s .end move.l #31415<<15/10000,d2 sub.l d1,d2 move.l d2,d1 ; d1.l=pi/2-arctan(1/a) .end: rts IFNE 0 ; Floating point implementation, accuracy overkill, you all know I'm sick. ; Needs 68020 or higher. ; ; integers: n, p.n ; reals: x, x*x, T.n, arctan(x,n) ; ; INPUT: ; d0.l=x (float) ; OUTPUT: ; d1.l=arctan(x) (radians) (float) Float.arctan: cmpi.l #$40000001,d0 bne.s .calc_quadrant ; Output pi/4 (arctan(1)=pi/4). move.l #$6487ED00,d0 rts .calc_quadrant: move.l #$40000001,d1 move.l d0,-(sp) bsr Float.sub ; d0.l=x-1.0 (float) move.l d0,d1 ; d1.l=x-1.0 (float) move.l (sp)+,d0 ; d0.l=x (float) tst.l d1 bgt.s .adjust bra.s .calc .adjust:move.l d0,d1 ; d1.l=x (float) move.l #$40000001,d0 ; d0.l=1.0 (float) bsr Float.div ; d0.l=1/x (float) bsr.s .calc ; d1.l=arctan(1/x) (float) move.l #$6487ED01,d0 ; d0.l pi/2 (float) bsr Float.sub move.l d0,d1 ; d1.l=pi/2-arctan(1/x) (float) rts .calc: move.l d0,d1 move.l d0,-(sp) bsr Float.mul move.l d0,d4 ; d4.l=x*x (float) move.l (sp)+,d0 sub.l a6,a6 ; a6=n=0 moveq #1,d7 ; d7.l=p.n=2n+1 move.l d0,d1 ; d1.l=arctan(x,n)={n=0}=T.n (float) move.l d0,d3 ; d3.l=T.n (float) .loop: addq.w #2,d7 ; d7.l=(p.n+2)=p.(n+1) (int) ; Calculate T.(n+1). move.l d1,-(sp) move.l d3,d0 move.l d4,d1 bsr Float.mul ; d0.l=T.(n+1)=x*x*T.n (float) move.l d0,-(sp) move.l d7,d0 bsr Int.toFloat move.l d0,d1 move.l (sp),d0 ; d0.l=T.(n+1)=x*x*T.n (float) ; d1.l=p.(n+1) (float) bsr Float.div move.l (sp)+,d3 ; d3.l=T.(n+1)=x*x*T.n (float) ; d0.l=T.(n+1)=x*x*T.n/p.(n+1) (float) move.l (sp)+,d1 ; Calculate (-1)^(n+1)*T.(n+1). .sign: addq #1,a6 move.l a6,d5 btst #0,d5 beq.s .end_sign move.l d1,-(sp) bsr Float.neg move.l (sp)+,d1 .end_sign: ; Calculate arctan(x,n+1). ; d0.l=(-1)^(n+1)*T.(n+1) (float) ; d1.l=arctan(x,n) (float) movem.l d3-d4,-(sp) bsr Float.add movem.l (sp)+,d3-d4 move.l d0,d1 ; d1.l=arctan(x,n+1) (float) cmpa.w #Math.ATAN_ITERATIONS,a6 blt.s .loop rts ENDC ; INPUT: ; d0.l=src1 (float) ; d1.l=src2 (float) ; OUTPUT: ; d0.l=dest (float) src1*src2 Float.mul: clr.l d2 clr.l d3 move.b d0,d2 ; d2.b=exp src1 move.b d1,d3 ; d3.b=exp src2 sub.l d2,d0 ; d0.l=mantisse src1 sub.l d3,d1 ; d1.l=mantisse src2 muls.l d0,d0:d1 add.l d1,d1 addx.l d0,d0 ; d0.l=mantisse of src1*src2 subq.b #1,d2 add.l d1,d1 addx.l d0,d0 bvc.s .end_normalization addx.b d0,d0 ror.l d0 addq.b #1,d2 .end_normalization: add.b d3,d2 ; d2.b=exp of src1*src2 move.b d2,d0 rts ; INPUT: ; d0.l=divident (float) ; d1.l=divisor (float) ; OUTPUT: ; d0.l=quotient (float) divident/divisor Float.div: clr.l d2 clr.l d3 move.b d0,d2 ; d2.b=exp src1 move.b d1,d3 ; d3.b=exp src2 sub.l d2,d0 ; d0.l=mantisse src1 sub.l d3,d1 ; d1.l=mantisse src2 sub.b d3,d2 ; d2.b=exp of src1*src2 asr.l #2,d0 ; divident<divisor, cos of result limit. clr.l d3 divs.l d1,d0:d3 ; d0.l=mantisse of src1*src2 move.l d3,d0 add.l d0,d0 bvc.s .end_normalization move.l d3,d0 addq.b #1,d2 .end_normalization: move.b d2,d0 rts ; INPUT: ; d0.l=src1 (24b mantisse:8b exponent) ; d1.l=src2 (24b mantisse:8b exponent) ; OUTPUT: ; d0.l=dest (24b mantisse:8b exponent) src1-src2 Float.sub: move.b d1,d3 clr.b d1 neg.l d1 move.b d3,d1 ; ; m: mantissa, p: power ; ; a+b~= { m=m(a)+m(b)>>[p(a)-p(b)], p=p(a); p(a)>=p(b) ; { m=m(b)+m(a)>>[p(b)-p(a)], p=p(b); p(a)<p(b) ; ; INPUT: ; d0.l=src1 (24b mantisse:8b exponent) ; d1.l=src2 (24b mantisse:8b exponent) ; OUTPUT: ; d0.l=dest (24b mantisse:8b exponent) src1+src2 Float.add: clr.l d2 clr.l d3 move.b d0,d2 ; d2.b=p(a) move.b d1,d3 ; d3.b=p(b) sub.l d2,d0 ; d0.l=m(a) sub.l d3,d1 ; d1.l=m(b) cmp.b d3,d2 ; p(a)>=p(b)?? bge.s .end_exg exg.l d0,d1 exg.l d2,d3 .end_exg: move.b d2,d4 sub.b d3,d2 ; d2.b=p(a)-p(b) asr.l d2,d1 ; d1.l=m(b)>>[p(a)-p(b)] addq.b #1,d4 add.l d1,d0 ; d0.l=m(a)+m(b)>>[p(a)-p(b)] bvc.s .normalize_loop addx.b d0,d0 ; if carried.. ror.l #1,d0 ; ..make negative move.b d4,d0 rts .normalize_loop: subq.b #1,d4 move.l d0,d2 add.l d0,d0 bvc.s .normalize_loop .end_normalization: move.l d2,d0 move.b d4,d0 ; d0.l={m(a)+m(b)>>[p(a)-p(b)],p=p(a)}=a+b rts ; INPUT: ; d0.l=src (24b mantisse:8b exponent) ; OUTPUT: ; d0.l=-src (24b mantisse:8b exponent) Float.neg: move.b d0,d1 clr.b d0 neg.l d0 move.b d1,d0 rts ; INPUT: ; d0.l=src (24m:8e) ; OUTPUT: ; d0.l=src (32b signed) Float.toInt: move.b d0,d1 clr.b d0 subi.b #31,d1 ; d1.b=exp-[log(r)+7] bpl.s .limit_0 neg.b d1 asr.l d1,d0 rts .limit_0: clr.l d0 rts ; INPUT: ; d0.l=src (24m:8e) ; OUTPUT: ; d0.l=src (16:16 sgn) Float.toFix32: move.b d0,d1 clr.b d0 subi.b #15,d1 ; d1.b=exp-[log(r)+7]+16 bpl.s .limit_0 neg.b d1 asr.l d1,d0 rts .limit_0: clr.l d0 rts ; INPUT: ; d0.l=src (32b signed) ; OUTPUT: ; d0.l=src (24m:8e) Int.toFloat: moveq #31,d1 tst.l d0 beq.s .limit_0 .normalize_loop: move.l d0,d2 add.l d0,d0 dbvs d1,.normalize_loop move.l d2,d0 move.b d1,d0 ; d0.l={m(a)+m(b)>>[p(a)-p(b)],p=p(a)}=a+b rts .limit_0: move.l #$40000080,d0 rts ; Converts 16:16 fixed point number to floating point. ; INPUT: ; d0.l=src (32b signed) ; OUTPUT: ; d0.l=src (24m:8e) Fix32.toFloat: moveq #31,d1 tst.l d0 beq.s .limit_0 .normalize_loop: move.l d0,d2 add.l d0,d0 dbvs d1,.normalize_loop move.l d2,d0 move.b d1,d0 ; d0.l={m(a)+m(b)>>[p(a)-p(b)],p=p(a)}=a+b subi.b #16,d0 ; /65536 rts .limit_0: move.l #$40000080,d0 rts BSS random: DS.L 1