Fresh Fish 6

home *** CD-ROM | disk | FTP | other *** search

/ Fresh Fish 6 / FreshFish_September1994.bin / bbs / gnu / gcc-2.6.0-src.lha / GNU / src / amiga / gcc-2.6.0 / config / h8300 / lib1funcs.asm < prev next >

Wrap

Assembly Source File | 1994-07-12 | 10.2 KB | 672 lines

;; libgcc1 routines for the Hitachi h8/300 cpu. ;; Contributed by Steve Chamberlain. ;; sac@cygnus.com ;; This file is in the public domain. /* Assembler register definitions. */ #define A0 r0 #define A0L r0l #define A0H r0h #define A1 r1 #define A1L r1l #define A1H r1h #define A2 r2 #define A2L r2l #define A2H r2h #define A3 r3 #define A3L r3l #define A3H r3h #define S0 r4 #define S0L r4l #define S0H r4h #define S1 r5 #define S1L r5l #define S1H r5h #define S2 r6 #define S2L r6l #define S2H r6h #ifdef __H8300__ #define MOVP mov.w /* pointers are 16 bits */ #define ADDP add.w #define CMPP cmp.w #define PUSHP push #define POPP pop #define A0P r0 #define A1P r1 #define A2P r2 #define A3P r3 #define S0P r4 #define S1P r5 #define S2P r6 #endif #ifdef __H8300H__ #define MOVP mov.l /* pointers are 32 bits */ #define ADDP add.l #define CMPP cmp.l #define PUSHP push.l #define POPP pop.l #define A0P er0 #define A1P er1 #define A2P er2 #define A3P er3 #define S0P er4 #define S1P er5 #define S2P er6 #define A0E e0 #define A1E e1 #define A2E e2 #define A3E e3 #endif #ifdef L_cmpsi2 #ifdef __H8300__ .section .text .align 2 .global ___cmpsi2 ___cmpsi2: cmp.w A2,A0 bne .L2 cmp.w A3,A1 bne .L2 mov.w #1,A0 rts .L2: cmp.w A0,A2 bgt .L4 bne .L3 cmp.w A1,A3 bls .L3 .L4: sub.w A0,A0 rts .L3: mov.w #2,A0 .L5: rts .end #endif #endif /* L_cmpsi2 */ #ifdef L_ucmpsi2 #ifdef __H8300__ .section .text .align 2 .global ___ucmpsi2 ___ucmpsi2: cmp.w A2,A0 bne .L2 cmp.w A3,A1 bne .L2 mov.w #1,A0 rts .L2: cmp.w A0,A2 bhi .L4 bne .L3 cmp.w A1,A3 bls .L3 .L4: sub.w A0,A0 rts .L3: mov.w #2,A0 .L5: rts .end #endif #endif /* L_ucmpsi2 */ #ifdef L_divhi3 ;; HImode divides for the H8/300. ;; We bunch all of this into one object file since there are several ;; "supporting routines". ; general purpose normalize routine ; ; divisor in A0 ; dividend in A1 ; turns both into +ve numbers, and leaves what the answer sign ; should be in A2L #ifdef __H8300__ .section .text .align 2 divnorm: mov.b #0x0,A2L or A0H,A0H ; is divisor > 0 bge _lab1 not A0H ; no - then make it +ve not A0L adds #1,A0 xor #0x1,A2L ; and remember that in A2L _lab1: or A1H,A1H ; look at dividend bge _lab2 not A1H ; it is -ve, make it positive not A1L adds #1,A1 xor #0x1,A2L; and toggle sign of result _lab2: rts ; A0=A0/A1 signed .global ___divhi3 ___divhi3: bsr divnorm bsr ___udivhi3 negans: or A2L,A2L ; should answer be negative ? beq _lab4 not A0H ; yes, so make it so not A0L adds #1,A0 _lab4: rts ; A0=A0%A1 signed .global ___modhi3 ___modhi3: bsr divnorm bsr ___udivhi3 mov A3,A0 bra negans ; A0=A0%A1 unsigned .global ___umodhi3 ___umodhi3: bsr ___udivhi3 mov A3,A0 rts ; A0=A0/A1 unsigned ; A3=A0%A1 unsigned ; A2H trashed ; D high 8 bits of denom ; d low 8 bits of denom ; N high 8 bits of num ; n low 8 bits of num ; M high 8 bits of mod ; m low 8 bits of mod ; Q high 8 bits of quot ; q low 8 bits of quot ; P preserve ; The h8 only has a 16/8 bit divide, so we look at the incoming and ; see how to partition up the expression. .global ___udivhi3 ___udivhi3: ; A0 A1 A2 A3 ; Nn Dd P sub.w A3,A3 ; Nn Dd xP 00 or A1H,A1H bne divlongway or A0H,A0H beq _lab6 ; we know that D == 0 and N is != 0 mov.b A0H,A3L ; Nn Dd xP 0N divxu A1L,A3 ; MQ mov.b A3L,A0H ; Q ; dealt with N, do n _lab6: mov.b A0L,A3L ; n divxu A1L,A3 ; mq mov.b A3L,A0L ; Qq mov.b A3H,A3L ; m mov.b #0x0,A3H ; Qq 0m rts ; D != 0 - which means the denominator is ; loop around to get the result. divlongway: mov.b A0H,A3L ; Nn Dd xP 0N mov.b #0x0,A0H ; high byte of answer has to be zero mov.b #0x8,A2H ; 8 div8: add.b A0L,A0L ; n*=2 rotxl A3L ; Make remainder bigger rotxl A3H sub.w A1,A3 ; Q-=N bhs setbit ; set a bit ? add.w A1,A3 ; no : too far , Q+=N dec A2H bne div8 ; next bit rts setbit: inc A0L ; do insert bit dec A2H bne div8 ; next bit rts #endif /* __H8300__ */ #endif /* L_divhi3 */ #ifdef L_divsi3 ;; 4 byte integer divides for the H8/300. ;; ;; We have one routine which does all the work and lots of ;; little ones which prepare the args and massage the sign. ;; We bunch all of this into one object file since there are several ;; "supporting routines". #ifdef __H8300H__ .h8300h #endif .section .text .align 2 ; Put abs SIs into r0/r1 and r2/r3, and leave a 1 in r6l with sign of rest. ; This function is here to keep branch displacements small. #ifdef __H8300__ divnorm: mov.b #0,S2L ; keep the sign in S2 mov.b A0H,A0H ; is the numerator -ve bge postive ; negate arg not A0H not A1H not A0L not A1L add #1,A1L addx #0,A1H addx #0,A0H addx #0,A0L mov.b #1,S2L ; the sign will be -ve postive: mov.b A2H,A2H ; is the denominator -ve bge postive2 not A2L not A2H not A3L not A3H add.b #1,A3L addx #0,A3H addx #0,A2L addx #0,A2H xor #1,S2L ; toggle result sign postive2: rts #else /* __H8300H__ */ divnorm: mov.b #0,S2L ; keep the sign in S2 mov.l A0P,A0P ; is the numerator -ve bge postive neg.l A0P ; negate arg mov.b #1,S2L ; the sign will be -ve postive: mov.l A1P,A1P ; is the denominator -ve bge postive2 neg.l A1P ; negate arg xor.b #1,S2L ; toggle result sign postive2: rts #endif ; numerator in A0/A1 ; denominator in A2/A3 .global ___modsi3 ___modsi3: PUSHP S2P PUSHP S0P PUSHP S1P bsr divnorm bsr divmodsi4 #ifdef __H8300__ mov S0,A0 mov S1,A1 #else mov.l S0P,A0P #endif bra exitdiv .global ___udivsi3 ___udivsi3: PUSHP S2P PUSHP S0P PUSHP S1P mov.b #0,S2L ; keep sign low bsr divmodsi4 bra exitdiv .global ___umodsi3 ___umodsi3: PUSHP S2P PUSHP S0P PUSHP S1P mov.b #0,S2L ; keep sign low bsr divmodsi4 #ifdef __H8300__ mov S0,A0 mov S1,A1 #else mov.l S0P,A0P #endif bra exitdiv .global ___divsi3 ___divsi3: PUSHP S2P PUSHP S0P PUSHP S1P jsr divnorm jsr divmodsi4 ; examine what the sign should be exitdiv: POPP S1P POPP S0P or S2L,S2L beq reti ; should be -ve #ifdef __H8300__ not A0H not A1H not A0L not A1L add #1,A1L addx #0,A1H addx #0,A0H addx #0,A0L #else /* __H8300H__ */ neg.l A0P #endif reti: POPP S2P rts ; takes A0/A1 numerator (A0P for 300h) ; A2/A3 denominator (A1P for 300h) ; returns A0/A1 quotient (A0P for 300h) ; S0/S1 remainder (S0P for 300h) ; trashes S2 #ifdef __H8300__ divmodsi4: sub.w S0,S0 ; zero play area mov.w S0,S1 mov.b A2H,S2H or A2L,S2H or A3H,S2H bne DenHighZero mov.b A0H,A0H bne NumByte0Zero mov.b A0L,A0L bne NumByte1Zero mov.b A1H,A1H bne NumByte2Zero bra NumByte3Zero NumByte0Zero: mov.b A0H,S1L divxu A3L,S1 mov.b S1L,A0H NumByte1Zero: mov.b A0L,S1L divxu A3L,S1 mov.b S1L,A0L NumByte2Zero: mov.b A1H,S1L divxu A3L,S1 mov.b S1L,A1H NumByte3Zero: mov.b A1L,S1L divxu A3L,S1 mov.b S1L,A1L mov.b S1H,S1L mov.b #0x0,S1H rts ; have to do the divide by shift and test DenHighZero: mov.b A0H,S1L mov.b A0L,A0H mov.b A1H,A0L mov.b A1L,A1H mov.b #0,A1L mov.b #24,S2H ; only do 24 iterations nextbit: add.w A1,A1 ; double the answer guess rotxl A0L rotxl A0H rotxl S1L ; double remainder rotxl S1H rotxl S0L rotxl S0H sub.w A3,S1 ; does it all fit subx A2L,S0L subx A2H,S0H bhs setone add.w A3,S1 ; no, restore mistake addx A2L,S0L addx A2H,S0H dec S2H bne nextbit rts setone: inc A1L dec S2H bne nextbit rts #else /* __H8300H__ */ divmodsi4: sub.l S0P,S0P ; zero play area mov.w A1E,A1E ; denominator top word 0? bne DenHighZero ; do it the easy way, see page 107 in manual mov.w A0E,A2 extu.l A2P divxu.w A1,A2P mov.w A2E,A0E divxu.w A1,A0P mov.w A0E,S0 mov.w A2,A0E extu.l S0P rts DenHighZero: mov.w A0E,A2 mov.b A2H,S0L mov.b A2L,A2H mov.b A0H,A2L mov.w A2,A0E mov.b A0L,A0H mov.b #0,A0L mov.b #24,S2H ; only do 24 iterations nextbit: shll.l A0P ; double the answer guess rotxl.l S0P ; double remainder sub.l A1P,S0P ; does it all fit? bhs setone add.l A1P,S0P ; no, restore mistake dec S2H bne nextbit rts setone: inc A0L dec S2H bne nextbit rts #endif #endif /* L_divsi3 */ #ifdef L_mulhi3 ;; HImode multiply. ; The h8 only has an 8*8->16 multiply. ; The answer is the same as: ; ; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256 ; (we can ignore A1.h * A0.h cause that will all off the top) ; A0 in ; A1 in ; A0 answer #ifdef __H8300__ .section .text .align 2 .global ___mulhi3 ___mulhi3: mov.b A1L,A2L ; A2l gets srcb.l mulxu A0L,A2 ; A2 gets first sub product mov.b A0H,A3L ; prepare for mulxu A1L,A3 ; second sub product add.b A3L,A2H ; sum first two terms mov.b A1H,A3L ; third sub product mulxu A0L,A3 add.b A3L,A2H ; almost there mov.w A2,A0 ; that is rts #endif #endif /* L_mulhi3 */ #ifdef L_mulsi3 ;; SImode multiply. ;; ;; I think that shift and add may be sufficient for this. Using the ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way ;; the inner loop uses maybe 20 cycles + overhead, but terminates ;; quickly on small args. ;; ;; A0/A1 src_a ;; A2/A3 src_b ;; ;; while (a) ;; { ;; if (a & 1) ;; r += b; ;; a >>= 1; ;; b <<= 1; ;; } .section .text .align 2 #ifdef __H8300__ .global ___mulsi3 ___mulsi3: PUSHP S0P PUSHP S1P PUSHP S2P sub.w S0,S0 sub.w S1,S1 ; while (a) _top: mov.w A0,A0 bne _more mov.w A1,A1 beq _done _more: ; if (a & 1) bld #0,A1L bcc _nobit ; r += b add.w A3,S1 addx A2L,S0L addx A2H,S0H _nobit: ; a >>= 1 shlr A0H rotxr A0L rotxr A1H rotxr A1L ; b <<= 1 add.w A3,A3 addx A2L,A2L addx A2H,A2H bra _top _done: mov.w S0,A0 mov.w S1,A1 POPP S2P POPP S1P POPP S0P rts #else /* __H8300H__ */ .h8300h .global ___mulsi3 ___mulsi3: sub.l A2P,A2P ; while (a) _top: mov.l A0P,A0P beq _done ; if (a & 1) bld #0,A0L bcc _nobit ; r += b add.l A1P,A2P _nobit: ; a >>= 1 shlr.l A0P ; b <<= 1 shll.l A1P bra _top _done: mov.l A2P,A0P rts #endif #endif /* L_mulsi3 */