home *** CD-ROM | disk | FTP | other *** search
- ;; libgcc1 routines for the Hitachi h8/300 cpu.
- ;; Contributed by Steve Chamberlain.
- ;; sac@cygnus.com
- ;; This file is in the public domain.
- /* Assembler register definitions. */
- /* A0-A3 are aliases for r0-r3; the routines in this file take their
-    operands and hand back their results in them.  S0-S2 are aliases for
-    r4-r6, which the 32-bit divide and multiply routines use as working
-    registers.  An L or H suffix names the low or high byte of the
-    16-bit register. */
- #define A0 r0
- #define A0L r0l
- #define A0H r0h
- #define A1 r1
- #define A1L r1l
- #define A1H r1h
- #define A2 r2
- #define A2L r2l
- #define A2H r2h
- #define A3 r3
- #define A3L r3l
- #define A3H r3h
- #define S0 r4
- #define S0L r4l
- #define S0H r4h
- #define S1 r5
- #define S1L r5l
- #define S1H r5h
- #define S2 r6
- #define S2L r6l
- #define S2H r6h
- /* Pointer-width helpers.  Exactly one of the two groups below is
-    expected to be active.  The xxP names select the register view that
-    is as wide as a pointer (rN on the H8/300, erN on the H8/300H).
-    MOVP, ADDP, CMPP, PUSHP and POPP are the matching pointer-sized
-    instructions; none of the routines visible in this part of the file
-    uses them. */
- #ifdef __H8300__
- #define MOVP mov.w /* pointers are 16 bits */
- #define ADDP add.w
- #define CMPP cmp.w
- #define PUSHP push
- #define POPP pop
- #define A0P r0
- #define A1P r1
- #define A2P r2
- #define A3P r3
- #define S0P r4
- #define S1P r5
- #define S2P r6
- #endif
- #ifdef __H8300H__
- #define MOVP mov.l /* pointers are 32 bits */
- #define ADDP add.l
- #define CMPP cmp.l
- #define PUSHP push.l
- #define POPP pop.l
- #define A0P er0
- #define A1P er1
- #define A2P er2
- #define A3P er3
- #define S0P er4
- #define S1P er5
- #define S2P er6
- /* AnE is the upper 16 bits of the 32-bit register erN (H8/300H only). */
- #define A0E e0
- #define A1E e1
- #define A2E e2
- #define A3E e3
- #endif
- #ifdef L_cmpsi2
- #ifdef __H8300__
- ;; ___cmpsi2: signed 32-bit three-way compare (only built for the H8/300).
- ;; In:  first operand in A0 (high word) and A1 (low word),
- ;;      second operand in A2 (high word) and A3 (low word).
- ;; Out: A0 = 0 if first < second, 1 if they are equal, 2 if first > second.
- .section .text
- .align 2
- .global ___cmpsi2
- ___cmpsi2:
- cmp.w A2,A0 ; high words equal ?
- bne .L2
- cmp.w A3,A1 ; low words equal ?
- bne .L2
- mov.w #1,A0 ; operands identical: return 1
- rts
- .L2:
- cmp.w A0,A2 ; flags from A2 - A0, high words compared as signed
- bgt .L4 ; second high word is greater: first < second
- bne .L3 ; high words differ the other way: first > second
- cmp.w A1,A3 ; high words equal: flags from A3 - A1
- bls .L3 ; second low word is lower or same (unsigned): first > second
- .L4:
- sub.w A0,A0 ; return 0
- rts
- .L3:
- mov.w #2,A0 ; return 2
- .L5: ; not referenced by this routine
- rts
- .end
- #endif
- #endif /* L_cmpsi2 */
- #ifdef L_ucmpsi2
- #ifdef __H8300__
- ;; ___ucmpsi2: unsigned 32-bit three-way compare (only built for the H8/300).
- ;; In:  first operand in A0 (high word) and A1 (low word),
- ;;      second operand in A2 (high word) and A3 (low word).
- ;; Out: A0 = 0 if first < second, 1 if they are equal, 2 if first > second.
- .section .text
- .align 2
- .global ___ucmpsi2
- ___ucmpsi2:
- cmp.w A2,A0 ; high words equal ?
- bne .L2
- cmp.w A3,A1 ; low words equal ?
- bne .L2
- mov.w #1,A0 ; operands identical: return 1
- rts
- .L2:
- cmp.w A0,A2 ; flags from A2 - A0, high words compared as unsigned
- bhi .L4 ; second high word is higher: first < second
- bne .L3 ; high words differ the other way: first > second
- cmp.w A1,A3 ; high words equal: flags from A3 - A1
- bls .L3 ; second low word is lower or same: first > second
- .L4:
- sub.w A0,A0 ; return 0
- rts
- .L3:
- mov.w #2,A0 ; return 2
- .L5: ; not referenced by this routine
- rts
- .end
- #endif
- #endif /* L_ucmpsi2 */
- #ifdef L_divhi3
- ;; HImode divides for the H8/300.
- ;; We bunch all of this into one object file since there are several
- ;; "supporting routines".
- ; general purpose normalize routine
- ;
- ; dividend in A0
- ; divisor in A1
- ; turns both into +ve numbers, and leaves in A2L whether the answer
- ; of a divide should be negative: 1 if exactly one of the two operands
- ; was negative, 0 otherwise
- #ifdef __H8300__
- .section .text
- .align 2
- divnorm:
- mov.b #0x0,A2L ; sign flag starts out clear
- or A0H,A0H ; is dividend >= 0 (test of its top byte)
- bge _lab1
- not A0H ; no - then make it +ve:
- not A0L ; complement both bytes ...
- adds #1,A0 ; ... and add one
- xor #0x1,A2L ; and remember that in A2L
- _lab1: or A1H,A1H ; look at divisor
- bge _lab2
- not A1H ; it is -ve, make it positive
- not A1L
- adds #1,A1
- xor #0x1,A2L; and toggle sign of result
- _lab2: rts
- ; A0=A0/A1 signed
- ; Divides the magnitudes with ___udivhi3, then falls through into
- ; negans to apply the sign that divnorm recorded in A2L.
- .global ___divhi3
- ___divhi3:
- bsr divnorm
- bsr ___udivhi3
- ; negans: negate A0 when A2L is non-zero, then return.
- ; ___modhi3 also branches here.
- negans: or A2L,A2L ; should answer be negative ?
- beq _lab4
- not A0H ; yes, so make it so
- not A0L
- adds #1,A0
- _lab4: rts
- ; A0=A0%A1 signed
- ; The remainder takes the sign of the dividend (C division truncates
- ; towards zero), not the sign of the quotient.  divnorm leaves the
- ; quotient sign in A2L, so the original dividend is kept on the stack
- ; across the divide and A2L is rebuilt from its sign bit before the
- ; shared negate-and-return code at negans is entered.
- .global ___modhi3
- ___modhi3:
- push A0 ; save the original dividend, bit 15 is its sign
- bsr divnorm ; A0 = abs (A0), A1 = abs (A1)
- bsr ___udivhi3 ; A3 = remainder of the magnitudes
- mov A3,A0
- pop A2 ; A2 = original dividend
- mov.b A2H,A2L
- and #0x80,A2L ; A2L is non-zero only if the dividend was -ve
- bra negans
- ; A0=A0%A1 unsigned
- ; ___udivhi3 leaves the remainder in A3; copy it into the result register.
- .global ___umodhi3
- ___umodhi3:
- bsr ___udivhi3
- mov A3,A0
- rts
- ; A0=A0/A1 unsigned
- ; A3=A0%A1 unsigned
- ; A2H trashed
- ; Key to the register-contents notes used in the comments below:
- ; D high 8 bits of denom
- ; d low 8 bits of denom
- ; N high 8 bits of num
- ; n low 8 bits of num
- ; M high 8 bits of mod
- ; m low 8 bits of mod
- ; Q high 8 bits of quot
- ; q low 8 bits of quot
- ; P preserve
- ; The h8 only has a 16/8 bit divide, so we look at the incoming and
- ; see how to partition up the expression.
- .global ___udivhi3
- ___udivhi3:
- ; A0 A1 A2 A3
- ; Nn Dd P
- sub.w A3,A3 ; Nn Dd xP 00
- or A1H,A1H ; does the denominator fit in 8 bits ?
- bne divlongway ; no - divxu cannot be used
- or A0H,A0H
- beq _lab6 ; N == 0 - only the low byte needs dividing
- ; we know that D == 0 and N is != 0
- mov.b A0H,A3L ; Nn Dd xP 0N
- divxu A1L,A3 ; MQ
- mov.b A3L,A0H ; Q
- ; dealt with N, do n (A3H still holds the remainder left by the step above)
- _lab6: mov.b A0L,A3L ; n
- divxu A1L,A3 ; mq
- mov.b A3L,A0L ; Qq
- mov.b A3H,A3L ; m
- mov.b #0x0,A3H ; Qq 0m
- rts
- ; D != 0 - which means the denominator is at least 256 and the quotient
- ; fits in 8 bits; loop around 8 times, shifting and subtracting, to get
- ; the result.
- divlongway:
- mov.b A0H,A3L ; Nn Dd xP 0N
- mov.b #0x0,A0H ; high byte of answer has to be zero
- mov.b #0x8,A2H ; 8 bits of n still to bring down
- div8: add.b A0L,A0L ; n*=2, the bit shifted out goes to carry
- rotxl A3L ; Make remainder bigger, carry shifts in at the bottom
- rotxl A3H
- sub.w A1,A3 ; try remainder -= denominator
- bhs setbit ; set a bit ?
- add.w A1,A3 ; no : too far , put the denominator back
- dec A2H
- bne div8 ; next bit
- rts
- setbit: inc A0L ; do insert bit
- dec A2H
- bne div8 ; next bit
- rts
- #endif /* __H8300__ */
- #endif /* L_divhi3 */
- #ifdef L_divsi3
- ;; 4 byte integer divides for the H8/300.
- ;;
- ;; We have one routine which does all the work and lots of
- ;; little ones which prepare the args and massage the sign.
- ;; We bunch all of this into one object file since there are several
- ;; "supporting routines".
- #ifdef __H8300H__
- .h8300h
- #endif
- .section .text
- .align 2
- ; Put abs SIs into A0/A1 (numerator) and A2/A3 (denominator), and leave
- ; in S2L a 1 when exactly one of them was negative, i.e. when the
- ; quotient has to be negated, and a 0 otherwise.
- ; This function is here to keep branch displacements small.
- #ifdef __H8300__
- divnorm:
- mov.b #0,S2L ; keep the sign in S2
- mov.b A0H,A0H ; is the numerator -ve
- bge postive
- ; negate arg: complement all four bytes, then add one with the carry
- ; rippling from the least significant byte (A1L) up to the most
- ; significant byte (A0H).  A0L has to be handled before A0H, otherwise
- ; a carry out of the low word lands in the wrong byte.
- not A0H
- not A1H
- not A0L
- not A1L
- add #1,A1L
- addx #0,A1H
- addx #0,A0L
- addx #0,A0H
- mov.b #1,S2L ; the sign will be -ve
- postive:
- mov.b A2H,A2H ; is the denominator -ve
- bge postive2
- ; negate arg, same byte order as above (A3L, A3H, A2L, A2H)
- not A2L
- not A2H
- not A3L
- not A3H
- add.b #1,A3L
- addx #0,A3H
- addx #0,A2L
- addx #0,A2H
- xor #1,S2L ; toggle result sign
- postive2:
- rts
- #else /* __H8300H__ */
- ; H8/300H version: numerator in A0P, denominator in A1P.
- ; Replaces both by their absolute values and leaves in S2L a 1 when
- ; exactly one of them was negative, a 0 otherwise.
- divnorm:
- mov.b #0,S2L ; keep the sign in S2
- mov.l A0P,A0P ; is the numerator -ve
- bge postive
- neg.l A0P ; negate arg
- mov.b #1,S2L ; the sign will be -ve
- postive:
- mov.l A1P,A1P ; is the denominator -ve
- bge postive2
- neg.l A1P ; negate arg
- xor.b #1,S2L ; toggle result sign
- postive2:
- rts
- #endif
- ; numerator in A0/A1 (A0P for 300h)
- ; denominator in A2/A3 (A1P for 300h)
- ; returns the signed remainder in A0/A1 (A0P for 300h)
- ; The remainder takes the sign of the numerator (C division truncates
- ; towards zero), not the sign of the quotient.  divnorm leaves the
- ; quotient sign in S2L, so the numerator is kept on the stack across the
- ; divide and S2L is rebuilt from its sign bit before the shared
- ; negate-and-return code at exitdiv is entered.
- .global ___modsi3
- ___modsi3:
- #ifdef __H8300__
- push A0 ; top word of the numerator, bit 15 is the sign
- #else
- push.l A0P ; the whole numerator, bit 31 is the sign
- #endif
- bsr divnorm
- bsr divmodsi4
- #ifdef __H8300__
- mov S0,A0 ; remainder of the magnitudes
- mov S1,A1
- pop S2 ; S2 = original top word of the numerator
- mov.b S2H,S2L
- and #0x80,S2L ; S2L is non-zero only if the numerator was -ve
- #else
- mov.l S0P,A0P ; remainder of the magnitudes
- pop.l S2P ; S2P = original numerator
- rotl.l S2P ; the sign bit comes round into bit 0
- and #1,S2L ; S2L is non-zero only if the numerator was -ve
- #endif
- bra exitdiv
- ; Unsigned 32-bit divide: the quotient comes back from divmodsi4 in
- ; A0/A1 (A0P for 300h).  S2L is cleared first so that the shared exit
- ; code at exitdiv does not negate it.
- .global ___udivsi3
- ___udivsi3:
- mov.b #0,S2L ; keep sign low
- bsr divmodsi4
- bra exitdiv
- ; Unsigned 32-bit remainder: divmodsi4 leaves the remainder in S0/S1
- ; (S0P for 300h); it is copied into the result registers.  S2L is
- ; cleared first so that the shared exit code at exitdiv does not
- ; negate it.
- .global ___umodsi3
- ___umodsi3:
- mov.b #0,S2L ; keep sign low
- bsr divmodsi4
- #ifdef __H8300__
- mov S0,A0
- mov S1,A1
- #else
- mov.l S0P,A0P
- #endif
- bra exitdiv
- ; Signed 32-bit divide: A0/A1 = A0/A1 / A2/A3 (A0P = A0P / A1P for 300h).
- ; divnorm makes both operands positive and records in S2L whether the
- ; quotient must be negated; divmodsi4 divides the magnitudes; execution
- ; then falls through into exitdiv.
- .global ___divsi3
- ___divsi3:
- jsr divnorm
- jsr divmodsi4
- ; examine what the sign should be
- ; exitdiv: shared tail of all the SImode divide and modulo entry points.
- ; Negates the result in A0/A1 (A0P for 300h) when S2L is non-zero.
- exitdiv:
- or S2L,S2L
- beq reti
- ; should be -ve
- #ifdef __H8300__
- ; complement all four bytes, then add one with the carry rippling from
- ; the least significant byte (A1L) up to the most significant byte
- ; (A0H).  A0L has to be handled before A0H, otherwise a carry out of
- ; the low word lands in the wrong byte.
- not A0H
- not A1H
- not A0L
- not A1L
- add #1,A1L
- addx #0,A1H
- addx #0,A0L
- addx #0,A0H
- #else /* __H8300H__ */
- neg.l A0P
- #endif
- reti:
- rts
- ; takes A0/A1 numerator (A0P for 300h)
- ; A2/A3 denominator (A1P for 300h)
- ; returns A0/A1 quotient (A0P for 300h)
- ; S0/S1 remainder (S0P for 300h)
- ; trashes S2
- ; (the code below only writes S2H; S2L, where the callers keep the
- ; sign, is left alone)
- #ifdef __H8300__
- divmodsi4:
- sub.w S0,S0 ; zero play area
- mov.w S0,S1
- ; OR together the top three bytes of the denominator
- mov.b A2H,S2H
- or A2L,S2H
- or A3H,S2H
- ; NOTE: despite its name, DenHighZero is the path taken when those
- ; bytes are NOT all zero, i.e. the denominator does not fit in A3L
- bne DenHighZero
- ; the denominator fits in 8 bits: do a long division one numerator byte
- ; at a time with divxu.  Leading zero bytes of the numerator are
- ; skipped; each NumByteXZero label is entered at the first byte that
- ; is non-zero (again the names read the wrong way round)
- mov.b A0H,A0H
- bne NumByte0Zero
- mov.b A0L,A0L
- bne NumByte1Zero
- mov.b A1H,A1H
- bne NumByte2Zero
- bra NumByte3Zero
- ; in each step S1H holds the remainder carried from the previous step
- ; and S1L the next numerator byte; divxu leaves the quotient byte in
- ; S1L and the new remainder in S1H
- NumByte0Zero:
- mov.b A0H,S1L
- divxu A3L,S1
- mov.b S1L,A0H
- NumByte1Zero:
- mov.b A0L,S1L
- divxu A3L,S1
- mov.b S1L,A0L
- NumByte2Zero:
- mov.b A1H,S1L
- divxu A3L,S1
- mov.b S1L,A1H
- NumByte3Zero:
- mov.b A1L,S1L
- divxu A3L,S1
- mov.b S1L,A1L
- mov.b S1H,S1L ; final remainder moves down to the low byte
- mov.b #0x0,S1H ; S0/S1 = 000m
- rts
- ; have to do the divide by shift and test
- ; The top numerator byte is moved straight into the remainder and the
- ; other three bytes are shifted up by one byte, leaving A1L clear to
- ; collect quotient bits.  That is why 24 loop iterations are enough.
- DenHighZero:
- mov.b A0H,S1L
- mov.b A0L,A0H
- mov.b A1H,A0L
- mov.b A1L,A1H
- mov.b #0,A1L
- mov.b #24,S2H ; only do 24 iterations
- nextbit:
- ; shift the 64-bit chain S0:S1:A0:A1 left one bit; the top numerator
- ; bit moves into the remainder
- add.w A1,A1 ; double the answer guess
- rotxl A0L
- rotxl A0H
- rotxl S1L ; double remainder
- rotxl S1H
- rotxl S0L
- rotxl S0H
- ; trial subtract of the denominator from the remainder
- sub.w A3,S1 ; does it all fit
- subx A2L,S0L
- subx A2H,S0H
- bhs setone ; no borrow: it fitted
- add.w A3,S1 ; no, restore mistake
- addx A2L,S0L
- addx A2H,S0H
- dec S2H
- bne nextbit
- rts
- setone:
- inc A1L ; record a 1 bit in the quotient
- dec S2H
- bne nextbit
- rts
- #else /* __H8300H__ */
- ; H8/300H version: numerator in A0P, denominator in A1P.
- ; Returns the quotient in A0P and the remainder in S0P.
- ; Writes S2H (loop counter) and, on the short path, A2P.
- divmodsi4:
- sub.l S0P,S0P ; zero play area
- mov.w A1E,A1E ; denominator top word 0?
- ; NOTE: despite its name, DenHighZero is the path taken when the top
- ; word of the denominator is NOT zero
- bne DenHighZero
- ; do it the easy way, see page 107 in manual
- ; two chained 32/16 divides by the low word of the denominator
- mov.w A0E,A2 ; A2P = 0 : top word of numerator
- extu.l A2P
- divxu.w A1,A2P ; A2 = top word of quotient, A2E = remainder
- mov.w A2E,A0E ; A0P = that remainder : low word of numerator
- divxu.w A1,A0P ; A0 = low word of quotient, A0E = final remainder
- mov.w A0E,S0
- mov.w A2,A0E ; put the top word of the quotient in place
- extu.l S0P ; remainder zero extended to 32 bits
- rts
- ; have to do the divide by shift and test.  The top numerator byte is
- ; moved straight into the remainder and the other three bytes are
- ; shifted up by one byte, leaving A0L clear to collect quotient bits.
- ; That is why 24 loop iterations are enough.
- DenHighZero:
- mov.w A0E,A2
- mov.b A2H,S0L ; top byte of numerator into the remainder
- mov.b A2L,A2H
- mov.b A0H,A2L
- mov.w A2,A0E
- mov.b A0L,A0H
- mov.b #0,A0L
- mov.b #24,S2H ; only do 24 iterations
- nextbit:
- shll.l A0P ; double the answer guess
- rotxl.l S0P ; double remainder
- sub.l A1P,S0P ; does it all fit?
- bhs setone ; no borrow: it fitted
- add.l A1P,S0P ; no, restore mistake
- dec S2H
- bne nextbit
- rts
- setone:
- inc A0L ; record a 1 bit in the quotient
- dec S2H
- bne nextbit
- rts
- #endif
- #endif /* L_divsi3 */
- #ifdef L_mulhi3
- ;; HImode multiply.
- ; The h8 only has an 8*8->16 multiply.
- ; The answer is the same as:
- ;
- ; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
- ; (we can ignore srca.h * srcb.h because that term falls entirely off the top of the 16 bit result)
- ; A0 in
- ; A1 in
- ; A0 answer
- #ifdef __H8300__
- .section .text
- .align 2
- ; 16 x 16 -> 16 bit multiply built from three 8 x 8 -> 16 multiplies.
- ; In: A0, A1.  Out: A0 = low 16 bits of A0 * A1.  Writes A2 and A3.
- .global ___mulhi3
- ___mulhi3:
- ; A2 = low (A0) * low (A1), the only term that reaches the low result byte
- mov.b A0L,A2L
- mulxu A1L,A2
- ; cross term high (A0) * low (A1): its low byte is added to the high result byte
- mov.b A1L,A3L
- mulxu A0H,A3
- add.b A3L,A2H
- ; cross term low (A0) * high (A1), folded in the same way
- mov.b A0L,A3L
- mulxu A1H,A3
- add.b A3L,A2H
- mov.w A2,A0 ; hand the product back in A0
- rts
- #endif
- #endif /* L_mulhi3 */
- #ifdef L_mulsi3
- ;; SImode multiply.
- ;;
- ;; I think that shift and add may be sufficient for this. Using the
- ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way
- ;; the inner loop uses maybe 20 cycles + overhead, but terminates
- ;; quickly on small args.
- ;;
- ;; A0/A1 src_a
- ;; A2/A3 src_b
- ;;
- ;; while (a)
- ;; {
- ;; if (a & 1)
- ;; r += b;
- ;; a >>= 1;
- ;; b <<= 1;
- ;; }
- .section .text
- .align 2
- #ifdef __H8300__
- ; H8/300 version of the shift and add loop described above.
- ; In: a in A0/A1, b in A2/A3.  Out: product in A0/A1.
- ; The running sum r is kept in S0/S1.
- .global ___mulsi3
- ___mulsi3:
- ; r = 0
- sub.w S0,S0
- sub.w S1,S1
- ; while (a)
- _top: mov.w A0,A0
- bne _more ; top word non-zero, so a != 0
- mov.w A1,A1
- beq _done ; both words zero: finished
- _more: ; if (a & 1)
- bld #0,A1L ; lowest bit of a goes to carry
- bcc _nobit
- ; r += b
- add.w A3,S1
- addx A2L,S0L
- addx A2H,S0H
- _nobit:
- ; a >>= 1
- shlr A0H ; zero comes in at the top, carry links the four bytes
- rotxr A0L
- rotxr A1H
- rotxr A1L
- ; b <<= 1
- add.w A3,A3 ; doubling b, carry links the low word to the top bytes
- addx A2L,A2L
- addx A2H,A2H
- bra _top
- _done:
- ; return r
- mov.w S0,A0
- mov.w S1,A1
- rts
- #else /* __H8300H__ */
- ; H8/300H version of the same shift and add loop, using 32 bit registers.
- ; In: a in A0P, b in A1P.  Out: product in A0P.
- ; The running sum r is kept in A2P.
- .h8300h
- .global ___mulsi3
- ___mulsi3:
- sub.l A2P,A2P ; r = 0
- ; while (a)
- _top: mov.l A0P,A0P
- beq _done
- ; if (a & 1)
- bld #0,A0L ; lowest bit of a goes to carry
- bcc _nobit
- ; r += b
- add.l A1P,A2P
- _nobit:
- ; a >>= 1
- shlr.l A0P
- ; b <<= 1
- shll.l A1P
- bra _top
- _done:
- mov.l A2P,A0P ; return r
- rts
- #endif
- #endif /* L_mulsi3 */