home *** CD-ROM | disk | FTP | other *** search
- ; Copyright (C) 1989, 1992, 1993 Aladdin Enterprises. All rights reserved.
- ;
- ; This file is part of Ghostscript.
- ;
- ; Ghostscript is distributed in the hope that it will be useful, but
- ; WITHOUT ANY WARRANTY. No author or distributor accepts responsibility
- ; to anyone for the consequences of using it or for whether it serves any
- ; particular purpose or works at all, unless he says so in writing. Refer
- ; to the Ghostscript General Public License for full details.
- ;
- ; Everyone is granted permission to copy, modify and redistribute
- ; Ghostscript, but only under the conditions described in the Ghostscript
- ; General Public License. A copy of this license is supposed to have been
- ; given to you along with Ghostscript so you can know your rights and
- ; responsibilities. It should be in a file named COPYING. Among other
- ; things, the copyright notice and this notice must be preserved on all
- ; copies.
-
- ; iutilasm.asm
- ; Assembly code for Ghostscript interpreter on MS-DOS systems
-
- ifdef FOR80386
-
- .286c
-
- endif
-
- utilasm_TEXT SEGMENT WORD PUBLIC 'CODE'
- ASSUME CS:utilasm_TEXT
-
-
- ifdef FOR80386
-
- ; Macro for 32-bit operand prefix.
- ; Emits the single byte 66h (the x86 operand-size override prefix) in front
- ; of the instruction that follows the macro invocation, so that an
- ; instruction written with 16-bit operands is executed with 32-bit operands.
- ; The prefix is emitted as data because the file is assembled in .286c mode.
- OP32 macro
- db 66h
- endm
-
- endif ; FOR80386
-
-
- ifdef FOR80386
-
- ; Replace the multiply and divide routines in the Turbo C library
- ; if we are running on an 80386.
-
- ; Macro to swap the halves of a 32-bit register.
- ; Unfortunately, masm won't allow a shift instruction with a count of 16,
- ; so we have to code it in hex.
- ; regno is the numeric encoding of the register (one of the regXX equates
- ; that follow the macro); it is added to 0c0h to form the operand byte of
- ; the rotate.  The OP32 prefix makes the rotate act on the full 32-bit
- ; register, so the old high half ends up in the 16-bit register name.
- swap macro regno
- OP32
- db 0c1h,0c0h+regno,16 ; rol regno,16
- endm
- ; Register numbers to pass to the swap macro.
- regax equ 0
- regcx equ 1
- regdx equ 2
- regbx equ 3
-
- ; 32-bit multiply helper: (dx,ax) = (dx,ax) * (cx,bx).
- ; Each operand arrives as a pair of 16-bit registers.  Both pairs are first
- ; packed into one 32-bit register each, a single 32-bit imul is performed,
- ; and the product is unpacked again into the (dx,ax) pair.
- ; The upper halves of eax, ecx and edx are overwritten.
- PUBLIC LXMUL@, F_LXMUL@
- F_LXMUL@ proc far
- LXMUL@ proc far
- ; Pack the multiplier: move cx to the top of ecx, then fill in bx below it.
- swap regcx
- mov cx,bx
- ; Pack the multiplicand the same way: dx goes to the top of edx, ax below.
- swap regdx
- mov dx,ax
- ; edx = edx * ecx, hand-encoded because of the 32-bit operands.
- OP32
- db 0fh,0afh,0d1h ; imul dx,cx
- ; Unpack: with the prefix this copies all of edx into eax (ax = low word) ...
- OP32
- mov ax,dx
- ; ... and rotating edx brings the high word of the product into dx.
- swap regdx
- ret
- LXMUL@ endp
- F_LXMUL@ endp
-
- ; Divide two stack operands, leave the result in (dx,ax).
-
- ifdef DEBUG
-
- ; DEBUG variant of the entry sequence for the 32-bit divide routines.
- ; bx receives the value of sp at entry (before bp is pushed), so [bx+4]
- ; and [bx+8] address the two stack operands above the far return address.
- ; A bp frame is then set up, and the first operand is loaded into eax
- ; (the OP32 prefix widens the load to 32 bits).
- setup32 macro
- mov bx,sp
- push bp
- mov bp,sp
- OP32
- mov ax,ss:[bx+4] ; dividend
- endm
-
- ; DEBUG variant of the exit sequence: tears down the bp frame built by
- ; setup32, then returns and pops n bytes of arguments.
- ret32 macro n
- mov sp,bp
- pop bp
- ret n
- endm
-
- else ; !DEBUG
-
- ; Non-DEBUG entry sequence for the 32-bit divide routines.
- ; bx receives sp, so [bx+4] and [bx+8] address the two stack operands above
- ; the far return address; the first operand is loaded into eax (the OP32
- ; prefix widens the load to 32 bits).  No bp frame is created.
- setup32 macro
- mov bx,sp
- OP32
- mov ax,ss:[bx+4] ; dividend
- endm
-
- ; Non-DEBUG exit sequence: return and pop n bytes of arguments.
- ret32 macro n
- ret n
- endm
-
- endif ; (!)DEBUG
-
- PUBLIC LDIV@, LUDIV@, LMOD@, LUMOD@
- PUBLIC F_LDIV@, F_LUDIV@, F_LMOD@, F_LUMOD@
- ; Signed 32-bit divide.
- ; On entry the dividend is the long at [sp+4] and the divisor the long at
- ; [sp+8].  The quotient is returned in (dx,ax) and the 8 bytes of operands
- ; are removed from the stack on return.  bx and the upper halves of eax and
- ; edx are overwritten.
- ; NOTE(review): the hardware idiv raises a divide error for a zero divisor
- ; and for a quotient that does not fit in 32 bits (80000000h / -1); confirm
- ; that callers do not depend on either case returning normally.
- F_LDIV@ proc far
- LDIV@ proc far
- ; bx = entry sp, eax = dividend
- setup32
- ; With the prefix, cwd sign-extends eax into edx:eax.
- OP32
- cwd
- ; With the prefix, the divisor operand is the 32-bit value at [bx+8]:
- ; eax = quotient, edx = remainder.
- OP32
- idiv word ptr ss:[bx+8] ; divisor
- ; Copy the whole quotient into edx, then rotate so dx = its high word;
- ; ax already holds the low word.
- OP32
- mov dx,ax
- swap regdx
- ret32 8
- LDIV@ endp
- F_LDIV@ endp
- ; Unsigned 32-bit divide.
- ; On entry the dividend is the long at [sp+4] and the divisor the long at
- ; [sp+8].  The quotient is returned in (dx,ax) and the 8 bytes of operands
- ; are removed from the stack on return.  bx and the upper halves of eax and
- ; edx are overwritten.
- F_LUDIV@ proc far
- LUDIV@ proc far
- ; bx = entry sp, eax = dividend
- setup32
- ; Zero edx so that edx:eax is the zero-extended dividend.
- OP32
- xor dx,dx
- ; With the prefix, the divisor operand is the 32-bit value at [bx+8]:
- ; eax = quotient, edx = remainder.
- OP32
- div word ptr ss:[bx+8] ; divisor
- ; Copy the whole quotient into edx, then rotate so dx = its high word;
- ; ax already holds the low word.
- OP32
- mov dx,ax
- swap regdx
- ret32 8
- LUDIV@ endp
- F_LUDIV@ endp
- ; Signed 32-bit remainder.
- ; On entry the dividend is the long at [sp+4] and the divisor the long at
- ; [sp+8].  The remainder produced by idiv is returned in (dx,ax) and the
- ; 8 bytes of operands are removed from the stack on return.  bx and the
- ; upper halves of eax and edx are overwritten.
- ; NOTE(review): as with the signed divide, idiv raises a divide error for a
- ; zero divisor and for 80000000h / -1; confirm callers do not rely on those.
- F_LMOD@ proc far
- LMOD@ proc far
- ; bx = entry sp, eax = dividend
- setup32
- ; With the prefix, cwd sign-extends eax into edx:eax.
- OP32
- cwd
- ; eax = quotient, edx = remainder.
- OP32
- idiv word ptr ss:[bx+8] ; divisor
- ; Copy the whole remainder into eax (ax = low word), then rotate edx so
- ; that dx = its high word.
- OP32
- mov ax,dx
- swap regdx
- ret32 8
- LMOD@ endp
- F_LMOD@ endp
- ; Unsigned 32-bit remainder.
- ; On entry the dividend is the long at [sp+4] and the divisor the long at
- ; [sp+8].  The remainder is returned in (dx,ax) and the 8 bytes of operands
- ; are removed from the stack on return.  bx and the upper halves of eax and
- ; edx are overwritten.
- F_LUMOD@ proc far
- LUMOD@ proc far
- ; bx = entry sp, eax = dividend
- setup32
- ; Zero edx so that edx:eax is the zero-extended dividend.
- OP32
- xor dx,dx
- ; eax = quotient, edx = remainder.
- OP32
- div word ptr ss:[bx+8] ; divisor
- ; Copy the whole remainder into eax (ax = low word), then rotate edx so
- ; that dx = its high word.
- OP32
- mov ax,dx
- swap regdx
- ret32 8
- LUMOD@ endp
- F_LUMOD@ endp
-
- else ; !FOR80386
-
- ; Replace the divide routines in the Turbo C library,
- ; which do the division one bit at a time (!).
-
- PUBLIC LDIV@, LMOD@, LUDIV@, LUMOD@
- PUBLIC F_LDIV@, F_LMOD@, F_LUDIV@, F_LUMOD@
-
- ; Negate a long on the stack.
- ; offset is the bp-relative offset of the low word; the high word is at
- ; offset+2.  The value is negated in place.  Negating the low word sets the
- ; carry flag when that word was nonzero, and the sbb then takes the
- ; resulting borrow out of the already-negated high word.
- negbp macro offset
- neg word ptr [bp+offset+2] ; high part
- neg word ptr [bp+offset] ; low part
- sbb word ptr [bp+offset+2],0
- endm
-
- ; Negate a long in (dx,ax).
- ; Negating ax sets the carry flag when ax was nonzero, and the sbb then
- ; takes the resulting borrow out of the already-negated high word in dx.
- negr macro
- neg dx
- neg ax
- sbb dx,0
- endm
-
- ; Divide two unsigned longs on the stack.
- ; Leave either the quotient or the remainder in (dx,ax).
- ; Operand offsets assume that bp (and only bp) has been pushed.
- ; The offsets are relative to bp after "push bp / mov bp,sp" in a far
- ; procedure: the saved bp (2 bytes) and the far return address (4 bytes)
- ; lie below the first operand, which therefore starts at bp+6.
- nlo equ 6 ; numerator, low word
- nhi equ 8 ; numerator, high word
- dlo equ 10 ; denominator, low word
- dhi equ 12 ; denominator, high word
-
- ; We use an offset in bx to distinguish div from mod,
- ; and to indicate whether the result should be negated.
- ; The values are byte offsets into the 4-entry word jump tables
- ; (xx1, xx1a, xx3, xxz) used at the end of the division code, which is why
- ; they advance in steps of 2.  All of them are below 256, so loading one
- ; into bx leaves bh = 0; the sign tests in LMOD@ and LDIV@ rely on that.
- odiv equ 0 ; return the quotient
- omod equ 2 ; return the remainder
- odivneg equ 4 ; return the negated quotient
- omodneg equ 6 ; return the negated remainder
- ; Signed 32-bit remainder of the two longs on the stack (numerator at
- ; nlo/nhi, denominator at dlo/dhi), returned in (dx,ax).
- ; Both operands are replaced on the stack by their absolute values, and
- ; the work is then handed to the unsigned division code at udiv (inside
- ; LUDIV@) with bx selecting the result: omod if the numerator was
- ; non-negative, omodneg (negated remainder) if it was negative.  The sign
- ; of the denominator does not affect the selection.
- ; The label negnum is also a jump target for LDIV@.
- F_LMOD@ proc far
- LMOD@ proc far
- push bp
- mov bp,sp
- mov bx,omod
- ; Take abs of denominator
- ; (signed compare of the most significant byte against bh, which is zero)
- cmp byte ptr [bp+dhi+1],bh ; bh = 0
- jge modpd
- negbp dlo
- modpd: ; Negate mod if numerator < 0
- cmp byte ptr [bp+nhi+1],bh ; bh = 0
- jge udiv
- mov bx,omodneg
- ; Replace the numerator by its absolute value; bx is left untouched, so
- ; whatever result selector the caller of this label chose is preserved.
- negnum: negbp nlo
- jmp udiv
- LMOD@ endp
- F_LMOD@ endp
- ; Unsigned 32-bit remainder of the two longs on the stack, returned in
- ; (dx,ax).  Selects the remainder result (bx = omod) and continues at
- ; udpush inside LUDIV@, which builds the bp frame and does the division.
- F_LUMOD@ proc far
- LUMOD@ proc far
- mov bx,omod
- jmp udpush
- LUMOD@ endp
- F_LUMOD@ endp
- ; Signed 32-bit quotient of the two longs on the stack (numerator at
- ; nlo/nhi, denominator at dlo/dhi), returned in (dx,ax).
- ; bx selects the result: odiv if the operands have the same sign, odivneg
- ; (negated quotient) if their signs differ.  Both operands are then
- ; replaced on the stack by their absolute values and the work is handed to
- ; the unsigned division code at udiv (inside LUDIV@).
- F_LDIV@ proc far
- LDIV@ proc far
- push bp
- mov bp,sp
- mov bx,odiv
- ; Negate quo if num^den < 0
- ; (the xor of the two high words has its sign bit set exactly when the
- ; operand signs differ)
- mov ax,[bp+nhi]
- xor ax,[bp+dhi]
- jge divabs
- mov bx,odivneg
- divabs: ; Take abs of denominator
- ; (signed compare of the most significant byte against bh, which is zero)
- cmp byte ptr [bp+dhi+1],bh ; bh = 0
- jge divpd
- negbp dlo
- divpd: ; Take abs of numerator
- cmp byte ptr [bp+nhi+1],bh ; bh = 0
- jge udiv
- ; negnum (inside LMOD@) negates the numerator in place and jumps to udiv
- ; without changing bx.
- jmp negnum
- LDIV@ endp
- F_LDIV@ endp
- ; Unsigned 32-bit quotient of the two longs on the stack (numerator at
- ; nlo/nhi, denominator at dlo/dhi), returned in (dx,ax).
- ;
- ; This procedure also contains the unsigned division code shared by all
- ; four routines:
- ;   udpush - entry used by LUMOD@: bx already holds the result selector,
- ;            the bp frame has not been built yet.
- ;   udiv   - entry used by LMOD@ and LDIV@: the bp frame exists, both
- ;            operands on the stack are non-negative magnitudes, and bx
- ;            holds the result selector.
- ; The selector (odiv, omod, odivneg, omodneg) is saved on the stack and
- ; used at the end as an index into a jump table that picks quotient or
- ; remainder and optionally negates it.  Every exit pops the 8 bytes of
- ; operands (ret 8).  ax, bx, cx and dx are overwritten.
- ; A zero denominator reaches a div instruction with a zero divisor.
- F_LUDIV@ proc far
- LUDIV@ proc far
- mov bx,odiv
- udpush: push bp
- mov bp,sp
- udiv: push bx ; odiv, omod, odivneg, omodneg
- mov ax,[bp+nlo]
- mov dx,[bp+nhi]
- mov bx,[bp+dlo]
- mov cx,[bp+dhi]
- ; Now we are dividing dx:ax by cx:bx.
- ; Check to see whether this is really a 32/16 division.
- or cx,cx
- jnz div2
- ; 32/16, check for 16- vs. 32-bit quotient
- ; (the quotient fits in 16 bits exactly when the high word of the
- ; numerator is below the divisor)
- cmp dx,bx
- jae div1
- ; 32/16 with 16-bit quotient, just do it.
- div bx ; ax = quo, dx = rem
- pop bx
- pop bp
- jmp cs:xx1[bx]
- even
- xx1 dw offset divx1
- dw offset modx1
- dw offset divx1neg
- dw offset modx1neg
- ; Both results are 16-bit values here, so the high word is cleared.
- modx1: mov ax,dx
- divx1: xor dx,dx
- ret 8
- modx1neg: mov ax,dx
- divx1neg: xor dx,dx
- rneg: negr
- ret 8
- ; 32/16 with 32-bit quotient, do in 2 parts.
- div1: mov cx,ax ; save lo num
- mov ax,dx
- xor dx,dx
- div bx ; ax = hi quo
- xchg cx,ax ; save hi quo, get lo num
- div bx ; ax = lo quo, dx = rem
- pop bx
- pop bp
- jmp cs:xx1a[bx]
- even
- ; The remainder is still a 16-bit value in dx, so the mod cases share the
- ; exits of the 16-bit-quotient path.
- xx1a dw offset divx1a
- dw offset modx1
- dw offset divx1aneg
- dw offset modx1neg
- divx1a: mov dx,cx ; hi quo
- ret 8
- divx1aneg: mov dx,cx
- jmp rneg
- ; This is really a 32/32 bit division.
- ; (Note that the quotient cannot exceed 16 bits.)
- ; The following algorithm is taken from pp. 235-240 of Knuth, vol. 2
- ; (first edition).
- ; Start by normalizing the numerator and denominator.
- ; Both are shifted right together until the high word of the denominator
- ; is zero, which leaves a 16-bit divisor in bx.
- div2: or ch,ch
- jz div21 ; ch == 0, but cl != 0
- ; Do 8 steps all at once.
- mov bl,bh
- mov bh,cl
- mov cl,ch
- xor ch,ch
- mov al,ah
- mov ah,dl
- mov dl,dh
- xor dh,dh
- ; The rol is undone by the rcr at div2a, so execution can fall into the
- ; loop without jumping over its first instruction.
- rol bx,1 ; faster than jmp
- div2a: rcr bx,1 ; finish previous shift
- div21: shr dx,1
- rcr ax,1
- shr cx,1
- jnz div2a
- ; The bit shifted out of cx by the last shr is still in the carry flag.
- rcr bx,1
- ; Now we can do a 32/16 divide.
- div2x: div bx ; ax = quo, dx = rem
- ; The result is only a trial quotient: it is never smaller than the true
- ; quotient but may be slightly larger, because low-order bits of both
- ; operands were dropped by the shifting above.
- mov cx,ax ; save quotient
- ; Multiply by the denominator, and correct the result.
- ; The product must fit in 32 bits to be compared with the numerator.
- ; If the trial quotient is too large the product can reach 2^32
- ; (for example 0FFFFFFFFh / 00010003h gives a trial quotient of 0FFFEh
- ; whose product is 2^32 + 0FFFAh); a product truncated to 32 bits would
- ; then make the comparison below accept the wrong quotient.  Both places
- ; where the product can overflow are therefore checked, and on overflow
- ; the trial quotient is decremented and the product recomputed.  This
- ; terminates because (true quotient) * denominator <= numerator < 2^32.
- div2m: mov ax,cx
- mul word ptr [bp+dhi]
- jc div2o ; mul sets carry when dx != 0: product >= 2^32
- mov bx,ax ; save lo part of hi product
- mov ax,cx
- mul word ptr [bp+dlo]
- add dx,bx
- jnc div2p ; no carry: the product fits in 32 bits
- ; The product exceeds the numerator, so the quotient is too large.
- div2o: dec cx
- jmp div2m
- ; Now cx = trial quotient, (dx,ax) = cx * denominator.
- div2p: not dx
- neg ax
- cmc
- adc dx,0 ; double-precision neg
- jc divz ; zero quotient
- ; requires special handling
- ; (dx,ax) = numerator - product; a carry out means the difference is
- ; not negative, i.e. the trial quotient is correct.
- add ax,[bp+nlo]
- adc dx,[bp+nhi]
- jc divx
- ; Quotient is too large, adjust it.
- div3: dec cx
- add ax,[bp+dlo]
- adc dx,[bp+dhi]
- jnc div3
- ; All done. (dx,ax) = remainder, cx = lo quotient.
- divx: pop bx
- pop bp
- jmp cs:xx3[bx]
- even
- xx3 dw offset divx3
- dw offset modx3
- dw offset divx3neg
- dw offset modx3neg
- divx3: mov ax,cx
- xor dx,dx
- modx3: ret 8
- divx3neg: mov ax,cx
- xor dx,dx
- modx3neg: jmp rneg
- ; Handle zero quotient specially.
- ; Here (dx,ax) = 0, which is the quotient; the remainder is the numerator
- ; itself, reloaded from the stack (bp is still the frame pointer).
- divz: pop bx
- jmp cs:xxz[bx]
- even
- ; A zero quotient needs no negation, so odivneg shares the odiv entry.
- xxz dw offset divxz
- dw offset modxz
- dw offset divxz
- dw offset modxzneg
- divxz: pop bp
- ret 8
- modxzneg: negbp nlo
- modxz: mov ax,[bp+nlo]
- mov dx,[bp+nhi]
- pop bp
- ret 8
- LUDIV@ endp
- F_LUDIV@ endp
-
- endif ; FOR80386
-
- ; Transpose an 8x8 bit matrix. See gsmisc.c for the algorithm in C.
- ; Far procedure taking its arguments on the stack, in this order from the
- ; lowest address: far pointer inp, int line_size, far pointer outp,
- ; int dist.  Eight input bytes are read from inp, inp+line_size, ...,
- ; inp+7*line_size, and eight result bytes are written to outp, outp+dist,
- ; ..., outp+7*dist.  ds, si and di are saved and restored; ax, bx, cx and
- ; dx are overwritten.  The arguments are left on the stack (plain ret).
- PUBLIC _memflip8x8
- _memflip8x8 proc far
- push ds
- push si
- push di
- ; After pushing, the offsets of the parameters are:
- ; byte *inp=10, int line_size=14, byte *outp=16, int dist=20.
- ; (4 bytes of far return address plus the 6 bytes just pushed)
- mov si,sp
- mov di,ss:[si+14] ; line_size
- lds si,ss:[si+10] ; inp
- ; We assign variables to registers as follows:
- ; ax = AE, bx = BF, cx (or di) = CG, dx = DH.
- ; Load the input data. Initially we assign
- ; ax = AB, bx = EF, cx (or di) = CD, dx = GH.
- ; (A..H are the eight input bytes in order; the first letter of each pair
- ; is the high byte of the register.)
- mov ah,[si]
- ; iload: advance si by line_size (held in di) and load the next input byte.
- iload macro reg
- add si,di
- mov reg,[si]
- endm
- iload al
- iload ch
- iload cl
- iload bh
- iload bl
- iload dh
- iload dl
- ; Transposition macro, see C code for explanation.
- ; Using si as a temporary, it computes
- ;   si = ((reg1 >> shift) ^ reg2) & mask
- ;   reg2 ^= si
- ;   reg1 ^= si << shift
- ; which exchanges the bits of reg2 selected by mask with the bits of reg1
- ; selected by (mask << shift).  si is overwritten.
- trans macro reg1,reg2,shift,mask
- mov si,reg1
- shr si,shift
- xor si,reg2
- and si,mask
- xor reg2,si
- shl si,shift
- xor reg1,si
- endm
- ; Do 4x4 transpositions
- ; (line_size is no longer needed, so di is free to hold the CD pair)
- mov di,cx ; we need cl for the shift count
- mov cl,4
- trans bx,ax,cl,0f0fh
- trans dx,di,cl,0f0fh
- ; Swap B/E, D/G
- ; (this changes the pairing from AB/EF/CD/GH to AE/BF/CG/DH)
- xchg al,bh
- mov cx,di
- xchg cl,dh
- ; Do 2x2 transpositions
- mov di,cx ; need cl again
- mov cl,2
- trans di,ax,cl,3333h
- trans dx,bx,cl,3333h
- mov cx,di ; done shifting >1
- ; Do 1x1 transpositions
- trans bx,ax,1,5555h
- trans dx,cx,1,5555h
- ; Store result
- ; (sp has not moved since entry, so the parameter offsets still apply)
- mov si,sp
- mov di,ss:[si+20] ; dist
- lds si,ss:[si+16] ; outp
- mov [si],ah
- ; istore: advance si by dist (held in di) and store the next output byte.
- istore macro reg
- add si,di
- mov [si],reg
- endm
- ; The bytes are written in the order A, B, C, D, E, F, G, H.
- istore bh
- istore ch
- istore dh
- istore al
- istore bl
- istore cl
- istore dl
- ; All done
- pop di
- pop si
- pop ds
- ret
- _memflip8x8 ENDP
-
-
- utilasm_TEXT ENDS
- END
-