home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Windoware
/
WINDOWARE_1_6.iso
/
screen
/
winlava
/
sqrt.asm
< prev
Wrap
Assembly Source File
|
1991-09-10
|
16KB
|
626 lines
;---------------------------Module-Header-------------------------------;
; Module Name: SQRT.ASM
;
; Contains FIXED and ULONG square root
;
; Created: Sun 30-Aug-1987 19:28:30
;
; NOTE: the 64-bit sqrt code is 386 specific!
;
;-----------------------------------------------------------------------;
;
; (C) Copyright Microsoft Corp. 1987-1991. All rights reserved.
;
; You have a royalty-free right to use, modify, reproduce and
; distribute the Sample Files (and/or any modified version) in
; any way you find useful, provided that you agree that
; Microsoft has no warranty obligations or liability for any
; Sample Application Files which are modified.
;
?WIN = 0
?PLM = 0
?NODATA = 1
.xlist
include cmacros.inc
.list
UQUAD struc
uq0 dw ?
uq1 dw ?
uq2 dw ?
uq3 dw ?
UQUAD ends
PTL struc
ptl_x dd ?
ptl_y dd ?
PTL ends
POINTFX struc
x dd ?
y dd ?
z dd ?
w dd ?
POINTFX ends
MAXINT equ 7FFFh
MININT equ 8000h
; The following two equates are just used as shorthand
; for the "word ptr" and "byte ptr" overrides.
wptr equ word ptr
bptr equ byte ptr
; The following structure should be used to access high and low
; words of a DWORD. This means that "word ptr foo[2]" -> "foo.hi".
LONG struc
lo dw ?
hi dw ?
LONG ends
FARPOINTER struc
off dw ?
sel dw ?
FARPOINTER ends
assert macro a,b,c,d
endm
EAXtoDXAX macro
shld edx,eax,16 ; move HIWORD(eax) to dx
endm
DXAXtoEAX macro
ror eax,16 ; xchg HIWORD(eax) and LOWORD(eax)
shrd eax,edx,16 ; move LOWORD(edx) to HIWORD(eax)
endm
.386
sBegin Code
assumes cs,Code
assumes es,nothing
assumes ds,nothing
;---------------------------Private-Macro-------------------------------;
; set_ov
;
; Sets the overflow flag
;
; Entry:
; reg = word register to use for scratch
; Returns:
; OF set
; Error Returns:
; none
; Registers Destroyed:
; reg
;-----------------------------------------------------------------------;
set_ov macro reg
mov reg,8000h
dec reg
endm
;---------------------------Public-Routine------------------------------;
; square_root
;
; Computes the Fixed square root of a Fixed. This algorithm
; comes from Knuth D.E; Metafont:The Program (Addison-Weseley, 1986)
; Part 8:Algebraic and Transcendental Functions.
;
; Notation used below:
;
; Entry:
; DX:AX = number to square root
; Returns:
; DX:AX = square root of input
; Error Returns:
; OF = 1
; Registers Destroyed:
; BX,CX
; Registers Preserved:
; DS,ES,SI,DI
;-----------------------------------------------------------------------;
assumes ds,nothing
assumes es,nothing
cProc fxsqrt,<PUBLIC,NEAR,NODATA,NONWIN>,<>
ParmD fx
cBegin
mov ax, fx.lo
mov dx, fx.hi
cCall square_root
mov dh,dl ; DX:AX *= sqrt(1000h)
mov dl,ah
mov ah,al
xor al,al
cEnd
cProc ulsqrt,<PUBLIC,NEAR,NODATA,NONWIN>,<>
ParmD ul
cBegin
mov ax, ul.lo
mov dx, ul.hi
cCall square_root
cEnd
;
; compute the square root of EDX:EAX
;
; retult returned in EAX and DX:AX
;
; We dont have a 64-bit square_root function
; so approximate it by dividing by 4 until we can use our
; 32-bit function
;
cProc sqrt64,<PUBLIC,NEAR,NODATA,NONWIN>,<>
cBegin
xor cx,cx
test_less_than_32:
or edx,edx ; is it less than 32-bit?
jz less_than_32
inc cx
shrd eax,edx,2 ; divide by 4
shr edx,2
jmp short test_less_than_32
less_than_32:
;;
;; EDX:EAX is now less than 32 bits, CX contains count of
;; divisions by 4 we had to do to get here. call square_root
;; and then re-normalize
;;
push cx
EAXtoDXAX
call square_root
pop cx
jcxz sqrt64_exit
@@: shl ax,1
rcl dx,1
loop @b
sqrt64_exit:
DXAXtoEAX
cEnd
cProc square_root,<PUBLIC,NEAR,NODATA,NONWIN>,<SI,DI>
;; ParmD fx
cBegin
;; mov ax, fx.lo
;; mov dx, fx.hi
; check for < 1 since this algorithm returns 0000:0001
or dx,dx
jnz non_zero_arg
cmp ax,1
ja non_zero_arg
jmp square_root_exit
negative_arg: ; can't have number negative
xor ax,ax
cwd
set_ov bx
jmp square_root_exit
non_zero_arg:
cCall ulNormalize
;;;;;;;;jcxz negative_arg
jz exit_relay
shr cx,1
jc add_shifts_only
shr dx,1
rcr ax,1
dec cx
add_shifts_only:
;;
;; use 23 for a FIXED square_root, and 15 for a ULONG square_root!
;;
if 0
mov ch,23 ; FIXED square_root
else
mov ch,15 ; ULONG square_root
endif
sub ch,cl
mov cl,8
sub ch,cl
jg more_than_8 ; CL = Min(count,8)
add cl,ch
xor ch,ch ; CH = Max(count-8,0)
more_than_8:
xchg ax,si ; DI:SI = x
mov di,dx
xor ax,ax ; AX = y = lower bound = 0
mov bx,2 ; BX = q = estimate of root = 2
add si,si ; intiialize y = top bit of x
adc di,di
adc ax,ax
first_8_loop:
add si,si ; move 2 bits from top
adc di,di ; of x to bottom of y
adc ax,ax
assert NC
add si,si
adc di,di
adc ax,ax
assert NC
sub ax,bx ; AX = y - q
jle y_neg_or_zero_8
;!!!CR loops can be cleaned up some
add bx,bx ; BX = 2q
sub ax,bx ; AX = y - 3q
jg y_greater_q_8
add ax,bx ; AX = y - q
dec cl
jnz first_8_loop
jcxz first_8_was_enough
jmp short done_with_1st_8
y_greater_q_8:
add bx,2 ; BX = 2q + 2 (or 2q if jg not taken)
dec cl
jnz first_8_loop
or ch,ch
jnz done_with_1st_8
first_8_was_enough:
xchg ax,bx ; DX:AX = q
xor dx,dx
shr ax,1 ; root = q >> 1
or ax,ax ; clear overflow flag
exit_relay:
jmp square_root_exit
y_neg_or_zero_8: ; come here if y <= 0
dec bx
add bx,bx ; BX = 2q - 2
add ax,bx ; AX = y + q - 2
dec cl
jnz first_8_loop
jcxz first_8_was_enough
;si is now empty, di contains all signifigant bits
done_with_1st_8:
mov cl,4
sub ch,cl
jg second_4_loop ; CL = Min(count,4)
add cl,ch
xor ch,ch ; CH = Max(count-8,0)
second_4_loop:
; move 2 bits from top
add di,di ; of x to bottom of y
adc ax,ax
assert NC
add di,di
adc ax,ax
assert NC
sub ax,bx ; AX = y - q
jle y_neg_or_zero_2nd_4
add bx,bx ; BX = 2q
;!!!CR use compare and jump rather than sub, branch, and restore
sub ax,bx ; AX = y - 3q
jg y_greater_q_2nd_4
add ax,bx ; AX = y - q
dec cl
jnz second_4_loop
jcxz second_4_was_enough
jmp short done_with_2nd_4
y_greater_q_2nd_4:
add bx,2 ; BX = 2q + 2 (or 2q if jg not taken)
dec cl
jnz second_4_loop
or ch,ch
jnz done_with_2nd_4
second_4_was_enough:
xchg ax,bx ; DX:AX = q
xor dx,dx
shr ax,1 ; root = q >> 1
or ax,ax ; clear overflow flag
jmp square_root_exit
y_neg_or_zero_2nd_4: ; come here if y <= 0
dec bx
add bx,bx ; BX = 2q - 2
add ax,bx ; AX = y + q - 2
dec cl
jnz second_4_loop
jcxz second_4_was_enough
done_with_2nd_4:
xor dx,dx ; DX:AX = y SI:BX = q
mov cl,4
sub ch,cl
jg third_4_loop ; CL = Min(count,4)
add cl,ch
xor ch,ch ; CH = Max(count-13,0)
third_4_loop:
;!!!CR Add comment noting that 32 bit math is being used now
; move 2 bits from top
add di,di ; of x to bottom of y
adc ax,ax
adc dx,dx
assert NC
add di,di
adc ax,ax
adc dx,dx
assert NC
sub ax,bx ; DX:AX = y - q
sbb dx,si
js y_negative_3rd_4
jz y_might_be_zero_3rd_4
sorry_y_not_zero_3rd_4:
add bx,bx
adc si,si ; SI:BX = 2q
sub ax,bx ; DX:AX = y - 3q
sbb dx,si
jg y_greater_q_3rd_4
jl y_less_or_equal_q_3rd_4
or ax,ax
jnz y_greater_q_3rd_4
y_less_or_equal_q_3rd_4:
add ax,bx ; DX:AX = y - q
adc dx,si
dec cl
jnz third_4_loop
jcxz third_4_was_enough
jmp short done_with_3rd_4
y_greater_q_3rd_4:
add bx,2
adc si,0
dec cl
jnz third_4_loop
or ch,ch
jnz done_with_3rd_4
third_4_was_enough:
xchg ax,bx ; DX:AX = q
mov dx,si
shr dx,1 ; root = q >> 1
rcr ax,1
or ax,ax ; clear overflow flag
jmp square_root_exit ;;; short!
y_might_be_zero_3rd_4:
or ax,ax
jnz sorry_y_not_zero_3rd_4
y_negative_3rd_4: ; come here if y <= 0
sub bx,1
sbb si,0
add bx,bx ; DL:BX = 2q - 2
adc si,si
add ax,bx
adc dx,si ; DH:AX = y + q - 2
dec cl
jnz third_4_loop
jcxz third_4_was_enough
done_with_3rd_4:
xchg ch,cl
last_7_loop:
add ax,ax
adc dx,dx
add ax,ax
adc dx,dx
sub ax,bx ; DX:AX = y - q
sbb dx,si
js y_negative_last_7
jz y_might_be_zero_last_7
;!!!CR Clean up the exit so that a single exit point is used.
sorry_y_not_zero_last_7:
add bx,bx
adc si,si ; SI:BX = 2q
sub ax,bx ; DX:AX = y - 3q
sbb dx,si
jg y_greater_q_last_7
jl y_less_or_equal_q_last_7
or ax,ax
jnz y_greater_q_last_7
y_less_or_equal_q_last_7:
add ax,bx ; DX:AX = y - q
adc dx,si
loop last_7_loop
xchg ax,bx ; DX:AX = q
mov dx,si
shr dx,1 ; root = q >> 1
rcr ax,1
or ax,ax ; clear overflow flag
jmp short square_root_exit
y_greater_q_last_7:
add bx,2
adc si,0
loop last_7_loop
xchg ax,bx ; DX:AX = q
mov dx,si
shr dx,1 ; root = q >> 1
rcr ax,1
or ax,ax ; clear overflow flag
jmp short square_root_exit
y_might_be_zero_last_7:
or ax,ax
jnz sorry_y_not_zero_last_7
y_negative_last_7: ; come here if y <= 0
sub bx,1
sbb si,0
add bx,bx ; DL:BX = 2q - 2
adc si,si
add ax,bx
adc dx,si ; DH:AX = y + q - 2
loop last_7_loop
xchg ax,bx ; DX:AX = q
mov dx,si
shr dx,1 ; root = q >> 1
rcr ax,1
or ax,ax ; clear overflow flag
square_root_exit:
cEnd
;---------------------------Public-Routine------------------------------;
; ulNormalize
;
; Normalizes a ULONG so that the highest order bit is 1. Returns the
; number of shifts done. Also returns ZF=1 if the ULONG was zero.
;
; Entry:
; DX:AX = ULONG
; Returns:
; DX:AX = normalized ULONG
; CX = shift count
; ZF = 1 if the ULONG is zero, 0 otherwise
; Registers Destroyed:
; none
;-----------------------------------------------------------------------;
assumes ds,nothing
assumes es,nothing
cProc ulNormalize,<PUBLIC,NEAR>
cBegin
; shift by words
xor cx,cx
or dx,dx
js ulNormalize_exit
jnz top_word_ok
xchg ax,dx
or dx,dx
jz ulNormalize_exit ; the zero exit
mov cl,16
js ulNormalize_exit
top_word_ok:
; shift by bytes
or dh,dh
jnz top_byte_ok
xchg dh,dl
xchg dl,ah
xchg ah,al
add cl,8
or dh,dh
js ulNormalize_exit
top_byte_ok:
; do the rest by bits
inc cx
add ax,ax
adc dx,dx
js ulNormalize_exit
inc cx
add ax,ax
adc dx,dx
js ulNormalize_exit
inc cx
add ax,ax
adc dx,dx
js ulNormalize_exit
inc cx
add ax,ax
adc dx,dx
js ulNormalize_exit
inc cx
add ax,ax
adc dx,dx
js ulNormalize_exit
inc cx
add ax,ax
adc dx,dx
js ulNormalize_exit
inc cx
add ax,ax
adc dx,dx
ulNormalize_exit:
cEnd
sEnd Code
end