home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Fujiology Archive
/
fujiology_archive_v1_0.iso
/
!FALCON
/
LINEOUT
/
OUT.ZIP
/
SOURCE.ZIP
/
BILERP.ASM
< prev
next >
Wrap
Assembly Source File
|
2003-02-24
|
9KB
|
303 lines
; bilinear rotozoom (palette based)
;
; we have to keep the texture in dsp ram for speed
; this causes a problem, namely saving ram (for soundmixer)
; we can choose:
;
; a) 64*64 texture
; b) 128*128 texture, banked x:even,y:odd
; c) 128*128 texture, interleaved (2 texels in 1 word)
;
; a) might look crappy, but is simple and fast
; b) looks good, but is somewhat slower <-
; c) looks good (as b), but is slowest
;
; i chose to dump the concept of wrapping.. tough luck, but faster.
; seems to work now.. reasonable speed.
; one problem: you need to pack the pixels into words to avoid as many
; cpu handshakes as possible.. this might require some minor unrolling..
;
; update: fixed all the shit.. i think it's still slow due to banking..
;
; update: sending two pixels in one transfer speeds it up.
;
; update: sped it up a lot, because we keep vars in internal x/y mem.
; Build switch tested with IFNE in paint: non-zero assembles the path that
; packs two pixels into every word sent to the host, zero assembles the
; one-pixel-per-word path.
DOUBLE: = 1 ; send two pixels per transfer?
; get <dest>: blocking read of one word from the host port.
; Spins on itself (`*` = address of the jclr) while bit 0 of X:$FFE9 is
; clear, then copies X:$FFEB into <dest> (macro argument \1).
; On the DSP56001, X:$FFE9 is the host status register and X:$FFEB the host
; receive/transmit data register; bit 0 signals "receive data full".
get: MACRO
jclr #0,X:<<$FFE9,*
movep X:<<$FFEB,\1
ENDM
; send <src>: blocking write of one word to the host port.
; Spins on itself (`*` = address of the jclr) while bit 1 of X:$FFE9 is
; clear, then copies <src> (macro argument \1) to X:$FFEB.
; On the DSP56001, bit 1 of the host status register signals "transmit data
; empty". Not invoked in the code visible here; paint open-codes the same
; wait so it can pair the write with a parallel Y move.
send: MACRO
jclr #1,X:<<$FFE9,*
movep \1,X:<<$FFEB
ENDM
; Entry: p:0 holds a single jump to the real program, which is placed at
; p:$40 (presumably to stay clear of the interrupt vector area).
org p:0
jmp <start
;
; Main program: upload the texture once, then for every frame receive the
; three (u,v) vectors from the host and render. Never returns.
org p:$40
start: jsr <init
_frame: jsr <getVectors
jsr <paint
jmp <_frame
; getVectors: receive the rotozoom vectors for the next frame from the host.
;
; Reads three (u,v) pairs, u first, into uv_xstep, uv_ystep and uv_start:
; the u word goes to X memory and the v word to the same address in Y memory.
; Blocks until the host has delivered all six words.
; Clobbers: r0.
getVectors:
move #uv_table,r0
do #3,_recv
get x:(r0)
get y:(r0)+
_recv:
; Disabled debug override (IFNE 0 is never assembled): fixed start
; $3FFF00 for u and v, x-step = (u:$000100, v:0), y-step = (u:0, v:$000100).
IFNE 0
clr a
move #$3FFF00,x1
move #$000100,x0
move x1,x:<uv_start
move x1,y:<uv_start
move x0,x:<uv_xstep
move x0,y:<uv_ystep
move a,y:<uv_xstep
move a,x:<uv_ystep
ENDC
rts
; paint: render one frame of the bilinear-filtered rotozoomer and send it
; to the host.
;
; In:   uv_xstep / uv_ystep / uv_start (X word = u, Y word = v), as filled
;       in by getVectors; texture already uploaded by init.
; Out:  DOUBLE<>0: 100 rows of 80 words, each (pixel1<<8)+pixel2, to X:$FFEB.
;       DOUBLE=0 : 100 rows of 160 words, one pixel each, to X:$FFEB.
; Uses: a, b, x0, x1, y0, y1, r0-r6, n0, n4, n5, m0.
;       m0 is left at 128*128/2-1 on return.
; Scratch memory: uv_x (running u,v), uv_y (u,v at start of current row),
;       prv_pixel (first pixel of a pair, DOUBLE path only).
;
; Pointer registers (X word / Y word at the same address):
;   r1 -> host data register
;   r2 -> prv_pixel / texturesize
;   r3 -> texture width / v mask
;   r4 -> per-pixel step (u/v); n4=1 reaches the per-row step
;   r5 -> uv_x (u/v);           n5=1 reaches uv_y
;   r6 -> fraction scalar / <<8 pixel-multiplex scalar
;   r0 =  texel address, n0 = words per texture row in each bank
; Invariant at the top of every pixel: a = (u:v), y0 = v, y1 = texturesize.
;
; u and v are never wrapped (see file header); the texel address is used as
; an absolute address, i.e. the texture is assumed to start at address 0.
paint:
IFNE DOUBLE
move #$FFEB,r1
move #<texturesize,r2
move #<texturemask,r3
move #<uv_xstep,r4
move #<1,n4
move #<uv_x,r5
move #<1,n5
move #<scalar,r6
move #<128/2,n0
move l:<uv_start,a
move a,l:(r5+n5) ; uv_y = start (u,v)
movec #128*128/2-1,m0
move y:(r2),y1 ; y1=texturesize
do #100,_yloop
; calc start (u,v)
; a1=u, a0=v
move a,l:(r5) ; uv_x = (u,v) at start of this row
move y:(r5),y0 ; y0=v
do #80,_xloop
; ---- first pixel of the pair ----
; 1: calc texture coordinates (u,v) 30 cycles..
; a1=u, a0=v, y0=v, y1=texturesize
mpy y0,y1,b l:(r3),x ; b=v*texturesize, x1=width, x0=v mask
move #<0,b0
and x0,b l:(r5),y ; b1=row offset (v fraction killed), y1=u, y0=v
mac y1,x1,b l:(r4),x ; b=offset (+u*width), x1=u_step, x0=v_step
add x,a x:(r6),x0 ; a1=u[n+1], a0=v[n+1], x0=scalar
mpy x0,y0,a a,l:(r5) ; uv_x=(u,v)[n+1] (old a is stored), a=v*scalar
bclr #23,a0 ; drop the integer bit, keep a positive fraction
mpy x0,y1,a a0,y1 ; y1=v_frac, a=u*scalar
bclr #23,a0
lsr b a0,y0 ; y0=u_frac, b1=offset/2, carry=lowest bit of u
move b1,r0 ; r0=textureoffset (assuming texturestart=0!)
; 2: bilinear interpolation..
; two bank version..
; y0=u_frac, y1=v_frac
; r0:texture, n0=texturewidth
jcs <_odd ; Was it even?
; left texel in X bank, right texel in Y bank at the same address
; nice.. this one cannot wrap in u-dir ;) v-dir can be done with m0.. 20 cycles
_even: move x:(r0+n0),x0 ; x0=hlb
move x:(r0),b ; b=hlt
mac +y1,x0,b b,x0 y:(r0),a ; b=v_frac*hlb+hlt, x0=hlt, a=hrt
macr -y1,x0,b y:(r0+n0),x0 ; b=hl=v_frac*(hlb-hlt)+hlt, x0=hrb
mac +y1,x0,a a,x0 ; a=v_frac*hrb+hrt, x0=hrt
macr -y1,x0,a b,x0 ; a=hr=v_frac*(hrb-hrt)+hrt, x0=hl
mac -y0,x0,b a,x0 y:(r2),y1 ; b=hl-u_frac*hl, x0=hr, y1=texturesize
macr +y0,x0,b l:(r5),a ; b=h=u_frac*(hr-hl)+hl, a=uv_x
jmp <_next
; left texel in Y bank, right texel in X bank one word further on
; beware.. this one has no wrapping in u-direction.. 18 cycles
_odd: move y:(r0+n0),x0 ; x0=hlb
move y:(r0)+,b ; b=hlt, r0 -> next word
move b,x1 ; x1=hlt
mac +y1,x0,b x:(r0),a ; b=v_frac*hlb+hlt, a=hrt
macr -y1,x1,b x:(r0+n0),x0 ; b=hl=v_frac*(hlb-hlt)+hlt, x0=hrb
mac +y1,x0,a a,x0 ; a=v_frac*hrb+hrt, x0=hrt
macr -y1,x0,a b,x0 ; a=hr=v_frac*(hrb-hrt)+hrt, x0=hl
mac -y0,x0,b a,x0 y:(r2),y1 ; b=hl-u_frac*hl, x0=hr, y1=texturesize
macr +y0,x0,b l:(r5),a ; b=h=u_frac*(hr-hl)+hl, a=uv_x
_next: move b,x:(r2) y:(r5),y0 ; Store texturepixel in prv_pixel, y0=v
; ---- second pixel of the pair ----
; 1: calc texture coordinates (u,v) 30 cycles..
; a1=u, a0=v, y0=v, y1=texturesize
mpy y0,y1,b l:(r3),x ; b=v*texturesize, x1=width, x0=v mask
move #<0,b0
and x0,b l:(r5),y ; b1=row offset (v fraction killed), y1=u, y0=v
mac y1,x1,b l:(r4),x ; b=offset (+u*width), x1=u_step, x0=v_step
add x,a x:(r6),x0 ; a1=u[n+1], a0=v[n+1], x0=scalar
mpy x0,y0,a a,l:(r5) ; uv_x=(u,v)[n+1] (old a is stored), a=v*scalar
bclr #23,a0 ; drop the integer bit, keep a positive fraction
mpy x0,y1,a a0,y1 ; y1=v_frac, a=u*scalar
bclr #23,a0
lsr b a0,y0 ; y0=u_frac, b1=offset/2, carry=lowest bit of u
move b1,r0 ; r0=textureoffset (assuming texturestart=0!)
; 2: bilinear interpolation..
; two bank version..
; y0=u_frac, y1=v_frac
; r0:texture, n0=texturewidth
jcs <_odd2 ; Was it even?
; nice.. this one cannot wrap in u-dir ;) v-dir can be done with m0.. 20 cycles
_even2: move x:(r0+n0),x0 ; x0=hlb
move x:(r0),b ; b=hlt
mac +y1,x0,b b,x0 y:(r0),a ; b=v_frac*hlb+hlt, x0=hlt, a=hrt
macr -y1,x0,b y:(r0+n0),x0 ; b=hl=v_frac*(hlb-hlt)+hlt, x0=hrb
mac +y1,x0,a a,x0 ; a=v_frac*hrb+hrt, x0=hrt
macr -y1,x0,a b,x0 ; a=hr=v_frac*(hrb-hrt)+hrt, x0=hl
mac -y0,x0,b a,x0 y:(r2),y1 ; b=hl-u_frac*hl, x0=hr, y1=texturesize
macr +y0,x0,b x:(r2),x0 y:(r6),y0 ; b=h=u_frac*(hr-hl)+hl, x0=prv. pix, y0=<<8 scalar
jmp <_next2
; beware.. this one has no wrapping in u-direction.. 18 cycles
_odd2: move y:(r0+n0),x0 ; x0=hlb
move y:(r0)+,b ; b=hlt, r0 -> next word
move b,x1 ; x1=hlt
mac +y1,x0,b x:(r0),a ; b=v_frac*hlb+hlt, a=hrt
macr -y1,x1,b x:(r0+n0),x0 ; b=hl=v_frac*(hlb-hlt)+hlt, x0=hrb
mac +y1,x0,a a,x0 ; a=v_frac*hrb+hrt, x0=hrt
macr -y1,x0,a b,x0 ; a=hr=v_frac*(hrb-hrt)+hrt, x0=hl
mac -y0,x0,b a,x0 y:(r2),y1 ; b=hl-u_frac*hl, x0=hr, y1=texturesize
macr +y0,x0,b x:(r2),x0 y:(r6),y0 ; b=h=u_frac*(hr-hl)+hl, x0=prv. pix, y0=<<8 scalar
; pack both pixels into one word and hand it to the host
_next2: mpy x0,y0,a ; a0=c1<<8
move a0,y0 ; y0=c1<<8
add y0,b l:(r5),a ; b=c1<<8+c2, a=uv_x
jclr #1,x:<<$FFE9,* ; wait until the host port can take a word
move b,x:(r1) y:(r5),y0 ; Send two pixels to host, y0=v
_xloop:
ELSE
; old version.. one at at time.....
; NOTE(review): this path is not assembled while DOUBLE is non-zero.
; Its setup follows the DOUBLE path: v is reloaded at the start of every
; row, and the fraction scalar is read from X memory.
move #$FFEB,r1
move #<texturesize,r2
move #<texturemask,r3
move #<uv_xstep,r4
move #<1,n4
move #<uv_x,r5
move #<1,n5
move #<scalar,r6
move #<128/2,n0
move l:<uv_start,a
move a,l:(r5+n5) ; uv_y = start (u,v)
movec #128*128/2-1,m0
move y:(r2),y1 ; y1=texturesize
do #100,_yloop
; calc start (u,v)
; a1=u, a0=v
move a,l:(r5) ; uv_x = (u,v) at start of this row
move y:(r5),y0 ; y0=v (must be read after uv_x has been written)
do #160,_xloop
; 1: calc texture coordinates (u,v) 30 cycles..
; a1=u, a0=v, y0=v, y1=texturesize
mpy y0,y1,b l:(r3),x ; b=v*texturesize, x1=width, x0=v mask
move #<0,b0
and x0,b l:(r5),y ; b1=row offset (v fraction killed), y1=u, y0=v
mac y1,x1,b l:(r4),x ; b=offset (+u*width), x1=u_step, x0=v_step
; x:(r6), not y:(r6): the Y word at this address is the <<8 multiplex scalar
add x,a x:(r6),x0 ; a1=u[n+1], a0=v[n+1], x0=scalar
mpy x0,y0,a a,l:(r5) ; uv_x=(u,v)[n+1] (old a is stored), a=v*scalar
bclr #23,a0 ; drop the integer bit, keep a positive fraction
mpy x0,y1,a a0,y1 ; y1=v_frac, a=u*scalar
bclr #23,a0
lsr b a0,y0 ; y0=u_frac, b1=offset/2, carry=lowest bit of u
move b1,r0 ; r0=textureoffset (assuming texturestart=0!)
; 2: bilinear interpolation..
; two bank version..
; y0=u_frac, y1=v_frac
; r0:texture, n0=texturewidth
jcs <_odd ; Was it even?
; nice.. this one cannot wrap in u-dir ;) v-dir can be done with m0.. 20 cycles
_even: move x:(r0+n0),x0 ; x0=hlb
move x:(r0),b ; b=hlt
mac +y1,x0,b b,x0 y:(r0),a ; b=v_frac*hlb+hlt, x0=hlt, a=hrt
macr -y1,x0,b y:(r0+n0),x0 ; b=hl=v_frac*(hlb-hlt)+hlt, x0=hrb
mac +y1,x0,a a,x0 ; a=v_frac*hrb+hrt, x0=hrt
macr -y1,x0,a b,x0 ; a=hr=v_frac*(hrb-hrt)+hrt, x0=hl
mac -y0,x0,b a,x0 y:(r2),y1 ; b=hl-u_frac*hl, x0=hr, y1=texturesize
macr +y0,x0,b l:(r5),a ; b=h=u_frac*(hr-hl)+hl, a=uv_x
jmp <_next
; beware.. this one has no wrapping in u-direction.. 18 cycles
_odd: move y:(r0+n0),x0 ; x0=hlb
move y:(r0)+,b ; b=hlt, r0 -> next word
move b,x1 ; x1=hlt
mac +y1,x0,b x:(r0),a ; b=v_frac*hlb+hlt, a=hrt
macr -y1,x1,b x:(r0+n0),x0 ; b=hl=v_frac*(hlb-hlt)+hlt, x0=hrb
mac +y1,x0,a a,x0 ; a=v_frac*hrb+hrt, x0=hrt
macr -y1,x0,a b,x0 ; a=hr=v_frac*(hrb-hrt)+hrt, x0=hl
mac -y0,x0,b a,x0 y:(r2),y1 ; b=hl-u_frac*hl, x0=hr, y1=texturesize
macr +y0,x0,b l:(r5),a ; b=h=u_frac*(hr-hl)+hl, a=uv_x
_next: jclr #1,x:<<$FFE9,* ; wait until the host port can take a word
move b,x:(r1) y:(r5),y0 ; Send one pixel to host, y0=v
_xloop:
ENDC
; increment start (u,v): uv_y += per-row step, a keeps the new row start
move l:(r5+n5),a
move l:(r4+n4),x
add x,a
move a,l:(r5+n5)
_yloop: rts
; init: receive the texture from the host and store it in DSP memory.
;
; Reads 128*128/2 word pairs; of each pair the first word goes to X memory
; and the second to the same address in Y memory, starting at `texture`.
; The count is passed to DO in x0.
; Blocks until the host has delivered every word. Clobbers: x0, r0.
init: move #>texture,r0
move #128*128/2,x0
do x0,_upload
get x:(r0)
get y:(r0)+
_upload: rts
; X memory variables, starting at address 0. Every word has a partner at the
; same address in Y memory (see the `org y:0` section), so the L: moves in
; paint fetch an X/Y pair in one go. For the uv_* entries X holds u, Y holds v.
; The order and sizes below must stay in step with the Y section.
org x:0
texturemask: dc 128 ; texturewidth (x:0; multiplied with u in paint)
uv_table: ; x:1, first of the three entries filled by getVectors
uv_xstep: ds 1 ; u_xstep (x:1; added per pixel)
uv_ystep: ds 1 ; u_ystep (x:2; added per row)
uv_start: ds 1 ; u_start (x:3; coordinate of the first pixel)
uv_x: ds 1 ; u_x (x:4; running coordinate within a row)
uv_y: ds 1 ; u_y (x:5; coordinate at the start of the current row)
prv_pixel: ds 1 ; x:6; first pixel of a pair, kept until the second is ready
scalar: dc 128/2 ; scalar for (u,v) fractions (x:7)
ds 1 ; x:8; not referenced by the code visible here
texture: ds 128*128/2 ; X half of the banked texture (x:even per file header)
; Y memory variables, starting at address 0. Each word is the partner of the
; X word at the same address (see the `org x:0` section); only texturesize
; carries a label here, the rest are reached through the X labels.
; The order and sizes below must stay in step with the X section.
org y:0
dc $003F80 ; texture v_mask (y:0, partner of texturemask)
ds 1 ; v_xstep (y:1)
ds 1 ; v_ystep (y:2)
ds 1 ; v_start (y:3)
ds 1 ; v_x (y:4)
ds 1 ; v_y (y:5)
texturesize: dc 128*128 ; y:6, partner of prv_pixel; multiplied with v in paint
dc $000080 ; <<8 scalar for pixel multiplex (y:7, partner of scalar)
ds 1 ; y:8; not referenced by the code visible here
ds 128*128/2 ; texture (Y half of the banked texture, y:odd per file header)