home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Simtel MSDOS 1992 June
/
SIMTEL_0692.cdr
/
msdos
/
compress
/
lz13.arc
/
LZCOMP2.ASM
< prev
next >
Wrap
Assembly Source File
|
1988-10-20
|
8KB
|
303 lines
title lzcomp - file compressor using limpel-ziv algorithm
;Tom Pfau
;Digital Equipment Corporation
;Parsippany, NJ
;
;v1.2, Toad Hall Tweak
; - Converting to COM file.
; - Gonna use BP to hold bit_offset throughout (faster)
; - Moved most buffers outside code space.
;Constants
CLEAR equ 256 ;Clear code
EOF equ 257 ;End of file marker
FIRST_FREE equ 258 ;First free code
MAXMAX equ 4096 ;Max code + 1
include macros2.mlb
;Hash table entry
hash_rec struc
first dw ? ; First entry with this value
next dw ? ; Next entry along chain
char db ? ; Suffix char
hash_rec ends
;Declare Segments
Code segment para public 'code'
assume CS:Code, DS:Code, ES:Code
org 100H
LzComp proc near
jmp Start
;TH collecting all data segment stuff
input_prompt db 'Input file: $'
output_prompt db 'Output file: $'
input_file db 80,0,80 dup (?)
output_file db 80,0,80 dup (?)
crlf db 13,10,'$'
input_handle dw 0 ;?
output_handle dw 0 ;?
;hash_seg dw hash SHR 4 ;convert to a segment?
prefix_code dw 0 ;?
free_code dw 0 ;?
max_code dw 0 ;?
nbits dw 0 ;?
k db 0 ;?
;bit_offset dw 0 ;?
input_offset dw 0
input_size dw 0
LzComp endp
start proc near ;far
;TH we won't mess with any memory allocating, since I think
;all required buffers, hash tables, etc. can work within
;a lousy 64Kb segment.
;Won't even bother moving the stackpointer for now either.
print input_prompt ;Get input file name
input input_file
print crlf
print output_prompt ;And output
input output_file
print crlf
;Terminate file names with nulls
mov al,input_file+1 ;input filename length
xor ah,ah ;clear msb
mov si,ax ;pointer is filename length
mov input_file+2[si],0 ;point to last char+1, stuff 0
mov al,output_file+1 ;output filename length
mov si,ax ;pointer is filename length
mov output_file+2[si],0 ;point to last char+1, stuff 0
hopen input_file+2,0
mov input_handle,ax ;save input file handle
hcreat output_file+2,0
mov output_handle,ax ;save output file handle
call compress ;Compress file
hclose input_handle ;Close in and out
hclose output_handle
exit ;Done
start endp
compress proc near
l1: call init_table ;Initialize the table and some vars
mov ax,CLEAR ;Write a clear code
call write_code
call read_char ;Read first char
l4: xor ah,ah ;Turn char into code
l4a: mov prefix_code,ax ;Set prefix code
call read_char ;Read next char
jc l17 ;Carry means EOF
mov k,al ;Save char in k
mov bx,prefix_code ;Get prefix code
call lookup_code ;See if this pair in table
jnc l4a ;nc means yes, new code in ax
call add_code ;Add pair to table
push bx ;Save new code
mov ax,prefix_code ;Write old prefix code
call write_code
pop bx
mov al,k ;Get last char
cmp bx,max_code ;Exceed code size?
jl l4 ;less means no
cmp nbits,12 ;Currently less than 12 bits?
jl l14 ;yes
mov ax,CLEAR ;Write a clear code
call write_code
call init_table ;Reinit table
mov al,k ;get last char
jmp l4 ;Start over
l14: inc nbits ;Increase number of bits
shl max_code,1 ;Double max code size
jmp l4 ;Get next char
l17: mov ax,prefix_code ;Write last code
call write_code
mov ax,EOF ;Write EOF code
call write_code
mov ax,bp ;bit_offset ;Make sure buffer is flushed to file
or ax,ax ;TH
je l18
mov cx,8 ;convert bits to bytes
xor dx,dx
div cx
or dx,dx ;If extra bits, make sure they get
je l17a ;written
inc ax
l17a: call flush
l18: ret
compress endp
init_table proc near
mov nbits,9 ;Set code size to 9
mov max_code,512 ;Set max code to 512
mov ax,-1 ;Unused flag
mov cx,640 ;Clear first 256 entries
mov di,offset hash ;TH Point to first entry
rep stosw ;Clear it out
mov free_code,FIRST_FREE ;Set next code to use
ret ;done
init_table endp
write_code proc near
push ax ;Save code
mov ax,bp ;bit_offset ;Get bit offset
;TH we're keeping bit_offset in BP
; mov cx,nbits ;Adjust bit offset by code size
; add bit_offset,cx
add bp,nbits ;TH adjust bit offset by code size
mov cx,8 ;Convert bit offset to byte offset
xor dx,dx
div cx
cmp ax,1020 ;Approaching end of buffer?
jl wc1 ;less means no
call flush ;Write the buffer
;TH we're keeping bit_offset in BP
; push dx ;dx contains offset within byte
; add dx,nbits ;adjust by code size
; mov bit_offset,dx ;new bit offset
; pop dx ;restore dx
mov bp,dx ;TH dx contains offset within byte
add bp,nbits ;TH adjust by code size
add ax,offset output_data ;Point to last byte
mov si,ax ;put in si
mov al,[si] ;move byte to first position
mov byte ptr output_data,al
xor ax,ax ;Byte offset of zero
wc1: add ax,offset output_data ;Point into buffer
mov di,ax ;Destination
pop ax ;Restore code
mov cx,dx ;offset within byte
xor dx,dx ;dx will catch bits rotated out
jcxz wc3 ;If offset in byte is zero, skip shift
wc2: shl ax,1 ;Rotate code
rcl dx,1
loop wc2
or al,[di] ;Grab bits currently in buffer
wc3: stosw ;Save data
mov al,dl ;Grab extra bits
stosb ;and save
ret
write_code endp
flush proc near
push ax ;Save all registers
;TH push bx ;AX contains number of bytes to write
;TH push cx
push dx
hwrite output_handle,output_data,ax ;write output file
pop dx
;TH pop cx
;TH pop bx
pop ax
ret
flush endp
read_char proc near
mov di,input_offset ;Anything left in buffer?
cmp di,input_size
jl rd1 ;less means yes
hread input_handle,input_data,1024 ;Read next chunk of input
or ax,ax ;TH Anything left?
je rd2 ;equal means no, finished
mov input_size,ax ;Save bytes read
xor di,di ;TH clear DI
mov input_offset,di ;TH 0 ;Point to beginning of buffer
rd1:
;TH the mov/add instrs below are faster than the LEA.
; lea si,input_data[di] ;Point at character
mov si,offset input_data ;TH
add si,di ;Point at char
lodsb ;Read it in
inc input_offset ;Adjust pointer
clc ;Success
ret
rd2: stc ;Nothing left
ret
read_char endp
lookup_code proc near
call index ;convert code to address
xor di,di ;TH ;flag
cmp [si].first,-1 ;Has this code been used?
je gc4 ;equal means no
inc di ;set flag
mov bx,[si].first ;Get first entry
gc2: call index ;convert code to address
cmp [si].char,al ;is char the same?
jne gc3 ;ne means no
clc ;success
mov ax,bx ;put found code in ax
ret ;done
gc3: cmp [si].next,-1 ;More left with this prefix?
je gc4 ;equal means no
mov bx,[si].next ;get next code
jmp gc2 ;try again
gc4: stc ;not found
ret ;done
lookup_code endp
index proc near
mov si,bx ;si = bx * 5 (5 byte hash entries)
shl si,1 ;si = bx * 2 * 2 + bx
shl si,1
add si,bx
add si,offset hash ;TH plus hash table base
ret
index endp
add_code proc near
;Only called once
mov bx,free_code ;Get code to use
or di,di ;TH ;First use of this prefix?
je ac1 ;equal means yes
mov [si].next,bx ;point last use to new entry
jmp short ac2
ac1: mov [si].first,bx ;Point first use to new entry
ac2: cmp bx,MAXMAX ;Have we reached code limit?
je ac3 ;equal means yes, just return
call index ;get address of new entry
;TH switched around a little
mov [si].char,al ;save suffix char
mov ax,-1 ;TH do this once (ok to destroy AX)
mov [si].first,ax ;-1 ;initialize pointers
mov [si].next,ax ;-1
inc free_code ;TH adjust next code
ac3: ret
add_code endp
;TH input/output buffers moved here outside code space
even
output_data equ $ ;db 1024 dup (?)
input_data equ output_data+1024 ;db 1024 dup (?)
;Instead of using a separate segment for memory and the hash
;table (with all the resultant segment register fiddling),
;gonna just use a dynamic buffer right in code space.
hash equ input_data+1024
code ends
end LzComp