home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
CP/M
/
CPM_CDROM.iso
/
simtel
/
sigm
/
vols000
/
vol040
/
comp2.asm
< prev
next >
Wrap
Assembly Source File
|
1984-04-29
|
8KB
|
233 lines
; COMP2.ASM as of April 29, 1981
;
; This routine is extracted and CP/Mified from an article
; 'An Inroduction to Data Compression' by Harold Corbin,
; that appeared in the April 1981 issue of Byte magazine.
;
; CP/Mified by: Kelly Smith, CP/M-Net "SYSOP" April 29, 1981
;
; COMP2 (text compression routine) takes keyboard text
; (uppercase letters ONLY) and compresses them using the
; Huffman coding technique. The first two bytes in the data
; buffer (dbuf) are the bit count. Encoded data is stored in
; the data buffer in packed form, 8 bits to the data byte.
;
; The Huffman Code was derived for the following table, by
; the relative frequency of occurence in a random sampling
; of English text. Frequently used characters are assigned
; shorter bit 'code' patterns, and seldom used characters
; are assigned longer bit 'code' patterns. Note: with any
; set of codes generated, it is important that no code has a
; shorter code as part of its beginning. For example, if the
; letter 'E' is 100, then 10010 cannot be the code for
; another letter...because in scanning the bit stream from
; left to right (using EXP2, the text expansion routine),
; the decoding algorithm would think that 10010 is 'E' (100)
; followed by 10 and NOT the different letter that was
; intended.
;
; Letter Frequency (%) Huffman Code
;
; E 13.0 100
; T 10.5 001
; A 8.1 1111
; O 7.9 1110
; N 7.1 1100
; R 6.8 1011
; I 6.3 1010
; S 6.1 0110
; H 5.2 0101
; D 3.8 11011
; L 3.4 01111
; F 2.9 01001
; C 2.7 01000
; M 2.5 00011
; U 2.4 00010
; G 2.0 00001
; Y 1.9 00000
; P 1.9 110101
; W 1.5 011101
; B 1.4 011100
; V .9 1101001
; K .4 110100011
; X .15 110100001
; J .13 110100000
; Q .11 1101000101
; Z .07 1101000100
;
; The COMP2 program takes characters entered via the
; keyboard, checks for a legal character, finds the Huffman
; Code corresponding to the entered character, and the
; serial bit stream that results from the encoding process
; is packed and stored 8 bits to the byte.
;
; The heart of the program's operation is the table lookup
; and shifting function. Based upon a letter ASCII code, an
; index is computed that is then added to the base address
; of the encoding table. This table has the following
; format: two 8 bit words are required for each letter to be
; encoded; the low order 4 bits of the first word in memory
; contain a count of the number of bits required to encode
; the letter. The remaining 12 bits, 8 in the second byte
; followed by 4 in the top half of the first byte are used
; to store the compressed code. The code is stored left-
; justified in the 12 bit word.
;
; With the compressing code located, it is serialized by
; shifting left according to the count in the 4 bit part of
; the table. As each bit is shifted out, the total bit count
; in the buffer is updated. The processing of the input
; stream continues until a period (.) is detected, and
; control returns to CP/M. The data buffer at address 4000
; Hex then remains intact for expansion by EXP2.
;
;
;
true equ -1 ; define true
false equ not true; define false
;
stdcpm equ true ; true if regular CP/M (base address 0000h)
altcpm equ false ; true if alternate CP/M (base address 4200h)
;
if stdcpm ; if standard CP/M...
base equ 0000h ; bsae for standard CP/M system
endif ; end if...
;
if altcpm ; if alternate CP/M...
base equ 4200h ; base for H8 or TRS-80 CP/M system
endif ; end if...
;
bdos equ base+5 ; CP/M BDOS entry address for function call
;
rdcon equ 1 ; read console character
;
dbuf equ 04000h ; data buffer for compressed data
;
org base+100h
start: lxi h,0 ; save "old" CP/M stack pointer
shld dbuf ; clear compressed bit count
dad sp
shld oldstk
lxi sp,stack; make "new" stack pointer
lxi h,dbuf+2; store pointer to next bit location in data buffer
shld dadd
xra a ; clear bit position
sta pos
;
; get character from keyboard, check if end of text '.' character
;
get$character:
;
push b ; save reigisters
push d
push h
mvi c,rdcon ; read console character function
call bdos
pop h ; restore registers
pop d
pop b
ani 07fh ; mask-off parity bit
cpi '.' ; end of text?
jnz process ; if not, process this character
lhld dadd ; clean up partial byte remaining
lda pos ; compute remaining shift count
mov b,a
mvi a,8
sub b
ani 7
mov b,a ; keep shift count
mov a,m ; get compressed byte
shift: jz exit ; exit to CP/M, if all bytes processed
ral
dcr b
mov m,a ; replace compressed byte
jmp shift ; loop 'till done
process:sui 'A' ; character < 'A'?
jc get$character
cpi 'Z'-'A'+1 ; character > 'Z'?
jnc get$character
add a ; multiply by 2 to get table index
mov c,a
mvi b,0 ; make index bias to table
lxi h,compression$table ; point to base of table
dad b ; add index bias to table pointer
mov e,m ; get encoded value
inx h
mov d,m
mov a,e ; get bit count for this character
ani 00fh ; mask-off high nibble
mov b,a ; keep bit count
xchg ; swap pointer for encoded value
next: xra a
dad h ; shift out bit stream...
ral ; ...MSB first
ani 1 ; mask-off all but output bit
push h
lhld dadd ; get pointer to next bit location
mov d,a
lda pos ; get current bit position
mov e,a ; keep it
mov a,m ; get "old" compressed data
ral ; make room...
ora d ; ...compress
mov m,a ; save it, done with this compression
inr e ; update position
mov a,e
cpi 8 ; full byte processed?
jnz stor
xra a ; clear position
inx h ; bump for next bit location
shld dadd
stor: sta pos
lhld dbuf ; update compressed bit count
inx h
shld dbuf
pop h
dcr b ; de-bump count
jnz next ; continue compression, if not done
jmp get$character ; get next character, and compress
;
exit: lhld oldstk ; get "old" CP/M stack pointer
sphl ; and restore...
ret ; return to CP/M
;
compression$table:
;
dw 0f004h ; 'A'
dw 07006h ; 'B'
dw 04005h ; 'C'
dw 0d805h ; 'D'
dw 08003h ; 'E'
dw 04805h ; 'F'
dw 00805h ; 'G'
dw 05004h ; 'H'
dw 0a004h ; 'I'
dw 0d009h ; 'J'
dw 0d189h ; 'K'
dw 07805h ; 'L'
dw 01805h ; 'M'
dw 0c004h ; 'N'
dw 0e004h ; 'O'
dw 0d406h ; 'P'
dw 0d14ah ; 'Q'
dw 0b004h ; 'R'
dw 06004h ; 'S'
dw 02003h ; 'T'
dw 01005h ; 'U'
dw 0d207h ; 'V'
dw 07406h ; 'W'
dw 0d089h ; 'X'
dw 00005h ; 'Y'
dw 0d10ah ; 'Z'
;
dadd: dw 0 ; next bit location
;
pos: ds 1 ; current bit location
;
oldstk: ds 2 ; storage for "old" CP/M stack pointer
ds 32 ; storage for 16 level stack
stack equ $ ; top of local stack
;
end start