home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Simtel MSDOS 1992 December
/
simtel1292_SIMTEL_1292_Walnut_Creek.iso
/
msdos
/
txtutl
/
toadcr11.arc
/
TOADCRLF.ASM
next >
Wrap
Assembly Source File
|
1989-08-04
|
13KB
|
492 lines
TITLE CrLf Unix<>DOS EOL converter
Comment ~
Usage: CRLF [-r] file1 [>output]
-r Convert from DOS CR/LF End Of Line (EOL) to Unix LF EOL.
Default output is STDOUT (e.g., redirectable).
Written to replace the C version by Steve Creps (which was a slug)
Given to the public domain, 7 Jul 89
David Kirschbaum
Toad Hall
kirsch@braggvax.ARPA
v1.1, 3 Aug 89
- Bug report from Erich Neuwirth <A4422DAB%AWIUNI11.BITNET@CUNYVM.CUNY.EDU>
Unix -> DOS conversion breaks down at the BUFFSIZE boundary.
Found bug when writing output buffer .. a constant CR or LF (in AH)
was being lost by destroying AX during the output write.
Fixed. Thanks, Erich.
- While I was at it, relocated dynamic file buffers
to overlay the startup code and messages.
- Changed input and output buffer sizes to eliminate the multiple tests
of the ES:DI output buffer pointer during conversion.
Now we read an input buffer-full, process it to the output buffer,
and then write the full output buffer .. not worrying about overrunning
the output buffer end (and our stack!)
This overrun problem would only arise during Unix --> DOS conversions
(where output is longer than input).
There's no telling just HOW much longer the output will be than the
input .. worst case is input * 2 (a file of all LFs converted to
a file of all CR/LFs).
Assuming worst case, we're making our output buffer TWICE the size
of the input buffer (most unlikely .. but still ..).
Surprisingly, we've gained no significant speed! The more frequent
input file reads offset the increase in processing speed .. sigh ...
Toad Hall
Comment ends ~
CR	EQU	13		;ASCII carriage return
LF	EQU	10		;ASCII line feed
FALSE	EQU	0
TRUE	EQU	NOT FALSE
STDOUT	EQU	1		;DOS handle: standard output
STDERR	EQU	2		;DOS handle: standard error
;Input buffer size in bytes (v1.1; an earlier, arbitrary value was 30000).
;The output buffer that follows it has room for twice this much.
BUFFSIZE EQU	20000
;One segment holds everything: CS, DS and ES are all assumed to be CSEG.
CSEG SEGMENT PARA PUBLIC 'CODE'
ASSUME CS:CSEG,DS:CSEG,ES:CSEG
org 80H
cmdline label byte ;command line: length byte first, then the text (read by Parse_CmdLine)
org 100H ;program entry point (see END CrLf at the bottom)
;-- Entry point. Jumps over the resident variables and AsciiZ messages
; below and continues at Start.
; All messages end in a 0 byte because Write_StdErr scans for it.
CrLf proc near
jmp Start ;skip over data
handle dw 0 ;input file handle (set by Start after the open)
eofFlag db FALSE ;non-0 if EOF (bumped when a read returns less than requested)
addcr db LOW(TRUE) ;assume we're converting LF to CR/LF; flipped by the -r switch
unix$ db ' Unix --> DOS',CR,LF,0 ;shown after the filename when adding CRs
dos$ db ' DOS --> Unix',CR,LF,0 ;shown after the filename when removing CRs
u2d_warn$ db 'Warning: '
db 'Unix file has CRs!',CR,LF,0 ;Unix -> DOS, but HAS CRs!
d2u_warn$ db 'Warning: '
db 'DOS file has no CRs!',CR,LF,0 ;DOS -> Unix, but no CRs!
openErr$ db 'Can''t open target file',0 ;input file open err msg
readErr$ db 'Input file read error',0 ;input file read err msg
outErr$ db 'Output error',0 ;output write error msg
switchErr$ db 'Unknown switch',0 ;error msg if not -r switch
prompt$ db 'Continue? [Y/N]: ',0 ;prompt string (follows a warning in Test_Buffer)
abort$ db 'User abort',0 ;abort msg (user did not answer Y)
CrLf endp
;-- Main line.
; Parses the command line, opens the input file read-only, then converts
; it one bufferful at a time, writing each converted buffer to StdOut.
; Always ends in DOS terminate (AH=4CH) with the ERRORLEVEL in AL:
;   0  clean run, including an empty input file (nothing to convert)
;   otherwise the value AL held when the failure was reported
;      (for DOS failures that is the low byte of the DOS error code).
; Register roles inside the loop:
;   DS:SI -> input buffer, ES:DI -> next free output byte,
;   CX = bytes in the input buffer.
Start proc near
	call	Parse_CmdLine		;get any switches,
					;prepare target filename
	jc	Msg_Exit		;no action, DX -> error msg
					;else DX -> target filename's first character
	mov	ax,3D00H		;open file, read only
	int	21H
	mov	dx,offset openErr$	;'Can't open target file' (MOV leaves CF alone)
	jc	Msg_Exit		;open failed, die
	mov	handle,ax		;save input file handle
	call	Test_Buffer		;initial input file read,
					;test for funny input file EOLs
	jnc	Read_1			;ok, skip the read/EOF test seq
;Test_Buffer returns CF set both for a failed read (AX = DOS error code)
;and for an empty file (AX = 0).  An empty file is not an error: there is
;simply nothing to convert, so leave quietly with ERRORLEVEL 0.
	or	ax,ax			;empty file?
	jz	Terminate		;yep, AL is already 0
	jmp	short Read_Error	;nope, the first read really failed

;We loop here, processing each buffer full, until EOF or file error.
Read_Lup:
	mov	si,offset INBUFF	;DS:SI -> input buffer base
	cmp	eofFlag,FALSE		;hit input EOF yet?
	jnz	Flush			;yep, flush any remaining processed
					;chars, terminate.
	mov	dx,si			;read into input buffer (DS:SI)
	mov	cx,BUFFSIZE		;try for a full buffer's worth
	mov	bx,handle		;input file handle
	mov	ah,3FH			;read from file/device
	int	21H
	jc	Read_Error		;read failed
	or	ax,ax			;read anything?
	jz	Flush			;nope, flush any remaining processed
					;chars, terminate.
	cmp	ax,cx			;read all we requested? (CF set if short)
	adc	eofFlag,0		;will make flag non-0 if EOF
	mov	cx,ax			;CX = input buffer count
Read_1:
	call	Process_Buff		;convert input buffer EOLs
	call	Write_Output		;write output buffer bytes
	jnb	Read_Lup		;went ok
	jmp	short Write_Error	;CF means output write failed

Flush:
	call	Write_Output		;write any output buffer bytes
	jc	Write_Error		;failed
	xor	ax,ax			;ERRORLEVEL=0
	jmp	short Terminate

Read_Error:
	mov	dx,offset readErr$	;'Input file read error'
	jmp	short Msg_Exit		;terminate

Write_Error:
	mov	dx,offset outErr$	;'Output error'
					;fall thru to...

;Come here with any messages in DX
Msg_Exit:
	push	ax			;save any errors in AL
	call	Write_StdErr		;output to StdErr (console)
	pop	ax
Terminate:
	mov	ah,4CH			;terminate (errorlevel in AL)
	int	21H			;we let DOS close the input file.
Start endp
;-- First read of the input file, plus a sanity check of its EOLs.
; Reads up to BUFFSIZE bytes into INBUFF and looks for a CR in them.
; A CR while converting Unix -> DOS, or no CR at all while converting
; DOS -> Unix, earns the user a warning and a 'Continue?' prompt.
; Anything but Y/y at the prompt ends the program (ERRORLEVEL 1).
; Returns CF clear: CX = bytes read, DS:SI -> INBUFF, ES:DI -> OUTBUFF,
;                   eofFlag bumped if the read came up short
;         CF set:   the read failed (AX as DOS left it) or
;                   the file was empty (AX = 0)
Test_Buffer proc near
	mov	ah,3FH			;DOS read from file/device
	mov	bx,handle		;  from the input file
	mov	cx,BUFFSIZE		;  one full buffer, if there is that much
	mov	dx,offset INBUFF	;  into the input buffer
	int	21H
	jc	TB_Ret			;DOS refused: hand CF back as is
	cmp	ax,1			;sets CF only when AX = 0 (empty file)
	jb	TB_Ret			;nothing read: return with CF set

	cmp	ax,cx			;short read? (CF set if so)
	adc	eofFlag,0		;then remember we have seen EOF
	mov	cx,ax			;CX = bytes actually in the buffer
	mov	si,dx			;DS:SI -> INBUFF for the converter
	mov	di,dx			;ES:DI -> INBUFF for the scan
	mov	al,CR			;the byte we are hunting for
	push	cx			;the scan eats CX
	repne scasb			;ZF set if a CR turned up
	pop	cx
	pushf				;keep the verdict, CMP below trashes it

	cmp	addCr,FALSE		;which way are we converting?
	jnz	TB_U2D			;adding CRs: Unix -> DOS

;DOS -> Unix: a DOS text file ought to contain CRs.
	mov	dx,offset d2u_warn$	;'Warning: DOS file has no CRs!'
	popf				;scan verdict back
	jnz	TB_Warn			;not a single CR: suspicious
	jmp	short TB_Ok		;found one, carry on

;Unix -> DOS: a Unix text file ought NOT to contain CRs.
TB_U2D:
	mov	dx,offset u2d_warn$	;'Warning: Unix file has CRs!'
	popf				;scan verdict back
	jnz	TB_Ok			;no CRs, carry on
					;else fall into the warning

;Shared by both directions: DX -> the warning text.
TB_Warn:
	call	Write_StdErr		;show the warning
	mov	dx,offset prompt$	;'Continue? [Y/N]: '
	call	Write_StdErr
	mov	ax,0C08H		;flush keyboard, read one key, no echo
	int	21H
	and	al,5FH			;fold to upper case
	cmp	al,'Y'
	jz	TB_Ok			;user wants to go on
	pop	ax			;throw away our return address
	mov	al,1			;ERRORLEVEL 1
	mov	dx,offset abort$	;'User abort'
	jmp	Msg_Exit		;say so and terminate

TB_Ok:
	mov	di,offset OUTBUFF	;ES:DI -> output buffer base
	clc				;success
TB_Ret:
	ret
Test_Buffer endp
;-- Dispatcher: hands the buffer to the converter selected by addcr.
; Registers pass straight through to the chosen routine, and its RET
; returns directly to our caller.
Process_Buff proc near
	mov	bx,offset Dos_To_Unix	;default: strip CRs (DOS -> Unix)
	cmp	addcr,TRUE		;are we adding CRs instead?
	jnz	PB_Go			;no, keep the default
	mov	bx,offset Unix_To_Dos	;yes: LF -> CR/LF (Unix -> DOS)
PB_Go:
	jmp	bx			;tail-jump into the converter
Process_Buff endp
;-- DOS -> Unix: copy the input buffer to the output buffer, dropping
; every CR byte on the way (so CR/LF comes out as a bare LF).
; In:   DS:SI -> input buffer start
;       ES:DI -> next free output buffer byte
;       CX    =  bytes in the input buffer (callers pass a nonzero count)
; Out:  DI advanced past the last byte stored; SI past the input; CX = 0
; Destroys AX.
; There is deliberately no per-byte output overrun test: the output can
; never be longer than the input in this direction.
Dos_To_Unix proc near
D2U_Next:
	lodsb				;fetch the next input byte
	cmp	al,CR			;a CR?
	jne	D2U_Keep		;no, it goes to the output
	loop	D2U_Next		;yes: swallow it, on to the next byte
	ret
D2U_Keep:
	stosb				;ordinary char (or LF): store it
	loop	D2U_Next
	ret
Dos_To_Unix endp
;-- Unix -> DOS: copy the input buffer to the output buffer, replacing
; every LF byte with the CR,LF pair.
; In:   DS:SI -> input buffer start
;       ES:DI -> next free output buffer byte
;       CX    =  bytes in the input buffer (callers pass a nonzero count)
; Out:  DI advanced past the last byte stored; SI past the input; CX = 0
; Destroys AX.
; No per-byte output overrun test: the output buffer is sized for the
; worst case of every input byte doubling.
Unix_To_Dos proc near
U2D_Next:
	lodsb				;fetch the next input byte
	cmp	al,LF			;Unix end of line?
	jne	U2D_Plain		;no, copy it unchanged
	mov	ax,(LF SHL 8) OR CR	;AL = CR, AH = LF
	stosw				;one word store emits CR then LF
	jmp	short U2D_Count
U2D_Plain:
	stosb				;ordinary char
U2D_Count:
	loop	U2D_Next		;until the input buffer is used up
	ret
Unix_To_Dos endp
;-- Write the converted bytes in the output buffer to StdOut.
; Output buffer contents may be bigger (Unix --> DOS)
; or smaller (DOS --> Unix) than the input buffer contents.
; In:   ES:DI -> one past the last converted byte in OUTBUFF
; Out:  CF clear: everything was written (or there was nothing to write)
;       CF set:   write failed; AX = DOS error code, or SHORT_WRITE_ERR
;                 when DOS took fewer bytes than requested
;       DI -> output buffer start, ready for the next bufferful
; Preserves CX.  Destroys AX,BX,DX.
; A write that returns CF clear but AX < CX means not all the data went
; out (a full disk is the usual cause), so it is reported as a failure
; instead of silently truncating the output.
; NOTE(review): a character device may also return a short count; that
; is treated as a failure too -- confirm this is acceptable.
SHORT_WRITE_ERR	EQU	1DH		;DOS error code 1DH (write fault)

Write_Output proc near
	mov	dx,offset OUTBUFF	;output buffer start
	mov	ax,di			;output buffer's last byte+1
	sub	ax,dx			;last byte (+1) - start=bytes to write
	ja	Write_Out1		;ok, we have output to write
	xor	ax,ax			;nothing to write
	clc				;insure CF clear
	ret

Write_Out1:
	push	cx			;preserve CX
	mov	cx,ax			;bytes to write
	mov	bx,STDOUT		;output to StdOut
	mov	ah,40H			;write to file/device
	int	21H
	jc	Write_Out2		;DOS reported an error, code in AX
	cmp	ax,cx			;did every byte go out?
	jnb	Write_Out2		;yep (and CF is clear)
	mov	ax,SHORT_WRITE_ERR	;nope: non-0 so the ERRORLEVEL shows it
	stc				;report failure
Write_Out2:
	pop	cx			;restore CX (POP and MOV leave CF alone)
	mov	di,dx			;ES:DI -> output buffer start
	ret				;CF set if write error
Write_Output endp
;-- Write an AsciiZ message to StdErr.
; In:   DS:DX -> AsciiZ message (ES must equal DS: the scan uses ES:DI)
; Out:  nothing; the result of the DOS write is not checked
; Preserves CX,DX,DI.  Destroys AX,BX.
; Only the message text is written: the terminating 0 is not part of it.
Write_StdErr proc near
	push	di
	push	cx
	xor	al,al			;scan for AsciiZ 0
	mov	cx,0FFFFH		;max scan
	mov	di,dx			;ES:DI -> message's first char
	repne scasb			;find AsciiZ 0
	not	cx			;flip: CX = bytes scanned, 0 included
	dec	cx			;drop the terminator, CX = msg length
	jcxz	WSE_Done		;empty message: nothing to show
	mov	bx,STDERR		;write to StdErr
	mov	ah,40H			;write to file/device
	int	21H
WSE_Done:
	pop	cx
	pop	di
	ret
Write_StdErr endp
;Runtime file buffers start here,
;and will overwrite startup code (Parse_CmdLine)
;and usage message.
;That is safe because Start calls Parse_CmdLine, and prints usage$ on
;failure, before any file read touches INBUFF.
EVEN ;make it easy for 8086 family
INBUFF label byte ;input buffer start (reads ask for BUFFSIZE bytes)
OUTBUFF EQU INBUFF + BUFFSIZE ;output buffer start, right behind the input buffer
;v1.1 OUTBUFF has about BUFFSIZE*2 bytes to play with.
;OUTBUFFEND EQU OUTBUFF+BUFFSIZE ;mark output buffer end
;Usage text, AsciiZ; Parse_CmdLine returns DX -> usage$ when there is
;no usable command line.
usage$ db 'CRLF v1.1 - Convert Unix LF line endings to DOS CR/LF endings.'
db CR,LF
db 'Usage: CRLF [-r] filename.typ [>output]',CR,LF
db 'Where',CR,LF
db ' -r reverses the operation (CR/LF to LF)',CR,LF
db ' filename.typ is the target filename',CR,LF
db 'Default output is to STDOUT (redirect to any file/device).'
db CR,LF,0
;-- Parse PSP command line for -r switch and target filename.
; In:   cmdline -> length byte followed by the command text
; Out:  CF clear: DS:DX -> AsciiZ target filename (terminated in place),
;                 addCr flipped if -r (or /r) was given,
;                 filename and conversion direction shown on StdErr
;       CF set:   DX -> usage or 'Unknown switch' message
; Destroys AX,BX,CX,SI.
; The scan is bounded both by the length byte and by the terminating CR,
; so a switch with no filename after it yields the usage message instead
; of wandering past the end of the command line.  The filename ends at
; the first blank, tab or CR, so trailing blanks are not part of it.
MAXTAIL	EQU	7EH			;longest text that fits between 81H and 0FFH
					;with room for a terminator

Parse_CmdLine proc near
	mov	si,offset cmdline	;PSP cmdline length byte
	xor	ah,ah			;clear msb
	lodsb				;snarf length byte
	mov	cx,ax			;CX=cmdline length
	mov	dx,offset usage$	;assume no cmdline
	jcxz	PC_Bad			;return CF set
	cmp	cx,MAXTAIL		;believable length?
	jbe	PC_LenOk		;yep
	mov	cx,MAXTAIL		;nope, never scan beyond the cmdline area
PC_LenOk:
	call	Next_Char		;gobble any spaces, tabs
	jcxz	PC_Bad			;nothing but white space
					;AL = first real cmdline char
					;SI -> next cmdline char
					;CX = chars left, counting the one in AL
	cmp	al,'-'			;got a switch?
	jz	PC_Switch		;yep
	cmp	al,'/'			;be nice, test for other switch
	jz	PC_Switch
	cmp	al,'?'			;asking for help?
	jz	PC_Bad			;yep, DX -> usage msg
	jmp	short PC_FileName	;should be target filename's first char

PC_Bad:
	stc				;return CF set for failure
	ret				;DX -> error msg

;We got a switch
PC_Switch:
	dec	cx			;the switch char itself is used up
	call	Next_Char		;get the switch letter
	jcxz	PC_Bad			;bare switch char: usage, die
	cmp	al,'?'			;-? or /? asks for help
	jz	PC_Bad			;DX still -> usage msg
	mov	dx,offset switchErr$	;'Unknown switch'
	and	al,5FH			;uppercase
	cmp	al,'R'			;we only take 'R' switches for now
	jnz	PC_Bad			;bad
	mov	dx,offset usage$	;if no filename, usage msg
	not	addCr			;flip flag to CR/LF -> LF conversion
	dec	cx			;the switch letter is used up
	call	Next_Char		;filename should be next
	jcxz	PC_Bad			;switch but no filename: usage

;AL = filename's first char, SI -> the char after it,
;CX = chars left on the cmdline, counting that first char.
PC_FileName:
	dec	si			;back up to filename's first char
	mov	dx,si			;remember in DX
PC_FNLup:
	lodsb				;snarf next char
	cmp	al,CR			;CR ends the cmdline
	jz	PC_FNTerm
	cmp	al,' '			;a blank ends the filename
	jz	PC_FNTerm
	cmp	al,9			;so does a tab
	jz	PC_FNTerm
	loop	PC_FNLup		;at most CX chars
	jmp	short PC_FNZero		;count ran out: SI -> first byte past the name
PC_FNTerm:
	dec	si			;back up to the terminator
PC_FNZero:
	mov	byte ptr [si],0		;AsciiZe it
	call	Write_StdErr		;display filename
	push	dx			;save filename ptr
	mov	dx,offset unix$		;assume 'Unix -> DOS' EOL conversion
	cmp	addCr,LOW(TRUE)		;true?
	jz	PC_1			;yep
	mov	dx,offset dos$		;'DOS -> Unix'
PC_1:
	call	Write_StdErr		;display msg
	pop	dx			;restore filename ptr
	clc				;return CF clear
	ret

;Parse_CmdLine subroutine: skip blanks and tabs.
; In:   SI -> next unread cmdline char, CX = unread chars
; Out:  CX = 0: nothing but white space was left (or the CR was reached)
;       else AL = the char found, SI -> the char after it,
;            CX = chars left, counting the one in AL
Next_Char:
	jcxz	NC_Ret			;cmdline used up, return
NC_Lup:
	lodsb				;snarf cmdline char
	cmp	al,CR			;CR terminates the cmdline
	jz	NC_None			;so there is no next char
	cmp	al,' '			;space?
	jz	NC_ReLup		;yep, gobble
	cmp	al,9			;tab?
	jnz	NC_Ret			;nope: normal char, return it
NC_ReLup:
	loop	NC_Lup
NC_None:
	xor	cx,cx			;tell the caller nothing was found
NC_Ret:
	ret
Parse_CmdLine endp
CSEG ENDS ;closes the segment opened by CSEG SEGMENT
END CrLf ;execution starts at CrLf