home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Simtel MSDOS 1992 June
/
SIMTEL_0692.cdr
/
msdos
/
pcmag
/
vol6n02.arc
/
PARSE.ASM
< prev
next >
Wrap
Assembly Source File
|
1987-12-13
|
16KB
|
299 lines
; Parse.asm
; Counts the printable characters, words and sentences
; in an ASCII text file and combines the counts into
; a reading level (grade level) index.
; Syntax PARSE [d:][path]filename
CODE SEGMENT ;One segment holds both the code and the data.
ASSUME CS:CODE,DS:CODE ;CS and DS are assumed to address that same segment.
ORG 100H ;Assemble starting at offset 100H.
;REMEMBER TO EXE2BIN (reminder kept from the original banner).
START: JMP BEGINNING ;Entry point: skip over the data area that follows.
; DATA AREA
; ---------
; Copyright / author text. Not referenced by any instruction in this file.
NOTICE DB ' Copyright 1986 Ziff-Davis Publishing Co.'
NOTICE_2 DB ' Programmed by Michael J. Mefford'
; 32-bit count of characters >= 32 (after the high bit is stripped):
; CHAR_COUNT is the low word, CHAR_CT_CARRY the high word.
CHAR_COUNT DW 0
CHAR_CT_CARRY DW 0
; 32-bit count of letters and digits that belong to words:
; WORDCHAR_CT is the low word, WORDCHAR_CARRY the high word.
WORDCHAR_CT DW 0
WORDCHAR_CARRY DW 0
; 16-bit totals maintained by PARSE and by the end-of-file code.
WORD_COUNT DW 0
SENTENCE_COUNT DW 0
LONG_WORDS DW 0
; Result of the most recent GET_DECIMAL: whole part and one rounded
; decimal digit. DISPLAY_TENTH prints them.
INTEGER DW 0
DECIMAL DW 0
; WORD_FLAG is 1 while the scanner is inside a word.
WORD_FLAG DB 0
; SENTENCE_FLAG is 1 when the previous character was '.', '!' or '?'.
SENTENCE_FLAG DB 0
; DISPLAY_FLAG becomes non-zero once DIVIDE has printed a non-zero digit;
; NUMBERS clears it again when the number is complete.
DISPLAY_FLAG DB 0
; Output texts, each terminated by '$' for DOS function 9.
ALPA_STRING DB ' printable characters.',13,10,'$'
WORD_STRING DB ' words.',13,10,'$'
LONG_STRING DB ' words with three or more syllables.',13,10,'$'
SENT_STRING DB ' sentences.',13,10,'$'
CHAR_STRING DB ' avg. characters/word.',13,10,'$'
LENG_STRING DB ' avg. words/sentence.',13,10,'$'
FOG_INDEX DB 13,10,' Reading index',13,10,'$'
GRADE_LEVEL DB ' grade level.',13,10,'$'
DECIMAL_PT DB '.$'
; Printed in front of the low word when CHAR_CT_CARRY is non-zero.
CARRY_64K DB '65536+$'
;---------------------------------------------------------------;
; Program entry.                                                ;
;  1. Turn the command tail at offset 80H into an ASCIIZ file   ;
;     name and skip its leading spaces.                         ;
;  2. Open the file read-only; exit silently if that fails.     ;
;  3. Read the file in batches of up to 64,000 bytes into       ;
;     BUFFER and hand each batch to PARSE, which jumps back to  ;
;     READ when the batch is consumed.                          ;
;  4. At end of file, account for a word / sentence that was    ;
;     still open, display the results, close the file and exit. ;
;                                                               ;
; BX is the running length of the current word inside PARSE,    ;
; but DOS needs the file handle in BX for the read call. The    ;
; word length is therefore parked in DI around each read so a   ;
; batch boundary (and the very first word) starts from the      ;
; right value instead of from the handle number.                ;
;---------------------------------------------------------------;
BEGINNING:      MOV     DI,80H                  ;Command tail: length byte at 80H.
                MOV     BL,[DI]                 ;BX = length of the tail
                XOR     BH,BH                   ; (high half cleared).
                MOV     BYTE PTR DS:[BX+DI+1],0 ;Terminate the tail with a zero byte.
NEXT_BYTE:      INC     DI                      ;Advance to the next tail byte.
                CMP     BYTE PTR DS:[DI],32     ;Is it a space?
                JZ      NEXT_BYTE               ;If yes, keep skipping.
                MOV     DX,DI                   ;DS:DX -> first non-space = file name.
                MOV     AX,3D00H                ;Open file, read-only
                INT     21H                     ; via DOS.
                JC      EXIT                    ;Open failed: nothing to close.
                PUSH    AX                      ;Keep the file handle on the stack.
                XOR     BX,BX                   ;No characters in the current word yet.

READ:           MOV     DI,BX                   ;Park the running word length.
                POP     BX                      ;File handle into BX for DOS
                PUSH    BX                      ; and back on the stack for CLOSE_FILE.
                MOV     DX,OFFSET BUFFER        ;Read into BUFFER,
                MOV     CX,64000                ; at most 64,000 bytes,
                MOV     AH,3FH                  ; read from file
                INT     21H                     ; via DOS.
                JC      CLOSE_FILE              ;Read error: close and quit.
                MOV     BX,DI                   ;Restore word length (MOV keeps flags).
                CMP     AX,0                    ;Zero bytes read means end of file.
                JNZ     PARSE                   ;Otherwise scan this batch.

                CMP     WORD_FLAG,0             ;Did the file end inside a word?
                JZ      GO_DISPLAY              ;If no, totals are final.
                INC     WORD_COUNT              ;Count that last word
                CMP     BX,8                    ; and apply the same long-word
                JB      LAST_NOT_LONG           ; test that WORD_END uses.
                INC     LONG_WORDS
LAST_NOT_LONG:  CMP     SENTENCE_FLAG,0         ;Did it also end on . ! or ? ?
                JZ      GO_DISPLAY              ;If no, no pending sentence.
                INC     SENTENCE_COUNT          ;Count the last sentence.

GO_DISPLAY:     CALL    DISPLAY                 ;Print the statistics.
CLOSE_FILE:     POP     BX                      ;Get file handle
                MOV     AH,3EH                  ; and close file
                INT     21H                     ; via DOS.
EXIT:           INT     20H                     ;Terminate program.
;-------------------------------------------------------------;
; PARSE scans one batch of file data and updates the counters. ;
; On entry AX holds the number of bytes that were read into    ;
; BUFFER. When the batch is used up control jumps back to READ.;
;                                                              ;
; Per character (after bit 7 is cleared):                      ;
;  * 32 and above adds one to CHAR_COUNT/CHAR_CT_CARRY.        ;
;  * 32 and below is a delimiter: it closes a pending sentence ;
;    (SENTENCE_FLAG set) and a pending word (WORD_FLAG set).   ;
;  * '.', '!' and '?' set SENTENCE_FLAG.                       ;
;  * '-' clears WORD_FLAG without counting a word.             ;
;  * Digits and letters add to WORDCHAR_CT and set WORD_FLAG.  ;
; A word whose length in BX is 8 or more counts as a long word.;
;                                                              ;
; BX is the running length of the current word. This block     ;
; never initialises it; it relies on the value in BX on entry. ;
; NOTE(review): LOOP below is a short jump back to GET_CHAR;   ;
; check its range before adding instructions to the loop body. ;
;-------------------------------------------------------------;
PARSE: MOV CX,AX ;Loop counter = bytes read.
MOV SI,OFFSET BUFFER ;SI -> first byte of the batch.
CLD ;Make LODSB step forward.
GET_CHAR: LODSB ;AL = next byte of the batch.
AND AL,7FH ;Strip Wordstar high bit
CMP AL,32 ;Is it below a space character?
JB DELIMITER ;If yes, do not count it as printable.
ADD CHAR_COUNT,1 ;Add one to character count
ADC CHAR_CT_CARRY,0 ; and carry into the high word.
DELIMITER: CMP AL,32 ;Space or below is a delimiter.
JA SENTENCE? ;Not a delimiter: classify the character.
CMP SENTENCE_FLAG,0 ;Delimiter: was the previous char . ! or ? ?
JZ WORD? ;If no, no sentence ends here.
INC SENTENCE_COUNT ;If yes, a sentence just ended.
WORD?: CMP WORD_FLAG,0 ;Were we inside a word?
JNZ WORD_END ;If yes, the delimiter ends that word.
SENTENCE?: CMP AL,'.' ;Is it a period? (Delimiters outside a word also pass through here.)
JZ SENTENCE ;If yes, flag as possible sentence.
CMP AL,'!' ;Is it '!'
JZ SENTENCE ;If yes, flag as possible sentence.
CMP AL,'?' ;Is it '?'
JZ SENTENCE ;If yes, flag as possible sentence.
CMP AL,'-' ;Is it '-'
JZ NOT_WORD_END ;If yes, clear WORD_FLAG; no word is counted and BX is kept.
CMP AL,'0' ;Is it a zero or above?
JB NOT_SENTENCE ;If no, it is not a word character.
CMP AL,'9' ;Is it a nine or below?
JBE GOT_WORD ;If yes, digits count as word characters.
AND AL,5FH ;Clear bit 5 so lower case letters compare as upper case.
CMP AL,'A' ;Is it an 'A' or above?
JB NOT_SENTENCE ;If no, it is not a letter.
CMP AL,'Z' ;Is it a 'Z' or below?
JA NOT_SENTENCE ;If no, it is not a letter.
GOT_WORD: ADD WORDCHAR_CT,1 ;Letter or digit: add one to the
ADC WORDCHAR_CARRY,0 ; 32-bit word character count.
MOV WORD_FLAG,1 ;Flag that we are in a word
INC BX ; increment char count of word
JMP SHORT NOT_SENTENCE ; and clear any pending sentence end.
SENTENCE: MOV SENTENCE_FLAG,1 ;Flag as possible sentence
JMP SHORT NEXT_CHAR ; leaving WORD_FLAG as it is.
WORD_END: INC WORD_COUNT ;Add one to word count.
CMP BX,8 ;Is the word 8 characters or more?
JB NOT_LONGWORD ;If no, not a long word
INC LONG_WORDS ;else increment long words
NOT_LONGWORD: XOR BX,BX ; and reset char counter.
NOT_WORD_END: MOV WORD_FLAG,0 ;Flag as not in a word.
NOT_SENTENCE: MOV SENTENCE_FLAG,0 ;Flag as not possible sentence.
NEXT_CHAR: LOOP GET_CHAR ;Repeat until CX bytes are done.
JMP READ ;Go read next batch.
;*************;
; Subroutines ;
;*************;
;-----------------------------------------------------------------;
; DISPLAY prints the report, one line per statistic:              ;
;   printable characters, words, long words, sentences,           ;
;   average characters per word, average words per sentence,      ;
;   and the reading index                                         ;
;       40 * LONG_WORDS / WORD_COUNT                              ;
;     +  4 * WORD_COUNT / (10 * SENTENCE_COUNT).                  ;
;                                                                 ;
; The averages divide by WORD_COUNT and SENTENCE_COUNT. A zero    ;
; divisor would raise a CPU divide error, so the routine returns  ;
; early, after the lines that are still meaningful, when there    ;
; are no words or no sentences.                                   ;
;                                                                 ;
; NOTE: only the low word of 10 * SENTENCE_COUNT is used as the   ;
; divisor of the second index term.                               ;
;-----------------------------------------------------------------;
DISPLAY:        MOV     DL,10                   ;Start with a linefeed
                MOV     AH,2                    ; to separate the report
                INT     21H                     ; from the command line.

                CMP     CHAR_CT_CARRY,0         ;Did the char. count pass 65535?
                JZ      NO_CARRY                ;If no, display low word only.
                MOV     DX,OFFSET CARRY_64K     ;If yes, prefix "65536+".
                MOV     AH,9
                INT     21H
NO_CARRY:       MOV     BX,CHAR_COUNT           ;Low word of character count
                CALL    NUMBERS                 ; as decimal,
                MOV     DX,OFFSET ALPA_STRING   ; followed by its caption.
                CALL    DISPLAY_TEXT

                MOV     BX,WORD_COUNT           ;Word count
                CALL    NUMBERS
                MOV     DX,OFFSET WORD_STRING
                CALL    DISPLAY_TEXT

                MOV     BX,LONG_WORDS           ;Long word count
                CALL    NUMBERS
                MOV     DX,OFFSET LONG_STRING
                CALL    DISPLAY_TEXT

                MOV     BX,SENTENCE_COUNT       ;Sentence count
                CALL    NUMBERS
                MOV     DX,OFFSET SENT_STRING
                CALL    DISPLAY_TEXT

                CMP     WORD_COUNT,0            ;No words: every remaining figure
                JNZ     AVG_CHARS               ; would divide by zero,
                RET                             ; so stop here.

AVG_CHARS:      MOV     AX,WORDCHAR_CT          ;DX:AX = word character count
                MOV     DX,WORDCHAR_CARRY
                MOV     CX,WORD_COUNT           ;Divisor = word count.
                DIV     CX
                CALL    GET_DECIMAL             ;Remainder -> one decimal digit.
                CALL    DISPLAY_TENTH
                MOV     DX,OFFSET CHAR_STRING
                CALL    DISPLAY_TEXT

                CMP     SENTENCE_COUNT,0        ;No sentences: words/sentence and
                JNZ     AVG_WORDS               ; the index are undefined,
                RET                             ; so stop here.

AVG_WORDS:      MOV     AX,WORD_COUNT           ;DX:AX = word count
                XOR     DX,DX
                MOV     CX,SENTENCE_COUNT       ;Divisor = sentence count.
                DIV     CX
                CALL    GET_DECIMAL             ;Remainder -> one decimal digit.
                CALL    DISPLAY_TENTH
                MOV     DX,OFFSET LENG_STRING
                CALL    DISPLAY_TEXT

                MOV     DX,OFFSET FOG_INDEX     ;Display "Reading index"
                CALL    DISPLAY_TEXT

                MOV     AX,40                   ;First term: 40 * long words
                MUL     LONG_WORDS
                MOV     CX,WORD_COUNT           ; divided by word count.
                DIV     CX
                CALL    GET_DECIMAL
                PUSH    DECIMAL                 ;Save both halves of the first term.
                PUSH    INTEGER

                MOV     AX,10                   ;Second term divisor:
                MUL     SENTENCE_COUNT          ; 10 * sentence count (low word).
                PUSH    AX
                MOV     AX,4                    ;Second term dividend:
                MUL     WORD_COUNT              ; 4 * word count in DX:AX.
                POP     CX
                DIV     CX
                CALL    GET_DECIMAL

                POP     AX                      ;Add the first term's integer part
                ADD     INTEGER,AX
                POP     AX                      ; and its decimal digit.
                ADD     AX,DECIMAL
                CALL    ROUND?                  ;Carry a decimal sum of 10+ into INTEGER.
                CALL    DISPLAY_TENTH
                MOV     DX,OFFSET GRADE_LEVEL
                CALL    DISPLAY_TEXT
                RET
;------------------------------------------------------------;
; NUMBERS prints the 16-bit value in BX as five decimal      ;
; digits; TENTHS is a second entry that prints only the last ;
; three (hundreds, tens, ones). Each digit comes from DIVIDE,;
; which blanks leading zeros while DISPLAY_FLAG is zero.     ;
;                                                            ;
; DISPLAY_FLAG is forced non-zero before the ones digit, so  ;
; a value of zero is shown as "0" rather than as all blanks. ;
; DISPLAY_FLAG is cleared again before returning.            ;
;------------------------------------------------------------;
NUMBERS:        MOV     CX,10000                ;Ten thousands digit.
                CALL    DIVIDE
                MOV     CX,1000                 ;Thousands digit.
                CALL    DIVIDE
TENTHS:         MOV     CX,100                  ;Hundreds digit.
                CALL    DIVIDE
                MOV     CX,10                   ;Tens digit.
                CALL    DIVIDE
                MOV     DISPLAY_FLAG,1          ;Always print the ones digit.
                MOV     CX,1                    ;Ones digit.
                CALL    DIVIDE
                MOV     DISPLAY_FLAG,0          ;Reset for the next number.
                RET
;------------------------------------------------------------------------;
; DIVIDE extracts and prints one decimal digit.                          ;
;   In:  BX = value still to be printed, CX = power of ten.              ;
;   Out: BX = remainder of BX / CX; the quotient digit is printed, or a  ;
;        blank in its place while DISPLAY_FLAG is still zero.            ;
; DISPLAY_FLAG picks up the bits of every quotient, so it turns non-zero ;
; at the first non-zero digit.                                           ;
; DISP_NUMBER is a second entry: it prints the digit value held in DL.   ;
;------------------------------------------------------------------------;
DIVIDE:         XOR     DX,DX                   ;DX:AX = 0:BX
                MOV     AX,BX
                DIV     CX                      ;AX = digit, DX = remainder.
                MOV     BX,DX                   ;Keep the remainder for the next digit.
                MOV     DL,AL                   ;Digit to print.
                OR      DISPLAY_FLAG,AL         ;Merge digit into flag; ZF = still all zeros.
                JNZ     DISP_NUMBER             ;Something non-zero seen: print the digit.
                MOV     DL,' ' - '0'            ;Otherwise make the ADD below give a blank.
DISP_NUMBER:    ADD     DL,'0'                  ;Digit value -> ASCII character.
                MOV     AH,2                    ;Display character
                INT     21H                     ; via DOS.
                RET
;------------------------------------------------------------;
; DISPLAY_TEXT prints the '$'-terminated string at DS:DX     ;
; using DOS function 9.                                      ;
;------------------------------------------------------------;
DISPLAY_TEXT:   MOV     AH,09H                  ;DOS function 9: print string.
                INT     21H
                RET
;---------------------------------------------------------------;
; GET_DECIMAL turns the result of a DIV CX into a whole part    ;
; and one rounded decimal digit.                                ;
;   In:  AX = quotient, DX = remainder, CX = the divisor used.  ;
;   Out: INTEGER = whole part, DECIMAL = tenths digit (0-9),    ;
;        AX = DECIMAL.                                          ;
; The digit is remainder*10 / CX, rounded up when twice the new ;
; remainder is at least CX. Doubling can carry out of 16 bits   ;
; when CX is above 32768; a carry means the doubled value is    ;
; certainly >= CX, so it is treated as "round up".              ;
;                                                               ;
; ROUND? is a second entry: with AX = a decimal digit sum it    ;
; carries 10 or more into INTEGER and stores the rest in        ;
; DECIMAL.                                                      ;
;---------------------------------------------------------------;
GET_DECIMAL:    MOV     INTEGER,AX              ;Save the whole part.
                MOV     AX,10                   ;DX:AX = remainder * 10
                MUL     DX
                DIV     CX                      ;AX = tenths digit, DX = new remainder.
                ADD     DX,DX                   ;Twice the remainder.
                JC      ROUND_UP                ;Overflowed 16 bits: more than CX.
                CMP     DX,CX                   ;At least half of the divisor?
                JB      NO_ROUND                ;If no, keep the digit.
ROUND_UP:       INC     AX                      ;Round the digit up.
ROUND?:         CMP     AX,10                   ;Did the digit reach ten?
                JB      NO_ROUND                ;If no, store it as is.
                SUB     AX,10                   ;Else keep the ones
                INC     INTEGER                 ; and carry into the whole part.
NO_ROUND:       MOV     DECIMAL,AX              ;Save the tenths digit.
                RET
;------------------------------------------------------------------;
; DISPLAY_TENTH prints INTEGER as three digits (via TENTHS), then  ;
; the decimal point, then the single digit held in DECIMAL.        ;
;------------------------------------------------------------------;
DISPLAY_TENTH:  MOV     BX,INTEGER              ;Whole part, three digit positions.
                CALL    TENTHS
                LEA     DX,DECIMAL_PT           ;"." as a '$'-terminated string.
                CALL    DISPLAY_TEXT
                MOV     DX,DECIMAL              ;DL = tenths digit.
                JMP     DISP_NUMBER             ;Print it; DISP_NUMBER's RET returns to our caller.
BUFFER: ;Start of the file read buffer. READ asks DOS for up to 64,000 bytes here, so nothing else may be placed after this label.
CODE ENDS ;End of the single program segment.
END START ;START is the program entry point.