home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Power-Programmierung
/
CD1.mdf
/
qtawk
/
wordfreq.exp
< prev
Wrap
Text File
|
1990-02-10
|
962b
|
37 lines
# wordfreq - print number of occurrences of each word
# input: text
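# output: number-word pairs followed by per-file totals; the script itself does
#         not sort, so pipe the pairs through an external sort to order by number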
BEGIN {
    # Anchored pattern that recognises lines whose first character is '#'.
    comment_only = /^#/;
    # Word-length threshold checked before a field is counted.
    min_lng = 3;
    # Lowest occurrence count a word needs in order to be printed.
    min_cnt = 1;
}
# Ignore comment-only lines: comment_only is set to /^#/ in BEGIN, so any
# record starting with '#' is skipped before the counting action runs.
comment_only { next; }
# Per-record action: clean the line, then tally every sufficiently long word
# into the global count[] array (keyed by the word itself).
{
    # Progress indicator: write the current record number within the file to stderr.
    fprintf("stderr","%u\n",FNR);
    # Strip trailing comments FIRST. If done after punctuation removal the '#'
    # marker has presumably already been deleted ({_p} is the predefined
    # punctuation class - verify it covers '#') and this would never match.
    gsub(/#.*$/,"");
    gsub(/{_p}/,"");   # remove punctuation
    gsub(/[0-9+^"'`*\$\&~\<\>=\\\/\[\]\(\)\{\}-]+/," ");   # digits/operators -> single blank
    # min_lng is a minimum, so words of exactly min_lng characters are counted
    # (inclusive test, matching the inclusive min_cnt test in FINAL).
    for ( i = 1 ; i <= NF ; i++ ) if ( length($i) >= min_lng ) count[$i]++;
}
# Report action: print every word whose count reaches min_cnt, then a summary,
# and reset the table. NOTE(review): FINAL is presumably QTAwk's per-input-file
# end action (hence FILENAME and the reset) - confirm against the manual.
FINAL {
    # i: words printed, k: distinct words seen, m: sum of the printed counts,
    # w: loop key, j: count of the current word. j is declared here so it no
    # longer leaks out as an accidental global.
    local i = 0, k = 0, m = 0, w, j;
    for ( w in count ) {
        if ( min_cnt <= (j = count[w]) ) {
            print j , w;
            i++;
            m += j;
        }
        k++;
    }
    # Empty the table so later counting starts from zero.
    deletea count;
    printf("File: %s\n",FILENAME);
    printf("Total Words: %lu\nTotal Output: %lu\nTotal Count Output: %lu\n\n",k,i,m);
}