home *** CD-ROM | disk | FTP | other *** search
# From the Gawk Manual, modified by a bug fix and removal of punctuation.
#
# Reads text input and prints every distinct word longer than 10
# characters, followed by a summary line "<count> long words".

# Record every word which is used at least once.
#
# Each whitespace-separated field is lower-cased and the first word found
# in it is stored as a key of the `used` array, which strips surrounding
# punctuation such as quotes, commas and parentheses.
{
    for (i = 1; i <= NF; i++) {
        tmp = tolower($i)
        # A word is one or more runs of letters joined by single hyphens.
        # Requiring a letter on both sides of every hyphen keeps tokens such
        # as "--" or "-----------" from being recorded as words, and stops a
        # "--" dash from gluing two words together into one long "word".
        # match() returns the start position (0 if none) and sets
        # RSTART/RLENGTH to the extent of the leftmost-longest match.
        if (match(tmp, /[a-z]+(-[a-z]+)*/)) {
            used[substr(tmp, RSTART, RLENGTH)] = 1
        }
    }
}

# Count and print the distinct words longer than 10 characters.
# Note: `for (x in used)` visits keys in an unspecified order, so the order
# of the printed words may differ between awk implementations.
END {
    num_long_words = 0
    for (x in used) {
        if (length(x) > 10) {
            ++num_long_words
            print x
        }
    }
    print num_long_words, "long words"
}
-