home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Gold Fish 3
/
goldfish_volume_3.bin
/
files
/
dev
/
e
/
amigae
/
src
/
various
/
freq.e
< prev
next >
Wrap
Text File
|
1992-09-02
|
9KB
|
380 lines
/* compute frequency of words in file. read in any ascii file, and spits
the result (as table) on stdout, or process on existing freqlist
FILE/A,SERVER/S,FREQFILE/S,ENGLISHOPTI/S,HEAVY/S
FILE: words to process
SERVER: go into server mode
FREQFILE: expect input to be in frequency list format or just any ascii text
ENGLISHOPTI: perform merges on english words (note: needs multiple passes)
HEAVY: do heavy english opti (does more damage to semantics :-)
FREQFACTOR: minimum factor of frequency for output words in server mode [default: 100]
*/
OPT REG=5,OSVERSION=37
MODULE 'tools/file', 'tools/exceptions', 'class/hash', 'tools/ctype', 'tools/arexx',
'tools/constructors', 'exec/nodes', 'exec/lists'
OBJECT hlink OF hashlink
count, sig
ENDOBJECT
CONST NUMTOP=1000
DEF ght:PTR TO hashtable,pht:PTR TO hashtable, -> silly
gsize,psize, -> number of words
isheavy=FALSE,iseng=FALSE,
minsig=100,top:PTR TO lh,largest
PROC main() HANDLE
DEF m,l,ht=NIL:PTR TO hashtable,myargs:PTR TO LONG,rdargs=NIL
myargs:=[0,0,0,0,0,0]
IF (rdargs:=ReadArgs('FILE/A,SERVER/S,FREQFILE/S,ENGLISHOPTI/S,HEAVY/S,FREQFACTOR/N',myargs,NIL))=NIL THEN Raise("ARGS")
m,l:=readfile(myargs[0])
ght:=NEW ht.hashtable(HASH_HEAVIER)
gsize:=IF iseng:=myargs[2] THEN process_fl(m,l,ht) ELSE process(m,l,ht)
IF gsize<1 THEN gsize:=1
isheavy:=myargs[4]
IF myargs[5] THEN minsig:=Long(myargs[5])
IF myargs[3] THEN ht.iterate({engfilter})
IF myargs[1] THEN server() ELSE ht.iterate({print})
EXCEPT DO
IF rdargs THEN FreeArgs(rdargs)
report_exception()
ENDPROC
PROC process(mem,len,ht:PTR TO hashtable,listd=NIL)
DEF p,c,a,b,h,end,hl:PTR TO hlink,numw=0,list
end:=mem+len
p:=mem
LOOP
SELECT 128 OF c:=p[]++
CASE "\n"
IF p>end THEN RETURN numw
CASE "A" TO "Z", "a" TO "z"
IF c<="Z" THEN p[-1]:=c+32
a:=p-1
WHILE isalpha(c:=p[])
IF c<="Z" THEN p[]:=c+32
p++
ENDWHILE
hl,h:=ht.find(a,b:=p-a)
IF hl=NIL THEN ht.add(NEW hl,h,a,b)
hl.count:=hl.count+1
numw++
p[]++:=0
IF listd
^listd:=list:=NEW [NIL,hl]:LONG
listd:=list
ENDIF
ENDSELECT
ENDLOOP
ENDPROC
PROC process_fl(m,l,ht:PTR TO hashtable)
DEF b,h,end,hl:PTR TO hlink,v,s,numw=0
end:=m+l
s:=m
WHILE s<end
v,b:=Val(s)
s:=s+b+1
b:=s
WHILE b[]<>"\n" DO b++
b:=b-s
hl,h:=ht.find(s,b)
IF hl=NIL THEN ht.add(NEW hl,h,s,b)
hl.count:=hl.count+v
numw:=numw+v
s:=s+b
s[]++:=0
ENDWHILE
ENDPROC numw
/*
["."=checked, "*"=sem_danger, "#"=not_impl]
safe extension optimisations:
.. Xed X | Xe conversed
. Xied Xy crucified
. XYYed XY crammed, abhorred
.. Xing X | Xe conspiring
. Xan Xa american, an
. Xian Xy | Xia hungarian, australian
. Xier Xy copier
. Xs X conveys, as?, this?
. Xous X courageous
. Xies Xy contemporaries
. Xness X remoteness
Xy Xe argueably
.# Xly X | Xe convincingly
. Xility Xle intangibility
. Xacy Xate indelicacy
less safe extension optimisations:
activities -> activity -> active -> act
* Xic X alcoholic
* Xive X | Xe constructive
** Xable X | Xe argueable
### Xial X | Xe | Xia residential
# Xtial Xce consequential
* Xism X alcoholism
* Xion X | Xe damnation, deallocation
* Xor X | Xe coordinator
# Xious Xy ceremonious
## Xant X | Xe colorant
## Xment X | Xe containment
# Xlet X booklet
.*# Xily X | Xe | Xy particularily, family?
* Xity X actuality
not used for now:
Xves Xfe leaves
Xer X | Xe manager?
Xward X upward, awkward?, reward?
Xar X singular?
Xss
Xibly
Xend
safe prefix optimisations:
unX X unacceptable -> same as "not X"
imX X imperfect, image?
inX X incoherent
less safe prefix optimisations:
deX X decompression
reX X rebuilt
misX X misguided
not used for now:
overX X overflow?
preX X prefixed?
disX X dissatisfied, discover?
upX X uproar?
superX X superimpose?
nonX X nondeterministically?
*/
PROC engfilter(tl:PTR TO hlink,d)
DEF l,s,hl=NIL:PTR TO hlink,v,w,x,y,z,t[100]:STRING,min=3 ->4?
l:=tl.len
s:=tl.data
z:=s[l-1]
IF l>1
y:=s[l-2]
IF l>2
x:=s[l-3]
IF l>3
w:=s[l-4]
IF l>4 THEN v:=s[l-4]
ENDIF
ENDIF
ENDIF
SELECT 128 OF z
CASE "c"
IF y="i" THEN hl:=fh(s,l-2) -> ic
CASE "d"
IF y="e"
IF x="i" -> ied
hl:=suf(t,s,l-3,'y')
ELSEIF x=w -> XXed
hl:=f(s,l-3)
ELSE -> ed
IF (hl:=f(s,l-2))=NIL THEN hl:=f(s,l-1)
ENDIF
ENDIF
CASE "e"
IF (x="i") AND (y="v") -> ive
IF (hl:=fh(s,l-3))=NIL THEN hl:=sufh(t,s,l-3,'e')
ELSEIF (w="a") AND (x="b") AND (y="l")
IF (hl:=fh(s,l-4))=NIL THEN hl:=sufh(t,s,l-4,'e')
ENDIF
CASE "g"
IF (x="i") AND (y="n") -> ing
IF (hl:=f(s,l-3))=NIL THEN hl:=suf(t,s,l-3,'e')
ENDIF
CASE "m"
IF (x="i") AND (y="s") THEN hl:=fh(s,l-3) -> ism
CASE "n"
IF y="a"
IF x="i" -> ian
IF (hl:=suf(t,s,l-3,'y'))=NIL THEN hl:=f(s,l-1)
ELSE -> an
hl:=f(s,l-1)
ENDIF
ELSEIF (y="o") AND (x="i") -> ion
IF (hl:=fh(s,l-3))=NIL THEN hl:=sufh(t,s,l-3,'e')
ENDIF
CASE "r"
IF y="o" -> or
IF (hl:=fh(s,l-2))=NIL THEN hl:=sufh(t,s,l-2,'e')
ENDIF
CASE "s"
IF (x="o") AND (y="u") -> ous
hl:=f(s,l-3)
ELSEIF (x="i") AND (y="e") -> ies
hl:=suf(t,s,l-3,'y')
ELSEIF (w="n") AND (x="e") AND (y="s") -> ness
hl:=f(s,l-4)
ELSE -> s
hl:=f(s,l-1)
ENDIF
CASE "y"
IF y="l"
IF x="i" -> ily
IF (hl:=fh(s,l-3))=NIL THEN hl:=sufh(t,s,l-3,'e')
ELSE -> ly
hl:=f(s,l-2)
ENDIF
ELSEIF (y="t") AND (x="i")
IF (v="i") AND (w="l") -> ility
hl:=suf(t,s,l-5,'le')
ELSE -> ity
hl:=fh(s,l-3)
ENDIF
ELSEIF y="c" -> acy
hl:=suf(t,s,l-2,'te')
ELSE -> y
hl:=suf(t,s,l-1,'e')
ENDIF
ENDSELECT
IF hl=NIL
min:=4
IF IF ((x:=s[])="u") THEN s[1]="n" ELSE IF x="i" THEN -> un/in/im
((y:=s[1])="n") OR (y="m") ELSE FALSE
hl:=f(s+2,l-2)
ENDIF
ENDIF
IF hl
IF (hl.len>=min) AND ((hl.count>1) OR (hl.count=0))
hl.count:=hl.count+tl.count
tl.count:=0
ENDIF
ENDIF
ENDPROC
PROC suf(dest,src,len,suf)
StrCopy(dest,src,len)
StrAdd(dest,suf)
ENDPROC ght.find(dest,EstrLen(dest))
PROC f(s,l) IS ght.find(s,l)
PROC fh(s,l) IS IF isheavy THEN ght.find(s,l) ELSE NIL
PROC sufh(d,s,l,su) IS IF isheavy THEN suf(d,s,l,su) ELSE NIL
PROC print(l:PTR TO hlink,d)
PrintF('\d[8]\t\s\n',l.count,l.data)
ENDPROC
PROC server()
WriteF('Starting Arexx Server, port: "FREQPORT", commands: "QUIT", "FREQ"\n')
rx_HandleAll({process_msg},'FREQPORT')
ENDPROC
PROC process_msg(s)
DEF cl,a=NIL,q=FALSE
IF (cl:=InStr(s,' '))>0 THEN a:=s+cl+1
IF StrCmp(s,'QUIT',cl)
WriteF('Terminating server.\n')
q:=TRUE
ELSEIF StrCmp(s,'FREQ',cl)
WriteF('Processing file "\s".\n',a)
do(a)
ELSE
WriteF('Unknown Command: "\s"\n',s)
ENDIF
ENDPROC q,0,NIL
PROC do(filename) HANDLE
DEF m=NIL,l,ht=NIL:PTR TO hashtable,list=NIL
top:=pht:=NIL
m,l:=readfile(filename)
pht:=NEW ht.hashtable(HASH_HEAVY)
psize:=process(m,l,ht,{list})
IF psize<1 THEN psize:=1
IF iseng THEN ht.iterate({engfilter})
WriteF('word ratio = \d:\d\n',gsize,psize)
largest:=0
ht.iterate({significant})
top:=newlist()
largest:=largest/127+1
ht.iterate({sort})
writenewfile(list,filename)
writetop(filename)
EXCEPT DO
END top
IF pht THEN pht.end_links(SIZEOF hlink)
END pht
IF m THEN freefile(m)
report_exception()
ENDPROC
PROC significant(phl:PTR TO hlink,d)
DEF numg=1,nump,hl:PTR TO hlink,sig
nump:=phl.count
IF hl:=ght.find(phl.data,phl.len) THEN numg:=hl.count
IF numg<1 THEN numg:=1
IF nump<1 THEN nump:=1
phl.sig:=sig:=Div(Div(gsize,numg),Div(psize,nump))
IF sig>largest THEN largest:=sig
ENDPROC
PROC sort(phl:PTR TO hlink,d)
IF phl.sig>minsig THEN Enqueue(top,newnode(NIL,phl,0,phl.sig/largest))
ENDPROC
PROC writenewfile(list:PTR TO LONG,fn)
DEF hl:PTR TO hlink,o:PTR TO LONG,fh,nfn[200]:STRING,numc=0
StrCopy(nfn,fn)
StrAdd(nfn,'.sig')
IF fh:=Open(nfn,NEWFILE)
WHILE o:=list
hl:=list[1]
list:=list[]
END o[2]
IF hl.sig>minsig
IF numc+hl.len+1>78 THEN (numc:=0) BUT FputC(fh,"\n")
Fputs(fh,hl.data)
FputC(fh," ")
numc:=numc+hl.len+1
ENDIF
ENDWHILE
FputC(fh,"\n")
Close(fh)
ELSE
WriteF('Problem opening "\s"\n',nfn)
ENDIF
ENDPROC
PROC writetop(fn)
DEF n:PTR TO ln,o,fh,nfn[200]:STRING,hl:PTR TO hlink,num,totsig=0,f
StrCopy(nfn,fn)
StrAdd(nfn,'.top')
IF fh:=Open(nfn,NEWFILE)
n:=top.head; num:=0
WHILE o:=n.succ
hl:=n.name; num++; EXIT num=NUMTOP; totsig:=totsig+hl.sig; n:=o
ENDWHILE
f:=totsig/num/100+1
WriteF('tot=\d,num=\d,f=\d\n',totsig,num,f)
n:=top.head; num:=0
WHILE o:=n.succ
hl:=n.name; num++; EXIT num=NUMTOP; VfPrintf(fh,'\s:\d\n',[hl.data,hl.sig/f]:LONG); END n; n:=o
ENDWHILE
FputC(fh,"\n")
Close(fh)
ELSE
WriteF('Problem opening "\s"\n',nfn)
ENDIF
ENDPROC