home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Nebula
/
nebula.bin
/
SourceCode
/
libcs
/
srchscore.c
< prev
next >
Wrap
C/C++ Source or Header
|
1990-12-11
|
4KB
|
133 lines
/*
* Copyright (c) 1990 Carnegie Mellon University
* All Rights Reserved.
*
* Permission to use, copy, modify and distribute this software and its
* documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND CARNEGIE MELLON UNIVERSITY
* DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT
* SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE FOR ANY SPECIAL, DIRECT,
* INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
* RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
* CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Users of this software agree to return to Carnegie Mellon any
* improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
* Export of this software is permitted only after complying with the
* regulations of the U.S. Deptartment of Commerce relating to the
* Export of Technical Data.
*/
/* srchscore -- perform approximate string matching
*
* int srchscore (big,small);
* tells how well "small" matches substrings of "big".
* The score ranges from 0 to (strlen(small) ** 2).
* The score is something like the sum, over the longest
* matching contiguous substrings, of the square of the
* length of the substring minus some penalty for
* not-quite-exactly-matching substrings. Case is not significant.
*
* The value ranges from 0 to strlen(small)**2.
*
* HISTORY
* $Log: srchscore.c,v $
* Revision 1.2 90/12/11 17:59:18 mja
* Add copyright/disclaimer for distribution.
*
* 06-Feb-81 Steven Shafer (sas) at Carnegie-Mellon University
* Added limit before "return" so value is never negative.
*
* 23-Jan-80 Steven Shafer (sas) at Carnegie-Mellon University
* Created. The same as Dave McKeown's matching function from the
* PDP-11; only the identifiers have been changed to protect somebody.
*
*/
#define INSERTCOST 1 /* penalty to insert 1 char in small */
#define DELETECOST 1 /* penalty to delete 1 char from small */
#define CHANGECOST 3 /* penalty to change 1 char in small */
static char *newsmall; /* next character to match in "small" */
static int smatch (big,small)
char *big,*small;
{
register char *b,*s; /* current chars in big and small */
register int goon; /* TRUE while still looping */
int nmatch, penalty; /* match count; total penalty */
nmatch = 0;
penalty = 0;
goon = 1;
b = big;
s = small;
while (*b && *s && goon) {
if (*b == *s) {
/* just do stuff at end of loop */
}
else if (*(b+1) == *s) {
b++;
penalty += INSERTCOST;
}
else if (*b == *(s+1)) {
s++;
penalty += DELETECOST;
}
else if (*(b+1) == *(s+1)) {
b++; s++;
penalty += CHANGECOST;
}
else {
goon = 0;
}
if (goon) { /* something matched */
b++; s++;
nmatch++;
}
}
newsmall = s;
return ((nmatch * nmatch) - penalty);
}
int srchscore (big,small)
char *big,*small;
{
register int score,bestscore,total;
char *b,*s,*bestsmall;
total = 0;
if (*big) {
s = small;
while (*s) {
bestscore = -1;
for (b=big; *b; b++) {
score = smatch (b,s);
if (score > bestscore) {
bestscore = score;
bestsmall = newsmall;
}
}
if (bestsmall > s) {
total += bestscore;
s = bestsmall;
}
else {
if (total >= DELETECOST-1) total -= DELETECOST;
s++;
}
}
}
if (total < 0) total = 0;
return (total);
}