blob: b74ed384a98e8b5f907558d57859c2c2f13fb174 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
#include "matcher.h"
#ifndef defaultmatcher_h
#define defaultmatcher_h
class DefaultMatcher : public Matcher {
public:
DefaultMatcher() {
}
// Compares 2 words and tries to give a percentage assurance of a match
// TODO: could use more smarts here
//
virtual int compare(const SWBuf &s1, const SWBuf &s2) {
SWBuf t1 = sanitizeWord(s1);
SWBuf t2 = sanitizeWord(s2);
int retVal = 0;
SWBuf largest = (t1.length() > t2.length()) ? t1 : t2;
SWBuf smallest = (t1.length() > t2.length()) ? t2 : t1;
int matches = 0;
int j = 0;
for (int i = 0; i < smallest.length() && j < largest.length(); i++) {
while (j < largest.length()) {
if (smallest[i] == largest[j++]) {
matches++;
break;
}
}
}
return (((float)matches) / largest.length()) * 100;
}
//
// This is where the magic happens
//
// we must point each targetMod word to an XMLTag
//
// when the magic is done, and your guess is made
// populate targetWordTags with the integer offset
// into wordTags for which XMLTag you think it should
// be.
//
virtual void matchWords(vector<int> &targetWordTags, const vector<SWBuf> &targetWords, const vector<SWBuf> &fromWords, vector<int> fromWordTags) {
// initialize our results to all -1 so we can pop around and set
// words as we find them, and know which ones we haven't yet set
for (int i = 0; i < targetWords.size(); i++) targetWordTags.push_back(-1);
// poor effort attempt
int j = 0;
for (int i = 0; i < targetWords.size(); ++i) {
for (int j = 0; j < fromWords.size(); ++j) {
if (fromWordTags[j] == -1) continue;
int match = compare(targetWords[i], fromWords[j]);
// if we have a better than XX% match of sequencial characters
// then we'll say we have a match
if (match > 49) {
targetWordTags[i] = fromWordTags[j];
fromWordTags[j] = -1;
break;
}
// TOTRY: maybe check one word before and after?
//
// be creative!
//
}
}
}
virtual SWBuf sanitizeWord(const SWBuf &word) {
SWBuf t1 = word;
t1.toUpper();
return t1;
}
};
#endif
|