summaryrefslogtreecommitdiffstats
path: root/migratetags/matchers/defaultmatcher.h
diff options
context:
space:
mode:
Diffstat (limited to 'migratetags/matchers/defaultmatcher.h')
-rw-r--r--migratetags/matchers/defaultmatcher.h80
1 files changed, 80 insertions, 0 deletions
diff --git a/migratetags/matchers/defaultmatcher.h b/migratetags/matchers/defaultmatcher.h
new file mode 100644
index 0000000..592dbf5
--- /dev/null
+++ b/migratetags/matchers/defaultmatcher.h
@@ -0,0 +1,80 @@
+#include "matcher.h"
+
+#ifndef defaultmatcher_h
+#define defaultmatcher_h
+
+/**
+ * Default word matcher: pairs words by a fuzzy comparison made after accent
+ * filtering and upper-casing, and gives each target word the tag of the
+ * first still-unused source word that is similar enough.
+ */
+class DefaultMatcher : public Matcher {
+public:
+
+// Compares 2 words and tries to give a percentage assurance of a match.
+//
+// Both words are copied, run through UTF8GreekAccents with its option set
+// to "off", and upper-cased; then the bytes of the shorter word are looked
+// for, in order, in the longer one.  Returns the number of bytes found as a
+// truncated percentage of the longer word's length, or 0 when both words
+// are empty.
+// TODO: could use more smarts here
+//
+virtual int compare(const SWBuf &s1, const SWBuf &s2) {
+    SWBuf t1 = s1;
+    SWBuf t2 = s2;
+    UTF8GreekAccents filter;
+    filter.setOptionValue("off");
+
+    // remove greek accents
+    filter.processText(t1);
+    filter.processText(t2);
+
+    // change to uppercase to match
+    StringMgr::getSystemStringMgr()->upperUTF8(t1.getRawData());
+    StringMgr::getSystemStringMgr()->upperUTF8(t2.getRawData());
+
+    // refer to the working copies rather than copying them a second time
+    SWBuf &largest = (t1.length() > t2.length()) ? t1 : t2;
+    SWBuf &smallest = (t1.length() > t2.length()) ? t2 : t1;
+
+    // nothing to compare; this also keeps the division below away from a
+    // zero length (0/0 is NaN, and NaN converted to int is undefined)
+    if (largest.length() == 0) return 0;
+
+    unsigned long matches = 0;
+    unsigned long j = 0;
+    for (unsigned long i = 0; i < smallest.length() && j < largest.length(); i++) {
+        // the scan never rewinds: a byte that is not found uses up the
+        // rest of the longer word, which ends the comparison
+        while (j < largest.length()) {
+            if (smallest[i] == largest[j++]) {
+                matches++;
+                break;
+            }
+        }
+    }
+    return static_cast<int>((static_cast<float>(matches) / largest.length()) * 100);
+}
+//
+// This is where the magic happens
+//
+// we must point each targetMod word to an XMLTag
+//
+// when the magic is done, and your guess is made
+// populate targetWordTags with the integer offset
+// into wordTags for which XMLTag you think it should
+// be.
+//
+// targetWordTags is reset to exactly one entry per target word; -1 means
+// no match was found.  fromWordTags is taken by value on purpose: tags
+// that have been handed out are struck out (-1) in the local copy only.
+//
+
+virtual void matchWords(vector<int> &targetWordTags, const vector<SWBuf> &targetWords, const vector<SWBuf> &fromWords, vector<int> fromWordTags) {
+
+    // initialize our results to all -1 so we can pop around and set
+    // words as we find them, and know which ones we haven't yet set
+    // (assign, not push_back, so index i always belongs to target word i
+    // even if the caller hands in a vector that is not empty)
+    targetWordTags.assign(targetWords.size(), -1);
+
+    // never read past the end of the shorter of the two source lists
+    vector<int>::size_type fromCount = fromWordTags.size();
+    if (fromWords.size() < fromCount) fromCount = fromWords.size();
+
+    // poor effort attempt
+    for (vector<int>::size_type i = 0; i < targetWords.size(); ++i) {
+        for (vector<int>::size_type j = 0; j < fromCount; ++j) {
+            // tag already given to an earlier target word (or never set)
+            if (fromWordTags[j] == -1) continue;
+            int match = compare(targetWords[i], fromWords[j]);
+            // if we have a better than XX% match of sequential characters
+            // then we'll say we have a match
+            if (match > 49) {
+                targetWordTags[i] = fromWordTags[j];
+                fromWordTags[j] = -1;
+                break;
+            }
+            // TOTRY: maybe check one word before and after?
+            //
+            // be creative!
+            //
+        }
+    }
+}
+};
+#endif