diff options
Diffstat (limited to 'migratetags/matchers/defaultmatcher.h')
-rw-r--r-- | migratetags/matchers/defaultmatcher.h | 80 |
1 files changed, 80 insertions, 0 deletions
diff --git a/migratetags/matchers/defaultmatcher.h b/migratetags/matchers/defaultmatcher.h new file mode 100644 index 0000000..592dbf5 --- /dev/null +++ b/migratetags/matchers/defaultmatcher.h @@ -0,0 +1,80 @@ +#include "matcher.h" + +#ifndef defaultmatcher_h +#define defaultmatcher_h + +class DefaultMatcher : public Matcher { +public: + +// Compares 2 words and tries to give a percentage assurance of a match +// TODO: could use more smarts here +// +virtual int compare(const SWBuf &s1, const SWBuf &s2) { + SWBuf t1 = s1; + SWBuf t2 = s2; + UTF8GreekAccents filter; + filter.setOptionValue("off"); + + // remove greek accents + filter.processText(t1); + filter.processText(t2); + + // change to uppercase to match + StringMgr::getSystemStringMgr()->upperUTF8(t1.getRawData()); + StringMgr::getSystemStringMgr()->upperUTF8(t2.getRawData()); + + int retVal = 0; + SWBuf largest = (t1.length() > t2.length()) ? t1 : t2; + SWBuf smallest = (t1.length() > t2.length()) ? t2 : t1; + int matches = 0; + int j = 0; + for (int i = 0; i < smallest.length() && j < largest.length(); i++) { + while (j < largest.length()) { + if (smallest[i] == largest[j++]) { + matches++; + break; + } + } + } + return (((float)matches) / largest.length()) * 100; +} +// +// This is where the magic happens +// +// we must point each targetMod word to an XMLTag +// +// when the magic is done, and your guess is made +// populate targetWordTags with the integer offset +// into wordTags for which XMLTag you think it should +// be. +// + +virtual void matchWords(vector<int> &targetWordTags, const vector<SWBuf> &targetWords, const vector<SWBuf> &fromWords, vector<int> fromWordTags) { + + // initialize our results to all -1 so we can pop around and set + // words as we find them, and know which ones we haven't yet set + for (int i = 0; i < targetWords.size(); i++) targetWordTags.push_back(-1); + + + // poor effort attempt + int j = 0; + for (int i = 0; i < targetWords.size(); ++i) { + for (int j = 0; j < fromWords.size(); ++j) { + if (fromWordTags[j] == -1) continue; + int match = compare(targetWords[i], fromWords[j]); + // if we have a better than XX% match of sequencial characters + // then we'll say we have a match + if (match > 49) { + targetWordTags[i] = fromWordTags[j]; + fromWordTags[j] = -1; + break; + } + // TOTRY: maybe check one word before and after? + // + // be creative! + // + } + } +} +}; +#endif |