summaryrefslogblamecommitdiffstats
path: root/migratetags/matchers/defaultmatcher.h
blob: 592dbf5b71ef69c0a90281ef304a4c91544221b9 (plain) (tree)















































































                                                                                                                                                  
#include "matcher.h"

#ifndef defaultmatcher_h
#define defaultmatcher_h

// Default word matcher: pairs words from a target text with tagged words
// from a source text using a simple character-subsequence similarity score.
class DefaultMatcher : public Matcher {
public:

// Compares 2 words and tries to give a percentage assurance of a match.
//
// Both words are copied, run through UTF8GreekAccents with its option set
// to "off", and upper-cased via the system StringMgr before comparison.
// The score is the number of characters of the shorter word that can be
// found, in order, inside the longer word, expressed as a percentage of the
// longer word's length (truncated to an int).
//
// Returns 0 when there is nothing to compare (both words empty after
// filtering); previously that case divided by zero.
//
// NOTE(review): the scan below indexes the buffers element by element, so
// for multi-byte UTF-8 text it presumably compares bytes rather than whole
// code points -- confirm against SWBuf.
// TODO: could use more smarts here
//
virtual int compare(const SWBuf &s1, const SWBuf &s2) {
	SWBuf t1 = s1;
	SWBuf t2 = s2;
	UTF8GreekAccents filter;
	filter.setOptionValue("off");

	// remove greek accents
	filter.processText(t1);
	filter.processText(t2);

	// change to uppercase to match
	StringMgr::getSystemStringMgr()->upperUTF8(t1.getRawData());
	StringMgr::getSystemStringMgr()->upperUTF8(t2.getRawData());

	// Refer to the local copies instead of copying them a second time.
	// On equal lengths t2 is treated as the "largest", as before.
	const bool firstIsLonger = t1.length() > t2.length();
	SWBuf &largest  = firstIsLonger ? t1 : t2;
	SWBuf &smallest = firstIsLonger ? t2 : t1;

	const unsigned long largestLen  = largest.length();
	const unsigned long smallestLen = smallest.length();

	// Nothing to compare; also avoids a 0/0 division whose NaN result
	// cannot be converted to int.
	if (largestLen == 0) return 0;

	// Greedy in-order scan: for each character of the shorter word, advance
	// through the longer word until that character is found. The position
	// in the longer word never moves backwards.
	unsigned long matches = 0;
	unsigned long j = 0;
	for (unsigned long i = 0; i < smallestLen && j < largestLen; ++i) {
		while (j < largestLen) {
			if (smallest[i] == largest[j++]) {
				++matches;
				break;
			}
		}
	}
	return static_cast<int>((static_cast<float>(matches) / largestLen) * 100);
}

//
// This is where the magic happens
//
// we must point each targetMod word to an XMLTag
//
// when the magic is done, and your guess is made
// populate targetWordTags with the integer offset
// into wordTags for which XMLTag you think it should
// be.
//
// targetWordTags - output; on return holds exactly one entry per target
//                  word: the chosen tag value, or -1 if no match was found.
//                  Any previous contents are discarded.
// targetWords    - words to find tags for.
// fromWords      - candidate source words.
// fromWordTags   - tag value for each source word, parallel to fromWords;
//                  -1 marks a source word that is not available. Taken by
//                  value because entries are consumed (set to -1) locally
//                  as they are assigned.
//
virtual void matchWords(vector<int> &targetWordTags, const vector<SWBuf> &targetWords, const vector<SWBuf> &fromWords, vector<int> fromWordTags) {

	// minimum compare() score (exclusive) for two words to count as a match
	const int MATCH_THRESHOLD = 49;

	// initialize our results to all -1 so we can pop around and set
	// words as we find them, and know which ones we haven't yet set
	targetWordTags.assign(targetWords.size(), -1);

	// fromWords and fromWordTags are indexed in parallel; only walk the
	// part both of them cover so neither is read past its end
	const size_t fromCount = (fromWords.size() < fromWordTags.size()) ? fromWords.size() : fromWordTags.size();

	// poor effort attempt: each target word takes the first still-unused
	// source word that scores above the threshold
	for (size_t i = 0; i < targetWords.size(); ++i) {
		for (size_t j = 0; j < fromCount; ++j) {
			// already handed out (or never had a tag)
			if (fromWordTags[j] == -1) continue;
			int match = compare(targetWords[i], fromWords[j]);
			// if we have a better than XX% match of sequential characters
			// then we'll say we have a match
			if (match > MATCH_THRESHOLD) {
				targetWordTags[i] = fromWordTags[j];
				// consume the tag so no other target word can claim it
				fromWordTags[j] = -1;
				break;
			}
			// TOTRY: maybe check one word before and after?
			//
			// be creative!
			//
		}
	}
}
};
#endif