summary | refs | log | tree | commit | diff | stats
path: root/migratetags/matchers/gntmatcher.h
diff options
context:
space:
mode:
Diffstat (limited to 'migratetags/matchers/gntmatcher.h')
-rw-r--r-- migratetags/matchers/gntmatcher.h 16
1 files changed, 15 insertions, 1 deletions
diff --git a/migratetags/matchers/gntmatcher.h b/migratetags/matchers/gntmatcher.h
index 8c8f3e4..d2fcbd8 100644
--- a/migratetags/matchers/gntmatcher.h
+++ b/migratetags/matchers/gntmatcher.h
@@ -104,12 +104,26 @@ virtual void matchWords(vector<int> &targetWordTags, const vector<SWBuf> &target
}
}
+const char *ignoreSeries = "[]\nʼ‾̷‾"; // characters sanitizeWord() drops: each decoded character's UTF-8 bytes are looked up in this string with strstr()
virtual SWBuf sanitizeWord(const SWBuf &word) { // returns a normalized copy of word: accent filter applied, upper-cased, ignoreSeries characters removed; word itself is not modified
SWBuf t1 = word;
// remove greek accents (done in place on the working copy t1 by sanitizeGreekAccentFilter)
sanitizeGreekAccentFilter.processText(t1);
t1.toUpper();
- t1.replaceBytes("[]", 0);
+
+ // remove ignoreSeries characters: walk the text one decoded character at a time and rebuild t1 from the characters that are kept
+ SWBuf o = t1; // snapshot to read from, because t1 is cleared and re-filled below
+ const unsigned char* from = (unsigned char*)o.c_str(); // read cursor over the snapshot's bytes
+ t1 = ""; // start the output empty
+ while (*from) { // stop at the terminating NUL byte
+ SW_u32 ch = getUniCharFromUTF8(&from, true); // presumably decodes one code point and advances from past it (the loop relies on that) -- TODO confirm what the second argument selects
+ // if ch is bad (decoder returned 0), then convert to replacement char U+FFFD
+ if (!ch) ch = 0xFFFD;
+ SWBuf checkChar;
+ getUTF8FromUniChar(ch, &checkChar); // presumably writes the UTF-8 encoding of ch into checkChar -- confirm against the helper
+ if (checkChar != " " && strstr(ignoreSeries, checkChar.c_str())) continue; // skip any non-space character whose bytes occur in ignoreSeries; NOTE(review): strstr matches byte substrings, not whole characters, and an empty checkChar would also match and be skipped
+ t1.append(checkChar); // keep every other character
+ }
return t1;
}