summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Troy A. Griffitts <scribe@crosswire.org> 2023-05-25 08:17:14 +0000
committer Troy A. Griffitts <scribe@crosswire.org> 2023-05-25 08:17:14 +0000
commit cda6afbc5c9b2fc2d84a0d96daf166b8c28e379c (patch)
tree 01307f3a169339b3f6f47a5ac538fe52a7a4dade
parent aafe13c6f5e4c94dee8de00a158f6a42f9b98735 (diff)
download sword-tools-cda6afbc5c9b2fc2d84a0d96daf166b8c28e379c.tar.gz
added regularization and sigma normalization
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@563 07627401-56e2-0310-80f4-f8cd0041bdcd
-rw-r--r-- migratetags/matchers/gntmatcher.h 13
1 files changed, 12 insertions, 1 deletions
diff --git a/migratetags/matchers/gntmatcher.h b/migratetags/matchers/gntmatcher.h
index d2fcbd8..35c245d 100644
--- a/migratetags/matchers/gntmatcher.h
+++ b/migratetags/matchers/gntmatcher.h
@@ -1,15 +1,21 @@
#include "matcher.h"
#include <utf8greekaccents.h>
+#include <map>
#ifndef gntmatcher_h
#define gntmatcher_h
+using std::map;
+
class GNTMatcher : public Matcher {
UTF8GreekAccents sanitizeGreekAccentFilter;
+ map<SWBuf, SWBuf> globalRegs;
public:
GNTMatcher() : sanitizeGreekAccentFilter() { // configures the accent filter and fills the whole-word regularization table
sanitizeGreekAccentFilter.setOptionValue("off"); // sanitizeWord runs this filter to remove Greek accents; "off" presumably selects stripping -- TODO confirm against UTF8GreekAccents
+ globalRegs["ΘΣ"] = "ΘΕΟΣ"; // apparently abbreviation -> full spelling; key is uppercase because sanitizeWord looks the word up after toUpper()
+ globalRegs["ΚΥ"] = "ΚΥΡΙΟΥ"; // likewise: a sanitized word that equals the key is replaced by this value
}
// Compares 2 words and tries to give a percentage assurance of a match
@@ -109,7 +115,6 @@ virtual SWBuf sanitizeWord(const SWBuf &word) {
SWBuf t1 = word;
// remove greek accents
sanitizeGreekAccentFilter.processText(t1);
- t1.toUpper();
// remove ignoreSeries characters
SWBuf o = t1;
@@ -122,8 +127,14 @@ virtual SWBuf sanitizeWord(const SWBuf &word) {
SWBuf checkChar;
getUTF8FromUniChar(ch, &checkChar);
if (checkChar != " " && strstr(ignoreSeries, checkChar.c_str())) continue;
+ if (checkChar == "ϲ") checkChar = "σ";
+ if (checkChar == "ς") checkChar = "σ";
t1.append(checkChar);
}
+ t1.toUpper();
+ if (globalRegs.find(t1) != globalRegs.end()) {
+ t1 = globalRegs[t1];
+ }
return t1;
}