diff options
author | Troy A. Griffitts <scribe@crosswire.org> | 2023-05-25 08:17:14 +0000 |
---|---|---|
committer | Troy A. Griffitts <scribe@crosswire.org> | 2023-05-25 08:17:14 +0000 |
commit | cda6afbc5c9b2fc2d84a0d96daf166b8c28e379c (patch) | |
tree | 01307f3a169339b3f6f47a5ac538fe52a7a4dade | |
parent | aafe13c6f5e4c94dee8de00a158f6a42f9b98735 (diff) | |
download | sword-tools-cda6afbc5c9b2fc2d84a0d96daf166b8c28e379c.tar.gz |
added regularization and sigma normalization
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@563 07627401-56e2-0310-80f4-f8cd0041bdcd
-rw-r--r-- | migratetags/matchers/gntmatcher.h | 13 |
1 file changed, 12 insertions, 1 deletion
diff --git a/migratetags/matchers/gntmatcher.h b/migratetags/matchers/gntmatcher.h index d2fcbd8..35c245d 100644 --- a/migratetags/matchers/gntmatcher.h +++ b/migratetags/matchers/gntmatcher.h @@ -1,15 +1,21 @@ #include "matcher.h" #include <utf8greekaccents.h> +#include <map> #ifndef gntmatcher_h #define gntmatcher_h +using std::map; + class GNTMatcher : public Matcher { UTF8GreekAccents sanitizeGreekAccentFilter; + map<SWBuf, SWBuf> globalRegs; public: GNTMatcher() : sanitizeGreekAccentFilter() { sanitizeGreekAccentFilter.setOptionValue("off"); + globalRegs["ΘΣ"] = "ΘΕΟΣ"; + globalRegs["ΚΥ"] = "ΚΥΡΙΟΥ"; } // Compares 2 words and tries to give a percentage assurance of a match @@ -109,7 +115,6 @@ virtual SWBuf sanitizeWord(const SWBuf &word) { SWBuf t1 = word; // remove greek accents sanitizeGreekAccentFilter.processText(t1); - t1.toUpper(); // remove ignoreSeries characters SWBuf o = t1; @@ -122,8 +127,14 @@ virtual SWBuf sanitizeWord(const SWBuf &word) { SWBuf checkChar; getUTF8FromUniChar(ch, &checkChar); if (checkChar != " " && strstr(ignoreSeries, checkChar.c_str())) continue; + if (checkChar == "ϲ") checkChar = "σ"; + if (checkChar == "ς") checkChar = "σ"; t1.append(checkChar); } + t1.toUpper(); + if (globalRegs.find(t1) != globalRegs.end()) { + t1 = globalRegs[t1]; + } return t1; } |