From cda6afbc5c9b2fc2d84a0d96daf166b8c28e379c Mon Sep 17 00:00:00 2001 From: "Troy A. Griffitts" Date: Thu, 25 May 2023 08:17:14 +0000 Subject: added regularization and sigma normalization git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@563 07627401-56e2-0310-80f4-f8cd0041bdcd --- migratetags/matchers/gntmatcher.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/migratetags/matchers/gntmatcher.h b/migratetags/matchers/gntmatcher.h index d2fcbd8..35c245d 100644 --- a/migratetags/matchers/gntmatcher.h +++ b/migratetags/matchers/gntmatcher.h @@ -1,15 +1,21 @@ #include "matcher.h" #include <utf8greekaccents.h> +#include <map> #ifndef gntmatcher_h #define gntmatcher_h +using std::map; + class GNTMatcher : public Matcher { UTF8GreekAccents sanitizeGreekAccentFilter; + map<SWBuf, SWBuf> globalRegs; public: GNTMatcher() : sanitizeGreekAccentFilter() { sanitizeGreekAccentFilter.setOptionValue("off"); + globalRegs["ΘΣ"] = "ΘΕΟΣ"; + globalRegs["ΚΥ"] = "ΚΥΡΙΟΥ"; } // Compares 2 words and tries to give a percentage assurance of a match @@ -109,7 +115,6 @@ virtual SWBuf sanitizeWord(const SWBuf &word) { SWBuf t1 = word; // remove greek accents sanitizeGreekAccentFilter.processText(t1); - t1.toUpper(); // remove ignoreSeries characters SWBuf o = t1; @@ -122,8 +127,14 @@ virtual SWBuf sanitizeWord(const SWBuf &word) { SWBuf checkChar; getUTF8FromUniChar(ch, &checkChar); if (checkChar != " " && strstr(ignoreSeries, checkChar.c_str())) continue; + if (checkChar == "ϲ") checkChar = "σ"; + if (checkChar == "ς") checkChar = "σ"; t1.append(checkChar); } + t1.toUpper(); + if (globalRegs.find(t1) != globalRegs.end()) { + t1 = globalRegs[t1]; + } return t1; } -- cgit