Added option to allow the addition of lexical information

git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@556 07627401-56e2-0310-80f4-f8cd0041bdcd
author: Troy A. Griffitts <scribe@crosswire.org> 2023-04-16 15:15:24 +0000
committer: Troy A. Griffitts <scribe@crosswire.org> 2023-04-16 15:15:24 +0000
commit: 71612e4d6c459c8b9022e4887fd884dfab0ec0a7 (patch)
tree: 95be627fd6aa5af094b1febdb35c28075bc89938
parent: 4f33c33b5492e1cc391c13bee8aa9265784706c9 (diff)
download: sword-tools-71612e4d6c459c8b9022e4887fd884dfab0ec0a7.tar.gz
1 files changed, 31 insertions, 3 deletions
diff --git a/migratetags/migratetags.cpp b/migratetags/migratetags.cpp
index d1b284a..3dec240 100644
--- a/migratetags/migratetags.cpp
+++ b/migratetags/migratetags.cpp
@@ -36,12 +36,13 @@ void insert(SWBuf addText, SWBuf &out, int bibPos, BibMap &bibMap, BibMap &wTags
 SWBuf findCanonicalBibleText(SWBuf orig, BibMap &bibMap, BibMap &tTags);
 SWBuf buildWordMaps(const SWBuf &markupBuf, const BibMap &bibMap, vector<SWBuf> &targetWords, vector<int> &targetWordStarts, vector<int> &targetWordEnds);
 void pullFromModData(SWModule &fromMod, vector<XMLTag>&wordTags, vector<SWBuf> &fromWords, vector<int> &fromWordTags);
-void insertWordTags(VerseKey *vk, SWBuf &markupBuf, BibMap &bibMap, BibMap &wTags, vector<int> &targetWordTags, const vector<XMLTag> &wordTags, const vector<int> &targetWordStarts, const vector<int> &targetWordEnds);
+void insertWordTags(VerseKey *vk, SWBuf &markupBuf, BibMap &bibMap, BibMap &wTags, vector<int> &targetWordTags, const vector<XMLTag> &wordTags, const vector<int> &targetWordStarts, const vector<int> &targetWordEnds, SWConfig *lex = 0);
 
 // app options
 bool optionFilterAccents = false;
 bool optionFilterAppCrit = false;
 bool optionDebug         = false;
+bool optionIncludeLex    = false;
 vector<SWBuf> optionExceptionFile;
 SWConfig *exceptionFile = 0;
 
@@ -50,6 +51,7 @@ void usage(const char *progName, const char *error = 0) {
 	fprintf(stderr, "\n=== migratetags (Revision $Rev$) Migrate word morphology from one module to another.\n");
 	fprintf(stderr, "\nusage: %s [options]\n", progName);
 	fprintf(stderr, "  -ss <moduleName>\t provide the Strong's source module name\n");
+	fprintf(stderr, "  -l \t\t include lexical and source information\n");
 	fprintf(stderr, "  -t  <moduleName>\t provide the target module name\n");
 	fprintf(stderr, "  -tei <filename>\t provide the target tei filename\n");
 	fprintf(stderr, "  -e  <exception file>\t provide an ini-style .conf file with overriding tag exceptions.\n");
@@ -188,6 +190,9 @@ int main(int argc, char **argv) {
 		else if (!strcmp(argv[i], "-fa")) {
 			optionFilterAccents = true;
 		}
+		else if (!strcmp(argv[i], "-l")) {
+			optionIncludeLex = true;
+		}
 		else if (!strcmp(argv[i], "-fc")) {
 			optionFilterAppCrit = true;
 		}
@@ -256,6 +261,10 @@ int main(int argc, char **argv) {
 	VerseKey *targetModKey = (VerseKey *)(targetInput ? fromMod.createKey() : targetMod->createKey());
 	targetModKey->setIntros(true);
 	SWBuf targetModText;
+	SWConfig *lex = 0;
+	if (optionIncludeLex) {
+		lex = new SWConfig("../flashtools/greek.conf");
+	}
 	while ((targetInput ? getNextVerseTEI(targetModKey, &targetModText) : getNextVerse(targetModKey, &targetModText))) {
 		if (targetModKey->getError()) {
 			cout << targetModText;
@@ -408,9 +417,10 @@ if (optionDebug) {
 		//
 		matcher->matchWords(targetWordTags, targetWords, fromWords, fromWordTags);
 
+
 		// ok, now that we have our targetWordTags magically populated
 		// let's do the grunt work of inserting the <w> and </w> tags
-		insertWordTags((VerseKey *)targetModKey, newTargetModMarkup, bibMap, wTags, targetWordTags, wordTags, targetWordStarts, targetWordEnds);
+		insertWordTags((VerseKey *)targetModKey, newTargetModMarkup, bibMap, wTags, targetWordTags, wordTags, targetWordStarts, targetWordEnds, lex);
 
 
 if (optionDebug) {
@@ -477,6 +487,7 @@ if (optionDebug) {
 	}
 
 	delete exceptionFile;
+	delete lex;
 
 	return 0;
 }
@@ -686,7 +697,7 @@ void pullFromModData(SWModule &fromMod, vector<XMLTag>&wordTags, vector<SWBuf> &
 }
 
 
-void insertWordTags(VerseKey *vk, SWBuf &markupBuf, BibMap &bibMap, BibMap &wTags, vector<int> &targetWordTags, const vector<XMLTag> &wordTags, const vector<int> &targetWordStarts, const vector<int> &targetWordEnds) {
+void insertWordTags(VerseKey *vk, SWBuf &markupBuf, BibMap &bibMap, BibMap &wTags, vector<int> &targetWordTags, const vector<XMLTag> &wordTags, const vector<int> &targetWordStarts, const vector<int> &targetWordEnds, SWConfig *lex) {
 	// TODO: this method needs some work,
 	// like putting multiple consecutive words
 	// together in one tag
@@ -711,6 +722,23 @@ void insertWordTags(VerseKey *vk, SWBuf &markupBuf, BibMap &bibMap, BibMap &wTag
 			}
 		}
 		if (wordTag.length()) {
+			// if we have been asked to include extra lexical data
+			if (lex) {
+				XMLTag w(wordTag);
+				int attCount = w.getAttributePartCount("lemma", ' ');
+				for (int i = 0; i < attCount; ++i) {
+					SWBuf a = w.getAttribute("lemma", i, ' ');
+					SWBuf c = a.stripPrefix(':');
+					if (c == "strong") {
+						if (a.startsWith("G") || a.startsWith("H")) a << 1;
+						SWBuf dict = (*lex)[a]["UTF8"];
+						SWBuf gloss = (*lex)[a]["Meaning"];
+						w.setAttribute("corresp", dict);
+						w.setAttribute("gloss", gloss);
+						wordTag = w.toString();
+					}
+				}
+			}
 			insert((const char *)wordTag, markupBuf, targetWordStarts[i], bibMap, wTags);
 			insert("</w>", markupBuf, targetWordEnds[i], bibMap, wTags, true);
 		}
author	Troy A. Griffitts <scribe@crosswire.org>	2023-04-16 15:15:24 +0000
committer	Troy A. Griffitts <scribe@crosswire.org>	2023-04-16 15:15:24 +0000
commit	71612e4d6c459c8b9022e4887fd884dfab0ec0a7 (patch)
tree	95be627fd6aa5af094b1febdb35c28075bc89938
parent	4f33c33b5492e1cc391c13bee8aa9265784706c9 (diff)
download	sword-tools-71612e4d6c459c8b9022e4887fd884dfab0ec0a7.tar.gz