From 71612e4d6c459c8b9022e4887fd884dfab0ec0a7 Mon Sep 17 00:00:00 2001 From: "Troy A. Griffitts" Date: Sun, 16 Apr 2023 15:15:24 +0000 Subject: added option to allow additions of lexical information git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@556 07627401-56e2-0310-80f4-f8cd0041bdcd --- migratetags/migratetags.cpp | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/migratetags/migratetags.cpp b/migratetags/migratetags.cpp index d1b284a..3dec240 100644 --- a/migratetags/migratetags.cpp +++ b/migratetags/migratetags.cpp @@ -36,12 +36,13 @@ void insert(SWBuf addText, SWBuf &out, int bibPos, BibMap &bibMap, BibMap &wTags SWBuf findCanonicalBibleText(SWBuf orig, BibMap &bibMap, BibMap &tTags); SWBuf buildWordMaps(const SWBuf &markupBuf, const BibMap &bibMap, vector &targetWords, vector &targetWordStarts, vector &targetWordEnds); void pullFromModData(SWModule &fromMod, vector&wordTags, vector &fromWords, vector &fromWordTags); -void insertWordTags(VerseKey *vk, SWBuf &markupBuf, BibMap &bibMap, BibMap &wTags, vector &targetWordTags, const vector &wordTags, const vector &targetWordStarts, const vector &targetWordEnds); +void insertWordTags(VerseKey *vk, SWBuf &markupBuf, BibMap &bibMap, BibMap &wTags, vector &targetWordTags, const vector &wordTags, const vector &targetWordStarts, const vector &targetWordEnds, SWConfig *lex = 0); // app options bool optionFilterAccents = false; bool optionFilterAppCrit = false; bool optionDebug = false; +bool optionIncludeLex = false; vector optionExceptionFile; SWConfig *exceptionFile = 0; @@ -50,6 +51,7 @@ void usage(const char *progName, const char *error = 0) { fprintf(stderr, "\n=== migratetags (Revision $Rev$) Migrate word morphology from one module to another.\n"); fprintf(stderr, "\nusage: %s [options]\n", progName); fprintf(stderr, " -ss \t provide the Strong's source module name\n"); + fprintf(stderr, " -l \t\t include lexical and source information\n"); fprintf(stderr, " -t \t provide the target module name\n"); fprintf(stderr, " -tei \t provide the target tei filename\n"); fprintf(stderr, " -e \t provide an ini-style .conf file with overriding tag exceptions.\n"); @@ -188,6 +190,9 @@ int main(int argc, char **argv) { else if (!strcmp(argv[i], "-fa")) { optionFilterAccents = true; } + else if (!strcmp(argv[i], "-l")) { + optionIncludeLex = true; + } else if (!strcmp(argv[i], "-fc")) { optionFilterAppCrit = true; } @@ -256,6 +261,10 @@ int main(int argc, char **argv) { VerseKey *targetModKey = (VerseKey *)(targetInput ? fromMod.createKey() : targetMod->createKey()); targetModKey->setIntros(true); SWBuf targetModText; + SWConfig *lex = 0; + if (optionIncludeLex) { + lex = new SWConfig("../flashtools/greek.conf"); + } while ((targetInput ? getNextVerseTEI(targetModKey, &targetModText) : getNextVerse(targetModKey, &targetModText))) { if (targetModKey->getError()) { cout << targetModText; @@ -408,9 +417,10 @@ if (optionDebug) { // matcher->matchWords(targetWordTags, targetWords, fromWords, fromWordTags); + // ok, now that we have our targetWordTags magically populated // let's do the grunt work of inserting the and tags - insertWordTags((VerseKey *)targetModKey, newTargetModMarkup, bibMap, wTags, targetWordTags, wordTags, targetWordStarts, targetWordEnds); + insertWordTags((VerseKey *)targetModKey, newTargetModMarkup, bibMap, wTags, targetWordTags, wordTags, targetWordStarts, targetWordEnds, lex); if (optionDebug) { @@ -477,6 +487,7 @@ if (optionDebug) { } delete exceptionFile; + delete lex; return 0; } @@ -686,7 +697,7 @@ void pullFromModData(SWModule &fromMod, vector&wordTags, vector & } -void insertWordTags(VerseKey *vk, SWBuf &markupBuf, BibMap &bibMap, BibMap &wTags, vector &targetWordTags, const vector &wordTags, const vector &targetWordStarts, const vector &targetWordEnds) { +void insertWordTags(VerseKey *vk, SWBuf &markupBuf, BibMap &bibMap, BibMap &wTags, vector &targetWordTags, const vector &wordTags, const vector &targetWordStarts, const vector &targetWordEnds, SWConfig *lex) { // TODO: this method needs some work, // like putting multiple consecutive words // together in one tag @@ -711,6 +722,23 @@ void insertWordTags(VerseKey *vk, SWBuf &markupBuf, BibMap &bibMap, BibMap &wTag } } if (wordTag.length()) { + // if we have been asked to include extra lexical data + if (lex) { + XMLTag w(wordTag); + int attCount = w.getAttributePartCount("lemma", ' '); + for (int i = 0; i < attCount; ++i) { + SWBuf a = w.getAttribute("lemma", i, ' '); + SWBuf c = a.stripPrefix(':'); + if (c == "strong") { + if (a.startsWith("G") || a.startsWith("H")) a << 1; + SWBuf dict = (*lex)[a]["UTF8"]; + SWBuf gloss = (*lex)[a]["Meaning"]; + w.setAttribute("corresp", dict); + w.setAttribute("gloss", gloss); + wordTag = w.toString(); + } + } + } insert((const char *)wordTag, markupBuf, targetWordStarts[i], bibMap, wTags); insert("", markupBuf, targetWordEnds[i], bibMap, wTags, true); } -- cgit