diff options
Diffstat (limited to 'migratetags/migratetags.cpp')
-rw-r--r-- | migratetags/migratetags.cpp | 73 |
1 files changed, 49 insertions, 24 deletions
diff --git a/migratetags/migratetags.cpp b/migratetags/migratetags.cpp index 2051a22..689640a 100644 --- a/migratetags/migratetags.cpp +++ b/migratetags/migratetags.cpp @@ -50,7 +50,7 @@ void usage(const char *progName, const char *error = 0) { int main(int argc, char **argv) { const char *progName = argv[0]; - for (int i = 1; i < argc; i++) { + for (int i = 1; i < argc; ++i) { if (!strcmp(argv[i], "-v")) { optionDebug = true; } @@ -62,7 +62,7 @@ int main(int argc, char **argv) { } else usage(progName, (((SWBuf)"Unknown argument: ")+ argv[i]).c_str()); } - VerseKey vk; + SWMgr lib; lib.setGlobalOption("Textual Variants", "Secondary Reading"); SWModule *m = lib.getModule(targetModuleName); @@ -246,15 +246,40 @@ if (optionDebug) { cout << "\nTargetMod Words: " << endl; } bool warned = false; - for (int i = 0; i < targetWords.size(); i++) { + for (int i = 0; i < targetWords.size(); ++i) { if (targetWordTags[i] == -1 && !strstr(ignoreSeries, targetWords[i])) { - if (!warned) cerr << "*** Error: didn't match all words: " << targetMod.getKeyText() << endl; - warned = true; + if (!warned) { + cerr << "*** Error: didn't match all words: " << targetMod.getKeyText() << endl; + cerr << strongsSourceModuleName << ":"; + for (int j = 0; j < fromWords.size(); ++j) { + cerr << " " << fromWords[j]; + } + cerr << endl; + cerr << targetModuleName << ":"; + for (int j = 0; j < targetWords.size(); ++j) { + cerr << " " << targetWords[j]; + } + cerr << endl; + cerr << endl; + cerr << "Unmatched Words:" << endl; + warned = true; + } + cerr << " " << i << ": " << targetWords[i] << " (" << matcher->sanitizeWord(targetWords[i]) << ")" << endl; } if (optionDebug) { cout << targetWords[i] << " : " << targetWordTags[i] << " => " << (targetWordTags[i] != -1 ? wordTags[targetWordTags[i]] : "") << endl; } } + if (warned) { + cerr << "\n" << targetModuleName << " Tags:\n"; + VerseKey *vk = (VerseKey *)targetMod.getKey(); + for (int j = 0; j < targetWords.size(); ++j) { + if (!strstr(ignoreSeries, targetWords[j])) { + cerr << targetWords[j] << "\t\t " << vk->getOSISRef() << "." << j << "=" << (targetWordTags[j] != -1 ? wordTags[targetWordTags[j]] : "") << endl; + } + } + cerr << "---------------------" << endl; + } if (optionDebug) { cout << "---------------------" << endl; @@ -279,7 +304,7 @@ SWBuf findCanonicalBibleText(SWBuf orig, BibMap &bibMap, BibMap &wTags) { int tagLevel = 0; int wTag = -1; int inTag = 0; - for (int i = 0; i < orig.length(); i++) { + for (int i = 0; i < orig.length(); ++i) { if (orig[i] == '<') { inTag = true; } @@ -330,7 +355,7 @@ void insert(SWBuf addText, SWBuf &out, int bibPos, BibMap &bibMap, BibMap &wTags } if (!after || wTags[bibPos] == -1) { out.insert(to, addText); - for (int i = bibPos+((after)?1:0); i < bibMap.size(); i++) { + for (int i = bibPos+((after)?1:0); i < bibMap.size(); ++i) { bibMap[i] += addText.length(); if (wTags[i] != -1) wTags[i] += addText.length(); } @@ -338,7 +363,6 @@ void insert(SWBuf addText, SWBuf &out, int bibPos, BibMap &bibMap, BibMap &wTags } - SWBuf buildWordMaps(const SWBuf &markupBuf, const BibMap &bibMap, vector<SWBuf> &targetWords, vector<int> &targetWordStarts, vector<int> &targetWordEnds) { SWBuf bibWord = ""; SWBuf fromWord = ""; @@ -386,36 +410,38 @@ void pullFromModData(SWModule &fromMod, vector<XMLTag>&wordTags, vector<SWBuf> & // this is our new <w> XMLTag. // attributes will be added below XMLTag w("w"); + // this only gives us word count, not if we have multiple entries per word + // don't use as loop int parts = atoi(it->second["PartCount"]); SWBuf lemma = ""; SWBuf morph = ""; - for (int i = 1; i <= parts; i++) { + bool found = true; + for (int i = 1; found; ++i) { + found = false; SWBuf key = ""; - key = (parts == 1) ? "Lemma" : SWBuf().setFormatted("Lemma.%d", i); + key = SWBuf().setFormatted("Lemma.%d", i); AttributeValue::iterator li = it->second.find(key); + if (i == 1 && li == it->second.end()) li = it->second.find("Lemma"); if (li != it->second.end()) { + found = true; if (i > 1) lemma += " "; - key = (parts == 1) ? "LemmaClass" : SWBuf().setFormatted("LemmaClass.%d", i); + key = SWBuf().setFormatted("LemmaClass.%d", i); AttributeValue::iterator lci = it->second.find(key); + if (i == 1 && lci == it->second.end()) lci = it->second.find("LemmaClass"); if (lci != it->second.end()) { lemma += lci->second + ":"; } lemma += li->second; } - key = (parts == 1) ? "Morph" : SWBuf().setFormatted("Morph.%d", i); + key = SWBuf().setFormatted("Morph.%d", i); li = it->second.find(key); - // silly. sometimes morph counts don't equal lemma counts - if (i == 1 && parts != 1 && li == it->second.end()) { - li = it->second.find("Morph"); - } + if (i == 1 && li == it->second.end()) li = it->second.find("Morph"); if (li != it->second.end()) { + found = true; if (i > 1) morph += " "; - key = (parts == 1) ? "MorphClass" : SWBuf().setFormatted("MorphClass.%d", i); + key = SWBuf().setFormatted("MorphClass.%d", i); AttributeValue::iterator lci = it->second.find(key); - // silly. sometimes morph counts don't equal lemma counts - if (i == 1 && parts != 1 && lci == it->second.end()) { - lci = it->second.find("MorphClass"); - } + if (i == 1 && lci == it->second.end()) lci = it->second.find("MorphClass"); if (lci != it->second.end()) { morph += lci->second + ":"; } @@ -430,9 +456,8 @@ void pullFromModData(SWModule &fromMod, vector<XMLTag>&wordTags, vector<SWBuf> & fromWord = it->second["Text"]; bibWord = ""; - for (int j = 0; j < fromWord.length(); j++) { + for (int j = 0; j < fromWord.length(); ++j) { char c = fromWord[j]; -// if (!strchr(ignoreSeries, c)) { if (c != ' ' && c != '.' && c != ';' && c != ',') { bibWord += c; } @@ -458,7 +483,7 @@ void insertWordTags(SWBuf &markupBuf, BibMap &bibMap, BibMap &wTags, const vecto // TODO: this method needs some work, // like putting multiple consecutive words // together in one tag - for (int i = 0; i < targetWordTags.size(); i++) { + for (int i = 0; i < targetWordTags.size(); ++i) { if (targetWordTags[i] > -1) { insert((const char *)wordTags[targetWordTags[i]], markupBuf, targetWordStarts[i], bibMap, wTags); insert("</w>", markupBuf, targetWordEnds[i], bibMap, wTags, true); |