diff options
Diffstat (limited to 'modules/nasb2020/lockosis.cpp')
-rw-r--r-- | modules/nasb2020/lockosis.cpp | 622 |
1 files changed, 248 insertions, 374 deletions
diff --git a/modules/nasb2020/lockosis.cpp b/modules/nasb2020/lockosis.cpp index deaadbb..87904b3 100644 --- a/modules/nasb2020/lockosis.cpp +++ b/modules/nasb2020/lockosis.cpp @@ -140,6 +140,7 @@ void outTrailer(); void unicodeTicks(string &outstring); void prepLine(string &outstring, int currentTestament, bool note); string getNoteBody(int nfd, string &noteLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx); +int replaceFirst(string &haystack, string needle, string replacement); @@ -302,9 +303,13 @@ int main(int argc, char **argv) { } //<SH> - if (!strncmp(outstring.c_str(), "<SH>", 4)) { - string heading = outstring.c_str()+4; - heading = heading.substr(0, heading.find("</SH>")); + if ((!strncmp(outstring.c_str(), "<SH>", 4)) || (!strncmp(outstring.c_str(), "<SHI>", 5))) { + bool shi = outstring.c_str()[3] == 'I'; + if (shi) { + fprintf(stderr, "found shi.\n"); + } + string heading = outstring.c_str()+(shi ? 5 : 4); + heading = heading.substr(0, heading.find(shi ? "</SHI>" : "</SH>")); outstring = ""; if (!strncmp(lookahead.c_str(), "<PM>", 4)) { @@ -315,7 +320,9 @@ int main(int argc, char **argv) { outstring += "</verse>\n"; inVerse = false; } - outstring += (string)"<title type=\"section\" subType=\"x-preverse\">" + heading + (string)"</title>"; + outstring += (string)"<title type=\"section\""; + if (!shi) outstring += (string)" subType=\"x-preverse\""; + outstring += (string)">" + heading + (string)"</title>"; } if (!strncmp(outstring.c_str(), "<SS>", 4)) { string heading = (outstring.c_str()+4); @@ -330,7 +337,7 @@ int main(int argc, char **argv) { - // {{x:y}} + // {{x::y}} // DUH, find_first_of looks for the first occurance of ANY single character of the supplied string // int start = outstring.find_first_of("{{"); // this is whacked and fails on ">[{.." Try it! 
const char *outstr = outstring.c_str(); @@ -344,7 +351,7 @@ int main(int argc, char **argv) { end++; int testmt = 0, book = 0, chap = 0; string bkch = outstring.substr(start+2, end-start-2); - sscanf(bkch.c_str(), "%d:%d", &book, &chap); + sscanf(bkch.c_str(), "%d::%d", &book, &chap); currentChapter = chap; int vNumEnd = outstring.find_first_of(" ", end); currentVerse = atoi(outstring.substr(end+1, vNumEnd-end-1).c_str()); @@ -354,7 +361,7 @@ int main(int argc, char **argv) { book -= nasbMax[0]; } if (currentBook != osisBooks[testmt][book-1]) { - fprintf(stderr, "error: %s: Found a book/chapter identifier out of place: {{%d:%d}} in book %s, chapter %d; bkch = %s; outstring = %s\n", argv[0], book, chap, currentBook.c_str(), currentChapter, bkch.c_str(), outstring.c_str()); + fprintf(stderr, "error: %s: Found a book/chapter identifier out of place: {{%d::%d}} in book %s, chapter %d; bkch = %s; outstring = %s\n", argv[0], book, chap, currentBook.c_str(), currentChapter, bkch.c_str(), outstring.c_str()); exit(-3); } char chapString[20], verseString[20]; @@ -397,6 +404,7 @@ int main(int argc, char **argv) { string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "N"); if (preChap) preChapNote = noteLine; outstring.replace(start, end-start+1, (string)"<note type=\"explanation\" osisID=\"" + osisID + (string)".note." + nStr + (string) "\" n=\"" + nStr + (string) "\">" + noteBody + "</note>"); +// outstring.replace(start, end-start+1, (string)"--note--"); continue; } } @@ -404,56 +412,23 @@ int main(int argc, char **argv) { // <RS> - outstr = outstring.c_str(); - found = strstr(outstr, "<RS>"); - start = (found) ? (found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 4, (string)"<q who=\"Jesus\">"); - continue; - - } + if (replaceFirst(outstring, "<RS>", "<q who=\"Jesus\">") > -1) continue; // <RS> - outstr = outstring.c_str(); - found = strstr(outstr, "</RS>"); - start = (found) ? 
(found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 5, (string)"</q>"); - continue; + if (replaceFirst(outstring, "</RS>", "</q>") > -1) continue; - } // <RT> - outstr = outstring.c_str(); - found = strstr(outstr, "<RT>"); - start = (found) ? (found - outstr) : -1; + if (replaceFirst(outstring, "<RT>", "<milestone type=\"x-RT\"/>") > -1) continue; - if (start > -1) { - outstring.replace(start, 4, (string)"<milestone type=\"x-RT\"/>"); - continue; + if (replaceFirst(outstring, "<SHI>", "<title type=\"section\">") > -1) continue; + if (replaceFirst(outstring, "</SHI>", "</title>") > -1) continue; - } // <?> - outstr = outstring.c_str(); - found = strstr(outstr, "<?>"); - start = (found) ? (found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 3, (string)"¿"); - continue; - - } + if (replaceFirst(outstring, "<?>", "¿") > -1) continue; + // <!> - outstr = outstring.c_str(); - found = strstr(outstr, "<!>"); - start = (found) ? (found - outstr) : -1; + if (replaceFirst(outstring, "<!>", "¡") > -1) continue; - if (start > -1) { - outstring.replace(start, 3, (string)"¡"); - continue; - - } outstr = outstring.c_str(); found = strstr(outstr, "<R"); start = (found) ? (found - outstr) : -1; @@ -507,101 +482,40 @@ int main(int argc, char **argv) { } */ // <,> - outstr = outstring.c_str(); - found = strstr(outstr, "<,>"); - start = (found) ? (found - outstr) : -1; + if (replaceFirst(outstring, "<,>", "<milestone type=\"x-superiorComma\"/>") > -1) continue; - if (start > -1) { - outstring.replace(start, 3, (string)"<milestone type=\"x-superiorComma\"/>"); - continue; - - } // <NA> - outstr = outstring.c_str(); - found = strstr(outstr, "<NA>"); - start = (found) ? 
(found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 4, (string)"<milestone type=\"x-NA\"/>"); - continue; + if (replaceFirst(outstring, "<NA>", "<milestone type=\"x-superiorOne\"/>") > -1) continue; - } // <NB> - outstr = outstring.c_str(); - found = strstr(outstr, "<NB>"); - start = (found) ? (found - outstr) : -1; + if (replaceFirst(outstring, "<NB>", "<milestone type=\"x-superiorTwo\"/>") > -1) continue; - if (start > -1) { - outstring.replace(start, 4, (string)"<milestone type=\"x-NB\"/>"); - continue; - - } // <NC> - outstr = outstring.c_str(); - found = strstr(outstr, "<NC>"); - start = (found) ? (found - outstr) : -1; + if (replaceFirst(outstring, "<NC>", "<milestone type=\"x-superiorThree\"/>") > -1) continue; - if (start > -1) { - outstring.replace(start, 4, (string)"<milestone type=\"x-NC\"/>"); - continue; - - } // paragraph break <PM> - outstr = outstring.c_str(); - found = strstr(outstr, "<PM>"); - start = (found) ? (found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 4, (string)"<milestone type=\"line\" subType=\"x-PM\"/>"); - continue; + if (replaceFirst(outstring, "<PM>", "<milestone type=\"line\" subType=\"x-PM\"/>") > -1) continue; - } // poetry break <PN> - outstr = outstring.c_str(); - found = strstr(outstr, "<PN>"); - start = (found) ? (found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 4, (string)"<milestone type=\"x-PN\" />"); - continue; + if (replaceFirst(outstring, "<PN>", "<milestone type=\"x-Poetry\" />") > -1) continue; - } // poetry break <PO> - outstr = outstring.c_str(); - found = strstr(outstr, "<PO>"); - start = (found) ? 
(found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 4, (string)"<milestone type=\"line\" subType=\"x-PO\" />"); - continue; + if (replaceFirst(outstring, "<PO>", "<milestone type=\"line\" subType=\"x-Poetry\" />") > -1) continue; - } - // poetry break <PE> - outstr = outstring.c_str(); - found = strstr(outstr, "<PE>"); - start = (found) ? (found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 4, (string)"<milestone type=\"line\" subType=\"x-PE\" />"); - continue; + // poetry break <PR> + if (replaceFirst(outstring, "<PR>", "<milestone type=\"x-PoetryEnd\" />") > -1) continue; - } // letter indent <HL> - outstr = outstring.c_str(); - found = strstr(outstr, "<HL>"); - start = (found) ? (found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 4, (string)"<milestone type=\"x-HL\" />"); - continue; + if (replaceFirst(outstring, "<HL>", "<milestone type=\"x-HL\" />") > -1) continue; - } + // letter indent <HLL> + if (replaceFirst(outstring, "<HLL>", "<milestone type=\"line\" subType=\"x-HLL\" />") > -1) continue; break; } int strongsStart = 0; int transChangeStart = 0; - bool strongsFound = false; + bool strongsStartFound = false; bool intoken = false; bool intag = false; bool inNote = false; @@ -610,12 +524,21 @@ int main(int argc, char **argv) { string previousToken = ""; int tenseChange = -1; // strongs numbers - for (unsigned int i = 0; i < outstring.length(); i++) { - if ((!inNote) && (!intoken) && (!intag) && (!strongsFound)) { - if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) { - strongsStart = i; - strongsFound = true; + for (unsigned int i = 0; i < outstring.length(); ++i) { + if ((!strongsStartFound) && (!inNote) && (!intoken)) { + if (!intag) { + if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) { + strongsStart = i; + strongsStartFound = true; + } + } + else if (!strncmp(lastToken.c_str(), "hi", 2) && strncmp(lastToken.c_str(), "hi 
type=\"inscription\"", 21)) { + if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) { + strongsStart = tokenStart - 1; + strongsStartFound = true; + } } + } if (outstring[i] =='*') tenseChange = i; @@ -633,48 +556,61 @@ int main(int argc, char **argv) { } /* if (!strncmp(lastToken.c_str(), "seg", 3)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "divineName", 10)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; } */ if (!strncmp(lastToken.c_str(), "/divineName", 10)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "note", 4)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; inNote = true; } if (!strncmp(lastToken.c_str(), "/note", 5)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; inNote = false; } if (!strncmp(lastToken.c_str(), "q who=\"Jesus\"", 13)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; } - if (!strncmp(lastToken.c_str(), "seg type=\"otPassage\"", 19)) { - strongsFound = false; + if (!strncmp(lastToken.c_str(), "/q", 2)) { + strongsStartFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "seg type=\"otPassage\"", 20)) { + strongsStartFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "hi type=\"inscription\"", 21)) { + strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "/transChange", 12)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "milestone", 9)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "/seg", 4)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; } + if (!strncmp(lastToken.c_str(), "verse", 5)) { + strongsStartFound = false; + strongsStart = i+1; + } + if 
((!strncmp(lastToken.c_str(), "verse", 5))) { intag = false; } @@ -712,7 +648,7 @@ int main(int argc, char **argv) { i += lastToken.length() - 1; // (-1 because we're about to i++) } strongsStart = i+1; - strongsFound = false; + strongsStartFound = false; if (tenseChange > -1) { // relocate because position may have changed from all the token inserts const char *buf = outstring.c_str(); @@ -849,46 +785,55 @@ void unicodeTicks(string &outstring) { } } + +// return offset of occurence replace; otherwise -1 +int replaceFirst(string &haystack, string needle, string replacement) { + const char *outstr = haystack.c_str(); + const char *found = strstr(outstr, needle.c_str()); + int start = (found) ? (found - outstr) : -1; + if (start > -1) { + haystack.replace(start, needle.size(), replacement); + } + return start; +} + + void prepLine(string &outstring, int currentTestament, bool note) { int end = 0; while (1) { // ------------------------------------------ -// redundant markers +// redundant or unneeded or unknown markers size_t s; + + // <1EVA> + if (replaceFirst(outstring, "<1EVA>", "") > -1) continue; + + // <1EVB> + if (replaceFirst(outstring, "<1EVB>", "") > -1) continue; + + // <FA> + if (replaceFirst(outstring, "<FA>", "") > -1) continue; + + // <PR> + if (replaceFirst(outstring, "<PR>", "") > -1) continue; + // <V> - s = outstring.find("<V>"); - if (s != string::npos) { - outstring.erase(s, 3); - continue; - } + if (replaceFirst(outstring, "<V>", "") > -1) continue; + + // <T> + if (replaceFirst(outstring, "<T>", "") > -1) continue; // <P> - s = outstring.find("<P>"); - if (s != string::npos) { - outstring.erase(s, 3); - continue; - } + if (replaceFirst(outstring, "<P>", "") > -1) continue; // <C> - s = outstring.find("<C>"); - if (s != string::npos) { - outstring.erase(s, 3); - continue; - } + if (replaceFirst(outstring, "<C>", "") > -1) continue; // <CC> - s = outstring.find("<CC>"); - if (s != string::npos) { - outstring.erase(s, 4); - continue; - } + if 
(replaceFirst(outstring, "<CC>", "") > -1) continue; // <CP> - s = outstring.find("<CP>"); - if (s != string::npos) { - outstring.erase(s, 4); - continue; - } + if (replaceFirst(outstring, "<CP>", "") > -1) continue; // <$F...>> s = outstring.find("<$F"); @@ -897,14 +842,17 @@ void prepLine(string &outstring, int currentTestament, bool note) { outstring.erase(s, e-s+2); continue; } -// ---------------------------------------------- - - // <A> - s = outstring.find("<A>"); + // <EOV> + s = outstring.find("<EOV>"); if (s != string::npos) { - outstring.replace(s, 3, "<milestone type=\"line\" subType=\"x-A\"/>"); + size_t e = outstring.find("</EOV>", s); + outstring.erase(s, e-s+6); continue; } +// ---------------------------------------------- + + // <A> + if (replaceFirst(outstring, "<A>", "<milestone type=\"line\" subType=\"x-A\"/>") > -1) continue; // ~“ char uchar[6]; uchar[0]=0xcc; uchar[1]=0x83; uchar[2]=0xe2; uchar[3]=0x80; uchar[4]=0x9c; uchar[5]=0; @@ -961,42 +909,42 @@ void prepLine(string &outstring, int currentTestament, bool note) { target = "-«"; s = outstring.find(target); if (s != string::npos) { - outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"«\"/>"); + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"«\"/>"); continue; } // -» target = "-»"; s = outstring.find(target); if (s != string::npos) { - outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"»\"/>"); + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"»\"/>"); continue; } // -“ target = "-“"; s = outstring.find(target); if (s != string::npos) { - outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"“\"/>"); + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"“\"/>"); continue; } // -” target = "-”"; s = outstring.find(target); if (s != string::npos) { - outstring.replace(s, 
target.length(), "<milestone type=\"cQuote\" marker=\"”\"/>"); + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"”\"/>"); continue; } // -‘ target = "-‘"; s = outstring.find(target); if (s != string::npos) { - outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"‘\"/>"); + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"‘\"/>"); continue; } // -’ target = "-’"; s = outstring.find(target); if (s != string::npos) { - outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"’\"/>"); + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"’\"/>"); continue; } @@ -1009,182 +957,70 @@ void prepLine(string &outstring, int currentTestament, bool note) { outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"‘\"/>"); continue; } - + if (replaceFirst(outstring, "<B>", "<hi type=\"bold\">") > -1) continue; + if (replaceFirst(outstring, "</B>", "</hi>") > -1) continue; + + if (replaceFirst(outstring, "L<\\>{ORD}</>'<\\>{S}</>", "<seg><divineName>Lord's</divineName></seg>") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD’S}</>", "<seg><divineName>Lord’s</divineName></seg>") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD}</>’<\\>{S}</>", "<seg><divineName>Lord’s</divineName></seg>") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD}</>’<\\>{S} </>", "<seg><divineName>Lord’s</divineName></seg> ") > -1) continue; + if (replaceFirst(outstring, "L<\\>ORD</>’<\\>S<MH3068></>", "<seg><divineName>Lord’s<MH3068></divineName></seg>") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD,}</>", "<seg><divineName>Lord</divineName></seg>,") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD}</>", "<seg><divineName>Lord</divineName></seg>") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD} </>", "<seg><divineName>Lord</divineName></seg> ") > -1) continue; 
+ if (replaceFirst(outstring, "L}<\\>{ORD}</>{", "<seg><divineName>Lord</divineName></seg>") > -1) continue; + if (replaceFirst(outstring, "L}<\\>{ORD}</>", "<seg><divineName>Lord</divineName></seg>}") > -1) continue; + if (replaceFirst(outstring, "S<\\>{EN~OR}</>", "<seg><divineName>Sen~or</divineName></seg>") > -1) continue; + if (replaceFirst(outstring, "S<\\>{EÑOR}</>", "<seg><divineName>Señor</divineName></seg>") > -1) continue; + if (replaceFirst(outstring, "Y<\\>{AH,}</>", "<seg><divineName>Yah</divineName></seg>,") > -1) continue; + if (replaceFirst(outstring, "Y<\\>{AH,} </>", "<seg><divineName>Yah</divineName></seg>, ") > -1) continue; + if (replaceFirst(outstring, "Y<\\>{AH}</>", "<seg><divineName>Yah</divineName></seg>") > -1) continue; + + // Do these first before Daniel Inscriptions + // LB = add macron, only with 'a': ā + if (replaceFirst(outstring, "a<LB>", "ā") > -1) continue; + if (replaceFirst(outstring, "E<LE>", "Ē") > -1) continue; + if (replaceFirst(outstring, "e<LE>", "ē") > -1) continue; + + if (replaceFirst(outstring, "MENE", "<hi type=\"inscription\">Mene</hi>") > -1) continue; + if (replaceFirst(outstring, "M<\\>ENE</>", "<hi type=\"inscription\">Mene</hi>") > -1) continue; + if (replaceFirst(outstring, "M<\\>ENE:</>", "<hi type=\"inscription\">Mene</hi>:") > -1) continue; + if (replaceFirst(outstring, "TEKEL", "<hi type=\"inscription\">Tekel</hi>") > -1) continue; + if (replaceFirst(outstring, "T<\\>EKEL</>", "<hi type=\"inscription\">Tekel</hi>") > -1) continue; + if (replaceFirst(outstring, "T<\\>EKEL:</>", "<hi type=\"inscription\">Tekel</hi>:") > -1) continue; + if (replaceFirst(outstring, "UPHARSIN", "<hi type=\"inscription\">Upharsin</hi>") > -1) continue; + if (replaceFirst(outstring, "UFARSIN", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue; + if (replaceFirst(outstring, "U<\\>FARSIN</>", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue; + if (replaceFirst(outstring, "PERES", "<hi type=\"inscription\">Peres</hi>") 
> -1) continue; + if (replaceFirst(outstring, "P<\\>ERES</>", "<hi type=\"inscription\">Peres</hi>") > -1) continue; + if (replaceFirst(outstring, "P<\\>ERES:</>", "<hi type=\"inscription\">Peres</hi>:") > -1) continue; + + if (replaceFirst(outstring, "MENĒ", "<hi type=\"inscription\">Menē</hi>") > -1) continue; + if (replaceFirst(outstring, "M<\\>ENĒ</>", "<hi type=\"inscription\">Menē</hi>") > -1) continue; + if (replaceFirst(outstring, "M<\\>ENĒ<MH4484></>", "<hi type=\"inscription\">Menē<MH4484></hi>") > -1) continue; + if (replaceFirst(outstring, "M<\\>ENĒ:</>", "<hi type=\"inscription\">Menē</hi>:") > -1) continue; + if (replaceFirst(outstring, "TEKĒL", "<hi type=\"inscription\">Tekēl</hi>") > -1) continue; + if (replaceFirst(outstring, "T<\\>EKĒL</>", "<hi type=\"inscription\">Tekēl</hi>") > -1) continue; + if (replaceFirst(outstring, "T<\\>EKĒL<MH8625b></>", "<hi type=\"inscription\">Tekēl<MH8625b></hi>") > -1) continue; + if (replaceFirst(outstring, "T<\\>EKĒL:</>", "<hi type=\"inscription\">Tekēl</hi>:") > -1) continue; + if (replaceFirst(outstring, "UPHARSIN", "<hi type=\"inscription\">Upharsin</hi>") > -1) continue; + if (replaceFirst(outstring, "UFARSIN", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue; + if (replaceFirst(outstring, "U<\\>FARSIN</>", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue; + if (replaceFirst(outstring, "U<\\>PHARSIN<MH6537b></>", "<hi type=\"inscription\">Ufarsin<MH6537b></hi>") > -1) continue; + if (replaceFirst(outstring, "PERĒS", "<hi type=\"inscription\">Perēs</hi>") > -1) continue; + if (replaceFirst(outstring, "P<\\>ERĒS</>", "<hi type=\"inscription\">Perēs</hi>") > -1) continue; + if (replaceFirst(outstring, "P<\\>ERĒS<MH6537b></>", "<hi type=\"inscription\">Perēs<MH6537b></hi>") > -1) continue; + if (replaceFirst(outstring, "P<\\>ERĒS:</>", "<hi type=\"inscription\">Perēs</hi>:") > -1) continue; + + if (replaceFirst(outstring, "H<\\>OLY<MH6944> TO THE</> L<\\>ORD<MH3068></>", + "<hi 
type=\"inscription\">Holy<MH6944> to the L<\\>ORD<MH3068></></hi>") > -1) continue; const char *outstr = outstring.c_str(); - const char *found = strstr(outstr, "L\\{ORD}/'\\{S}/"); + const char *found = strstr(outstr+end, "<\\>"); int start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 14, "<seg><divineName>Lord's</divineName></seg>"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "L\\{ORD}/’\\{S}/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 16, "<seg><divineName>Lord’s</divineName></seg>"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "L\\{ORD,}/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 9, "<seg><divineName>Lord</divineName></seg>,"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "L\\{ORD}/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 8, "<seg><divineName>Lord</divineName></seg>"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "S\\{EN~OR}/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 10, "<seg><divineName>Sen~or</divineName></seg>"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "S\\{EÑOR}/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 10, "<seg><divineName>Señor</divineName></seg>"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "Y\\{AH,}/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 8, "<seg><divineName>Yah</divineName></seg>,"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "Y\\{AH}/"); - start = (found) ? 
(found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 7, "<seg><divineName>Yah</divineName></seg>"); - continue; - } - // is this really valid markup? should 'also be' be in small - // caps? 3 { and only 2 } ? - outstr = outstring.c_str(); - found = strstr(outstr, "L\\{ORD {also be}/}"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 18, "<seg><divineName>Lord</divineName></seg> also be}"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "L\\{ORD {give}/}"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 15, "<seg><divineName>Lord</divineName></seg> give}"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "L\\{ORD {bless}/}"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 16, "<seg><divineName>Lord</divineName></seg> bless}"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "L\\{ORD {are my Refuge; You have made the Most High your dwelling place}/}"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 73, "<seg><divineName>Lord</divineName></seg> are my Refuge; You have made the Most High your dwelling place}"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "MENE"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 4, "<hi type=\"inscription\">Mene</hi>"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "M\\ENE/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 6, "<hi type=\"inscription\">Mene</hi>"); - continue; - } - found = strstr(outstr, "M\\ENE:/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 7, "<hi type=\"inscription\">Mene</hi>:"); - continue; - } - found = strstr(outstr, "TEKEL"); - start = (found) ? 
(found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 5, "<hi type=\"inscription\">Tekel</hi>"); - continue; - } - found = strstr(outstr, "T\\EKEL/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 7, "<hi type=\"inscription\">Tekel</hi>"); - continue; - } - found = strstr(outstr, "T\\EKEL:/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 8, "<hi type=\"inscription\">Tekel</hi>:"); - continue; - } - found = strstr(outstr, "UPHARSIN"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 8, "<hi type=\"inscription\">Upharsin</hi>"); - continue; - } - found = strstr(outstr, "UFARSIN"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 7, "<hi type=\"inscription\">Ufarsin</hi>"); - continue; - } - found = strstr(outstr, "U\\FARSIN/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 9, "<hi type=\"inscription\">Ufarsin</hi>"); - continue; - } - found = strstr(outstr, "PERES"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 5, "<hi type=\"inscription\">Peres</hi>"); - continue; - } - found = strstr(outstr, "P\\ERES/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 7, "<hi type=\"inscription\">Peres</hi>"); - continue; - } - found = strstr(outstr, "P\\ERES:/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 8, "<hi type=\"inscription\">Peres</hi>:"); - continue; - } - // LB ??? Don't have info on this. Assuming '-' - outstr = outstring.c_str(); - found = strstr(outstr, "<LB>"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 4, "-"); - continue; - } - - found = strstr(outstr+end, "\\"); - start = (found) ? 
(found - outstr) : -1; if (start > -1) { - for (;start;start--) { + for (--start;start;start--) { if ((!std::isupper(outstring[start])) && (!strchr("\\/ ~", outstring[start]))) { break; @@ -1198,32 +1034,22 @@ void prepLine(string &outstring, int currentTestament, bool note) { else { outstring.insert(start, "<seg><divineName>"); start += 17; - const char *b = outstring.c_str(); - const char *found = strstr(b, "L\\{ORD}/"); - int s = (found) ? (found - b) : -1; - if (s > -1) - outstring.replace(s, 8, "Lord"); - end = s+4; + + int s = replaceFirst(outstring, "L<\\>{ORD}</>", "Lord"); + if (s > -1) end = s+4; } + + // do small cap logic bool lower = false; - bool token = false; - for (int charLen = 1;start < (int)outstring.length(); start+=charLen) { + string token = ""; + for (int charLen = 1; start < (int)outstring.length(); start += charLen) { const unsigned char *startChar = (const unsigned char *)outstring.c_str()+start; const unsigned char *endChar = startChar; SW_u32 testChar = getUniCharFromUTF8(&endChar, true); charLen = endChar - startChar; // set the size of the UTF-8 sequence - if (!token) { - if (testChar == '\\') { - lower = true; - outstring.erase(start, 1); - start--; - continue; - } - if (testChar == '/') { - lower = false; - outstring.erase(start, 1); - end = start; - start--; + if (!token.size()) { + if (testChar == '<') { + token = "<"; continue; } // what is this? 
It screws MENE MENE up in Daniel @@ -1238,10 +1064,40 @@ void prepLine(string &outstring, int currentTestament, bool note) { continue; } } - if (testChar == '>') - token = false; - if (testChar == '<') - token = true; + else { + token += testChar; + + if (testChar == '>') { + if (token == "<\\>") { + lower = true; + outstring.erase(start-2, 3); + start -= 3; + } + if (token == "</>") { + lower = false; + outstring.erase(start-2, 3); + end = start - 2; + start -= 3; + unsigned int nextStrongs = outstring.find("<M"); + unsigned int nextUp = outstring.find("</>"); + if (nextStrongs != string::npos && nextUp != string::npos && nextStrongs < nextUp) { + break; + } + } + // end divineName if we hit a PO in the middle + if (token == "<PO>") { + break; + } + unsigned int s = token.find("<N"); + if (s == string::npos || s > 0) s = token.find("<R"); + if (s == 0 && token.size() > 2) { + if (StringMgr::getSystemStringMgr()->isDigit(token[2])) { + break; + } + } + token = ""; + } + } } if (currentTestament) { outstring.insert(end, "</seg>"); @@ -1254,6 +1110,19 @@ void prepLine(string &outstring, int currentTestament, bool note) { continue; } + // these are places where we unnecessarily stop and then start otPassage + // we could make the otPassage logic work better, but these exception clean + // thing up for now. + if (replaceFirst(outstring, "</seg>’<seg type=\"otPassage\">s", "’s") > -1) continue; + if (replaceFirst(outstring, "</seg>-<seg type=\"otPassage\">", "-") > -1) continue; + if (replaceFirst(outstring, "</seg>,<seg type=\"otPassage\">", ",") > -1) continue; + if (replaceFirst(outstring, "</seg>, <seg type=\"otPassage\">", ", ") > -1) continue; + if (replaceFirst(outstring, "</seg>! <seg type=\"otPassage\">", "! 
") > -1) continue; + if (replaceFirst(outstring, "</seg>; <seg type=\"otPassage\">", "; ") > -1) continue; + if (replaceFirst(outstring, "</seg> <seg type=\"otPassage\">", " ") > -1) continue; + if (replaceFirst(outstring, "</seg>, ‘<seg type=\"otPassage\">", ", ‘") > -1) continue; + if (replaceFirst(outstring, "</seg>,’ <seg type=\"otPassage\">", ",’ ") > -1) continue; + if (note) { outstr = outstring.c_str(); found = strstr(outstr, "{"); @@ -1278,6 +1147,9 @@ void prepLine(string &outstring, int currentTestament, bool note) { } } } + +// if (replaceFirst(outstring, ")</hi>", "</hi>)") > -1) continue; + break; } } @@ -1309,10 +1181,10 @@ string getNoteBody(int fdn, string &noteLine, string osisID, int currentBookNo, end++; int book, chap; string bkch = noteLine.substr(start+2, end-start-2); - sscanf(bkch.c_str(), "%d:%d", &book, &chap); + sscanf(bkch.c_str(), "%d::%d", &book, &chap); int vNumEnd = noteLine.find_first_of(" ", end); int verse = atoi(noteLine.substr(end+1, vNumEnd-end-1).c_str()); - if ((book != currentBookNo) || (chap != currentChapter) || (verse != currentVerse)) { + if ((book != currentBookNo) || (chap != currentChapter) || ((verse != currentVerse) && /*kindof allow case in header before verse marker */ (verse != currentVerse+1))) { fprintf(stderr, "Not correct note line(%s - %s - %d:%d, %d:%d, %d:%d): %s\n\n", osisID.c_str(), nStr.c_str(), currentBookNo, book, currentChapter, chap, currentVerse, verse, noteLine.c_str()); exit(-1); } @@ -1334,7 +1206,9 @@ string getNoteBody(int fdn, string &noteLine, string osisID, int currentBookNo, if (start > -1) { start += tag.length(); - found = strstr(outstr+start, " <"); + const char *nFound = strstr(outstr+start, " <N"); + const char *rFound = strstr(outstr+start, " <R"); + found = (nFound && (!rFound || nFound < rFound)) ? nFound : rFound; int end = (found) ? (found - outstr) : -1; if (end<0) end = noteLine.length(); retVal = noteLine.substr(start, end-start); |