From 37ca7a7b1dcf1c9f962ff346f4b36c17f587db08 Mon Sep 17 00:00:00 2001 From: "Troy A. Griffitts" Date: Tue, 13 Sep 2022 13:27:50 +0000 Subject: First cut of the NASB 2020 module git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@549 07627401-56e2-0310-80f4-f8cd0041bdcd --- modules/nasb2020/Makefile | 43 ++- modules/nasb2020/generalchanges.sed | 14 + modules/nasb2020/lockosis.cpp | 622 +++++++++++++-------------------- modules/nasb2020/mods.d/nasb.conf | 20 +- modules/nasb2020/notes.corrections.sed | 8 + modules/nasb2020/srcfixes.sed | 1 + 6 files changed, 316 insertions(+), 392 deletions(-) create mode 100644 modules/nasb2020/generalchanges.sed create mode 100644 modules/nasb2020/notes.corrections.sed create mode 100644 modules/nasb2020/srcfixes.sed diff --git a/modules/nasb2020/Makefile b/modules/nasb2020/Makefile index 3db8992..f0d5f24 100644 --- a/modules/nasb2020/Makefile +++ b/modules/nasb2020/Makefile @@ -1,5 +1,8 @@ +#SRC_ZIP_PATH=pristine-private/nasb/historical/nasb.zip SRC_ZIP_PATH=pristine-private/nasb/NASB-2020-full-07-25-22.zip MODNAME=NASB +MODNAMEPREV=NASB1995 +MODNAMEPATHSEG=nasb REPO=lockman SRCNAME=nasb\ 2020\ master\ nocode\ 07-25-22.txt @@ -7,16 +10,17 @@ NOTESNAME=nasb\ 2020\ notes\ master\ nocode\ 07-25-22.txt all: $(MODNAME).zip -$(MODNAME).zip: modules/texts/ztext/nasb/ot.bzv +$(MODNAME).zip: modules/texts/ztext/$(MODNAMEPATHSEG)/ot.bzv zip -r $(MODNAME).zip mods.d modules -modules/texts/ztext/nasb/ot.bzv: combined.osis.xml - mkdir -p modules/texts/ztext/nasb - osis2mod modules/texts/ztext/nasb/ combined.osis.xml -z z -b 4 - chmod a+r modules/texts/ztext/nasb/* +modules/texts/ztext/$(MODNAMEPATHSEG)/ot.bzv: combined.osis.xml + mkdir -p modules/texts/ztext/$(MODNAMEPATHSEG) + osis2mod modules/texts/ztext/$(MODNAMEPATHSEG)/ combined.osis.xml -z z -b 4 + chmod a+r modules/texts/ztext/$(MODNAMEPATHSEG)/* combined.osis.xml: src.txt lockosis ./lockosis src.txt notes.txt 2> combined.osis.err > out.xml + xmllint out.xml 2> lint.err > /dev/null mv out.xml combined.osis.xml tmp/$(SRCNAME): src.zip @@ -24,21 +28,42 @@ tmp/$(SRCNAME): src.zip cd tmp && unzip -uD ../src.zip cd tmp && touch * -src.txt: tmp/$(SRCNAME) notes.corrections.sed - cd tmp && cp $(SRCNAME) ../src.txt +src.txt: tmp/$(SRCNAME) + cd tmp && sed -f ../srcfixes.sed $(SRCNAME) > src-patched.txt + cd tmp && cp src-patched.txt ../src.txt cd tmp && sed -f ../notes.corrections.sed $(NOTESNAME) > ../notes.txt src.zip: scp host.crosswire.org:~swordmod/${SRC_ZIP_PATH} src.zip +$(MODNAME)_previous.zip: + wget "https://crosswire.org/ftpmirror/pub/sword/lockmanpackages/$(MODNAMEPREV).zip" -O $(MODNAME)_previous.zip + +tmp/$(MODNAME)_previous.imp: $(MODNAME)_previous.zip generalchanges.sed + cd tmp && rm -rf mods.d modules + cd tmp && unzip -uD ../$(MODNAME)_previous.zip + cd tmp && mod2imp $(MODNAMEPREV) > $(MODNAME)_previous_raw.imp + cd tmp && sed -f ../generalchanges.sed -i $(MODNAME)_previous_raw.imp + cd tmp && sed 's/\([^^]\)\(<[^/]\)/\1\n\2/g' $(MODNAME)_previous_raw.imp > $(MODNAME)_previous.imp + +tmp/$(MODNAME).imp: $(MODNAME).zip + cd tmp && rm -rf mods.d modules + cd tmp && unzip -uD ../$(MODNAME).zip + cd tmp && mod2imp $(MODNAME) > $(MODNAME)_raw.imp + cd tmp && sed -f ../generalchanges.sed -i $(MODNAME)_raw.imp + cd tmp && sed 's/\([^^]\)\(<[^/]\)/\1\n\2/g' $(MODNAME)_raw.imp > $(MODNAME).imp + +diff: tmp/$(MODNAME).imp tmp/$(MODNAME)_previous.imp + diff -Pu tmp/$(MODNAME)_previous.imp tmp/$(MODNAME).imp > diff; [ $$? -lt 2 ] + deploy: $(MODNAME).zip scp $(MODNAME).zip host.crosswire.org:/home/ftp/pub/sword/$(REPO)/ clean: - rm -rf combined.osis.xml *.o lockosis modules $(MODNAME).zip notes.txt src.txt tmp + rm -rf combined.osis.* *.o lockosis modules $(MODNAME).zip notes.txt src.txt tmp diff lint.err distclean: clean - rm -rf src.zip + rm -rf src.zip $(MODNAME)_previous.zip .cpp: g++ -O0 -g -DHAVESWORD -Wall -Werror `pkg-config --cflags sword` $< -o $@ `pkg-config --libs sword` diff --git a/modules/nasb2020/generalchanges.sed b/modules/nasb2020/generalchanges.sed new file mode 100644 index 0000000..de1d2df --- /dev/null +++ b/modules/nasb2020/generalchanges.sed @@ -0,0 +1,14 @@ +#changes since last revision that we don't care about when diff'ing +# get rid of the uniq start and end ID attibutes as they will be numbered differently +s/sID="[^"]*"/sID="sX"/g +s/eID="[^"]*"/eID="eX"/g +#s/x-PN"/x-Poetry"/g +#s/x-PO"/x-Poetry"/g +#s///g +#s///g +#s/“//g +#s/‘//g +#s/"strong:H384"/"strong:this"/g +#s/"strong:H2088"/"strong:this"/g +#s/\(.\)<\/hi>/\1/g + diff --git a/modules/nasb2020/lockosis.cpp b/modules/nasb2020/lockosis.cpp index deaadbb..87904b3 100644 --- a/modules/nasb2020/lockosis.cpp +++ b/modules/nasb2020/lockosis.cpp @@ -140,6 +140,7 @@ void outTrailer(); void unicodeTicks(string &outstring); void prepLine(string &outstring, int currentTestament, bool note); string getNoteBody(int nfd, string ¬eLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx); +int replaceFirst(string &haystack, string needle, string replacement); @@ -302,9 +303,13 @@ int main(int argc, char **argv) { } // - if (!strncmp(outstring.c_str(), "", 4)) { - string heading = outstring.c_str()+4; - heading = heading.substr(0, heading.find("")); + if ((!strncmp(outstring.c_str(), "", 4)) || (!strncmp(outstring.c_str(), "", 5))) { + bool shi = outstring.c_str()[3] == 'I'; + if (shi) { + fprintf(stderr, "found shi.\n"); + } + string heading = outstring.c_str()+(shi ? 5 : 4); + heading = heading.substr(0, heading.find(shi ? "" : "")); outstring = ""; if (!strncmp(lookahead.c_str(), "", 4)) { @@ -315,7 +320,9 @@ int main(int argc, char **argv) { outstring += "\n"; inVerse = false; } - outstring += (string)"" + heading + (string)""; + outstring += (string)"" + heading + (string)""; } if (!strncmp(outstring.c_str(), "", 4)) { string heading = (outstring.c_str()+4); @@ -330,7 +337,7 @@ int main(int argc, char **argv) { - // {{x:y}} + // {{x::y}} // DUH, find_first_of looks for the first occurance of ANY single character of the supplied string // int start = outstring.find_first_of("{{"); // this is whacked and fails on ">[{.." Try it! const char *outstr = outstring.c_str(); @@ -344,7 +351,7 @@ int main(int argc, char **argv) { end++; int testmt = 0, book = 0, chap = 0; string bkch = outstring.substr(start+2, end-start-2); - sscanf(bkch.c_str(), "%d:%d", &book, &chap); + sscanf(bkch.c_str(), "%d::%d", &book, &chap); currentChapter = chap; int vNumEnd = outstring.find_first_of(" ", end); currentVerse = atoi(outstring.substr(end+1, vNumEnd-end-1).c_str()); @@ -354,7 +361,7 @@ int main(int argc, char **argv) { book -= nasbMax[0]; } if (currentBook != osisBooks[testmt][book-1]) { - fprintf(stderr, "error: %s: Found a book/chapter identifier out of place: {{%d:%d}} in book %s, chapter %d; bkch = %s; outstring = %s\n", argv[0], book, chap, currentBook.c_str(), currentChapter, bkch.c_str(), outstring.c_str()); + fprintf(stderr, "error: %s: Found a book/chapter identifier out of place: {{%d::%d}} in book %s, chapter %d; bkch = %s; outstring = %s\n", argv[0], book, chap, currentBook.c_str(), currentChapter, bkch.c_str(), outstring.c_str()); exit(-3); } char chapString[20], verseString[20]; @@ -397,6 +404,7 @@ int main(int argc, char **argv) { string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "N"); if (preChap) preChapNote = noteLine; outstring.replace(start, end-start+1, (string)"" + noteBody + ""); +// outstring.replace(start, end-start+1, (string)"--note--"); continue; } } @@ -404,56 +412,23 @@ int main(int argc, char **argv) { // - outstr = outstring.c_str(); - found = strstr(outstr, ""); - start = (found) ? (found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 4, (string)""); - continue; - - } + if (replaceFirst(outstring, "", "") > -1) continue; // - outstr = outstring.c_str(); - found = strstr(outstr, ""); - start = (found) ? (found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 5, (string)""); - continue; + if (replaceFirst(outstring, "", "") > -1) continue; - } // - outstr = outstring.c_str(); - found = strstr(outstr, ""); - start = (found) ? (found - outstr) : -1; + if (replaceFirst(outstring, "", "") > -1) continue; - if (start > -1) { - outstring.replace(start, 4, (string)""); - continue; + if (replaceFirst(outstring, "", "") > -1) continue; + if (replaceFirst(outstring, "</SHI>", "") > -1) continue; - } // - outstr = outstring.c_str(); - found = strstr(outstr, ""); - start = (found) ? (found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 3, (string)"¿"); - continue; - - } + if (replaceFirst(outstring, "", "¿") > -1) continue; + // - outstr = outstring.c_str(); - found = strstr(outstr, ""); - start = (found) ? (found - outstr) : -1; + if (replaceFirst(outstring, "", "¡") > -1) continue; - if (start > -1) { - outstring.replace(start, 3, (string)"¡"); - continue; - - } outstr = outstring.c_str(); found = strstr(outstr, " - outstr = outstring.c_str(); - found = strstr(outstr, "<,>"); - start = (found) ? (found - outstr) : -1; + if (replaceFirst(outstring, "<,>", "") > -1) continue; - if (start > -1) { - outstring.replace(start, 3, (string)""); - continue; - - } // - outstr = outstring.c_str(); - found = strstr(outstr, ""); - start = (found) ? (found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 4, (string)""); - continue; + if (replaceFirst(outstring, "", "") > -1) continue; - } // - outstr = outstring.c_str(); - found = strstr(outstr, ""); - start = (found) ? (found - outstr) : -1; + if (replaceFirst(outstring, "", "") > -1) continue; - if (start > -1) { - outstring.replace(start, 4, (string)""); - continue; - - } // - outstr = outstring.c_str(); - found = strstr(outstr, ""); - start = (found) ? (found - outstr) : -1; + if (replaceFirst(outstring, "", "") > -1) continue; - if (start > -1) { - outstring.replace(start, 4, (string)""); - continue; - - } // paragraph break - outstr = outstring.c_str(); - found = strstr(outstr, ""); - start = (found) ? (found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 4, (string)""); - continue; + if (replaceFirst(outstring, "", "") > -1) continue; - } // poetry break - outstr = outstring.c_str(); - found = strstr(outstr, ""); - start = (found) ? (found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 4, (string)""); - continue; + if (replaceFirst(outstring, "", "") > -1) continue; - } // poetry break - outstr = outstring.c_str(); - found = strstr(outstr, ""); - start = (found) ? (found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 4, (string)""); - continue; + if (replaceFirst(outstring, "", "") > -1) continue; - } - // poetry break - outstr = outstring.c_str(); - found = strstr(outstr, ""); - start = (found) ? (found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 4, (string)""); - continue; + // poetry break + if (replaceFirst(outstring, "", "") > -1) continue; - } // letter indent - outstr = outstring.c_str(); - found = strstr(outstr, ""); - start = (found) ? (found - outstr) : -1; - - if (start > -1) { - outstring.replace(start, 4, (string)""); - continue; + if (replaceFirst(outstring, "", "") > -1) continue; - } + // letter indent + if (replaceFirst(outstring, "", "") > -1) continue; break; } int strongsStart = 0; int transChangeStart = 0; - bool strongsFound = false; + bool strongsStartFound = false; bool intoken = false; bool intag = false; bool inNote = false; @@ -610,12 +524,21 @@ int main(int argc, char **argv) { string previousToken = ""; int tenseChange = -1; // strongs numbers - for (unsigned int i = 0; i < outstring.length(); i++) { - if ((!inNote) && (!intoken) && (!intag) && (!strongsFound)) { - if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) { - strongsStart = i; - strongsFound = true; + for (unsigned int i = 0; i < outstring.length(); ++i) { + if ((!strongsStartFound) && (!inNote) && (!intoken)) { + if (!intag) { + if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) { + strongsStart = i; + strongsStartFound = true; + } + } + else if (!strncmp(lastToken.c_str(), "hi", 2) && strncmp(lastToken.c_str(), "hi type=\"inscription\"", 21)) { + if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) { + strongsStart = tokenStart - 1; + strongsStartFound = true; + } } + } if (outstring[i] =='*') tenseChange = i; @@ -633,48 +556,61 @@ int main(int argc, char **argv) { } /* if (!strncmp(lastToken.c_str(), "seg", 3)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "divineName", 10)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; } */ if (!strncmp(lastToken.c_str(), "/divineName", 10)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "note", 4)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; inNote = true; } if (!strncmp(lastToken.c_str(), "/note", 5)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; inNote = false; } if (!strncmp(lastToken.c_str(), "q who=\"Jesus\"", 13)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; } - if (!strncmp(lastToken.c_str(), "seg type=\"otPassage\"", 19)) { - strongsFound = false; + if (!strncmp(lastToken.c_str(), "/q", 2)) { + strongsStartFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "seg type=\"otPassage\"", 20)) { + strongsStartFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "hi type=\"inscription\"", 21)) { + strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "/transChange", 12)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "milestone", 9)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "/seg", 4)) { - strongsFound = false; + strongsStartFound = false; strongsStart = i+1; } + if (!strncmp(lastToken.c_str(), "verse", 5)) { + strongsStartFound = false; + strongsStart = i+1; + } + if ((!strncmp(lastToken.c_str(), "verse", 5))) { intag = false; } @@ -712,7 +648,7 @@ int main(int argc, char **argv) { i += lastToken.length() - 1; // (-1 because we're about to i++) } strongsStart = i+1; - strongsFound = false; + strongsStartFound = false; if (tenseChange > -1) { // relocate because position may have changed from all the token inserts const char *buf = outstring.c_str(); @@ -849,46 +785,55 @@ void unicodeTicks(string &outstring) { } } + +// return offset of occurence replace; otherwise -1 +int replaceFirst(string &haystack, string needle, string replacement) { + const char *outstr = haystack.c_str(); + const char *found = strstr(outstr, needle.c_str()); + int start = (found) ? (found - outstr) : -1; + if (start > -1) { + haystack.replace(start, needle.size(), replacement); + } + return start; +} + + void prepLine(string &outstring, int currentTestament, bool note) { int end = 0; while (1) { // ------------------------------------------ -// redundant markers +// redundant or unneeded or unknown markers size_t s; + + // <1EVA> + if (replaceFirst(outstring, "<1EVA>", "") > -1) continue; + + // <1EVB> + if (replaceFirst(outstring, "<1EVB>", "") > -1) continue; + + // + if (replaceFirst(outstring, "", "") > -1) continue; + + // + if (replaceFirst(outstring, "", "") > -1) continue; + // - s = outstring.find(""); - if (s != string::npos) { - outstring.erase(s, 3); - continue; - } + if (replaceFirst(outstring, "", "") > -1) continue; + + // + if (replaceFirst(outstring, "", "") > -1) continue; //

- s = outstring.find("

"); - if (s != string::npos) { - outstring.erase(s, 3); - continue; - } + if (replaceFirst(outstring, "

", "") > -1) continue; // - s = outstring.find(""); - if (s != string::npos) { - outstring.erase(s, 3); - continue; - } + if (replaceFirst(outstring, "", "") > -1) continue; // - s = outstring.find(""); - if (s != string::npos) { - outstring.erase(s, 4); - continue; - } + if (replaceFirst(outstring, "", "") > -1) continue; // - s = outstring.find(""); - if (s != string::npos) { - outstring.erase(s, 4); - continue; - } + if (replaceFirst(outstring, "", "") > -1) continue; // <$F...>> s = outstring.find("<$F"); @@ -897,14 +842,17 @@ void prepLine(string &outstring, int currentTestament, bool note) { outstring.erase(s, e-s+2); continue; } -// ---------------------------------------------- - - // - s = outstring.find(""); + // + s = outstring.find(""); if (s != string::npos) { - outstring.replace(s, 3, ""); + size_t e = outstring.find("", s); + outstring.erase(s, e-s+6); continue; } +// ---------------------------------------------- + + // + if (replaceFirst(outstring, "", "") > -1) continue; // ~“ char uchar[6]; uchar[0]=0xcc; uchar[1]=0x83; uchar[2]=0xe2; uchar[3]=0x80; uchar[4]=0x9c; uchar[5]=0; @@ -961,42 +909,42 @@ void prepLine(string &outstring, int currentTestament, bool note) { target = "-«"; s = outstring.find(target); if (s != string::npos) { - outstring.replace(s, target.length(), ""); + outstring.replace(s, target.length(), ""); continue; } // -» target = "-»"; s = outstring.find(target); if (s != string::npos) { - outstring.replace(s, target.length(), ""); + outstring.replace(s, target.length(), ""); continue; } // -“ target = "-“"; s = outstring.find(target); if (s != string::npos) { - outstring.replace(s, target.length(), ""); + outstring.replace(s, target.length(), ""); continue; } // -” target = "-”"; s = outstring.find(target); if (s != string::npos) { - outstring.replace(s, target.length(), ""); + outstring.replace(s, target.length(), ""); continue; } // -‘ target = "-‘"; s = outstring.find(target); if (s != string::npos) { - outstring.replace(s, target.length(), ""); + outstring.replace(s, target.length(), ""); continue; } // -’ target = "-’"; s = outstring.find(target); if (s != string::npos) { - outstring.replace(s, target.length(), ""); + outstring.replace(s, target.length(), ""); continue; } @@ -1009,182 +957,70 @@ void prepLine(string &outstring, int currentTestament, bool note) { outstring.replace(s, target.length(), ""); continue; } - + if (replaceFirst(outstring, "", "") > -1) continue; + if (replaceFirst(outstring, "", "") > -1) continue; + + if (replaceFirst(outstring, "L<\\>{ORD}'<\\>{S}", "Lord's") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD’S}", "Lord’s") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD}’<\\>{S}", "Lord’s") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD}’<\\>{S} ", "Lord’s ") > -1) continue; + if (replaceFirst(outstring, "L<\\>ORD’<\\>S", "Lord’s") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD,}", "Lord,") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD}", "Lord") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD} ", "Lord ") > -1) continue; + if (replaceFirst(outstring, "L}<\\>{ORD}{", "Lord") > -1) continue; + if (replaceFirst(outstring, "L}<\\>{ORD}", "Lord}") > -1) continue; + if (replaceFirst(outstring, "S<\\>{EN~OR}", "Sen~or") > -1) continue; + if (replaceFirst(outstring, "S<\\>{EÑOR}", "Señor") > -1) continue; + if (replaceFirst(outstring, "Y<\\>{AH,}", "Yah,") > -1) continue; + if (replaceFirst(outstring, "Y<\\>{AH,} ", "Yah, ") > -1) continue; + if (replaceFirst(outstring, "Y<\\>{AH}", "Yah") > -1) continue; + + // Do these first before Daniel Inscriptions + // LB = add macron, only with 'a': ā + if (replaceFirst(outstring, "a", "ā") > -1) continue; + if (replaceFirst(outstring, "E", "Ē") > -1) continue; + if (replaceFirst(outstring, "e", "ē") > -1) continue; + + if (replaceFirst(outstring, "MENE", "Mene") > -1) continue; + if (replaceFirst(outstring, "M<\\>ENE", "Mene") > -1) continue; + if (replaceFirst(outstring, "M<\\>ENE:", "Mene:") > -1) continue; + if (replaceFirst(outstring, "TEKEL", "Tekel") > -1) continue; + if (replaceFirst(outstring, "T<\\>EKEL", "Tekel") > -1) continue; + if (replaceFirst(outstring, "T<\\>EKEL:", "Tekel:") > -1) continue; + if (replaceFirst(outstring, "UPHARSIN", "Upharsin") > -1) continue; + if (replaceFirst(outstring, "UFARSIN", "Ufarsin") > -1) continue; + if (replaceFirst(outstring, "U<\\>FARSIN", "Ufarsin") > -1) continue; + if (replaceFirst(outstring, "PERES", "Peres") > -1) continue; + if (replaceFirst(outstring, "P<\\>ERES", "Peres") > -1) continue; + if (replaceFirst(outstring, "P<\\>ERES:", "Peres:") > -1) continue; + + if (replaceFirst(outstring, "MENĒ", "Menē") > -1) continue; + if (replaceFirst(outstring, "M<\\>ENĒ", "Menē") > -1) continue; + if (replaceFirst(outstring, "M<\\>ENĒ", "Menē") > -1) continue; + if (replaceFirst(outstring, "M<\\>ENĒ:", "Menē:") > -1) continue; + if (replaceFirst(outstring, "TEKĒL", "Tekēl") > -1) continue; + if (replaceFirst(outstring, "T<\\>EKĒL", "Tekēl") > -1) continue; + if (replaceFirst(outstring, "T<\\>EKĒL", "Tekēl") > -1) continue; + if (replaceFirst(outstring, "T<\\>EKĒL:", "Tekēl:") > -1) continue; + if (replaceFirst(outstring, "UPHARSIN", "Upharsin") > -1) continue; + if (replaceFirst(outstring, "UFARSIN", "Ufarsin") > -1) continue; + if (replaceFirst(outstring, "U<\\>FARSIN", "Ufarsin") > -1) continue; + if (replaceFirst(outstring, "U<\\>PHARSIN", "Ufarsin") > -1) continue; + if (replaceFirst(outstring, "PERĒS", "Perēs") > -1) continue; + if (replaceFirst(outstring, "P<\\>ERĒS", "Perēs") > -1) continue; + if (replaceFirst(outstring, "P<\\>ERĒS", "Perēs") > -1) continue; + if (replaceFirst(outstring, "P<\\>ERĒS:", "Perēs:") > -1) continue; + + if (replaceFirst(outstring, "H<\\>OLY TO THE L<\\>ORD", + "Holy to the L<\\>ORD") > -1) continue; const char *outstr = outstring.c_str(); - const char *found = strstr(outstr, "L\\{ORD}/'\\{S}/"); + const char *found = strstr(outstr+end, "<\\>"); int start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 14, "Lord's"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "L\\{ORD}/’\\{S}/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 16, "Lord’s"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "L\\{ORD,}/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 9, "Lord,"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "L\\{ORD}/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 8, "Lord"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "S\\{EN~OR}/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 10, "Sen~or"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "S\\{EÑOR}/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 10, "Señor"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "Y\\{AH,}/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 8, "Yah,"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "Y\\{AH}/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 7, "Yah"); - continue; - } - // is this really valid markup? should 'also be' be in small - // caps? 3 { and only 2 } ? - outstr = outstring.c_str(); - found = strstr(outstr, "L\\{ORD {also be}/}"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 18, "Lord also be}"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "L\\{ORD {give}/}"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 15, "Lord give}"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "L\\{ORD {bless}/}"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 16, "Lord bless}"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "L\\{ORD {are my Refuge; You have made the Most High your dwelling place}/}"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 73, "Lord are my Refuge; You have made the Most High your dwelling place}"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "MENE"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 4, "Mene"); - continue; - } - outstr = outstring.c_str(); - found = strstr(outstr, "M\\ENE/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 6, "Mene"); - continue; - } - found = strstr(outstr, "M\\ENE:/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 7, "Mene:"); - continue; - } - found = strstr(outstr, "TEKEL"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 5, "Tekel"); - continue; - } - found = strstr(outstr, "T\\EKEL/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 7, "Tekel"); - continue; - } - found = strstr(outstr, "T\\EKEL:/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 8, "Tekel:"); - continue; - } - found = strstr(outstr, "UPHARSIN"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 8, "Upharsin"); - continue; - } - found = strstr(outstr, "UFARSIN"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 7, "Ufarsin"); - continue; - } - found = strstr(outstr, "U\\FARSIN/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 9, "Ufarsin"); - continue; - } - found = strstr(outstr, "PERES"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 5, "Peres"); - continue; - } - found = strstr(outstr, "P\\ERES/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 7, "Peres"); - continue; - } - found = strstr(outstr, "P\\ERES:/"); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 8, "Peres:"); - continue; - } - // LB ??? Don't have info on this. Assuming '-' - outstr = outstring.c_str(); - found = strstr(outstr, ""); - start = (found) ? (found - outstr) : -1; - if (start > -1) { - outstring.replace(start, 4, "-"); - continue; - } - - found = strstr(outstr+end, "\\"); - start = (found) ? (found - outstr) : -1; if (start > -1) { - for (;start;start--) { + for (--start;start;start--) { if ((!std::isupper(outstring[start])) && (!strchr("\\/ ~", outstring[start]))) { break; @@ -1198,32 +1034,22 @@ void prepLine(string &outstring, int currentTestament, bool note) { else { outstring.insert(start, ""); start += 17; - const char *b = outstring.c_str(); - const char *found = strstr(b, "L\\{ORD}/"); - int s = (found) ? (found - b) : -1; - if (s > -1) - outstring.replace(s, 8, "Lord"); - end = s+4; + + int s = replaceFirst(outstring, "L<\\>{ORD}", "Lord"); + if (s > -1) end = s+4; } + + // do small cap logic bool lower = false; - bool token = false; - for (int charLen = 1;start < (int)outstring.length(); start+=charLen) { + string token = ""; + for (int charLen = 1; start < (int)outstring.length(); start += charLen) { const unsigned char *startChar = (const unsigned char *)outstring.c_str()+start; const unsigned char *endChar = startChar; SW_u32 testChar = getUniCharFromUTF8(&endChar, true); charLen = endChar - startChar; // set the size of the UTF-8 sequence - if (!token) { - if (testChar == '\\') { - lower = true; - outstring.erase(start, 1); - start--; - continue; - } - if (testChar == '/') { - lower = false; - outstring.erase(start, 1); - end = start; - start--; + if (!token.size()) { + if (testChar == '<') { + token = "<"; continue; } // what is this? It screws MENE MENE up in Daniel @@ -1238,10 +1064,40 @@ void prepLine(string &outstring, int currentTestament, bool note) { continue; } } - if (testChar == '>') - token = false; - if (testChar == '<') - token = true; + else { + token += testChar; + + if (testChar == '>') { + if (token == "<\\>") { + lower = true; + outstring.erase(start-2, 3); + start -= 3; + } + if (token == "") { + lower = false; + outstring.erase(start-2, 3); + end = start - 2; + start -= 3; + unsigned int nextStrongs = outstring.find(""); + if (nextStrongs != string::npos && nextUp != string::npos && nextStrongs < nextUp) { + break; + } + } + // end divineName if we hit a PO in the middle + if (token == "") { + break; + } + unsigned int s = token.find(" 0) s = token.find(" 2) { + if (StringMgr::getSystemStringMgr()->isDigit(token[2])) { + break; + } + } + token = ""; + } + } } if (currentTestament) { outstring.insert(end, ""); @@ -1254,6 +1110,19 @@ void prepLine(string &outstring, int currentTestament, bool note) { continue; } + // these are places where we unnecessarily stop and then start otPassage + // we could make the otPassage logic work better, but these exception clean + // thing up for now. + if (replaceFirst(outstring, "’s", "’s") > -1) continue; + if (replaceFirst(outstring, "-", "-") > -1) continue; + if (replaceFirst(outstring, ",", ",") > -1) continue; + if (replaceFirst(outstring, ", ", ", ") > -1) continue; + if (replaceFirst(outstring, "! ", "! ") > -1) continue; + if (replaceFirst(outstring, "; ", "; ") > -1) continue; + if (replaceFirst(outstring, " ", " ") > -1) continue; + if (replaceFirst(outstring, ", ‘", ", ‘") > -1) continue; + if (replaceFirst(outstring, ",’ ", ",’ ") > -1) continue; + if (note) { outstr = outstring.c_str(); found = strstr(outstr, "{"); @@ -1278,6 +1147,9 @@ void prepLine(string &outstring, int currentTestament, bool note) { } } } + +// if (replaceFirst(outstring, ")", ")") > -1) continue; + break; } } @@ -1309,10 +1181,10 @@ string getNoteBody(int fdn, string ¬eLine, string osisID, int currentBookNo, end++; int book, chap; string bkch = noteLine.substr(start+2, end-start-2); - sscanf(bkch.c_str(), "%d:%d", &book, &chap); + sscanf(bkch.c_str(), "%d::%d", &book, &chap); int vNumEnd = noteLine.find_first_of(" ", end); int verse = atoi(noteLine.substr(end+1, vNumEnd-end-1).c_str()); - if ((book != currentBookNo) || (chap != currentChapter) || (verse != currentVerse)) { + if ((book != currentBookNo) || (chap != currentChapter) || ((verse != currentVerse) && /*kindof allow case in header before verse marker */ (verse != currentVerse+1))) { fprintf(stderr, "Not correct note line(%s - %s - %d:%d, %d:%d, %d:%d): %s\n\n", osisID.c_str(), nStr.c_str(), currentBookNo, book, currentChapter, chap, currentVerse, verse, noteLine.c_str()); exit(-1); } @@ -1334,7 +1206,9 @@ string getNoteBody(int fdn, string ¬eLine, string osisID, int currentBookNo, if (start > -1) { start += tag.length(); - found = strstr(outstr+start, " <"); + const char *nFound = strstr(outstr+start, " Learn more about the NASB. -About=NEW AMERICAN STANDARD BIBLE \par\pard \ -Copyright (C) 1960, 1962, 1963, 1968, 1971, 1972, 1973, 1975, 1977, 1995 by THE LOCKMAN FOUNDATION \par\pard \ +About=NEW AMERICAN STANDARD BIBLE - NASB 2020 \par\pard \ +Copyright (C) 1960, 1962, 1963, 1968, 1971, 1972, 1973, 1975, 1977, 1995, 2020 by THE LOCKMAN FOUNDATION \par\pard \ A Corporation Not for Profit \par\pard \ LA HABRA, CA \par\pard \ All Rights Reserved \par\pard \ http://www.lockman.org \par\pard \ \par\pard \ -The "NASB," "NAS," "New American Standard Bible," and "New American Standard" trademarks are registered in the United States Patent and Trademark Office by The Lockman Foundation. Use of these trademarks requires the permission of The Lockman Foundation. \par\pard \ +The "NASB," "NAS," "New American Standard Bible," "New American Standard," and lighthouse logo are trademarks registered in the United States Patent and Trademark Office by The Lockman Foundation. Use of these trademarks requires the permission of The Lockman Foundation. \par\pard \ \par\pard \ PERMISSION TO QUOTE \par\pard \ \par\pard \ @@ -38,7 +40,7 @@ The text of the New American Standard Bible(R) may be quoted and/or reprinted up \par\pard \ Notice of copyright must appear on the title or copyright page of the work as follows: \par\pard \ \par\pard \ -"Scripture taken from the NEW AMERICAN STANDARD BIBLE(R), Copyright (C) 1960, 1962, 1963, 1968, 1971, 1972, 1973, 1975, 1977, 1995 by The Lockman Foundation. Used by permission." \par\pard \ +"Scripture taken from the NEW AMERICAN STANDARD BIBLE(R), Copyright (C) 1960, 1962, 1963, 1968, 1971, 1972, 1973, 1975, 1977, 1995, 2020 by The Lockman Foundation. Used by permission." \par\pard \ \par\pard \ When quotations from the NASB(R) text are used in not-for-sale media, such as church bulletins, orders of service, posters, transparencies or similar media, the abbreviation (NASB) may be used at the end of the quotation. \par\pard \ \par\pard \ diff --git a/modules/nasb2020/notes.corrections.sed b/modules/nasb2020/notes.corrections.sed new file mode 100644 index 0000000..cc2a4f6 --- /dev/null +++ b/modules/nasb2020/notes.corrections.sed @@ -0,0 +1,8 @@ +/^{{05::4}}43/ { +:notdone + N + s/^\({{05::4}}43[^\n]*\)\(\n\([^\n]*\n\)*\)\({{05::4}}42[^\n]*\)$/\4\2\1/ + t + bnotdone +} + diff --git a/modules/nasb2020/srcfixes.sed b/modules/nasb2020/srcfixes.sed new file mode 100644 index 0000000..14a6aeb --- /dev/null +++ b/modules/nasb2020/srcfixes.sed @@ -0,0 +1 @@ +s/<\\>ROBBERS<\/>’/<\\>ROBBERS’<\/>/g -- cgit