diff options
Diffstat (limited to 'modules')
-rw-r--r-- | modules/nasb/Makefile | 36 | ||||
-rw-r--r-- | modules/nasb/lockosis.cpp | 1370 | ||||
-rw-r--r-- | modules/nasb/mods.d/nasb.conf | 49 |
3 files changed, 1455 insertions, 0 deletions
diff --git a/modules/nasb/Makefile b/modules/nasb/Makefile new file mode 100644 index 0000000..fd117fe --- /dev/null +++ b/modules/nasb/Makefile @@ -0,0 +1,36 @@ +#CIPHERKEY=1234-abcd-9876 +#CIPHERKEY=nasb123 +all: NASB.zip + +NASB.zip: modules/texts/ztext/nasb/ot.bzv + zip -r NASB.zip mods.d modules + +modules/texts/ztext/nasb/ot.bzv: nasb.osis.xml + mkdir -p modules/texts/ztext/nasb + osis2mod modules/texts/ztext/nasb/ nasb.osis.xml -z z -b 4 #-c ${CIPHERKEY} + chmod a+r modules/texts/ztext/nasb/* + +nasb-lockman-src.zip: + scp scribe@host.crosswire.org:~swordmod/pristine-private/nasb/nasb.zip nasb-lockman-src.zip + +nasb.txt: nasb-lockman-src.zip + unzip nasb-lockman-src.zip + mv "nasb cross refs.txt" nasb_ref.txt + +nasb.osis.xml: nasb.txt lockosis + ./lockosis nasb.txt nasb_ref.txt > nasb.osis.xml + +deploy: NASB.zip + scp NASB.zip scribe@host.crosswire.org:/home/ftp/pub/sword/lockmanpackages/ + +clean: + rm -rf nasb.osis.xml *.o lockosis modules NASB.zip nasb_ref.txt "nasb cross refs.txt" nasbinfo.txt codenc.txt FRONT95.txt "nasb(c).txt" nasb.txt + +distclean: clean + rm -rf nasb-lockman-src.zip + +.cpp: + g++ -O0 -g -DHAVESWORD -Wall -Werror `pkg-config --cflags sword` $< -o $@ `pkg-config --libs sword` +.c: + gcc -O0 -g -DHAVESWORD -Wall -Werror `pkg-config --cflags sword` $< -o $@ `pkg-config --libs sword` -lstdc++ + diff --git a/modules/nasb/lockosis.cpp b/modules/nasb/lockosis.cpp new file mode 100644 index 0000000..deaadbb --- /dev/null +++ b/modules/nasb/lockosis.cpp @@ -0,0 +1,1370 @@ +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <clocale> +#include <locale> + + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <iostream> +#include <string> + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +#ifdef HAVESWORD +#include <versekey.h> +#include <localemgr.h> +#include <stringmgr.h> +using namespace sword; +#endif + +using std::string; +using std::cout; +using std::endl; + +static const char *osisOTBooks[] = { + "Gen", "Exod", "Lev", "Num", "Deut", "Josh", "Judg", + "Ruth", "1Sam", "2Sam", "1Kgs", "2Kgs", "1Chr", + "2Chr", "Ezra", "Neh", "Esth", "Job", "Ps", "Prov", + "Eccl", "Song", "Isa", "Jer", "Lam", + "Ezek", "Dan", "Hos", "Joel", "Amos", "Obad", "Jonah", "Mic", + "Nah", "Hab", "Zeph", "Hag", "Zech", "Mal", +// extra-Biblical + "Bar", "PrAzar", "Bel", "Sus", "1Esd", "2Esd", "AddEsth", + "EpJer", "Jdt", "1Macc", "2Macc", "3Macc", "4Macc", "PrMan", + "Ps151", "Sir", "Tob", "Wis"}; +static const char *osisNTBooks[] = { + "Matt", "Mark", "Luke", "John", "Acts", "Rom", "1Cor", + "2Cor", "Gal", "Eph", "Phil", "Col", + "1Thess", "2Thess", "1Tim", "2Tim", "Titus", + "Phlm", "Heb", "Jas", "1Pet", "2Pet", "1John", "2John", + "3John", "Jude", "Rev"}; +static const char **osisBooks[] = { osisOTBooks, osisNTBooks }; + +const char nasbMax[2] = {39, 27}; + +char readline(int fd, char **buf) { + char ch; + if (*buf) + delete [] *buf; + *buf = 0; + int len; + + + long index = lseek(fd, 0, SEEK_CUR); + // clean up any preceding white space + while ((len = read(fd, &ch, 1)) == 1) { + if ((ch != 10) && (ch != 13) && (ch != ' ') && (ch != '\t')) + break; + else index++; + } + + + while (ch != 10) { + if ((len = read(fd, &ch, 1)) != 1) + break; + } + + int size = (lseek(fd, 0, SEEK_CUR) - index) - 1; + + *buf = new char [ (size+2) * 2 ]; + + if (size > 0) { + lseek(fd, index, SEEK_SET); + read(fd, *buf, size); + read(fd, &ch, 1); //pop terminating char + (*buf)[size] = 0; + + // clean up any trailing junk on buf + int buflen = strlen(*buf); + for (char *it = *buf+(buflen-1); it > *buf; it--) { + if ((*it != 10) && (*it != 13) && (*it != ' ') && (*it != '\t')) + break; + else *it = 0; + } + // convert all spanish characters to combined + for (unsigned char *it = (unsigned char *)(*buf)+1; *it; it++) { + switch (*it) { +/* + case 0xE2 : // ‘ + if (isalpha(it[-1]) && it[1] == 0x80 && it[2] == 0x98) { + memmove(it, it+1, buflen - (it-(unsigned char *)*buf)); + buflen--; + it[0] = 0xcc; + it[1] = 0x80; // yeah, I know it's already 0x80, but that's just a coincidence + } + else { + fprintf(stderr, "oddity: %s\n", *buf); + exit(-4); + } + break; +*/ + case 0x60 : // ` + if (isalpha(it[-1])) { + memmove(it+1, it, buflen - (it-(unsigned char *)*buf) + 1); + buflen++; + it[0] = 0xcc; + it[1] = 0x80; + } + else { +// fprintf(stderr, "oddity: %s\n", *buf); +// exit(-4); + } + break; + case 0x7E : // ~ + memmove(it+1, it, buflen - (it-(unsigned char *)*buf) + 1); + buflen++; + it[0] = 0xcc; + it[1] = 0x83; + break; + } + } + } + else **buf = 0; + return !len; +} + + +void outHeader(); +void outTrailer(); +void unicodeTicks(string &outstring); +void prepLine(string &outstring, int currentTestament, bool note); +string getNoteBody(int nfd, string ¬eLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx); + + + + +int main(int argc, char **argv) { + +#ifdef HAVESWORD + LocaleMgr::getSystemLocaleMgr()->setDefaultLocaleName("es"); +#endif + + std::setlocale(LC_CTYPE, ""); + + // Let's test our command line arguments + if (argc < 2) { +// fprintf(stderr, "usage: %s <vpl_file> </path/to/mod> [0|1 - file includes prepended verse references]\n", argv[0]); + fprintf(stderr, "usage: %s <biblefile> [notesfile]\n\n", argv[0]); + exit(-1); + } + + + // Let's see if we can open our input file + int fd = open(argv[1], O_RDONLY|O_BINARY); + if (fd < 0) { + fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[1]); + exit(-2); + } + + int fdn = -1; + if (argc > 2) { + fdn = open(argv[2], O_RDONLY|O_BINARY); + if (fdn < 0) { + fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[2]); + exit(-2); + } + } + + outHeader(); + + string header; + char *buffer = 0; + char *nbuffer = 0; + int result = 0; + string currentBook = ""; + int currentBookNo = 0; + int currentTestament = 0; + int currentChapter = 0; + int currentVerse = 0; + bool inBook = false; + bool inChapter = false; + bool inVerse = false; + string noteLine = ""; + string preChapNote = ""; + string outstring; + result = readline(fd, &buffer); + string lookahead = buffer; + unicodeTicks(lookahead); + prepLine(lookahead, currentTestament, false); + do { + result = readline(fd, &buffer); + if (lookahead.length()) { + string savebuf = buffer; + if (buffer) + delete [] buffer; + buffer = new char [ lookahead.length() + 1]; + strcpy(buffer, lookahead.c_str()); + lookahead = savebuf; + unicodeTicks(lookahead); + prepLine(lookahead, currentTestament, false); + result = 0; + } + else if (!result) { + string savebuf = buffer; + result = readline(fd, &buffer); + lookahead = buffer; + unicodeTicks(lookahead); + prepLine(lookahead, currentTestament, false); + + + if (buffer) + delete [] buffer; + buffer = new char [ savebuf.length() + 1]; + strcpy(buffer, savebuf.c_str()); + result = 0; + } + + outstring = buffer; + + + + // BOOK NAMES <BN> + if (!strncmp(outstring.c_str(), "<BN>", 4)) { + string book = outstring.c_str()+4; + book = book.substr(0, book.find_first_of("<")); + outstring = ""; + if (inVerse) { + outstring += "</verse>"; + inVerse = false; + } + if (inChapter) { + outstring += "</chapter>"; + inChapter = false; + } + if (inBook) { + outstring += "</div>"; + inBook = false; + } + outstring += (string)"<div type=\"book\" osisID=\""; + + VerseKey bookName(book.c_str()); + if (bookName.popError()) { + fprintf(stderr, "error: %s: couldn't find book match for %s. Please check book array in conversion program.\n", argv[0], book.c_str()); + exit(-3); + } + currentBook = bookName.getOSISBookName(); + outstring += currentBook; + currentTestament = bookName.getTestament()-1; + + outstring += (string)"\"><title type=\"main\" subType=\"x-Book\">" + book + "</title>"; + inBook = true; + } + + + // CHAPTERS + //<SN>PSALM + if ((!strncmp(outstring.c_str(), "<CN>", 4)) || (!strncmp(outstring.c_str(), "<SN>", 4))) { + string chapterTitle = outstring.c_str()+4; + chapterTitle = chapterTitle.substr(0, chapterTitle.find_first_of("<")); + string chapter = chapterTitle.substr(chapterTitle.find_first_of(" ")+1); + outstring = ""; + if (inVerse) { + outstring += "</verse>"; + inVerse = false; + } + if (inChapter) { + outstring += "</chapter>"; + inChapter = false; + } + outstring += (string)"<chapter osisID=\"" + currentBook + "." + chapter + "\">"; + outstring += (string)"<title type=\"sub\" subType=\"x-Chapter\">" + chapterTitle + "</title>"; + currentChapter = atoi(chapter.c_str()); + inChapter = true; + currentVerse = 1; + } + + //<SF> + if (!strncmp(outstring.c_str(), "<SF>", 4)) { + string heading = outstring.c_str()+4; + heading = heading.substr(0, heading.find("</SF>")); + outstring = ""; + + if (!strncmp(lookahead.c_str(), "<PM>", 4)) { + lookahead.erase(0, 4); + outstring += "<milestone type=\"line\" subType=\"x-PM\"/>"; + } + if (inVerse) { + outstring += "</verse>\n"; + inVerse = false; + } + outstring += (string)"<title type=\"acrostic\" canonical=\"true\" subType=\"x-preverse\">" + heading + (string)"</title>"; + } + + //<SH> + if (!strncmp(outstring.c_str(), "<SH>", 4)) { + string heading = outstring.c_str()+4; + heading = heading.substr(0, heading.find("</SH>")); + outstring = ""; + + if (!strncmp(lookahead.c_str(), "<PM>", 4)) { + lookahead.erase(0, 4); + outstring += "<milestone type=\"line\" subType=\"x-PM\"/>"; + } + if (inVerse) { + outstring += "</verse>\n"; + inVerse = false; + } + outstring += (string)"<title type=\"section\" subType=\"x-preverse\">" + heading + (string)"</title>"; + } + if (!strncmp(outstring.c_str(), "<SS>", 4)) { + string heading = (outstring.c_str()+4); + heading = heading.substr(0, heading.find("</SS>")); + outstring = (string)"<title type=\"psalm\" canonical=\"true\" subType=\"x-preverse\">" + heading + (string)"</title>"; + } + if (!strncmp(outstring.c_str(), "<SB>", 4)) { + string heading = (outstring.c_str()+4); + heading = heading.substr(0, heading.find("</SB>")); + outstring = (string)"<title type=\"scope\" subType=\"x-preverse\">" + heading + (string)"</title>"; + } + + + + // {{x:y}} + // DUH, find_first_of looks for the first occurance of ANY single character of the supplied string +// int start = outstring.find_first_of("{{"); // this is whacked and fails on ">[{.." Try it! + const char *outstr = outstring.c_str(); + const char *found = strstr(outstr, "{{"); + int start = (found) ? (found - outstr) : -1; +// ---- end of whacked replacement + + if (start > -1) { + found = strstr(outstr, "}}"); + int end = (found) ? (found - outstr) : -1; + end++; + int testmt = 0, book = 0, chap = 0; + string bkch = outstring.substr(start+2, end-start-2); + sscanf(bkch.c_str(), "%d:%d", &book, &chap); + currentChapter = chap; + int vNumEnd = outstring.find_first_of(" ", end); + currentVerse = atoi(outstring.substr(end+1, vNumEnd-end-1).c_str()); + currentBookNo = book; + if (book > nasbMax[0]) { + testmt = 1; + book -= nasbMax[0]; + } + if (currentBook != osisBooks[testmt][book-1]) { + fprintf(stderr, "error: %s: Found a book/chapter identifier out of place: {{%d:%d}} in book %s, chapter %d; bkch = %s; outstring = %s\n", argv[0], book, chap, currentBook.c_str(), currentChapter, bkch.c_str(), outstring.c_str()); + exit(-3); + } + char chapString[20], verseString[20]; + sprintf(chapString, "%d", currentChapter); + sprintf(verseString, "%d", currentVerse); + string newstring = ""; + if (inVerse) { + newstring += "</verse>"; + inVerse = false; + } + newstring += "<verse osisID=\"" + currentBook + (string)"." + (string)chapString + (string)"." + (string)verseString + (string) "\">"; + outstring.replace(start, vNumEnd-start+1, newstring); + inVerse = true; + noteLine = preChapNote; + preChapNote = ""; + } + + + + // multiple occurances on a line stuff + while (1) { + + // NOTE + outstr = outstring.c_str(); + found = strstr(outstr, "<N"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + int end = outstring.find_first_of(">", start+1); + string nStr = outstring.substr(start+2, end-start-2); + + // NOTE <N#> + if (isdigit(nStr.c_str()[0]) || strchr("ABCDEFG", nStr.c_str()[0])) { + bool preChap = strchr("A", nStr.c_str()[0]); + if (preChap) noteLine = ""; + char chapString[20], verseString[20]; + sprintf(chapString, "%d", currentChapter); + sprintf(verseString, "%d", currentVerse); + string osisID = (currentBook + (string)"." + (string)chapString + (string)"." + (string) verseString).c_str(); + string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "N"); + if (preChap) preChapNote = noteLine; + outstring.replace(start, end-start+1, (string)"<note type=\"explanation\" osisID=\"" + osisID + (string)".note." + nStr + (string) "\" n=\"" + nStr + (string) "\">" + noteBody + "</note>"); + continue; + } + } + + + + // <RS> + outstr = outstring.c_str(); + found = strstr(outstr, "<RS>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<q who=\"Jesus\">"); + continue; + + } + + // <RS> + outstr = outstring.c_str(); + found = strstr(outstr, "</RS>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 5, (string)"</q>"); + continue; + + } + // <RT> + outstr = outstring.c_str(); + found = strstr(outstr, "<RT>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"x-RT\"/>"); + continue; + + } + // <?> + outstr = outstring.c_str(); + found = strstr(outstr, "<?>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 3, (string)"¿"); + continue; + + } + // <!> + outstr = outstring.c_str(); + found = strstr(outstr, "<!>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 3, (string)"¡"); + continue; + + } + outstr = outstring.c_str(); + found = strstr(outstr, "<R"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + int end = outstring.find_first_of(">", start+1); + string nStr = outstring.substr(start+2, end-start-2); + + char chapString[20], verseString[20]; + sprintf(chapString, "%d", currentChapter); + sprintf(verseString, "%d", currentVerse); + string osisID = (currentBook + (string)"." + (string)chapString + (string)"." + (string) verseString).c_str(); + string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "R"); + outstring.replace(start, end-start+1, (string)"<note type=\"crossReference\" osisID=\"" + osisID + (string)".xref." + nStr + (string) "\" n=\"" + nStr + (string)"\">" + noteBody + "</note>"); + continue; + } + // transChange added {} + outstr = outstring.c_str(); + found = strstr(outstr, "{"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 1, (string)"<transChange type=\"added\">"); + size_t end = outstring.find_first_of("}", start+1); + if (end != string::npos) { + outstring.erase(end, 1); + } + else end = outstring.size()-1; + while ((!isalpha(outstring[end]))&&(outstring[end]!='>')) end--; + outstring.insert(end+1, "</transChange>"); + + continue; + + } +/* + // transChange tenseChange * + outstr = outstring.c_str(); + found = strstr(outstr, "*"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 1, (string)"<transChange type=\"tenseChanged\">"); + for (end = start + 34; (end < outstring.length()); end++) { + if ((!isalpha(outstring[end])) && + (outstring[end] != '\'')) + break; + } + outstring.replace(end, 1, "</transChange>"); + continue; + + } +*/ + // <,> + outstr = outstring.c_str(); + found = strstr(outstr, "<,>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 3, (string)"<milestone type=\"x-superiorComma\"/>"); + continue; + + } + // <NA> + outstr = outstring.c_str(); + found = strstr(outstr, "<NA>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"x-NA\"/>"); + continue; + + } + // <NB> + outstr = outstring.c_str(); + found = strstr(outstr, "<NB>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"x-NB\"/>"); + continue; + + } + // <NC> + outstr = outstring.c_str(); + found = strstr(outstr, "<NC>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"x-NC\"/>"); + continue; + + } + // paragraph break <PM> + outstr = outstring.c_str(); + found = strstr(outstr, "<PM>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"line\" subType=\"x-PM\"/>"); + continue; + + } + // poetry break <PN> + outstr = outstring.c_str(); + found = strstr(outstr, "<PN>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"x-PN\" />"); + continue; + + } + // poetry break <PO> + outstr = outstring.c_str(); + found = strstr(outstr, "<PO>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"line\" subType=\"x-PO\" />"); + continue; + + } + // poetry break <PE> + outstr = outstring.c_str(); + found = strstr(outstr, "<PE>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"line\" subType=\"x-PE\" />"); + continue; + + } + // letter indent <HL> + outstr = outstring.c_str(); + found = strstr(outstr, "<HL>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"x-HL\" />"); + continue; + + } + break; + } + + int strongsStart = 0; + int transChangeStart = 0; + bool strongsFound = false; + bool intoken = false; + bool intag = false; + bool inNote = false; + int tokenStart = 0; + string lastToken = ""; + string previousToken = ""; + int tenseChange = -1; + // strongs numbers + for (unsigned int i = 0; i < outstring.length(); i++) { + if ((!inNote) && (!intoken) && (!intag) && (!strongsFound)) { + if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) { + strongsStart = i; + strongsFound = true; + } + } + if (outstring[i] =='*') + tenseChange = i; + if (outstring[i] == '<') { tokenStart = i+1; intoken = true; } + if (outstring[i] == '>') { + intoken = false; + previousToken = lastToken; + lastToken = outstring.substr(tokenStart, i-tokenStart); + // Not completely safe, but works for current NASB data + if (strchr(lastToken.c_str(), '/')) + intag = false; + else intag = true; + if ((intag)&&(!strncmp(lastToken.c_str(), "transChange", 11))) { + transChangeStart = i+1; + } +/* + if (!strncmp(lastToken.c_str(), "seg", 3)) { + strongsFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "divineName", 10)) { + strongsFound = false; + strongsStart = i+1; + } +*/ + if (!strncmp(lastToken.c_str(), "/divineName", 10)) { + strongsFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "note", 4)) { + strongsFound = false; + strongsStart = i+1; + inNote = true; + } + if (!strncmp(lastToken.c_str(), "/note", 5)) { + strongsFound = false; + strongsStart = i+1; + inNote = false; + } + if (!strncmp(lastToken.c_str(), "q who=\"Jesus\"", 13)) { + strongsFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "seg type=\"otPassage\"", 19)) { + strongsFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "/transChange", 12)) { + strongsFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "milestone", 9)) { + strongsFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "/seg", 4)) { + strongsFound = false; + strongsStart = i+1; + } + if ((!strncmp(lastToken.c_str(), "verse", 5))) { + intag = false; + } + + if ( (!strncmp(lastToken.c_str(), "MG", 2)) || + (!strncmp(lastToken.c_str(), "MH", 2))) { + + // insert </w> + // fix tenseChange to be inside <w> so we can include a subset of the <w> content. + outstring.replace(tokenStart-1, lastToken.length()+2, ((tenseChange > -1) ? "</w></transChange>":"</w>")); + i = (tokenStart-1) + ((tenseChange > -1) ? 18:4); + + // build <w ... > tag + char lang = lastToken[1]; // H or G + lastToken.replace(0, 1, "<w lemma=\"strong:"); + while ((start = lastToken.find(", ")) > -1) { + lastToken.replace(start, 2, (string)" strong:" + lang); + } + lastToken += "\">"; + intag = false; + + + if (tenseChange > -1) { + lastToken.insert(0, "<transChange type=\"tenseChange\">"); + } + if (!strncmp(previousToken.c_str(), "transChange type=\"added", 23)) { + outstring.insert(transChangeStart, lastToken); + intag = true; + i += lastToken.length() - 1; // (-1 because we're about to i++) + } + + // insert our token + else { + outstring.insert(strongsStart, lastToken); + i += lastToken.length() - 1; // (-1 because we're about to i++) + } + strongsStart = i+1; + strongsFound = false; + if (tenseChange > -1) { + // relocate because position may have changed from all the token inserts + const char *buf = outstring.c_str(); + tenseChange = (strchr(buf, '*') - buf); + outstring.erase(tenseChange, 1); + tenseChange = -1; + } + } + } + } + + + // clean up stuff that didn't work quite right + while (1) { + + // divineName strongs tags misorderings + string target = "</w></divineName></seg>"; + size_t s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "</divineName></seg></w>"); + continue; + } + target = "</w>,</divineName></seg>"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "</divineName></seg></w>,"); + continue; + } + + break; + } + + + std::cout << outstring; + if (!result) std::cout << "\n"; + } + while (!result); + outstring = ""; + if (inVerse) { + outstring += "</verse>"; + inVerse = false; + } + if (inChapter) { + outstring += "</chapter>"; + inChapter = false; + } + if (inBook) { + outstring += "</div>"; + inBook = false; + } + std::cout << outstring; + + outTrailer(); + + // clean up our buffers that readline might have allocated + if (buffer) + delete [] buffer; + if (nbuffer) + delete [] nbuffer; + + close(fd); + + if (fdn > -1) + close(fdn); +} + +void outHeader() { + +std::cout << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << "\n"; +std::cout << "<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace osisCore.1.9.xsd\">" << "\n"; +std::cout << " <osisText osisIDWork=\"nasb\" xml:lang=\"en\">" << "\n"; +std::cout << " <header>" << "\n"; +std::cout << " <work osisWork=\"nasb\">" << "\n"; +std::cout << " <title>NEW AMERICAN STANDARD BIBLE</title>" << "\n"; +std::cout << " <identifier type=\"OSIS\">Bible.en.NASB.1995</identifier>" << "\n"; +std::cout << " <rights>Copyright (C) 1960,1962,1963,1968,1971,1972,1973,1975,1977,1995 by THE LOCKMAN FOUNDATION</rights>" << "\n"; +std::cout << " <refSystem>Bible</refSystem>" << "\n"; +std::cout << " </work>" << "\n"; +std::cout << " <work osisWork=\"strongs\">" << "\n"; +std::cout << " </work>" << "\n"; +std::cout << " </header>" << "\n"; + +} + +void outTrailer() { + std::cout << "</osisText>\n"; + std::cout << "</osis>\n"; +} + +void unicodeTicks(string &outstring) { + + while (1) { + const char *outstr; + const char *found; + int start; + + outstr = outstring.c_str(); + found = strstr(outstr, "``"); + char uchar[4]; uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x9c; uchar[3]=0; + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 2, "“"); + continue; + } + + outstr = outstring.c_str(); + found = strstr(outstr, "`"); + uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x98; uchar[3]=0; + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, uchar); + continue; + } + + outstr = outstring.c_str(); + found = strstr(outstr, "'"); + uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x99; uchar[3]=0; + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, uchar); + continue; + } + + outstr = outstring.c_str(); + found = strstr(outstr, "\""); + uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x9d; uchar[3]=0; + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, uchar); + continue; + } + + break; + } +} + +void prepLine(string &outstring, int currentTestament, bool note) { + int end = 0; + while (1) { +// ------------------------------------------ +// redundant markers + size_t s; + // <V> + s = outstring.find("<V>"); + if (s != string::npos) { + outstring.erase(s, 3); + continue; + } + + // <P> + s = outstring.find("<P>"); + if (s != string::npos) { + outstring.erase(s, 3); + continue; + } + + // <C> + s = outstring.find("<C>"); + if (s != string::npos) { + outstring.erase(s, 3); + continue; + } + + // <CC> + s = outstring.find("<CC>"); + if (s != string::npos) { + outstring.erase(s, 4); + continue; + } + + // <CP> + s = outstring.find("<CP>"); + if (s != string::npos) { + outstring.erase(s, 4); + continue; + } + + // <$F...>> + s = outstring.find("<$F"); + if (s != string::npos) { + size_t e = outstring.find(">>", s); + outstring.erase(s, e-s+2); + continue; + } +// ---------------------------------------------- + + // <A> + s = outstring.find("<A>"); + if (s != string::npos) { + outstring.replace(s, 3, "<milestone type=\"line\" subType=\"x-A\"/>"); + continue; + } + + // ~“ + char uchar[6]; uchar[0]=0xcc; uchar[1]=0x83; uchar[2]=0xe2; uchar[3]=0x80; uchar[4]=0x9c; uchar[5]=0; +// string target = "~“"; + string target = uchar; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"“\"/>"); + continue; + } + // +« + target = "+«"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"«\"/>"); + continue; + } + // +» + target = "+»"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"»\"/>"); + continue; + } + // +“ + target = "+“"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"“\"/>"); + continue; + } + // +” + target = "+”"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"”\"/>"); + continue; + } + // +‘ + target = "+‘"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"‘\"/>"); + continue; + } + // +’ + target = "+’"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"’\"/>"); + continue; + } + // -« + target = "-«"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"«\"/>"); + continue; + } + // -» + target = "-»"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"»\"/>"); + continue; + } + // -“ + target = "-“"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"“\"/>"); + continue; + } + // -” + target = "-”"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"”\"/>"); + continue; + } + // -‘ + target = "-‘"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"‘\"/>"); + continue; + } + // -’ + target = "-’"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"’\"/>"); + continue; + } + + // ~‘ + uchar[0]=0xcc; uchar[1]=0x83; uchar[2]=0xe2; uchar[3]=0x80; uchar[4]=0x98; uchar[5]=0; +// target = "~‘"; + target = uchar; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"‘\"/>"); + continue; + } + + + const char *outstr = outstring.c_str(); + const char *found = strstr(outstr, "L\\{ORD}/'\\{S}/"); + int start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 14, "<seg><divineName>Lord's</divineName></seg>"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD}/’\\{S}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 16, "<seg><divineName>Lord’s</divineName></seg>"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD,}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 9, "<seg><divineName>Lord</divineName></seg>,"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 8, "<seg><divineName>Lord</divineName></seg>"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "S\\{EN~OR}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 10, "<seg><divineName>Sen~or</divineName></seg>"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "S\\{EÑOR}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 10, "<seg><divineName>Señor</divineName></seg>"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "Y\\{AH,}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 8, "<seg><divineName>Yah</divineName></seg>,"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "Y\\{AH}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 7, "<seg><divineName>Yah</divineName></seg>"); + continue; + } + // is this really valid markup? should 'also be' be in small + // caps? 3 { and only 2 } ? + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD {also be}/}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 18, "<seg><divineName>Lord</divineName></seg> also be}"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD {give}/}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 15, "<seg><divineName>Lord</divineName></seg> give}"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD {bless}/}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 16, "<seg><divineName>Lord</divineName></seg> bless}"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD {are my Refuge; You have made the Most High your dwelling place}/}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 73, "<seg><divineName>Lord</divineName></seg> are my Refuge; You have made the Most High your dwelling place}"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "MENE"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 4, "<hi type=\"inscription\">Mene</hi>"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "M\\ENE/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 6, "<hi type=\"inscription\">Mene</hi>"); + continue; + } + found = strstr(outstr, "M\\ENE:/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 7, "<hi type=\"inscription\">Mene</hi>:"); + continue; + } + found = strstr(outstr, "TEKEL"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 5, "<hi type=\"inscription\">Tekel</hi>"); + continue; + } + found = strstr(outstr, "T\\EKEL/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 7, "<hi type=\"inscription\">Tekel</hi>"); + continue; + } + found = strstr(outstr, "T\\EKEL:/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 8, "<hi type=\"inscription\">Tekel</hi>:"); + continue; + } + found = strstr(outstr, "UPHARSIN"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 8, "<hi type=\"inscription\">Upharsin</hi>"); + continue; + } + found = strstr(outstr, "UFARSIN"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 7, "<hi type=\"inscription\">Ufarsin</hi>"); + continue; + } + found = strstr(outstr, "U\\FARSIN/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 9, "<hi type=\"inscription\">Ufarsin</hi>"); + continue; + } + found = strstr(outstr, "PERES"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 5, "<hi type=\"inscription\">Peres</hi>"); + continue; + } + found = strstr(outstr, "P\\ERES/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 7, "<hi type=\"inscription\">Peres</hi>"); + continue; + } + found = strstr(outstr, "P\\ERES:/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 8, "<hi type=\"inscription\">Peres</hi>:"); + continue; + } + // LB ??? Don't have info on this. Assuming '-' + outstr = outstring.c_str(); + found = strstr(outstr, "<LB>"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 4, "-"); + continue; + } + + found = strstr(outstr+end, "\\"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + for (;start;start--) { + if ((!std::isupper(outstring[start])) && + (!strchr("\\/ ~", outstring[start]))) { + break; + } + } + for (start++; outstring[start] == ' '; start++); + if (currentTestament) { + outstring.insert(start, "<seg type=\"otPassage\">"); + start += 22; + } + else { + outstring.insert(start, "<seg><divineName>"); + start += 17; + const char *b = outstring.c_str(); + const char *found = strstr(b, "L\\{ORD}/"); + int s = (found) ? (found - b) : -1; + if (s > -1) + outstring.replace(s, 8, "Lord"); + end = s+4; + } + bool lower = false; + bool token = false; + for (int charLen = 1;start < (int)outstring.length(); start+=charLen) { + const unsigned char *startChar = (const unsigned char *)outstring.c_str()+start; + const unsigned char *endChar = startChar; + SW_u32 testChar = getUniCharFromUTF8(&endChar, true); + charLen = endChar - startChar; // set the size of the UTF-8 sequence + if (!token) { + if (testChar == '\\') { + lower = true; + outstring.erase(start, 1); + start--; + continue; + } + if (testChar == '/') { + lower = false; + outstring.erase(start, 1); + end = start; + start--; + continue; + } + // what is this? It screws MENE MENE up in Daniel +// if (testChar == ':') +// break; + + if (StringMgr::getSystemStringMgr()->isAlpha(testChar)) { + if (StringMgr::getSystemStringMgr()->isLower(testChar)) + break; + if (lower) + outstring.replace(start, charLen, SWBuf((const char *)startChar, charLen).toLower()); + continue; + } + } + if (testChar == '>') + token = false; + if (testChar == '<') + token = true; + } + if (currentTestament) { + outstring.insert(end, "</seg>"); + end+=6; + } + else { + outstring.insert(end, "</divineName></seg>"); + end+=19; + } + continue; + } + + if (note) { + outstr = outstring.c_str(); + found = strstr(outstr, "{"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, "<hi type=\"italic\">"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, "</hi>"); + continue; + } + s = outstring.find("</reference></hi>"); + if (s != string::npos) { + const size_t s2 = outstring.find("<hi type=\"italic\"><reference"); + if (s2 == string::npos) { + outstring.replace(s, 17, "</hi></reference>"); + continue; + } + } + } + break; + } +} + +string getNoteBody(int fdn, string ¬eLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx) { + char *nbuffer = 0; + int start = -1; + const char *found = (const char *)-1; + const char *outstr = (const char *)-1; + + while (start == -1) { + if (!noteLine.length() && fdn > -1) { + if (readline(fdn, &nbuffer)) return ""; // eof + noteLine = nbuffer; + } + outstr = noteLine.c_str(); + found = strstr(outstr, "{{"); + start = (found) ? (found - outstr) : -1; + // be sure we have at least one of these. We've found note lines without any actual notes + if (found) found = strstr(outstr, "<R"); + if (!found) found = strstr(outstr, "<N"); + if (!found) start = -1; + if (start == -1) noteLine = ""; + } + + if (start > -1) { + found = strstr(outstr, "}}"); + int end = (found) ? (found - outstr) : -1; + end++; + int book, chap; + string bkch = noteLine.substr(start+2, end-start-2); + sscanf(bkch.c_str(), "%d:%d", &book, &chap); + int vNumEnd = noteLine.find_first_of(" ", end); + int verse = atoi(noteLine.substr(end+1, vNumEnd-end-1).c_str()); + if ((book != currentBookNo) || (chap != currentChapter) || (verse != currentVerse)) { + fprintf(stderr, "Not correct note line(%s - %s - %d:%d, %d:%d, %d:%d): %s\n\n", osisID.c_str(), nStr.c_str(), currentBookNo, book, currentChapter, chap, currentVerse, verse, noteLine.c_str()); + exit(-1); + } + } + else { + fprintf(stderr, "Not a note line: %s\n\n", noteLine.c_str()); + exit(-1); + } + + + + + + outstr = noteLine.c_str(); + string tag = (string)"<"+(string)nx+nStr+(string)">"; + found = strstr(outstr, tag.c_str()); + start = (found) ? (found - outstr) : -1; + string retVal = ""; + + if (start > -1) { + start += tag.length(); + found = strstr(outstr+start, " <"); + int end = (found) ? (found - outstr) : -1; + if (end<0) end = noteLine.length(); + retVal = noteLine.substr(start, end-start); + } + unicodeTicks(retVal); +#ifdef HAVESWORD + if (*nx == 'R') { + // } { get's deleted. e.g. {Luke} {9:10-17} + outstr = retVal.c_str(); + found = strstr(outstr, "} {"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + retVal.replace(start, 3, " "); + } + outstr = retVal.c_str(); + found = strstr(outstr, ";}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + retVal.replace(start, 2, "};"); + } + VerseKey key = osisID.c_str(); +//std::cerr << osisID.c_str() << ": Convert to OSISRef: " << retVal.c_str(); + retVal = VerseKey::convertToOSIS(retVal.c_str(), &key); +//std::cerr << ": " << retVal.c_str(); + } +#endif + prepLine(retVal, 0, true); + if (nbuffer) + delete [] nbuffer; +//std::cerr << ": " << retVal.c_str() << "\n"; + return retVal; +} + diff --git a/modules/nasb/mods.d/nasb.conf b/modules/nasb/mods.d/nasb.conf new file mode 100644 index 0000000..0242173 --- /dev/null +++ b/modules/nasb/mods.d/nasb.conf @@ -0,0 +1,49 @@ +[NASB] +DataPath=./modules/texts/ztext/nasb/ +ModDrv=zText +BlockType=BOOK +CompressType=ZIP +Version=1.0 +SwordVersionDate=2020-03-10 +MinimumVersion=1.5.9 +OSISqToTick=false +SourceType=OSIS +Encoding=UTF-8 +GlobalOptionFilter=OSISStrongs +GlobalOptionFilter=OSISFootnotes +GlobalOptionFilter=OSISScripref +GlobalOptionFilter=OSISHeadings +GlobalOptionFilter=OSISRedLetterWords +Feature=StrongsNumbers +Lang=en +Description=New American Standard Bible +LCSH=Bible. English. +TextSource=lockman +ShortCopyright=New American Standard Bible Copyright © 1960, 1962, 1963, 1968, 1971, 1972, 1973, 1975, 1977, 1995 \ +by The Lockman Foundation, La Habra, Calif. All rights reserved. \ +For Permission to Quote Information visit http://www.lockman.org. +ShortPromo=<a target="_blank" href="http://www.lockman.org">Learn more about the NASB.</a> +About=NEW AMERICAN STANDARD BIBLE \par\pard \ +Copyright (C) 1960, 1962, 1963, 1968, 1971, 1972, 1973, 1975, 1977, 1995 by THE LOCKMAN FOUNDATION \par\pard \ +A Corporation Not for Profit \par\pard \ +LA HABRA, CA \par\pard \ +All Rights Reserved \par\pard \ +<a href="http://www.lockman.org">http://www.lockman.org</a> \par\pard \ +\par\pard \ +The "NASB," "NAS," "New American Standard Bible," and "New American Standard" trademarks are registered in the United States Patent and Trademark Office by The Lockman Foundation. Use of these trademarks requires the permission of The Lockman Foundation. \par\pard \ +\par\pard \ +PERMISSION TO QUOTE \par\pard \ +\par\pard \ +The text of the New American Standard Bible(R) may be quoted and/or reprinted up to and inclusive of five hundred (500) verses without express written permission of The Lockman Foundation, providing the verses do not amount to a complete book of the Bible nor do the verses quoted account for more than 25% of the total work in which they are quoted. \par\pard \ +\par\pard \ +Notice of copyright must appear on the title or copyright page of the work as follows: \par\pard \ +\par\pard \ +"Scripture taken from the NEW AMERICAN STANDARD BIBLE(R), Copyright (C) 1960, 1962, 1963, 1968, 1971, 1972, 1973, 1975, 1977, 1995 by The Lockman Foundation. Used by permission." \par\pard \ +\par\pard \ +When quotations from the NASB(R) text are used in not-for-sale media, such as church bulletins, orders of service, posters, transparencies or similar media, the abbreviation (NASB) may be used at the end of the quotation. \par\pard \ +\par\pard \ +This permission to quote is limited to material which is wholly manufactured in compliance with the provisions of the copyright laws of the United States of America. The Lockman Foundation may terminate this permission at any time. \par\pard \ +\par\pard \ +Quotations and/or reprints in excess of the above limitations, or other permission requests, must be directed to and approved in writing by The Lockman Foundation, PO Box 2279, La Habra, CA 90632-2279 (714)879-3055. \par\pard \ +<a href="http://www.lockman.org">http://www.lockman.org</a> +DistributionLicense=Copyrighted; Permission to distribute granted to CrossWire |