diff options
Diffstat (limited to 'src')
220 files changed, 46287 insertions, 0 deletions
diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..fc04b62 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,5 @@ + +root := .. + +all: + make -C ${root} diff --git a/src/frontend/Makefile b/src/frontend/Makefile new file mode 100644 index 0000000..ef8eccd --- /dev/null +++ b/src/frontend/Makefile @@ -0,0 +1,5 @@ + +root := ../.. + +all: + make -C ${root} diff --git a/src/frontend/Makefile.am b/src/frontend/Makefile.am new file mode 100644 index 0000000..df82518 --- /dev/null +++ b/src/frontend/Makefile.am @@ -0,0 +1,6 @@ +frontenddir = $(top_srcdir)/src/frontend + +libsword_la_SOURCES += $(frontenddir)/swdisp.cpp +libsword_la_SOURCES += $(frontenddir)/swlog.cpp + + diff --git a/src/frontend/X11/Makefile b/src/frontend/X11/Makefile new file mode 100644 index 0000000..1a2d00d --- /dev/null +++ b/src/frontend/X11/Makefile @@ -0,0 +1,5 @@ + +root := ../../.. + +all: + make -C ${root} diff --git a/src/frontend/framework/Makefile b/src/frontend/framework/Makefile new file mode 100644 index 0000000..81f7721 --- /dev/null +++ b/src/frontend/framework/Makefile @@ -0,0 +1,4 @@ +root := ../../.. + +all: + make -C ${root} diff --git a/src/frontend/framework/femain.cpp b/src/frontend/framework/femain.cpp new file mode 100644 index 0000000..415ab6f --- /dev/null +++ b/src/frontend/framework/femain.cpp @@ -0,0 +1,12 @@ +#include <femain.h> + +FEMain::FEMain() { +} + +FEMain::~FEMain() { + list <SWDisplay *>::iterator it; + + for (it = displays.begin(); it != displays.end(); it++) + delete *it; + +} diff --git a/src/frontend/im/Makefile b/src/frontend/im/Makefile new file mode 100644 index 0000000..1a2d00d --- /dev/null +++ b/src/frontend/im/Makefile @@ -0,0 +1,5 @@ + +root := ../../.. 
+ +all: + make -C ${root} diff --git a/src/frontend/im/hebrewmcim.cpp b/src/frontend/im/hebrewmcim.cpp new file mode 100644 index 0000000..9ec55a9 --- /dev/null +++ b/src/frontend/im/hebrewmcim.cpp @@ -0,0 +1,653 @@ + +/** + * Title: Keyboard mapping for Michigan-Claremont Hebrew input + * Description: + * Copyright: Copyright (c) 2001 CrossWire Bible Society under the terms of the GNU GPL + * Company: + * @author Troy A. Griffitts + * @version 1.0 + */ + +#include <hebrewmcim.h> + +HebrewMCIM::HebrewMCIM() + :SWInputMethod() { + + init(); +} + + +int *HebrewMCIM::translate(char in) { + int retVal = 0; + static int retString[5]; + int retStringIndex = 0; + + memset(retString, 0, 5); + + if (getState() > 1) { + if (getState() >= 12) { // serious issue with internal structure + setState(0); + retString[retStringIndex++] = in; + return retString; + } + map<int, int>::iterator find = subst2[getState()].find(in); + if (find != subst2[getState()].end()) + retVal = find->second; + else retVal = in; + + setState(0); + retString[retStringIndex++] = retVal; + return retString; + } + else { + retVal = subst[in]; + + if (retVal == 0) { + setState(0); + retString[retStringIndex++] = in; + return retString; + } + if (retVal > 100) { + setState(1); + retString[retStringIndex++] = retVal; + return retString; + } + if (retVal == 50) { // multiChar + setState(1); + int *chars = multiChars[in]; + if (chars != 0) { + retString[retStringIndex++] = chars[0]; + retString[retStringIndex++] = chars[1]; + return retString; + } + } + } + setState(retVal); + return 0; +} + + +void HebrewMCIM::init() { + memset(subst, 0, 255); + + subst[')'] = 1488; + subst['B'] = 1489; + subst['G'] = 1490; + subst['D'] = 1491; + subst['H'] = 1492; + subst['W'] = 1493; + subst['Z'] = 1494; + subst['X'] = 1495; + subst['+'] = 1496; + subst['Y'] = 1497; + + subst['k'] = 1498; // finals + subst['m'] = 1501; + subst['n'] = 1503; + subst['c'] = 1509; + + subst['P'] = 1508; + subst['K'] = 1499; + subst['L'] = 1500; 
+ subst['M'] = 1502; + subst['N'] = 1504; + subst['S'] = 1505; + subst['('] = 1506; + subst['p'] = 1507; + subst['C'] = 1510; + subst['Q'] = 1511; + subst['R'] = 1512; + subst['#'] = 1513; + + // special multiChars + subst['&'] = 50; + subst['$'] = 50; + + static int x[] = {1513, 1474}; + multiChars['&'] = x; + static int y[] = {1513, 1473}; + multiChars['$'] = y; + + subst['T'] = 1514; + + // VOWELS + subst['A'] = 1463; + subst['F'] = 1464; + subst['E'] = 1462; + subst['"'] = 1461; + subst['I'] = 1460; + subst['O'] = 1465; + subst['U'] = 1467; + + + + // OTHER DIACRITICS + subst['.'] = 1468; + subst['-'] = 1470; + subst[','] = 1471; + + // Compound input + + // CANTILLATION + + subst[':'] = 2; + subst2[2]['A'] = 1458; + subst2[2]['E'] = 1457; + subst2[2]['F'] = 1459; + + + /* Telisha qetana is postpositive as in '04' above. However, Michigan +# code '24' is for a medial telisha. Graphically, there is no +# difference. + */ + subst['2'] = 5; + subst2[5]['4'] = 1449; + + + /* Note Michigan encoding distinguishes between medial metheg '35' (occuring +# on the left of the vowel), and the ordinary meteg '95' (occuring on the +# right of the vowel). It is also used for silluq. + */ + subst['3'] = 6; + subst2[6]['3'] = 1433; + subst2[6]['5'] = 1469; + + + /* The Michigan code of telisha gedola in medial position. Graphically, +# there is no difference. + */ + subst['4'] = 7; + subst2[7]['4'] = 1440; + + subst['6'] = 8; + subst2[8]['0'] = 1451; + subst2[8]['1'] = 1436; + + subst['1'] = 4; + subst2[4]['0'] = 1434; + + /* In the poetic books, prepositive dehi occurs; it's unclear whether +# tipeha also occurs in the poetic books. Otherwise, we could simply +# check for what book in the Tanach we are in. Michigan uses the same +# code for each. + */ + + subst2[4]['3'] = 1430; + + /* This is the poetic accent mugrash, which also includes rebia, but is +# encoded separately as '81' in the Michigan text. 
+ */ + subst2[4]['1'] = 1437; + subst2[4]['4'] = 1440; + + + subst['0'] = 3; + subst2[3]['0'] = 1475; + subst2[3]['1'] = 1426; + + /* According to BHS, zarqa and sinnor are both postpositive. However, +# the Michigan encoding uses one code for both. The Unicode zarqa +# (0x0598) is definitely NOT postpositive. And further, the shape of +# the symbol is different in BHS and Uniocde. This needs further +# research to determine what's going on here. For now, we follow BHS +# and use the postpositive Unicode zinor or both accents. + */ + + subst2[3]['2'] = 1454; + + /* Pashta is postpositive, and the Unicode equivalent reflects +# this. However, there is a poetic equivalent -- azla legarmeh -- +# which is not postpositive, but no equivalent code point exists in +# Unicode. The Michigan encoding does not distinguish between the two, +# although it could be algorithmically determined. + */ + + subst2[3]['3'] = 1433; + subst2[3]['4'] = 1449; + subst2[3]['5'] = 1472; + + + /* This is the Unicode Hebrew *accent*; there is also another Hebrew +# *punctuation* called GERSHAYIM 0x05F4. I'm using the more +# traditional rounded marks, rather than the alternate straight +# marks. + */ + + subst2[8]['2'] = 1438; + + // Also known as azla + subst2[8]['3'] = 1448; + subst2[8]['4'] = 1452; + subst2[8]['5'] = 1427; + + + subst['8'] = 9; + subst2[9]['0'] = 1428; + subst2[9]['1'] = 1431; + + /* Note, this accent is actually sinnorit, but it does not exist as a +# separate glyph in the Unicode standard. The 'ZINOR' Unicode accent +# is postpositive, while sinnorit is not. ZARQA is as close as I can +# get to this. + */ + subst2[9]['2'] = 1432; + + /* The Unicode form does not match the form used by BHS, but the names +# are the same. 
+ */ + subst2[9]['3'] = 1441; + subst2[9]['4'] = 1439; + subst2[9]['5'] = 1429; + + subst['7'] = 10; + subst2[10]['0'] = 1444; + subst2[10]['1'] = 1445; + subst2[10]['2'] = 1446; + subst2[10]['3'] = 1430; // also '13', '73' also is used for majela + subst2[10]['4'] = 1443; + subst2[10]['5'] = 1469; // this is silluq; should appear to the left of the vowel + + subst['9'] = 11; + subst2[11]['1'] = 1435; + subst2[11]['2'] = 1425; + subst2[11]['3'] = 1450; + subst2[11]['4'] = 1447; + subst2[11]['5'] = 1469; // should appear to the right of the vowel + +} + + /* + + +# CANTILLION MARKS + + my $ETNAHTA = '֑'; +# officially the Unicode name for this symbol was "SEGOL." However, that is +# not a unique name, conflicting with the vowel of the same name. Further, +# the position of the symbol is different. I have changed the name of the +# accent to "SEGOLTA," the traditional name for this accent. + my $SEGOLTA = '֒'; + my $SHALSHELET = '֓'; + my $ZAQEF_QATAN = '֔'; + my $ZAQEF_GADOL = '֕'; + my $TIPEHA = '֖'; + my $REVIA = '֗'; + my $ZARQA = '֘'; + my $PASHTA = '֙'; + my $YETIV = '֚'; + my $TEVIR = '֛'; + my $GERESH = '֜'; + my $GERESH_MUQDAM = '֝'; + my $GERSHAYIM = '֞'; + my $QARNEY_PARA = '֟'; + my $TELISHA_GEDOLA = '֠'; + my $PAZER = '֡'; + my $MUNAH = '֣'; + my $MAHAPAKH = '֤'; + my $MERKHA = '֥'; + my $MERKHA_KEFULA = '֦'; + my $DARGA = '֧'; + my $QADMA = '֨'; + my $TELISHA_QETANA = '֩'; + my $YERAH_BEN_YOMO = '֪'; + my $OLE = '֫'; + my $ILUY = '֬'; + my $DEHI = '֭'; + my $ZINOR = '֮'; +# HEBREW MARK + my $MASORA_CIRCLE = '֯'; +# HEBREW EXTENDED-A points and punctuation + my $SHEVA = 'ְ'; + my $HATAF_SEGOL = 'ֱ'; + my $HATAF_PATAH = 'ֲ'; + my $HATAF_QAMATS = 'ֳ'; + my $HIRIQ = 'ִ'; + my $TSERE = 'ֵ'; + my $SEGOL = 'ֶ'; +# furtive Patah is not a distinct character + my $PATAH = 'ַ'; + my $QAMATS = 'ָ'; + my $HOLAM = 'ֹ'; + my $QUBUTS = 'ֻ'; +# also used as shuruq +# falls within the base letter + my $DAGESH_OR_MAPIQ = 'ּ'; +# also used as siluq + my $METAG = 'ֽ'; + my 
$MAQAF = '־'; + my $RAFE = 'ֿ'; +# Also used for legarmeh +# may be treated as spacing punctuation, not as a point + my $PASEQ = '׀'; + my $SHIN_DOT = 'ׁ'; + my $SIN_DOT = 'ׂ'; + my $SOF_PASUQ = '׃'; +# HEBREW MARK + my $UPPER_DOT = 'ׄ'; +# HEBREW LETTERS based on ISO 8859-8 +# aleph +# x (alef symbol - 2135) + my $ALEF = 'א'; +# x (bet symbol - 2136) + my $BET = 'ב'; +# x (gimel symbol - 2137) + my $GIMEL = 'ג'; +# x (dalet symbol - 2138) + my $DALET = 'ד'; + my $HE = 'ה'; + my $VAV = 'ו'; + my $ZAYIN = 'ז'; + my $HET = 'ח'; + my $TET = 'ט'; + my $YOD = 'י'; + my $FINAL_KAF = 'ך'; + my $KAF = 'כ'; + my $LAMED = 'ל'; + my $FINAL_MEM = 'ם'; + my $MEM = 'מ'; + my $FINAL_NUN = 'ן'; + my $NUN = 'נ'; + my $SAMEKH = 'ס'; + my $AYIN = 'ע'; + my $FINAL_PE = 'ף'; + my $PE = 'פ'; + my $FINAL_TSADI = 'ץ'; +# also known as zade + my $TSADI = 'צ'; + my $QOF = 'ק'; + my $RESH = 'ר'; + my $SHIN = 'ש'; + my $TAV = 'ת'; +# Yiddish digraphs +# Hebrew Ligature +# tsvey vovn + my $DOUBLE_VAV = 'װ'; + my $VAV_YOD = 'ױ'; +# tsvey yudn + my $DOUBLE_YOD = 'ײ'; + +# Additional punctuation + my $PUNCT_GERESH = '׳'; + my $PUNCT_GERSHAYIM = '״'; +# Reserved: 0x05F5" +# x (hebrew point judeo-spanish varika - FB1E) +#my $JUDEO_SPANISH_VARIKA = pack("U",0xFB1E); # UTF-8 OxFB1E + +############################# +# End of Unicode 2.0 Hebrew # +############################# + +# A hash whose key is a Michagan code, and whose value is a Unicode +# equvalent + + char subst[] = new char [255]; + subst[')'] = 1488; + 'B' => $BET, + 'G' => $GIMEL, + 'D' => $DALET, + 'H' => $HE, + 'W' => $VAV, + 'Z' => $ZAYIN, + 'X' => $HET, + '+' => $TET, + 'Y' => $YOD, + 'K' => $KAF, + 'L' => $LAMED, + 'M' => $MEM, + 'N' => $NUN, + 'S' => $SAMEKH, + '(' => $AYIN, + 'P' => $PE, + 'C' => $TSADI, + 'Q' => $QOF, + 'R' => $RESH, + '#' => $SHIN, # the letter shin without a point + '&' => ($SHIN . $SIN_DOT), + '$' => ($SHIN . 
$SHIN_DOT), # ' + 'T' => $TAV, +# VOWELS + 'A' => $PATAH, + 'F' => $QAMATS, + 'E' => $SEGOL, + '"' => $TSERE, + 'I' => $HIRIQ, + 'O' => $HOLAM, + 'U' => $QUBUTS, + ':' => $SHEVA, + ':A' => $HATAF_PATAH, + ':E' => $HATAF_SEGOL, + ':F' => $HATAF_QAMATS, +# OTHER DIACRITICS + '.' => $DAGESH_OR_MAPIQ, + '-' => $MAQAF, + ',' => $RAFE, +# CANTILLATION + '00' => $SOF_PASUQ, + '01' => $SEGOLTA, +# According to BHS, zarqa and sinnor are both postpositive. However, +# the Michigan encoding uses one code for both. The Unicode zarqa +# (0x0598) is definitely NOT postpositive. And further, the shape of +# the symbol is different in BHS and Unicode. This needs further +# research to determine what's going on here. For now, we follow BHS +# and use the postpositive Unicode zinor for both accents. + '02' => $ZINOR, +# Pashta is postpositive, and the Unicode equivalent reflects +# this. However, there is a poetic equivalent -- azla legarmeh -- +# which is not postpositive, but no equivalent code point exists in +# Unicode. The Michigan encoding does not distinguish between the two, +# although it could be algorithmically determined. + '03' => $PASHTA, + '04' => $TELISHA_QETANA, + '05' => $PASEQ, + '10' => $YETIV, +# In the poetic books, prepositive dehi occurs; it's unclear whether +# tipeha also occurs in the poetic books. Otherwise, we could simply +# check for what book in the Tanach we are in. Michigan uses the same +# code for each. + '13' => $TIPEHA, # also $DEHI +# This is the poetic accent mugrash, which also includes rebia, but is +# encoded separately as '81' in the Michigan text. + '11' => $GERESH_MUQDAM, + '14' => $TELISHA_GEDOLA, +# Telisha qetana is postpositive as in '04' above. However, Michigan +# code '24' is for a medial telisha. Graphically, there is no +# difference. + '24' => $TELISHA_QETANA, + '33' => $PASHTA, +# The Michigan code of telisha gedola in medial position. Graphically, +# there is no difference. 
+ '44' => $TELISHA_GEDOLA, + '60' => $OLE, + '61' => $GERESH, +# This is the Unicode Hebrew *accent*; there is also another Hebrew +# *punctuation* called GERSHAYIM 0x05F4. I'm using the more +# traditional rounded marks, rather than the alternate straight +# marks. + '62' => $GERSHAYIM, +# Also known as azla + '63' => $QADMA, + '64' => $ILUY, + '65' => $SHALSHELET, + '80' => $ZAQEF_QATAN, + '81' => $REVIA, +# Note, this accent is actually sinnorit, but it does not exist as a +# separate glyph in the Unicode standard. The 'ZINOR' Unicode accent +# is postpositive, while sinnorit is not. ZARQA is as close as I can +# get to this. + '82' => $ZARQA, +# The Unicode form does not match the form used by BHS, but the names +# are the same. + '83' => $PAZER, + '84' => $QARNEY_PARA, + '85' => $ZAQEF_GADOL, +# Note Michigan encoding distinguishes between medial metheg '35' (occurring +# on the left of the vowel), and the ordinary meteg '95' (occurring on the +# right of the vowel). It is also used for silluq. 
+ '35' => $METAG, + '70' => $MAHAPAKH, + '71' => $MERKHA, + '72' => $MERKHA_KEFULA, + '73' => $TIPEHA, # also '13', '73' also is used for majela + '74' => $MUNAH, + '75' => $METAG, # this is silluq; should appear to the left of the vowel + '91' => $TEVIR, + '92' => $ETNAHTA, + '93' => $YERAH_BEN_YOMO, + '94' => $DARGA, + '95' => $METAG, # should appear to the right of the vowel + +# Not used by the Michigan Encoding +# $UPPER_DOT = '05C4'; + ); + +# declare other variables + my (@bhsLines, + @bhsVerse, + @entity_line) = (); + + my ($i, + $verse, + $word, + $character) = 0; + + my ($element, + $saveGuttural) = ""; + +# read in a line + while (<>) { +# Process one verse +# iterate over every character and change to XML decimal entity + CHAR: for ( $i = 0; ($i < scalar(@bhsVerse)); $i++) { + # find and convert final kaf, mem, nun, pe, tsade + ( # if final form + $bhsVerse[$i] =~ /[KMNPC]/ + ) + && + ( + ( # whitespace or + $bhsVerse[$i+1] =~ /[ \-?]/ + ) + || + ( # EOL or + $i == ( scalar(@bhsVerse) - 1 ) + ) + || + ( # sof pasuq or + ( $bhsVerse[$i+1] =~ /0/ ) && + ( $bhsVerse[$i+2] =~ /0/ ) + ) + || + ( # one accent followed by white, eol or + ( + ( $bhsVerse[$i+1] =~ /\d/ ) && + ( $bhsVerse[$i+2] =~ /\d/ ) + ) && + ( + ( $bhsVerse[$i+3] =~ /[ \-?]/ ) || + ( $i == ( scalar(@bhsVerse) - 1 ) ) + ) + ) + || + ( # two accents followed by white, eol + ( + ( $bhsVerse[$i+1] =~ /\d/ ) && + ( $bhsVerse[$i+2] =~ /\d/ ) && + ( $bhsVerse[$i+3] =~ /\d/ ) && + ( $bhsVerse[$i+4] =~ /\d/ ) + ) && + ( + ( $bhsVerse[$i+5] =~ /[ \-?]/ ) || + ( $i == ( scalar(@bhsVerse) - 1 ) ) + ) + ) + || + ( # followed by a vowel and white, eol, sof pasuq + ( $bhsVerse[$i+1] =~ /[:F]/ ) && + ( # followed by + ( $bhsVerse[$i+2] =~ /[ \-?]/ ) || # whitespace or + ( $i == ( scalar(@bhsVerse) - 1 ) ) || # eol or + ( # sof pasuq + ( $bhsVerse[$i+2] =~ /0/ ) && + ( $bhsVerse[$i+3] =~ /0/ ) + ) + ) + ) + ) # end of what follows after final letter + && + do { + $bhsVerse[$i] =~ /K/ && eval { push 
@entity_line,$FINAL_KAF; } + && next CHAR; + $bhsVerse[$i] =~ /M/ && eval { push @entity_line,$FINAL_MEM; } + && next CHAR; + $bhsVerse[$i] =~ /N/ && eval { push @entity_line,$FINAL_NUN; } + && next CHAR; + $bhsVerse[$i] =~ /P/ && eval { push @entity_line,$FINAL_PE; } + && next CHAR; + $bhsVerse[$i] =~ /C/ && eval { push @entity_line,$FINAL_TSADI; } + && next CHAR; + }; + # find and convert "furtive patach" + ( $bhsVerse[$i] =~ /A/ ) && # If the letter is a patach + ( $bhsVerse[$i-1] =~ /[)HX(]/ ) && # and is preceeded by a guttural + ( ( $bhsVerse[$i-2] =~ /[AEFOU]/ ) || # and is preceeded by a vowel + ( ( $bhsVerse[$i-2] =~ /\./ ) && # or by suruq + ( $bhsVerse[$i-3] =~ /W/ ) ) || # + ( ( $bhsVerse[$i-2] =~ /W/ ) && # or by holem (written plene) + ( $bhsVerse[$i-3] =~ /O/ ) ) || # + ( ( $bhsVerse[$i-2] =~ /Y/ ) && # or by hiriq-yod + ( $bhsVerse[$i-3] =~ /I/ ) ) ) && + do { + $saveGuttural = pop @entity_line; # snip off the gutteral + push @entity_line,$PATAH; # push on the patach + push @entity_line,$saveGuttural; # push back on the gutteral + next CHAR; + }; + # convert cantillation + # since we have previously dealt with all other cases of + # numbers, two digit patterns are all we have to search for + $bhsVerse[$i] =~ /\d/ && $bhsVerse[$i+1] =~ /\d/ && do { + push @entity_line,$Michigan2XMLentity{"$bhsVerse[$i]$bhsVerse[$i+1]"}; + $i++; # accents are two digits long, so advance past the 2nd digit + next CHAR; + }; + # convert katef vowels, which are two characters long + $bhsVerse[$i] =~ /:/ && $bhsVerse[$i+1] =~ /[AEF]/ && do { + push @entity_line,$Michigan2XMLentity{"$bhsVerse[$i]$bhsVerse[$i+1]"}; + $i++; + next CHAR; + }; + # convert everything else + push @entity_line,$Michigan2XMLentity{"$bhsVerse[$i]"}; + } # end CHAR +# print the line to standard output with XML character-level encoding +# each character has the following format: +# <c id="1kg1.verse#.word#.character#">Ӓ</c> + +# set up the verse element + $word = 1; + $character = 1; + print 
"<verse>\n<word>\n"; +# print each character element +# if there is a space, then close the word entity, open a new word +# entity, increment the word number, reset the character number to +# zero. + foreach $element (@entity_line) { + if ( $element =~ " " ) { + $word++; + $character = 1; + print "</word>\n<word>\n"; + next; + } + print "<c id=\"1kg1.$verse.$word.$character\">$element</c>\n"; + $character++; + } +# close the verse element + print "</word></verse>\n"; +# reinitialize variables + @bhsVerse = (); + @entity_line = (); + @bhsLines = (); + } # end while +# close the XML document + print "</body>\n"; + */ diff --git a/src/frontend/im/nullim.cpp b/src/frontend/im/nullim.cpp new file mode 100644 index 0000000..a4f4aad --- /dev/null +++ b/src/frontend/im/nullim.cpp @@ -0,0 +1,11 @@ +#include <nullim.h> + + +NullIM::NullIM() { +} + +int *NullIM::translate(char ch) { + static int retVal[1]; + *retVal = ch; + return retVal; +} diff --git a/src/frontend/im/swinputmeth.cpp b/src/frontend/im/swinputmeth.cpp new file mode 100644 index 0000000..7f64865 --- /dev/null +++ b/src/frontend/im/swinputmeth.cpp @@ -0,0 +1,26 @@ +/** + * Title: + * Description: + * Copyright: Copyright (c) 2001 CrossWire Bible Society under the terms of the GNU GPL + * Company: + * @author Troy A. Griffitts + * @version 1.0 + */ + +#include <swinputmeth.h> + +SWInputMethod::SWInputMethod() { + state = 0; +} + +void SWInputMethod::setState(int state) { + this->state = state; +} + +int SWInputMethod::getState() { + return state; +} + +void SWInputMethod::clearState() { + state = 0; +} diff --git a/src/frontend/swdisp.cpp b/src/frontend/swdisp.cpp new file mode 100644 index 0000000..412ce2e --- /dev/null +++ b/src/frontend/swdisp.cpp @@ -0,0 +1,27 @@ +/****************************************************************************** + * swdisp.cpp - code for base class 'swdisp'. swdisp is the basis for all + * types of displays (e.g. raw textout, curses, xwindow, etc.) 
+ */ + +#include <iostream> +#include <swmodule.h> +#include <swdisp.h> + +static const char *classes[] = {"SWDisplay", "SWObject", 0}; +SWClass SWDisplay::classdef(classes); + +/****************************************************************************** + * SWDisplay::Display - casts a module to a character pointer and displays it to + * raw output (overriden for different display types and + * module types if necessary) + * + * ENT: imodule - module to display + * + * RET: error status + */ + +char SWDisplay::Display(SWModule &imodule) +{ + std::cout << (const char *)imodule; + return 0; +} diff --git a/src/frontend/swlog.cpp b/src/frontend/swlog.cpp new file mode 100644 index 0000000..277a70d --- /dev/null +++ b/src/frontend/swlog.cpp @@ -0,0 +1,95 @@ +//--------------------------------------------------------------------------- + +#include <stdarg.h> +#include <stdio.h> +#ifndef _MSC_VER +#include <iostream> +#endif +#include "swlog.h" +//--------------------------------------------------------------------------- + + +SWLog *SWLog::systemlog = 0; + + +class __staticsystemlog { +public: + __staticsystemlog() { + SWLog::systemlog = new SWLog(); + } + ~__staticsystemlog() { + delete SWLog::systemlog; + } +} _staticsystemlog; + + +void SWLog::LogWarning(char *fmt, ...) +{ + char msg[2048]; + va_list argptr; + + if (logLevel >= 2) { + va_start(argptr, fmt); + vsprintf(msg, fmt, argptr); + va_end(argptr); + +#ifndef _MSC_VER + std::cerr << msg; + std::cerr << std::endl; +#endif + } +} + + +void SWLog::LogError(char *fmt, ...) +{ + char msg[2048]; + va_list argptr; + + if (logLevel) { + va_start(argptr, fmt); + vsprintf(msg, fmt, argptr); + va_end(argptr); + +#ifndef _MSC_VER + std::cerr << msg; + std::cerr << std::endl; +#endif + } +} + + +void SWLog::LogTimedInformation(char *fmt, ...) 
+{ + char msg[2048]; + va_list argptr; + + if (logLevel >= 4) { + va_start(argptr, fmt); + vsprintf(msg, fmt, argptr); + va_end(argptr); + +#ifndef _MSC_VER + std::cout << msg; + std::cout << std::endl; +#endif + } +} + + +void SWLog::LogInformation(char *fmt, ...) +{ + char msg[2048]; + va_list argptr; + + if (logLevel >= 3) { + va_start(argptr, fmt); + vsprintf(msg, fmt, argptr); + va_end(argptr); + +#ifndef _MSC_VER + std::cout << msg; + std::cout << std::endl; +#endif + } +} diff --git a/src/frontend/windoze/Makefile b/src/frontend/windoze/Makefile new file mode 100644 index 0000000..1a2d00d --- /dev/null +++ b/src/frontend/windoze/Makefile @@ -0,0 +1,5 @@ + +root := ../../.. + +all: + make -C ${root} diff --git a/src/keys/Makefile b/src/keys/Makefile new file mode 100644 index 0000000..339f87a --- /dev/null +++ b/src/keys/Makefile @@ -0,0 +1,4 @@ +root := ../.. + +all: + make -C ${root} diff --git a/src/keys/Makefile.am b/src/keys/Makefile.am new file mode 100644 index 0000000..ddeab6a --- /dev/null +++ b/src/keys/Makefile.am @@ -0,0 +1,9 @@ +keysdir = $(top_srcdir)/src/keys + +libsword_la_SOURCES += $(keysdir)/swkey.cpp +libsword_la_SOURCES += $(keysdir)/listkey.cpp +libsword_la_SOURCES += $(keysdir)/versekey.cpp +libsword_la_SOURCES += $(keysdir)/strkey.cpp +libsword_la_SOURCES += $(keysdir)/treekey.cpp +libsword_la_SOURCES += $(keysdir)/treekeyidx.cpp + diff --git a/src/keys/genarray.c b/src/keys/genarray.c new file mode 100644 index 0000000..f519950 --- /dev/null +++ b/src/keys/genarray.c @@ -0,0 +1,33 @@ +#include <stdio.h> +#include <fcntl.h> +#include <stdlib.h> + +main() +{ + int fd, l1, l2, l3; + char *fnames[] = {"ot.bks", "ot.cps", "nt.bks", "nt.cps"}; + long val; + char buf[64]; + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. 
+ + + for (l1 = 0; l1 < 2; l1++) { + for (l2 = 0; l2 < 2; l2++) { + l3 = 1; + sprintf(buf, "%s", fnames[(l1*2)+l2]); + printf(" // %s\n", fnames[(l1*2)+l2]); + fd = open(buf, O_RDONLY|O_BINARY); + while (read(fd, &val, 4) == 4) { + l3++; + printf("%ld, ", val/(4 + (l2*2))); + if (!(l3%7)) + printf("\n"); + } + close(fd); + printf("}, \n"); + } + } +} diff --git a/src/keys/listkey.cpp b/src/keys/listkey.cpp new file mode 100644 index 0000000..0d1ff33 --- /dev/null +++ b/src/keys/listkey.cpp @@ -0,0 +1,257 @@ +/****************************************************************************** + * listkey.cpp - code for base class 'ListKey'. ListKey is the basis for all + * types of keys that have lists of specified indexes + * (e.g. a list of verses, place, etc.) + */ + +#include <utilfuns.h> +#include <string.h> +#include <stdlib.h> +#include <swkey.h> +#include <listkey.h> + +static const char *classes[] = {"ListKey", "SWKey", "SWObject", 0}; +SWClass ListKey::classdef(classes); + +/****************************************************************************** + * ListKey Constructor - initializes instance of ListKey + * + * ENT: ikey - text key + */ + +ListKey::ListKey(const char *ikey): SWKey(ikey) { + arraymax = 0; + ClearList(); + init(); +} + + +ListKey::ListKey(ListKey const &k) : SWKey(k.keytext) { + arraymax = k.arraymax; + arraypos = k.arraypos; + arraycnt = k.arraycnt; + array = (arraymax)?(SWKey **)malloc(k.arraymax * sizeof(SWKey *)):0; + for (int i = 0; i < arraycnt; i++) + array[i] = k.array[i]->clone(); + init(); +} + + +void ListKey::init() { + myclass = &classdef; +} + + +SWKey *ListKey::clone() const +{ + return new ListKey(*this); +} + +/****************************************************************************** + * ListKey Destructor - cleans up instance of ListKey + */ + +ListKey::~ListKey() +{ + ClearList(); +} + + +/****************************************************************************** + * ListKey::ClearList - Clears out elements of 
list + */ + +void ListKey::ClearList() +{ + int loop; + + if (arraymax) { + for (loop = 0; loop < arraycnt; loop++) + delete array[loop]; + + free(array); + arraymax = 0; + } + arraycnt = 0; + arraypos = 0; + array = 0; +} + + +/****************************************************************************** + * ListKey::copyFrom Equates this ListKey to another ListKey object + * + * ENT: ikey - other ListKey object + */ + +void ListKey::copyFrom(const ListKey &ikey) { + ClearList(); + + arraymax = ikey.arraymax; + arraypos = ikey.arraypos; + arraycnt = ikey.arraycnt; + array = (arraymax)?(SWKey **)malloc(ikey.arraymax * sizeof(SWKey *)):0; + for (int i = 0; i < arraycnt; i++) + array[i] = ikey.array[i]->clone(); + + SetToElement(0); +} + + +/****************************************************************************** + * ListKey::add - Adds an element to the list + */ + +void ListKey::add(const SWKey &ikey) { + if (++arraycnt > arraymax) { + array = (SWKey **) ((array) ? realloc(array, (arraycnt + 32) * sizeof(SWKey *)) : calloc(arraycnt + 32, sizeof(SWKey *))); + arraymax = arraycnt + 32; + } + array[arraycnt-1] = ikey.clone(); + SetToElement(arraycnt-1); +} + + + +/****************************************************************************** + * ListKey::setPosition(SW_POSITION) - Positions this key + * + * ENT: p - position + * + * RET: *this + */ + +void ListKey::setPosition(SW_POSITION p) { + switch (p) { + case 1: // GCC won't compile P_TOP + SetToElement(0); + break; + case 2: // GCC won't compile P_BOTTOM + SetToElement(arraycnt-1); + break; + } +} + + +/****************************************************************************** + * ListKey::increment - Increments a number of elements + */ + +void ListKey::increment(int step) { + if (step < 0) { + decrement(step*-1); + return; + } + Error(); // clear error + for(; step && !Error(); step--) { + if (arraypos < arraycnt) { + (*(array[arraypos]))++; + if (array[arraypos]->Error()) { + 
SetToElement(arraypos+1); + } + else *this = (const char *)(*array[arraypos]); + } + else error = KEYERR_OUTOFBOUNDS; + } +} + + +/****************************************************************************** + * ListKey::decrement - Decrements a number of elements + */ + +void ListKey::decrement(int step) { + if (step < 0) { + increment(step*-1); + return; + } + Error(); // clear error + for(; step && !Error(); step--) { + if (arraypos > -1) { + (*(array[arraypos]))--; + if (array[arraypos]->Error()) { + SetToElement(arraypos-1, BOTTOM); + } + else *this = (const char *)(*array[arraypos]); + } + else error = KEYERR_OUTOFBOUNDS; + } +} + + +/****************************************************************************** + * ListKey::Count - Returns number of elements in list + */ + +int ListKey::Count() { + return arraycnt; +} + + +/****************************************************************************** + * ListKey::SetToElement - Sets key to element number + * + * ENT: ielement - element number to set to + * + * RET: error status + */ + +char ListKey::SetToElement(int ielement, SW_POSITION pos) { + arraypos = ielement; + if (arraypos >= arraycnt) { + arraypos = (arraycnt>0)?arraycnt - 1:0; + error = KEYERR_OUTOFBOUNDS; + } + else { + if (arraypos < 0) { + arraypos = 0; + error = KEYERR_OUTOFBOUNDS; + } + else { + error = 0; + } + } + + if (arraycnt) { + (*array[arraypos]) = pos; + *this = (const char *)(*array[arraypos]); + } + else *this = ""; + + return error; +} + + +/****************************************************************************** + * ListKey::GetElement - Gets a key element number + * + * ENT: pos - element number to get (or default current) + * + * RET: Key or null on error + */ + +SWKey *ListKey::GetElement(int pos) { + if (pos < 0) + pos = arraypos; + + if (pos >=arraycnt) + error = KEYERR_OUTOFBOUNDS; + + return (error) ? 
0:array[pos]; +} + + +/****************************************************************************** + * ListKey::Remove - Removes current element from list + */ + +void ListKey::Remove() { + if ((arraypos > -1) && (arraypos < arraycnt)) { + delete array[arraypos]; + if (arraypos < arraycnt - 1) + memmove(&array[arraypos], &array[arraypos+1], (arraycnt - arraypos - 1) * sizeof(SWKey *)); + arraycnt--; + + SetToElement((arraypos)?arraypos-1:0); + } +} diff --git a/src/keys/nt.bks b/src/keys/nt.bks Binary files differnew file mode 100644 index 0000000..6a3cf92 --- /dev/null +++ b/src/keys/nt.bks diff --git a/src/keys/nt.cps b/src/keys/nt.cps Binary files differnew file mode 100644 index 0000000..fdaa6f4 --- /dev/null +++ b/src/keys/nt.cps diff --git a/src/keys/ot.bks b/src/keys/ot.bks Binary files differnew file mode 100644 index 0000000..512f064 --- /dev/null +++ b/src/keys/ot.bks diff --git a/src/keys/ot.cps b/src/keys/ot.cps Binary files differnew file mode 100644 index 0000000..f4bf66b --- /dev/null +++ b/src/keys/ot.cps diff --git a/src/keys/strkey.cpp b/src/keys/strkey.cpp new file mode 100644 index 0000000..7e2d539 --- /dev/null +++ b/src/keys/strkey.cpp @@ -0,0 +1,41 @@ +/****************************************************************************** + * StrKey.cpp - code for class 'StrKey'- a standard string key class (used + * for modules that index on single strings (eg. cities, + * names, words, etc.) + */ + +#include <swmacs.h> +#include <utilfuns.h> +#include <strkey.h> +#include <string.h> +#include <stdio.h> + + +static const char *classes[] = {"StrKey", "SWKey", "SWObject", 0}; +SWClass StrKey::classdef(classes); + +/****************************************************************************** + * StrKey Constructor - initializes instance of StrKey + * + * ENT: ikey - text key (word, city, name, etc.) 
+ */ + +StrKey::StrKey(const char *ikey) : SWKey(ikey) +{ + init(); +} + + +void StrKey::init() { + myclass = &classdef; +} + + +/****************************************************************************** + * StrKey Destructor - cleans up instance of StrKey + * + * ENT: ikey - text key + */ + +StrKey::~StrKey() { +} diff --git a/src/keys/swkey.cpp b/src/keys/swkey.cpp new file mode 100644 index 0000000..e633369 --- /dev/null +++ b/src/keys/swkey.cpp @@ -0,0 +1,196 @@ +/****************************************************************************** + * swkey.cpp - code for base class 'SWKey'. SWKey is the basis for all + * types of keys for indexing into modules (e.g. verse, word, + * place, etc.) + */ + +#include <swkey.h> +#include <utilfuns.h> +#include <string.h> + +static const char *classes[] = {"SWKey", "SWObject", 0}; +SWClass SWKey::classdef(classes); + +/****************************************************************************** + * SWKey Constructor - initializes instance of SWKey + * + * ENT: ikey - text key + */ + +SWKey::SWKey(const char *ikey) +{ + index = 0; + persist = 0; + keytext = 0; + error = 0; + stdstr(&keytext, ikey); + init(); +} + +SWKey::SWKey(SWKey const &k) +{ + index = k.index; + persist = k.persist; + userData = k.userData; + keytext = 0; + error = k.error; + stdstr(&keytext, k.keytext); + init(); +} + +void SWKey::init() { + myclass = &classdef; +} + +SWKey *SWKey::clone() const +{ + return new SWKey(*this); +} + +/****************************************************************************** + * SWKey Destructor - cleans up instance of SWKey + */ + +SWKey::~SWKey() { + if (keytext) + delete [] keytext; +} + + +/****************************************************************************** + * SWKey::Persist - Gets whether this object itself persists within a + * module that it was used to SetKey or just a copy. 
+ * (1 - persists in module; 0 - a copy is attempted + * + * RET: value of persist + */ + +char SWKey::Persist() const +{ + return persist; +} + + +/****************************************************************************** + * SWKey::Persist - Set/gets whether this object itself persists within a + * module that it was used to SetKey or just a copy. + * (1 - persists in module; 0 - a copy is attempted + * + * ENT: ipersist - value which to set persist + * [-1] - only get + * + * RET: value of persist + */ + +char SWKey::Persist(signed char ipersist) +{ + if (ipersist != -1) + persist = ipersist; + + return persist; +} + + +/****************************************************************************** + * SWKey::Error - Gets and clears error status + * + * RET: error status + */ + +char SWKey::Error() +{ + char retval = error; + + error = 0; + return retval; +} + + +/****************************************************************************** + * SWKey::setText Equates this SWKey to a character string + * + * ENT: ikey - other swkey object + */ + +void SWKey::setText(const char *ikey) { + stdstr(&keytext, ikey); +} + + +/****************************************************************************** + * SWKey::copyFrom Equates this SWKey to another SWKey object + * + * ENT: ikey - other swkey object + */ + +void SWKey::copyFrom(const SWKey &ikey) { +// not desirable Persist(ikey.Persist()); + setText((const char *)ikey); +} + + +/****************************************************************************** + * SWKey::getText - returns text key if (char *) cast is requested + */ + +const char *SWKey::getText() const { + return keytext; +} + + +/****************************************************************************** + * SWKey::compare - Compares another VerseKey object + * + * ENT: ikey - key to compare with this one + * + * RET: > 0 if this key is greater than compare key + * < 0 + * 0 + */ + +int SWKey::compare(const SWKey &ikey) +{ + return 
strcmp((const char *)*this, (const char *)ikey); +} + + +/****************************************************************************** + * SWKey::setPosition(SW_POSITION) - Positions this key if applicable + */ + +void SWKey::setPosition(SW_POSITION p) { + switch (p) { + case POS_TOP: +// *this = ""; + break; + case POS_BOTTOM: +// *this = "zzzzzzzzz"; + break; + } +} + + +/****************************************************************************** + * SWKey::increment - Increments key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void SWKey::increment(int) { + error = KEYERR_OUTOFBOUNDS; +} + + +/****************************************************************************** + * SWKey::decrement - Decrements key a number of entries + * + * ENT: decrement - Number of entries to jump backward + * + * RET: *this + */ + +void SWKey::decrement(int) { + error = KEYERR_OUTOFBOUNDS; +} diff --git a/src/keys/treekey.cpp b/src/keys/treekey.cpp new file mode 100644 index 0000000..d92b7a4 --- /dev/null +++ b/src/keys/treekey.cpp @@ -0,0 +1,30 @@ +/****************************************************************************** + * versekey.h - code for class 'versekey'- a standard Biblical verse key + * + * $Id: treekey.cpp,v 1.2 2002/04/15 21:26:44 scribe Exp $ + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + + +#include <treekey.h> + +static const char *classes[] = {"TreeKey", "SWKey", "SWObject", 0}; +SWClass TreeKey::classdef(classes); + +void TreeKey::init() { + myclass = &classdef; +} diff --git a/src/keys/treekeyidx.cpp b/src/keys/treekeyidx.cpp new file mode 100644 index 0000000..acd9b5a --- /dev/null +++ b/src/keys/treekeyidx.cpp @@ -0,0 +1,590 @@ +/****************************************************************************** + * versekey.h - code for class 'versekey'- a standard Biblical verse key + * + * $Id: treekeyidx.cpp,v 1.7 2002/04/15 21:26:44 scribe Exp $ + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + + +#include <treekeyidx.h> +#include <fcntl.h> +#include <stdio.h> +#include <errno.h> +#include <string> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +using namespace std; +static const char nl = '\n'; +static const char *classes[] = {"TreeKeyIdx", "TreeKey", "SWKey", "SWObject", 0}; +SWClass TreeKeyIdx::classdef(classes); + + +TreeKeyIdx::TreeKeyIdx(const TreeKeyIdx &ikey) : currentNode() { + init(); + path = 0; + idxfd = 0; + datfd = 0; + copyFrom(ikey); +} + +TreeKeyIdx::TreeKeyIdx(const char *idxPath, int fileMode) : currentNode() { + char buf[127]; + + init(); + path = 0; + stdstr(&path, idxPath); + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. 
+ + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s.idx", path); + idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + sprintf(buf, "%s.dat", path); + datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + if (datfd <= 0) { + sprintf(buf, "Error: %d", errno); + perror(buf); + error = errno; + } + else { + root(); + } +} + + +void TreeKeyIdx::init() { + myclass = &classdef; +} + + +TreeKeyIdx::~TreeKeyIdx () { + if (path) + delete [] path; + + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); +} + + +const char *TreeKeyIdx::getLocalName() { + return currentNode.name; +} + + +const char *TreeKeyIdx::getUserData(int *size) { + if (size) + *size = (int)currentNode.dsize; + return currentNode.userData; +} + + +void TreeKeyIdx::setUserData(const char *userData, int size) { + if (currentNode.userData) + delete currentNode.userData; + + if (!size) + size = strlen(userData) + 1; + + currentNode.userData = new char [ size ]; + memcpy(currentNode.userData, userData, size); + currentNode.dsize = size; +} + +const char *TreeKeyIdx::setLocalName(const char *newName) { + stdstr(&(currentNode.name), newName); + return currentNode.name; +} + + +void TreeKeyIdx::save() { + saveTreeNode(¤tNode); +} + + +const char *TreeKeyIdx::getFullName() const { + TreeNode parent; + static string fullPath; + fullPath = currentNode.name; + parent.parent = currentNode.parent; + while (parent.parent > -1) { + getTreeNodeFromIdxOffset(parent.parent, &parent); + fullPath = ((string)parent.name) + (string) "/" + fullPath; + } + return fullPath.c_str(); +} + + +void TreeKeyIdx::root() { + error = getTreeNodeFromIdxOffset(0, ¤tNode); +} + + +bool TreeKeyIdx::parent() { + if (currentNode.parent > -1) { + error = getTreeNodeFromIdxOffset(currentNode.parent, ¤tNode); + return true; + } + return false; +} + + +bool TreeKeyIdx::firstChild() { + if (currentNode.firstChild > -1) { + error = 
getTreeNodeFromIdxOffset(currentNode.firstChild, ¤tNode); + return true; + } + return false; +} + + +bool TreeKeyIdx::nextSibling() { + if (currentNode.next > -1) { + error = getTreeNodeFromIdxOffset(currentNode.next, ¤tNode); + return true; + } + return false; +} + + +bool TreeKeyIdx::previousSibling() { + TreeNode iterator; + __u32 target = currentNode.offset; + if (currentNode.parent > -1) { + getTreeNodeFromIdxOffset(currentNode.parent, &iterator); + getTreeNodeFromIdxOffset(iterator.firstChild, &iterator); + if (iterator.offset != target) { + while ((iterator.next != target) && (iterator.next > -1)) + getTreeNodeFromIdxOffset(iterator.next, &iterator); + if (iterator.next > -1) { + error = getTreeNodeFromIdxOffset(iterator.offset, ¤tNode); + return true; + } + } + } + return false; +} + + +bool TreeKeyIdx::hasChildren() { + return (currentNode.firstChild > -1); +} + + +void TreeKeyIdx::append() { + TreeNode lastSib; + if (currentNode.offset) { + getTreeNodeFromIdxOffset(currentNode.offset, &lastSib); + while (lastSib.next > -1) { + getTreeNodeFromIdxOffset(lastSib.next, &lastSib); + } + __u32 idxOffset = lseek(idxfd->getFd(), 0, SEEK_END); + lastSib.next = idxOffset; + saveTreeNodeOffsets(&lastSib); + __u32 parent = currentNode.parent; + currentNode.clear(); + currentNode.offset = idxOffset; + currentNode.parent = parent; + } +} + + +void TreeKeyIdx::appendChild() { + if (firstChild()) { + append(); + } + else { + __u32 idxOffset = lseek(idxfd->getFd(), 0, SEEK_END); + currentNode.firstChild = idxOffset; + saveTreeNodeOffsets(¤tNode); + __u32 parent = currentNode.offset; + currentNode.clear(); + currentNode.offset = idxOffset; + currentNode.parent = parent; + } +} + + +void TreeKeyIdx::insertBefore() { +} + + +void TreeKeyIdx::remove() { +} + + +/****************************************************************************** + * TreeKeyIdx::Create - Creates new key idx/dat files + * + * ENT: path - directory to store module files + * RET: error status + */ + 
+signed char TreeKeyIdx::create(const char *ipath) { + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s.idx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + TreeKeyIdx newTree(path); + TreeKeyIdx::TreeNode root; + stdstr(&(root.name), ""); + newTree.saveTreeNode(&root); + + delete [] path; + + return 0; +} + + +/****************************************************************************** + * zStr::getidxbufdat - Gets the index string at the given dat offset + * NOTE: buf is calloc'd, or if not null, realloc'd and must + * be free'd by calling function + * + * ENT: ioffset - offset in dat file to lookup + * node - address of pointer to allocate for storage of string + */ + +void TreeKeyIdx::getTreeNodeFromDatOffset(long ioffset, TreeNode *node) const { + char ch; + __s32 tmp; + __u16 tmp2; + + if (datfd > 0) { + + lseek(datfd->getFd(), ioffset, SEEK_SET); + + read(datfd->getFd(), &tmp, 4); + node->parent = swordtoarch32(tmp); + + read(datfd->getFd(), &tmp, 4); + node->next = swordtoarch32(tmp); + + read(datfd->getFd(), &tmp, 4); + node->firstChild = swordtoarch32(tmp); + + string name; + do { + read(datfd->getFd(), &ch, 1); + name += ch; + } while (ch); + + stdstr(&(node->name), name.c_str()); + + read(datfd->getFd(), &tmp2, 2); + node->dsize = swordtoarch16(tmp2); + + if (node->dsize) { + if (node->userData) + delete [] node->userData; + node->userData = new char [node->dsize]; + read(datfd->getFd(), node->userData, node->dsize); + } + } +} + + 
+/****************************************************************************** + * zStr::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is calloc'd, or if not null, realloc'd + * and must be freed by calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +char TreeKeyIdx::getTreeNodeFromIdxOffset(long ioffset, TreeNode *node) const { + __u32 offset; + char error = 0; + + if (ioffset < 0) { + ioffset = 0; + error = KEYERR_OUTOFBOUNDS; + } + + node->offset = ioffset; + if (idxfd > 0) { + lseek(idxfd->getFd(), ioffset, SEEK_SET); + if (read(idxfd->getFd(), &offset, 4) == 4) { + offset = swordtoarch32(offset); + getTreeNodeFromDatOffset(offset, node); + } + else { + lseek(idxfd->getFd(), -4, SEEK_END); + if (read(idxfd->getFd(), &offset, 4) == 4) { + offset = swordtoarch32(offset); + getTreeNodeFromDatOffset(offset, node); + } + error = KEYERR_OUTOFBOUNDS; + } + } + return error; +} + + +unsigned long TreeKeyIdx::getOffset() const { + return currentNode.offset; +} + +void TreeKeyIdx::setOffset(unsigned long offset) { + error = getTreeNodeFromIdxOffset(offset, ¤tNode); +} + + +void TreeKeyIdx::saveTreeNodeOffsets(TreeNode *node) { + long datOffset = 0; + __s32 tmp; + + if (idxfd > 0) { + lseek(idxfd->getFd(), node->offset, SEEK_SET); + if (read(idxfd->getFd(), &tmp, 4) != 4) { + datOffset = lseek(datfd->getFd(), 0, SEEK_END); + tmp = archtosword32(datOffset); + write(idxfd->getFd(), &tmp, 4); + } + else { + datOffset = swordtoarch32(tmp); + lseek(datfd->getFd(), datOffset, SEEK_SET); + } + + tmp = archtosword32(node->parent); + write(datfd->getFd(), &tmp, 4); + + tmp = archtosword32(node->next); + write(datfd->getFd(), &tmp, 4); + + tmp = archtosword32(node->firstChild); + write(datfd->getFd(), &tmp, 4); + } +} + + +void TreeKeyIdx::copyFrom(const TreeKeyIdx &ikey) { + + SWKey::copyFrom(ikey); + + currentNode.offset = ikey.currentNode.offset; + 
currentNode.parent = ikey.currentNode.parent; + currentNode.next = ikey.currentNode.next; + currentNode.firstChild = ikey.currentNode.firstChild; + stdstr(&(currentNode.name), ikey.currentNode.name); + currentNode.dsize = ikey.currentNode.dsize; + + if (currentNode.userData) + delete [] currentNode.userData; + if (currentNode.dsize) { + currentNode.userData = new char [ currentNode.dsize ]; + memcpy(currentNode.userData, ikey.currentNode.userData, currentNode.dsize); + } + else currentNode.userData = 0; + + bool newFiles = true; + + if (path && ikey.path) + newFiles = strcmp(path, ikey.path); + + if (newFiles) { + stdstr(&path, ikey.path); + + if (idxfd) { + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); + } + idxfd = FileMgr::systemFileMgr.open(ikey.idxfd->path, ikey.idxfd->mode, ikey.idxfd->perms); + datfd = FileMgr::systemFileMgr.open(ikey.datfd->path, ikey.datfd->mode, ikey.datfd->perms); + } +} + + +void TreeKeyIdx::saveTreeNode(TreeNode *node) { + long datOffset = 0; + __s32 tmp; + if (idxfd > 0) { + + lseek(idxfd->getFd(), node->offset, SEEK_SET); + datOffset = lseek(datfd->getFd(), 0, SEEK_END); + tmp = archtosword32(datOffset); + write(idxfd->getFd(), &tmp, 4); + + saveTreeNodeOffsets(node); + + write(datfd->getFd(), node->name, strlen(node->name)); + char null = 0; + write(datfd->getFd(), &null, 1); + + __u16 tmp2 = archtosword16(node->dsize); + write(datfd->getFd(), &tmp2, 2); + + if (node->dsize) { + write(datfd->getFd(), node->userData, node->dsize); + } + } +} + + +void TreeKeyIdx::setText(const char *ikey) { + char *buf = 0; + stdstr(&buf, ikey); + char *leaf = strtok(buf, "/"); + root(); + while ((leaf) && (!Error())) { + bool ok, inChild = false; + for (ok = firstChild(); ok; ok = nextSibling()) { + inChild = true; + if (!stricmp(leaf, getLocalName())) + break; + } + leaf = strtok(0, "/"); + if (!ok) { + if (inChild) { // if we didn't find a matching child node, default to first child + parent(); + firstChild(); + } + 
if (leaf) + error = KEYERR_OUTOFBOUNDS; + break; + } + } + delete [] buf; +} + + + +void TreeKeyIdx::copyFrom(const SWKey &ikey) { + SWKey::copyFrom(ikey); +} + +void TreeKeyIdx::setPosition(SW_POSITION p) { + switch (p) { + case POS_TOP: + root(); + break; + case POS_BOTTOM: + error = getTreeNodeFromIdxOffset(lseek(idxfd->getFd(), -4, SEEK_END), &currentNode); + break; + } + Error(); // clear error from normalize +} + +const char *TreeKeyIdx::getText() const { + return getFullName(); +} + + +int TreeKeyIdx::_compare (const TreeKeyIdx & ikey) { + return (getOffset() - ikey.getOffset()); +} + + +int TreeKeyIdx::compare(const SWKey &ikey) { + TreeKeyIdx *treeKey = SWDYNAMIC_CAST(TreeKeyIdx, (&ikey)); + if (treeKey) + return _compare(*treeKey); + return SWKey::compare(ikey); +} + + +void TreeKeyIdx::decrement(int steps) { + error = getTreeNodeFromIdxOffset(currentNode.offset - (4*steps), &currentNode); +} + +void TreeKeyIdx::increment(int steps) { + error = getTreeNodeFromIdxOffset(currentNode.offset + (4*steps), &currentNode); + +/* + // assert positive + if (steps < 0) { + decrement(steps * -1); + return; + } + + while (steps > 0) { + if (!firstChild()) { + if (!nextSibbling() { + error = KEYERR_OUTOFBOUNDS; + return; + } + } + steps--; + } +*/ +} + + + +TreeKeyIdx::TreeNode::TreeNode() { + + name = 0; + stdstr(&name, ""); + userData = 0; + + clear(); +} + + +void TreeKeyIdx::TreeNode::clear() { + offset = 0; + parent = -1; + next = -1; + firstChild = -1; + dsize = 0; + + if (name) + delete [] name; + name = 0; + stdstr(&name, ""); + + if (userData) + delete [] userData; + userData = 0; +} + + +TreeKeyIdx::TreeNode::~TreeNode() { + if (name) + delete [] name; + + if (userData) + delete [] userData; +} diff --git a/src/keys/versekey.cpp b/src/keys/versekey.cpp new file mode 100644 index 0000000..05f1b8b --- /dev/null +++ b/src/keys/versekey.cpp @@ -0,0 +1,1450 @@ +/****************************************************************************** + * VerseKey.cpp - code for class
'VerseKey'- a standard Biblical verse key + */ + +#include <swmacs.h> +#include <utilfuns.h> +#include <string.h> +#include <stdio.h> +#include <fcntl.h> +#include <stdlib.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <utilstr.h> +#include <swkey.h> +#include <swlog.h> +#include <versekey.h> +#include <localemgr.h> +extern "C" { +#include <roman.h> +} + + +static const char *classes[] = {"VerseKey", "SWKey", "SWObject", 0}; +SWClass VerseKey::classdef(classes); + +/****************************************************************************** + * Initialize static members of VerseKey + */ + +#include <canon.h> // Initialize static members of canonical books structure + +struct sbook *VerseKey::builtin_books[2] = {0,0}; +const char VerseKey::builtin_BMAX[2] = {39, 27}; +long *VerseKey::offsets[2][2] = {{VerseKey::otbks, VerseKey::otcps}, {VerseKey::ntbks, VerseKey::ntcps}}; +int VerseKey::instance = 0; +VerseKey::LocaleCache VerseKey::localeCache; + + +/****************************************************************************** + * VerseKey::init - initializes instance of VerseKey + */ + +void VerseKey::init() { + myclass = &classdef; + if (!instance) + initstatics(); + + instance++; + autonorm = 1; // default auto normalization to true + headings = 0; // default display headings option is false + upperBound = 0; + lowerBound = 0; + testament = 0; + book = 0; + chapter = 0; + verse = 0; + locale = 0; + + setLocale(LocaleMgr::systemLocaleMgr.getDefaultLocaleName()); +} + +/****************************************************************************** + * VerseKey Constructor - initializes instance of VerseKey + * + * ENT: ikey - base key (will take various forms of 'BOOK CH:VS'. 
See + * VerseKey::parse for more detailed information) + */ + +VerseKey::VerseKey(const SWKey *ikey) : SWKey(*ikey) +{ + init(); + if (ikey) + parse(); +} + + +/****************************************************************************** + * VerseKey Constructor - initializes instance of VerseKey + * + * ENT: ikey - text key (will take various forms of 'BOOK CH:VS'. See + * VerseKey::parse for more detailed information) + */ + +VerseKey::VerseKey(const char *ikey) : SWKey(ikey) +{ + init(); + if (ikey) + parse(); +} + + +VerseKey::VerseKey(VerseKey const &k) : SWKey(k) +{ + init(); + autonorm = k.autonorm; + headings = k.headings; + testament = k.Testament(); + book = k.Book(); + chapter = k.Chapter(); + verse = k.Verse(); + LowerBound(k.LowerBound()); + UpperBound(k.UpperBound()); +} + + +VerseKey::VerseKey(const char *min, const char *max) : SWKey() +{ + init(); + LowerBound(min); + UpperBound(max); + setPosition(TOP); +} + + +SWKey *VerseKey::clone() const +{ + return new VerseKey(*this); +} + + +/****************************************************************************** + * VerseKey Destructor - cleans up instance of VerseKey + * + * ENT: ikey - text key + */ + +VerseKey::~VerseKey() { + if (upperBound) + delete upperBound; + if (lowerBound) + delete lowerBound; + if (locale) + delete [] locale; + + --instance; +} + + +void VerseKey::setLocale(const char *name) { + char *BMAX; + struct sbook **books; + bool useCache = false; + + if (localeCache.name) + useCache = (!strcmp(localeCache.name, name)); + + if (!useCache) { // if we're setting params for a new locale + stdstr(&(localeCache.name), name); + localeCache.abbrevsCnt = 0; + } + + SWLocale *locale = (useCache) ? 
localeCache.locale : LocaleMgr::systemLocaleMgr.getLocale(name); + localeCache.locale = locale; + + if (locale) { + locale->getBooks(&BMAX, &books); + setBooks(BMAX, books); + setBookAbbrevs(locale->getBookAbbrevs(), localeCache.abbrevsCnt); + localeCache.abbrevsCnt = abbrevsCnt; + } + else { + setBooks(builtin_BMAX, builtin_books); + setBookAbbrevs(builtin_abbrevs, localeCache.abbrevsCnt); + localeCache.abbrevsCnt = abbrevsCnt; + } + stdstr(&(this->locale), localeCache.name); +} + + +void VerseKey::setBooks(const char *iBMAX, struct sbook **ibooks) { + BMAX = iBMAX; + books = ibooks; +} + + +void VerseKey::setBookAbbrevs(const struct abbrev *bookAbbrevs, unsigned int size) { + abbrevs = bookAbbrevs; + if (!size) { + for (abbrevsCnt = 0; *abbrevs[abbrevsCnt].ab; abbrevsCnt++) { + /* + if (strcmp(abbrevs[abbrevsCnt-1].ab, abbrevs[abbrevsCnt].ab) > 0) { + fprintf(stderr, "ERROR: book abbreviation (canon.h or locale) misordered at entry: %s\n", abbrevs[abbrevsCnt].ab); + exit(-1); + } + */ + } + for (int t = 0; t < 2; t++) { + for (int i = 0; i < BMAX[t]; i++) { + int bn = getBookAbbrev(books[t][i].name); + if ((bn-1)%39 != i) { + SWLog::systemlog->LogError("Book: %s does not have a matching toupper abbrevs entry! book number returned was: %d", books[t][i].name, bn); + } + } + } + } + else abbrevsCnt = size; +} + + +/****************************************************************************** + * VerseKey::initstatics - initializes statics. Performed only when first + * instance on VerseKey (or descendent) is created. 
+ */ + +void VerseKey::initstatics() { + int l1, l2, chaptmp = 0; + + builtin_books[0] = otbooks; + builtin_books[1] = ntbooks; + + for (l1 = 0; l1 < 2; l1++) { + for (l2 = 0; l2 < builtin_BMAX[l1]; l2++) { + builtin_books[l1][l2].versemax = &vm[chaptmp]; + chaptmp += builtin_books[l1][l2].chapmax; + } + } +} + + +/****************************************************************************** + * VerseKey::parse - parses keytext into testament|book|chapter|verse + * + * RET: error status + */ + +char VerseKey::parse() +{ + + + testament = 1; + book = 1; + chapter = 1; + verse = 1; + + int error = 0; + + if (keytext) { + ListKey tmpListKey = VerseKey::ParseVerseList(keytext); + if (tmpListKey.Count()) { + SWKey::setText((const char *)tmpListKey); + for (testament = 1; testament < 3; testament++) { + for (book = 1; book <= BMAX[testament-1]; book++) { + if (!strncmp(keytext, books[testament-1][book-1].name, strlen(books[testament-1][book-1].name))) + break; + } + if (book <= BMAX[testament-1]) + break; + } + + if (testament < 3) { + sscanf(&keytext[strlen(books[testament-1][book-1].name)], "%d:%d", &chapter, &verse); + } + else error = 1; + } else error = 1; + } + Normalize(1); + freshtext(); + + return (this->error) ? 
this->error : (this->error = error); +} + + +/****************************************************************************** + * VerseKey::freshtext - refreshes keytext based on + * testament|book|chapter|verse + */ + +void VerseKey::freshtext() const +{ + char buf[2024]; + int realtest = testament; + int realbook = book; + + if (book < 1) { + if (testament < 1) + sprintf(buf, "[ Module Heading ]"); + else sprintf(buf, "[ Testament %d Heading ]", (int)testament); + } + else { + if (realbook > BMAX[realtest-1]) { + realbook -= BMAX[realtest-1]; + if (realtest < 2) + realtest++; + if (realbook > BMAX[realtest-1]) + realbook = BMAX[realtest-1]; + } + sprintf(buf, "%s %d:%d", books[realtest-1][realbook-1].name, chapter, verse); + } + + stdstr((char **)&keytext, buf); +} + + + +/****************************************************************************** + * VerseKey::getBookAbbrev - Attempts to find a book abbreviation for a buffer + * + * ENT: abbr - key for which to search; + * RET: book number or < 0 = not valid + */ + +int VerseKey::getBookAbbrev(const char *iabbr) +{ + int loop, diff, abLen, min, max, target, retVal = -1; + + char *abbr = 0; + + stdstr(&abbr, iabbr); + strstrip(abbr); + toupperstr(abbr); + abLen = strlen(abbr); + + if (abLen) { + min = 0; +// max = abbrevsCnt - 1; + max = abbrevsCnt; + while(1) { + target = min + ((max - min) / 2); + diff = strncmp(abbr, abbrevs[target].ab, abLen); + if ((!diff)||(target >= max)||(target <= min)) + break; + if (diff > 0) + min = target; + else max = target; + } + for (; target > 0; target--) { + if (strncmp(abbr, abbrevs[target-1].ab, abLen)) + break; + } + + retVal = (!diff) ? 
abbrevs[target].book : -1; + } + delete [] abbr; + return retVal; +} + +/****************************************************************************** + * VerseKey::ParseVerseList - Attempts to parse a buffer into separate + * verse entries returned in a ListKey + * + * ENT: buf - buffer to parse; + * defaultKey - if verse, chap, book, or testament is left off, + * pull info from this key (ie. Gen 2:3; 4:5; + * Gen would be used when parsing the 4:5 section) + * expandRange - whether or not to expand eg. John 1:10-12 or just + * save John 1:10 + * + * RET: ListKey reference filled with verse entries contained in buf + * + * COMMENT: This code works but wreaks. Rewrite to make more maintainable. + */ + +ListKey VerseKey::ParseVerseList(const char *buf, const char *defaultKey, bool expandRange) { + SWKey textkey; + + char book[255]; + char number[255]; + int tobook = 0; + int tonumber = 0; + int chap = -1, verse = -1; + int bookno = 0; + VerseKey curkey, lBound; + curkey.setLocale(getLocale()); + lBound.setLocale(getLocale()); + int loop; + char comma = 0; + char dash = 0; + const char *orig = buf; + ListKey tmpListKey; + ListKey internalListKey; + SWKey tmpDefaultKey = defaultKey; + char lastPartial = 0; + + curkey.AutoNormalize(0); + tmpListKey << tmpDefaultKey; + tmpListKey.GetElement()->userData = (void *)buf; + + while (*buf) { + switch (*buf) { + case ':': + number[tonumber] = 0; + tonumber = 0; + if (*number) + chap = atoi(number); + *number = 0; + break; + + case '-': + case ',': // on number new verse + case ';': // on number new chapter + number[tonumber] = 0; + tonumber = 0; + if (*number) { + if (chap >= 0) + verse = atoi(number); + else chap = atoi(number); + } + *number = 0; + book[tobook] = 0; + tobook = 0; + bookno = -1; + if (*book) { + for (loop = strlen(book) - 1; loop+1; loop--) { + if ((isdigit(book[loop])) || (book[loop] == ' ')) { + book[loop] = 0; + continue; + } + else { + if ((SW_toupper(book[loop])=='F')&&(loop)) { + if 
((isdigit(book[loop-1])) || (book[loop-1] == ' ') || (SW_toupper(book[loop-1]) == 'F')) { + book[loop] = 0; + continue; + } + } + } + break; + } + + for (loop = strlen(book) - 1; loop+1; loop--) { + if (book[loop] == ' ') { + if (isroman(&book[loop+1])) { + if (verse == -1) { + verse = chap; + chap = from_rom(&book[loop+1]); + book[loop] = 0; + } + } + break; + } + } + + if ((!stricmp(book, "V")) || (!stricmp(book, "VER"))) { // Verse abbrev + if (verse == -1) { + verse = chap; + chap = VerseKey(tmpListKey).Chapter(); + *book = 0; + } + } + + bookno = getBookAbbrev(book); + } + if (((bookno > -1) || (!*book)) && ((*book) || (chap >= 0) || (verse >= 0))) { + char partial = 0; + curkey.Verse(1); + curkey.Chapter(1); + curkey.Book(1); + + if (bookno < 0) { + curkey.Testament(VerseKey(tmpListKey).Testament()); + curkey.Book(VerseKey(tmpListKey).Book()); + } + else { + curkey.Testament(1); + curkey.Book(bookno); + } + + if (((comma)||((verse < 0)&&(bookno < 0)))&&(!lastPartial)) { +// if (comma) { + curkey.Chapter(VerseKey(tmpListKey).Chapter()); + curkey.Verse(chap); // chap because this is the first number captured + } + else { + if (chap >= 0) { + curkey.Chapter(chap); + } + else { + partial++; + curkey.Chapter(1); + } + if (verse >= 0) { + curkey.Verse(verse); + } + else { + partial++; + curkey.Verse(1); + } + } + + if ((*buf == '-') && (expandRange)) { // if this is a dash save lowerBound and wait for upper + VerseKey newElement; + newElement.LowerBound(curkey); + newElement.setPosition(TOP); + tmpListKey << newElement; + tmpListKey.GetElement()->userData = (void *)buf; + } + else { + if (!dash) { // if last separator was not a dash just add + if (expandRange && partial) { + VerseKey newElement; + newElement.LowerBound(curkey); + if (partial > 1) + curkey.setPosition(MAXCHAPTER); + if (partial > 0) + curkey = MAXVERSE; + newElement.UpperBound(curkey); + newElement = TOP; + tmpListKey << newElement; + tmpListKey.GetElement()->userData = (void *)buf; + } + else { + 
tmpListKey << (const SWKey &)(const SWKey)(const char *)curkey; + tmpListKey.GetElement()->userData = (void *)buf; + } + } + else if (expandRange) { + VerseKey *newElement = SWDYNAMIC_CAST(VerseKey, tmpListKey.GetElement()); + if (newElement) { + if (partial > 1) + curkey = MAXCHAPTER; + if (partial > 0) + curkey = MAXVERSE; + newElement->UpperBound(curkey); + *newElement = TOP; + tmpListKey.GetElement()->userData = (void *)buf; + } + } + } + lastPartial = partial; + } + *book = 0; + chap = -1; + verse = -1; + if (*buf == ',') + comma = 1; + else comma = 0; + if (*buf == '-') + dash = 1; + else dash = 0; + break; + case 10: // ignore these + case 13: + break; + case '.': + if (buf > orig) // ignore (break) if preceeding char is not a digit + if (!isdigit(*(buf-1))) + break; + + default: + if (isdigit(*buf)) { + number[tonumber++] = *buf; + } + else { + switch (*buf) { + case ' ': // ignore these and don't reset number + case 'f': + case 'F': + break; + default: + number[tonumber] = 0; + tonumber = 0; + break; + } + } + if (chap == -1) + book[tobook++] = *buf; + } + buf++; + } + number[tonumber] = 0; + tonumber = 0; + if (*number) { + if (chap >= 0) + verse = atoi(number); + else chap = atoi(number); + } + *number = 0; + book[tobook] = 0; + tobook = 0; + if (*book) { + for (loop = strlen(book) - 1; loop+1; loop--) { + if ((isdigit(book[loop])) || (book[loop] == ' ')) { + book[loop] = 0; + continue; + } + else { + if ((SW_toupper(book[loop])=='F')&&(loop)) { + if ((isdigit(book[loop-1])) || (book[loop-1] == ' ') || (SW_toupper(book[loop-1]) == 'F')) { + book[loop] = 0; + continue; + } + } + } + break; + } + + for (loop = strlen(book) - 1; loop+1; loop--) { + if (book[loop] == ' ') { + if (isroman(&book[loop+1])) { + if (verse == -1) { + verse = chap; + chap = from_rom(&book[loop+1]); + book[loop] = 0; + } + } + break; + } + } + + if ((!stricmp(book, "V")) || (!stricmp(book, "VER"))) { // Verse abbrev. 
+ if (verse == -1) { + verse = chap; + chap = VerseKey(tmpListKey).Chapter(); + *book = 0; + } + } + + bookno = getBookAbbrev(book); + } + if (((bookno > -1) || (!*book)) && ((*book) || (chap >= 0) || (verse >= 0))) { + char partial = 0; + curkey.Verse(1); + curkey.Chapter(1); + curkey.Book(1); + + if (bookno < 0) { + curkey.Testament(VerseKey(tmpListKey).Testament()); + curkey.Book(VerseKey(tmpListKey).Book()); + } + else { + curkey.Testament(1); + curkey.Book(bookno); + } + + if (((comma)||((verse < 0)&&(bookno < 0)))&&(!lastPartial)) { +// if (comma) { + curkey.Chapter(VerseKey(tmpListKey).Chapter()); + curkey.Verse(chap); // chap because this is the first number captured + } + else { + if (chap >= 0) { + curkey.Chapter(chap); + } + else { + partial++; + curkey.Chapter(1); + } + if (verse >= 0) { + curkey.Verse(verse); + } + else { + partial++; + curkey.Verse(1); + } + } + + if ((*buf == '-') && (expandRange)) { // if this is a dash save lowerBound and wait for upper + VerseKey newElement; + newElement.LowerBound(curkey); + newElement = TOP; + tmpListKey << newElement; + tmpListKey.GetElement()->userData = (void *)buf; + } + else { + if (!dash) { // if last separator was not a dash just add + if (expandRange && partial) { + VerseKey newElement; + newElement.LowerBound(curkey); + if (partial > 1) + curkey = MAXCHAPTER; + if (partial > 0) + curkey = MAXVERSE; + newElement.UpperBound(curkey); + newElement = TOP; + tmpListKey << newElement; + tmpListKey.GetElement()->userData = (void *)buf; + } + else { + tmpListKey << (const SWKey &)(const SWKey)(const char *)curkey; + tmpListKey.GetElement()->userData = (void *)buf; + } + } + else if (expandRange) { + VerseKey *newElement = SWDYNAMIC_CAST(VerseKey, tmpListKey.GetElement()); + if (newElement) { + if (partial > 1) + curkey = MAXCHAPTER; + if (partial > 0) + curkey = MAXVERSE; + newElement->UpperBound(curkey); + *newElement = TOP; + tmpListKey.GetElement()->userData = (void *)buf; + } + } + } + } + *book = 0; + 
tmpListKey = TOP; + tmpListKey.Remove(); // remove defaultKey + internalListKey = tmpListKey; + internalListKey = TOP; // Align internalListKey to first element before passing back; + + return internalListKey; +} + + +/****************************************************************************** + * VerseKey::LowerBound - sets / gets the lower boundary for this key + */ + +VerseKey &VerseKey::LowerBound(const char *lb) +{ + if (!lowerBound) + initBounds(); + + (*lowerBound) = lb; + lowerBound->Normalize(); + + return (*lowerBound); +} + + +/****************************************************************************** + * VerseKey::UpperBound - sets / gets the upper boundary for this key + */ + +VerseKey &VerseKey::UpperBound(const char *ub) +{ + if (!upperBound) + initBounds(); + +// need to set upperbound parsing to resolve to max verse/chap if not specified + (*upperBound) = ub; + if (*upperBound < *lowerBound) + *upperBound = *lowerBound; + upperBound->Normalize(); + +// until we have a proper method to resolve max verse/chap use this kludge + int len = strlen(ub); + bool alpha = false; + bool versespec = false; + bool chapspec = false; + for (int i = 0; i < len; i++) { + if (isalpha(ub[i])) + alpha = true; + if (ub[i] == ':') // if we have a : we assume verse spec + versespec = true; + if ((isdigit(ub[i])) && (alpha)) // if digit after alpha assume chap spec + chapspec = true; + } + if (!chapspec) + *upperBound = MAXCHAPTER; + if (!versespec) + *upperBound = MAXVERSE; + + +// -- end kludge + + return (*upperBound); +} + + +/****************************************************************************** + * VerseKey::LowerBound - sets / gets the lower boundary for this key + */ + +VerseKey &VerseKey::LowerBound() const +{ + if (!lowerBound) + initBounds(); + + return (*lowerBound); +} + + +/****************************************************************************** + * VerseKey::UpperBound - sets / gets the upper boundary for this key + */ + +VerseKey 
&VerseKey::UpperBound() const +{ + if (!upperBound) + initBounds(); + + return (*upperBound); +} + + +/****************************************************************************** + * VerseKey::ClearBounds - clears bounds for this VerseKey + */ + +void VerseKey::ClearBounds() +{ + initBounds(); +} + + +void VerseKey::initBounds() const +{ + if (!upperBound) { + upperBound = new VerseKey(); + upperBound->AutoNormalize(0); + upperBound->Headings(1); + } + if (!lowerBound) { + lowerBound = new VerseKey(); + lowerBound->AutoNormalize(0); + lowerBound->Headings(1); + } + + lowerBound->Testament(0); + lowerBound->Book(0); + lowerBound->Chapter(0); + lowerBound->Verse(0); + + upperBound->Testament(2); + upperBound->Book(BMAX[1]); + upperBound->Chapter(books[1][BMAX[1]-1].chapmax); + upperBound->Verse(books[1][BMAX[1]-1].versemax[upperBound->Chapter()-1]); +} + + +/****************************************************************************** + * VerseKey::copyFrom - Equates this VerseKey to another VerseKey + */ + +void VerseKey::copyFrom(const VerseKey &ikey) { + SWKey::copyFrom(ikey); + + parse(); +} + + +/****************************************************************************** + * VerseKey::copyFrom - Equates this VerseKey to another SWKey + */ + +void VerseKey::copyFrom(const SWKey &ikey) { + SWKey::copyFrom(ikey); + + parse(); +} + + +/****************************************************************************** + * VerseKey::getText - refreshes keytext before returning if cast to + * a (char *) is requested + */ + +const char *VerseKey::getText() const { + freshtext(); + return keytext; +} + + +const char *VerseKey::getShortText() const { + static char *stext = 0; + char buf[2047]; + freshtext(); + if (book < 1) { + if (testament < 1) + sprintf(buf, "[ Module Heading ]"); + else sprintf(buf, "[ Testament %d Heading ]", (int)testament); + } + else { + sprintf(buf, "%s %d:%d", books[testament-1][book-1].prefAbbrev, chapter, verse); + } + stdstr(&stext, buf); 
+ return stext; +} + + +const char *VerseKey::getBookName() const { + return books[testament-1][book-1].name; +} + + +const char *VerseKey::getBookAbbrev() const { + return books[testament-1][book-1].prefAbbrev; +} +/****************************************************************************** + * VerseKey::setPosition(SW_POSITION) - Positions this key + * + * ENT: p - position + * + * RET: *this + */ + +void VerseKey::setPosition(SW_POSITION p) { + switch (p) { + case POS_TOP: + testament = LowerBound().Testament(); + book = LowerBound().Book(); + chapter = LowerBound().Chapter(); + verse = LowerBound().Verse(); + break; + case POS_BOTTOM: + testament = UpperBound().Testament(); + book = UpperBound().Book(); + chapter = UpperBound().Chapter(); + verse = UpperBound().Verse(); + break; + case POS_MAXVERSE: + Normalize(); + verse = books[testament-1][book-1].versemax[chapter-1]; + break; + case POS_MAXCHAPTER: + verse = 1; + Normalize(); + chapter = books[testament-1][book-1].chapmax; + break; + } + Normalize(1); + Error(); // clear error from normalize +} + + +/****************************************************************************** + * VerseKey::increment - Increments key a number of verses + * + * ENT: step - Number of verses to jump forward + * + * RET: *this + */ + +void VerseKey::increment(int step) { + char ierror = 0; + Index(Index() + step); + while ((!verse) && (!headings) && (!ierror)) { + Index(Index() + 1); + ierror = Error(); + } + + error = (ierror) ? ierror : error; +} + + +/****************************************************************************** + * VerseKey::decrement - Decrements key a number of verses + * + * ENT: step - Number of verses to jump backward + * + * RET: *this + */ + +void VerseKey::decrement(int step) { + char ierror = 0; + + Index(Index() - step); + while ((!verse) && (!headings) && (!ierror)) { + Index(Index() - 1); + ierror = Error(); + } + if ((ierror) && (!headings)) + (*this)++; + + error = (ierror) ? 
ierror : error; +} + + +/****************************************************************************** + * VerseKey::Normalize - checks limits and normalizes if necessary (e.g. + * Matthew 29:47 = Mark 2:2). If last verse is + * exceeded, key is set to last Book CH:VS + * RET: *this + */ + +void VerseKey::Normalize(char autocheck) +{ + error = 0; + + if ((autocheck) && (!autonorm)) // only normalize if we were explicitely called or if autonorm is turned on + return; + + if ((headings) && (!verse)) // this is cheeze and temporary until deciding what actions should be taken. + return; // so headings should only be turned on when positioning with Index() or incrementors + + while ((testament < 3) && (testament > 0)) { + + if (book > BMAX[testament-1]) { + book -= BMAX[testament-1]; + testament++; + continue; + } + + if (book < 1) { + if (--testament > 0) { + book += BMAX[testament-1]; + } + continue; + } + + if (chapter > books[testament-1][book-1].chapmax) { + chapter -= books[testament-1][book-1].chapmax; + book++; + continue; + } + + if (chapter < 1) { + if (--book > 0) { + chapter += books[testament-1][book-1].chapmax; + } + else { + if (testament > 1) { + chapter += books[0][BMAX[0]-1].chapmax; + } + } + continue; + } + + if (verse > books[testament-1][book-1].versemax[chapter-1]) { // -1 because e.g chapter 1 of Matthew is books[1][0].versemax[0] + verse -= books[testament-1][book-1].versemax[chapter++ - 1]; + continue; + } + + if (verse < 1) { + if (--chapter > 0) { + verse += books[testament-1][book-1].versemax[chapter-1]; + } + else { + if (book > 1) { + verse += books[testament-1][book-2].versemax[books[testament-1][book-2].chapmax-1]; + } + else { + if (testament > 1) { + verse += books[0][BMAX[0]-1].versemax[books[0][BMAX[0]-1].chapmax-1]; + } + } + } + continue; + } + + break; // If we've made it this far (all failure checks continue) we're ok + } + + if (testament > 2) { + testament = 2; + book = BMAX[testament-1]; + chapter = 
books[testament-1][book-1].chapmax; + verse = books[testament-1][book-1].versemax[chapter-1]; + error = KEYERR_OUTOFBOUNDS; + } + + if (testament < 1) { + error = ((!headings) || (testament < 0) || (book < 0)) ? KEYERR_OUTOFBOUNDS : 0; + testament = ((headings) ? 0 : 1); + book = ((headings) ? 0 : 1); + chapter = ((headings) ? 0 : 1); + verse = ((headings) ? 0 : 1); + } + if (_compare(UpperBound()) > 0) { + *this = UpperBound(); + error = KEYERR_OUTOFBOUNDS; + } + if (_compare(LowerBound()) < 0) { + *this = LowerBound(); + error = KEYERR_OUTOFBOUNDS; + } +} + + +/****************************************************************************** + * VerseKey::Testament - Gets testament + * + * RET: value of testament + */ + +char VerseKey::Testament() const +{ + return testament; +} + + +/****************************************************************************** + * VerseKey::Book - Gets book + * + * RET: value of book + */ + +char VerseKey::Book() const +{ + return book; +} + + +/****************************************************************************** + * VerseKey::Chapter - Gets chapter + * + * RET: value of chapter + */ + +int VerseKey::Chapter() const +{ + return chapter; +} + + +/****************************************************************************** + * VerseKey::Verse - Gets verse + * + * RET: value of verse + */ + +int VerseKey::Verse() const +{ + return verse; +} + + +/****************************************************************************** + * VerseKey::Testament - Sets/gets testament + * + * ENT: itestament - value which to set testament + * [MAXPOS(char)] - only get + * + * RET: if unchanged -> value of testament + * if changed -> previous value of testament + */ + +char VerseKey::Testament(char itestament) +{ + char retval = testament; + + if (itestament != MAXPOS(char)) { + testament = itestament; + Normalize(1); + } + return retval; +} + + +/****************************************************************************** + * 
VerseKey::Book - Sets/gets book + * + * ENT: ibook - value which to set book + * [MAXPOS(char)] - only get + * + * RET: if unchanged -> value of book + * if changed -> previous value of book + */ + +char VerseKey::Book(char ibook) +{ + char retval = book; + + Chapter(1); + book = ibook; + Normalize(1); + + return retval; +} + + +/****************************************************************************** + * VerseKey::Chapter - Sets/gets chapter + * + * ENT: ichapter - value which to set chapter + * [MAXPOS(int)] - only get + * + * RET: if unchanged -> value of chapter + * if changed -> previous value of chapter + */ + +int VerseKey::Chapter(int ichapter) +{ + int retval = chapter; + + Verse(1); + chapter = ichapter; + Normalize(1); + + return retval; +} + + +/****************************************************************************** + * VerseKey::Verse - Sets/gets verse + * + * ENT: iverse - value which to set verse + * [MAXPOS(int)] - only get + * + * RET: if unchanged -> value of verse + * if changed -> previous value of verse + */ + +int VerseKey::Verse(int iverse) +{ + int retval = verse; + + verse = iverse; + Normalize(1); + + return retval; +} + + +/****************************************************************************** + * VerseKey::AutoNormalize - Sets/gets flag that tells VerseKey to auto- + * matically normalize itself when modified + * + * ENT: iautonorm - value which to set autonorm + * [MAXPOS(char)] - only get + * + * RET: if unchanged -> value of autonorm + * if changed -> previous value of autonorm + */ + +char VerseKey::AutoNormalize(char iautonorm) +{ + char retval = autonorm; + + if (iautonorm != MAXPOS(char)) { + autonorm = iautonorm; + Normalize(1); + } + return retval; +} + + +/****************************************************************************** + * VerseKey::Headings - Sets/gets flag that tells VerseKey to include + * chap/book/testmnt/module headings + * + * ENT: iheadings - value which to set headings + * 
[MAXPOS(char)] - only get + * + * RET: if unchanged -> value of headings + * if changed -> previous value of headings + */ + +char VerseKey::Headings(char iheadings) +{ + char retval = headings; + + if (iheadings != MAXPOS(char)) { + headings = iheadings; + Normalize(1); + } + return retval; +} + + +/****************************************************************************** + * VerseKey::findindex - binary search to find the index closest, but less + * than the given value. + * + * ENT: array - long * to array to search + * size - number of elements in the array + * value - value to find + * + * RET: the index into the array that is less than but closest to value + */ + +int VerseKey::findindex(long *array, int size, long value) +{ + int lbound, ubound, tval; + + lbound = 0; + ubound = size - 1; + while ((ubound - lbound) > 1) { + tval = lbound + (ubound-lbound)/2; + if (array[tval] <= value) + lbound = tval; + else ubound = tval; + } + return (array[ubound] <= value) ? ubound : lbound; +} + + +/****************************************************************************** + * VerseKey::Index - Gets index based upon current verse + * + * RET: offset + */ + +long VerseKey::Index() const +{ + long offset; + + if (!testament) { // if we want module heading + offset = 0; + verse = 0; + } + else { + if (!book) + chapter = 0; + if (!chapter) + verse = 0; + + offset = offsets[testament-1][0][book]; + offset = offsets[testament-1][1][(int)offset + chapter]; + if (!(offset|verse)) // if we have a testament but nothing else. 
+ offset = 1; + } + return (offset + verse); +} + + +/****************************************************************************** + * VerseKey::Index - Gets index based upon current verse + * + * RET: offset + */ + +long VerseKey::NewIndex() const +{ + static long otMaxIndex = 32300 - 8245; // total positions - new testament positions +// static long otMaxIndex = offsets[0][1][(int)offsets[0][0][BMAX[0]] + books[0][BMAX[0]].chapmax]; + return ((testament-1) * otMaxIndex) + Index(); +} + + +/****************************************************************************** + * VerseKey::Index - Sets index based upon current verse + * + * ENT: iindex - value to set index to + * + * RET: offset + */ + +long VerseKey::Index(long iindex) +{ + long offset; + +// This is the dirty stuff -------------------------------------------- + + if (!testament) + testament = 1; + + if (iindex < 1) { // if (-) or module heading + if (testament < 2) { + if (iindex < 0) { + testament = 0; // previously we changed 0 -> 1 + error = KEYERR_OUTOFBOUNDS; + } + else testament = 0; // we want module heading + } + else { + testament--; + iindex = (offsets[testament-1][1][offsize[testament-1][1]-1] + books[testament-1][BMAX[testament-1]-1].versemax[books[testament-1][BMAX[testament-1]-1].chapmax-1]) + iindex; // What a doozy! ((offset of last chapter + number of verses in the last chapter) + iindex) + } + } + +// -------------------------------------------------------------------- + + + if (testament) { + if ((!error) && (iindex)) { + offset = findindex(offsets[testament-1][1], offsize[testament-1][1], iindex); + verse = iindex - offsets[testament-1][1][offset]; + book = findindex(offsets[testament-1][0], offsize[testament-1][0], offset); + chapter = offset - offsets[testament-1][0][VerseKey::book]; + verse = (chapter) ? verse : 0; // funny check. if we are index=1 (testmt header) all gets set to 0 exept verse. Don't know why. Fix if you figure out. Think its in the offsets table. 
+ if (verse) { // only check if -1 won't give negative + if (verse > books[testament-1][book-1].versemax[chapter-1]) { + if (testament > 1) { + verse = books[testament-1][book-1].versemax[chapter-1]; + error = KEYERR_OUTOFBOUNDS; + } + else { + testament++; + Index(verse - books[testament-2][book-1].versemax[chapter-1]); + } + } + } + } + } + if (_compare(UpperBound()) > 0) { + *this = UpperBound(); + error = KEYERR_OUTOFBOUNDS; + } + if (_compare(LowerBound()) < 0) { + *this = LowerBound(); + error = KEYERR_OUTOFBOUNDS; + } + return Index(); +} + + +/****************************************************************************** + * VerseKey::compare - Compares another SWKey object + * + * ENT: ikey - key to compare with this one + * + * RET: >0 if this versekey is greater than compare versekey + * <0 < + * 0 = + */ + +int VerseKey::compare(const SWKey &ikey) +{ + VerseKey ivkey = (const char *)ikey; + return _compare(ivkey); +} + + +/****************************************************************************** + * VerseKey::_compare - Compares another VerseKey object + * + * ENT: ikey - key to compare with this one + * + * RET: >0 if this versekey is greater than compare versekey + * <0 < + * 0 = + */ + +int VerseKey::_compare(const VerseKey &ivkey) +{ + long keyval1 = 0; + long keyval2 = 0; + + keyval1 += Testament() * 1000000000; + keyval2 += ivkey.Testament() * 1000000000; + keyval1 += Book() * 1000000; + keyval2 += ivkey.Book() * 1000000; + keyval1 += Chapter() * 1000; + keyval2 += ivkey.Chapter() * 1000; + keyval1 += Verse(); + keyval2 += ivkey.Verse(); + keyval1 -= keyval2; + keyval1 = (keyval1) ? ((keyval1 > 0) ? 
1 : -1) /*keyval1/labs(keyval1)*/:0; // -1 | 0 | 1 + return keyval1; +} + + +const char *VerseKey::getOSISRef() const { + static char buf[5][254]; + static char loop = 0; + + if (loop > 4) + loop = 0; + + static char *osisotbooks[] = { + "Gen","Exod","Lev","Num","Deut","Josh","Judg","Ruth","_1Sam","_2Sam", + "_1Kgs","_2Kgs","_1Chr","_2Chr","Ezra","Neh","Esth","Job","Ps", + "Prov", // added this. Was not in OSIS spec + "Eccl", + "Song","Isa","Jer","Lam","Ezek","Dan","Hos","Joel","Amos","Obad", + "Jonah","Mic","Nah","Hab","Zeph","Hag","Zech","Mal","Bar","PrAzar", + "Bel","Sus","_1Esd","_2Esd","AddEsth","EpJer","Jdt","_1Macc","_2Macc","_3Macc", + "_4Macc","PrMan","Ps151","Sir","Tob","Wis"}; + static char *osisntbooks[] = { + "Matt","Mark","Luke","John","Acts","Rom","_1Cor","_2Cor","Gal","Eph", + "Phil","Col","_1Thess","_2Thess","_1Tim","_2Tim","Titus","Phlm","Heb","Jas", + "_1Pet","_2Pet","_1John","_2John","_3John","Jude","Rev"}; + static char **osisbooks[] = { osisotbooks, osisntbooks }; + if (Verse()) + sprintf(buf[loop], "%s.%d.%d", osisbooks[Testament()-1][Book()-1], (int)Chapter(), (int)Verse()); + else if (Chapter()) + sprintf(buf[loop], "%s.%d", osisbooks[Testament()-1][Book()-1], (int)Chapter()); + else if (Book()) + sprintf(buf[loop], "%s", osisbooks[Testament()-1][Book()-1]); + else sprintf(buf[loop], ""); + return buf[loop++]; +} diff --git a/src/mgr/Makefile b/src/mgr/Makefile new file mode 100644 index 0000000..339f87a --- /dev/null +++ b/src/mgr/Makefile @@ -0,0 +1,4 @@ +root := ../.. 
+ +all: + make -C ${root} diff --git a/src/mgr/Makefile.am b/src/mgr/Makefile.am new file mode 100644 index 0000000..c648032 --- /dev/null +++ b/src/mgr/Makefile.am @@ -0,0 +1,26 @@ +mgrdir = $(top_srcdir)/src/mgr + +if CONFDEF +globdef = -DGLOBCONFPATH=\"${globalconfdir}/sword.conf\" +else +globdef = +endif + +if ICU +icudatadir = -DICUDATA=\"${pkglibdir}\" +else +icudatadir = +endif + + +DEFS += $(globdef) $(icudatadir) + +libsword_la_SOURCES += $(mgrdir)/swconfig.cpp +libsword_la_SOURCES += $(mgrdir)/swmgr.cpp +libsword_la_SOURCES += $(mgrdir)/swfiltermgr.cpp +libsword_la_SOURCES += $(mgrdir)/encfiltmgr.cpp +libsword_la_SOURCES += $(mgrdir)/markupfiltmgr.cpp +libsword_la_SOURCES += $(mgrdir)/filemgr.cpp +libsword_la_SOURCES += $(mgrdir)/swlocale.cpp +libsword_la_SOURCES += $(mgrdir)/localemgr.cpp +libsword_la_SOURCES += $(mgrdir)/swcacher.cpp diff --git a/src/mgr/encfiltmgr.cpp b/src/mgr/encfiltmgr.cpp new file mode 100644 index 0000000..ab55de9 --- /dev/null +++ b/src/mgr/encfiltmgr.cpp @@ -0,0 +1,148 @@ +/****************************************************************************** + * swencodingmgr.cpp - implementaion of class EncodingFilterMgr, subclass of + * used to transcode all module text to a requested + * encoding. + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + +#include <encfiltmgr.h> + +#include <scsuutf8.h> +#include <latin1utf8.h> + +#include <unicodertf.h> +#include <utf8latin1.h> +#include <utf8utf16.h> +#include <utf8html.h> + +#include <swmgr.h> + +/****************************************************************************** + * EncodingFilterMgr Constructor - initializes instance of EncodingFilterMgr + * + * ENT: + * enc - Encoding format to emit + */ + +EncodingFilterMgr::EncodingFilterMgr (char enc) + : SWFilterMgr() { + + scsuutf8 = new SCSUUTF8(); + latin1utf8 = new Latin1UTF8(); + + encoding = enc; + + switch (encoding) { + case ENC_LATIN1: + targetenc = new UTF8Latin1(); + break; + case ENC_UTF16: + targetenc = new UTF8UTF16(); + break; + case ENC_RTF: + targetenc = new UnicodeRTF(); + break; + case ENC_HTML: + targetenc = new UTF8HTML(); + break; + default: // i.e. case ENC_UTF8 + targetenc = NULL; + } +} + +/****************************************************************************** + * EncodingFilterMgr Destructor - Cleans up instance of EncodingFilterMgr + */ +EncodingFilterMgr::~EncodingFilterMgr() { + if (scsuutf8) + delete scsuutf8; + if (latin1utf8) + delete latin1utf8; + if (targetenc) + delete targetenc; +} + +void EncodingFilterMgr::AddRawFilters(SWModule *module, ConfigEntMap §ion) { + + ConfigEntMap::iterator entry; + + string encoding = ((entry = section.find("Encoding")) != section.end()) ? 
(*entry).second : (string)""; + if (encoding.empty() || !stricmp(encoding.c_str(), "Latin-1")) { + module->AddRawFilter(latin1utf8); + } + else if (!stricmp(encoding.c_str(), "SCSU")) { + module->AddRawFilter(scsuutf8); + } +} + +void EncodingFilterMgr::AddEncodingFilters(SWModule *module, ConfigEntMap §ion) { + if (targetenc) + module->AddEncodingFilter(targetenc); +} + +/****************************************************************************** + * EncodingFilterMgr::Encoding - sets/gets encoding + * + * ENT: enc - new encoding or 0 to simply get the current encoding + * + * RET: encoding + */ +char EncodingFilterMgr::Encoding(char enc) { + if (enc && enc != encoding) { + encoding = enc; + SWFilter * oldfilter = targetenc; + + switch (encoding) { + case ENC_LATIN1: + targetenc = new UTF8Latin1(); + break; + case ENC_UTF16: + targetenc = new UTF8UTF16(); + break; + case ENC_RTF: + targetenc = new UnicodeRTF(); + break; + case ENC_HTML: + targetenc = new UTF8HTML(); + break; + default: // i.e. 
case ENC_UTF8 + targetenc = NULL; + } + + ModMap::const_iterator module; + + if (oldfilter != targetenc) { + if (oldfilter) { + if (!targetenc) { + for (module = getParentMgr()->Modules.begin(); module != getParentMgr()->Modules.end(); module++) + module->second->RemoveRenderFilter(oldfilter); + } + else { + for (module = getParentMgr()->Modules.begin(); module != getParentMgr()->Modules.end(); module++) + module->second->ReplaceRenderFilter(oldfilter, targetenc); + } + delete oldfilter; + } + else if (targetenc) { + for (module = getParentMgr()->Modules.begin(); module != getParentMgr()->Modules.end(); module++) + module->second->AddRenderFilter(targetenc); + } + } + + } + return encoding; +} diff --git a/src/mgr/filemgr.cpp b/src/mgr/filemgr.cpp new file mode 100644 index 0000000..0b31576 --- /dev/null +++ b/src/mgr/filemgr.cpp @@ -0,0 +1,266 @@ +/****************************************************************************** + * filemgr.cpp - implementation of class FileMgr used for pooling file + * handles + * + * $Id: filemgr.cpp,v 1.22 2002/07/31 20:26:38 scribe Exp $ + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + +#include <filemgr.h> +#include <utilstr.h> + +#include <dirent.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +// ---------------- statics ----------------- +FileMgr FileMgr::systemFileMgr; + +// --------------- end statics -------------- + + +FileDesc::FileDesc(FileMgr *parent, char *path, int mode, int perms, bool tryDowngrade) { + this->parent = parent; + this->path = 0; + stdstr(&this->path, path); + this->mode = mode; + this->perms = perms; + this->tryDowngrade = tryDowngrade; + offset = 0; + fd = -77; +} + + +FileDesc::~FileDesc() { + if (fd > 0) + close(fd); + + if (path) + delete [] path; +} + + +int FileDesc::getFd() { + if (fd == -77) + fd = parent->sysOpen(this); + return fd; +} + + +FileMgr::FileMgr(int maxFiles) { + this->maxFiles = maxFiles; // must be at least 2 + files = 0; +} + + +FileMgr::~FileMgr() { + FileDesc *tmp; + + while(files) { + tmp = files->next; + delete files; + files = tmp; + } +} + + +FileDesc *FileMgr::open(char *path, int mode, bool tryDowngrade) { + return open(path, mode, S_IREAD | S_IWRITE, tryDowngrade); +} + +FileDesc *FileMgr::open(char *path, int mode, int perms, bool tryDowngrade) { + FileDesc **tmp, *tmp2; + + for (tmp = &files; *tmp; tmp = &((*tmp)->next)) { + if ((*tmp)->fd < 0) // insert as first non-system_open file + break; + } + + tmp2 = new FileDesc(this, path, mode, perms, tryDowngrade); + tmp2->next = *tmp; + *tmp = tmp2; + + return tmp2; +} + + +void FileMgr::close(FileDesc *file) { + FileDesc **loop; + + for (loop = &files; *loop; loop = &((*loop)->next)) { + if (*loop == file) { + *loop = (*loop)->next; + delete file; + break; + } + } +} + + +// to truncate a file at its current position +// leaving byte at current possition intact +// deleting everything afterward. 
+signed char FileMgr::trunc(FileDesc *file) { + + static const char *writeTest = "x"; + long size = lseek(file->getFd(), 1, SEEK_CUR); + if (size == 1) // was empty + size = 0; + char nibble [ 32767 ]; + bool writable = write(file->getFd(), writeTest, 1); + int bytes = 0; + + if (writable) { + // get tmpfilename + char *buf = new char [ strlen(file->path) + 10 ]; + int i; + for (i = 0; i < 9999; i++) { + sprintf(buf, "%stmp%.4d", file->path, i); + if (!existsFile(buf)) + break; + } + if (i == 9999) + return -2; + + int fd = ::open(buf, O_CREAT|O_RDWR, S_IREAD|S_IWRITE); + if (fd < 0) + return -3; + + lseek(file->getFd(), 0, SEEK_SET); + while (size > 0) { + bytes = read(file->getFd(), nibble, 32767); + write(fd, nibble, (bytes < size)?bytes:size); + size -= bytes; + } + // zero out the file + ::close(file->fd); + file->fd = ::open(file->path, O_TRUNC, S_IREAD|S_IWRITE); + ::close(file->fd); + file->fd = -77; // force file open by filemgr + // copy tmp file back (dumb, but must preserve file permissions) + lseek(fd, 0, SEEK_SET); + do { + bytes = read(fd, nibble, 32767); + write(file->getFd(), nibble, bytes); + } while (bytes == 32767); + + ::close(fd); + ::close(file->fd); + unlink(buf); // remove our tmp file + file->fd = -77; // causes file to be swapped out forcing open on next call to getFd() + } + else { // put offset back and return failure + lseek(file->getFd(), -1, SEEK_CUR); + return -1; + } + return 0; +} + + +int FileMgr::sysOpen(FileDesc *file) { + FileDesc **loop; + int openCount = 1; // because we are presently opening 1 file, and we need to be sure to close files to accomodate, if necessary + + for (loop = &files; *loop; loop = &((*loop)->next)) { + + if ((*loop)->fd > 0) { + if (++openCount > maxFiles) { + (*loop)->offset = lseek((*loop)->fd, 0, SEEK_CUR); + ::close((*loop)->fd); + (*loop)->fd = -77; + } + } + + if (*loop == file) { + if (*loop != files) { + *loop = (*loop)->next; + file->next = files; + files = file; + } + if ((!access(file->path, 
04)) || ((file->mode & O_CREAT) == O_CREAT)) { // check for at least file exists / read access before we try to open + char tries = (((file->mode & O_RDWR) == O_RDWR) && (file->tryDowngrade)) ? 2 : 1; // try read/write if possible + for (int i = 0; i < tries; i++) { + if (i > 0) { + file->mode = (file->mode & ~O_RDWR); // remove write access + file->mode = (file->mode | O_RDONLY);// add read access + } + file->fd = ::open(file->path, file->mode, file->perms); + + if (file->fd >= 0) + break; + } + + if (file->fd >= 0) + lseek(file->fd, file->offset, SEEK_SET); + } + else file->fd = -1; + if (!*loop) + break; + } + } + return file->fd; +} + + +signed char FileMgr::existsFile(const char *ipath, const char *ifileName) +{ + int len = strlen(ipath) + ((ifileName)?strlen(ifileName):0) + 3; + char *ch; + char *path = new char [ len ]; + strcpy(path, ipath); + + if ((path[strlen(path)-1] == '\\') || (path[strlen(path)-1] == '/')) + path[strlen(path)-1] = 0; + + if (ifileName) { + ch = path + strlen(path); + sprintf(ch, "/%s", ifileName); + } + signed char retVal = !access(path, 04); + delete [] path; + return retVal; +} + + +signed char FileMgr::existsDir(const char *ipath, const char *idirName) +{ + char *ch; + int len = strlen(ipath) + ((idirName)?strlen(idirName):0) + 1; + if (idirName) + len += strlen(idirName); + char *path = new char [ len ]; + strcpy(path, ipath); + + if ((path[strlen(path)-1] == '\\') || (path[strlen(path)-1] == '/')) + path[strlen(path)-1] = 0; + + if (idirName) { + ch = path + strlen(path); + sprintf(ch, "/%s", idirName); + } + signed char retVal = !access(path, 04); + delete [] path; + return retVal; +} diff --git a/src/mgr/localemgr.cpp b/src/mgr/localemgr.cpp new file mode 100644 index 0000000..bc12f4c --- /dev/null +++ b/src/mgr/localemgr.cpp @@ -0,0 +1,184 @@ +/****************************************************************************** + * localemgr.cpp - implementation of class LocaleMgr used to interact with + * registered locales for a 
sword installation + * + * $Id: localemgr.cpp,v 1.12 2002/06/19 09:24:44 scribe Exp $ + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#include <unixstr.h> +#endif +#include <sys/stat.h> +#include <dirent.h> + +#include <swmgr.h> +#include <utilfuns.h> + +#include <localemgr.h> +#include <filemgr.h> + + +LocaleMgr LocaleMgr::systemLocaleMgr; + + +LocaleMgr::LocaleMgr(const char *iConfigPath) { + char *prefixPath = 0; + char *configPath = 0; + char configType = 0; + string path; + + defaultLocaleName = 0; + + char *lang = getenv ("LANG"); + if (lang) { + if (strlen(lang) > 0) + setDefaultLocaleName(lang); + else setDefaultLocaleName("en_us"); + } + else setDefaultLocaleName("en_us"); + + if (!iConfigPath) + SWMgr::findConfig(&configType, &prefixPath, &configPath); + else configPath = (char *)iConfigPath; + + if (prefixPath) { + switch (configType) { + case 2: + int i; + for (i = strlen(configPath)-1; ((i) && (configPath[i] != '/') && (configPath[i] != '\\')); i--); + configPath[i] = 0; + path = configPath; + path += "/"; + break; + default: + path = prefixPath; + if ((prefixPath[strlen(prefixPath)-1] != '\\') && (prefixPath[strlen(prefixPath)-1] != '/')) + path += "/"; + + break; + } + if (FileMgr::existsDir(path.c_str(), "locales.d")) { + path += "locales.d"; + loadConfigDir(path.c_str()); + } + 
} + + if (prefixPath) + delete [] prefixPath; + + if (configPath) + delete [] configPath; +} + + +LocaleMgr::~LocaleMgr() { + if (defaultLocaleName) + delete [] defaultLocaleName; + deleteLocales(); +} + + +void LocaleMgr::loadConfigDir(const char *ipath) { + DIR *dir; + struct dirent *ent; + string newmodfile; + LocaleMap::iterator it; + + if ((dir = opendir(ipath))) { + rewinddir(dir); + while ((ent = readdir(dir))) { + if ((strcmp(ent->d_name, ".")) && (strcmp(ent->d_name, ".."))) { + newmodfile = ipath; + if ((ipath[strlen(ipath)-1] != '\\') && (ipath[strlen(ipath)-1] != '/')) + newmodfile += "/"; + newmodfile += ent->d_name; + SWLocale *locale = new SWLocale(newmodfile.c_str()); + if (locale->getName()) { + it = locales.find(locale->getName()); + if (it != locales.end()) { + *((*it).second) += *locale; + delete locale; + } + else locales.insert(LocaleMap::value_type(locale->getName(), locale)); + } + else delete locale; + } + } + closedir(dir); + } +} + + +void LocaleMgr::deleteLocales() { + + LocaleMap::iterator it; + + for (it = locales.begin(); it != locales.end(); it++) + delete (*it).second; + + locales.erase(locales.begin(), locales.end()); +} + + +SWLocale *LocaleMgr::getLocale(const char *name) { + LocaleMap::iterator it; + + it = locales.find(name); + if (it != locales.end()) + return (*it).second; + + return 0; +} + + +list <string> LocaleMgr::getAvailableLocales() { + list <string> retVal; + for (LocaleMap::iterator it = locales.begin(); it != locales.end(); it++) + retVal.push_back((*it).second->getName()); + + return retVal; +} + + +const char *LocaleMgr::translate(const char *text, const char *localeName) { + SWLocale *target; + if (!localeName) { + localeName = getDefaultLocaleName(); + } + target = getLocale(localeName); + if (target) + return target->translate(text); + return text; +} + + +const char *LocaleMgr::getDefaultLocaleName() { + return defaultLocaleName; +} + + +void LocaleMgr::setDefaultLocaleName(const char *name) { + 
stdstr(&defaultLocaleName, name); +} diff --git a/src/mgr/markupfiltmgr.cpp b/src/mgr/markupfiltmgr.cpp new file mode 100644 index 0000000..8dc68ea --- /dev/null +++ b/src/mgr/markupfiltmgr.cpp @@ -0,0 +1,236 @@ +/****************************************************************************** + * swmarkupmgr.cpp - implementaion of class MarkupFilterMgr, subclass of + * used to transcode all module text to a requested + * markup. + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + +#include <thmlplain.h> +#include <gbfplain.h> +#include <thmlgbf.h> +#include <gbfthml.h> +#include <thmlhtml.h> +#include <gbfhtml.h> +#include <plainhtml.h> +#include <thmlhtmlhref.h> +#include <gbfhtmlhref.h> +#include <thmlrtf.h> +#include <gbfrtf.h> +#include <gbfosis.h> +#include <thmlosis.h> + +#include <markupfiltmgr.h> + +#include <swmgr.h> + + +/****************************************************************************** + * MarkupFilterMgr Constructor - initializes instance of MarkupFilterMgr + * + * ENT: + * enc - Encoding format to emit + * mark - Markup format to emit + */ + +MarkupFilterMgr::MarkupFilterMgr (char mark, char enc) + : EncodingFilterMgr(enc) { + + markup = mark; + + CreateFilters(markup); +} + + +/****************************************************************************** + * MarkupFilterMgr Destructor - Cleans up instance of MarkupFilterMgr + */ + +MarkupFilterMgr::~MarkupFilterMgr() { + if (fromthml) + delete (fromthml); + if (fromgbf) + delete (fromgbf); + if (fromplain) + delete (fromplain); + if (fromosis) + delete (fromosis); +} + +/****************************************************************************** + * MarkupFilterMgr::Markup - sets/gets markup + * + * ENT: mark - new encoding or 0 to simply get the current markup + * + * RET: markup + */ +char MarkupFilterMgr::Markup(char mark) { + if (mark && mark != markup) { + markup = mark; + ModMap::const_iterator module; + + SWFilter * oldplain = fromplain; + SWFilter * oldthml = fromthml; + SWFilter * oldgbf = fromgbf; + SWFilter * oldosis = fromosis; + + CreateFilters(markup); + + for (module = getParentMgr()->Modules.begin(); module != getParentMgr()->Modules.end(); module++) + switch (module->second->Markup()) { + case FMT_THML: + if (oldthml != fromthml) { + if (oldthml) { + if (!fromthml) { + module->second->RemoveRenderFilter(oldthml); + } + else { + module->second->ReplaceRenderFilter(oldthml, fromthml); + } + } + else if (fromthml) { + 
module->second->AddRenderFilter(fromthml); + } + } + break; + case FMT_GBF: + if (oldgbf != fromgbf) { + if (oldgbf) { + if (!fromgbf) { + module->second->RemoveRenderFilter(oldgbf); + } + else { + module->second->ReplaceRenderFilter(oldgbf, fromgbf); + } + } + else if (fromgbf) { + module->second->AddRenderFilter(fromgbf); + } + break; + } + case FMT_PLAIN: + if (oldplain != fromplain) { + if (oldplain) { + if (!fromplain) { + module->second->RemoveRenderFilter(oldplain); + } + else { + module->second->ReplaceRenderFilter(oldplain, fromplain); + } + } + else if (fromplain) { + module->second->AddRenderFilter(fromplain); + } + break; + } + case FMT_OSIS: + if (oldosis != fromosis) { + if (oldosis) { + if (!fromosis) { + module->second->RemoveRenderFilter(oldosis); + } + else { + module->second->ReplaceRenderFilter(oldosis, fromosis); + } + } + else if (fromosis) { + module->second->AddRenderFilter(fromosis); + } + break; + } + } + + if (oldthml) + delete oldthml; + if (oldgbf) + delete oldgbf; + if (oldplain) + delete oldplain; + if (oldosis) + delete oldosis; + } + return markup; +} + +void MarkupFilterMgr::AddRenderFilters(SWModule *module, ConfigEntMap §ion) { + switch (module->Markup()) { + case FMT_THML: + if (fromthml) + module->AddRenderFilter(fromthml); + break; + case FMT_GBF: + if (fromgbf) + module->AddRenderFilter(fromgbf); + break; + case FMT_PLAIN: + if (fromplain) + module->AddRenderFilter(fromplain); + break; + case FMT_OSIS: + if (fromosis) + module->AddRenderFilter(fromosis); + break; + } +} + +void MarkupFilterMgr::CreateFilters(char markup) { + + switch (markup) { + case FMT_PLAIN: + fromplain = NULL; + fromthml = new ThMLPlain(); + fromgbf = new GBFPlain(); + fromosis = NULL; + break; + case FMT_THML: + fromplain = NULL; + fromthml = NULL; + fromgbf = new GBFThML(); + fromosis = NULL; + break; + case FMT_GBF: + fromplain = NULL; + fromthml = new ThMLGBF(); + fromgbf = NULL; + fromosis = NULL; + break; + case FMT_HTML: + fromplain = new 
PLAINHTML(); + fromthml = new ThMLHTML(); + fromgbf = new GBFHTML(); + fromosis = NULL; + break; + case FMT_HTMLHREF: + fromplain = NULL; + fromthml = new ThMLHTMLHREF(); + fromgbf = new GBFHTMLHREF(); + fromosis = NULL; + break; + case FMT_RTF: + fromplain = NULL; + fromthml = new ThMLRTF(); + fromgbf = new GBFRTF(); + fromosis = NULL; + break; + case FMT_OSIS: + fromplain = NULL; + fromthml = new ThMLOSIS(); + fromgbf = new GBFOSIS(); + fromosis = NULL; + break; + } + +} diff --git a/src/mgr/swcacher.cpp b/src/mgr/swcacher.cpp new file mode 100644 index 0000000..8128a70 --- /dev/null +++ b/src/mgr/swcacher.cpp @@ -0,0 +1,43 @@ +/****************************************************************************** + * swcacher.h - definition of class SWCacher used to provide an interface for + * objects that cache and want a standard interface for cleaning up. + * + * $Id: swcacher.cpp,v 1.1 2002/03/16 01:12:37 scribe Exp $ + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
#include <swcacher.h>


// Nothing to set up in the base class.
SWCacher::SWCacher() {
}


// Nothing to release in the base class.
SWCacher::~SWCacher() {
}


// Base implementation does nothing.
void SWCacher::flush() {
}

// Base implementation always reports 0.
long SWCacher::resourceConsumption() {
	return 0;
}

// Base implementation always reports 0.
long SWCacher::lastAccess() {
	return 0;
}
+ * + */ + +#include <swconfig.h> +#include <utilfuns.h> + + +SWConfig::SWConfig(const char * ifilename) { + filename = ifilename; + Load(); +} + + +SWConfig::~SWConfig() { +} + + +char SWConfig::getline(FILE *fp, string &line) +{ + char retval = 0; + char buf[255]; + + line = ""; + + while (fgets(buf, 254, fp)) { + while (buf[strlen(buf)-1] == '\n' || buf[strlen(buf)-1] == '\r') + buf[strlen(buf)-1] = 0; + + if (buf[strlen(buf)-1] == '\\') { + buf[strlen(buf)-1] = 0; + line += buf; + continue; + } + line += buf; + + if (strlen(buf) < 253) { + retval = 1; + break; + } + } + return retval; +} + + +void SWConfig::Load() { + FILE *cfile; + char *buf, *data; + string line; + ConfigEntMap cursect; + string sectname; + bool first = true; + + Sections.erase(Sections.begin(), Sections.end()); + + if ((cfile = fopen(filename.c_str(), "r"))) { + while (getline(cfile, line)) { + buf = new char [ line.length() + 1 ]; + strcpy(buf, line.c_str()); + if (*strstrip(buf) == '[') { + if (!first) + Sections.insert(SectionMap::value_type(sectname, cursect)); + else first = false; + + cursect.erase(cursect.begin(), cursect.end()); + + strtok(buf, "]"); + sectname = buf+1; + } + else { + strtok(buf, "="); + if ((*buf) && (*buf != '=')) { + if ((data = strtok(NULL, ""))) + cursect.insert(ConfigEntMap::value_type(buf, strstrip(data))); + else cursect.insert(ConfigEntMap::value_type(buf, "")); + } + } + delete [] buf; + } + if (!first) + Sections.insert(SectionMap::value_type(sectname, cursect)); + + fclose(cfile); + } +} + + +void SWConfig::Save() { + FILE *cfile; + string buf; + SectionMap::iterator sit; + ConfigEntMap::iterator entry; + string sectname; + + if ((cfile = fopen(filename.c_str(), "w"))) { + + for (sit = Sections.begin(); sit != Sections.end(); sit++) { + buf = "\n["; + buf += (*sit).first.c_str(); + buf += "]\n"; + fputs(buf.c_str(), cfile); + for (entry = (*sit).second.begin(); entry != (*sit).second.end(); entry++) { + buf = (*entry).first.c_str(); + buf += "="; + buf += 
(*entry).second.c_str(); + buf += "\n"; + fputs(buf.c_str(), cfile); + } + } + fputs("\n", cfile); // so getline will find last line + fclose(cfile); + } +} + + +void SWConfig::augment(SWConfig &addFrom) { + + SectionMap::iterator section; + ConfigEntMap::iterator entry, start, end; + + for (section = addFrom.Sections.begin(); section != addFrom.Sections.end(); section++) { + for (entry = (*section).second.begin(); entry != (*section).second.end(); entry++) { + start = Sections[section->first].lower_bound(entry->first); + end = Sections[section->first].upper_bound(entry->first); + if (start != end) { + if (((++start) != end) + || ((++(addFrom.Sections[section->first].lower_bound(entry->first))) != addFrom.Sections[section->first].upper_bound(entry->first))) { + for (--start; start != end; start++) { + if (!strcmp(start->second.c_str(), entry->second.c_str())) + break; + } + if (start == end) + Sections[(*section).first].insert(ConfigEntMap::value_type((*entry).first, (*entry).second)); + } + else Sections[section->first][entry->first.c_str()] = entry->second.c_str(); + } + else Sections[section->first][entry->first.c_str()] = entry->second.c_str(); + } + } +} + + +ConfigEntMap & SWConfig::operator [] (const char *section) { + return Sections[section]; +} diff --git a/src/mgr/swfiltermgr.cpp b/src/mgr/swfiltermgr.cpp new file mode 100644 index 0000000..264b5a6 --- /dev/null +++ b/src/mgr/swfiltermgr.cpp @@ -0,0 +1,90 @@ +/****************************************************************************** + * swfiltermgr.cpp - definition of class SWFilterMgr used as an interface to + * manage filters on a module + * + * $Id: swfiltermgr.cpp,v 1.2 2001/11/30 12:04:34 scribe Exp $ + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. 
Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <swfiltermgr.h> + + +SWFilterMgr::SWFilterMgr() { +} + + +SWFilterMgr::~SWFilterMgr() { +} + + +void SWFilterMgr::setParentMgr(SWMgr *parentMgr) { + this->parentMgr = parentMgr; +} + + +SWMgr *SWFilterMgr::getParentMgr() { + return parentMgr; +} + + +void SWFilterMgr::AddGlobalOptions(SWModule * module, ConfigEntMap & section, ConfigEntMap::iterator start, ConfigEntMap::iterator end) { +} + + +void SWFilterMgr::AddLocalOptions(SWModule * module, ConfigEntMap & section, ConfigEntMap::iterator start, ConfigEntMap::iterator end) { +} + + +/** +* Adds the encoding filters which are defined in "section" to the SWModule object "module". +* @param module To this module the encoding filter(s) are added +* @param section We use this section to get a list of filters we should apply to the module +*/ + +void SWFilterMgr::AddEncodingFilters(SWModule * module, ConfigEntMap & section) { +} + + +/** +* Adds the render filters which are defined in "section" to the SWModule object "module". +* @param module To this module the render filter(s) are added +* @param section We use this section to get a list of filters we should apply to the module +*/ + +void SWFilterMgr::AddRenderFilters(SWModule * module, ConfigEntMap & section) { +} + + +/** +* Adds the strip filters which are defined in "section" to the SWModule object "module". 
+* @param module To this module the strip filter(s) are added +* @param section We use this section to get a list of filters we should apply to the module +*/ + +void SWFilterMgr::AddStripFilters(SWModule * module, ConfigEntMap & section) { +} + + +/** +* Adds the raw filters which are defined in "section" to the SWModule object "module". +* @param module To this module the raw filter(s) are added +* @param section We use this section to get a list of filters we should apply to the module +*/ + +void SWFilterMgr::AddRawFilters(SWModule * module, ConfigEntMap & section) { +} + diff --git a/src/mgr/swlocale.cpp b/src/mgr/swlocale.cpp new file mode 100644 index 0000000..d85d1eb --- /dev/null +++ b/src/mgr/swlocale.cpp @@ -0,0 +1,140 @@ +/****************************************************************************** + * swlocale.cpp - implementation of Class SWLocale used for retrieval + * of locale lookups + * + * $Id: swlocale.cpp,v 1.4 2002/07/28 01:48:38 scribe Exp $ + * + * Copyright 2000 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + +#include <swlocale.h> +#include <utilfuns.h> + + +SWLocale::SWLocale(const char * ifilename) { + ConfigEntMap::iterator confEntry; + + name = 0; + description = 0; + bookAbbrevs = 0; + BMAX = 0; + books = 0; + localeSource = new SWConfig(ifilename); + + confEntry = localeSource->Sections["Meta"].find("Name"); + if (confEntry != localeSource->Sections["Meta"].end()) + stdstr(&name, (*confEntry).second.c_str()); + + confEntry = localeSource->Sections["Meta"].find("Description"); + if (confEntry != localeSource->Sections["Meta"].end()) + stdstr(&description, (*confEntry).second.c_str()); +} + + +SWLocale::~SWLocale() { + + delete localeSource; + + if (description) + delete [] description; + + if (name) + delete [] name; + + if (bookAbbrevs) + delete [] bookAbbrevs; + + if (BMAX) { + for (int i = 0; i < 2; i++) + delete [] books[i]; + delete [] BMAX; + delete [] books; + } +} + + +const char *SWLocale::translate(const char *text) { + LookupMap::iterator entry; + + entry = lookupTable.find(text); + + if (entry == lookupTable.end()) { + ConfigEntMap::iterator confEntry; + confEntry = localeSource->Sections["Text"].find(text); + if (confEntry == localeSource->Sections["Text"].end()) + lookupTable.insert(LookupMap::value_type(text, text)); + else lookupTable.insert(LookupMap::value_type(text, (*confEntry).second.c_str())); + entry = lookupTable.find(text); + } + return (*entry).second.c_str(); +} + + +const char *SWLocale::getName() { + return name; +} + + +const char *SWLocale::getDescription() { + return description; +} + + +void SWLocale::augment(SWLocale &addFrom) { + *localeSource += *addFrom.localeSource; +} + + +const struct abbrev *SWLocale::getBookAbbrevs() { + static const char *nullstr = ""; + if (!bookAbbrevs) { + ConfigEntMap::iterator it; + int i; + int size = localeSource->Sections["Book Abbrevs"].size(); + bookAbbrevs = new struct abbrev[size + 1]; + for (i = 0, it = localeSource->Sections["Book Abbrevs"].begin(); it != 
localeSource->Sections["Book Abbrevs"].end(); it++, i++) { + bookAbbrevs[i].ab = (*it).first.c_str(); + bookAbbrevs[i].book = atoi((*it).second.c_str()); + } + bookAbbrevs[i].ab = nullstr; + bookAbbrevs[i].book = -1; + } + + return bookAbbrevs; +} + + +void SWLocale::getBooks(char **iBMAX, struct sbook ***ibooks) { + if (!BMAX) { + BMAX = new char [2]; + BMAX[0] = VerseKey::builtin_BMAX[0]; + BMAX[1] = VerseKey::builtin_BMAX[1]; + + books = new struct sbook *[2]; + books[0] = new struct sbook[BMAX[0]]; + books[1] = new struct sbook[BMAX[1]]; + + for (int i = 0; i < 2; i++) { + for (int j = 0; j < BMAX[i]; j++) { + books[i][j] = VerseKey::builtin_books[i][j]; + books[i][j].name = translate(VerseKey::builtin_books[i][j].name); + } + } + } + + *iBMAX = BMAX; + *ibooks = books; +} diff --git a/src/mgr/swmgr.cpp b/src/mgr/swmgr.cpp new file mode 100644 index 0000000..ff36acc --- /dev/null +++ b/src/mgr/swmgr.cpp @@ -0,0 +1,1084 @@ +/****************************************************************************** + * swmgr.cpp - implementaion of class SWMgr used to interact with an install + * base of sword modules. + * + * $Id: swmgr.cpp,v 1.79 2002/08/09 05:53:48 scribe Exp $ + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#include <unixstr.h> +#endif +#include <sys/stat.h> +#ifndef _MSC_VER +#include <iostream> +#endif +#include <dirent.h> + +#include <swmgr.h> +#include <rawtext.h> +#include <rawgenbook.h> +#include <rawcom.h> +#include <hrefcom.h> +#include <rawld.h> +#include <rawld4.h> +#include <utilfuns.h> +#include <gbfplain.h> +#include <thmlplain.h> +#include <gbfstrongs.h> +#include <gbffootnotes.h> +#include <gbfheadings.h> +#include <gbfmorph.h> +#include <thmlstrongs.h> +#include <thmlfootnotes.h> +#include <thmlheadings.h> +#include <thmlmorph.h> +#include <thmllemma.h> +#include <thmlscripref.h> +#include <cipherfil.h> +#include <rawfiles.h> +#include <ztext.h> +#include <zld.h> +#include <zcom.h> +#include <lzsscomprs.h> +#include <utf8greekaccents.h> +#include <utf8cantillation.h> +#include <utf8hebrewpoints.h> +#include <greeklexattribs.h> +#include <swfiltermgr.h> + + + +#ifdef _ICU_ +#include <utf8transliterator.h> +bool SWMgr::isICU = true; +#else +bool SWMgr::isICU = false; +#endif + +#ifndef EXCLUDEZLIB +#include <zipcomprs.h> +#endif + +bool SWMgr::debug = false; + +#ifdef GLOBCONFPATH +const char *SWMgr::globalConfPath = GLOBCONFPATH; +#else +const char *SWMgr::globalConfPath = "/etc/sword.conf:/usr/local/etc/sword.conf"; +#endif + +void SWMgr::init() { + SWFilter *tmpFilter = 0; + configPath = 0; + prefixPath = 0; + configType = 0; + myconfig = 0; + mysysconfig = 0; + homeConfig = 0; + + + cipherFilters.clear(); + optionFilters.clear(); + cleanupFilters.clear(); + + tmpFilter = new GBFStrongs(); + optionFilters.insert(FilterMap::value_type("GBFStrongs", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new GBFFootnotes(); + optionFilters.insert(FilterMap::value_type("GBFFootnotes", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new GBFMorph(); + 
optionFilters.insert(FilterMap::value_type("GBFMorph", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new GBFHeadings(); + optionFilters.insert(FilterMap::value_type("GBFHeadings", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new ThMLStrongs(); + optionFilters.insert(FilterMap::value_type("ThMLStrongs", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new ThMLFootnotes(); + optionFilters.insert(FilterMap::value_type("ThMLFootnotes", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new ThMLMorph(); + optionFilters.insert(FilterMap::value_type("ThMLMorph", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new ThMLHeadings(); + optionFilters.insert(FilterMap::value_type("ThMLHeadings", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new ThMLLemma(); + optionFilters.insert(FilterMap::value_type("ThMLLemma", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new ThMLScripref(); + optionFilters.insert(FilterMap::value_type("ThMLScripref", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new UTF8GreekAccents(); + optionFilters.insert(FilterMap::value_type("UTF8GreekAccents", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new UTF8HebrewPoints(); + optionFilters.insert(FilterMap::value_type("UTF8HebrewPoints", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new UTF8Cantillation(); + optionFilters.insert(FilterMap::value_type("UTF8Cantillation", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new GreekLexAttribs(); + optionFilters.insert(FilterMap::value_type("GreekLexAttribs", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + +// UTF8Transliterator needs to be handled differently because it should always available as an option, for all modules +#ifdef _ICU_ + transliterator = new UTF8Transliterator(); + 
optionFilters.insert(FilterMap::value_type("UTF8Transliterator", transliterator)); + options.push_back(transliterator->getOptionName()); + cleanupFilters.push_back(transliterator); +#endif + + gbfplain = new GBFPlain(); + cleanupFilters.push_back(gbfplain); + + thmlplain = new ThMLPlain(); + cleanupFilters.push_back(thmlplain); +} + + +SWMgr::SWMgr(SWFilterMgr *filterMgr) { + commonInit(0, 0, true, filterMgr); +} + + +SWMgr::SWMgr(SWConfig *iconfig, SWConfig *isysconfig, bool autoload, SWFilterMgr *filterMgr) { + commonInit(iconfig, isysconfig, autoload, filterMgr); +} + + +void SWMgr::commonInit(SWConfig * iconfig, SWConfig * isysconfig, bool autoload, SWFilterMgr *filterMgr) { + this->filterMgr = filterMgr; + if (filterMgr) + filterMgr->setParentMgr(this); + + init(); + + if (iconfig) { + config = iconfig; + myconfig = 0; + } + else config = 0; + if (isysconfig) { + sysconfig = isysconfig; + mysysconfig = 0; + } + else sysconfig = 0; + + if (autoload) + Load(); +} + + +SWMgr::SWMgr(const char *iConfigPath, bool autoload, SWFilterMgr *filterMgr) { + + string path; + + this->filterMgr = filterMgr; + if (filterMgr) + filterMgr->setParentMgr(this); + + init(); + + path = iConfigPath; + if ((iConfigPath[strlen(iConfigPath)-1] != '\\') && (iConfigPath[strlen(iConfigPath)-1] != '/')) + path += "/"; + if (FileMgr::existsFile(path.c_str(), "mods.conf")) { + stdstr(&prefixPath, path.c_str()); + path += "mods.conf"; + stdstr(&configPath, path.c_str()); + } + else { + if (FileMgr::existsDir(path.c_str(), "mods.d")) { + stdstr(&prefixPath, path.c_str()); + path += "mods.d"; + stdstr(&configPath, path.c_str()); + configType = 1; + } + } + + config = 0; + sysconfig = 0; + + if (autoload && configPath) + Load(); +} + + +SWMgr::~SWMgr() { + + DeleteMods(); + + for (FilterList::iterator it = cleanupFilters.begin(); it != cleanupFilters.end(); it++) + delete (*it); + + if (homeConfig) + delete homeConfig; + + if (myconfig) + delete myconfig; + + if (prefixPath) + delete [] 
prefixPath; + + if (configPath) + delete [] configPath; + + if (filterMgr) + delete filterMgr; +} + + +void SWMgr::findConfig(char *configType, char **prefixPath, char **configPath, list<string> *augPaths) { + string path; + ConfigEntMap::iterator entry; + ConfigEntMap::iterator lastEntry; + + char *envsworddir = getenv ("SWORD_PATH"); + char *envhomedir = getenv ("HOME"); + + *configType = 0; + +#ifndef _MSC_VER + // check working directory +if (debug) + std::cerr << "Checking working directory for mods.conf..."; +#endif + + if (FileMgr::existsFile(".", "mods.conf")) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found\n"; +#endif + + stdstr(prefixPath, "./"); + stdstr(configPath, "./mods.conf"); + return; + } + +#ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking working directory for mods.d..."; +#endif + + if (FileMgr::existsDir(".", "mods.d")) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found\n"; +#endif + + stdstr(prefixPath, "./"); + stdstr(configPath, "./mods.d"); + *configType = 1; + return; + } + + + // check environment variable SWORD_PATH +#ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking SWORD_PATH..."; +#endif + + if (envsworddir != NULL) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found (" << envsworddir << ")\n"; +#endif + + path = envsworddir; + if ((envsworddir[strlen(envsworddir)-1] != '\\') && (envsworddir[strlen(envsworddir)-1] != '/')) + path += "/"; + +#ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking $SWORD_PATH for mods.conf..."; +#endif + + if (FileMgr::existsFile(path.c_str(), "mods.conf")) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found\n"; +#endif + + stdstr(prefixPath, path.c_str()); + path += "mods.conf"; + stdstr(configPath, path.c_str()); + return; + } + +#ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking $SWORD_PATH for mods.d..."; +#endif + + if (FileMgr::existsDir(path.c_str(), "mods.d")) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found\n"; +#endif + + stdstr(prefixPath, 
path.c_str()); + path += "mods.d"; + stdstr(configPath, path.c_str()); + *configType = 1; + return; + } + } + + + // check for systemwide globalConfPath + +#ifndef _MSC_VER +if (debug) + std::cerr << "\nParsing " << globalConfPath << "..."; +#endif + + char *globPaths = 0; + char *gfp; + stdstr(&globPaths, globalConfPath); + for (gfp = strtok(globPaths, ":"); gfp; gfp = strtok(0, ":")) { + + #ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking for " << gfp << "..."; +#endif + + if (FileMgr::existsFile(gfp)) + break; + } + + if (gfp) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found\n"; +#endif + + SWConfig etcconf(gfp); + if ((entry = etcconf.Sections["Install"].find("DataPath")) != etcconf.Sections["Install"].end()) { + path = (*entry).second; + if (((*entry).second.c_str()[strlen((*entry).second.c_str())-1] != '\\') && ((*entry).second.c_str()[strlen((*entry).second.c_str())-1] != '/')) + path += "/"; + +#ifndef _MSC_VER +if (debug) + std::cerr << "DataPath in " << gfp << " is set to: " << path; +#endif + +#ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking for mods.conf in DataPath "; +#endif + if (FileMgr::existsFile(path.c_str(), "mods.conf")) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found\n"; +#endif + + stdstr(prefixPath, path.c_str()); + path += "mods.conf"; + stdstr(configPath, path.c_str()); + *configType = 1; + } + +#ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking for mods.d in DataPath "; +#endif + + if (FileMgr::existsDir(path.c_str(), "mods.d")) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found\n"; +#endif + + stdstr(prefixPath, path.c_str()); + path += "mods.d"; + stdstr(configPath, path.c_str()); + *configType = 1; + } + } + if (augPaths) { + augPaths->clear(); + entry = etcconf.Sections["Install"].lower_bound("AugmentPath"); + lastEntry = etcconf.Sections["Install"].upper_bound("AugmentPath"); + for (;entry != lastEntry; entry++) { + path = entry->second; + if 
((entry->second.c_str()[strlen(entry->second.c_str())-1] != '\\') && (entry->second.c_str()[strlen(entry->second.c_str())-1] != '/')) + path += "/"; + augPaths->push_back(path); + } + } + } + + delete [] globPaths; + if (*configType) + return; + + // check ~/.sword/ + +#ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking home directory for ~/.sword/mods.conf" << path; +#endif + + if (envhomedir != NULL) { + path = envhomedir; + if ((envhomedir[strlen(envhomedir)-1] != '\\') && (envhomedir[strlen(envhomedir)-1] != '/')) + path += "/"; + path += ".sword/"; + if (FileMgr::existsFile(path.c_str(), "mods.conf")) { + +#ifndef _MSC_VER +if (debug) + std::cerr << " found\n"; +#endif + + stdstr(prefixPath, path.c_str()); + path += "mods.conf"; + stdstr(configPath, path.c_str()); + return; + } + +#ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking home directory for ~/.sword/mods.d" << path; +#endif + + if (FileMgr::existsDir(path.c_str(), "mods.d")) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found\n"; +#endif + + stdstr(prefixPath, path.c_str()); + path += "mods.d"; + stdstr(configPath, path.c_str()); + *configType = 2; + return; + } + } +} + + +void SWMgr::loadConfigDir(const char *ipath) +{ + DIR *dir; + struct dirent *ent; + string newmodfile; + + if ((dir = opendir(ipath))) { + rewinddir(dir); + while ((ent = readdir(dir))) { + if ((strcmp(ent->d_name, ".")) && (strcmp(ent->d_name, ".."))) { + newmodfile = ipath; + if ((ipath[strlen(ipath)-1] != '\\') && (ipath[strlen(ipath)-1] != '/')) + newmodfile += "/"; + newmodfile += ent->d_name; + if (config) { + SWConfig tmpConfig(newmodfile.c_str()); + *config += tmpConfig; + } + else config = myconfig = new SWConfig(newmodfile.c_str()); + } + } + closedir(dir); + if (!config) { // if no .conf file exist yet, create a default + newmodfile = ipath; + if ((ipath[strlen(ipath)-1] != '\\') && (ipath[strlen(ipath)-1] != '/')) + newmodfile += "/"; + newmodfile += "globals.conf"; + config = myconfig = new 
SWConfig(newmodfile.c_str()); + } + } +} + + +void SWMgr::augmentModules(const char *ipath) { + string path = ipath; + if ((ipath[strlen(ipath)-1] != '\\') && (ipath[strlen(ipath)-1] != '/')) + path += "/"; + if (FileMgr::existsDir(path.c_str(), "mods.d")) { + char *savePrefixPath = 0; + char *saveConfigPath = 0; + SWConfig *saveConfig = 0; + stdstr(&savePrefixPath, prefixPath); + stdstr(&prefixPath, path.c_str()); + path += "mods.d"; + stdstr(&saveConfigPath, configPath); + stdstr(&configPath, path.c_str()); + saveConfig = config; + config = myconfig = 0; + loadConfigDir(configPath); + + CreateMods(); + + stdstr(&prefixPath, savePrefixPath); + delete []savePrefixPath; + stdstr(&configPath, saveConfigPath); + delete []saveConfigPath; + (*saveConfig) += *config; + homeConfig = myconfig; + config = myconfig = saveConfig; + } +} + + +/*********************************************************************** + * SWMgr::Load - loads actual modules + * + * RET: status - 0 = ok; -1 no config found; 1 = no modules installed + * + */ + +signed char SWMgr::Load() { + signed char ret = 0; + + if (!config) { // If we weren't passed a config object at construction, find a config file + if (!configPath) // If we weren't passed a config path at construction... 
+ findConfig(&configType, &prefixPath, &configPath, &augPaths); + if (configPath) { + if (configType) + loadConfigDir(configPath); + else config = myconfig = new SWConfig(configPath); + } + } + + if (config) { + SectionMap::iterator Sectloop, Sectend; + ConfigEntMap::iterator Entryloop, Entryend; + + DeleteMods(); + + for (Sectloop = config->Sections.lower_bound("Globals"), Sectend = config->Sections.upper_bound("Globals"); Sectloop != Sectend; Sectloop++) { // scan thru all 'Globals' sections + for (Entryloop = (*Sectloop).second.lower_bound("AutoInstall"), Entryend = (*Sectloop).second.upper_bound("AutoInstall"); Entryloop != Entryend; Entryloop++) // scan thru all AutoInstall entries + InstallScan((*Entryloop).second.c_str()); // Scan AutoInstall entry directory for new modules and install + } + if (configType) { // force reload on config object because we may have installed new modules + delete myconfig; + config = myconfig = 0; + loadConfigDir(configPath); + } + else config->Load(); + + CreateMods(); + + for (list<string>::iterator pathIt = augPaths.begin(); pathIt != augPaths.end(); pathIt++) { + augmentModules(pathIt->c_str()); + } +// augment config with ~/.sword/mods.d if it exists --------------------- + char *envhomedir = getenv ("HOME"); + if (envhomedir != NULL && configType != 2) { // 2 = user only + string path = envhomedir; + if ((envhomedir[strlen(envhomedir)-1] != '\\') && (envhomedir[strlen(envhomedir)-1] != '/')) + path += "/"; + path += ".sword/"; + augmentModules(path.c_str()); + } +// ------------------------------------------------------------------------- + if ( !Modules.size() ) // config exists, but no modules + ret = 1; + + } + else { + SWLog::systemlog->LogError("SWMgr: Can't find 'mods.conf' or 'mods.d'. Try setting:\n\tSWORD_PATH=<directory containing mods.conf>\n\tOr see the README file for a full description of setup options (%s)", (configPath) ? 
configPath : "<configPath is null>"); + ret = -1; + } + + return ret; +} + +SWModule *SWMgr::CreateMod(string name, string driver, ConfigEntMap §ion) +{ + string description, datapath, misc1; + ConfigEntMap::iterator entry; + SWModule *newmod = 0; + string lang, sourceformat, encoding; + signed char direction, enc, markup; + + description = ((entry = section.find("Description")) != section.end()) ? (*entry).second : (string)""; + lang = ((entry = section.find("Lang")) != section.end()) ? (*entry).second : (string)"en"; + sourceformat = ((entry = section.find("SourceType")) != section.end()) ? (*entry).second : (string)""; + encoding = ((entry = section.find("Encoding")) != section.end()) ? (*entry).second : (string)""; + datapath = prefixPath; + if ((prefixPath[strlen(prefixPath)-1] != '\\') && (prefixPath[strlen(prefixPath)-1] != '/')) + datapath += "/"; + misc1 += ((entry = section.find("DataPath")) != section.end()) ? (*entry).second : (string)""; + char *buf = new char [ strlen(misc1.c_str()) + 1 ]; + char *buf2 = buf; + strcpy(buf, misc1.c_str()); +// for (; ((*buf2) && ((*buf2 == '.') || (*buf2 == '/') || (*buf2 == '\\'))); buf2++); + for (; ((*buf2) && ((*buf2 == '/') || (*buf2 == '\\'))); buf2++); + if (*buf2) + datapath += buf2; + delete [] buf; + + section["AbsoluteDataPath"] = datapath; + + if (!stricmp(sourceformat.c_str(), "GBF")) + markup = FMT_GBF; + else if (!stricmp(sourceformat.c_str(), "ThML")) + markup = FMT_THML; + else if (!stricmp(sourceformat.c_str(), "OSIS")) + markup = FMT_OSIS; + else + markup = FMT_PLAIN; + + if (!stricmp(encoding.c_str(), "SCSU")) + enc = ENC_SCSU; + else if (!stricmp(encoding.c_str(), "UTF-8")) { + enc = ENC_UTF8; + } + else enc = ENC_LATIN1; + + if ((entry = section.find("Direction")) == section.end()) { + direction = DIRECTION_LTR; + } + else if (!stricmp((*entry).second.c_str(), "rtol")) { + direction = DIRECTION_RTL; + } + else if (!stricmp((*entry).second.c_str(), "bidi")) { + direction = DIRECTION_BIDI; + } + 
else { + direction = DIRECTION_LTR; + } + + if ((!stricmp(driver.c_str(), "zText")) || (!stricmp(driver.c_str(), "zCom"))) { + SWCompress *compress = 0; + int blockType = CHAPTERBLOCKS; + misc1 = ((entry = section.find("BlockType")) != section.end()) ? (*entry).second : (string)"CHAPTER"; + if (!stricmp(misc1.c_str(), "VERSE")) + blockType = VERSEBLOCKS; + else if (!stricmp(misc1.c_str(), "CHAPTER")) + blockType = CHAPTERBLOCKS; + else if (!stricmp(misc1.c_str(), "BOOK")) + blockType = BOOKBLOCKS; + + misc1 = ((entry = section.find("CompressType")) != section.end()) ? (*entry).second : (string)"LZSS"; +#ifndef EXCLUDEZLIB + if (!stricmp(misc1.c_str(), "ZIP")) + compress = new ZipCompress(); + else +#endif + if (!stricmp(misc1.c_str(), "LZSS")) + compress = new LZSSCompress(); + + if (compress) { + if (!stricmp(driver.c_str(), "zText")) + newmod = new zText(datapath.c_str(), name.c_str(), description.c_str(), blockType, compress, 0, enc, direction, markup, lang.c_str()); + else newmod = new zCom(datapath.c_str(), name.c_str(), description.c_str(), blockType, compress, 0, enc, direction, markup, lang.c_str()); + } + } + + if (!stricmp(driver.c_str(), "RawText")) { + newmod = new RawText(datapath.c_str(), name.c_str(), description.c_str(), 0, enc, direction, markup, lang.c_str()); + } + + // backward support old drivers + if (!stricmp(driver.c_str(), "RawGBF")) { + newmod = new RawText(datapath.c_str(), name.c_str(), description.c_str(), 0, enc, direction, markup, lang.c_str()); + } + + if (!stricmp(driver.c_str(), "RawCom")) { + newmod = new RawCom(datapath.c_str(), name.c_str(), description.c_str(), 0, enc, direction, markup, lang.c_str()); + } + + if (!stricmp(driver.c_str(), "RawFiles")) { + newmod = new RawFiles(datapath.c_str(), name.c_str(), description.c_str(), 0, enc, direction, markup, lang.c_str()); + } + + if (!stricmp(driver.c_str(), "HREFCom")) { + misc1 = ((entry = section.find("Prefix")) != section.end()) ? 
(*entry).second : (string)""; + newmod = new HREFCom(datapath.c_str(), misc1.c_str(), name.c_str(), description.c_str()); + } + + if (!stricmp(driver.c_str(), "RawLD")) + newmod = new RawLD(datapath.c_str(), name.c_str(), description.c_str(), 0, enc, direction, markup, lang.c_str()); + + if (!stricmp(driver.c_str(), "RawLD4")) + newmod = new RawLD4(datapath.c_str(), name.c_str(), description.c_str(), 0, enc, direction, markup, lang.c_str()); + + if (!stricmp(driver.c_str(), "zLD")) { + SWCompress *compress = 0; + int blockCount; + misc1 = ((entry = section.find("BlockCount")) != section.end()) ? (*entry).second : (string)"200"; + blockCount = atoi(misc1.c_str()); + blockCount = (blockCount) ? blockCount : 200; + + misc1 = ((entry = section.find("CompressType")) != section.end()) ? (*entry).second : (string)"LZSS"; +#ifndef EXCLUDEZLIB + if (!stricmp(misc1.c_str(), "ZIP")) + compress = new ZipCompress(); + else +#endif + if (!stricmp(misc1.c_str(), "LZSS")) + compress = new LZSSCompress(); + + if (compress) { + newmod = new zLD(datapath.c_str(), name.c_str(), description.c_str(), blockCount, compress, 0, enc, direction, markup, lang.c_str()); + } + } + + if (!stricmp(driver.c_str(), "RawGenBook")) { + newmod = new RawGenBook(datapath.c_str(), name.c_str(), description.c_str(), 0, enc, direction, markup, lang.c_str()); + } + // if a specific module type is set in the config, use this + if ((entry = section.find("Type")) != section.end()) + newmod->Type(entry->second.c_str()); + + newmod->setConfig(§ion); + return newmod; +} + + +void SWMgr::AddGlobalOptions(SWModule *module, ConfigEntMap §ion, ConfigEntMap::iterator start, ConfigEntMap::iterator end) { + for (;start != end; start++) { + FilterMap::iterator it; + it = optionFilters.find((*start).second); + if (it != optionFilters.end()) { + module->AddOptionFilter((*it).second); // add filter to module and option as a valid option + OptionsList::iterator loop; + for (loop = options.begin(); loop != options.end(); 
loop++) { + if (!strcmp((*loop).c_str(), (*it).second->getOptionName())) + break; + } + if (loop == options.end()) // if we have not yet included the option + options.push_back((*it).second->getOptionName()); + } + } + if (filterMgr) + filterMgr->AddGlobalOptions(module, section, start, end); +#ifdef _ICU_ + module->AddOptionFilter(transliterator); +#endif +} + + +void SWMgr::AddLocalOptions(SWModule *module, ConfigEntMap §ion, ConfigEntMap::iterator start, ConfigEntMap::iterator end) +{ + for (;start != end; start++) { + FilterMap::iterator it; + it = optionFilters.find((*start).second); + if (it != optionFilters.end()) { + module->AddOptionFilter((*it).second); // add filter to module + } + } + + if (filterMgr) + filterMgr->AddLocalOptions(module, section, start, end); +} + + +void SWMgr::AddRawFilters(SWModule *module, ConfigEntMap §ion) { + string sourceformat, cipherKey; + ConfigEntMap::iterator entry; + + cipherKey = ((entry = section.find("CipherKey")) != section.end()) ? (*entry).second : (string)""; + if (!cipherKey.empty()) { + SWFilter *cipherFilter = new CipherFilter(cipherKey.c_str()); + cipherFilters.insert(FilterMap::value_type(module->Name(), cipherFilter)); + cleanupFilters.push_back(cipherFilter); + module->AddRawFilter(cipherFilter); + } + + if (filterMgr) + filterMgr->AddRawFilters(module, section); +} + + +void SWMgr::AddEncodingFilters(SWModule *module, ConfigEntMap §ion) { + + if (filterMgr) + filterMgr->AddEncodingFilters(module, section); +} + + +void SWMgr::AddRenderFilters(SWModule *module, ConfigEntMap §ion) { + string sourceformat; + ConfigEntMap::iterator entry; + + sourceformat = ((entry = section.find("SourceType")) != section.end()) ? (*entry).second : (string)""; + + // Temporary: To support old module types + // TODO: Remove at 1.6.0 release? + if (sourceformat.empty()) { + sourceformat = ((entry = section.find("ModDrv")) != section.end()) ? 
(*entry).second : (string)""; + if (!stricmp(sourceformat.c_str(), "RawGBF")) + sourceformat = "GBF"; + else sourceformat = ""; + } + +// process module - eg. follows +// if (!stricmp(sourceformat.c_str(), "GBF")) { +// module->AddRenderFilter(gbftortf); +// } + + if (filterMgr) + filterMgr->AddRenderFilters(module, section); + +} + + +void SWMgr::AddStripFilters(SWModule *module, ConfigEntMap §ion) +{ + string sourceformat; + ConfigEntMap::iterator entry; + + sourceformat = ((entry = section.find("SourceType")) != section.end()) ? (*entry).second : (string)""; + // Temporary: To support old module types + if (sourceformat.empty()) { + sourceformat = ((entry = section.find("ModDrv")) != section.end()) ? (*entry).second : (string)""; + if (!stricmp(sourceformat.c_str(), "RawGBF")) + sourceformat = "GBF"; + else sourceformat = ""; + } + + if (!stricmp(sourceformat.c_str(), "GBF")) { + module->AddStripFilter(gbfplain); + } + else if (!stricmp(sourceformat.c_str(), "ThML")) { + module->AddStripFilter(thmlplain); + } + + if (filterMgr) + filterMgr->AddStripFilters(module, section); + +} + + +void SWMgr::CreateMods() { + SectionMap::iterator it; + ConfigEntMap::iterator start; + ConfigEntMap::iterator end; + ConfigEntMap::iterator entry; + SWModule *newmod; + string driver, misc1; + for (it = config->Sections.begin(); it != config->Sections.end(); it++) { + ConfigEntMap §ion = (*it).second; + newmod = 0; + + driver = ((entry = section.find("ModDrv")) != section.end()) ? 
(*entry).second : (string)""; + if (!driver.empty()) { + newmod = CreateMod((*it).first, driver, section); + if (newmod) { + start = (*it).second.lower_bound("GlobalOptionFilter"); + end = (*it).second.upper_bound("GlobalOptionFilter"); + AddGlobalOptions(newmod, section, start, end); + + start = (*it).second.lower_bound("LocalOptionFilter"); + end = (*it).second.upper_bound("LocalOptionFilter"); + AddLocalOptions(newmod, section, start, end); + + AddRawFilters(newmod, section); + AddStripFilters(newmod, section); + AddRenderFilters(newmod, section); + AddEncodingFilters(newmod, section); + + Modules.insert(ModMap::value_type(newmod->Name(), newmod)); + } + } + } +} + + +void SWMgr::DeleteMods() { + + ModMap::iterator it; + + for (it = Modules.begin(); it != Modules.end(); it++) + delete (*it).second; + + Modules.clear(); +} + + +void SWMgr::InstallScan(const char *dirname) +{ + DIR *dir; + struct dirent *ent; + int conffd = 0; + string newmodfile; + string targetName; + + if (!access(dirname, 04)) { + if ((dir = opendir(dirname))) { + rewinddir(dir); + while ((ent = readdir(dir))) { + if ((strcmp(ent->d_name, ".")) && (strcmp(ent->d_name, ".."))) { + newmodfile = dirname; + if ((dirname[strlen(dirname)-1] != '\\') && (dirname[strlen(dirname)-1] != '/')) + newmodfile += "/"; + newmodfile += ent->d_name; + if (configType) { + if (config > 0) + close(conffd); + targetName = configPath; + if ((configPath[strlen(configPath)-1] != '\\') && (configPath[strlen(configPath)-1] != '/')) + targetName += "/"; + targetName += ent->d_name; + conffd = open(targetName.c_str(), O_WRONLY|O_CREAT, S_IREAD|S_IWRITE); + } + else { + if (conffd < 1) { + conffd = open(config->filename.c_str(), O_WRONLY|O_APPEND); + if (conffd > 0) + lseek(conffd, 0L, SEEK_END); + } + } + AddModToConfig(conffd, newmodfile.c_str()); + unlink(newmodfile.c_str()); + } + } + if (conffd > 0) + close(conffd); + closedir(dir); + } + } +} + + +char SWMgr::AddModToConfig(int conffd, const char *fname) +{ + int 
modfd; + char ch; + + SWLog::systemlog->LogTimedInformation("Found new module [%s]. Installing...", fname); + modfd = open(fname, O_RDONLY); + ch = '\n'; + write(conffd, &ch, 1); + while (read(modfd, &ch, 1) == 1) + write(conffd, &ch, 1); + ch = '\n'; + write(conffd, &ch, 1); + close(modfd); + return 0; +} + + +void SWMgr::setGlobalOption(const char *option, const char *value) +{ + for (FilterMap::iterator it = optionFilters.begin(); it != optionFilters.end(); it++) { + if ((*it).second->getOptionName()) { + if (!stricmp(option, (*it).second->getOptionName())) + (*it).second->setOptionValue(value); + } + } +} + + +const char *SWMgr::getGlobalOption(const char *option) +{ + for (FilterMap::iterator it = optionFilters.begin(); it != optionFilters.end(); it++) { + if ((*it).second->getOptionName()) { + if (!stricmp(option, (*it).second->getOptionName())) + return (*it).second->getOptionValue(); + } + } + return 0; +} + + +const char *SWMgr::getGlobalOptionTip(const char *option) +{ + for (FilterMap::iterator it = optionFilters.begin(); it != optionFilters.end(); it++) { + if ((*it).second->getOptionName()) { + if (!stricmp(option, (*it).second->getOptionName())) + return (*it).second->getOptionTip(); + } + } + return 0; +} + + +OptionsList SWMgr::getGlobalOptions() +{ + return options; +} + + +OptionsList SWMgr::getGlobalOptionValues(const char *option) +{ + OptionsList options; + for (FilterMap::iterator it = optionFilters.begin(); it != optionFilters.end(); it++) { + if ((*it).second->getOptionName()) { + if (!stricmp(option, (*it).second->getOptionName())) { + options = (*it).second->getOptionValues(); + break; // just find the first one. 
All option filters with the same option name should expect the same values + } + } + } + return options; +} + + +signed char SWMgr::setCipherKey(const char *modName, const char *key) { + FilterMap::iterator it; + ModMap::iterator it2; + + // check for filter that already exists + it = cipherFilters.find(modName); + if (it != cipherFilters.end()) { + ((CipherFilter *)(*it).second)->getCipher()->setCipherKey(key); + return 0; + } + // check if module exists + else { + it2 = Modules.find(modName); + if (it2 != Modules.end()) { + SWFilter *cipherFilter = new CipherFilter(key); + cipherFilters.insert(FilterMap::value_type(modName, cipherFilter)); + cleanupFilters.push_back(cipherFilter); + (*it2).second->AddRawFilter(cipherFilter); + return 0; + } + } + return -1; +} diff --git a/src/modules/Makefile b/src/modules/Makefile new file mode 100644 index 0000000..ef8eccd --- /dev/null +++ b/src/modules/Makefile @@ -0,0 +1,5 @@ + +root := ../.. + +all: + make -C ${root} diff --git a/src/modules/Makefile.am b/src/modules/Makefile.am new file mode 100644 index 0000000..944dc18 --- /dev/null +++ b/src/modules/Makefile.am @@ -0,0 +1,10 @@ +modulesdir = $(top_srcdir)/src/modules + +libsword_la_SOURCES += $(modulesdir)/swmodule.cpp + +include ../src/modules/common/Makefile.am +include ../src/modules/filters/Makefile.am +include ../src/modules/genbook/Makefile.am +include ../src/modules/texts/Makefile.am +include ../src/modules/comments/Makefile.am +include ../src/modules/lexdict/Makefile.am diff --git a/src/modules/comments/Makefile b/src/modules/comments/Makefile new file mode 100644 index 0000000..1a2d00d --- /dev/null +++ b/src/modules/comments/Makefile @@ -0,0 +1,5 @@ + +root := ../../.. 
+ +all: + make -C ${root} diff --git a/src/modules/comments/Makefile.am b/src/modules/comments/Makefile.am new file mode 100644 index 0000000..1568544 --- /dev/null +++ b/src/modules/comments/Makefile.am @@ -0,0 +1,8 @@ +commentsdir = $(top_srcdir)/src/modules/comments + +libsword_la_SOURCES += $(commentsdir)/swcom.cpp + +include ../src/modules/comments/rawcom/Makefile.am +include ../src/modules/comments/rawfiles/Makefile.am +include ../src/modules/comments/zcom/Makefile.am +include ../src/modules/comments/hrefcom/Makefile.am diff --git a/src/modules/comments/hrefcom/Makefile b/src/modules/comments/hrefcom/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/comments/hrefcom/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. + +all: + make -C ${root} diff --git a/src/modules/comments/hrefcom/Makefile.am b/src/modules/comments/hrefcom/Makefile.am new file mode 100644 index 0000000..a6a2115 --- /dev/null +++ b/src/modules/comments/hrefcom/Makefile.am @@ -0,0 +1,4 @@ +hrefcomdir = $(top_srcdir)/src/modules/comments/hrefcom + +libsword_la_SOURCES += $(hrefcomdir)/hrefcom.cpp + diff --git a/src/modules/comments/hrefcom/hrefcom.cpp b/src/modules/comments/hrefcom/hrefcom.cpp new file mode 100644 index 0000000..200e21f --- /dev/null +++ b/src/modules/comments/hrefcom/hrefcom.cpp @@ -0,0 +1,97 @@ +/****************************************************************************** + * hrefcom.cpp - code for class 'HREFCom'- a module that produces HTML HREFs + * pointing to actual text desired. 
Uses standard + * files: ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawverse.h> +#include <hrefcom.h> + + + /****************************************************************************** + * HREFCom Constructor - Initializes data for instance of HREFCom + * + * ENT: iname - Internal name for module + * iprefix - string to prepend to each HREF (e.g. "file://mods/com/jfb/") + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +HREFCom::HREFCom(const char *ipath, const char *iprefix, const char *iname, const char *idesc, SWDisplay *idisp) : RawVerse(ipath), SWCom(iname, idesc, idisp) +{ + prefix = 0; + stdstr(&prefix, iprefix); +} + + +/****************************************************************************** + * HREFCom Destructor - Cleans up instance of HREFCom + */ + +HREFCom::~HREFCom() +{ + if (prefix) + delete [] prefix; +} + + +/****************************************************************************** + * HREFCom::operator char * - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +char *HREFCom::getRawEntry() { + long start; + unsigned short size; + char *tmpbuf; + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... 
) {} +#endif + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + entrySize = size; // support getEntrySize call + + unsigned long newsize = ((size + 2) + strlen(prefix)) * FILTERPAD; + if (newsize > entrybufallocsize) { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ newsize ]; + entrybufallocsize = newsize; + } + tmpbuf = new char [ size + 10 ]; + + readtext(key->Testament(), start, size + 2, tmpbuf); + sprintf(entrybuf, "%s%s", prefix, tmpbuf); + preptext(entrybuf); + + delete [] tmpbuf; + + if (key != this->key) + delete key; + + return entrybuf; +} diff --git a/src/modules/comments/hrefcom/jfbgen.cpp b/src/modules/comments/hrefcom/jfbgen.cpp new file mode 100644 index 0000000..8b66a60 --- /dev/null +++ b/src/modules/comments/hrefcom/jfbgen.cpp @@ -0,0 +1,242 @@ +/***************************************************************************** + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <fcntl.h> +#include <versekey.h> + +#ifndef O_BINARY + #define O_BINARY 0 +#endif + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size); +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size); +void openfiles(); +void checkparams(int argc, char **argv); +void charsetconvert(char *data); + + +VerseKey key1, key2, key3; +int fp, vfp, cfp, bfp; +long chapoffset; +short chapsize; +char testmnt; +char startflag = 0; + + +main(int argc, char **argv) +{ + long pos, offset; + int num1, num2, rangemax, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + short size, tmp; + extern struct zonline online; + + checkparams(argc, argv); + + key1 = key2 = key3 = "Genesis 1:1"; + + openfiles(); + + num1 = key1.Chapter(); + num2 = key1.Verse(); + + while(!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) { + if 
(!startflag) { + startflag = 1; + } + else { + if (num2 < key2.Verse()) { // new chapter + if (num1 <= key2.Chapter()) { // new book + key2.Verse(1); + key2.Chapter(1); + key2.Book(key2.Book()+1); + } + printf("Found Chapter Break: %d ('%s')\n", num1, (const char *)key2); + chapoffset = offset; + chapsize = size; +// continue; + } + } + key2.Verse(1); + key2.Chapter(num1); + key2.Verse(num2); + + key3 = key2; +// key3 += (rangemax - key3.Verse()); + + writeidx(key1, key2, key3, offset, size); + } + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +/************************************************************************** + * ENT: key1 - current location of index + * key2 - minimum keyval for which this offset is valid + * key3 - maximum keyval for which this offset is valid + */ + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size) +{ + long pos; + short tmp; + + if (key1.Verse() == 1) { // new chapter + if (key1.Chapter() == 1) { // new book + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Book intro (vss) set to same as chap for now(it should be chap 1 which usually contains the book into anyway)*/ + write(vfp, &chapsize, 2); + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Chapter intro */ + write(vfp, &chapsize, 2); + } + if (key1 >= key2) { + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + else { + pos = 0; + tmp = 0; + write(vfp, &pos, 4); + write(vfp, &tmp, 2); + } + key1++; +} + + +static VerseKey inckey = "Genesis 1:1"; + +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size) +{ + char buf[7]; + char buf2[20]; + char ch; + char loop; + long offset2; + int ch2, vs2, rm2; + bool flag; + long chapstart = 0; + static int olbvnum = 0; + char data[256]; + char *bookabrev[66] = {"Ge", "Ex", "Le", "Nu", "De", "Jos", 
"Jud", "Ru", + "1Sa", "2Sa", "1Ki", "2Ki", "1Ch", "2Ch", "Ezr", "Ne", "Es", + "Job", "Ps", "Pr", "Ec", "So", "Isa", "Jer", "La", "Eze", "Da", + "Ho", "Joe", "Am", "Ob", "Jon", "Mic", "Na", "Heb", "Zep", + "Hag", "Zec", "Mal", + "Mt", "Mr", "Lu", "Joh", "Ac", "Ro", "1Co", "2Co", "Ga", + "Eph", "Php", "Col", "1Th", "2Th", "1Ti", "2Ti", "Tit", "Phm", + "Heb", "Jas", "1Pe", "2Pe", "1Jo", "2Jo", "3Jo", "Jude", "Re" }; + + if (++olbvnum <= 31102) { + + if (olbvnum == 23146) { // "Matthew 1:1" + close(vfp); + close(cfp); + close(bfp); + close(fp); + key1 = key2 = key3 = inckey = "Matthew 1:1"; + openfiles(); + startflag = 0; + } + + + *offset = lseek(fp, 0, SEEK_CUR); + + if ((olbvnum!=1) && (olbvnum != 23146)) + inckey++; + + *num1 = inckey.Chapter(); + *num2 = inckey.Verse(); + + sprintf(data, "JFB%.2d.htm#%s%d_%d", inckey.Book() + ((inckey.Testament()>1)?39:0), bookabrev[inckey.Book() + ((inckey.Testament()>1)?39:0)-1], inckey.Chapter(), inckey.Verse()); + write(fp, data, strlen(data)); + + *size = lseek(fp, 0, SEEK_CUR) - *offset; + write(fp, "\n", 1); + return 0; + } + return 1; +} + + +void openfiles() +{ + char buf[255]; + char fname[5]; + long pos; + short size; + + testmnt = key1.Testament(); + + strcpy(fname, (testmnt==2) ? 
"nt" : "ot"); + unlink(fname); + if ((fp = open(fname, O_CREAT|O_RDWR|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", fname); + exit(1); + } + + sprintf(buf, "%s.vss", fname); + unlink(buf); + if ((vfp = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", fname); + unlink(buf); + if ((cfp = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", fname); + unlink(buf); + if ((bfp = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + +} + + +void checkparams(int argc, char **argv) +{ + if (argc !=1) { + fprintf(stderr, "usage: %s\n", argv[0]); + exit(1); + } +} diff --git a/src/modules/comments/rawcom/Makefile b/src/modules/comments/rawcom/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/comments/rawcom/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. 
+ +all: + make -C ${root} diff --git a/src/modules/comments/rawcom/Makefile.am b/src/modules/comments/rawcom/Makefile.am new file mode 100644 index 0000000..901cf6b --- /dev/null +++ b/src/modules/comments/rawcom/Makefile.am @@ -0,0 +1,4 @@ +rawcomdir = $(top_srcdir)/src/modules/comments/rawcom + +libsword_la_SOURCES += $(rawcomdir)/rawcom.cpp + diff --git a/src/modules/comments/rawcom/mhcidx.cpp b/src/modules/comments/rawcom/mhcidx.cpp new file mode 100644 index 0000000..df16f55 --- /dev/null +++ b/src/modules/comments/rawcom/mhcidx.cpp @@ -0,0 +1,292 @@ +/***************************************************************************** + * + * This code wreaks but works (at least for MHC). Good luck! + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <fcntl.h> +#include <versekey.h> + + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size); +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size); +void openfiles(char *fname); +void checkparams(int argc, char **argv); + + +VerseKey key1, key2, key3; +int fp, vfp, cfp, bfp; +long chapoffset; +short chapsize; +char testmnt; + + +main(int argc, char **argv) +{ + long pos, offset; + int num1, num2, rangemax, curbook = 0, curchap = 0, curverse = 0; + char buf[127], startflag = 0; + short size, tmp; + + checkparams(argc, argv); + + openfiles(argv[1]); + + testmnt = key1.Testament(); + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + while(!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) { + if (num2) { + 
key2.Verse(1); + key2.Chapter(num1); + key2.Verse(num2); + } + else { + key2.Verse(1); + if (!startflag) { + startflag = 1; + } + else { + if (num1 <= key2.Chapter()) { // new book + key2.Chapter(1); + key2.Book(key2.Book()+1); + } + } + key2.Chapter(num1); + printf("Found Chapter Break: %d ('%s')\n", num1, (const char *)key2); + chapoffset = offset; + chapsize = size; + continue; + } + + key3 = key2; + key3 += (rangemax - key3.Verse()); + + writeidx(key1, key2, key3, offset, size); + } + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +/************************************************************************** + * ENT: key1 - current location of index + * key2 - minimum keyval for which this offset is valid + * key3 - maximum keyval for which this offset is valid + */ + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size) +{ + long pos; + short tmp; + + for (; ((key1 <= key3) && (key1.Error() != KEYERR_OUTOFBOUNDS) && (key1.Testament() == testmnt)); key1+=1) { + if (key1.Verse() == 1) { // new chapter + if (key1.Chapter() == 1) { // new book + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Book intro (vss) set to same as chap for now(it should be chap 1 which usually contains the book into anyway)*/ + write(vfp, &chapsize, 2); + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Chapter intro */ + write(vfp, &chapsize, 2); + } + if (key1 >= key2) { + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + else { + pos = 0; + tmp = 0; + write(vfp, &pos, 4); + write(vfp, &tmp, 2); + } + } +} + + +char startentry(char *buf) +{ + char loop; + + if (buf[0] != 10) + return 0; + if (buf[1] != '#') + return 0; + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && 
(buf[loop] != '-')) + return 0; + } + return 1; +} + + +/* findbreak - slides a 7-byte window over fp until it holds either a verse header (accepted by startentry) or a "$-$-$-" chapter marker. Stores the entry's file offset in *offset, the verse number in *num2 (0 for a chapter marker), the chapter number in *num1 (chapter markers only) and the end of a verse range in *rangemax. When size is non-null it looks ahead to the next break to compute *size and then seeks back to *offset. Returns 0 when a break was found, 1 at end of input. */ +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size) +{ + /* the window itself is buf[0..6]; two spare bytes are kept because the terminator written through 'ch' below can land on index 7 or 8 (e.g. header "#12-15"), which overran the original 7-byte array */ + char buf[9] = {0}; + char buf2[20]; + char ch; + char loop; + long offset2; + int ch2, vs2, rm2; + + memset(buf, ' ', 7); + + while (1) { + if (startentry(buf)) { + memset(buf, ' ', 2); + for (loop = 2; loop < 7; loop++) { + if ((buf[loop] == '-') || (buf[loop] == ',') || (buf[loop] == ' ')) { + buf[loop] = 0; + *num2 = atoi(buf); + break; + } + } + for (ch = loop + 1; ch < 7; ch++) { + if (buf[ch] == ' ') { + break; + } + } + buf[ch] = 0; + *rangemax = atoi(&buf[loop+1]); + if (!*rangemax) + *rangemax = *num2; + *offset = lseek(fp, 0, SEEK_CUR) - 5; + if (size) { + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + if (vs2) { + *size = (offset2 - (*offset)) - 3; + } + else { + sprintf(buf2, "$-$-$- XX:%d", ch2); + *size = (offset2 - (*offset)) - ((strlen(buf2) + 4)); + } + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + + if (!strncmp(buf, "$-$-$-", 6)) { + /* zero the whole buffer: the read loop below can store a byte at index 7, and strlen() must still find a terminator */ + memset(buf2, 0, sizeof(buf2)); + loop = 0; + while ((read(fp, &buf2[loop], 1) == 1) && (loop < 7)) { + if ((buf2[loop] == 10) || (buf2[loop] == 13)) { + buf2[loop] = 0; + break; + } + loop++; + } + while (read(fp, &ch, 1) == 1) { + if (ch == '*') + break; + } + + *offset = lseek(fp, 0, SEEK_CUR) - 1; + *num2 = 0; + /* scan backwards for the ':' that precedes the chapter number; an empty marker line must not start the scan at index -1 */ + for (loop = buf2[0] ? strlen(buf2) - 1 : 0; loop; loop--) { + if (buf2[loop] == ':') + break; + } + *num1 = atoi(&buf2[loop+1]); + printf("Chapter marker: %s\n", buf2); + if (size) { + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + if (vs2) { + *size = (offset2 - (*offset)) - 3; + } + else { + sprintf(buf2, "$-$-$- XX:%d", ch2); + *size = (offset2 - (*offset)) - ((strlen(buf2) + 4)); + } + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + + + memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + + 
+/* openfiles - opens the input text fname read-only into fp and creates/opens the three index files fname.vss, fname.cps and fname.bks write-only into vfp, cfp and bfp. Prints a message to stderr and exits with status 1 if any open fails. */ +void openfiles(char *fname) +{ + char buf[255]; + + if ((fp = open(fname, O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", fname); + exit(1); + } + + /* open() requires a mode argument whenever O_CREAT is given; 0600 is owner read/write, the same bits as S_IREAD|S_IWRITE */ + sprintf(buf, "%s.vss", fname); + if ((vfp = open(buf, O_CREAT|O_WRONLY, 0600)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", fname); + if ((cfp = open(buf, O_CREAT|O_WRONLY, 0600)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", fname); + if ((bfp = open(buf, O_CREAT|O_WRONLY, 0600)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } +} + + +void checkparams(int argc, char **argv) +{ + if (argc < 2) { + fprintf(stderr, "usage: %s <file to process> [nt - for new testmt file]\n", argv[0]); + exit(1); + } + if (argc == 3) + key1 = key2 = key3 = "Matthew 1:1"; + else key1 = key2 = key3 = "Genesis 1:1"; +} diff --git a/src/modules/comments/rawcom/rawcom.cpp b/src/modules/comments/rawcom/rawcom.cpp new file mode 100644 index 0000000..f71b6ec --- /dev/null +++ b/src/modules/comments/rawcom/rawcom.cpp @@ -0,0 +1,221 @@ +/****************************************************************************** + * rawcom.cpp - code for class 'RawCom'- a module that reads raw commentary + * files: ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawverse.h> +#include <rawcom.h> + + + /****************************************************************************** + * RawCom Constructor - Initializes data for instance of RawCom + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawCom::RawCom(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding encoding, SWTextDirection dir, 
SWTextMarkup markup, const char* ilang) + : RawVerse(ipath), + SWCom(iname, idesc, idisp, encoding, dir, markup, ilang){ +} + + +/****************************************************************************** + * RawCom Destructor - Cleans up instance of RawCom + */ + +RawCom::~RawCom() +{ +} + + +/****************************************************************************** + * RawCom::getRawEntry() - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +char *RawCom::getRawEntry() { + long start = 0; + unsigned short size = 0; + VerseKey *key = 0; + + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + if (!key) + key = new VerseKey(this->key); + + + findoffset(key->Testament(), key->Index(), &start, &size); + entrySize = size; // support getEntrySize call + + unsigned long newsize = (size + 2) * FILTERPAD; + if (newsize > entrybufallocsize) { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ newsize ]; + entrybufallocsize = newsize; + } + *entrybuf = 0; + + readtext(key->Testament(), start, (size + 2), entrybuf); + + rawFilter(entrybuf, size, key); + + if (!isUnicode()) + preptext(entrybuf); + + if (key != this->key) + delete key; + + return entrybuf; +} + + +/****************************************************************************** + * RawCom::increment - Increments module key a number of entries + * + * ENT: steps - Number of entries to jump forward + * + * RET: *this + */ + +void RawCom::increment(int steps) { + long start; + unsigned short size; + VerseKey *tmpkey = 0; + + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... ) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + + SWKey lastgood = *tmpkey; + while (steps) { + long laststart = start; + unsigned short lastsize = size; + SWKey lasttry = *tmpkey; + (steps > 0) ? 
(*key)++ : (*key)--; + if (tmpkey != key) + delete tmpkey; + tmpkey = 0; + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... ) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + if ((error = key->Error())) { + *key = lastgood; + break; + } + long index = tmpkey->Index(); + findoffset(tmpkey->Testament(), index, &start, &size); + if ( + (((laststart != start) || (lastsize != size)) // we're a different entry + && (start > 0) && (size)) // and we actually have a size + ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links + steps += (steps < 0) ? 1 : -1; + lastgood = *tmpkey; + } + } + error = (error) ? KEYERR_OUTOFBOUNDS : 0; + + if (tmpkey != key) + delete tmpkey; +} + + +void RawCom::setEntry(const char *inbuf, long len) { + VerseKey *key = 0; + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), inbuf, len); + + if (this->key != key) // free our key if we created a VerseKey + delete key; +} + + +void RawCom::linkEntry(const SWKey *inkey) { + VerseKey *destkey = 0; + const VerseKey *srckey = 0; + // see if we have a VerseKey * or decendant + try { + destkey = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!destkey) + destkey = new VerseKey(this->key); + + // see if we have a VerseKey * or decendant + try { + srckey = SWDYNAMIC_CAST(VerseKey, inkey); + } + catch ( ... 
) {} + // if we don't have a VerseKey * decendant, create our own + if (!srckey) + srckey = new VerseKey(inkey); + + linkentry(destkey->Testament(), destkey->Index(), srckey->Index()); + + if (this->key != destkey) // free our key if we created a VerseKey + delete destkey; + + if (inkey != srckey) // free our key if we created a VerseKey + delete srckey; +} + + +/****************************************************************************** + * RawCom::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawCom::deleteEntry() { + + VerseKey *key = 0; + + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), ""); + + if (key != this->key) + delete key; +} diff --git a/src/modules/comments/rawcom/rtfidx.cpp b/src/modules/comments/rawcom/rtfidx.cpp new file mode 100644 index 0000000..38b38bc --- /dev/null +++ b/src/modules/comments/rawcom/rtfidx.cpp @@ -0,0 +1,292 @@ +/***************************************************************************** + * + * This code wreaks but works (at least for MHC). Good luck! 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <fcntl.h> +#include <versekey.h> + + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size); +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size); +void openfiles(char *fname); +void checkparams(int argc, char **argv); + + +VerseKey key1, key2, key3; +int fp, vfp, cfp, bfp; +long chapoffset; +short chapsize; +char testmnt; + + +main(int argc, char **argv) +{ + long pos, offset; + int num1, num2, rangemax, curbook = 0, curchap = 0, curverse = 0; + char buf[127], startflag = 0; + short size, tmp; + + checkparams(argc, argv); + + openfiles(argv[1]); + + testmnt = key1.Testament(); + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + while(!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) { + if (num2) { + key2.Verse(1); + key2.Chapter(num1); + key2.Verse(num2); + } + else { + key2.Verse(1); + if (!startflag) { + startflag = 1; + } + else { + if (num1 <= key2.Chapter()) { // new book + key2.Chapter(1); + key2.Book(key2.Book()+1); + } + } + key2.Chapter(num1); + printf("Found Chapter Break: %d ('%s')\n", num1, (char *)key2); + chapoffset = offset; + chapsize = size; + continue; + } + + key3 = key2; + key3 += (rangemax - key3.Verse()); + + writeidx(key1, key2, key3, offset, size); + } + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +/************************************************************************** + * ENT: key1 - current location of index + * key2 - minimum keyval for which this offset 
is valid + * key3 - maximum keyval for which this offset is valid + */ + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size) +{ + long pos; + short tmp; + + for (; ((key1 <= key3) && (key1.Error() != KEYERR_OUTOFBOUNDS) && (key1.Testament() == testmnt)); key1+=1) { + if (key1.Verse() == 1) { // new chapter + if (key1.Chapter() == 1) { // new book + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Book intro (vss) set to same as chap for now(it should be chap 1 which usually contains the book into anyway)*/ + write(vfp, &chapsize, 2); + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Chapter intro */ + write(vfp, &chapsize, 2); + } + if (key1 >= key2) { + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + else { + pos = 0; + tmp = 0; + write(vfp, &pos, 4); + write(vfp, &tmp, 2); + } + } +} + + +char startentry(char *buf) +{ + char loop; + + if (buf[0] != 10) + return 0; + if (buf[1] != '#') + return 0; + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } + return 1; +} + + +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size) +{ + char buf[7]; + char buf2[20]; + char ch; + char loop; + long offset2; + int ch2, vs2, rm2; + + memset(buf, ' ', 7); + + while (1) { + if (startentry(buf)) { + memset(buf, ' ', 2); + for (loop = 2; loop < 7; loop++) { + if ((buf[loop] == '-') || (buf[loop] == ',') || (buf[loop] == ' ')) { + buf[loop] = 0; + *num2 = atoi(buf); + break; + } + } + for (ch = loop + 1; ch < 7; ch++) { + if (buf[ch] == ' ') { + break; + } + } + buf[ch] = 0; + *rangemax = atoi(&buf[loop+1]); + if (!*rangemax) + *rangemax = *num2; + *offset = lseek(fp, 0, SEEK_CUR) - 5; + if (size) { + if 
(findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + if (vs2) { + *size = (offset2 - (*offset)) - 3; + } + else { + sprintf(buf2, "$-$-$- XX:%d", ch2); + *size = (offset2 - (*offset)) - ((strlen(buf2) + 4)); + } + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + + if (!strncmp(buf, "$-$-$-", 6)) { + memset(buf2, 0, 7); + loop = 0; + while ((read(fp, &buf2[loop], 1) == 1) && (loop < 7)) { + if ((buf2[loop] == 10) || (buf2[loop] == 13)) { + buf2[loop] = 0; + break; + } + loop++; + } + while (read(fp, &ch, 1) == 1) { + if (ch == '*') + break; + } + + *offset = lseek(fp, 0, SEEK_CUR) - 1; + *num2 = 0; + for (loop = strlen(buf2) - 1; loop; loop--) { + if (buf2[loop] == ':') + break; + } + *num1 = atoi(&buf2[loop+1]); + printf("Chapter marker: %s\n", buf2); + if (size) { + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + if (vs2) { + *size = (offset2 - (*offset)) - 3; + } + else { + sprintf(buf2, "$-$-$- XX:%d", ch2); + *size = (offset2 - (*offset)) - ((strlen(buf2) + 4)); + } + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + + + memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + + +void openfiles(char *fname) +{ + char buf[255]; + + if ((fp = open(fname, O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", fname); + exit(1); + } + + sprintf(buf, "%s.vss", fname); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", fname); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", fname); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } +} + + +void checkparams(int argc, char **argv) +{ + if (argc < 2) { + fprintf(stderr, "usage: %s 
<file to process> [nt - for new testmt file]\n", argv[0]); + exit(1); + } + if (argc == 3) + key1 = key2 = key3 = "Matthew 1:1"; + else key1 = key2 = key3 = "Genesis 1:1"; +} diff --git a/src/modules/comments/rawcom/rwpidx.cpp b/src/modules/comments/rawcom/rwpidx.cpp new file mode 100644 index 0000000..afcbd81 --- /dev/null +++ b/src/modules/comments/rawcom/rwpidx.cpp @@ -0,0 +1,266 @@ +/***************************************************************************** + * + * This code wreaks but works (at least for RWP). Good luck! + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <fcntl.h> +#include <versekey.h> + + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size); +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size); +void openfiles(char *fname); +void checkparams(int argc, char **argv); + + +VerseKey key1, key2, key3; +int fp, vfp, cfp, bfp; +long chapoffset; +short chapsize; +char testmnt; + + +main(int argc, char **argv) +{ + long pos, offset; + int num1 = 0, num2 = 0, rangemax, curbook = 0, curchap = 0, curverse = 0; + char buf[127], startflag = 0; + short size, tmp; + + checkparams(argc, argv); + + openfiles(argv[1]); + + testmnt = key1.Testament(); + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + while(!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) { + if (num2) { + key2.Verse(1); + key2.Chapter(num1); + key2.Verse(num2); + } + else { + key2.Verse(1); + if (!startflag) { + startflag = 1; + } + else { + if (num1 <= key2.Chapter()) { // 
new book + key2.Chapter(1); + key2.Book(key2.Book()+1); + } + } + key2.Chapter(num1); + printf("Found Chapter Break: %d ('%s')\n", num1, (const char *)key2); + chapoffset = offset; + chapsize = size; + continue; + } + + key3 = key2; + key3 += (rangemax - key3.Verse()); + + printf("Found verse Break: ('%s')\n", (const char *)key2); + writeidx(key1, key2, key3, offset, size); + } + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +/************************************************************************** + * ENT: key1 - current location of index + * key2 - minimum keyval for which this offset is valid + * key3 - maximum keyval for which this offset is valid + */ + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size) +{ + long pos; + short tmp; + + for (; ((key1 <= key3) && (key1.Error() != KEYERR_OUTOFBOUNDS) && (key1.Testament() == testmnt)); key1+=1) { + if (key1.Verse() == 1) { // new chapter + if (key1.Chapter() == 1) { // new book + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Book intro (vss) set to same as chap for now(it should be chap 1 which usually contains the book into anyway)*/ + write(vfp, &chapsize, 2); + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Chapter intro */ + write(vfp, &chapsize, 2); + } + if (key1 >= key2) { + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + else { + pos = 0; + tmp = 0; + write(vfp, &pos, 4); + write(vfp, &tmp, 2); + } + } +} + + +char startentry(char *buf) +{ + char colon = 0; + + if (buf[0] != 10) + return 0; + if (buf[1] != 10) + return 0; + if (!isdigit(buf[2])) + return 0; + if (!isdigit(buf[3])) { + if (buf[3]!= ':') + return 0; + else colon++; + } + if (!isdigit(buf[4])) { + if (buf[4]!= ':') + return 0; + else colon++; + } + if (colon != 1) + return 0; + return 1; +} + + +char findbreak(int fp, long 
*offset, int *num1, int *num2, int *rangemax, short *size) +{ + char buf[7]; + char buf2[20]; + char ch; + char loop; + long offset2; + int ch2, vs2, rm2; + + memset(buf, ' ', 7); + + while (1) { + if (startentry(buf)) { + buf[0] = ' '; + buf[1] = ' '; + sscanf(buf, "%d:%d", num1, num2); + *rangemax = *num2; + *offset = lseek(fp, 0, SEEK_CUR) - 5; + if (size) { + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + if (vs2) { + *size = (offset2 - (*offset)) - 2; + } + else { + *size = (offset2 - (*offset)) - 6; + } + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + + if (!strncmp(buf, "$-$-$-", 6)) { + *offset = lseek(fp, 0, SEEK_CUR) - 1; + *num2 = 0; + (*num1)++; + printf("Book marker: %s\n", buf2); + if (size) { + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + if (vs2) { + *size = (offset2 - (*offset)) - 2; + } + else { + *size = (offset2 - (*offset)) - 6; + } + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + + + memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + + +void openfiles(char *fname) +{ + char buf[255]; + + if ((fp = open(fname, O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", fname); + exit(1); + } + + sprintf(buf, "%s.vss", fname); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", fname); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", fname); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } +} + + +void checkparams(int argc, char **argv) +{ + if (argc < 2) { + fprintf(stderr, "usage: %s <file to process> [nt - for new testmt file]\n", argv[0]); + exit(1); + } + if (argc == 3) + key1 = key2 
= key3 = "Matthew 1:1"; + else key1 = key2 = key3 = "Genesis 1:1"; +} diff --git a/src/modules/comments/rawfiles/Makefile b/src/modules/comments/rawfiles/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/comments/rawfiles/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. + +all: + make -C ${root} diff --git a/src/modules/comments/rawfiles/Makefile.am b/src/modules/comments/rawfiles/Makefile.am new file mode 100644 index 0000000..53aadbe --- /dev/null +++ b/src/modules/comments/rawfiles/Makefile.am @@ -0,0 +1,3 @@ +rawfilesdir = $(top_srcdir)/src/modules/comments/rawfiles + +libsword_la_SOURCES += $(rawfilesdir)/rawfiles.cpp diff --git a/src/modules/comments/rawfiles/rawfiles.cpp b/src/modules/comments/rawfiles/rawfiles.cpp new file mode 100644 index 0000000..c073a73 --- /dev/null +++ b/src/modules/comments/rawfiles/rawfiles.cpp @@ -0,0 +1,274 @@ +/****************************************************************************** + * rawfiles.cpp - code for class 'RawFiles'- a module that produces HTML HREFs + * pointing to actual text desired. Uses standard + * files: ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawverse.h> +#include <rawfiles.h> +#include <filemgr.h> + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. 
+ + + /****************************************************************************** + * RawFiles Constructor - Initializes data for instance of RawFiles + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawFiles::RawFiles(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : RawVerse(ipath, O_RDWR), SWCom(iname, idesc, idisp, enc, dir, mark, ilang) +{ +} + + +/****************************************************************************** + * RawFiles Destructor - Cleans up instance of RawFiles + */ + +RawFiles::~RawFiles() +{ +} + + +/****************************************************************************** + * RawFiles::getRawEntry - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +char *RawFiles::getRawEntry() { + FileDesc *datafile; + long start = 0; + unsigned short size = 0; + char *tmpbuf; + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... 
) {} +#endif + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + + if (entrybuf) + delete [] entrybuf; + + if (size) { + tmpbuf = new char [ (size + 2) + strlen(path) + 5 ]; + sprintf(tmpbuf,"%s/",path); + readtext(key->Testament(), start, (size + 2), tmpbuf+strlen(tmpbuf)); + datafile = FileMgr::systemFileMgr.open(tmpbuf, O_RDONLY|O_BINARY); + delete [] tmpbuf; + if (datafile->getFd() > 0) { + size = lseek(datafile->getFd(), 0, SEEK_END); + entrybuf = new char [ size * FILTERPAD ]; + memset(entrybuf, 0, size * FILTERPAD); + lseek(datafile->getFd(), 0, SEEK_SET); + read(datafile->getFd(), entrybuf, size); +// preptext(entrybuf); + } + else { + entrybuf = new char [2]; + entrybuf[0] = 0; + entrybuf[1] = 0; + } + FileMgr::systemFileMgr.close(datafile); + } + else { + entrybuf = new char [2]; + entrybuf[0] = 0; + entrybuf[1] = 0; + } + + if (key != this->key) + delete key; + + return entrybuf; +} + + +/****************************************************************************** + * RawFiles::setEntry(char *)- Update the modules current key entry with + * provided text + */ + +void RawFiles::setEntry(const char *inbuf, long len) { + FileDesc *datafile; + long start; + unsigned short size; + char *tmpbuf; + VerseKey *key = 0; + + len = (len<0)?strlen(inbuf):len; + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... 
) {} + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + + if (size) { + tmpbuf = new char [ (size + 3) + strlen(path) + 1 ]; + sprintf(tmpbuf, "%s/", path); + readtext(key->Testament(), start, (size + 2), tmpbuf+strlen(tmpbuf)); + } + else { + tmpbuf = new char [ 16 + strlen(path) + 1 ]; + sprintf(tmpbuf, "%s/%s", path, getnextfilename()); + settext(key->Testament(), key->Index(), tmpbuf+strlen(path)+1); + } + datafile = FileMgr::systemFileMgr.open(tmpbuf, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC); + delete [] tmpbuf; + if (datafile->getFd() > 0) { + write(datafile->getFd(), inbuf, len); + } + FileMgr::systemFileMgr.close(datafile); + + if (key != this->key) + delete key; +} + + +/****************************************************************************** + * RawFiles::linkEntry(SWKey *)- Link the modules current key entry with + * another module entry + * + * RET: *this + */ + +void RawFiles::linkEntry(const SWKey *inkey) { + + long start; + unsigned short size; + char *tmpbuf; + const VerseKey *key = 0; + + try { + key = SWDYNAMIC_CAST(VerseKey, inkey); + } + catch ( ... ) {} + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + + if (size) { + tmpbuf = new char [ size + 2]; + readtext(key->Testament(), start, size + 2, tmpbuf); + + if (key != inkey) + delete key; + key = 0; + + try { + key = SWDYNAMIC_CAST(VerseKey, inkey); + } + catch ( ... ) {} + if (!key) + key = new VerseKey(this->key); + settext(key->Testament(), key->Index(), tmpbuf); + } + + if (key != inkey) + delete key; +} + + +/****************************************************************************** + * RawFiles::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawFiles::deleteEntry() { + + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... 
) {} +#endif + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), ""); + + if (key != this->key) + delete key; +} + + +/****************************************************************************** + * RawFiles::getnextfilename - generates a valid filename in which to store + * an entry + * + * RET: filename + */ + +char *RawFiles::getnextfilename() { + static char incfile[255]; + long number; + FileDesc *datafile; + + sprintf(incfile, "%s/incfile", path); + datafile = FileMgr::systemFileMgr.open(incfile, O_RDONLY|O_BINARY); + if (read(datafile->getFd(), &number, 4) != 4) + number = 0; + number++; + FileMgr::systemFileMgr.close(datafile); + + datafile = FileMgr::systemFileMgr.open(incfile, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC); + write(datafile->getFd(), &number, 4); + FileMgr::systemFileMgr.close(datafile); + sprintf(incfile, "%.7ld", number-1); + return incfile; +} + + +char RawFiles::createModule (const char *path) { + char *incfile = new char [ strlen (path) + 16 ]; + static long zero = 0; + FileDesc *datafile; + + sprintf(incfile, "%s/incfile", path); + datafile = FileMgr::systemFileMgr.open(incfile, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC); + delete [] incfile; + write(datafile->getFd(), &zero, 4); + FileMgr::systemFileMgr.close(datafile); + + return RawVerse::createModule (path); +} + + + diff --git a/src/modules/comments/rawfiles/rawfilesgen.cpp b/src/modules/comments/rawfiles/rawfilesgen.cpp new file mode 100644 index 0000000..f60c9e2 --- /dev/null +++ b/src/modules/comments/rawfiles/rawfilesgen.cpp @@ -0,0 +1,236 @@ +/***************************************************************************** + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <fcntl.h> +#include <versekey.h> + +#ifndef O_BINARY + #define O_BINARY 0 +#endif + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, 
short size); +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size); +void openfiles(); +void checkparams(int argc, char **argv); +void charsetconvert(char *data); + + +VerseKey key1, key2, key3; +int fp, vfp, cfp, bfp; +long chapoffset; +short chapsize; +char testmnt; +char startflag = 0; + + +main(int argc, char **argv) +{ + long pos, offset; + int num1, num2, rangemax, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + short size, tmp; + extern struct zonline online; + + checkparams(argc, argv); + + key1 = key2 = key3 = "Genesis 1:1"; + + openfiles(); + + num1 = key1.Chapter(); + num2 = key1.Verse(); + + while(!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) { + if (!startflag) { + startflag = 1; + } + else { + if (num2 < key2.Verse()) { // new chapter + if (num1 <= key2.Chapter()) { // new book + key2.Verse(1); + key2.Chapter(1); + key2.Book(key2.Book()+1); + } + printf("Created Empty Entry: %d ('%s')\n", num1, (const char *)key2); + chapoffset = offset; + chapsize = size; +// continue; + } + } + key2.Verse(1); + key2.Chapter(num1); + key2.Verse(num2); + + key3 = key2; +// key3 += (rangemax - key3.Verse()); + + writeidx(key1, key2, key3, offset, size); + } + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +/************************************************************************** + * ENT: key1 - current location of index + * key2 - minimum keyval for which this offset is valid + * key3 - maximum keyval for which this offset is valid + */ + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size) +{ + long pos; + short tmp; + + if (key1.Verse() == 1) { // new chapter + if (key1.Chapter() == 1) { // new book + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Book intro (vss) set to same as chap for now(it should be chap 1 which usually contains the book 
into anyway)*/ + write(vfp, &chapsize, 2); + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Chapter intro */ + write(vfp, &chapsize, 2); + } + if (key1 >= key2) { + write(vfp, &offset, 4); + size = 0; + write(vfp, &size, 2); + } + else { + pos = 0; + tmp = 0; + write(vfp, &pos, 4); + write(vfp, &tmp, 2); + } + key1++; +} + + +static VerseKey inckey = "Genesis 1:1"; + +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size) +{ + char buf[7]; + char buf2[20]; + char ch; + char loop; + long offset2; + int ch2, vs2, rm2; + bool flag; + long chapstart = 0; + static int olbvnum = 0; + char data[16]; + + memset (data,0,16); + + if (++olbvnum <= 31102) { + + if (olbvnum == 23146) { // "Matthew 1:1" + close(vfp); + close(cfp); + close(bfp); + close(fp); + key1 = key2 = key3 = inckey = "Matthew 1:1"; + openfiles(); + startflag = 0; + } + + + *offset = lseek(fp, 0, SEEK_CUR); + + if ((olbvnum!=1) && (olbvnum != 23146)) + inckey++; + + *num1 = inckey.Chapter(); + *num2 = inckey.Verse(); + + + write(fp, data, 16); + + *size = lseek(fp, 0, SEEK_CUR) - *offset; + return 0; + } + return 1; +} + + +void openfiles() +{ + char buf[255]; + char fname[5]; + long pos; + short size; + + testmnt = key1.Testament(); + + strcpy(fname, (testmnt==2) ? 
"nt" : "ot"); + unlink(fname); + if ((fp = open(fname, O_CREAT|O_RDWR|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", fname); + exit(1); + } + + sprintf(buf, "%s.vss", fname); + unlink(buf); + if ((vfp = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", fname); + unlink(buf); + if ((cfp = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", fname); + unlink(buf); + if ((bfp = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + +} + + +void checkparams(int argc, char **argv) +{ + if (argc !=1) { + fprintf(stderr, "usage: %s\n", argv[0]); + exit(1); + } +} diff --git a/src/modules/comments/swcom.cpp b/src/modules/comments/swcom.cpp new file mode 100644 index 0000000..1feb0cf --- /dev/null +++ b/src/modules/comments/swcom.cpp @@ -0,0 +1,30 @@ +/****************************************************************************** + * swcom.cpp - code for base class 'SWCom'- The basis for all commentary + * modules + */ + +#include <swcom.h> + + +/****************************************************************************** + * SWCom Constructor - Initializes data for instance of SWCom + * + * ENT: imodname - Internal name for module + * imoddesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +SWCom::SWCom(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, 
SWTextMarkup mark, const char* ilang): SWModule(imodname, imoddesc, idisp, "Commentaries", enc, dir, mark, ilang) +{ + delete key; + key = CreateKey(); +} + + +/****************************************************************************** + * SWCom Destructor - Cleans up instance of SWCom + */ + +SWCom::~SWCom() +{ +} diff --git a/src/modules/comments/zcom/Makefile b/src/modules/comments/zcom/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/comments/zcom/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. + +all: + make -C ${root} diff --git a/src/modules/comments/zcom/Makefile.am b/src/modules/comments/zcom/Makefile.am new file mode 100644 index 0000000..ad1ef6a --- /dev/null +++ b/src/modules/comments/zcom/Makefile.am @@ -0,0 +1,4 @@ +zcomdir = $(top_srcdir)/src/modules/comments/zcom + +libsword_la_SOURCES += $(zcomdir)/zcom.cpp + diff --git a/src/modules/comments/zcom/makeidx.c b/src/modules/comments/zcom/makeidx.c new file mode 100644 index 0000000..311103e --- /dev/null +++ b/src/modules/comments/zcom/makeidx.c @@ -0,0 +1,146 @@ +#include <stdio.h> +#include <fcntl.h> + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fp, vfp, cfp, bfp; + long pos; + short size, tmp; + int num1, num2, offset, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + + if (argc != 2) { + fprintf(stderr, "usage: %s <file to process>\n", argv[0]); + exit(1); + } + + if ((fp = open(argv[1], O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + 
fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + while (!findbreak(fp, &offset, &num1, &num2, &size)) { + + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Book intro (vss) */ + write(vfp, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Chapter intro */ + write(vfp, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7d\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence\n", buf); + break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of sequence\n", buf); + break; + } + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size) +{ + char buf[7]; + char buf2[7]; + char loop; + int offset2, ch2, vs2; + + memset(buf, ' ', 7); + + while (1) { + if (buf[3] == ':') { + memcpy(buf2, buf, 7); + for (loop = 0; loop < 7; loop++) { + if (!isdigit(buf2[loop])) + buf2[loop] = ' '; + } + buf2[3] = 0; + *num1 = atoi(buf2); + *num2 = atoi(&buf2[4]); + if (*num1 && *num2) { + *offset = lseek(fp, 0, SEEK_CUR); + sprintf(buf2, "%d", *num2); + *offset -= 2 - strlen(buf2); + if (size) { + if 
(findbreak(fp, &offset2, &ch2, &vs2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + sprintf(buf2, "%d:%d", ch2, vs2); + *size = (offset2 - (*offset)) - (strlen(buf2) + 2); + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + } + memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + diff --git a/src/modules/comments/zcom/rawtxt2z.cpp b/src/modules/comments/zcom/rawtxt2z.cpp new file mode 100644 index 0000000..f8c18d0 --- /dev/null +++ b/src/modules/comments/zcom/rawtxt2z.cpp @@ -0,0 +1,83 @@ +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <stdlib.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <swcomprs.h> + + +main(int argc, char **argv) +{ + SWCompress *zobj; + int ifd, ofd, ixfd, oxfd; + long offset, loffset, lzoffset; + short size, lsize, lzsize; + char *tmpbuf; + + if (argc != 2) { + fprintf(stderr, "usage: %s <datafilename>\n", argv[0]); + exit(1); + } + + zobj = new SWCompress(); + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + tmpbuf = new char [ strlen(argv[1]) + 9 ]; + ifd = open(argv[1], O_RDONLY|O_BINARY); + sprintf(tmpbuf, "%s.vss", argv[1]); + ixfd = open(tmpbuf, O_RDONLY|O_BINARY); + sprintf(tmpbuf, "%s.zzz", argv[1]); + ofd = open(tmpbuf, O_WRONLY|O_BINARY|O_CREAT); + sprintf(tmpbuf, "%s.zzz.vss", argv[1]); + oxfd = open(tmpbuf, O_WRONLY|O_BINARY|O_CREAT); + + delete [] tmpbuf; + + printf("\n"); + + while (1) { + if (read(ixfd, &offset, 4) != 4) + break; + if (read(ixfd, &size, 2) != 2) + break; + + if ((offset == loffset) && (size == lsize)) { + printf("using previous offset,size\n", size); + write(oxfd, &lzoffset, 4); + write(oxfd, &lzsize, 2); + } + else { + printf("%d -> ", size); + lsize = size; + loffset = offset; + + if (size) { + tmpbuf = (char *) calloc(size + 1, 1); + lseek(ifd, offset, SEEK_SET); + read(ifd, tmpbuf, size); + zobj->Buf(tmpbuf); + zobj->zBuf(&size); + free(tmpbuf); + } + offset = 
lseek(ofd, 0, SEEK_END); + write(oxfd, &offset, 4); + if (size) + write(ofd, zobj->zBuf(&size), size); + lzoffset = offset; + write(oxfd, &size, 2); + lzsize = size; + printf("%d \n", size); + } + } + delete zobj; +} diff --git a/src/modules/comments/zcom/zcom.cpp b/src/modules/comments/zcom/zcom.cpp new file mode 100644 index 0000000..a0b35c3 --- /dev/null +++ b/src/modules/comments/zcom/zcom.cpp @@ -0,0 +1,255 @@ +/****************************************************************************** + * rawcom.cpp - code for class 'zCom'- a module that reads raw commentary + * files: ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <zverse.h> +#include <zcom.h> + + +/****************************************************************************** + * zCom Constructor - Initializes data for instance of zCom + * + * ENT: ipath - path to data files + * iname - Internal name for module + * idesc - Name to display to user for module + * iblockType - verse, chapter, book, etc. 
of index chunks + * icomp - Compressor object + * idisp - Display object to use for displaying + */ + +zCom::zCom(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zVerse(ipath, -1, iblockType, icomp), SWCom(iname, idesc, idisp, enc, dir, mark, ilang)/*, SWCompress()*/ +{ + blockType = iblockType; + lastWriteKey = 0; +} + +/****************************************************************************** + * zCom Destructor - Cleans up instance of zCom + */ + +zCom::~zCom() { + flushCache(); + + if (lastWriteKey) + delete lastWriteKey; +} + +/****************************************************************************** + * zCom::getRawEntry - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ +char *zCom::getRawEntry() { + long start = 0; + unsigned short size = 0; + VerseKey *key = 0; + + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... 
) {} + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + entrySize = size; // support getEntrySize call + + unsigned long newsize = (size + 2) * FILTERPAD; + if (newsize > entrybufallocsize) { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ newsize ]; + entrybufallocsize = newsize; + } + *entrybuf = 0; + + zreadtext(key->Testament(), start, (size + 2), entrybuf); + + rawFilter(entrybuf, size, key); + + if (!isUnicode()) + preptext(entrybuf); + + if (this->key != key) // free our key if we created a VerseKey + delete key; + + return entrybuf; +} + + +bool zCom::sameBlock(VerseKey *k1, VerseKey *k2) { + if (k1->Testament() != k2->Testament()) + return false; + + switch (blockType) { + case VERSEBLOCKS: + if (k1->Verse() != k2->Verse()) + return false; + case CHAPTERBLOCKS: + if (k1->Chapter() != k2->Chapter()) + return false; + case BOOKBLOCKS: + if (k1->Book() != k2->Book()) + return false; + } + return true; +} + +void zCom::setEntry(const char *inbuf, long len) { + VerseKey *key = 0; + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + + // see if we've jumped across blocks since last write + if (lastWriteKey) { + if (!sameBlock(lastWriteKey, key)) { + flushCache(); + } + delete lastWriteKey; + } + + settext(key->Testament(), key->Index(), inbuf, len); + + lastWriteKey = (VerseKey *)key->clone(); // must delete + + if (this->key != key) // free our key if we created a VerseKey + delete key; +} + + +void zCom::linkEntry(const SWKey *inkey) { + VerseKey *destkey = 0; + const VerseKey *srckey = 0; + // see if we have a VerseKey * or decendant + try { + destkey = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... 
) {} + // if we don't have a VerseKey * decendant, create our own + if (!destkey) + destkey = new VerseKey(this->key); + + // see if we have a VerseKey * or decendant + try { + srckey = (const VerseKey *) SWDYNAMIC_CAST(VerseKey, inkey); + } + catch ( ... ) { + } + // if we don't have a VerseKey * decendant, create our own + if (!srckey) + srckey = new VerseKey(inkey); + + linkentry(destkey->Testament(), destkey->Index(), srckey->Index()); + + if (this->key != destkey) // free our key if we created a VerseKey + delete destkey; + + if (inkey != srckey) // free our key if we created a VerseKey + delete srckey; +} + +/****************************************************************************** + * zCom::deleteEntry - deletes this entry + * + * RET: *this + */ + +void zCom::deleteEntry() { + + VerseKey *key = 0; + + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), ""); + + if (key != this->key) + delete key; +} + + +/****************************************************************************** + * zCom::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void zCom::increment(int steps) { + long start; + unsigned short size; + VerseKey *tmpkey = 0; + + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... ) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + + SWKey lastgood = *tmpkey; + while (steps) { + long laststart = start; + unsigned short lastsize = size; + SWKey lasttry = *tmpkey; + (steps > 0) ? (*key)++ : (*key)--; + if (tmpkey != key) + delete tmpkey; + tmpkey = 0; + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... 
) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + if ((error = key->Error())) { + *key = lastgood; + break; + } + long index = tmpkey->Index(); + findoffset(tmpkey->Testament(), index, &start, &size); + if ( + (((laststart != start) || (lastsize != size)) // we're a different entry + && (start > 0) && (size)) // and we actually have a size + ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links + steps += (steps < 0) ? 1 : -1; + lastgood = *tmpkey; + } + } + error = (error) ? KEYERR_OUTOFBOUNDS : 0; + + if (tmpkey != key) + delete tmpkey; +} + diff --git a/src/modules/common/Makefile b/src/modules/common/Makefile new file mode 100644 index 0000000..81f7721 --- /dev/null +++ b/src/modules/common/Makefile @@ -0,0 +1,4 @@ +root := ../../.. + +all: + make -C ${root} diff --git a/src/modules/common/Makefile.am b/src/modules/common/Makefile.am new file mode 100644 index 0000000..ac235d8 --- /dev/null +++ b/src/modules/common/Makefile.am @@ -0,0 +1,22 @@ +commondir = $(top_srcdir)/src/modules/common + +libsword_la_SOURCES += $(commondir)/rawstr.cpp +libsword_la_SOURCES += $(commondir)/rawstr4.cpp +libsword_la_SOURCES += $(commondir)/swcomprs.cpp +libsword_la_SOURCES += $(commondir)/lzsscomprs.cpp + +if ZLIB +SWZLIB = $(commondir)/zipcomprs.cpp +else +SWZLIB = +endif +libsword_la_SOURCES += $(SWZLIB) +libsword_la_SOURCES += $(commondir)/rawverse.cpp +libsword_la_SOURCES += $(commondir)/swcipher.cpp +libsword_la_SOURCES += $(commondir)/zverse.cpp +libsword_la_SOURCES += $(commondir)/zstr.cpp +libsword_la_SOURCES += $(commondir)/entriesblk.cpp + +DEFS += -DUNIX +libsword_la_SOURCES += $(commondir)/sapphire.cpp + diff --git a/src/modules/common/compress.cpp b/src/modules/common/compress.cpp new file mode 100644 index 0000000..5031adb --- /dev/null +++ b/src/modules/common/compress.cpp @@ -0,0 +1,767 @@ +Compression Info, 10-11-95 +Jeff Wheeler + +Source of Algorithm +------------------- + +The compression algorithms used here are based upon the 
algorithms developed and published by Haruhiko Okumura in a paper entitled "Data Compression Algorithms of LARC and LHarc." This paper discusses three compression algorithms, LZSS, LZARI, and LZHUF. LZSS is described as the "first" of these, and is described as providing moderate compression with good speed. LZARI is described as an improved LZSS, a combination of the LZSS algorithm with adaptive arithmetic compression. It is described as being slower than LZSS but with better compression. LZHUF (the basis of the common LHA compression program) was included in the paper; however, a free usage license was not included. + +The following are copies of the statements included at the beginning of each source code listing that was supplied in the working paper. + + LZSS, dated 4/6/89, marked as "Use, distribute and + modify this program freely." + + LZARI, dated 4/7/89, marked as "Use, distribute and + modify this program freely." + + LZHUF, dated 11/20/88, written by Haruyasu Yoshizaki, + translated by Haruhiko Okumura on 4/7/89. Not + expressly marked as redistributable or modifiable. + +Since both LZSS and LZARI are marked as "use, distribute and modify freely" we have felt at liberty basing our compression algorithm on either of these. + +Selection of Algorithm +---------------------- + +Working samples of three possible compression algorithms are supplied in Okumura's paper. Which should be used? + +LZSS is the fastest at decompression, but does not generate as small a compressed file as the other methods. The other two methods provided, perhaps, a 15% improvement in compression. Or, put another way, on a 100K file, LZSS might compress it to 50K while the others might approach 40-45K. For STEP purposes, it was decided that decoding speed was of more importance than tighter compression. For these reasons, the first compression algorithm implemented is the LZSS algorithm. 
+ +About LZSS Encoding +------------------- + +(adapted from Haruhiko Okumura's paper) + +This scheme was proposed by Ziv and Lempel [1]. A slightly modified version is described by Storer and Szymanski [2]. An implementation using a binary tree has been proposed by Bell [3]. + +The algorithm is quite simple. +1. Keep a ring buffer which initially contains all space characters. +2. Read several letters from the file to the buffer. +3. Search the buffer for the longest string that matches the letters just read, and send its length and position in the buffer. + +If the ring buffer is 4096 bytes, the position can be stored in 12 bits. If the length is represented in 4 bits, the <position, length> pair is two bytes long. If the longest match is no more than two characters, then just one character is sent without encoding. The process starts again with the next character. An extra bit is sent each time to tell the decoder whether the next item is a character or a <position, length> pair. + +[1] J. Ziv and A. Lempel, IEEE Transactions IT-23, 337-343 (1977). +[2] J. A. Storer and T. G. Szymanski, J. ACM, 29, 928-951 (1982). +[3] T. C. Bell, IEEE Transactions COM-34, 1176-1182 (1986). + +void InitTree( // no return value + void); // no parameters + +void InsertNode( // no return value + short int Pos); // position in the buffer + +void DeleteNode( // no return value + short int Node); // node to be removed + +void Encode( // no return value + void); // no parameters + +void Decode( // no return value + void); // no parameters + +// The following are constant sizes used by the compression algorithm. +// +// N - This is the size of the ring buffer. It is set +// to 4K. It is important to note that a position +// within the ring buffer requires 12 bits. +// +// F - This is the maximum length of a character sequence +// that can be taken from the ring buffer. It is set +// to 18. 
Note that a length must be 3 before it is +// worthwhile to store a position/length pair, so the +// length can be encoded in only 4 bits. Or, put yet +// another way, it is not necessary to encode a length +// of 0-18, it is necessary to encode a length of +// 3-18, which requires 4 bits. +// +// THRESHOLD - It takes 2 bytes to store an offset and +// a length. If a character sequence only +// requires 1 or 2 characters to store +// uncompressed, then it is better to store +// it uncompressed than as an offset into +// the ring buffer. +// +// Note that the 12 bits used to store the position and the 4 bits +// used to store the length equal a total of 16 bits, or 2 bytes. + +#define N 4096 +#define F 18 +#define THRESHOLD 3 +#define NOT_USED N + +// m_ring_buffer is a text buffer. It contains "nodes" of +// uncompressed text that can be indexed by position. That is, +// a substring of the ring buffer can be indexed by a position +// and a length. When decoding, the compressed text may contain +// a position in the ring buffer and a count of the number of +// bytes from the ring buffer that are to be moved into the +// uncompressed buffer. +// +// This ring buffer is not maintained as part of the compressed +// text. Instead, it is reconstructed dynamically. That is, +// it starts out empty and gets built as the text is decompressed. +// +// The ring buffer contain N bytes, with an additional F - 1 bytes +// to facilitate string comparison. + +unsigned char m_ring_buffer[N + F - 1]; + +// m_match_position and m_match_length are set by InsertNode(). +// +// These variables indicate the position in the ring buffer +// and the number of characters at that position that match +// a given string. + +short int m_match_position; +short int m_match_length; + +// m_lson, m_rson, and m_dad are the Japanese way of referring to +// a tree structure. The dad is the parent and it has a right and +// left son (child). 
+// +// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right +// and left children of node i. +// +// For i = 0 to N-1, m_dad[i] is the parent of node i. +// +// For i = 0 to 255, rson[N + i + 1] is the root of the tree for +// strings that begin with the character i. Note that this requires +// one byte characters. +// +// These nodes store values of 0...(N-1). Memory requirements +// can be reduces by using 2-byte integers instead of full 4-byte +// integers (for 32-bit applications). Therefore, these are +// defined as "short ints." + +short int m_lson[N + 1]; +short int m_rson[N + 257]; +short int m_dad[N + 1]; + +/* + ------------------------------------------------------------------------- + cLZSS::InitTree + + This function initializes the tree nodes to "empty" states. + ------------------------------------------------------------------------- +*/ + +void cLZSS::InitTree( // no return value + void) // no parameters + throw() // exception list + + { + int i; + + // For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right + // and left children of node i. These nodes need not be + // initialized. However, for debugging purposes, it is nice to + // have them initialized. Since this is only used for compression + // (not decompression), I don't mind spending the time to do it. + // + // For the same range of i, m_dad[i] is the parent of node i. + // These are initialized to a known value that can represent + // a "not used" state. + + for (i = 0; i < N; i++) + { + m_lson[i] = NOT_USED; + m_rson[i] = NOT_USED; + m_dad[i] = NOT_USED; + } + + // For i = 0 to 255, m_rson[N + i + 1] is the root of the tree + // for strings that begin with the character i. This is why + // the right child array is larger than the left child array. + // These are also initialzied to a "not used" state. + // + // Note that there are 256 of these, one for each of the possible + // 256 characters. 
+ + for (i = N + 1; i <= (N + 256); i++) + { + m_rson[i] = NOT_USED; + } + + // Done. + } + +/* + ------------------------------------------------------------------------- + cLZSS::InsertNode + + This function inserts a string from the ring buffer into one of + the trees. It loads the match position and length member variables + for the longest match. + + The string to be inserted is identified by the parameter Pos, + A full F bytes are inserted. So, m_ring_buffer[Pos ... Pos+F-1] + are inserted. + + If the matched length is exactly F, then an old node is removed + in favor of the new one (because the old one will be deleted + sooner). + + Note that Pos plays a dual role. It is used as both a position + in the ring buffer and also as a tree node. m_ring_buffer[Pos] + defines a character that is used to identify a tree node. + ------------------------------------------------------------------------- +*/ + +void cLZSS::InsertNode( // no return value + short int Pos) // position in the buffer + throw() // exception list + + { + short int i; + short int p; + int cmp; + unsigned char * key; + + ASSERT(Pos >= 0); + ASSERT(Pos < N); + + cmp = 1; + key = &(m_ring_buffer[Pos]); + + // The last 256 entries in m_rson contain the root nodes for + // strings that begin with a letter. Get an index for the + // first letter in this string. + + p = (short int) (N + 1 + key[0]); + + // Set the left and right tree nodes for this position to "not + // used." + + m_lson[Pos] = NOT_USED; + m_rson[Pos] = NOT_USED; + + // Haven't matched anything yet. + + m_match_length = 0; + + for ( ; ; ) + { + if (cmp >= 0) + { + if (m_rson[p] != NOT_USED) + { + p = m_rson[p]; + } + else + { + m_rson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + else + { + if (m_lson[p] != NOT_USED) + { + p = m_lson[p]; + } + else + { + m_lson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + + // Should we go to the right or the left to look for the + // next match? 
+ + for (i = 1; i < F; i++) + { + cmp = key[i] - m_ring_buffer[p + i]; + if (cmp != 0) + break; + } + + if (i > m_match_length) + { + m_match_position = p; + m_match_length = i; + + if (i >= F) + break; + } + } + + m_dad[Pos] = m_dad[p]; + m_lson[Pos] = m_lson[p]; + m_rson[Pos] = m_rson[p]; + + m_dad[ m_lson[p] ] = Pos; + m_dad[ m_rson[p] ] = Pos; + + if (m_rson[ m_dad[p] ] == p) + { + m_rson[ m_dad[p] ] = Pos; + } + else + { + m_lson[ m_dad[p] ] = Pos; + } + + // Remove "p" + + m_dad[p] = NOT_USED; + } + +/* + ------------------------------------------------------------------------- + cLZSS::DeleteNode + + This function removes the node "Node" from the tree. + ------------------------------------------------------------------------- +*/ + +void cLZSS::DeleteNode( // no return value + short int Node) // node to be removed + throw() // exception list + + { + short int q; + + ASSERT(Node >= 0); + ASSERT(Node < (N+1)); + + if (m_dad[Node] == NOT_USED) + { + // not in tree, nothing to do + return; + } + + if (m_rson[Node] == NOT_USED) + { + q = m_lson[Node]; + } + else if (m_lson[Node] == NOT_USED) + { + q = m_rson[Node]; + } + else + { + q = m_lson[Node]; + if (m_rson[q] != NOT_USED) + { + do + { + q = m_rson[q]; + } + while (m_rson[q] != NOT_USED); + + m_rson[ m_dad[q] ] = m_lson[q]; + m_dad[ m_lson[q] ] = m_dad[q]; + m_lson[q] = m_lson[Node]; + m_dad[ m_lson[Node] ] = q; + } + + m_rson[q] = m_rson[Node]; + m_dad[ m_rson[Node] ] = q; + } + + m_dad[q] = m_dad[Node]; + + if (m_rson[ m_dad[Node] ] == Node) + { + m_rson[ m_dad[Node] ] = q; + } + else + { + m_lson[ m_dad[Node] ] = q; + } + + m_dad[Node] = NOT_USED; + } + +/* + ------------------------------------------------------------------------- + cLZSS::Encode + + This function "encodes" the input stream into the output stream. + The GetChars() and SendChars() functions are used to separate + this method from the actual i/o. 
+ ------------------------------------------------------------------------- +*/ + +void cLZSS::Encode( // no return value + void) // no parameters + + { + short int i; // an iterator + short int r; // node number in the binary tree + short int s; // position in the ring buffer + unsigned short int len; // len of initial string + short int last_match_length; // length of last match + short int code_buf_pos; // position in the output buffer + unsigned char code_buf[17]; // the output buffer + unsigned char mask; // bit mask for byte 0 of out buf + unsigned char c; // character read from string + + // Start with a clean tree. + + InitTree(); + + // code_buf[0] works as eight flags. A "1" represents that the + // unit is an unencoded letter (1 byte), and a "0" represents + // that the next unit is a <position,length> pair (2 bytes). + // + // code_buf[1..16] stores eight units of code. Since the best + // we can do is store eight <position,length> pairs, at most 16 + // bytes are needed to store this. + // + // This is why the maximum size of the code buffer is 17 bytes. + + code_buf[0] = 0; + code_buf_pos = 1; + + // Mask iterates over the 8 bits in the code buffer. The first + // character ends up being stored in the low bit. + // + // bit 8 7 6 5 4 3 2 1 + // | | + // | first sequence in code buffer + // | + // last sequence in code buffer + + mask = 1; + + s = 0; + r = (short int) N - (short int) F; + + // Initialize the ring buffer with spaces... + + // Note that the last F bytes of the ring buffer are not filled. + // This is because those F bytes will be filled in immediately + // with bytes from the input stream. + + memset(m_ring_buffer, ' ', N - F); + + // Read F bytes into the last F bytes of the ring buffer. + // + // This function loads the buffer with X characters and returns + // the actual amount loaded. + + len = GetChars(&(m_ring_buffer[r]), F); + + // Make sure there is something to be compressed. 
+ + if (len == 0) + return; + + // Insert the F strings, each of which begins with one or more + // 'space' characters. Note the order in which these strings + // are inserted. This way, degenerate trees will be less likely + // to occur. + + for (i = 1; i <= F; i++) + { + InsertNode((short int) (r - i)); + } + + // Finally, insert the whole string just read. The + // member variables match_length and match_position are set. + + InsertNode(r); + + // Now that we're preloaded, continue till done. + + do + { + + // m_match_length may be spuriously long near the end of + // text. + + if (m_match_length > len) + { + m_match_length = len; + } + + // Is it cheaper to store this as a single character? If so, + // make it so. + + if (m_match_length < THRESHOLD) + { + // Send one character. Remember that code_buf[0] is the + // set of flags for the next eight items. + + m_match_length = 1; + code_buf[0] |= mask; + code_buf[code_buf_pos++] = m_ring_buffer[r]; + } + + // Otherwise, we do indeed have a string that can be stored + // compressed to save space. + + else + { + // The next 16 bits need to contain the position (12 bits) + // and the length (4 bits). + + code_buf[code_buf_pos++] = (unsigned char) m_match_position; + code_buf[code_buf_pos++] = (unsigned char) ( + ((m_match_position >> 4) & 0xf0) | + (m_match_length - THRESHOLD) ); + } + + // Shift the mask one bit to the left so that it will be ready + // to store the new bit. + + mask = (unsigned char) (mask << 1); + + // If the mask is now 0, then we know that we have a full set + // of flags and items in the code buffer. These need to be + // output. + + if (mask == 0) + { + // code_buf is the buffer of characters to be output. + // code_buf_pos is the number of characters it contains. + + SendChars(code_buf, code_buf_pos); + + // Reset for next buffer... + + code_buf[0] = 0; + code_buf_pos = 1; + mask = 1; + } + + last_match_length = m_match_length; + + // Delete old strings and read new bytes... 
+ + for (i = 0; i < last_match_length; i++) + { + + // Get next character... + + if (GetChars(&c, 1) != 1) + break; + + // Delete "old strings" + + DeleteNode(s); + + // Put this character into the ring buffer. + // + // The original comment here says "If the position is near + // the end of the buffer, extend the buffer to make + // string comparison easier." + // + // That's a little misleading, because the "end" of the + // buffer is really what we consider to be the "beginning" + // of the buffer, that is, positions 0 through F. + // + // The idea is that the front end of the buffer is duplicated + // into the back end so that when you're looking at characters + // at the back end of the buffer, you can index ahead (beyond + // the normal end of the buffer) and see the characters + // that are at the front end of the buffer wihtout having + // to adjust the index. + // + // That is... + // + // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234 + // | | | + // position 0 end of buffer | + // | + // duplicate of front of buffer + + m_ring_buffer[s] = c; + + if (s < F - 1) + { + m_ring_buffer[s + N] = c; + } + + // Increment the position, and wrap around when we're at + // the end. Note that this relies on N being a power of 2. + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Register the string that is found in + // m_ring_buffer[r..r+F-1]. + + InsertNode(r); + } + + // If we didn't quit because we hit the last_match_length, + // then we must have quit because we ran out of characters + // to process. + + while (i++ < last_match_length) + { + DeleteNode(s); + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Note that len hitting 0 is the key that causes the + // do...while() to terminate. This is the only place + // within the loop that len is modified. + // + // Its original value is F (or a number less than F for + // short strings). 
+ + if (--len) + { + InsertNode(r); /* buffer may not be empty. */ + } + } + + // End of do...while() loop. Continue processing until there + // are no more characters to be compressed. The variable + // "len" is used to signal this condition. + } + while (len > 0); + + // There could still be something in the output buffer. Send it + // now. + + if (code_buf_pos > 1) + { + // code_buf is the encoded string to send. + // code_buf_ptr is the number of characters. + + SendChars(code_buf, code_buf_pos); + } + + // Done! + } + +/* + ------------------------------------------------------------------------- + cLZSS::Decode + + This function "decodes" the input stream into the output stream. + The GetChars() and SendChars() functions are used to separate + this method from the actual i/o. + ------------------------------------------------------------------------- +*/ + +void cLZSS::Decode( // no return value + void) // no parameters + + { + int k; + int r; // node number + unsigned char c[F]; // an array of chars + unsigned char flags; // 8 bits of flags + int flag_count; // which flag we're on + short int pos; // position in the ring buffer + short int len; // number of chars in ring buffer + + // Initialize the ring buffer with a common string. + // + // Note that the last F bytes of the ring buffer are not filled. + + memset(m_ring_buffer, ' ', N - F); + + r = N - F; + + flags = (char) 0; + flag_count = 0; + + for ( ; ; ) + { + + // If there are more bits of interest in this flag, then + // shift that next interesting bit into the 1's position. + // + // If this flag has been exhausted, the next byte must + // be a flag. + + if (flag_count > 0) + { + flags = (unsigned char) (flags >> 1); + flag_count--; + } + else + { + // Next byte must be a flag. + + if (GetChars(&flags, 1) != 1) + break; + + // Set the flag counter. 
While at first it might appear + // that this should be an 8 since there are 8 bits in the + // flag, it should really be a 7 because the shift must + // be performed 7 times in order to see all 8 bits. + + flag_count = 7; + } + + // If the low order bit of the flag is now set, then we know + // that the next byte is a single, unencoded character. + + if (flags & 1) + { + if (GetChars(c, 1) != 1) + break; + + if (SendChars(c, 1) != 1) + break; + + // Add to buffer, and increment to next spot. Wrap at end. + + m_ring_buffer[r] = c[0]; + r = (short int) ( (r + 1) & (N - 1) ); + } + + // Otherwise, we know that the next two bytes are a + // <position,length> pair. The position is in 12 bits and + // the length is in 4 bits. + + else + { + // Original code: + // if ((i = getc(infile)) == EOF) + // break; + // if ((j = getc(infile)) == EOF) + // break; + // i |= ((j & 0xf0) << 4); + // j = (j & 0x0f) + THRESHOLD; + // + // I've modified this to only make one input call, and + // have changed the variable names to something more + // obvious. + + if (GetChars(c, 2) != 2) + break; + + // Convert these two characters into the position and + // length. Note that the length is always at least + // THRESHOLD, which is why we're able to get a length + // of 18 out of only 4 bits. + + pos = (short int) ( c[0] | ((c[1] & 0xf0) << 4) ); + + len = (short int) ( (c[1] & 0x0f) + THRESHOLD ); + + // There are now "len" characters at position "pos" in + // the ring buffer that can be pulled out. Note that + // len is never more than F. + + for (k = 0; k < len; k++) + { + c[k] = m_ring_buffer[(pos + k) & (N - 1)]; + + // Add to buffer, and increment to next spot. Wrap at end. + + m_ring_buffer[r] = c[k]; + r = (short int) ( (r + 1) & (N - 1) ); + } + + // Add the "len" characters to the output stream. 
+ + if (SendChars(c, len) != len) + break; + } + } + } + diff --git a/src/modules/common/entriesblk.cpp b/src/modules/common/entriesblk.cpp new file mode 100644 index 0000000..d38cf53 --- /dev/null +++ b/src/modules/common/entriesblk.cpp @@ -0,0 +1,166 @@ +#include <entriesblk.h> +#include <stdlib.h> +#include <string.h> + +const int EntriesBlock::METAHEADERSIZE = 4; + // count(4); +const int EntriesBlock::METAENTRYSIZE = 8; + // offset(4); size(4); + +EntriesBlock::EntriesBlock(const char *iBlock, unsigned long size) { + block = (char *)calloc(1, size); + memcpy(block, iBlock, size); +} + + +EntriesBlock::EntriesBlock() { + block = (char *)calloc(1, sizeof(__u32)); +} + + +EntriesBlock::~EntriesBlock() { + free(block); +} + + +void EntriesBlock::setCount(int count) { + __u32 rawCount = archtosword32(count); + memcpy(block, &rawCount, sizeof(__u32)); +} + + +int EntriesBlock::getCount() { + __u32 count = 0; + memcpy(&count, block, sizeof(__u32)); + count = swordtoarch32(count); + return count; +} + + +void EntriesBlock::getMetaEntry(int index, unsigned long *offset, unsigned long *size) { + __u32 rawOffset = 0; + __u32 rawSize = 0; + *offset = 0; + *size = 0; + if (index >= getCount()) // assert index < count + return; + + // first 4 bytes is count, each 6 bytes after is each meta entry + memcpy(&rawOffset, block + METAHEADERSIZE + (index * METAENTRYSIZE), sizeof(rawOffset)); + memcpy(&rawSize, block + METAHEADERSIZE + (index * METAENTRYSIZE) + sizeof(rawOffset), sizeof(rawSize)); + + *offset = (unsigned long)swordtoarch32(rawOffset); + *size = (unsigned long)swordtoarch32(rawSize); +} + + +void EntriesBlock::setMetaEntry(int index, unsigned long offset, unsigned long size) { + __u32 rawOffset = archtosword32(offset); + __u32 rawSize = archtosword32(size); + + if (index >= getCount()) // assert index < count + return; + + // first 4 bytes is count, each 6 bytes after is each meta entry + memcpy(block + METAHEADERSIZE + (index * METAENTRYSIZE), &rawOffset, 
sizeof(rawOffset)); + memcpy(block + METAHEADERSIZE + (index * METAENTRYSIZE) + sizeof(rawOffset), &rawSize, sizeof(rawSize)); +} + + +const char *EntriesBlock::getRawData(unsigned long *retSize) { + unsigned long max = 4; + int loop; + unsigned long offset; + unsigned long size; + for (loop = 0; loop < getCount(); loop++) { + getMetaEntry(loop, &offset, &size); + max = ((offset + size) > max) ? (offset + size) : max; + } + *retSize = max; + return block; +} + + +int EntriesBlock::addEntry(const char *entry) { + unsigned long dataSize; + getRawData(&dataSize); + unsigned long len = strlen(entry); + unsigned long offset; + unsigned long size; + int count = getCount(); + unsigned long dataStart = METAHEADERSIZE + (count * METAENTRYSIZE); + // new meta entry + new data size + 1 because null + block = (char *)realloc(block, dataSize + METAENTRYSIZE + len + 1); + // shift right to make room for new meta entry + memmove(block + dataStart + METAENTRYSIZE, block + dataStart, dataSize - dataStart); + + for (int loop = 0; loop < count; loop++) { + getMetaEntry(loop, &offset, &size); + if (offset) { // if not a deleted entry + offset += METAENTRYSIZE; + setMetaEntry(loop, offset, size); + } + } + + offset = dataSize; // original dataSize before realloc + size = len + 1; + // add our text to the end + memcpy(block + offset + METAENTRYSIZE, entry, size); + // increment count + setCount(count + 1); + // add our meta entry + setMetaEntry(count, offset + METAENTRYSIZE, size); + // return index of our new entry + return count; +} + + +const char *EntriesBlock::getEntry(int entryIndex) { + unsigned long offset; + unsigned long size; + static char *empty = ""; + + getMetaEntry(entryIndex, &offset, &size); + return (offset) ? block+offset : empty; +} + + +unsigned long EntriesBlock::getEntrySize(int entryIndex) { + unsigned long offset; + unsigned long size; + + getMetaEntry(entryIndex, &offset, &size); + return (offset) ? 
size : 0; +} + + +void EntriesBlock::removeEntry(int entryIndex) { + unsigned long offset; + unsigned long size, size2; + unsigned long dataSize; + getRawData(&dataSize); + getMetaEntry(entryIndex, &offset, &size); + unsigned long len = size - 1; + int count = getCount(); + unsigned long dataStart = METAHEADERSIZE + (count * METAENTRYSIZE); + + if (!offset) // already deleted + return; + + // shift left to retrieve space used for old entry + memmove(block + offset, block + offset + size, dataSize - (offset + size)); + + // fix offset for all entries after our entry that were shifted left + for (int loop = entryIndex + 1; loop < count; loop++) { + getMetaEntry(loop, &offset, &size2); + if (offset) { // if not a deleted entry + offset -= size; + setMetaEntry(loop, offset, size2); + } + } + + // zero out our meta entry + setMetaEntry(entryIndex, 0L, 0); +} + + diff --git a/src/modules/common/lzsscomprs.cpp b/src/modules/common/lzsscomprs.cpp new file mode 100644 index 0000000..3606fbc --- /dev/null +++ b/src/modules/common/lzsscomprs.cpp @@ -0,0 +1,665 @@ +/****************************************************************************** + * lzsscomprs.cpp - code for class 'LZSSCompress'- a driver class that + * provides LZSS compression + */ + +#include <string.h> +#include <stdlib.h> +#include <lzsscomprs.h> + + +/****************************************************************************** + * LZSSCompress Statics + */ + +// m_ring_buffer is a text buffer. It contains "nodes" of +// uncompressed text that can be indexed by position. That is, +// a substring of the ring buffer can be indexed by a position +// and a length. When decoding, the compressed text may contain +// a position in the ring buffer and a count of the number of +// bytes from the ring buffer that are to be moved into the +// uncompressed buffer. +// +// This ring buffer is not maintained as part of the compressed +// text. Instead, it is reconstructed dynamically. 
That is, +// it starts out empty and gets built as the text is decompressed. +// +// The ring buffer contain N bytes, with an additional F - 1 bytes +// to facilitate string comparison. + +unsigned char LZSSCompress::m_ring_buffer[N + F - 1]; + +// m_match_position and m_match_length are set by InsertNode(). +// +// These variables indicate the position in the ring buffer +// and the number of characters at that position that match +// a given string. + +short int LZSSCompress::m_match_position; +short int LZSSCompress::m_match_length; + +// m_lson, m_rson, and m_dad are the Japanese way of referring to +// a tree structure. The dad is the parent and it has a right and +// left son (child). +// +// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right +// and left children of node i. +// +// For i = 0 to N-1, m_dad[i] is the parent of node i. +// +// For i = 0 to 255, rson[N + i + 1] is the root of the tree for +// strings that begin with the character i. Note that this requires +// one byte characters. +// +// These nodes store values of 0...(N-1). Memory requirements +// can be reduces by using 2-byte integers instead of full 4-byte +// integers (for 32-bit applications). Therefore, these are +// defined as "short ints." + +short int LZSSCompress::m_lson[N + 1]; +short int LZSSCompress::m_rson[N + 257]; +short int LZSSCompress::m_dad[N + 1]; + + +/****************************************************************************** + * LZSSCompress Constructor - Initializes data for instance of LZSSCompress + * + */ + +LZSSCompress::LZSSCompress() : SWCompress() { +} + + +/****************************************************************************** + * LZSSCompress Destructor - Cleans up instance of LZSSCompress + */ + +LZSSCompress::~LZSSCompress() { +} + + +/****************************************************************************** + * LZSSCompress::InitTree - This function initializes the tree nodes to + * "empty" states. 
+ */ + +void LZSSCompress::InitTree(void) { + int i; + + // For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right + // and left children of node i. These nodes need not be + // initialized. However, for debugging purposes, it is nice to + // have them initialized. Since this is only used for compression + // (not decompression), I don't mind spending the time to do it. + // + // For the same range of i, m_dad[i] is the parent of node i. + // These are initialized to a known value that can represent + // a "not used" state. + + for (i = 0; i < N; i++) { + m_lson[i] = NOT_USED; + m_rson[i] = NOT_USED; + m_dad[i] = NOT_USED; + } + + // For i = 0 to 255, m_rson[N + i + 1] is the root of the tree + // for strings that begin with the character i. This is why + // the right child array is larger than the left child array. + // These are also initialzied to a "not used" state. + // + // Note that there are 256 of these, one for each of the possible + // 256 characters. + + for (i = N + 1; i <= (N + 256); i++) { + m_rson[i] = NOT_USED; + } +} + + +/****************************************************************************** + * LZSSCompress::InsertNode - This function inserts a string from the ring + * buffer into one of the trees. It loads the + * match position and length member variables + * for the longest match. + * + * The string to be inserted is identified by + * the parameter Pos, A full F bytes are + * inserted. So, + * m_ring_buffer[Pos ... Pos+F-1] + * are inserted. + * + * If the matched length is exactly F, then an + * old node is removed in favor of the new one + * (because the old one will be deleted + * sooner). + * + * Note that Pos plays a dual role. It is + * used as both a position in the ring buffer + * and also as a tree node. + * m_ring_buffer[Pos] defines a character that + * is used to identify a tree node. 
+ * + * ENT: pos - position in the buffer + */ + +void LZSSCompress::InsertNode(short int Pos) +{ + short int i; + short int p; + int cmp; + unsigned char * key; + +/* + ASSERT(Pos >= 0); + ASSERT(Pos < N); +*/ + + cmp = 1; + key = &(m_ring_buffer[Pos]); + + // The last 256 entries in m_rson contain the root nodes for + // strings that begin with a letter. Get an index for the + // first letter in this string. + + p = (short int) (N + 1 + key[0]); + + // Set the left and right tree nodes for this position to "not + // used." + + m_lson[Pos] = NOT_USED; + m_rson[Pos] = NOT_USED; + + // Haven't matched anything yet. + + m_match_length = 0; + + for ( ; ; ) { + if (cmp >= 0) { + if (m_rson[p] != NOT_USED) { + p = m_rson[p]; + } + else { + m_rson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + else { + if (m_lson[p] != NOT_USED) { + p = m_lson[p]; + } + else { + m_lson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + + // Should we go to the right or the left to look for the + // next match? + + for (i = 1; i < F; i++) { + cmp = key[i] - m_ring_buffer[p + i]; + if (cmp != 0) + break; + } + + if (i > m_match_length) { + m_match_position = p; + m_match_length = i; + + if (i >= F) + break; + } + } + + m_dad[Pos] = m_dad[p]; + m_lson[Pos] = m_lson[p]; + m_rson[Pos] = m_rson[p]; + + m_dad[ m_lson[p] ] = Pos; + m_dad[ m_rson[p] ] = Pos; + + if (m_rson[ m_dad[p] ] == p) { + m_rson[ m_dad[p] ] = Pos; + } + else { + m_lson[ m_dad[p] ] = Pos; + } + + // Remove "p" + + m_dad[p] = NOT_USED; +} + + +/****************************************************************************** + * LZSSCompress::DeleteNode - This function removes the node "Node" from the + * tree. 
+ * + * ENT: node - node to be removed + */ + +void LZSSCompress::DeleteNode(short int Node) +{ + short int q; + +/* + ASSERT(Node >= 0); + ASSERT(Node < (N+1)); +*/ + + if (m_dad[Node] == NOT_USED) { // not in tree, nothing to do + return; + } + + if (m_rson[Node] == NOT_USED) { + q = m_lson[Node]; + } + else if (m_lson[Node] == NOT_USED) { + q = m_rson[Node]; + } + else { + q = m_lson[Node]; + if (m_rson[q] != NOT_USED) { + do { + q = m_rson[q]; + } while (m_rson[q] != NOT_USED); + + m_rson[ m_dad[q] ] = m_lson[q]; + m_dad[ m_lson[q] ] = m_dad[q]; + m_lson[q] = m_lson[Node]; + m_dad[ m_lson[Node] ] = q; + } + + m_rson[q] = m_rson[Node]; + m_dad[ m_rson[Node] ] = q; + } + + m_dad[q] = m_dad[Node]; + + if (m_rson[ m_dad[Node] ] == Node) { + m_rson[ m_dad[Node] ] = q; + } + else { + m_lson[ m_dad[Node] ] = q; + } + + m_dad[Node] = NOT_USED; +} + + +/****************************************************************************** + * LZSSCompress::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + * NOTE: must set zlen for parent class to know length of + * compressed buffer. + */ + +void LZSSCompress::Encode(void) +{ + short int i; // an iterator + short int r; // node number in the binary tree + short int s; // position in the ring buffer + unsigned short int len; // len of initial string + short int last_match_length; // length of last match + short int code_buf_pos; // position in the output buffer + unsigned char code_buf[17]; // the output buffer + unsigned char mask; // bit mask for byte 0 of out buf + unsigned char c; // character read from string + + // Start with a clean tree. + + InitTree(); + direct = 0; // set direction needed by parent [Get|Send]Chars() + + // code_buf[0] works as eight flags. 
A "1" represents that the + // unit is an unencoded letter (1 byte), and a "0" represents + // that the next unit is a <position,length> pair (2 bytes). + // + // code_buf[1..16] stores eight units of code. Since the best + // we can do is store eight <position,length> pairs, at most 16 + // bytes are needed to store this. + // + // This is why the maximum size of the code buffer is 17 bytes. + + code_buf[0] = 0; + code_buf_pos = 1; + + // Mask iterates over the 8 bits in the code buffer. The first + // character ends up being stored in the low bit. + // + // bit 8 7 6 5 4 3 2 1 + // | | + // | first sequence in code buffer + // | + // last sequence in code buffer + + mask = 1; + + s = 0; + r = (short int) N - (short int) F; + + // Initialize the ring buffer with spaces... + + // Note that the last F bytes of the ring buffer are not filled. + // This is because those F bytes will be filled in immediately + // with bytes from the input stream. + + memset(m_ring_buffer, ' ', N - F); + + // Read F bytes into the last F bytes of the ring buffer. + // + // This function loads the buffer with X characters and returns + // the actual amount loaded. + + len = GetChars((char *) &(m_ring_buffer[r]), F); + + // Make sure there is something to be compressed. + + if (len == 0) + return; + + // Insert the F strings, each of which begins with one or more + // 'space' characters. Note the order in which these strings + // are inserted. This way, degenerate trees will be less likely + // to occur. + + for (i = 1; i <= F; i++) { + InsertNode((short int) (r - i)); + } + + // Finally, insert the whole string just read. The + // member variables match_length and match_position are set. + + InsertNode(r); + + // Now that we're preloaded, continue till done. + + do { + + // m_match_length may be spuriously long near the end of + // text. + + if (m_match_length > len) { + m_match_length = len; + } + + // Is it cheaper to store this as a single character? If so, + // make it so. 
+ + if (m_match_length < THRESHOLD) { + // Send one character. Remember that code_buf[0] is the + // set of flags for the next eight items. + + m_match_length = 1; + code_buf[0] |= mask; + code_buf[code_buf_pos++] = m_ring_buffer[r]; + } + + // Otherwise, we do indeed have a string that can be stored + // compressed to save space. + + else { + // The next 16 bits need to contain the position (12 bits) + // and the length (4 bits). + + code_buf[code_buf_pos++] = (unsigned char) m_match_position; + code_buf[code_buf_pos++] = (unsigned char) ( + ((m_match_position >> 4) & 0xf0) | + (m_match_length - THRESHOLD) ); + } + + // Shift the mask one bit to the left so that it will be ready + // to store the new bit. + + mask = (unsigned char) (mask << 1); + + // If the mask is now 0, then we know that we have a full set + // of flags and items in the code buffer. These need to be + // output. + + if (!mask) { + // code_buf is the buffer of characters to be output. + // code_buf_pos is the number of characters it contains. + + SendChars((char *) code_buf, code_buf_pos); + + // Reset for next buffer... + + code_buf[0] = 0; + code_buf_pos = 1; + mask = 1; + } + + last_match_length = m_match_length; + + // Delete old strings and read new bytes... + + for (i = 0; i < last_match_length; i++) { + // Get next character... + + if (GetChars((char *) &c, 1) != 1) + break; + + // Delete "old strings" + + DeleteNode(s); + + // Put this character into the ring buffer. + // + // The original comment here says "If the position is near + // the end of the buffer, extend the buffer to make + // string comparison easier." + // + // That's a little misleading, because the "end" of the + // buffer is really what we consider to be the "beginning" + // of the buffer, that is, positions 0 through F. 
+ // + // The idea is that the front end of the buffer is duplicated + // into the back end so that when you're looking at characters + // at the back end of the buffer, you can index ahead (beyond + // the normal end of the buffer) and see the characters + // that are at the front end of the buffer wihtout having + // to adjust the index. + // + // That is... + // + // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234 + // | | | + // position 0 end of buffer | + // | + // duplicate of front of buffer + + m_ring_buffer[s] = c; + + if (s < F - 1) { + m_ring_buffer[s + N] = c; + } + + // Increment the position, and wrap around when we're at + // the end. Note that this relies on N being a power of 2. + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Register the string that is found in + // m_ring_buffer[r..r+F-1]. + + InsertNode(r); + } + + // If we didn't quit because we hit the last_match_length, + // then we must have quit because we ran out of characters + // to process. + + while (i++ < last_match_length) { + DeleteNode(s); + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Note that len hitting 0 is the key that causes the + // do...while() to terminate. This is the only place + // within the loop that len is modified. + // + // Its original value is F (or a number less than F for + // short strings). + + if (--len) { + InsertNode(r); /* buffer may not be empty. */ + } + } + + // End of do...while() loop. Continue processing until there + // are no more characters to be compressed. The variable + // "len" is used to signal this condition. + } while (len > 0); + + // There could still be something in the output buffer. Send it + // now. + + if (code_buf_pos > 1) { + // code_buf is the encoded string to send. + // code_buf_ptr is the number of characters. 
+ + SendChars((char *) code_buf, code_buf_pos); + } + + + // must set zlen for parent class to know length of compressed buffer + zlen = zpos; +}


/******************************************************************************
 * LZSSCompress::Decode - This function "decodes" the input stream into the
 *				output stream.
 *				The GetChars() and SendChars() functions are
 *				used to separate this method from the actual
 *				i/o.
 *		NOTE:	sets slen to the number of bytes that were
 *				successfully handed to SendChars().
 */

void LZSSCompress::Decode(void)
{
	int k;
	int r;				// next write position in the ring buffer
	unsigned char c[F];		// scratch: raw input bytes, then the expanded run
	unsigned char flags;		// current flag byte, shifted as bits are consumed
	int flag_count;			// shifts still available in `flags`
	short int pos;			// ring buffer position of a <position,length> pair
	short int len;			// length of a <position,length> pair
	unsigned long totalLen = 0;	// bytes successfully delivered to SendChars()

	direct = 1;	// set direction needed by parent [Get|Send]Chars()

	// Initialize the ring buffer with a common string.
	//
	// Note that the last F bytes of the ring buffer are not filled.

	memset(m_ring_buffer, ' ', N - F);

	r = N - F;

	flags = (char) 0;
	flag_count = 0;

	for ( ; ; ) {

		// If there are more bits of interest in this flag, then
		// shift that next interesting bit into the 1's position.
		//
		// If this flag has been exhausted, the next byte must
		// be a flag.

		if (flag_count > 0) {
			flags = (unsigned char) (flags >> 1);
			flag_count--;
		}
		else {
			// Next byte must be a flag.

			if (GetChars((char *) &flags, 1) != 1)
				break;

			// Set the flag counter.  While at first it might appear
			// that this should be an 8 since there are 8 bits in the
			// flag, it should really be a 7 because the shift must
			// be performed 7 times in order to see all 8 bits.

			flag_count = 7;
		}

		// If the low order bit of the flag is now set, then we know
		// that the next byte is a single, unencoded character.

		if (flags & 1) {
			if (GetChars((char *) c, 1) != 1)
				break;

			if (SendChars((char *) c, 1) != 1)
				break;

			// Count the byte only once it has actually been sent.
			// (Previously the counter was bumped on the failure
			// path only, so a clean decode reported a length of 0.)
			totalLen++;

			// Add to buffer, and increment to next spot. Wrap at end.

			m_ring_buffer[r] = c[0];
			r = (short int) ( (r + 1) & (N - 1) );
		}

		// Otherwise, we know that the next two bytes are a
		// <position,length> pair.  The position is in 12 bits and
		// the length is in 4 bits.

		else {
			// Original code:
			//  if ((i = getc(infile)) == EOF)
			//	  break;
			//  if ((j = getc(infile)) == EOF)
			//	  break;
			//  i |= ((j & 0xf0) << 4);
			//  j = (j & 0x0f) + THRESHOLD;
			//
			// I've modified this to only make one input call, and
			// have changed the variable names to something more
			// obvious.

			if (GetChars((char *) c, 2) != 2)
				break;

			// Convert these two characters into the position and
			// length.  Note that the length is always at least
			// THRESHOLD, which is why we're able to get a length
			// of 18 out of only 4 bits.

			pos = (short int) ( c[0] | ((c[1] & 0xf0) << 4) );

			len = (short int) ( (c[1] & 0x0f) + THRESHOLD );

			// There are now "len" characters at position "pos" in
			// the ring buffer that can be pulled out.  Note that
			// len is never more than F.

			for (k = 0; k < len; k++) {
				c[k] = m_ring_buffer[(pos + k) & (N - 1)];

				// Add to buffer, and increment to next spot. Wrap at end.

				m_ring_buffer[r] = c[k];
				r = (short int) ( (r + 1) & (N - 1) );
			}

			// Add the "len" characters to the output stream.

			if (SendChars((char *) c, len) != (unsigned int)len)
				break;

			// As above: account for the run only after a successful send.
			totalLen += len;
		}
	}
	slen = totalLen;
}
diff --git a/src/modules/common/rawstr.cpp b/src/modules/common/rawstr.cpp new file mode 100644 index 0000000..c7363d9 --- /dev/null +++ b/src/modules/common/rawstr.cpp @@ -0,0 +1,551 @@ +/****************************************************************************** + * rawstr.cpp - code for class 'RawStr'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class StrKey + */ + + +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <stdlib.h> +#include <utilfuns.h> +#include <rawstr.h> +#include <sysdata.h> +/****************************************************************************** + * RawStr Statics + */ + +int RawStr::instance = 0; +char RawStr::nl = '\n'; + + +/****************************************************************************** + * RawStr Constructor - Initializes data for instance of RawStr + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 'modules/texts/rawtext/webster/') + */ + +RawStr::RawStr(const char *ipath, int fileMode) +{ + char buf[127]; + + lastoff = -1; + path = 0; + stdstr(&path, ipath); + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it.
+ + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s.idx", path); + idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.dat", path); + datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + if (datfd < 0) { + sprintf(buf, "Error: %d", errno); + perror(buf); + } + + instance++; +} + + +/****************************************************************************** + * RawStr Destructor - Cleans up instance of RawStr + */ + +RawStr::~RawStr() +{ + if (path) + delete [] path; + + --instance; + + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); +} + + +/****************************************************************************** + * RawStr::getidxbufdat - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in dat file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr::getidxbufdat(long ioffset, char **buf) +{ + int size; + char ch; + if (datfd > 0) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + for (size = 0; read(datfd->getFd(), &ch, 1) == 1; size++) { + if ((ch == '\\') || (ch == 10) || (ch == 13)) + break; + } + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + if (size) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + read(datfd->getFd(), *buf, size); + } + (*buf)[size] = 0; + toupperstr_utf8(*buf); + } + else { + *buf = (*buf) ? 
(char *)realloc(*buf, 1) : (char *)malloc(1); + **buf = 0; + } +} + + +/****************************************************************************** + * RawStr::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr::getidxbuf(long ioffset, char **buf) +{ + char *trybuf, *targetbuf; + long offset; + + if (idxfd > 0) { + lseek(idxfd->getFd(), ioffset, SEEK_SET); + read(idxfd->getFd(), &offset, 4); + + offset = swordtoarch32(offset); + + getidxbufdat(offset, buf); + toupperstr_utf8(*buf); + } +} + + +/****************************************************************************** + * RawStr::findoffset - Finds the offset of the key string from the indexes + * + * ENT: key - key string to lookup + * start - address to store the starting offset + * size - address to store the size of the entry + * away - number of entries before of after to jump + * (default = 0) + * + * RET: error status -1 general error; -2 new file + */ + +signed char RawStr::findoffset(const char *ikey, long *start, unsigned short *size, long away, long *idxoff) +{ + char *trybuf, *targetbuf, *key, quitflag = 0; + signed char retval = -1; + long headoff, tailoff, tryoff = 0, maxoff = 0; + + if (idxfd->getFd() >=0) { + tailoff = maxoff = lseek(idxfd->getFd(), 0, SEEK_END) - 6; + retval = (tailoff >= 0) ? 0 : -2; // if NOT new file + if (*ikey) { + headoff = 0; + + key = new char [ strlen(ikey) + 1 ]; + strcpy(key, ikey); + toupperstr_utf8(key); + + trybuf = 0; + + while (headoff < tailoff) { + tryoff = (lastoff == -1) ? 
headoff + ((((tailoff / 6) - (headoff / 6))) / 2) * 6 : lastoff; + lastoff = -1; + getidxbuf(tryoff, &trybuf); + + if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry) + tryoff += (tryoff > (maxoff / 2))?-6:6; + retval = -1; + break; + } + + if (!strcmp(key, trybuf)) + break; + + int diff = strcmp(key, trybuf); + if (diff < 0) + tailoff = (tryoff == headoff) ? headoff : tryoff; + else headoff = tryoff; + if (tailoff == headoff + 6) { + if (quitflag++) + headoff = tailoff; + } + } + if (headoff >= tailoff) + tryoff = headoff; + if (trybuf) + free(trybuf); + delete [] key; + } + else tryoff = 0; + + lseek(idxfd->getFd(), tryoff, SEEK_SET); + + *start = *size = 0; + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 2); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch16(*size); + + while (away) { + long laststart = *start; + unsigned short lastsize = *size; + long lasttry = tryoff; + tryoff += (away > 0) ? 6 : -6; + + bool bad = false; + if (((tryoff + (away*6)) < -6) || (tryoff + (away*6) > (maxoff+6))) + bad = true; + else if (lseek(idxfd->getFd(), tryoff, SEEK_SET) < 0) + bad = true; + if (bad) { + retval = -1; + *start = laststart; + *size = lastsize; + tryoff = lasttry; + if (idxoff) + *idxoff = tryoff; + break; + } + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 2); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch16(*size); + + if (((laststart != *start) || (lastsize != *size)) && (*start >= 0) && (*size)) + away += (away < 0) ? 1 : -1; + } + + lastoff = tryoff; + } + else { + *start = 0; + *size = 0; + if (idxoff) + *idxoff = 0; + retval = -1; + } + return retval; +} + + +/****************************************************************************** + * RawStr::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. 
+ */

// Rewrites buf in place:
//   - CR (13) and LF (10) seen before any other character are dropped;
//   - a CR becomes one `nl`;
//   - the first LF of a run emits nothing, each further LF emits `nl`;
//   - an LF not preceded by CR makes the next non-blank character be
//     preceded by one space;
//   - trailing LF/space characters are cut off (buf[0] is never examined).
void RawStr::preptext(char *buf) {
	char pendingSpace = 0;	// a space is owed before the next text character
	char sawCR = 0;		// previous line break was a CR
	char haveText = 0;	// some non-CR/LF character has been seen
	char lfRun = 0;		// LFs seen since the last text character
	char *out = buf;

	for (char *in = buf; *in; in++) {
		if (*in == 10) {
			if (haveText) {
				pendingSpace = (sawCR) ? 0 : 1;
				sawCR = 0;
				lfRun++;
				if (lfRun > 1)
					*out++ = nl;
			}
			continue;
		}

		if (*in == 13) {
			if (haveText) {
				*out++ = nl;
				pendingSpace = 0;
				sawCR = 1;
			}
			continue;
		}

		haveText = 1;
		lfRun = 0;

		if (pendingSpace) {
			pendingSpace = 0;
			if (*in != ' ') {
				// emit the owed space, then look at this
				// character again on the next pass
				*out++ = ' ';
				in--;
				continue;
			}
		}

		*out++ = *in;
	}
	*out = 0;

	// remove trailing excess
	while (out > (buf + 1)) {
		--out;
		if ((*out != 10) && (*out != ' '))
			break;
		*out = 0;
	}
}


+/****************************************************************************** + * RawStr::readtext - gets text at a given offset + * + * ENT: + * start - starting offset where the text is located in the file + * size - size of text entry + * buf - buffer to store text + * + */ + +void RawStr::readtext(long istart, unsigned short *isize, char **idxbuf, char **buf) +{ + char *ch; + char *idxbuflocal = 0; + getidxbufdat(istart, &idxbuflocal); + long start = istart; + + do { + if (*idxbuf) + delete [] *idxbuf; + if (*buf) + delete [] *buf; + *buf = new char [ ++(*isize) * FILTERPAD ]; + *idxbuf = new char [ (*isize) * FILTERPAD ]; + + memset(*buf, 0, *isize); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), *buf, (int)((*isize) - 1)); + + for (ch = *buf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(*buf, ch, *isize - (unsigned long)(ch-*buf)); + + // resolve link + if (!strncmp(*buf, "@LINK", 5)) { + for (ch = *buf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findoffset(*buf + 6, &start, isize); + } + else break; + } + while (true); // while we're resolving links + + if (idxbuflocal) { + int localsize = strlen(idxbuflocal); + localsize = (localsize < (*isize - 1)) ?
localsize : (*isize - 1); + strncpy(*idxbuf, idxbuflocal, localsize); + (*idxbuf)[localsize] = 0; + free(idxbuflocal); + } +} + + +/****************************************************************************** + * RawLD::settext - Sets text for current offset + * + * ENT: key - key for this entry + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void RawStr::settext(const char *ikey, const char *buf, long len) +{ + + long start, outstart; + long idxoff; + long endoff; + long shiftSize; + unsigned short size; + unsigned short outsize; + static const char nl[] = {13, 10}; + char *tmpbuf = 0; + char *key = 0; + char *dbKey = 0; + char *idxBytes = 0; + char *outbuf = 0; + char *ch = 0; + + char errorStatus = findoffset(ikey, &start, &size, 0, &idxoff); + stdstr(&key, ikey); + toupperstr_utf8(key); + + len = (len < 0) ? strlen(buf) : len; + + getidxbufdat(start, &dbKey); + + if (strcmp(key, dbKey) < 0) { + } + else if (strcmp(key, dbKey) > 0) { + if (errorStatus != -2) // not a new file + idxoff += 6; + else idxoff = 0; + } + else if ((!strcmp(key, dbKey)) && (len>0 /*we're not deleting*/)) { // got absolute entry + do { + tmpbuf = new char [ size + 2 ]; + memset(tmpbuf, 0, size + 2); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), tmpbuf, (int)(size - 1)); + + for (ch = tmpbuf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(tmpbuf, ch, size - (unsigned short)(ch-tmpbuf)); + + // resolve link + if (!strncmp(tmpbuf, "@LINK", 5) && (len)) { + for (ch = tmpbuf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findoffset(tmpbuf + 6, &start, &size, 0, &idxoff); + } + else break; + } + while (true); // while we're resolving links + } + + endoff = lseek(idxfd->getFd(), 0, SEEK_END); + + shiftSize = endoff - idxoff; + + if (shiftSize > 0) { + idxBytes = new char [ shiftSize ]; + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), idxBytes, 
shiftSize); + } + + outbuf = new char [ len + strlen(key) + 5 ]; + sprintf(outbuf, "%s%c%c", key, 13, 10); + size = strlen(outbuf); + memcpy(outbuf + size, buf, len); + size = outsize = size + (len); + + start = outstart = lseek(datfd->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword16(size); + + lseek(idxfd->getFd(), idxoff, SEEK_SET); + if (len > 0) { + lseek(datfd->getFd(), start, SEEK_SET); + write(datfd->getFd(), outbuf, (int)size); + + // add a new line to make data file easier to read in an editor + write(datfd->getFd(), &nl, 2); + + write(idxfd->getFd(), &outstart, 4); + write(idxfd->getFd(), &outsize, 2); + if (idxBytes) { + write(idxfd->getFd(), idxBytes, shiftSize); + delete [] idxBytes; + } + } + else { // delete entry + if (idxBytes) { + write(idxfd->getFd(), idxBytes+6, shiftSize-6); + lseek(idxfd->getFd(), -1, SEEK_CUR); // last valid byte + FileMgr::systemFileMgr.trunc(idxfd); // truncate index + delete [] idxBytes; + } + } + + delete [] key; + delete [] outbuf; + free(dbKey); +} + + +/****************************************************************************** + * RawLD::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawStr::linkentry(const char *destkey, const char *srckey) { + char *text = new char [ strlen(destkey) + 7 ]; + sprintf(text, "@LINK %s", destkey); + settext(srckey, text); + delete [] text; +} + + +/****************************************************************************** + * RawLD::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +signed char RawStr::createModule(const char *ipath) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) 
+ path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s.idx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; + + return 0; +} diff --git a/src/modules/common/rawstr4.cpp b/src/modules/common/rawstr4.cpp new file mode 100644 index 0000000..da0789b --- /dev/null +++ b/src/modules/common/rawstr4.cpp @@ -0,0 +1,555 @@ +/****************************************************************************** + * rawstr.cpp - code for class 'RawStr'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class StrKey + */ + +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <stdlib.h> +#include <utilfuns.h> +#include <rawstr4.h> +#include <sysdata.h> + +/****************************************************************************** + * RawStr Statics + */ + +int RawStr4::instance = 0; + + +/****************************************************************************** + * RawStr Constructor - Initializes data for instance of RawStr + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 'modules/texts/rawtext/webster/') + */ + +RawStr4::RawStr4(const char *ipath, int fileMode) +{ + char buf[127]; + + nl = '\n'; + lastoff = -1; + path = 0; + stdstr(&path, ipath); + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. 
+ + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s.idx", path); + idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.dat", path); + datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + if (datfd < 0) { + sprintf(buf, "Error: %d", errno); + perror(buf); + } + + instance++; +} + + +/****************************************************************************** + * RawStr Destructor - Cleans up instance of RawStr + */ + +RawStr4::~RawStr4() +{ + if (path) + delete [] path; + + --instance; + + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); +} + + +/****************************************************************************** + * RawStr4::getidxbufdat - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in dat file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr4::getidxbufdat(long ioffset, char **buf) { + int size; + char ch; + if (datfd > 0) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + for (size = 0; read(datfd->getFd(), &ch, 1) == 1; size++) { + if ((ch == '\\') || (ch == 10) || (ch == 13)) + break; + } + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + if (size) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + read(datfd->getFd(), *buf, size); + } + (*buf)[size] = 0; + toupperstr_utf8(*buf); + } + else { + *buf = (*buf) ? 
(char *)realloc(*buf, 1) : (char *)malloc(1); + **buf = 0; + } +} + + +/****************************************************************************** + * RawStr4::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr4::getidxbuf(long ioffset, char **buf) +{ + char *trybuf, *targetbuf; + long offset; + + if (idxfd > 0) { + lseek(idxfd->getFd(), ioffset, SEEK_SET); + read(idxfd->getFd(), &offset, 4); + + offset = swordtoarch32(offset); + + getidxbufdat(offset, buf); + for (trybuf = targetbuf = *buf; *trybuf; trybuf++, targetbuf++) { + *targetbuf = *trybuf; + } + *targetbuf = 0; + trybuf = 0; + toupperstr_utf8(targetbuf); + } +} + + +/****************************************************************************** + * RawStr4::findoffset - Finds the offset of the key string from the indexes + * + * ENT: key - key string to lookup + * start - address to store the starting offset + * size - address to store the size of the entry + * away - number of entries before of after to jump + * (default = 0) + * + * RET: error status -1 general error; -2 new file + */ + +signed char RawStr4::findoffset(const char *ikey, long *start, unsigned long *size, long away, long *idxoff) +{ + char *trybuf, *targetbuf, *key, quitflag = 0; + signed char retval = -1; + long headoff, tailoff, tryoff = 0, maxoff = 0; + + if (idxfd->getFd() >=0) { + tailoff = maxoff = lseek(idxfd->getFd(), 0, SEEK_END) - 8; + retval = (tailoff >= 0) ? 0 : -2; // if NOT new file + if (*ikey) { + headoff = 0; + + key = new char [ strlen(ikey) + 1 ]; + strcpy(key, ikey); + toupperstr_utf8(key); + + trybuf = 0; + + while (headoff < tailoff) { + tryoff = (lastoff == -1) ? 
headoff + ((((tailoff / 8) - (headoff / 8))) / 2) * 8 : lastoff; + lastoff = -1; + getidxbuf(tryoff, &trybuf); + + if (!*trybuf) { // In case of extra entry at end of idx + tryoff += (tryoff > (maxoff / 2))?-8:8; + retval = -1; + break; + } + + if (!strcmp(key, trybuf)) + break; + + int diff = strcmp(key, trybuf); + if (diff < 0) + tailoff = (tryoff == headoff) ? headoff : tryoff; + else headoff = tryoff; + if (tailoff == headoff + 8) { + if (quitflag++) + headoff = tailoff; + } + } + if (headoff >= tailoff) + tryoff = headoff; + if (trybuf) + free(trybuf); + delete [] key; + } + else tryoff = 0; + + lseek(idxfd->getFd(), tryoff, SEEK_SET); + + *start = *size = 0; + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 4); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch32(*size); + + while (away) { + long laststart = *start; + unsigned long lastsize = *size; + long lasttry = tryoff; + tryoff += (away > 0) ? 8 : -8; + + bool bad = false; + if (((tryoff + (away*8)) < -8) || (tryoff + (away*8) > (maxoff+8))) + bad = true; + else if (lseek(idxfd->getFd(), tryoff, SEEK_SET) < 0) + bad = true; + if (bad) { + retval = -1; + *start = laststart; + *size = lastsize; + tryoff = lasttry; + if (idxoff) + *idxoff = tryoff; + break; + } + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 4); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch32(*size); + + if (((laststart != *start) || (lastsize != *size)) && (*start >= 0) && (*size)) + away += (away < 0) ? 1 : -1; + } + + lastoff = tryoff; + } + else { + *start = 0; + *size = 0; + if (idxoff) + *idxoff = 0; + retval = -1; + } + return retval; +} + + +/****************************************************************************** + * RawStr4::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. 
+ */

// Rewrites buf in place:
//   - CR (13) and LF (10) seen before any other character are dropped;
//   - a CR becomes one `nl`;
//   - the first LF of a run emits nothing, each further LF emits `nl`;
//   - an LF not preceded by CR makes the next non-blank character be
//     preceded by one space;
//   - trailing LF/space characters are cut off (buf[0] is never examined).
void RawStr4::preptext(char *buf)
{
	char pendingSpace = 0;	// a space is owed before the next text character
	char sawCR = 0;		// previous line break was a CR
	char haveText = 0;	// some non-CR/LF character has been seen
	char lfRun = 0;		// LFs seen since the last text character
	char *out = buf;

	for (char *in = buf; *in; in++) {
		if (*in == 10) {
			if (haveText) {
				pendingSpace = (sawCR) ? 0 : 1;
				sawCR = 0;
				lfRun++;
				if (lfRun > 1)
					*out++ = nl;
			}
			continue;
		}

		if (*in == 13) {
			if (haveText) {
				*out++ = nl;
				pendingSpace = 0;
				sawCR = 1;
			}
			continue;
		}

		haveText = 1;
		lfRun = 0;

		if (pendingSpace) {
			pendingSpace = 0;
			if (*in != ' ') {
				// emit the owed space, then look at this
				// character again on the next pass
				*out++ = ' ';
				in--;
				continue;
			}
		}

		*out++ = *in;
	}
	*out = 0;

	// remove trailing excess
	while (out > (buf + 1)) {
		--out;
		if ((*out != 10) && (*out != ' '))
			break;
		*out = 0;
	}
}


+/****************************************************************************** + * RawStr4::readtext - gets text at a given offset + * + * ENT: + * start - starting offset where the text is located in the file + * size - size of text entry + * buf - buffer to store text + * + */ + +void RawStr4::readtext(long istart, unsigned long *isize, char **idxbuf, char **buf) +{ + char *ch; + char *idxbuflocal = 0; + getidxbufdat(istart, &idxbuflocal); + long start = istart; + + do { + if (*idxbuf) + delete [] *idxbuf; + if (*buf) + delete [] *buf; + *buf = new char [ ++(*isize) * FILTERPAD ]; + *idxbuf = new char [ (*isize) * FILTERPAD ]; + + memset(*buf, 0, *isize); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), *buf, (int)((*isize) - 1)); + + for (ch = *buf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(*buf, ch, *isize - (unsigned long)(ch-*buf)); + + // resolve link + if (!strncmp(*buf, "@LINK", 5)) { + for (ch = *buf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findoffset(*buf + 6, &start, isize); + } + else break; + } + while (true); // while we're resolving links + + if (idxbuflocal) { + int localsize = strlen(idxbuflocal); + localsize = (localsize < (*isize - 1))
? localsize : (*isize - 1); + strncpy(*idxbuf, idxbuflocal, localsize); + (*idxbuf)[localsize] = 0; + free(idxbuflocal); + } +} + + +/****************************************************************************** + * RawLD::settext - Sets text for current offset + * + * ENT: key - key for this entry + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void RawStr4::setText(const char *ikey, const char *buf, long len) { + + long start, outstart; + long idxoff; + long endoff; + long shiftSize; + unsigned long size; + unsigned long outsize; + static const char nl[] = {13, 10}; + char *tmpbuf = 0; + char *key = 0; + char *dbKey = 0; + char *idxBytes = 0; + char *outbuf = 0; + char *ch = 0; + + char errorStatus = findoffset(ikey, &start, &size, 0, &idxoff); + stdstr(&key, ikey); + toupperstr_utf8(key); + + len = (len < 0) ? strlen(buf) : len; + getidxbufdat(start, &dbKey); + + if (strcmp(key, dbKey) < 0) { + } + else if (strcmp(key, dbKey) > 0) { + if (errorStatus != -2) // not a new file + idxoff += 8; + else idxoff = 0; + } + else if ((!strcmp(key, dbKey)) && (len>0/*we're not deleting*/)) { // got absolute entry + do { + tmpbuf = new char [ size + 2 ]; + memset(tmpbuf, 0, size + 2); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), tmpbuf, (int)(size - 1)); + + for (ch = tmpbuf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf)); + + // resolve link + if (!strncmp(tmpbuf, "@LINK", 5) && (len > 0)) { + for (ch = tmpbuf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findoffset(tmpbuf + 8, &start, &size, 0, &idxoff); + ++size; + } + else break; + } + while (true); // while we're resolving links + } + + endoff = lseek(idxfd->getFd(), 0, SEEK_END); + + shiftSize = endoff - idxoff; + + if (shiftSize > 0) { + idxBytes = new char [ shiftSize ]; + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), 
idxBytes, shiftSize); + } + + outbuf = new char [ len + strlen(key) + 5 ]; + sprintf(outbuf, "%s%c%c", key, 13, 10); + size = strlen(outbuf); + memcpy(outbuf + size, buf, len); + size = outsize = size + len; + + start = outstart = lseek(datfd->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword32(size); + + lseek(idxfd->getFd(), idxoff, SEEK_SET); + if (len>0) { + lseek(datfd->getFd(), start, SEEK_SET); + write(datfd->getFd(), outbuf, (long)size); + + // add a new line to make data file easier to read in an editor + write(datfd->getFd(), &nl, 2); + + write(idxfd->getFd(), &outstart, 4); + write(idxfd->getFd(), &outsize, 4); + if (idxBytes) { + write(idxfd->getFd(), idxBytes, shiftSize); + delete [] idxBytes; + } + } + else { // delete entry + if (idxBytes) { + write(idxfd->getFd(), idxBytes+8, shiftSize-8); + lseek(idxfd->getFd(), -1, SEEK_CUR); // last valid byte + FileMgr::systemFileMgr.trunc(idxfd); // truncate index + delete [] idxBytes; + } + } + + delete [] key; + delete [] outbuf; + free(dbKey); +} + + +/****************************************************************************** + * RawLD::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawStr4::linkentry(const char *destkey, const char *srckey) { + char *text = new char [ strlen(destkey) + 7 ]; + sprintf(text, "@LINK %s", destkey); + setText(srckey, text); + delete [] text; +} + + +/****************************************************************************** + * RawLD::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +signed char RawStr4::createModule(const char *ipath) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] 
== '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s.idx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; + + return 0; +} diff --git a/src/modules/common/rawverse.cpp b/src/modules/common/rawverse.cpp new file mode 100644 index 0000000..f77fbe5 --- /dev/null +++ b/src/modules/common/rawverse.cpp @@ -0,0 +1,348 @@ +/****************************************************************************** + * rawverse.cpp - code for class 'RawVerse'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class VerseKey + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#ifndef __GNUC__ +#include <io.h> +#include <sys/stat.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawverse.h> +#include <versekey.h> +#include <sysdata.h> + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. + + +/****************************************************************************** + * RawVerse Statics + */ + +int RawVerse::instance = 0; +const char *RawVerse::nl = "\r\n"; + + +/****************************************************************************** + * RawVerse Constructor - Initializes data for instance of RawVerse + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 
'modules/texts/rawtext/webster/') + */ + +RawVerse::RawVerse(const char *ipath, int fileMode) +{ + char *buf; + + path = 0; + stdstr(&path, ipath); + buf = new char [ strlen(path) + 80 ]; + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s/ot.vss", path); + idxfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.vss", path); + idxfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/ot", path); + textfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt", path); + textfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + delete [] buf; + instance++; +} + + +/****************************************************************************** + * RawVerse Destructor - Cleans up instance of RawVerse + */ + +RawVerse::~RawVerse() +{ + int loop1; + + if (path) + delete [] path; + + --instance; + + for (loop1 = 0; loop1 < 2; loop1++) { + FileMgr::systemFileMgr.close(idxfp[loop1]); + FileMgr::systemFileMgr.close(textfp[loop1]); + } +} + + +/****************************************************************************** + * RawVerse::findoffset - Finds the offset of the key verse from the indexes + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * start - address to store the starting offset + * size - address to store the size of the entry + */ + +void RawVerse::findoffset(char testmt, long idxoff, long *start, unsigned short *size) { + idxoff *= 6; + if (!testmt) + testmt = ((idxfp[1]) ? 
1:2); + + if (idxfp[testmt-1]->getFd() >= 0) { + lseek(idxfp[testmt-1]->getFd(), idxoff, SEEK_SET); + read(idxfp[testmt-1]->getFd(), start, 4); + long len = read(idxfp[testmt-1]->getFd(), size, 2); // read size + + *start = swordtoarch32(*start); + *size = swordtoarch16(*size); + + if (len < 2) { + *size = (unsigned short)((*start) ? (lseek(textfp[testmt-1]->getFd(), 0, SEEK_END) - (long)*start) : 0); // if for some reason we get an error reading size, make size to end of file + } + } + else { + *start = 0; + *size = 0; + } +} + + +/****************************************************************************** + * RawVerse::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. + */ + +void RawVerse::preptext(char *buf) +{ + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 
0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = 10; +// *to++ = nl[1]; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; +// *to++ = nl[0]; + *to++ = 10; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + while (to > (buf+1)) { // remove trailing excess + to--; + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } +} + + +/****************************************************************************** + * RawVerse::readtext - gets text at a given offset + * + * ENT: testmt - testament file to search in (0 - Old; 1 - New) + * start - starting offset where the text is located in the file + * size - size of text entry + 2 (null)(null) + * buf - buffer to store text + * + */ + +void RawVerse::readtext(char testmt, long start, unsigned short size, char *buf) { + memset(buf, 0, size+1); + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + if (size) { + if (textfp[testmt-1]->getFd() >= 0) { + lseek(textfp[testmt-1]->getFd(), start, SEEK_SET); + read(textfp[testmt-1]->getFd(), buf, (int)size - 2); + } + } +} + + +/****************************************************************************** + * RawVerse::settext - Sets text for current offset + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void RawVerse::settext(char testmt, long idxoff, const char *buf, long len) +{ + long start, outstart; + unsigned short size; + unsigned short outsize; + + idxoff *= 6; + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + + size = outsize = (len < 0) ? 
strlen(buf) : len; + + start = outstart = lseek(textfp[testmt-1]->getFd(), 0, SEEK_END); + lseek(idxfp[testmt-1]->getFd(), idxoff, SEEK_SET); + + if (size) { + lseek(textfp[testmt-1]->getFd(), start, SEEK_SET); + write(textfp[testmt-1]->getFd(), buf, (int)size); + + // add a new line to make data file easier to read in an editor + write(textfp[testmt-1]->getFd(), nl, 2); + } + else { + start = 0; + } + + outstart = archtosword32(start); + outsize = archtosword16(size); + + write(idxfp[testmt-1]->getFd(), &outstart, 4); + write(idxfp[testmt-1]->getFd(), &outsize, 2); + + +} + + +/****************************************************************************** + * RawVerse::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawVerse::linkentry(char testmt, long destidxoff, long srcidxoff) { + long start; + unsigned short size; + + destidxoff *= 6; + srcidxoff *= 6; + + if (!testmt) + testmt = ((idxfp[1]) ? 
1:2); + + // get source + lseek(idxfp[testmt-1]->getFd(), srcidxoff, SEEK_SET); + read(idxfp[testmt-1]->getFd(), &start, 4); + read(idxfp[testmt-1]->getFd(), &size, 2); + + // write dest + lseek(idxfp[testmt-1]->getFd(), destidxoff, SEEK_SET); + write(idxfp[testmt-1]->getFd(), &start, 4); + write(idxfp[testmt-1]->getFd(), &size, 2); +} + + +/****************************************************************************** + * RawVerse::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +char RawVerse::createModule(const char *ipath) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s/ot", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/nt", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/ot.vss", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + + sprintf(buf, "%s/nt.vss", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + + VerseKey vk; + vk.Headings(1); + long offset = 0; + short size = 0; + for (vk = TOP; !vk.Error(); vk++) { + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &offset, 4); + write((vk.Testament() == 1) ? 
fd->getFd() : fd2->getFd(), &size, 2); + } + + FileMgr::systemFileMgr.close(fd); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; +/* + RawVerse rv(path); + VerseKey mykey("Rev 22:21"); +*/ + + return 0; +} diff --git a/src/modules/common/sapphire.cpp b/src/modules/common/sapphire.cpp new file mode 100644 index 0000000..686bccb --- /dev/null +++ b/src/modules/common/sapphire.cpp @@ -0,0 +1,228 @@ +/* sapphire.cpp -- the Saphire II stream cipher class. + Dedicated to the Public Domain the author and inventor: + (Michael Paul Johnson). This code comes with no warranty. + Use it at your own risk. + Ported from the Pascal implementation of the Sapphire Stream + Cipher 9 December 1994. + Added hash pre- and post-processing 27 December 1994. + Modified initialization to make index variables key dependent, + made the output function more resistant to cryptanalysis, + and renamed to Sapphire II 2 January 1995 +*/ + + +#ifdef WIN32 +#include <memory.h> +#endif + +#ifdef UNIX +#include <memory.h> +#include <unistd.h> +#else +#ifndef _MSC_VER +#include <mem.h> +#endif +#endif + +#ifdef _WIN32_WCE +#include <string.h> +#endif + +#include "sapphire.h" + +unsigned char sapphire::keyrand(int limit, + unsigned char *user_key, + unsigned char keysize, + unsigned char *rsum, + unsigned *keypos) + { + unsigned u, // Value from 0 to limit to return. + retry_limiter, // No infinite loops allowed. + mask; // Select just enough bits. + + if (!limit) return 0; // Avoid divide by zero error. + retry_limiter = 0; + mask = 1; // Fill mask with enough bits to cover + while (mask < (unsigned)limit) // the desired range. + mask = (mask << 1) + 1; + do + { + *rsum = cards[*rsum] + user_key[(*keypos)++]; + if (*keypos >= keysize) + { + *keypos = 0; // Recycle the user key. + *rsum += keysize; // key "aaaa" != key "aaaaaaaa" + } + u = mask & *rsum; + if (++retry_limiter > 11) + u %= limit; // Prevent very rare long loops. 
+ } + while (u > (unsigned)limit); + return u; + } + +void sapphire::initialize(unsigned char *key, unsigned char keysize) + { + // Key size may be up to 256 bytes. + // Pass phrases may be used directly, with longer length + // compensating for the low entropy expected in such keys. + // Alternatively, shorter keys hashed from a pass phrase or + // generated randomly may be used. For random keys, lengths + // of from 4 to 16 bytes are recommended, depending on how + // secure you want this to be. + + int i; + unsigned char toswap, swaptemp, rsum; + unsigned keypos; + + // If we have been given no key, assume the default hash setup. + + if (keysize < 1) + { + hash_init(); + return; + } + + // Start with cards all in order, one of each. + + for (i=0;i<256;i++) + cards[i] = i; + + // Swap the card at each position with some other card. + + toswap = 0; + keypos = 0; // Start with first byte of user key. + rsum = 0; + for (i=255;i>=0;i--) + { + toswap = keyrand(i, key, keysize, &rsum, &keypos); + swaptemp = cards[i]; + cards[i] = cards[toswap]; + cards[toswap] = swaptemp; + } + + // Initialize the indices and data dependencies. + // Indices are set to different values instead of all 0 + // to reduce what is known about the state of the cards + // when the first byte is emitted. + + rotor = cards[1]; + ratchet = cards[3]; + avalanche = cards[5]; + last_plain = cards[7]; + last_cipher = cards[rsum]; + + toswap = swaptemp = rsum = 0; + keypos = 0; + } + +void sapphire::hash_init(void) + { + // This function is used to initialize non-keyed hash + // computation. + + int i, j; + + // Initialize the indices and data dependencies. + + rotor = 1; + ratchet = 3; + avalanche = 5; + last_plain = 7; + last_cipher = 11; + + // Start with cards all in inverse order. 
+ + for (i=0, j=255;i<256;i++,j--) + cards[i] = (unsigned char) j; + } + +sapphire::sapphire(unsigned char *key, unsigned char keysize) + { + if (key && keysize) + initialize(key, keysize); + } + +void sapphire::burn(void) + { + // Destroy the key and state information in RAM. + memset(cards, 0, 256); + rotor = ratchet = avalanche = last_plain = last_cipher = 0; + } + +sapphire::~sapphire() + { + burn(); + } + +unsigned char sapphire::encrypt(unsigned char b) + { +#ifdef USBINARY + // Picture a single enigma rotor with 256 positions, rewired + // on the fly by card-shuffling. + + // This cipher is a variant of one invented and written + // by Michael Paul Johnson in November, 1993. + + unsigned char swaptemp; + + // Shuffle the deck a little more. + + ratchet += cards[rotor++]; + swaptemp = cards[last_cipher]; + cards[last_cipher] = cards[ratchet]; + cards[ratchet] = cards[last_plain]; + cards[last_plain] = cards[rotor]; + cards[rotor] = swaptemp; + avalanche += cards[swaptemp]; + + // Output one byte from the state in such a way as to make it + // very hard to figure out which one you are looking at. + + last_cipher = b^cards[(cards[ratchet] + cards[rotor]) & 0xFF] ^ + cards[cards[(cards[last_plain] + + cards[last_cipher] + + cards[avalanche])&0xFF]]; + last_plain = b; + return last_cipher; +#else + return b; +#endif + } + +unsigned char sapphire::decrypt(unsigned char b) + { + unsigned char swaptemp; + + // Shuffle the deck a little more. + + ratchet += cards[rotor++]; + swaptemp = cards[last_cipher]; + cards[last_cipher] = cards[ratchet]; + cards[ratchet] = cards[last_plain]; + cards[last_plain] = cards[rotor]; + cards[rotor] = swaptemp; + avalanche += cards[swaptemp]; + + // Output one byte from the state in such a way as to make it + // very hard to figure out which one you are looking at. 
+ + last_plain = b^cards[(cards[ratchet] + cards[rotor]) & 0xFF] ^ + cards[cards[(cards[last_plain] + + cards[last_cipher] + + cards[avalanche])&0xFF]]; + last_cipher = b; + return last_plain; + } + +void sapphire::hash_final(unsigned char *hash, // Destination + unsigned char hashlength) // Size of hash. + { + int i; + + for (i=255;i>=0;i--) + encrypt((unsigned char) i); + for (i=0;i<hashlength;i++) + hash[i] = encrypt(0); + } + diff --git a/src/modules/common/swcipher.cpp b/src/modules/common/swcipher.cpp new file mode 100644 index 0000000..d221b8b --- /dev/null +++ b/src/modules/common/swcipher.cpp @@ -0,0 +1,123 @@ +/****************************************************************************** + * swcipher.cpp - code for class 'SWCipher'- a driver class that provides + * cipher utilities. + */ + +#include <string.h> +#include <stdlib.h> +#include <swcipher.h> + + +/****************************************************************************** + * SWCipher Constructor - Initializes data for instance of SWCipher + * + */ + +SWCipher::SWCipher(unsigned char *key) { + master.initialize(key, strlen((char *)key)); + buf = 0; +} + + +/****************************************************************************** + * SWCipher Destructor - Cleans up instance of SWCipher + */ + +SWCipher::~SWCipher() +{ + if (buf) + free(buf); +} + + +char *SWCipher::Buf(const char *ibuf, unsigned int ilen) +{ + if (ibuf) { + + if (buf) + free(buf); + + if (!ilen) { + len = strlen(buf); + ilen = len + 1; + } + else len = ilen; + + buf = (char *) malloc(ilen); + memcpy(buf, ibuf, ilen); + cipher = false; + } + + Decode(); + + return buf; +} + + +char *SWCipher::cipherBuf(unsigned int *ilen, const char *ibuf) +{ + if (ibuf) { + + if (buf) + free(buf); + + buf = (char *) malloc(*ilen); + memcpy(buf, ibuf, *ilen); + len = *ilen; + cipher = true; + } + + Encode(); + + *ilen = (short)len; + return buf; +} + + +/****************************************************************************** + * 
SWCipher::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCipher::Encode(void) +{ + if (!cipher) { + work = master; + for (int i = 0; i < len; i++) + buf[i] = work.encrypt(buf[i]); + cipher = true; + } +} + + +/****************************************************************************** + * SWCipher::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCipher::Decode(void) +{ + if (cipher) { + work = master; + for (int i = 0; i < len; i++) + buf[i] = work.decrypt(buf[i]); + cipher = false; + } +} + + +/****************************************************************************** + * SWCipher::setCipherKey - setter for a new CipherKey + * + */ + +void SWCipher::setCipherKey(const char *ikey) { + unsigned char *key = (unsigned char *)ikey; + master.initialize(key, strlen((char *)key)); +} diff --git a/src/modules/common/swcomprs.cpp b/src/modules/common/swcomprs.cpp new file mode 100644 index 0000000..4bd2e5e --- /dev/null +++ b/src/modules/common/swcomprs.cpp @@ -0,0 +1,190 @@ +/****************************************************************************** + * swcomprs.cpp - code for class 'SWCompress'- a driver class that provides + * compression utilities. 
+ */ + +#include <string.h> +#include <stdlib.h> +#include <swcomprs.h> + + +/****************************************************************************** + * SWCompress Constructor - Initializes data for instance of SWCompress + * + */ + +SWCompress::SWCompress() +{ + buf = zbuf = 0; + Init(); +} + + +/****************************************************************************** + * SWCompress Destructor - Cleans up instance of SWCompress + */ + +SWCompress::~SWCompress() +{ + if (zbuf) + free(zbuf); + + if (buf) + free(buf); +} + + +void SWCompress::Init() +{ + if (buf) + free(buf); + + if (zbuf) + free(zbuf); + + buf = 0; + zbuf = 0; + direct = 0; + zlen = 0; + slen = 0; + zpos = 0; + pos = 0; +} + + +char *SWCompress::Buf(const char *ibuf, unsigned long *len) { + // setting an uncompressed buffer + if (ibuf) { + Init(); + slen = (len) ? *len : strlen(ibuf); + buf = (char *) calloc(slen + 1, 1); + memcpy(buf, ibuf, slen); + } + + // getting an uncompressed buffer + if (!buf) { + buf = (char *)calloc(1,1); // be sure we at least allocate an empty buf for return; + direct = 1; + Decode(); +// slen = strlen(buf); + if (len) + *len = slen; + } + return buf; +} + + +char *SWCompress::zBuf(unsigned long *len, char *ibuf) +{ + // setting a compressed buffer + if (ibuf) { + Init(); + zbuf = (char *) malloc(*len); + memcpy(zbuf, ibuf, *len); + zlen = *len; + } + + // getting a compressed buffer + if (!zbuf) { + direct = 0; + Encode(); + } + + *len = zlen; + return zbuf; +} + + +unsigned long SWCompress::GetChars(char *ibuf, unsigned long len) +{ + if (direct) { + len = (((zlen - zpos) > (unsigned)len) ? len : zlen - zpos); + if (len > 0) { + memmove(ibuf, &zbuf[zpos], len); + zpos += len; + } + } + else { +// slen = strlen(buf); + len = (((slen - pos) > (unsigned)len) ? 
len : slen - pos); + if (len > 0) { + memmove(ibuf, &buf[pos], len); + pos += len; + } + } + return len; +} + + +unsigned long SWCompress::SendChars(char *ibuf, unsigned long len) +{ + if (direct) { + if (buf) { +// slen = strlen(buf); + if ((pos + len) > (unsigned)slen) { + buf = (char *) realloc(buf, pos + len + 1024); + memset(&buf[pos], 0, len + 1024); + } + } + else buf = (char *)calloc(1, len + 1024); + memmove(&buf[pos], ibuf, len); + pos += len; + } + else { + if (zbuf) { + if ((zpos + len) > zlen) { + zbuf = (char *) realloc(zbuf, zpos + len + 1024); + zlen = zpos + len + 1024; + } + } + else { + zbuf = (char *)calloc(1, len + 1024); + zlen = len + 1024; + } + memmove(&zbuf[zpos], ibuf, len); + zpos += len; + } + return len; +} + + +/****************************************************************************** + * SWCompress::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCompress::Encode(void) +{ + cycleStream(); +} + + +/****************************************************************************** + * SWCompress::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCompress::Decode(void) +{ + cycleStream(); +} + + +void SWCompress::cycleStream() { + char buf[1024]; + unsigned long len, totlen = 0; + + do { + len = GetChars(buf, 1024); + if (len) + totlen += SendChars(buf, len); + } while (len == 1024); + + zlen = slen = totlen; +} diff --git a/src/modules/common/swcomprs.doc b/src/modules/common/swcomprs.doc new file mode 100644 index 0000000..b6817f2 --- /dev/null +++ b/src/modules/common/swcomprs.doc @@ -0,0 +1,802 @@ +The following is the original information send from Parson's Technologies via +Craig Rairden. 
+_______________________________________________________________________________ +Compression Info, 10-11-95 +Jeff Wheeler + +Source of Algorithm +------------------- + +The compression algorithms used here are based upon the algorithms developed +and published by Haruhiko Okumura in a paper entitled "Data Compression +Algorithms of LARC and LHarc." This paper discusses three compression +algorithms, LZSS, LZARI, and LZHUF. LZSS is described as the "first" of +these, and is described as providing moderate compression with good speed. +LZARI is described as an improved LZSS, a combination of the LZSS algorithm +with adaptive arithmetic compression. It is described as being slower than +LZSS but with better compression. LZHUF (the basis of the common LHA +compression program) was included in the paper; however, a free usage license +was not included. + +The following are copies of the statements included at the beginning of each +source code listing that was supplied in the working paper. + + LZSS, dated 4/6/89, marked as "Use, distribute and + modify this program freely." + + LZARI, dated 4/7/89, marked as "Use, distribute and + modify this program freely." + + LZHUF, dated 11/20/88, written by Haruyasu Yoshizaki, + translated by Haruhiko Okumura on 4/7/89. Not + expressly marked as redistributable or modifiable. + +Since both LZSS and LZARI are marked as "use, distribute and modify freely" we +have felt at liberty basing our compression algorithm on either of these. + +Selection of Algorithm +---------------------- + +Working samples of three possible compression algorithms are supplied in +Okumura's paper. Which should be used? + +LZSS is the fastest at decompression, but does not generate as small a +compressed file as the other methods. The other two methods provide, perhaps, +a 15% improvement in compression. Or, put another way, on a 100K file, LZSS +might compress it to 50K while the others might approach 40-45K. 
For STEP +purposes, it was decided that decoding speed was of more importance than +tighter compression. For these reasons, the first compression algorithm +implemented is the LZSS algorithm. + +About LZSS Encoding +------------------- + +(adapted from Haruhiko Okumura's paper) + +This scheme was proposed by Ziv and Lempel [1]. A slightly modified version +is described by Storer and Szymanski [2]. An implementation using a binary +tree has been proposed by Bell [3]. + +The algorithm is quite simple. +1. Keep a ring buffer which initially contains all space characters. +2. Read several letters from the file to the buffer. +3. Search the buffer for the longest string that matches the letters just + read, and send its length and position into the buffer. + +If the ring buffer is 4096 bytes, the position can be stored in 12 bits. If the +length is represented in 4 bits, the <position, length> pair is two bytes +long. If the longest match is no more than two characters, then just one +character is sent without encoding. The process starts again with the next +character. An extra bit is sent each time to tell the decoder whether the +next item is a character or a <position, length> pair. + +[1] J. Ziv and A. Lempel, IEEE Transactions IT-23, 337-343 (1977). +[2] J. A. Storer and T. G. Szymanski, J. ACM, 29, 928-951 (1982). +[3] T.C. Bell, IEEE Transactions COM-34, 1176-1182 (1986). + +class SWCompress { +public: +void InitTree( // no return value + void); // no parameters + +void InsertNode( // no return value + short int Pos); // position in the buffer + +void DeleteNode( // no return value + short int Node); // node to be removed + +void Encode( // no return value + void); // no parameters + +void Decode( // no return value + void); // no parameters +}; + +// The following are constant sizes used by the compression algorithm. +// +// N - This is the size of the ring buffer. It is set +// to 4K. 
It is important to note that a position +// within the ring buffer requires 12 bits. +// +// F - This is the maximum length of a character sequence +// that can be taken from the ring buffer. It is set +// to 18. Note that a length must be 3 before it is +// worthwhile to store a position/length pair, so the +// length can be encoded in only 4 bits. Or, put yet +// another way, it is not necessary to encode a length +// of 0-18; it is necessary to encode a length of +// 3-18, which requires 4 bits. +// +// THRESHOLD - It takes 2 bytes to store an offset and +// a length. If a character sequence only +// requires 1 or 2 characters to store +// uncompressed, then it is better to store +// it uncompressed than as an offset into +// the ring buffer. +// +// Note that the 12 bits used to store the position and the 4 bits +// used to store the length equal a total of 16 bits, or 2 bytes. + +#define N 4096 +#define F 18 +#define THRESHOLD 3 +#define NOT_USED N + +// m_ring_buffer is a text buffer. It contains "nodes" of +// uncompressed text that can be indexed by position. That is, +// a substring of the ring buffer can be indexed by a position +// and a length. When decoding, the compressed text may contain +// a position in the ring buffer and a count of the number of +// bytes from the ring buffer that are to be moved into the +// uncompressed buffer. +// +// This ring buffer is not maintained as part of the compressed +// text. Instead, it is reconstructed dynamically. That is, +// it starts out empty and gets built as the text is decompressed. +// +// The ring buffer contains N bytes, with an additional F - 1 bytes +// to facilitate string comparison. + +unsigned char m_ring_buffer[N + F - 1]; + +// m_match_position and m_match_length are set by InsertNode(). +// +// These variables indicate the position in the ring buffer +// and the number of characters at that position that match +// a given string. 
+ +short int m_match_position; +short int m_match_length; + +// m_lson, m_rson, and m_dad are the Japanese way of referring to +// a tree structure. The dad is the parent and it has a right and +// left son (child). +// +// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right +// and left children of node i. +// +// For i = 0 to N-1, m_dad[i] is the parent of node i. +// +// For i = 0 to 255, m_rson[N + i + 1] is the root of the tree for +// strings that begin with the character i. Note that this requires +// one byte characters. +// +// These nodes store values of 0...(N-1). Memory requirements +// can be reduced by using 2-byte integers instead of full 4-byte +// integers (for 32-bit applications). Therefore, these are +// defined as "short ints." + +short int m_lson[N + 1]; +short int m_rson[N + 257]; +short int m_dad[N + 1]; + + + + +/* + ------------------------------------------------------------------------- + cLZSS::InitTree + + This function initializes the tree nodes to "empty" states. + ------------------------------------------------------------------------- +*/ + +void cLZSS::InitTree( // no return value + void) // no parameters + throw() // exception list + + { + int i; + + // For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right + // and left children of node i. These nodes need not be + // initialized. However, for debugging purposes, it is nice to + // have them initialized. Since this is only used for compression + // (not decompression), I don't mind spending the time to do it. + // + // For the same range of i, m_dad[i] is the parent of node i. + // These are initialized to a known value that can represent + // a "not used" state. + + for (i = 0; i < N; i++) + { + m_lson[i] = NOT_USED; + m_rson[i] = NOT_USED; + m_dad[i] = NOT_USED; + } + + // For i = 0 to 255, m_rson[N + i + 1] is the root of the tree + // for strings that begin with the character i. This is why + // the right child array is larger than the left child array. 
+ // These are also initialized to a "not used" state. + // + // Note that there are 256 of these, one for each of the possible + // 256 characters. + + for (i = N + 1; i <= (N + 256); i++) + { + m_rson[i] = NOT_USED; + } + + // Done. + } + +/* + ------------------------------------------------------------------------- + cLZSS::InsertNode + + This function inserts a string from the ring buffer into one of + the trees. It loads the match position and length member variables + for the longest match. + + The string to be inserted is identified by the parameter Pos. + A full F bytes are inserted. So, m_ring_buffer[Pos ... Pos+F-1] + are inserted. + + If the matched length is exactly F, then an old node is removed + in favor of the new one (because the old one will be deleted + sooner). + + Note that Pos plays a dual role. It is used as both a position + in the ring buffer and also as a tree node. m_ring_buffer[Pos] + defines a character that is used to identify a tree node. + ------------------------------------------------------------------------- +*/ + +void cLZSS::InsertNode( // no return value + short int Pos) // position in the buffer + throw() // exception list + + { + short int i; + short int p; + int cmp; + unsigned char * key; + + ASSERT(Pos >= 0); + ASSERT(Pos < N); + + cmp = 1; + key = &(m_ring_buffer[Pos]); + + // The last 256 entries in m_rson contain the root nodes for + // strings that begin with a letter. Get an index for the + // first letter in this string. + + p = (short int) (N + 1 + key[0]); + + // Set the left and right tree nodes for this position to "not + // used." + + m_lson[Pos] = NOT_USED; + m_rson[Pos] = NOT_USED; + + // Haven't matched anything yet. 
+ + m_match_length = 0; + + for ( ; ; ) + { + if (cmp >= 0) + { + if (m_rson[p] != NOT_USED) + { + p = m_rson[p]; + } + else + { + m_rson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + else + { + if (m_lson[p] != NOT_USED) + { + p = m_lson[p]; + } + else + { + m_lson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + + // Should we go to the right or the left to look for the + // next match? + + for (i = 1; i < F; i++) + { + cmp = key[i] - m_ring_buffer[p + i]; + if (cmp != 0) + break; + } + + if (i > m_match_length) + { + m_match_position = p; + m_match_length = i; + + if (i >= F) + break; + } + } + + m_dad[Pos] = m_dad[p]; + m_lson[Pos] = m_lson[p]; + m_rson[Pos] = m_rson[p]; + + m_dad[ m_lson[p] ] = Pos; + m_dad[ m_rson[p] ] = Pos; + + if (m_rson[ m_dad[p] ] == p) + { + m_rson[ m_dad[p] ] = Pos; + } + else + { + m_lson[ m_dad[p] ] = Pos; + } + + // Remove "p" + + m_dad[p] = NOT_USED; + } + +/* + ------------------------------------------------------------------------- + cLZSS::DeleteNode + + This function removes the node "Node" from the tree. 
+ ------------------------------------------------------------------------- +*/ + +void cLZSS::DeleteNode( // no return value + short int Node) // node to be removed + throw() // exception list + + { + short int q; + + ASSERT(Node >= 0); + ASSERT(Node < (N+1)); + + if (m_dad[Node] == NOT_USED) + { + // not in tree, nothing to do + return; + } + + if (m_rson[Node] == NOT_USED) + { + q = m_lson[Node]; + } + else if (m_lson[Node] == NOT_USED) + { + q = m_rson[Node]; + } + else + { + q = m_lson[Node]; + if (m_rson[q] != NOT_USED) + { + do + { + q = m_rson[q]; + } + while (m_rson[q] != NOT_USED); + + m_rson[ m_dad[q] ] = m_lson[q]; + m_dad[ m_lson[q] ] = m_dad[q]; + m_lson[q] = m_lson[Node]; + m_dad[ m_lson[Node] ] = q; + } + + m_rson[q] = m_rson[Node]; + m_dad[ m_rson[Node] ] = q; + } + + m_dad[q] = m_dad[Node]; + + if (m_rson[ m_dad[Node] ] == Node) + { + m_rson[ m_dad[Node] ] = q; + } + else + { + m_lson[ m_dad[Node] ] = q; + } + + m_dad[Node] = NOT_USED; + } + +/* + ------------------------------------------------------------------------- + cLZSS::Encode + + This function "encodes" the input stream into the output stream. + The GetChars() and SendChars() functions are used to separate + this method from the actual i/o. + ------------------------------------------------------------------------- +*/ + +void cLZSS::Encode( // no return value + void) // no parameters + + { + short int i; // an iterator + short int r; // node number in the binary tree + short int s; // position in the ring buffer + unsigned short int len; // len of initial string + short int last_match_length; // length of last match + short int code_buf_pos; // position in the output buffer + unsigned char code_buf[17]; // the output buffer + unsigned char mask; // bit mask for byte 0 of out buf + unsigned char c; // character read from string + + // Start with a clean tree. + + InitTree(); + + // code_buf[0] works as eight flags. 
A "1" represents that the + // unit is an unencoded letter (1 byte), and a "0" represents + // that the next unit is a <position,length> pair (2 bytes). + // + // code_buf[1..16] stores eight units of code. Since the best + // we can do is store eight <position,length> pairs, at most 16 + // bytes are needed to store this. + // + // This is why the maximum size of the code buffer is 17 bytes. + + code_buf[0] = 0; + code_buf_pos = 1; + + // Mask iterates over the 8 bits in the code buffer. The first + // character ends up being stored in the low bit. + // + // bit 8 7 6 5 4 3 2 1 + // | | + // | first sequence in code buffer + // | + // last sequence in code buffer + + mask = 1; + + s = 0; + r = (short int) N - (short int) F; + + // Initialize the ring buffer with spaces... + + // Note that the last F bytes of the ring buffer are not filled. + // This is because those F bytes will be filled in immediately + // with bytes from the input stream. + + memset(m_ring_buffer, ' ', N - F); + + // Read F bytes into the last F bytes of the ring buffer. + // + // This function loads the buffer with X characters and returns + // the actual amount loaded. + + len = GetChars(&(m_ring_buffer[r]), F); + + // Make sure there is something to be compressed. + + if (len == 0) + return; + + // Insert the F strings, each of which begins with one or more + // 'space' characters. Note the order in which these strings + // are inserted. This way, degenerate trees will be less likely + // to occur. + + for (i = 1; i <= F; i++) + { + InsertNode((short int) (r - i)); + } + + // Finally, insert the whole string just read. The + // member variables match_length and match_position are set. + + InsertNode(r); + + // Now that we're preloaded, continue till done. + + do + { + + // m_match_length may be spuriously long near the end of + // text. + + if (m_match_length > len) + { + m_match_length = len; + } + + // Is it cheaper to store this as a single character? If so, + // make it so. 
+ + if (m_match_length < THRESHOLD) + { + // Send one character. Remember that code_buf[0] is the + // set of flags for the next eight items. + + m_match_length = 1; + code_buf[0] |= mask; + code_buf[code_buf_pos++] = m_ring_buffer[r]; + } + + // Otherwise, we do indeed have a string that can be stored + // compressed to save space. + + else + { + // The next 16 bits need to contain the position (12 bits) + // and the length (4 bits). + + code_buf[code_buf_pos++] = (unsigned char) m_match_position; + code_buf[code_buf_pos++] = (unsigned char) ( + ((m_match_position >> 4) & 0xf0) | + (m_match_length - THRESHOLD) ); + } + + // Shift the mask one bit to the left so that it will be ready + // to store the new bit. + + mask = (unsigned char) (mask << 1); + + // If the mask is now 0, then we know that we have a full set + // of flags and items in the code buffer. These need to be + // output. + + if (mask == 0) + { + // code_buf is the buffer of characters to be output. + // code_buf_pos is the number of characters it contains. + + SendChars(code_buf, code_buf_pos); + + // Reset for next buffer... + + code_buf[0] = 0; + code_buf_pos = 1; + mask = 1; + } + + last_match_length = m_match_length; + + // Delete old strings and read new bytes... + + for (i = 0; i < last_match_length; i++) + { + + // Get next character... + + if (GetChars(&c, 1) != 1) + break; + + // Delete "old strings" + + DeleteNode(s); + + // Put this character into the ring buffer. + // + // The original comment here says "If the position is near + // the end of the buffer, extend the buffer to make + // string comparison easier." + // + // That's a little misleading, because the "end" of the + // buffer is really what we consider to be the "beginning" + // of the buffer, that is, positions 0 through F. 
+ // + // The idea is that the front end of the buffer is duplicated + // into the back end so that when you're looking at characters + // at the back end of the buffer, you can index ahead (beyond + // the normal end of the buffer) and see the characters + // that are at the front end of the buffer wihtout having + // to adjust the index. + // + // That is... + // + // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234 + // | | | + // position 0 end of buffer | + // | + // duplicate of front of buffer + + m_ring_buffer[s] = c; + + if (s < F - 1) + { + m_ring_buffer[s + N] = c; + } + + // Increment the position, and wrap around when we're at + // the end. Note that this relies on N being a power of 2. + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Register the string that is found in + // m_ring_buffer[r..r+F-1]. + + InsertNode(r); + } + + // If we didn't quit because we hit the last_match_length, + // then we must have quit because we ran out of characters + // to process. + + while (i++ < last_match_length) + { + DeleteNode(s); + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Note that len hitting 0 is the key that causes the + // do...while() to terminate. This is the only place + // within the loop that len is modified. + // + // Its original value is F (or a number less than F for + // short strings). + + if (--len) + { + InsertNode(r); /* buffer may not be empty. */ + } + } + + // End of do...while() loop. Continue processing until there + // are no more characters to be compressed. The variable + // "len" is used to signal this condition. + } + while (len > 0); + + // There could still be something in the output buffer. Send it + // now. + + if (code_buf_pos > 1) + { + // code_buf is the encoded string to send. + // code_buf_ptr is the number of characters. + + SendChars(code_buf, code_buf_pos); + } + + // Done! 
+ } + +/* + ------------------------------------------------------------------------- + cLZSS::Decode + + This function "decodes" the input stream into the output stream. + The GetChars() and SendChars() functions are used to separate + this method from the actual i/o. + ------------------------------------------------------------------------- +*/ + +void cLZSS::Decode( // no return value + void) // no parameters + + { + int k; + int r; // node number + unsigned char c[F]; // an array of chars + unsigned char flags; // 8 bits of flags + int flag_count; // which flag we're on + short int pos; // position in the ring buffer + short int len; // number of chars in ring buffer + + // Initialize the ring buffer with a common string. + // + // Note that the last F bytes of the ring buffer are not filled. + + memset(m_ring_buffer, ' ', N - F); + + r = N - F; + + flags = (char) 0; + flag_count = 0; + + for ( ; ; ) + { + + // If there are more bits of interest in this flag, then + // shift that next interesting bit into the 1's position. + // + // If this flag has been exhausted, the next byte must + // be a flag. + + if (flag_count > 0) + { + flags = (unsigned char) (flags >> 1); + flag_count--; + } + else + { + // Next byte must be a flag. + + if (GetChars(&flags, 1) != 1) + break; + + // Set the flag counter. While at first it might appear + // that this should be an 8 since there are 8 bits in the + // flag, it should really be a 7 because the shift must + // be performed 7 times in order to see all 8 bits. + + flag_count = 7; + } + + // If the low order bit of the flag is now set, then we know + // that the next byte is a single, unencoded character. + + if (flags & 1) + { + if (GetChars(c, 1) != 1) + break; + + if (SendChars(c, 1) != 1) + break; + + // Add to buffer, and increment to next spot. Wrap at end. + + m_ring_buffer[r] = c[0]; + r = (short int) ( (r + 1) & (N - 1) ); + } + + // Otherwise, we know that the next two bytes are a + // <position,length> pair. 
The position is in 12 bits and + // the length is in 4 bits. + + else + { + // Original code: + // if ((i = getc(infile)) == EOF) + // break; + // if ((j = getc(infile)) == EOF) + // break; + // i |= ((j & 0xf0) << 4); + // j = (j & 0x0f) + THRESHOLD; + // + // I've modified this to only make one input call, and + // have changed the variable names to something more + // obvious. + + if (GetChars(c, 2) != 2) + break; + + // Convert these two characters into the position and + // length. Note that the length is always at least + // THRESHOLD, which is why we're able to get a length + // of 18 out of only 4 bits. + + pos = (short int) ( c[0] | ((c[1] & 0xf0) << 4) ); + + len = (short int) ( (c[1] & 0x0f) + THRESHOLD ); + + // There are now "len" characters at position "pos" in + // the ring buffer that can be pulled out. Note that + // len is never more than F. + + for (k = 0; k < len; k++) + { + c[k] = m_ring_buffer[(pos + k) & (N - 1)]; + + // Add to buffer, and increment to next spot. Wrap at end. + + m_ring_buffer[r] = c[k]; + r = (short int) ( (r + 1) & (N - 1) ); + } + + // Add the "len" characters to the output stream. + + if (SendChars(c, len) != len) + break; + } + } + } + diff --git a/src/modules/common/zipcomprs.cpp b/src/modules/common/zipcomprs.cpp new file mode 100644 index 0000000..01ba430 --- /dev/null +++ b/src/modules/common/zipcomprs.cpp @@ -0,0 +1,158 @@ +/****************************************************************************** + * swcomprs.cpp - code for class 'ZipCompress'- a driver class that provides + * compression utilities. 
- using zlib + */ + +#include <string.h> +#include <string> +#include <stdlib.h> +#include <stdio.h> +#include <zipcomprs.h> +#include <zlib.h> + +/****************************************************************************** + * ZipCompress Constructor - Initializes data for instance of ZipCompress + * + */ + +ZipCompress::ZipCompress() : SWCompress() +{ +// fprintf(stderr, "init compress\n"); +} + + +/****************************************************************************** + * ZipCompress Destructor - Cleans up instance of ZipCompress + */ + +ZipCompress::~ZipCompress() { +} + + +/****************************************************************************** + * ZipCompress::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + * NOTE: must set zlen for parent class to know length of + * compressed buffer. + */ + +void ZipCompress::Encode(void) +{ +/* +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be at least 0.1% larger than + sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the + compressed buffer. + This function can be used to compress a whole file at once if the + input file is mmap'ed. + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. 
+*/ + direct = 0; // set direction needed by parent [Get|Send]Chars() + + // get buffer + char chunk[1024]; + char *buf = (char *)calloc(1, 1024); + char *chunkbuf = buf; + unsigned long chunklen; + unsigned long len = 0; + while((chunklen = GetChars(chunk, 1023))) { + memcpy(chunkbuf, chunk, chunklen); + len += chunklen; + if (chunklen < 1023) + break; + else buf = (char *)realloc(buf, len + 1024); + chunkbuf = buf+len; + } + + + zlen = (long) (len*1.001)+15; + char *zbuf = new char[zlen+1]; + if (len) + { + //printf("Doing compress\n"); + if (compress((Bytef*)zbuf, &zlen, (const Bytef*)buf, len)!=Z_OK) + { + printf("ERROR in compression\n"); + } + else { + SendChars(zbuf, zlen); + } + } + else + { + fprintf(stderr, "No buffer to compress\n"); + } + delete [] zbuf; + free (buf); +} + + +/****************************************************************************** + * ZipCompress::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void ZipCompress::Decode(void) +{ +/* +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. (The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the compressed buffer. + This function can be used to decompress a whole file at once if the + input file is mmap'ed. 
+ + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted. +*/ + + // get buffer + char chunk[1024]; + char *zbuf = (char *)calloc(1, 1024); + char *chunkbuf = zbuf; + int chunklen; + unsigned long zlen = 0; + while((chunklen = GetChars(chunk, 1023))) { + memcpy(chunkbuf, chunk, chunklen); + zlen += chunklen; + if (chunklen < 1023) + break; + else zbuf = (char *)realloc(zbuf, zlen + 1024); + chunkbuf = zbuf + zlen; + } + + //printf("Decoding complength{%ld} uncomp{%ld}\n", zlen, blen); + if (zlen) { + unsigned long blen = zlen*20; // trust compression is less than 1000% + char *buf = new char[blen]; + //printf("Doing decompress {%s}\n", zbuf); + if (uncompress((Bytef*)buf, &blen, (Bytef*)zbuf, zlen) != Z_OK) { + fprintf(stderr, "no room in outbuffer to during decompression. see zipcomp.cpp\n"); + } + SendChars(buf, blen); + delete [] buf; + slen = blen; + } + else { + fprintf(stderr, "No buffer to decompress!\n"); + } + //printf("Finished decoding\n"); + free (zbuf); +} diff --git a/src/modules/common/zstr.cpp b/src/modules/common/zstr.cpp new file mode 100644 index 0000000..cd1add5 --- /dev/null +++ b/src/modules/common/zstr.cpp @@ -0,0 +1,705 @@ +/****************************************************************************** + * zstr.cpp - code for class 'zStr'- a module that reads compressed text + * files and provides lookup and parsing functions based on + * class StrKey + */ + +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <stdlib.h> +#include <utilfuns.h> +#include <zstr.h> +#include <swcomprs.h> + +#include <sysdata.h> +#include <entriesblk.h> + +/****************************************************************************** + * zStr Statics + */ + +int zStr::instance = 0; +const int zStr::IDXENTRYSIZE 
= 8; +const int zStr::ZDXENTRYSIZE = 8; + + +/****************************************************************************** + * zStr Constructor - Initializes data for instance of zStr + * + * ENT: ipath - path of the directory where data and index files are located. + */ + +zStr::zStr(const char *ipath, int fileMode, long blockCount, SWCompress *icomp) { + char buf[127]; + + nl = '\n'; + lastoff = -1; + path = 0; + stdstr(&path, ipath); + + compressor = (icomp) ? icomp : new SWCompress(); + this->blockCount = blockCount; +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. + + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s.idx", path); + idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.dat", path); + datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.zdx", path); + zdxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.zdt", path); + zdtfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + if (datfd <= 0) { + sprintf(buf, "Error: %d", errno); + perror(buf); + } + + cacheBlock = 0; + cacheBlockIndex = -1; + cacheDirty = false; + + instance++; +} + + +/****************************************************************************** + * zStr Destructor - Cleans up instance of zStr + */ + +zStr::~zStr() { + + flushCache(); + + if (path) + delete [] path; + + --instance; + + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); + FileMgr::systemFileMgr.close(zdxfd); + FileMgr::systemFileMgr.close(zdtfd); + + + if (compressor) + delete compressor; + +} + + +/****************************************************************************** + * zStr::getidxbufdat - Gets the index string at the given dat offset + * NOTE: buf is calloc'd, or if not null, realloc'd and must + * be free'd 
by calling function + * + * ENT: ioffset - offset in dat file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void zStr::getKeyFromDatOffset(long ioffset, char **buf) { + int size; + char ch; + if (datfd > 0) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + for (size = 0; read(datfd->getFd(), &ch, 1) == 1; size++) { + if ((ch == '\\') || (ch == 10) || (ch == 13)) + break; + } + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + if (size) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + read(datfd->getFd(), *buf, size); + } + (*buf)[size] = 0; + toupperstr_utf8(*buf); + } + else { + *buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1); + **buf = 0; + } +} + + +/****************************************************************************** + * zStr::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is calloc'd, or if not null, realloc'd + * and must be freed by calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void zStr::getKeyFromIdxOffset(long ioffset, char **buf) { + __u32 offset; + + if (idxfd > 0) { + lseek(idxfd->getFd(), ioffset, SEEK_SET); + read(idxfd->getFd(), &offset, sizeof(__u32)); + offset = swordtoarch32(offset); + getKeyFromDatOffset(offset, buf); + } +} + + +/****************************************************************************** + * zStr::findoffset - Finds the offset of the key string from the indexes + * + * ENT: key - key string to lookup + * offset - address to store the starting offset + * size - address to store the size of the entry + * away - number of entries before of after to jump + * (default = 0) + * + * RET: error status + */ + +signed char zStr::findKeyIndex(const char *ikey, long *idxoff, long away) { + char *trybuf = 0, *key = 0, quitflag = 0; + signed char retval = 0; + __s32 headoff, tailoff, tryoff = 0, maxoff = 0; + __u32 start, size; + + if 
(idxfd->getFd() >= 0) { + tailoff = maxoff = lseek(idxfd->getFd(), 0, SEEK_END) - IDXENTRYSIZE; + if (*ikey) { + headoff = 0; + stdstr(&key, ikey); + toupperstr_utf8(key); + + while (headoff < tailoff) { + tryoff = (lastoff == -1) ? headoff + (((((tailoff / IDXENTRYSIZE) - (headoff / IDXENTRYSIZE))) / 2) * IDXENTRYSIZE) : lastoff; + lastoff = -1; + + getKeyFromIdxOffset(tryoff, &trybuf); + + if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry) + tryoff += (tryoff > (maxoff / 2))?-IDXENTRYSIZE:IDXENTRYSIZE; + retval = -1; + break; + } + + int diff = strcmp(key, trybuf); + if (!diff) + break; + + if (diff < 0) + tailoff = (tryoff == headoff) ? headoff : tryoff; + else headoff = tryoff; + if (tailoff == headoff + IDXENTRYSIZE) { + if (quitflag++) + headoff = tailoff; + } + } + if (headoff >= tailoff) + tryoff = headoff; + if (trybuf) + free(trybuf); + delete [] key; + } + else { tryoff = 0; } + + lseek(idxfd->getFd(), tryoff, SEEK_SET); + + start = size = 0; + retval = (read(idxfd->getFd(), &start, sizeof(__u32))==sizeof(__u32)) ? retval : -1; + retval = (read(idxfd->getFd(), &size, sizeof(__u32))==sizeof(__u32)) ? retval : -1; + start = swordtoarch32(start); + size = swordtoarch32(size); + + if (idxoff) + *idxoff = tryoff; + + while (away) { + __u32 laststart = start; + __u32 lastsize = size; + __s32 lasttry = tryoff; + tryoff += (away > 0) ? 
IDXENTRYSIZE : -IDXENTRYSIZE; + + bool bad = false; + if (((long)(tryoff + (away*IDXENTRYSIZE)) < -IDXENTRYSIZE) || (tryoff + (away*IDXENTRYSIZE) > (maxoff+IDXENTRYSIZE))) + bad = true; + else if (lseek(idxfd->getFd(), tryoff, SEEK_SET) < 0) + bad = true; + if (bad) { + retval = -1; + start = laststart; + size = lastsize; + tryoff = lasttry; + if (idxoff) + *idxoff = tryoff; + break; + } + read(idxfd->getFd(), &start, sizeof(__u32)); + read(idxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + if (idxoff) + *idxoff = tryoff; + + + if (((laststart != start) || (lastsize != size)) && (start >= 0) && (size)) + away += (away < 0) ? 1 : -1; + } + + lastoff = tryoff; + } + else { + if (idxoff) + *idxoff = 0; + retval = -1; + } + return retval; +} + + +/****************************************************************************** + * zStr::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. + */ + +void zStr::prepText(char *buf) { + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = nl; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; + *to++ = nl; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + while (to > (buf+1)) { // remove trailing excess + to--; + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } +} + + +/****************************************************************************** + * zStr::getText - gets text at a given offset + * + * ENT: + * offset - idxoffset where the key is located. 
+ * buf - buffer to store text + * idxbuf - buffer to store index key + * NOTE: buffer will be alloc'd / realloc'd and + * should be free'd by the client + * + */ + +void zStr::getText(long offset, char **idxbuf, char **buf) { + char *ch; + char *idxbuflocal = 0; + getKeyFromIdxOffset(offset, &idxbuflocal); + __u32 start; + __u32 size; + + do { + lseek(idxfd->getFd(), offset, SEEK_SET); + read(idxfd->getFd(), &start, sizeof(__u32)); + read(idxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + *idxbuf = (*idxbuf) ? (char *)realloc(*idxbuf, size*2 + 1) : (char *)malloc(size*2 + 1); + memset(*buf, 0, size + 1); + memset(*idxbuf, 0, size + 1); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), *buf, (int)(size)); + + for (ch = *buf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(*buf, ch, size - (unsigned long)(ch-*buf)); + + // resolve link + if (!strncmp(*buf, "@LINK", 5)) { + for (ch = *buf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findKeyIndex(*buf + 6, &offset); + } + else break; + } + while (true); // while we're resolving links + + if (idxbuflocal) { + __u32 localsize = strlen(idxbuflocal); + localsize = (localsize < (size - 1)) ? localsize : (size - 1); + strncpy(*idxbuf, idxbuflocal, localsize); + (*idxbuf)[localsize] = 0; + free(idxbuflocal); + } + __u32 block = 0; + __u32 entry = 0; + memmove(&block, *buf, sizeof(__u32)); + memmove(&entry, *buf + sizeof(__u32), sizeof(__u32)); + block = swordtoarch32(block); + entry = swordtoarch32(entry); + getCompressedText(block, entry, buf); +} + + +/****************************************************************************** + * zStr::getCompressedText - Get text entry from a compressed index / zdata + * file. 
+ */ + +void zStr::getCompressedText(long block, long entry, char **buf) { + + __u32 size = 0; + + if (cacheBlockIndex != block) { + __u32 start = 0; + + lseek(zdxfd->getFd(), block * ZDXENTRYSIZE, SEEK_SET); + read(zdxfd->getFd(), &start, sizeof(__u32)); + read(zdxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + + lseek(zdtfd->getFd(), start, SEEK_SET); + read(zdtfd->getFd(), *buf, size); + + flushCache(); + + unsigned long len = size; + compressor->zBuf(&len, *buf); + char * rawBuf = compressor->Buf(0, &len); + cacheBlock = new EntriesBlock(rawBuf, len); + cacheBlockIndex = block; + } + size = cacheBlock->getEntrySize(entry); + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + strcpy(*buf, cacheBlock->getEntry(entry)); +} + + +/****************************************************************************** + * zLD::settext - Sets text for current offset + * + * ENT: key - key for this entry + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void zStr::setText(const char *ikey, const char *buf, long len) { + + __u32 start, outstart; + __u32 size, outsize; + __s32 endoff; + long idxoff = 0; + __s32 shiftSize; + static const char nl[] = {13, 10}; + char *tmpbuf = 0; + char *key = 0; + char *dbKey = 0; + char *idxBytes = 0; + char *outbuf = 0; + char *ch = 0; + + len = (len < 0) ? 
strlen(buf) : len; + stdstr(&key, ikey); + toupperstr_utf8(key); + + char notFound = findKeyIndex(ikey, &idxoff, 0); + if (!notFound) { + getKeyFromIdxOffset(idxoff, &dbKey); + int diff = strcmp(key, dbKey); + if (diff < 0) { + } + else if (diff > 0) { + idxoff += IDXENTRYSIZE; + } + else if ((!diff) && (len > 0 /*we're not deleting*/)) { // got absolute entry + do { + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), &start, sizeof(__u32)); + read(idxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + tmpbuf = new char [ size + 2 ]; + memset(tmpbuf, 0, size + 2); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), tmpbuf, size); + + for (ch = tmpbuf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf)); + + // resolve link + if (!strncmp(tmpbuf, "@LINK", 5) && (len)) { + for (ch = tmpbuf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findKeyIndex(tmpbuf + IDXENTRYSIZE, &idxoff); + delete [] tmpbuf; + } + else break; + } + while (true); // while we're resolving links + } + } + + endoff = lseek(idxfd->getFd(), 0, SEEK_END); + + shiftSize = endoff - idxoff; + + if (shiftSize > 0) { + idxBytes = new char [ shiftSize ]; + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), idxBytes, shiftSize); + } + + outbuf = new char [ len + strlen(key) + 5 ]; + sprintf(outbuf, "%s%c%c", key, 13, 10); + size = strlen(outbuf); + if (len > 0) { // NOT a link + if (!cacheBlock) { + flushCache(); + cacheBlock = new EntriesBlock(); + cacheBlockIndex = (lseek(zdxfd->getFd(), 0, SEEK_END) / ZDXENTRYSIZE); + } + else if (cacheBlock->getCount() >= blockCount) { + flushCache(); + cacheBlock = new EntriesBlock(); + cacheBlockIndex = (lseek(zdxfd->getFd(), 0, SEEK_END) / ZDXENTRYSIZE); + } + __u32 entry = cacheBlock->addEntry(buf); + cacheDirty = true; + outstart = 
archtosword32(cacheBlockIndex); + outsize = archtosword32(entry); + memcpy (outbuf + size, &outstart, sizeof(__u32)); + memcpy (outbuf + size + sizeof(__u32), &outsize, sizeof(__u32)); + size += (sizeof(__u32) * 2); + } + else { // link + memcpy(outbuf + size, buf, len); + size += len; + } + + start = lseek(datfd->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword32(size); + + lseek(idxfd->getFd(), idxoff, SEEK_SET); + if (len > 0) { + lseek(datfd->getFd(), start, SEEK_SET); + write(datfd->getFd(), outbuf, size); + + // add a new line to make data file easier to read in an editor + write(datfd->getFd(), &nl, 2); + + write(idxfd->getFd(), &outstart, sizeof(__u32)); + write(idxfd->getFd(), &outsize, sizeof(__u32)); + if (idxBytes) { + write(idxfd->getFd(), idxBytes, shiftSize); + } + } + else { // delete entry + if (idxBytes) { + write(idxfd->getFd(), idxBytes+IDXENTRYSIZE, shiftSize-IDXENTRYSIZE); + lseek(idxfd->getFd(), -1, SEEK_CUR); // last valid byte + FileMgr::systemFileMgr.trunc(idxfd); // truncate index + } + } + + if (idxBytes) + delete [] idxBytes; + delete [] key; + delete [] outbuf; + free(dbKey); +} + + +/****************************************************************************** + * zLD::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void zStr::linkEntry(const char *destkey, const char *srckey) { + char *text = new char [ strlen(destkey) + 7 ]; + sprintf(text, "@LINK %s", destkey); + setText(srckey, text); + delete [] text; +} + + +void zStr::flushCache() { + if (cacheBlock) { + if (cacheDirty) { + __u32 start = 0; + unsigned long size = 0; + __u32 outstart = 0, outsize = 0; + + const char *rawBuf = cacheBlock->getRawData(&size); + compressor->Buf(rawBuf, &size); + compressor->zBuf(&size); + + long zdxSize = lseek(zdxfd->getFd(), 0, SEEK_END); + long zdtSize = 
lseek(zdtfd->getFd(), 0, SEEK_END); + + if ((cacheBlockIndex * ZDXENTRYSIZE) > (zdxSize - ZDXENTRYSIZE)) { // New Block + start = zdtSize; + } + else { + lseek(zdxfd->getFd(), cacheBlockIndex * ZDXENTRYSIZE, SEEK_SET); + read(zdxfd->getFd(), &start, sizeof(__u32)); + read(zdxfd->getFd(), &outsize, sizeof(__u32)); + start = swordtoarch32(start); + outsize = swordtoarch32(outsize); + if (start + outsize >= zdtSize) { // last entry, just overwrite + // start is already set + } + else if (size < outsize) { // middle entry, but smaller, that's fine and let's preserve bigger size + size = outsize; + } + else { // middle and bigger-- we have serious problems, for now let's put it at the end = lots of wasted space + start = zdtSize; + } + } + + + + outstart = archtosword32(start); + outsize = archtosword32((__u32)size); + + lseek(zdxfd->getFd(), cacheBlockIndex * ZDXENTRYSIZE, SEEK_SET); + lseek(zdtfd->getFd(), start, SEEK_SET); + rawBuf = compressor->zBuf(&size); + write(zdtfd->getFd(), rawBuf, size); + + // add a new line to make data file easier to read in an editor + write(zdtfd->getFd(), &nl, 2); + + write(zdxfd->getFd(), &outstart, sizeof(__u32)); + write(zdxfd->getFd(), &outsize, sizeof(__u32)); + + delete cacheBlock; + } + } + cacheBlockIndex = -1; + cacheBlock = 0; + cacheDirty = false; +} + + +/****************************************************************************** + * zLD::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +signed char zStr::createModule(const char *ipath) { + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, 
"%s.idx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + sprintf(buf, "%s.zdt", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + sprintf(buf, "%s.zdx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; + + return 0; +} diff --git a/src/modules/common/zverse.cpp b/src/modules/common/zverse.cpp new file mode 100644 index 0000000..6d76ddc --- /dev/null +++ b/src/modules/common/zverse.cpp @@ -0,0 +1,518 @@ +/****************************************************************************** + * zverse.h - code for class 'zVerse'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class VerseKey for compressed modules + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <stdlib.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <versekey.h> +#include <zverse.h> +#include <sysdata.h> + + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + +/****************************************************************************** + * zVerse Statics + */ + +int zVerse::instance = 0; + +const char zVerse::uniqueIndexID[] = {'X', 'r', 'v', 'c', 'b'}; + +/****************************************************************************** + * zVerse Constructor - Initializes data for instance of zVerse + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 
'modules/texts/rawtext/webster/') + * fileMode - open mode for the files (O_RDONLY, etc.) + * blockType - verse, chapter, book, etc. + */ + +zVerse::zVerse(const char *ipath, int fileMode, int blockType, SWCompress *icomp) +{ + char buf[127]; + + nl = '\n'; + path = 0; + cacheBufIdx = -1; + cacheTestament = 0; + cacheBuf = 0; + dirtyCache = false; + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + compressor = (icomp) ? icomp : new SWCompress(); + + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s/ot.%czs", path, uniqueIndexID[blockType]); + idxfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.%czs", path, uniqueIndexID[blockType]); + idxfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/ot.%czz", path, uniqueIndexID[blockType]); + textfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.%czz", path, uniqueIndexID[blockType]); + textfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/ot.%czv", path, uniqueIndexID[blockType]); + compfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.%czv", path, uniqueIndexID[blockType]); + compfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + instance++; +} + + +/****************************************************************************** + * zVerse Destructor - Cleans up instance of zVerse + */ + +zVerse::~zVerse() +{ + int loop1; + + if (cacheBuf) { + flushCache(); + free(cacheBuf); + } + + if (path) + delete [] path; + + if (compressor) + delete compressor; + + --instance; + + for (loop1 = 0; loop1 < 2; loop1++) { + FileMgr::systemFileMgr.close(idxfp[loop1]); + FileMgr::systemFileMgr.close(textfp[loop1]); + FileMgr::systemFileMgr.close(compfp[loop1]); + } +} + + 
+/****************************************************************************** + * zVerse::findoffset - Finds the offset of the key verse from the indexes + * + * + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * book - book to find (0 - testament introduction) + * chapter - chapter to find (0 - book introduction) + * verse - verse to find (0 - chapter introduction) + * start - address to store the starting offset + * size - address to store the size of the entry + */ + +void zVerse::findoffset(char testmt, long idxoff, long *start, unsigned short *size) +{ + // set start to offset in + // set size to + // set + unsigned long ulBuffNum=0; // buffer number + unsigned long ulVerseStart=0; // verse offset within buffer + unsigned short usVerseSize=0; // verse size + unsigned long ulCompOffset=0; // compressed buffer start + unsigned long ulCompSize=0; // buffer size compressed + unsigned long ulUnCompSize=0; // buffer size uncompressed + char *pcCompText=NULL; // compressed text + + *start = *size = 0; + //printf ("Finding offset %ld\n", idxoff); + idxoff *= 10; + if (!testmt) { + testmt = ((idxfp[0]) ? 
1:2); + } + + // assert we have and valid file descriptor + if (compfp[testmt-1]->getFd() < 1) + return; + + long newOffset = lseek(compfp[testmt-1]->getFd(), idxoff, SEEK_SET); + if (newOffset == idxoff) { + if (read(compfp[testmt-1]->getFd(), &ulBuffNum, 4) != 4) { + printf ("Error reading ulBuffNum\n"); + return; + } + } + else return; + + ulBuffNum = swordtoarch32(ulBuffNum); + + if (read(compfp[testmt-1]->getFd(), &ulVerseStart, 4) < 2) + { + printf ("Error reading ulVerseStart\n"); + return; + } + if (read(compfp[testmt-1]->getFd(), &usVerseSize, 2) < 2) + { + printf ("Error reading usVerseSize\n"); + return; + } + + *start = swordtoarch32(ulVerseStart); + *size = swordtoarch16(usVerseSize); + + if (*size) { + if (((long) ulBuffNum == cacheBufIdx) && (testmt == cacheTestament) && (cacheBuf)) { + // have the text buffered + return; + } + + //printf ("Got buffer number{%ld} versestart{%ld} versesize{%d}\n", ulBuffNum, ulVerseStart, usVerseSize); + + + if (lseek(idxfp[testmt-1]->getFd(), ulBuffNum*12, SEEK_SET)!=(long) ulBuffNum*12) + { + printf ("Error seeking compressed file index\n"); + return; + } + if (read(idxfp[testmt-1]->getFd(), &ulCompOffset, 4)<4) + { + printf ("Error reading ulCompOffset\n"); + return; + } + if (read(idxfp[testmt-1]->getFd(), &ulCompSize, 4)<4) + { + printf ("Error reading ulCompSize\n"); + return; + } + if (read(idxfp[testmt-1]->getFd(), &ulUnCompSize, 4)<4) + { + printf ("Error reading ulUnCompSize\n"); + return; + } + + ulCompOffset = swordtoarch32(ulCompOffset); + ulCompSize = swordtoarch32(ulCompSize); + ulUnCompSize = swordtoarch32(ulUnCompSize); + + if (lseek(textfp[testmt-1]->getFd(), ulCompOffset, SEEK_SET)!=(long)ulCompOffset) + { + printf ("Error: could not seek to right place in compressed text\n"); + return; + } + pcCompText = new char[ulCompSize]; + + if (read(textfp[testmt-1]->getFd(), pcCompText, ulCompSize)<(long)ulCompSize) + { + printf ("Error reading compressed text\n"); + return; + } + 
compressor->zBuf(&ulCompSize, pcCompText); + + if (cacheBuf) { + flushCache(); + free(cacheBuf); + } + + unsigned long len = 0; + compressor->Buf(0, &len); + cacheBuf = (char *)calloc(len + 1, 1); + memcpy(cacheBuf, compressor->Buf(), len); + + cacheTestament = testmt; + cacheBufIdx = ulBuffNum; + if (pcCompText) + delete [] pcCompText; + } +} + + +/****************************************************************************** + * zVerse::zreadtext - gets text at a given offset + * + * ENT: testmt - testament file to search in (0 - Old; 1 - New) + * start - starting offset where the text is located in the file + * size - size of text entry + 1 (null) + * buf - buffer to store text + * + */ + +void zVerse::zreadtext(char testmt, long start, unsigned short size, char *inbuf) +{ + memset(inbuf, 0, size); + if (size > 2) { + strncpy(inbuf, &(cacheBuf[start]), size-2); + } +} + + +/****************************************************************************** + * zVerse::settext - Sets text for current offset + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void zVerse::settext(char testmt, long idxoff, const char *buf, long len) { + + len = (len < 0) ? strlen(buf) : len; + if (!testmt) + testmt = ((idxfp[0]) ? 
1:2); + if ((!dirtyCache) || (cacheBufIdx < 0)) { + cacheBufIdx = lseek(idxfp[testmt-1]->getFd(), 0, SEEK_END) / 12; + cacheTestament = testmt; + if (cacheBuf) + free(cacheBuf); + cacheBuf = (char *)calloc(len + 1, 1); + } + else cacheBuf = (char *)((cacheBuf)?realloc(cacheBuf, strlen(cacheBuf)+(len + 1)):calloc((len + 1), 1)); + + dirtyCache = true; + + unsigned long start, outstart; + unsigned long outBufIdx = cacheBufIdx; + unsigned short size; + unsigned short outsize; + + idxoff *= 10; + size = outsize = len; + + start = strlen(cacheBuf); + + if (!size) + start = outBufIdx = 0; + + outBufIdx = archtosword32(outBufIdx); + outstart = archtosword32(start); + outsize = archtosword16(size); + + lseek(compfp[testmt-1]->getFd(), idxoff, SEEK_SET); + write(compfp[testmt-1]->getFd(), &outBufIdx, 4); + write(compfp[testmt-1]->getFd(), &outstart, 4); + write(compfp[testmt-1]->getFd(), &outsize, 2); + strcat(cacheBuf, buf); +} + + +void zVerse::flushCache() { + if (dirtyCache) { + unsigned long idxoff; + unsigned long start, outstart; + unsigned long size, outsize; + unsigned long zsize, outzsize; + + idxoff = cacheBufIdx * 12; + size = outsize = zsize = outzsize = strlen(cacheBuf); + if (size) { +// if (compressor) { +// delete compressor; +// compressor = new LZSSCompress(); +// } + compressor->Buf(cacheBuf); + compressor->zBuf(&zsize); + outzsize = zsize; + + start = outstart = lseek(textfp[cacheTestament-1]->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword32(size); + outzsize = archtosword32(zsize); + + write(textfp[cacheTestament-1]->getFd(), compressor->zBuf(&zsize), zsize); + + lseek(idxfp[cacheTestament-1]->getFd(), idxoff, SEEK_SET); + write(idxfp[cacheTestament-1]->getFd(), &outstart, 4); + write(idxfp[cacheTestament-1]->getFd(), &outzsize, 4); + write(idxfp[cacheTestament-1]->getFd(), &outsize, 4); + } + dirtyCache = false; + } +} + +/****************************************************************************** + * 
RawVerse::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void zVerse::linkentry(char testmt, long destidxoff, long srcidxoff) { + long bufidx; + long start; + unsigned short size; + + destidxoff *= 10; + srcidxoff *= 10; + + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + + // get source + lseek(compfp[testmt-1]->getFd(), srcidxoff, SEEK_SET); + read(compfp[testmt-1]->getFd(), &bufidx, 4); + read(compfp[testmt-1]->getFd(), &start, 4); + read(compfp[testmt-1]->getFd(), &size, 2); + + // write dest + lseek(compfp[testmt-1]->getFd(), destidxoff, SEEK_SET); + write(compfp[testmt-1]->getFd(), &bufidx, 4); + write(compfp[testmt-1]->getFd(), &start, 4); + write(compfp[testmt-1]->getFd(), &size, 2); +} + + +/****************************************************************************** + * RawVerse::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +char zVerse::createModule(const char *ipath, int blockBound) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s/ot.%czs", path, uniqueIndexID[blockBound]); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/nt.%czs", path, uniqueIndexID[blockBound]); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/ot.%czz", path, uniqueIndexID[blockBound]); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + 
FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/nt.%czz", path, uniqueIndexID[blockBound]); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/ot.%czv", path, uniqueIndexID[blockBound]); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + + sprintf(buf, "%s/nt.%czv", path, uniqueIndexID[blockBound]); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + + VerseKey vk; + vk.Headings(1); + long offset = 0; + short size = 0; + for (vk = TOP; !vk.Error(); vk++) { + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &offset, 4); //compBufIdxOffset + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &offset, 4); + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &size, 2); + } + + FileMgr::systemFileMgr.close(fd); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; +/* + RawVerse rv(path); + VerseKey mykey("Rev 22:21"); +*/ + + return 0; +} + + +/****************************************************************************** + * zVerse::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. + */ + +void zVerse::preptext(char *buf) +{ + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 
0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = nl; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; + *to++ = nl; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + if (to > buf) { + for (to--; to > buf; to--) { // remove trailing excess + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } + } +} diff --git a/src/modules/filters/Makefile b/src/modules/filters/Makefile new file mode 100644 index 0000000..1a2d00d --- /dev/null +++ b/src/modules/filters/Makefile @@ -0,0 +1,5 @@ + +root := ../../.. + +all: + make -C ${root} diff --git a/src/modules/filters/Makefile.am b/src/modules/filters/Makefile.am new file mode 100644 index 0000000..65b1504 --- /dev/null +++ b/src/modules/filters/Makefile.am @@ -0,0 +1,77 @@ +filtersdir = $(top_srcdir)/src/modules/filters + +libsword_la_SOURCES += $(filtersdir)/swbasicfilter.cpp + +libsword_la_SOURCES += $(filtersdir)/gbfhtml.cpp +libsword_la_SOURCES += $(filtersdir)/gbfhtmlhref.cpp +libsword_la_SOURCES += $(filtersdir)/gbfplain.cpp +libsword_la_SOURCES += $(filtersdir)/gbfrtf.cpp +libsword_la_SOURCES += $(filtersdir)/plainhtml.cpp +libsword_la_SOURCES += $(filtersdir)/rwphtml.cpp +libsword_la_SOURCES += $(filtersdir)/rwprtf.cpp +libsword_la_SOURCES += $(filtersdir)/cipherfil.cpp +libsword_la_SOURCES += $(filtersdir)/rtfhtml.cpp + +libsword_la_SOURCES += $(filtersdir)/gbfstrongs.cpp +libsword_la_SOURCES += $(filtersdir)/gbffootnotes.cpp +libsword_la_SOURCES += $(filtersdir)/gbfheadings.cpp +libsword_la_SOURCES += $(filtersdir)/gbfmorph.cpp +libsword_la_SOURCES += $(filtersdir)/plainfootnotes.cpp +libsword_la_SOURCES += $(filtersdir)/thmlstrongs.cpp +libsword_la_SOURCES += $(filtersdir)/thmlfootnotes.cpp +libsword_la_SOURCES += $(filtersdir)/thmlheadings.cpp +libsword_la_SOURCES += $(filtersdir)/thmlmorph.cpp +libsword_la_SOURCES 
+= $(filtersdir)/thmllemma.cpp +libsword_la_SOURCES += $(filtersdir)/thmlscripref.cpp +libsword_la_SOURCES += $(filtersdir)/thmlvariants.cpp + +libsword_la_SOURCES += $(filtersdir)/gbfthml.cpp +libsword_la_SOURCES += $(filtersdir)/thmlgbf.cpp +libsword_la_SOURCES += $(filtersdir)/thmlrtf.cpp +libsword_la_SOURCES += $(filtersdir)/thmlhtml.cpp +libsword_la_SOURCES += $(filtersdir)/thmlhtmlhref.cpp +libsword_la_SOURCES += $(filtersdir)/thmlosis.cpp +libsword_la_SOURCES += $(filtersdir)/gbfosis.cpp +libsword_la_SOURCES += $(filtersdir)/thmlplain.cpp + +libsword_la_SOURCES += $(filtersdir)/unicodertf.cpp +libsword_la_SOURCES += $(filtersdir)/latin1utf8.cpp +libsword_la_SOURCES += $(filtersdir)/latin1utf16.cpp +libsword_la_SOURCES += $(filtersdir)/utf8utf16.cpp +libsword_la_SOURCES += $(filtersdir)/utf16utf8.cpp +libsword_la_SOURCES += $(filtersdir)/scsuutf8.cpp +libsword_la_SOURCES += $(filtersdir)/utf8html.cpp +libsword_la_SOURCES += $(filtersdir)/utf8latin1.cpp + +libsword_la_SOURCES += $(filtersdir)/thmlolb.cpp + +libsword_la_SOURCES += $(filtersdir)/greeklexattribs.cpp + +if ICU +ICUDEFS = -D_ICU_ +SWICUSRC = $(filtersdir)/utf8transliterator.cpp +SWICUSRC += $(filtersdir)/utf8nfc.cpp +SWICUSRC += $(filtersdir)/utf8nfkd.cpp +SWICUSRC += $(filtersdir)/utf8arshaping.cpp +SWICUSRC += $(filtersdir)/utf8bidireorder.cpp +else +SWICUSRC = +ICUDEFS = +endif + +if ICUSWORD +ICUDEFS = -D_ICU_ -D_ICUSWORD_ +SWICUSRC = $(filtersdir)/utf8transliterator.cpp +SWICUSRC += $(filtersdir)/utf8nfc.cpp +SWICUSRC += $(filtersdir)/utf8nfkd.cpp +SWICUSRC += $(filtersdir)/utf8arshaping.cpp +SWICUSRC += $(filtersdir)/utf8bidireorder.cpp +endif + +libsword_la_SOURCES += $(SWICUSRC) +DEFS += $(ICUDEFS) + +libsword_la_SOURCES += $(filtersdir)/utf8cantillation.cpp +libsword_la_SOURCES += $(filtersdir)/utf8hebrewpoints.cpp +libsword_la_SOURCES += $(filtersdir)/utf8greekaccents.cpp + diff --git a/src/modules/filters/cipherfil.cpp b/src/modules/filters/cipherfil.cpp new file mode 100644 index 
0000000..ad55396 --- /dev/null +++ b/src/modules/filters/cipherfil.cpp @@ -0,0 +1,38 @@ +/****************************************************************************** + * + * cipherfil - SWFilter decendant to decipher a module + */ + + +#include <stdlib.h> +#include <string.h> +#include <cipherfil.h> + + +CipherFilter::CipherFilter(const char *key) { + cipher = new SWCipher((unsigned char *)key); +} + + +CipherFilter::~CipherFilter() { + delete cipher; +} + + +SWCipher *CipherFilter::getCipher() { + return cipher; +} + + +char CipherFilter::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + unsigned int len; +// len = strlen(text); + len = maxlen; + if (len > 0) { + cipher->cipherBuf(&len, text); + strncpy(text, cipher->Buf(), (len < (unsigned int)maxlen) ? len : maxlen); + } + text[maxlen] = 0; + text[maxlen+1] = 0; + return 0; +} diff --git a/src/modules/filters/gbffootnotes.cpp b/src/modules/filters/gbffootnotes.cpp new file mode 100644 index 0000000..c5b7b90 --- /dev/null +++ b/src/modules/filters/gbffootnotes.cpp @@ -0,0 +1,118 @@ +/****************************************************************************** + * + * gbffootnotes - SWFilter decendant to hide or show footnotes + * in a GBF module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <gbffootnotes.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char GBFFootnotes::on[] = "On"; +const char GBFFootnotes::off[] = "Off"; +const char GBFFootnotes::optName[] = "Footnotes"; +const char GBFFootnotes::optTip[] = "Toggles Footnotes On and Off if they exist"; + + +GBFFootnotes::GBFFootnotes() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +GBFFootnotes::~GBFFootnotes() { +} + +void GBFFootnotes::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *GBFFootnotes::getOptionValue() +{ + return (option) ? 
on:off; +} + +char GBFFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want footnotes + char *to, *from, token[4096]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; +// memset(token, 0, 4096); + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + switch (*token) { + case 'R': // Reference + switch(token[1]) { + case 'F': // Begin footnote + hide = true; + break; + case 'f': // end footnote + hide = false; + break; + } + continue; // skip token + case 'W': + if (token[1] == 'T') { + switch (token[2]) { + case 'P': + case 'S': + case 'A': + continue; // remove this token + default: + break; + } + } + } + // if not a footnote token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 4090) + token[tokpos++] = *from; + token[tokpos+2] = 0; // +2 cuz we init token with 2 extra '0' because of switch statement + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/gbfheadings.cpp b/src/modules/filters/gbfheadings.cpp new file mode 100644 index 0000000..590e2fa --- /dev/null +++ b/src/modules/filters/gbfheadings.cpp @@ -0,0 +1,107 @@ +/****************************************************************************** + * + * gbfheadings - SWFilter decendant to hide or show headings + * in a GBF module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfheadings.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char GBFHeadings::on[] = "On"; +const char GBFHeadings::off[] = "Off"; +const char GBFHeadings::optName[] = "Headings"; +const char GBFHeadings::optTip[] = "Toggles Headings On and Off if they exist"; + + +GBFHeadings::GBFHeadings() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +GBFHeadings::~GBFHeadings() { +} + +void GBFHeadings::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *GBFHeadings::getOptionValue() +{ + return (option) ? on:off; +} + +char GBFHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want headings + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; +// memset(token, 0, 2048); + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + switch (*token) { + case 'T': // Reference + switch(token[1]) { + case 'S': // Begin heading + hide = true; + break; + case 's': // end heading + hide = false; + break; + } + continue; // skip token + } + // if not a heading token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/gbfhtml.cpp 
b/src/modules/filters/gbfhtml.cpp new file mode 100644 index 0000000..73d445a --- /dev/null +++ b/src/modules/filters/gbfhtml.cpp @@ -0,0 +1,536 @@ +/*************************************************************************** + gbfhtml.cpp - description + ------------------- + begin : Thu Jun 24 1999 + copyright : (C) 1999 by Torsten Uhlmann + email : TUhlmann@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <gbfhtml.h> + + +GBFHTML::GBFHTML() +{ +} + + +char GBFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + bool hasFootnotePreTag = false; + bool isRightJustified = false; + bool isCentered = false; + int len; + const char *tok; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else + from = text; // ------------------------------- + + for (to = text; *from; from++) + { + if (*from == '\n') { + *from = ' '; + } + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) + { + case 'G': // Greek + case 'H': // Hebrew + case 'T': // Tense + *to++ = ' '; + *to++ = '<'; + *to++ = 's'; + *to++ = 'm'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'l'; 
+ *to++ = '>'; + *to++ = '<'; + *to++ = 'e'; + *to++ = 'm'; + *to++ = '>'; + for (tok = token+2; *tok; tok++) + *to++ = *tok; + *to++ = '<'; + *to++ = '/'; + *to++ = 'e'; + *to++ = 'm'; + *to++ = '>'; + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'm'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'l'; + *to++ = '>'; + *to++ = ' '; + continue; + } + break; + case 'R': + switch(token[1]) + { + case 'X': + *to++ = '<'; + *to++ = 'a'; + *to++ = ' '; + *to++ = 'h'; + *to++ = 'r'; + *to++ = 'e'; + *to++ = 'f'; + *to++ = '='; + *to++ = '\"'; + for (tok = token + 3; *tok; tok++) { + if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') { + *to++ = *tok; + } + else { + break; + } + } + *to++ = '\"'; + *to++ = '>'; + continue; + case 'x': + *to++ = '<'; + *to++ = '/'; + *to++ = 'a'; + *to++ = '>'; + continue; + case 'B': //word(s) explained in footnote + *to++ = '<'; + *to++ = 'i'; + *to++ = '>'; + hasFootnotePreTag = true; //we have the RB tag + continue; + case 'F': // footnote begin + if (hasFootnotePreTag) { + *to++ = '<'; + *to++ = '/'; + *to++ = 'i'; + *to++ = '>'; + *to++ = ' '; + } + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'o'; + *to++ = 'l'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = '='; + *to++ = '\"'; + *to++ = '#'; + *to++ = '8'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '\"'; + *to++ = '>'; + + *to++ = ' '; + *to++ = '<'; + *to++ = 's'; + *to++ = 'm'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'l'; + *to++ = '>'; + *to++ = '('; + + continue; + case 'f': // footnote end + *to++ = ')'; + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'm'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'l'; + *to++ = '>'; + *to++ = ' '; + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + hasFootnotePreTag = false; + continue; + } + break; + + case 'F': // font tags + switch(token[1]) + { + case 'I': // italic start + 
*to++ = '<'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'i': // italic end + *to++ = '<'; + *to++ = '/'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'B': // bold start + *to++ = '<'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'b': // bold end + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'R': // words of Jesus begin + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'o'; + *to++ = 'l'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = '='; + *to++ = '#'; + *to++ = 'F'; + *to++ = 'F'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '>'; + continue; + case 'r': // words of Jesus end + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + continue; + case 'U': // Underline start + *to++ = '<'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'u': // Underline end + *to++ = '<'; + *to++ = '/'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'O': // Old Testament quote begin + *to++ = '<'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'o': // Old Testament quote end + *to++ = '<'; + *to++ = '/'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'S': // Superscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 's': // Superscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 'V': // Subscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'v': // Subscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'N': + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'f'; + *to++ = 'a'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = '='; + 
*to++ = '"'; + for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = '>'; + continue; + case 'n': + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + continue; + } + break; + case 'C': // special character tags + switch(token[1]) + { + case 'A': // ASCII value + *to++ = (char)atoi(&token[2]); + continue; + case 'G': + //*to++ = ' '; + continue; + case 'L': // line break + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + *to++ = ' '; + continue; + case 'M': // new paragraph + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + case 'T': + //*to++ = ' '; + continue; + } + break; + case 'J': //Justification + switch(token[1]) + { + case 'R': //right + *to++ = '<'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = ' '; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = 'n'; + *to++ = '='; + *to++ = '\"'; + *to++ = 'r'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = 'h'; + *to++ = 't'; + *to++ = '\"'; + *to++ = '>'; + isRightJustified = true; + continue; + + case 'C': //center + *to++ = '<'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = ' '; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = 'n'; + *to++ = '='; + *to++ = '\"'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = 'n'; + *to++ = 't'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = '\"'; + *to++ = '>'; + isCentered = true; + continue; + + case 'L': //left, reset right and center + if (isCentered) { + *to++ = '<'; + *to++ = '/'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = 'n'; + *to++ = 't'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = '>'; + isCentered = false; + } + if (isRightJustified) { + *to++ = '<'; + *to++ = '/'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = '>'; + isRightJustified = false; + } + continue; + } + break; + case 'T': // title formatting + switch(token[1]) + { + case 'T': // Book title begin + 
*to++ = '<'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 't': + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue;/* + case 'S': + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + *to++ = '<'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 's': + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue;*/ + } + break; + + case 'P': // special formatting + switch(token[1]) + { + case 'P': // Poetry begin + *to++ = '<'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'p': + *to++ = '<'; + *to++ = '/'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + } + break; + } + continue; + } + if (intoken) { + if (tokpos < 2045) { + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + } + else + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/gbfhtmlhref.cpp b/src/modules/filters/gbfhtmlhref.cpp new file mode 100644 index 0000000..30b27ba --- /dev/null +++ b/src/modules/filters/gbfhtmlhref.cpp @@ -0,0 +1,148 @@ +/*************************************************************************** + gbfhtmlhref.cpp - GBF to HTML filter with hrefs + for strongs and morph tags + ------------------- + begin : 2001-09-03 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later 
version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <gbfhtmlhref.h> + +// Registers the fixed GBF token to HTML substitutions via addTokenSubstitute(); tokens are delimited by "<" and ">" (setTokenStart/setTokenEnd) and setTokenCaseSensitive(true) is requested. +GBFHTMLHREF::GBFHTMLHREF() { + setTokenStart("<"); + setTokenEnd(">"); + + setTokenCaseSensitive(true); + + addTokenSubstitute("Rf", ")</small></font>"); + addTokenSubstitute("Rx", "</a>"); + addTokenSubstitute("FI", "<i>"); // italics begin + addTokenSubstitute("Fi", "</i>"); + addTokenSubstitute("FB", "<b>"); // bold begin (was "<n>", which is not an HTML element) + addTokenSubstitute("Fb", "</b>"); + addTokenSubstitute("FR", "<font color=\"#FF0000\">"); // words of Jesus begin + addTokenSubstitute("Fr", "</font>"); + addTokenSubstitute("FU", "<u>"); // underline begin + addTokenSubstitute("Fu", "</u>"); + addTokenSubstitute("FO", "<cite>"); // Old Testament quote begin + addTokenSubstitute("Fo", "</cite>"); + addTokenSubstitute("FS", "<sup>"); // Superscript begin + addTokenSubstitute("Fs", "</sup>"); + addTokenSubstitute("FV", "<sub>"); // Subscript begin + addTokenSubstitute("Fv", "</sub>"); + addTokenSubstitute("TT", "<big>"); // Book title begin + addTokenSubstitute("Tt", "</big>"); + addTokenSubstitute("PP", "<cite>"); // poetry begin + addTokenSubstitute("Pp", "</cite>"); + addTokenSubstitute("Fn", "</font>"); // font end + addTokenSubstitute("CL", "<br />"); // new line + addTokenSubstitute("CM", "<!P><br />"); // paragraph <!P> is a non showing comment that can be changed in the front end to <P> if desired + addTokenSubstitute("CG", ""); // ??? + addTokenSubstitute("CT", ""); // ???
+ addTokenSubstitute("JR", "<div align=\"right\">"); // right align begin + addTokenSubstitute("JC", "<div align=\"center\">"); // center align begin + addTokenSubstitute("JL", "</div>"); // align end + +} + + +// Writes the HTML for one GBF token at *buf and advances *buf. Tokens with a fixed substitution are handled by substituteToken(); the rest are handled here: Strong's numbers (WG/WH), tense (WTG/WTH), morph tags (WT), cross references (RX), footnotes (RB/RF), font face (FN) and ASCII values (CA). Returns false only when the token is not recognised. +bool GBFHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &userData) { + const char *tok; + + if (!substituteToken(buf, token)) { + if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers + pushString(buf, " <small><em><<a href=\"#"); + for (tok = token+1; *tok; tok++) + //if(token[i] != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + for (tok = token + 2; *tok; tok++) + //if(token[i] != '\"') + *(*buf)++ = *tok; + pushString(buf, "</a>></em></small>"); + } + + else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) { // strong's numbers tense + pushString(buf, " <small><em>(<A HREF=\"#"); + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + for (tok = token + 3; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + pushString(buf, "</a>)</em></small>"); + } + + else if (!strncmp(token, "WT", 2) && strncmp(token, "WTH", 3) && strncmp(token, "WTG", 3)) { // morph tags + pushString(buf, " <small><em>(<a href=\"M"); + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + pushString(buf, "</a>)</em></small>"); + } + + else if (!strncmp(token, "RX", 2)) { + pushString(buf, "<a href=\""); + // copy the reference up to the first '<'; the old test compared (*tok)+1 with 'R' and (*tok)+2 with 'x', so any 'Q' or 'v' in the reference cut the href short + for (tok = token + 3; *tok; tok++) { + if(*tok != '<') { + *(*buf)++ = *tok; + } + else { + break; + } + } + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + } + + else if (!strncmp(token, "RB", 2)) { + pushString(buf, "<i>"); + userData["hasFootnotePreTag"] = "true"; + } + + else if (!strncmp(token, "RF", 2)) { + if(userData["hasFootnotePreTag"] == "true") { +
userData["hasFootnotePreTag"] = "false"; + pushString(buf, "</i> "); + } + pushString(buf, "<font color=\"#800000\"><small> ("); + } + + else if (!strncmp(token, "FN", 2)) { + pushString(buf, "<font face=\""); + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + } + + else if (!strncmp(token, "CA", 2)) { // ASCII value + *(*buf)++ = (char)atoi(&token[2]); + } + + else { + return false; + } + } + return true; +} diff --git a/src/modules/filters/gbfmorph.cpp b/src/modules/filters/gbfmorph.cpp new file mode 100644 index 0000000..f8d336e --- /dev/null +++ b/src/modules/filters/gbfmorph.cpp @@ -0,0 +1,98 @@ +/****************************************************************************** + * + * gbfmorph - SWFilter decendant to hide or show morph tags + * in a GBF module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfmorph.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char GBFMorph::on[] = "On"; +const char GBFMorph::off[] = "Off"; +const char GBFMorph::optName[] = "Morphological Tags"; +const char GBFMorph::optTip[] = "Toggles Morphological Tags On and Off if they exist"; + + +GBFMorph::GBFMorph() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +GBFMorph::~GBFMorph() { +} + +void GBFMorph::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *GBFMorph::getOptionValue() +{ + return (option) ? on:off; +} + +char GBFMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want morph tags + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (*token == 'W' && token[1] == 'T') { // Morph + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + // if not a morph tag token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/gbfosis.cpp b/src/modules/filters/gbfosis.cpp new file mode 100644 index 0000000..43161d4 --- /dev/null +++ b/src/modules/filters/gbfosis.cpp @@ -0,0 +1,313 @@ +/****************************************************************************** + * + * gbfstrongs - SWFilter decendant to hide or show strongs number + * in a GBF module. + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#include <gbfosis.h> +#include <swmodule.h> +#include <versekey.h> +#include <stdarg.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +GBFOSIS::GBFOSIS() { +} + + +GBFOSIS::~GBFOSIS() { +} + + +char GBFOSIS::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char buf[128]; + char wordstr[5]; + char *valto; + char *ch; + char *textStart, *textEnd; + char *wordStart, *wordEnd; + bool newText = false; + bool newWord = false; + string tmp; + bool suspendTextPassThru = false; + bool keepToken = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + textStart = from; + wordStart = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = from-1; + wordEnd = to; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + keepToken = false; + suspendTextPassThru = false; + newWord = true; + + + while (wordStart < (text+maxlen)) { +// if (strchr(" ,;.?!()'\"", *wordStart)) + if (strchr(";, .:?!()'\"", *wordStart)) + wordStart++; + else break; + } + while (wordEnd > wordStart) { + if (strchr(" ,;.:?!()'\"", *wordEnd)) + wordEnd--; + else break; + } + + // Scripture Reference + if (!strncmp(token, "scripRef", 8)) { + // pushString(buf, "<reference work=\"Bible.KJV\" reference=\""); + suspendTextPassThru = true; + newText = true; + } + else if (!strncmp(token, "/scripRef", 9)) { + tmp = ""; + tmp.append(textStart, (int)(textEnd - textStart)+1); + pushString(&to, convertToOSIS(tmp.c_str(), key)); + suspendTextPassThru = false; + } + + // Footnote + if (!strcmp(token, "RF")) { + // pushString(buf, "<reference work=\"Bible.KJV\" reference=\""); + suspendTextPassThru = true; + newText = true; + } + else if (!strcmp(token, "Rf")) { + tmp = "<note type=\"x-StudyNote\"><notePart type=\"x-MainText\">"; + tmp.append(textStart, (int)(textEnd - textStart)+1); + tmp += "</notePart></note>"; + pushString(&to, tmp.c_str()); + suspendTextPassThru 
= false; + } + + // Figure + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // no src attribute: drop the token and keep filtering (the old "return false" left the function before the output was terminated) + continue; + + pushString(&to, "<figure src=\""); + const char *c; + for (c = src;((*c) && (*c != '"')); c++); + + /* uncomment for SWORD absolute path logic + if (*(c+1) == '/') { + pushString(buf, "file:"); + pushString(buf, module->getConfigEntry("AbsoluteDataPath")); + if (*((*buf)-1) == '/') + c++; // skip '/' + } + end of uncomment for asolute path logic */ + + for (c++;((*c) && (*c != '"')); c++) + *to++ = *c; + + pushString(&to, "\" />"); + continue; // was "return true", which abandoned the rest of the text + } + + // Strongs numbers + else if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs + if (module->isProcessEntryAttributes()) { + valto = val; + // at most 100 characters: val is char[128] and the longest wrapper formatted into buf[128] below adds 27 bytes plus the terminator (the old bound of 150 overran both arrays) + for (unsigned int i = 1; ((token[i]) && (i <= 100)); i++) + *valto++ = token[i]; + *valto = 0; + // normal strongs number + strstrip(val); + if (!strncmp(wordStart, "<w ", 3)) { + sprintf(buf, "lemma=\"x-Strong:%s\" ", val); + memmove(wordStart+3+strlen(buf), wordStart+3, (to-wordStart)+1); + memcpy(wordStart+3, buf, strlen(buf)); + to+=strlen(buf); + } + else { + sprintf(buf, "<w lemma=\"x-Strong:%s\">", val); + memmove(wordStart+strlen(buf), wordStart, (to-wordStart)+1); + memcpy(wordStart, buf, strlen(buf)); + to+=strlen(buf); + pushString(&to, "</w>"); + // wordstr was read uninitialised here; number the word the same way GBFStrongs::ProcessText does + sprintf(wordstr, "%03d", word++); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; + } + } + } + + // Morphology + else if (*token == 'W' && token[1] == 'T' && (token[2] == 'G' || token[2] == 'H')) { // Strongs + valto = val; + // same 100 character cap as for Strongs numbers above + for (unsigned int i = 1; ((token[i]) && (i <= 100)); i++) + *valto++ = token[i]; + *valto = 0; + strstrip(val); + if (!strncmp(wordStart, "<w ", 3)) { + sprintf(buf, "morph=\"x-%s:%s\" ", "StrongsMorph", val); + memmove(wordStart+3+strlen(buf), wordStart+3, (to-wordStart)+1); + memcpy(wordStart+3, buf, strlen(buf)); + to+=strlen(buf); + } + else { + sprintf(buf, "<w morph=\"x-%s:%s\">", "StrongsMorph", val); +
memmove(wordStart+strlen(buf), wordStart, (to-wordStart)+1); + memcpy(wordStart, buf, strlen(buf)); + to+=strlen(buf); + pushString(&to, "</w>"); + } + } + + if (!keepToken) { // if we don't want strongs + if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { + if (lastspace) + to--; + } + if (newText) {textStart = from+1; newText = false; } +// if (newWord) {wordStart = to; newWord = false; } + continue; + } + // if not a strongs token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + if (newText) {textStart = to; newWord = false; } +// if (newWord) {wordStart = to; newWord = false; } + continue; + } + if (intoken) { + if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + } + else { + if (newWord && (*from != ' ')) {wordStart = to; newWord = false; memset(to, 0, 10); } + if (!suspendTextPassThru) { + *to++ = *from; + lastspace = (*from == ' '); + } + } + } + + VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key); + if (vkey) { + char ref[254]; + if (vkey->Verse()) + sprintf(ref, "<verseStart ref=\"%s\" />", vkey->getOSISRef()); + else if (vkey->Chapter()) + sprintf(ref, "<chapterStart ref=\"%s\" />", vkey->getOSISRef()); + else if (vkey->Book()) + sprintf(ref, "<bookStart ref=\"%s\" />", vkey->getOSISRef()); + else *ref = 0; + if (*ref) { + memmove(text+strlen(ref), text, maxlen-strlen(ref)-1); + memcpy(text, ref, strlen(ref)); + to+=strlen(ref); + if (vkey->Verse()) { + VerseKey tmp; + tmp = *vkey; + tmp.AutoNormalize(0); + tmp.Headings(1); + sprintf(ref, "<verseEnd ref=\"%s\" />", vkey->getOSISRef()); + pushString(&to, ref); + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Verse(0); + sprintf(ref, "<chapterEnd ref=\"%s\" />", tmp.getOSISRef()); + pushString(&to, ref); + tmp = MAXCHAPTER; + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Chapter(0); + tmp.Verse(0); + sprintf(ref, "<bookEnd ref=\"%s\" />", tmp.getOSISRef()); + pushString(&to, ref); + } + } + } + + 
else if (vkey->Chapter()) + sprintf(ref, "<chapterStart ref=\"%s\" />", vkey->getOSISRef()); + else sprintf(ref, "<bookStart ref=\"%s\" />", vkey->getOSISRef()); + } + } + *to++ = 0; + *to = 0; + return 0; +} + + +void GBFOSIS::pushString(char **buf, const char *format, ...) { + va_list argptr; + + va_start(argptr, format); + *buf += vsprintf(*buf, format, argptr); + va_end(argptr); + +// *buf += strlen(*buf); +} + + +const char *GBFOSIS::convertToOSIS(const char *inRef, const SWKey *key) { + static string outRef; + + outRef = ""; + + VerseKey defLanguage; + ListKey verses = defLanguage.ParseVerseList(inRef, (*key), true); + const char *startFrag = inRef; + for (int i = 0; i < verses.Count(); i++) { + VerseKey *element = SWDYNAMIC_CAST(VerseKey, verses.GetElement(i)); + char buf[5120]; + char frag[800]; + if (element) { + memmove(frag, startFrag, ((const char *)element->userData - startFrag) + 1); + frag[((const char *)element->userData - startFrag) + 1] = 0; + startFrag = (const char *)element->userData + 1; + sprintf(buf, "<reference refStart=\"KJV:%s\" refEnd=\"%s\">%s</reference>", element->LowerBound().getOSISRef(), element->UpperBound().getOSISRef(), frag); + } + else { + memmove(frag, startFrag, ((const char *)verses.GetElement(i)->userData - startFrag) + 1); + frag[((const char *)verses.GetElement(i)->userData - startFrag) + 1] = 0; + startFrag = (const char *)verses.GetElement(i)->userData + 1; + sprintf(buf, "<reference refStart=\"KJV:%s\">%s</reference>", VerseKey(*verses.GetElement(i)).getOSISRef(), frag); + } + outRef+=buf; + } + return outRef.c_str(); +} diff --git a/src/modules/filters/gbfplain.cpp b/src/modules/filters/gbfplain.cpp new file mode 100644 index 0000000..65766d3 --- /dev/null +++ b/src/modules/filters/gbfplain.cpp @@ -0,0 +1,106 @@ +/****************************************************************************** + * + * gbfplain - SWFilter decendant to strip out all GBF tags or convert to + * ASCII rendered symbols. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfplain.h> + + +GBFPlain::GBFPlain() { +} + + +char GBFPlain::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) { + case 'G': // Greek + case 'H': // Hebrew + case 'T': // Tense + *to++ = ' '; + *to++ = '<'; + for (char *tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + *to++ = ' '; + continue; + } + break; + case 'R': + switch(token[1]) { + case 'F': // footnote begin + *to++ = ' '; + *to++ = '['; + continue; + case 'f': // footnote end + *to++ = ']'; + *to++ = ' '; + continue; + } + break; + case 'C': + switch(token[1]) { + case 'A': // ASCII value + *to++ = (char)atoi(&token[2]); + continue; + case 'G': + *to++ = '>'; + continue; +/* Bug in WEB + case 'L': + *to++ = '<'; + continue; +*/ + case 'L': // Bug in WEB. 
Use above entry when fixed + case 'N': // new line + *to++ = '\n'; + continue; + case 'M': // new paragraph + *to++ = '\n'; + *to++ = '\n'; + continue; + } + break; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/gbfrtf.cpp b/src/modules/filters/gbfrtf.cpp new file mode 100644 index 0000000..40e5752 --- /dev/null +++ b/src/modules/filters/gbfrtf.cpp @@ -0,0 +1,298 @@ +/****************************************************************************** + * + * gbfrtf - SWFilter decendant to convert all GBF tags to RTF tags + */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfrtf.h> +#include <ctype.h> + +GBFRTF::GBFRTF() { +} + + +char GBFRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + char token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + const char *tok; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char *)&text[maxlen - len]; + } + else from = (unsigned char *)text; // ------------------------------- + for (to = (unsigned char *)text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) { + case 'G': // Greek + case 'H': // Hebrew + *to++ = '{'; + *to++ = '\\'; + *to++ = 'c'; + *to++ = 'f'; + *to++ = '3'; + *to++ = ' '; + *to++ = '\\'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = ' '; + *to++ = '<'; + for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + *to++ = '}'; + continue; + + case 'T': // Tense + *to++ = '{'; + *to++ = '\\'; + *to++ = 'c'; + *to++ = 'f'; + *to++ = '4'; + 
*to++ = ' '; + *to++ = '\\'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = ' '; + *to++ = '('; + bool separate = false; + for (tok = token + 2; *tok; tok++) { + if (separate) { + *to++ = ';'; + *to++ = ' '; + separate = false; + } + switch (*tok) { + case 'G': + case 'H': + for (tok++; *tok; tok++) { + if (isdigit(*tok)) { + *to++ = *tok; + separate = true; + } + else { + tok--; + break; + } + } + break; + default: + for (; *tok; tok++) { + *to++ = *tok; + } + } + } + *to++ = ')'; + *to++ = '}'; + continue; + } + break; + case 'R': + switch(token[1]) { + case 'X': + *to++ = '<'; + *to++ = 'a'; + *to++ = ' '; + *to++ = 'h'; + *to++ = 'r'; + *to++ = 'e'; + *to++ = 'f'; + *to++ = '='; + *to++ = '"'; + *to++ = '"'; + *to++ = '>'; + continue; + case 'x': + *to++ = '<'; + *to++ = '/'; + *to++ = 'a'; + *to++ = '>'; + continue; + case 'F': // footnote begin + *to++ = '{'; + *to++ = '\\'; + *to++ = 'i'; + *to++ = '1'; + *to++ = ' '; + *to++ = '\\'; + *to++ = 'f'; + *to++ = 's'; + *to++ = '1'; + *to++ = '7'; + *to++ = ' '; + *to++ = '('; + continue; + case 'f': // footnote end + *to++ = ')'; + *to++ = ' '; + *to++ = '}'; + continue; + } + break; + case 'F': // font tags + switch(token[1]) { + case 'I': // italic start + *to++ = '\\'; + *to++ = 'i'; + *to++ = '1'; + *to++ = ' '; + continue; + case 'i': // italic end + *to++ = '\\'; + *to++ = 'i'; + *to++ = '0'; + *to++ = ' '; + continue; + case 'B': // bold start + *to++ = '\\'; + *to++ = 'b'; + *to++ = '1'; + *to++ = ' '; + continue; + case 'b': // bold end + *to++ = '\\'; + *to++ = 'b'; + *to++ = '0'; + *to++ = ' '; + continue; + case 'N': + *to++ = '{'; + if (!strnicmp(token+2, "Symbol", 6)) { + *to++ = '\\'; + *to++ = 'f'; + *to++ = '7'; + *to++ = ' '; + } + continue; + case 'n': + *to++ = '}'; + continue; + case 'S': + *to++ = '{'; + *to++ = '\\'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = ' '; + continue; + case 's': + *to++ = '}'; + continue; + case 'R': + *to++ = '{'; 
+ *to++ = '\\'; + *to++ = 'c'; + *to++ = 'f'; + *to++ = '6'; + *to++ = ' '; + continue; + case 'r': + *to++ = '}'; + continue; + } + break; + case 'C': // special character tags + switch(token[1]) { + case 'A': // ASCII value + *to++ = (char)atoi(&token[2]); + continue; + case 'G': + *to++ = '>'; + continue; + case 'L': // line break + *to++ = '\\'; + *to++ = 'l'; + *to++ = 'i'; + *to++ = 'n'; + *to++ = 'e'; + *to++ = ' '; + continue; + case 'M': // new paragraph + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + continue; + case 'T': + *to++ = '<'; + } + break; + case 'T': // title formatting + switch(token[1]) + { + case 'T': // Book title begin + *to++ = '{'; + *to++ = '\\'; + *to++ = 'f'; + *to++ = 's'; + *to++ = '2'; + *to++ = '2'; + *to++ = ' '; + continue; + case 't': + *to++ = '}'; + continue; + case 'S': + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '{'; + *to++ = '\\'; + *to++ = 'i'; + *to++ = '1'; + *to++ = '\\'; + *to++ = 'b'; + *to++ = '1'; + *to++ = ' '; + continue; + case 's': + *to++ = '}'; + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + continue; + } + break; + + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/gbfstrongs.cpp b/src/modules/filters/gbfstrongs.cpp new file mode 100644 index 0000000..cb722bd --- /dev/null +++ b/src/modules/filters/gbfstrongs.cpp @@ -0,0 +1,130 @@ +/****************************************************************************** + * + * gbfstrongs - SWFilter decendant to hide or show strongs number + * in a GBF module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfstrongs.h> +#include <swmodule.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif +#include <ctype.h> + +const char GBFStrongs::on[] = "On"; +const char GBFStrongs::off[] = "Off"; +const char GBFStrongs::optName[] = "Strong's Numbers"; +const char GBFStrongs::optTip[] = "Toggles Strong's Numbers On and Off if they exist"; + + +GBFStrongs::GBFStrongs() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +GBFStrongs::~GBFStrongs() { +} + +void GBFStrongs::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *GBFStrongs::getOptionValue() +{ + return (option) ? on:off; +} + +// Filters text in place: the input is first moved to the right end of the maxlen-byte buffer and the result is written from the start. <WG...>/<WH...> tokens are recorded as entry attributes when the module asks for them, and are removed from the text when the option is off; every other token is copied through. Always returns 0. +char GBFStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char wordstr[5]; + char *valto; + char *ch; + char *textStart = text, *textEnd = 0; + bool newText = false; + string tmp; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = to; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs + if (module->isProcessEntryAttributes()) { + valto = val; + // keep the copy inside val[128] including its terminator (the old bound of 150 allowed 148 bytes) + for (unsigned int i = 2; ((token[i]) && ((i - 2) < sizeof(val) - 1)); i++) + *valto++ = token[i]; + *valto = 0; + // skip one leading non-digit, but only if val is not empty; isdigit needs an unsigned char value + if (atoi((*val && !isdigit((unsigned char)*val))?val+1:val) < 5627) { + // normal strongs number + sprintf(wordstr, "%03d", word++); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; + tmp = ""; + tmp.append(textStart, (int)(textEnd -
textStart)); + module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; + newText = true; + } + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + if (!option) { + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + if (newText) {textStart = to; newText = false; } + continue; + } + } + // if not a strongs token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + if (newText) {textStart = to; newText = false; } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/gbfthml.cpp b/src/modules/filters/gbfthml.cpp new file mode 100644 index 0000000..ca03e71 --- /dev/null +++ b/src/modules/filters/gbfthml.cpp @@ -0,0 +1,463 @@ +/*************************************************************************** + gbfthml.cpp - GBF to ThML filter + ------------------- + begin : 1999-10-27 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <gbfthml.h> + + +GBFThML::GBFThML() +{ +} + + +char GBFThML::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + const char *tok; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) + { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') + { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) { + case 'G': + case 'H': + *to++ = '<'; + *to++ = 's'; + *to++ = 'y'; + *to++ = 'n'; + *to++ = 'c'; + *to++ = ' '; + *to++ = 't'; + *to++ = 'y'; + *to++ = 'p'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + *to++ = 'S'; + *to++ = 't'; + *to++ = 'r'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 'g'; + *to++ = 's'; + *to++ = '"'; + *to++ = ' '; + *to++ = 'v'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'u'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + for (tok = token + 1; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + + case 'T': // Tense + *to++ = '<'; + *to++ = 's'; + *to++ = 'y'; + *to++ = 'n'; + *to++ = 'c'; + *to++ = ' '; + *to++ = 't'; + *to++ = 'y'; + *to++ = 'p'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + *to++ = 'M'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = 'p'; + *to++ = 'h'; + *to++ = '"'; + *to++ = ' '; + *to++ = 'v'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'u'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + } + break; + case 'R': + 
switch(token[1]) + { + case 'X': + *to++ = '<'; + *to++ = 'a'; + *to++ = ' '; + *to++ = 'h'; + *to++ = 'r'; + *to++ = 'e'; + *to++ = 'f'; + *to++ = '='; + *to++ = '\"'; + // copy the reference up to the first '<'; the old test compared (*tok)+1 with 'R' and (*tok)+2 with 'x', so any 'Q' or 'v' in the reference cut the href short + for (tok = token + 3; *tok; tok++) { + if(*tok != '<') { + *to++ = *tok; + } + else { + break; + } + } + *to++ = '\"'; + *to++ = '>'; + continue; + case 'x': + *to++ = '<'; + *to++ = '/'; + *to++ = 'a'; + *to++ = '>'; + continue; + case 'F': // footnote begin + *to++ = '<'; + *to++ = 'n'; + *to++ = 'o'; + *to++ = 't'; + *to++ = 'e'; + *to++ = ' '; + *to++ = 'p'; + *to++ = 'l'; + *to++ = 'a'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'o'; + *to++ = 't'; + *to++ = '"'; + *to++ = '>'; + continue; + case 'f': // footnote end + *to++ = '<'; + *to++ = '/'; + *to++ = 'n'; + *to++ = 'o'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + } + break; + case 'F': // font tags + switch(token[1]) + { + case 'N': + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'f'; + *to++ = 'a'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = '>'; + continue; + case 'n': + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + continue; + case 'I': // italic start + *to++ = '<'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'i': // italic end + *to++ = '<'; + *to++ = '/'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'B': // bold start + *to++ = '<'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'b': // bold end + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = '>'; + continue; + + case 'R': // words of Jesus begin + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'o'; + *to++ = 'l'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = '='; + *to++ = '\"'; + *to++ = '#'; +
*to++ = 'f'; + *to++ = 'f'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '\"'; + *to++ = '>'; + continue; + case 'r': // words of Jesus end + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + continue; + case 'U': // Underline start + *to++ = '<'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'u': // Underline end + *to++ = '<'; + *to++ = '/'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'O': // Old Testament quote begin + *to++ = '<'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'o': // Old Testament quote end + *to++ = '<'; + *to++ = '/'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'S': // Superscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 's': // Superscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 'V': // Subscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'v': // Subscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + } + break; + case 'C': // special character tags + switch(token[1]) + { + case 'A': // ASCII value + *to++ = (char)atoi(&token[2]); + continue; + case 'G': + //*to++ = ' '; + continue; + case 'L': // line break + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + *to++ = ' '; + continue; + case 'M': // new paragraph + *to++ = '<'; + *to++ = 'p'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + case 'T': + //*to++ = ' '; + continue; + } + break; + case 'T': // title formatting + switch(token[1]) + { + case 'T': // Book title begin + *to++ = '<'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 't': + *to++ = '<'; + *to++ = '/'; + *to++ = 
'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 'S': + *to++ = '<'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'l'; + *to++ = 'a'; + *to++ = 's'; + *to++ = 's'; + *to++ = '='; + *to++ = '\"'; + *to++ = 's'; + *to++ = 'e'; + *to++ = 'c'; + *to++ = 'h'; + *to++ = 'e'; + *to++ = 'a'; + *to++ = 'd'; + *to++ = '\"'; + *to++ = '>'; + continue; + case 's': + *to++ = '<'; + *to++ = '/'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = '>'; + continue; + } + break; + + case 'P': // special formatting + switch(token[1]) + { + case 'P': // Poetry begin + *to++ = '<'; + *to++ = 'v'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = 's'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'p': + *to++ = '<'; + *to++ = '/'; + *to++ = 'v'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = 's'; + *to++ = 'e'; + *to++ = '>'; + continue; + } + break; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/greeklexattribs.cpp b/src/modules/filters/greeklexattribs.cpp new file mode 100644 index 0000000..0f85c6c --- /dev/null +++ b/src/modules/filters/greeklexattribs.cpp @@ -0,0 +1,96 @@ +/****************************************************************************** + * + * greeklexattribs - SWFilter decendant to set entry attributes for greek + * lexicons + */ + + +#include <stdlib.h> +#include <ctype.h> +#include <string.h> +#include <greeklexattribs.h> +#include <swmodule.h> + + +GreekLexAttribs::GreekLexAttribs() { +} + + +char GreekLexAttribs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + + if (module->isProcessEntryAttributes()) { + char *from; + bool inAV = false; + string phrase; + string freq; + char val[128], *valto; + char wordstr[7]; + char *currentPhrase = 0, *ch = 0; + char *currentPhraseEnd = 0; + int number = 0; + + + for (from = 
text; *from; from++) { + if (inAV) { + if (currentPhrase == 0) { + if (isalpha(*from)) + currentPhrase = from; + } + else { + if ((!isalpha(*from)) && (*from != ' ') && (*from != '+') && (*from !='(') && (*from != ')') && (*from != '\'')) { + if (*from == '<') { + if (!currentPhraseEnd) + currentPhraseEnd = from - 1; + for (; *from && *from != '>'; from++) { + if (!strncmp(from, "value=\"", 7)) { + valto = val; + from += 7; + for (unsigned int i = 0; from[i] != '\"' && i < 127; i++) + *valto++ = from[i]; + *valto = 0; + sprintf(wordstr, "%03d", number+1); + module->getEntryAttributes()["AVPhrase"][wordstr]["CompoundedWith"] = val; + from += strlen(val); + } + } + continue; + } + + phrase = ""; + phrase.append(currentPhrase, (int)(((currentPhraseEnd)?currentPhraseEnd:from) - currentPhrase)-1); + currentPhrase = from; + while (*from && isdigit(*from)) from++; + freq = ""; + freq.append(currentPhrase, (int)(from - currentPhrase)); + if ((freq.length() > 0) && (phrase.length() > 0)) { + sprintf(wordstr, "%03d", ++number); + if ((strchr(phrase.c_str(), '(') > phrase.c_str()) && (strchr(phrase.c_str(), ')') > phrase.c_str() + 1)) { + string tmp = phrase.substr(0, phrase.find_first_of("(")); + phrase.erase(phrase.find_first_of("("), 1); + phrase.erase(phrase.find_first_of(")"), 1); + phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1); + module->getEntryAttributes()["AVPhrase"][wordstr]["Alt"] = phrase; + phrase = tmp; + } + phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1); + freq.erase(0,freq.find_first_not_of("\r\n\v\t ")); freq.erase(freq.find_last_not_of("\r\n\v\t ")+1); + module->getEntryAttributes()["AVPhrase"][wordstr]["Phrase"] = phrase; + module->getEntryAttributes()["AVPhrase"][wordstr]["Frequency"] = freq; + currentPhrase = 0; + currentPhraseEnd = 0; + } + } + } + if (*from == ';') inAV = false; + + } + else if (!strncmp(from, "AV-", 3)) { + inAV = 
true; + from+=2; + } + } + } + return 0; +} + + diff --git a/src/modules/filters/latin1utf16.cpp b/src/modules/filters/latin1utf16.cpp new file mode 100644 index 0000000..75ee998 --- /dev/null +++ b/src/modules/filters/latin1utf16.cpp @@ -0,0 +1,120 @@ +/****************************************************************************** + * + * Latin1UTF16 - SWFilter decendant to convert a Latin-1 character to UTF-16 + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <latin1utf16.h> + +Latin1UTF16::Latin1UTF16() { +} + + +char Latin1UTF16::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *from; + unsigned short *to; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; + // ------------------------------- + + for (to = (unsigned short*)text; *from; from++) { + switch (*from) { + case 0x80: // '€' + *to++ = 0x20AC; + break; + case 0x82: // '‚' + *to++ = 0x201A; + break; + case 0x83: // 'ƒ' + *to++ = 0x0192; + break; + case 0x84: // '„' + *to++ = 0x201E; + break; + case 0x85: // '…' + *to++ = 0x2026; + break; + case 0x86: // '†' + *to++ = 0x2020; + break; + case 0x87: // '‡' + *to++ = 0x2021; + break; + case 0x88: // 'ˆ' + *to++ = 0x02C6; + break; + case 0x89: // '‰' + *to++ = 0x2030; + break; + case 0x8A: // 'Š' + *to++ = 0x0160; + break; + case 0x8B: // '‹' + *to++ = 0x2039; + break; + case 0x8C: // 'Œ' + *to++ = 0x0152; + break; + case 0x8E: // 'Ž' + *to++ = 0x017D; + break; + case 0x91: // '‘' + *to++ = 0x2018; + break; + case 0x92: // '’' + *to++ = 0x2019; + break; + case 0x93: // '“' + *to++ = 0x201C; + break; + case 0x94: // '”' + *to++ = 0x201D; + break; + case 0x95: // '•' + *to++ = 0x2022; + break; + case 0x96: // '–' + *to++ = 0x2013; + break; + case 0x97: // '—' + *to++ = 0x2014; + break; + case 0x98: // '˜' + *to++ = 0x02DC; + 
break; + case 0x99: // '™' + *to++ = 0x2122; + break; + case 0x9A: // 'š' + *to++ = 0x0161; + break; + case 0x9B: // '›' + *to++ = 0x203A; + break; + case 0x9C: // 'œ' + *to++ = 0x0153; + break; + case 0x9E: // 'ž' + *to++ = 0x017E; + break; + case 0x9F: // 'Ÿ' + *to++ = 0x0178; + break; + default: + *to++ = (unsigned short)*from; + } + } + *to = 0; + return 0; +} diff --git a/src/modules/filters/latin1utf8.cpp b/src/modules/filters/latin1utf8.cpp new file mode 100644 index 0000000..91af8dc --- /dev/null +++ b/src/modules/filters/latin1utf8.cpp @@ -0,0 +1,179 @@ +/****************************************************************************** + * + * Latin1UTF8 - SWFilter decendant to convert a Latin-1 character to UTF-8 + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <latin1utf8.h> +#include <swmodule.h> + +Latin1UTF8::Latin1UTF8() { +} + + +char Latin1UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + int len; + + len = strlen(text) + 1; + if (len == maxlen + 1) + maxlen = (maxlen + 1) * FILTERPAD; + // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; // ------------------------------- + + + + for (to = (unsigned char*)text; *from; from++) { + if (*from < 0x80) { + *to++ = *from; + } + else if (*from < 0xc0) { + switch(*from) { + case 0x80: // '€' + *to++ = 0xe2; // 'â' + *to++ = 0x82; // '‚' + *to++ = 0xac; // '¬' + break; + case 0x82: // '‚' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9a; // 'š' + break; + case 0x83: // 'ƒ' + *to++ = 0xc6; // 'Æ' + *to++ = 0x92; // '’' + break; + case 0x84: // '„' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9e; // 'ž' + break; + case 0x85: // '…' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xa6; // '¦' + break; + case 0x86: // '†' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' 
+ *to++ = 0xa0; // ' ' + break; + case 0x87: // '‡' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xa1; // '¡' + break; + case 0x88: // 'ˆ' + *to++ = 0xcb; // 'Ë' + *to++ = 0x86; // '†' + break; + case 0x89: // '‰' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xb0; // '°' + break; + case 0x8A: // 'Š' + *to++ = 0xc5; // 'Å' + *to++ = 0xa0; // ' ' + break; + case 0x8B: // '‹' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xb9; // '¹' + break; + case 0x8C: // 'Œ' + *to++ = 0xc5; // 'Å' + *to++ = 0x92; // '’' + break; + case 0x8E: // 'Ž' + *to++ = 0xc5; // 'Å' + *to++ = 0xbd; // '½' + break; + case 0x91: // '‘' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x98; // '˜' + break; + case 0x92: // '’' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x99; // '™' + break; + case 0x93: // '“' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9c; // 'œ' + break; + case 0x94: // '”' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9d; // '' + break; + case 0x95: // '•' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xa2; // '¢' + break; + case 0x96: // '–' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x93; // '“' + break; + case 0x97: // '—' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x94; // '”' + break; + case 0x98: // '˜' + *to++ = 0xcb; // 'Ë' + *to++ = 0x9c; // 'œ' + break; + case 0x99: // '™' + *to++ = 0xe2; // 'â' + *to++ = 0x84; // '„' + *to++ = 0xa2; // '¢' + break; + case 0x9A: // 'š' + *to++ = 0xc5; // 'Å' + *to++ = 0xa1; // '¡' + break; + case 0x9B: // '›' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xba; // 'º' + break; + case 0x9C: // 'œ' + *to++ = 0xc5; // 'Å' + *to++ = 0x93; // '“' + break; + case 0x9E: // 'ž' + *to++ = 0xc5; // 'Å' + *to++ = 0xbe; // '¾' + break; + case 0x9F: // 'Ÿ' + *to++ = 0xc5; // 'Å' + *to++ = 0xb8; // '¸' + break; + default: + *to++ = 0xC2; + *to++ = *from; + } + } + else { + *to++ = 0xC3; + *to++ = (*from - 0x40); + } + } + 
*to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/plainfootnotes.cpp b/src/modules/filters/plainfootnotes.cpp new file mode 100644 index 0000000..96fc4d8 --- /dev/null +++ b/src/modules/filters/plainfootnotes.cpp @@ -0,0 +1,102 @@ +/*************************************************************************** + plainfootnotes.cpp - description + ------------------- + begin : Wed Oct 13 1999 + copyright : (C) 1999 by The team of BibleTime + email : info@bibletime.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <plainfootnotes.h> +#include <swkey.h> + +#include <stdlib.h> +#include <string.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + +const char PLAINFootnotes::on[] = "On"; +const char PLAINFootnotes::off[] = "Off"; +const char PLAINFootnotes::optName[] = "Footnotes"; +const char PLAINFootnotes::optTip[] = "Toggles Footnotes On and Off In Bible Texts If They Exist"; + +PLAINFootnotes::PLAINFootnotes(){ + option = false; + options.push_back(on); + options.push_back(off); +} + +PLAINFootnotes::~PLAINFootnotes(){ +} + + +void PLAINFootnotes::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *PLAINFootnotes::getOptionValue() +{ + return (option) ? 
on:off; +} + + +char PLAINFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char token[2048]; + int tokpos = 0; + bool intoken = false; + bool lastspace = false; + + if (!option) { // if we don't want footnotes + char *to, *from; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) + { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '{') // Footnote start + { + hide = true; + continue; + } + if (*from == '}') // Footnote end + { + hide=false; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + lastspace = (*from == ' '); + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} + diff --git a/src/modules/filters/plainhtml.cpp b/src/modules/filters/plainhtml.cpp new file mode 100644 index 0000000..fefb029 --- /dev/null +++ b/src/modules/filters/plainhtml.cpp @@ -0,0 +1,134 @@ +/*************************************************************************** + rwphtml.cpp - description + ------------------- + begin : Thu Jun 24 1999 + copyright : (C) 1999 by Torsten Uhlmann + email : TUhlmann@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <plainhtml.h> + + +PLAINHTML::PLAINHTML() +{ +} + + +char PLAINHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from; + int len; + int count = 0; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) + { + if ((*from == '\n') && (from[1] == '\n')) // paragraph + { + *to++ = '<'; + *to++ = 'P'; + *to++ = '>'; + from++; + continue; + } else { + if ((*from == '\n')) // && (from[1] != '\n')) // new line + { + *to++ = '<'; + *to++ = 'B'; + *to++ = 'R'; + *to++ = '>'; + continue; + } + } + + if (*from == '{') { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'O'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = ' '; + *to++ = 'C'; + *to++ = 'O'; + *to++ = 'L'; + *to++ = 'O'; + *to++ = 'R'; + *to++ = '='; + *to++ = '#'; + *to++ = '8'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '>'; + + *to++ = '<'; + *to++ = 'S'; + *to++ = 'M'; + *to++ = 'A'; + *to++ = 'L'; + *to++ = 'L'; + *to++ = '>'; + *to++ = ' '; + *to++ = '('; + continue; + } + + if (*from == '}') + { + *to++ = ')'; + *to++ = ' '; + *to++ = '<'; + *to++ = '/'; + *to++ = 'S'; + *to++ = 'M'; + *to++ = 'A'; + *to++ = 'L'; + *to++ = 'L'; + *to++ = '>'; + + *to++ = '<'; + *to++ = '/'; + *to++ = 'F'; + *to++ = 'O'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = '>'; + continue; + } + + if ((*from == ' ') && (count > 5000)) + { + *to++ = '<'; + *to++ = 'W'; + *to++ = 'B'; + *to++ = 'R'; + *to++ = '>'; + count = 0; + continue; + } + + *to++ = *from; + count++; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/rtfhtml.cpp b/src/modules/filters/rtfhtml.cpp new file mode 100644 index 0000000..f0b842b 
--- /dev/null +++ b/src/modules/filters/rtfhtml.cpp @@ -0,0 +1,99 @@ +/*************************************************************************** + rtfhtml.cpp - description + ------------------- + begin : Wed Oct 13 1999 + copyright : (C) 1999 by The team of BibleTime + email : info@bibletime.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <rtfhtml.h> + + +RTFHTML::RTFHTML() { + +} + + +char RTFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from; + int len; + bool center = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) { + if (*from == '\\') // a RTF command + { + if ((from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r') && (from[4] == 'd')) + { // switch all modifier off + if (center) + { + *to++ = '<'; + *to++ = '/'; + *to++ = 'C'; + *to++ = 'E'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = 'E'; + *to++ = 'R'; + *to++ = '>'; + center = false; + } + from += 4; + continue; + } + if ((from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r')) + { + *to++ = '<'; + *to++ = 'P'; + *to++ = '>'; + *to++ = '\n'; + from += 3; + continue; + } + if (from[1] == ' ') + { + from += 1; + continue; + } + if ((from[1] == 'q') && (from[2] == 'c')) // center on + { + if (!center) + { + *to++ = '<'; + *to++ = 'C'; 
+ *to++ = 'E'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = 'E'; + *to++ = 'R'; + *to++ = '>'; + center = true; + } + from += 2; + continue; + } + } + + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/rwphtml.cpp b/src/modules/filters/rwphtml.cpp new file mode 100644 index 0000000..6f8ae4f --- /dev/null +++ b/src/modules/filters/rwphtml.cpp @@ -0,0 +1,187 @@ +/*************************************************************************** + rwphtml.cpp - description + ------------------- + begin : Thu Jun 24 1999 + copyright : (C) 1999 by Torsten Uhlmann + email : TUhlmann@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <rwphtml.h> + +RWPHTML::RWPHTML() +{ +} + + +char RWPHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from; + signed char greek_str[500]; + bool inverse = false; + bool first_letter = false; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } else + from = text; + for (to = text; *from; from++) { + if (*from == '\\') { + ++from; + int i=0; + first_letter = true; + greek_str[0] = '\0'; + while (*from != '\\') { /* get the greek word or phrase */ + greek_str[i++] = *from; + greek_str[i + 1] = '\0'; + from++; + } /* convert to symbol font as best we can */ + strcpy(to,"<I> </I><FONT FACE=\"symbol\">"); + to += strlen(to); + for (int j = 0; j < i; j++) { + if ((first_letter) + && (greek_str[j] == 'h')) { + if (greek_str[j + 1] == 'o') { + *to++ = 'o'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'a') { + *to++ = 'a'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'w') { + *to++ = 'w'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'u') { + *to++ = 'u'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == + -109) { + *to++ = 'w'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == + -120) { + *to++ = 'h'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'i') { + *to++ = 'i'; + first_letter = false; + ++j; + continue; + }else if (greek_str[j + 1] == 'e') { + *to++ = 'e'; + first_letter = false; + ++j; + continue; + } + first_letter = false; + } + if ((greek_str[j] == 't') + && (greek_str[j + 1] == 'h')) { + *to++ = 'q'; + ++j; + continue; + } + if ((greek_str[j] == 'c') + && 
(greek_str[j + 1] == 'h')) { + *to++ = 'c'; + ++j; + continue; + } + if ((greek_str[j] == 'p') + && (greek_str[j + 1] == 'h')) { + ++j; + *to++ = 'f'; + continue; + } + if (greek_str[j] == -120) { + *to++ = 'h'; + continue; + } + if (greek_str[j] == -125) { + *to++ = 'a'; + continue; + } + if (greek_str[j] == -109) { + if(greek_str[j+1] == 'i') ++j; + *to++ = 'w'; + continue; + } + if (greek_str[j] == ' ') + first_letter = true; + if (greek_str[j] == 's') { + if(isalpha(greek_str[j + 1])) *to++ = 's'; + else if(!isprint(greek_str[j] )) *to++ = 's'; + else *to++ = 'V'; + continue; + } + if (greek_str[j] == '\'') { + continue; + } + *to++ = greek_str[j]; + } + strcpy(to,"</FONT><I> </I>"); + to += strlen(to); + continue; + } + if (*from == '#') { // verse markings (e.g. "#Mark 1:1|") + inverse = true; + strcpy(to,"<FONT COLOR=#0000FF>"); + to += strlen(to); + continue; + } + if ((*from == '|') && (inverse)) { + inverse = false; + strcpy(to,"</FONT>"); + to += strlen(to); + continue; + } + if (*from == '{') { + strcpy(to,"<BR><STRONG>"); + to += strlen(to); + if ((from - &text[maxlen - len]) > 10) { // not the beginning of the entry + strcpy(to,"<P>"); + to += strlen(to); + } + continue; + } + if (*from == '}') { + strcpy(to," </STRONG>"); + to += strlen(to); + continue; + } + if ((*from == '\n') && (from[1] == '\n')) { + strcpy(to,"<P>"); + to += strlen(to); + continue; + } + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/rwprtf.cpp b/src/modules/filters/rwprtf.cpp new file mode 100644 index 0000000..8f7b074 --- /dev/null +++ b/src/modules/filters/rwprtf.cpp @@ -0,0 +1,107 @@ +/****************************************************************************** + * + * rwprtf - SWFilter decendant to convert all GBF tags to RTF tags + */ + + +#include <stdlib.h> +#include <string.h> +#include <rwprtf.h> + + +RWPRTF::RWPRTF() { + +} + + +char RWPRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + 
char *to, *from; + bool ingreek = false; + bool inverse = false; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) { + if (*from == '\\') { + if(!ingreek) { + ingreek = true; + *to++ = '['; + *to++ = '{'; + *to++ = '\\'; + *to++ = 'f'; + *to++ = '8'; + *to++ = ' '; + continue; + } + else { + ingreek = false; + *to++ = '}'; + *to++ = ']'; + continue; + } + } + + if ((ingreek) && ((*from == 'h') || (*from == 'H'))) + continue; // 'h's are mostly useless in RWP translitterations. The greek is more correct without them. + + if (*from == '#') { // verse markings (e.g. "#Mark 1:1|") + inverse = true; + *to++ = '{'; + *to++ = '\\'; + *to++ = 'c'; + *to++ = 'f'; + *to++ = '2'; + *to++ = ' '; + *to++ = '#'; + continue; + } + if ((*from == '|') && (inverse)) { + inverse = false; + *to++ = '|'; + *to++ = '}'; + continue; + } + + if (*from == '{') { + *to++ = '{'; + *to++ = '\\'; + *to++ = 'b'; + *to++ = ' '; + if ((from - &text[maxlen - len]) > 10) { // not the beginning of the entry + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + } + continue; + } + + if (*from == '}') { + // this is kinda neat... 
DO NOTHING + } + if ((*from == '\n') && (from[1] == '\n')) { + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + continue; + } + + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/scsuutf8.cpp b/src/modules/filters/scsuutf8.cpp new file mode 100644 index 0000000..d0d5ceb --- /dev/null +++ b/src/modules/filters/scsuutf8.cpp @@ -0,0 +1,220 @@ +/****************************************************************************** + * + * SCSUUTF8 - SWFilter decendant to convert a SCSU character to UTF-8 + * + */ + + +/* This class is based on: + * http://czyborra.com/scsu/scsu.c written by Roman Czyborra@dds.nl + * on Andrea's balcony in North Amsterdam on 1998-08-04 + * Thanks to Richard Verhoeven <rcb5@win.tue.nl> for his suggestion + * to correct the haphazard "if" after UQU to "else if" on 1998-10-01 + * + * This is a deflator to UTF-8 output for input compressed in SCSU, + * the (Reuters) Standard Compression Scheme for Unicode as described + * in http://www.unicode.org/unicode/reports/tr6.html + */ + +#include <stdlib.h> +#include <stdio.h> +#include <swmodule.h> + +#include <scsuutf8.h> + +SCSUUTF8::SCSUUTF8() { +} + + +unsigned char* SCSUUTF8::UTF8Output(unsigned long uchar, unsigned char* text) +{ + /* join UTF-16 surrogates without any pairing sanity checks */ + + static int d; + + if (uchar >= 0xd800 && uchar <= 0xdbff) { d = uchar & 0x3f; return text; } + if (uchar >= 0xdc00 && uchar <= 0xdfff) { uchar = uchar + 0x2400 + d * 0x400; } + + /* output one character as UTF-8 multibyte sequence */ + + if (uchar < 0x80) { + *text++ = c; + } + else if (uchar < 0x800) { + *text++ = 0xc0 | uchar >> 6; + *text++ = 0x80 | uchar & 0x3f; + } + else if (uchar < 0x10000) { + *text++ = 0xe0 | uchar >> 12; + *text++ = 0x80 | uchar >> 6 & 0x3f; + *text++ = 0x80 | uchar & 0x3f; + } + else if (uchar < 0x200000) { + *text++ = 0xf0 | uchar >> 18; + *text++ = 
0x80 | uchar >> 12 & 0x3f; + *text++ = 0x80 | uchar >> 6 & 0x3f; + *text++ = 0x80 | uchar & 0x3f; + } + + return text; +} + +char SCSUUTF8::ProcessText(char *text, int len, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + unsigned long buflen = len * FILTERPAD; + char active = 0, mode = 0; + + static unsigned short start[8] = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000}; + static unsigned short slide[8] = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00}; + static unsigned short win[256] = { + 0x0000, 0x0080, 0x0100, 0x0180, 0x0200, 0x0280, 0x0300, 0x0380, + 0x0400, 0x0480, 0x0500, 0x0580, 0x0600, 0x0680, 0x0700, 0x0780, + 0x0800, 0x0880, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80, + 0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x0F80, + 0x1000, 0x1080, 0x1100, 0x1180, 0x1200, 0x1280, 0x1300, 0x1380, + 0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780, + 0x1800, 0x1880, 0x1900, 0x1980, 0x1A00, 0x1A80, 0x1B00, 0x1B80, + 0x1C00, 0x1C80, 0x1D00, 0x1D80, 0x1E00, 0x1E80, 0x1F00, 0x1F80, + 0x2000, 0x2080, 0x2100, 0x2180, 0x2200, 0x2280, 0x2300, 0x2380, + 0x2400, 0x2480, 0x2500, 0x2580, 0x2600, 0x2680, 0x2700, 0x2780, + 0x2800, 0x2880, 0x2900, 0x2980, 0x2A00, 0x2A80, 0x2B00, 0x2B80, + 0x2C00, 0x2C80, 0x2D00, 0x2D80, 0x2E00, 0x2E80, 0x2F00, 0x2F80, + 0x3000, 0x3080, 0x3100, 0x3180, 0x3200, 0x3280, 0x3300, 0x3800, + 0xE000, 0xE080, 0xE100, 0xE180, 0xE200, 0xE280, 0xE300, 0xE380, + 0xE400, 0xE480, 0xE500, 0xE580, 0xE600, 0xE680, 0xE700, 0xE780, + 0xE800, 0xE880, 0xE900, 0xE980, 0xEA00, 0xEA80, 0xEB00, 0xEB80, + 0xEC00, 0xEC80, 0xED00, 0xED80, 0xEE00, 0xEE80, 0xEF00, 0xEF80, + 0xF000, 0xF080, 0xF100, 0xF180, 0xF200, 0xF280, 0xF300, 0xF380, + 0xF400, 0xF480, 0xF500, 0xF580, 0xF600, 0xF680, 0xF700, 0xF780, + 0xF800, 0xF880, 0xF900, 0xF980, 0xFA00, 0xFA80, 0xFB00, 0xFB80, + 0xFC00, 0xFC80, 0xFD00, 0xFD80, 0xFE00, 0xFE80, 0xFF00, 0xFF80, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x00C0, 0x0250, 0x0370, 0x0530, 0x3040, 0x30A0, 0xFF60 + }; + + if (!len) + return 0; + + memmove(&text[buflen - len], text, len); + from = (unsigned char*)&text[buflen - len]; + to = (unsigned char *)text; + + // ------------------------------- + + for (int i = 0; i < len;) { + + + if (i >= len) break; + c = from[i++]; + + if (c >= 0x80) + { + to = UTF8Output (c - 0x80 + slide[active], to); + } + else if (c >= 0x20 && c <= 0x7F) + { + to = UTF8Output (c, to); + } + else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD) + { + to = UTF8Output (c, to); + } + else if (c >= 0x1 && c <= 0x8) /* SQn */ + { + if (i >= len) break; + /* single quote */ d = from[i++]; + + to = UTF8Output (d < 0x80 ? 
d + start [c - 0x1] : + d - 0x80 + slide [c - 0x1], to); + } + else if (c >= 0x10 && c <= 0x17) /* SCn */ + { + /* change window */ active = c - 0x10; + } + else if (c >= 0x18 && c <= 0x1F) /* SDn */ + { + /* define window */ active = c - 0x18; + if (i >= len) break; + slide [active] = win [from[i++]]; + } + else if (c == 0xB) /* SDX */ + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + d = from[i++]; + + slide [active = c>>5] = 0x10000 + (((c & 0x1F) << 8 | d) << 7); + } + else if (c == 0xE) /* SQU */ + { + if (i >= len) break; + /* SQU */ c = from[i++]; + + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c == 0xF) /* SCU */ + { + /* change to Unicode mode */ mode = 1; + + while (mode) + { + if (i >= len) break; + c = from[i++]; + + if (c <= 0xDF || c >= 0xF3) + { + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c == 0xF0) /* UQU */ + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c >= 0xE0 && c <= 0xE7) /* UCn */ + { + active = c - 0xE0; mode = 0; + } + else if (c >= 0xE8 && c <= 0xEF) /* UDn */ + { + if (i >= len) break; + slide [active=c-0xE8] = win [from[i++]]; mode = 0; + } + else if (c == 0xF1) /* UDX */ + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + d = from[i++]; + + slide [active = c>>5] = + 0x10000 + (((c & 0x1F) << 8 | d) << 7); mode = 0; + } + } + } + + + } + + *to++ = 0; + *to = 0; + return 0; +} + diff --git a/src/modules/filters/swbasicfilter.cpp b/src/modules/filters/swbasicfilter.cpp new file mode 100644 index 0000000..dd5fe81 --- /dev/null +++ b/src/modules/filters/swbasicfilter.cpp @@ -0,0 +1,300 @@ +/****************************************************************************** + * swbasicfilter.h - definition of class SWBasicFilter. An SWFilter + * impl that provides some basic methods that + * many filters will need and can use as a starting + * point. 
+ * + * $Id: swbasicfilter.cpp,v 1.18 2002/06/06 21:08:47 scribe Exp $ + * + * Copyright 2001 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <stdlib.h> +#include <string.h> +#include <swbasicfilter.h> +#include <stdio.h> +#include <stdarg.h> + +SWBasicFilter::SWBasicFilter() { + tokenStart = 0; + tokenEnd = 0; + escStart = 0; + escEnd = 0; + + setTokenStart("<"); + setTokenEnd(">"); + setEscapeStart("&"); + setEscapeEnd(";"); + + escStringCaseSensitive = false; + tokenCaseSensitive = false; + passThruUnknownToken = false; + passThruUnknownEsc = false; +} + + +void SWBasicFilter::setPassThruUnknownToken(bool val) { + passThruUnknownToken = val; +} + + +void SWBasicFilter::setPassThruUnknownEscapeString(bool val) { + passThruUnknownEsc = val; +} + + +void SWBasicFilter::setTokenCaseSensitive(bool val) { + tokenCaseSensitive = val; +} + + +void SWBasicFilter::setEscapeStringCaseSensitive(bool val) { + escStringCaseSensitive = val; +} + + +SWBasicFilter::~SWBasicFilter() { + if (tokenStart) + delete [] tokenStart; + + if (tokenEnd) + delete [] tokenEnd; + + if (escStart) + delete [] escStart; + + if (escEnd) + delete [] escEnd; +} + + +void SWBasicFilter::addTokenSubstitute(const char *findString, const char *replaceString) { + char *buf = 0; + + if (!tokenCaseSensitive) { + stdstr(&buf, findString); + toupperstr(buf); + tokenSubMap.insert(DualStringMap::value_type(buf, replaceString)); + delete [] buf; + } + else 
tokenSubMap.insert(DualStringMap::value_type(findString, replaceString)); +} + + +void SWBasicFilter::addEscapeStringSubstitute(const char *findString, const char *replaceString) { + char *buf = 0; + + if (!escStringCaseSensitive) { + stdstr(&buf, findString); + toupperstr(buf); + escSubMap.insert(DualStringMap::value_type(buf, replaceString)); + delete [] buf; + } + else escSubMap.insert(DualStringMap::value_type(findString, replaceString)); +} + + +void SWBasicFilter::pushString(char **buf, const char *format, ...) { + va_list argptr; + + va_start(argptr, format); + *buf += vsprintf(*buf, format, argptr); + va_end(argptr); + +// *buf += strlen(*buf); +} + + +bool SWBasicFilter::substituteToken(char **buf, const char *token) { + DualStringMap::iterator it; + + if (!tokenCaseSensitive) { + char *tmp = 0; + stdstr(&tmp, token); + toupperstr(tmp); + it = tokenSubMap.find(tmp); + delete [] tmp; + } else + it = tokenSubMap.find(token); + + if (it != tokenSubMap.end()) { + pushString(buf, it->second.c_str()); + return true; + } + return false; +} + + +bool SWBasicFilter::substituteEscapeString(char **buf, const char *escString) { + DualStringMap::iterator it; + + if (!escStringCaseSensitive) { + char *tmp = 0; + stdstr(&tmp, escString); + toupperstr(tmp); + it = escSubMap.find(tmp); + delete [] tmp; + } else + it = escSubMap.find(escString); + + if (it != escSubMap.end()) { + pushString(buf, it->second.c_str()); + return true; + } + return false; +} + + +bool SWBasicFilter::handleToken(char **buf, const char *token, DualStringMap &userData) { + return substituteToken(buf, token); +} + + +bool SWBasicFilter::handleEscapeString(char **buf, const char *escString, DualStringMap &userData) { + return substituteEscapeString(buf, escString); +} + + +void SWBasicFilter::setEscapeStart(const char *escStart) { + stdstr(&(this->escStart), escStart); +} + + +void SWBasicFilter::setEscapeEnd(const char *escEnd) { + stdstr(&(this->escEnd), escEnd); +} + + +void 
SWBasicFilter::setTokenStart(const char *tokenStart) { + stdstr(&(this->tokenStart), tokenStart); +} + + +void SWBasicFilter::setTokenEnd(const char *tokenEnd) { + stdstr(&(this->tokenEnd), tokenEnd); +} + + +char SWBasicFilter::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + this->key = key; + this->module = module; + char *to, *from, token[4096]; + int tokpos = 0; + bool intoken = false; + int len; + bool inEsc = false; + char escStartLen = strlen(escStart); + char escEndLen = strlen(escEnd); + char escStartPos = 0, escEndPos = 0; + char tokenStartLen = strlen(tokenStart); + char tokenEndLen = strlen(tokenEnd); + char tokenStartPos = 0, tokenEndPos = 0; + DualStringMap userData; + string lastTextNode; + + bool suspendTextPassThru = false; + userData["suspendTextPassThru"] = "false"; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + resultBuffer = text; + + for (to = text; *from; from++) { + if (*from == tokenStart[tokenStartPos]) { + if (tokenStartPos == (tokenStartLen - 1)) { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + inEsc = false; + } + else tokenStartPos++; + continue; + } + + if (*from == escStart[escStartPos]) { + if (escStartPos == (escStartLen - 1)) { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + inEsc = true; + } + else escStartPos++; + continue; + } + + if (inEsc) { + if (*from == escEnd[escEndPos]) { + if (escEndPos == (escEndLen - 1)) { + intoken = false; + userData["lastTextNode"] = lastTextNode; + if ((!handleEscapeString(&to, token, userData)) && (passThruUnknownEsc)) { + pushString(&to, escStart); + pushString(&to, token); + pushString(&to, escEnd); + } + escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0; + lastTextNode = ""; + suspendTextPassThru = 
(!userData["suspendTextPassThru"].compare("true")); + continue; + } + } + } + + if (!inEsc) { + if (*from == tokenEnd[tokenEndPos]) { + if (tokenEndPos == (tokenEndLen - 1)) { + intoken = false; + userData["lastTextNode"] = lastTextNode; + if ((!handleToken(&to, token, userData)) && (passThruUnknownToken)) { + pushString(&to, tokenStart); + pushString(&to, token); + pushString(&to, tokenEnd); + } + escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0; + lastTextNode = ""; + suspendTextPassThru = (!userData["suspendTextPassThru"].compare("true")); + continue; + } + } + } + + if (intoken) { + if (tokpos < 4090) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!suspendTextPassThru) + *to++ = *from; + lastTextNode += *from; + } + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/thmlfootnotes.cpp b/src/modules/filters/thmlfootnotes.cpp new file mode 100644 index 0000000..d9b1f0e --- /dev/null +++ b/src/modules/filters/thmlfootnotes.cpp @@ -0,0 +1,103 @@ +/****************************************************************************** + * + * thmlfootnotes - SWFilter decendant to hide or show footnotes + * in a ThML module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlfootnotes.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLFootnotes::on[] = "On"; +const char ThMLFootnotes::off[] = "Off"; +const char ThMLFootnotes::optName[] = "Footnotes"; +const char ThMLFootnotes::optTip[] = "Toggles Footnotes On and Off if they exist"; + + +ThMLFootnotes::ThMLFootnotes() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLFootnotes::~ThMLFootnotes() { +} + +void ThMLFootnotes::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLFootnotes::getOptionValue() +{ + return (option) ? 
on:off; +} + +char ThMLFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want footnotes + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strncmp(token, "note", 4)) { + hide = true; + continue; + } + else if (!strncmp(token, "/note", 5)) { + hide = false; + continue; + } + + // if not a footnote token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlgbf.cpp b/src/modules/filters/thmlgbf.cpp new file mode 100644 index 0000000..66d9a20 --- /dev/null +++ b/src/modules/filters/thmlgbf.cpp @@ -0,0 +1,330 @@ +/*************************************************************************** + thmlgbf.cpp - ThML to GBF filter + ------------------- + begin : 1999-10-28 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + 
* (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlgbf.h> + + +ThMLGBF::ThMLGBF() +{ +} + + +char ThMLGBF::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + bool ampersand = false; + bool sechead = false; + bool title = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = false; + continue; + } + else if (*from == '&') { + intoken = true; + tokpos = 0; + memset(token, 0, 2048); + ampersand = true; + continue; + } + if (*from == ';' && ampersand) { + intoken = false; + + if (!strncmp("nbsp", token, 4)) *to++ = ' '; + else if (!strncmp("quot", token, 4)) *to++ = '"'; + else if (!strncmp("amp", token, 3)) *to++ = '&'; + else if (!strncmp("lt", token, 2)) *to++ = '<'; + else if (!strncmp("gt", token, 2)) *to++ = '>'; + else if (!strncmp("brvbar", token, 6)) *to++ = '|'; + else if (!strncmp("sect", token, 4)) *to++ = '§'; + else if (!strncmp("copy", token, 4)) *to++ = '©'; + else if (!strncmp("laquo", token, 5)) *to++ = '«'; + else if (!strncmp("reg", token, 3)) *to++ = '®'; + else if (!strncmp("acute", token, 5)) *to++ = '´'; + else if (!strncmp("para", token, 4)) *to++ = '¶'; + else if (!strncmp("raquo", token, 5)) *to++ = '»'; + + else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; + else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; + else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; + else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; + else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; + else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; + 
else if (!strncmp("aacute", token, 6)) *to++ = 'á'; + else if (!strncmp("agrave", token, 6)) *to++ = 'à'; + else if (!strncmp("acirc", token, 5)) *to++ = 'â'; + else if (!strncmp("auml", token, 4)) *to++ = 'ä'; + else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; + else if (!strncmp("aring", token, 5)) *to++ = 'å'; + else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; + else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; + else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; + else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; + else if (!strncmp("eacute", token, 6)) *to++ = 'é'; + else if (!strncmp("egrave", token, 6)) *to++ = 'è'; + else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; + else if (!strncmp("euml", token, 4)) *to++ = 'ë'; + else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; + else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; + else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; + else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; + else if (!strncmp("iacute", token, 6)) *to++ = 'í'; + else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; + else if (!strncmp("icirc", token, 5)) *to++ = 'î'; + else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; + else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; + else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò'; + else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; + else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; + else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; + else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; + else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; + else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; + else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; + else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; + else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; + else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; + else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; + else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; + else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; + else if 
(!strncmp("ugrave", token, 6)) *to++ = 'ù'; + else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; + else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; + else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; + else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; + else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + + else if (!strncmp("deg", token, 3)) *to++ = '°'; + else if (!strncmp("plusmn", token, 6)) *to++ = '±'; + else if (!strncmp("sup2", token, 4)) *to++ = '²'; + else if (!strncmp("sup3", token, 4)) *to++ = '³'; + else if (!strncmp("sup1", token, 4)) *to++ = '¹'; + else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; + else if (!strncmp("pound", token, 5)) *to++ = '£'; + else if (!strncmp("cent", token, 4)) *to++ = '¢'; + else if (!strncmp("frac14", token, 6)) *to++ = '¼'; + else if (!strncmp("frac12", token, 6)) *to++ = '½'; + else if (!strncmp("frac34", token, 6)) *to++ = '¾'; + else if (!strncmp("iquest", token, 6)) *to++ = '¿'; + else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; + else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; + else if (!strncmp("eth", token, 3)) *to++ = 'ð'; + else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; + else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; + else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; + else if (!strncmp("aelig", token, 5)) *to++ = 'æ'; + else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; + else if (!strncmp("curren", token, 6)) *to++ = '¤'; + else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; + else if (!strncmp("ccedil", token, 6)) *to++ = 'ç'; + else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; + else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; + else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; + else if (!strncmp("yen", token, 3)) *to++ = '¥'; + else if (!strncmp("not", token, 3)) *to++ = '¬'; + else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; + else if (!strncmp("uml", token, 3)) *to++ = '¨'; + else if (!strncmp("shy", token, 3)) *to++ = ''; + else if (!strncmp("macr", token, 4)) *to++ = '¯'; + 
continue; + + } + else if (*from == '>' && !ampersand) { + intoken = false; + // process desired tokens + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + *to++ = '<'; + *to++ = 'W'; + for (unsigned int i = 27; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { + *to++ = '<'; + *to++ = 'W'; + *to++ = 'T'; + for (unsigned int i = 25; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "scripRef", 8)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'X'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "/scripRef", 9)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'x'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "note", 4)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'F'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "/note", 5)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'f'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "sup", 3)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'S'; + *to++ = '>'; + } + else if (!strncmp(token, "/sup", 4)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 's'; + *to++ = '>'; + } + else if (!strnicmp(token, "font color=#ff0000", 18)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'R'; + *to++ = '>'; + continue; + } + else if (!strnicmp(token, "/font", 5)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'r'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "div class=\"sechead\"", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 'S'; + *to++ = '>'; + sechead = true; + continue; + } + else if (sechead && !strncmp(token, "/div", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 's'; + *to++ = '>'; + sechead = false; + continue; + } + else if (!strncmp(token, "div class=\"title\"", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 'T'; + *to++ = '>'; + title = true; + continue; + } + else if (title && !strncmp(token, "/div", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 't'; + *to++ 
= '>'; + title = false; + continue; + } + else if (!strnicmp(token, "br", 2)) { + *to++ = '<'; + *to++ = 'C'; + *to++ = 'L'; + *to++ = '>'; + continue; + } + else switch(*token) { + case 'I': // font tags + case 'i': + *to++ = '<'; + *to++ = 'F'; + *to++ = 'I'; + *to++ = '>'; + continue; + case 'B': // bold start + case 'b': + *to++ = '<'; + *to++ = 'F'; + *to++ = 'B'; + *to++ = '>'; + continue; + case '/': + switch(token[1]) { + case 'P': + case 'p': + *to++ = '<'; + *to++ = 'C'; + *to++ = 'M'; + *to++ = '>'; + continue; + case 'I': + case 'i': // italic end + *to++ = '<'; + *to++ = 'F'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'B': // bold start + case 'b': + *to++ = '<'; + *to++ = 'F'; + *to++ = 'b'; + *to++ = '>'; + continue; + } + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/thmlheadings.cpp b/src/modules/filters/thmlheadings.cpp new file mode 100644 index 0000000..00b8a23 --- /dev/null +++ b/src/modules/filters/thmlheadings.cpp @@ -0,0 +1,107 @@ +/****************************************************************************** + * + * thmlheadings - SWFilter decendant to hide or show headings + * in a ThML module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlheadings.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLHeadings::on[] = "On"; +const char ThMLHeadings::off[] = "Off"; +const char ThMLHeadings::optName[] = "Headings"; +const char ThMLHeadings::optTip[] = "Toggles Headings On and Off if they exist"; + + +ThMLHeadings::ThMLHeadings() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLHeadings::~ThMLHeadings() { +} + +void ThMLHeadings::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLHeadings::getOptionValue() +{ + return (option) ? 
on:off; +} + +char ThMLHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want headings + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "div class=\"sechead\"", 19)) { + hide = true; + continue; + } + if (!strnicmp(token, "div class=\"title\"", 17)) { + hide = true; + continue; + } + else if (hide && !strnicmp(token, "/div", 4)) { + hide = false; + continue; + } + + // if not a heading token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlhtml.cpp b/src/modules/filters/thmlhtml.cpp new file mode 100644 index 0000000..9cb8679 --- /dev/null +++ b/src/modules/filters/thmlhtml.cpp @@ -0,0 +1,211 @@ +/*************************************************************************** + thmlhtml.cpp - ThML to HTML filter + ------------------- + begin : 1999-10-27 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU 
General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlhtml.h> +#include <swmodule.h> + + +ThMLHTML::ThMLHTML() { + setTokenStart("<"); + setTokenEnd(">"); +/* + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("nbsp", " "); + addEscapeStringSubstitute("quot", "\""); + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("brvbar", "|"); + addEscapeStringSubstitute("sect", "§"); + addEscapeStringSubstitute("copy", "©"); + addEscapeStringSubstitute("laquo", "«"); + addEscapeStringSubstitute("reg", "®"); + addEscapeStringSubstitute("acute", "´"); + addEscapeStringSubstitute("para", "¶"); + addEscapeStringSubstitute("raquo", "»"); + + addEscapeStringSubstitute("Aacute", "Á"); + addEscapeStringSubstitute("Agrave", "À"); + addEscapeStringSubstitute("Acirc", "Â"); + addEscapeStringSubstitute("Auml", "Ä"); + addEscapeStringSubstitute("Atilde", "Ã"); + addEscapeStringSubstitute("Aring", "Å"); + addEscapeStringSubstitute("aacute", "á"); + addEscapeStringSubstitute("agrave", "à"); + addEscapeStringSubstitute("acirc", "â"); + addEscapeStringSubstitute("auml", "ä"); + addEscapeStringSubstitute("atilde", "ã"); + addEscapeStringSubstitute("aring", "å"); + addEscapeStringSubstitute("Eacute", "É"); + addEscapeStringSubstitute("Egrave", "È"); + addEscapeStringSubstitute("Ecirc", "Ê"); + addEscapeStringSubstitute("Euml", "Ë"); + addEscapeStringSubstitute("eacute", "é"); + addEscapeStringSubstitute("egrave", "è"); + addEscapeStringSubstitute("ecirc", "ê"); + addEscapeStringSubstitute("euml", "ë"); + addEscapeStringSubstitute("Iacute", "Í"); + addEscapeStringSubstitute("Igrave", "Ì"); + 
addEscapeStringSubstitute("Icirc", "Î"); + addEscapeStringSubstitute("Iuml", "Ï"); + addEscapeStringSubstitute("iacute", "í"); + addEscapeStringSubstitute("igrave", "ì"); + addEscapeStringSubstitute("icirc", "î"); + addEscapeStringSubstitute("iuml", "ï"); + addEscapeStringSubstitute("Oacute", "Ó"); + addEscapeStringSubstitute("Ograve", "Ò"); + addEscapeStringSubstitute("Ocirc", "Ô"); + addEscapeStringSubstitute("Ouml", "Ö"); + addEscapeStringSubstitute("Otilde", "Õ"); + addEscapeStringSubstitute("oacute", "ó"); + addEscapeStringSubstitute("ograve", "ò"); + addEscapeStringSubstitute("ocirc", "ô"); + addEscapeStringSubstitute("ouml", "ö"); + addEscapeStringSubstitute("otilde", "õ"); + addEscapeStringSubstitute("Uacute", "Ú"); + addEscapeStringSubstitute("Ugrave", "Ù"); + addEscapeStringSubstitute("Ucirc", "Û"); + addEscapeStringSubstitute("Uuml", "Ü"); + addEscapeStringSubstitute("uacute", "ú"); + addEscapeStringSubstitute("ugrave", "ù"); + addEscapeStringSubstitute("ucirc", "û"); + addEscapeStringSubstitute("uuml", "ü"); + addEscapeStringSubstitute("Yacute", "Ý"); + addEscapeStringSubstitute("yacute", "ý"); + addEscapeStringSubstitute("yuml", "ÿ"); + + addEscapeStringSubstitute("deg", "°"); + addEscapeStringSubstitute("plusmn", "±"); + addEscapeStringSubstitute("sup2", "²"); + addEscapeStringSubstitute("sup3", "³"); + addEscapeStringSubstitute("sup1", "¹"); + addEscapeStringSubstitute("nbsp", "º"); + addEscapeStringSubstitute("pound", "£"); + addEscapeStringSubstitute("cent", "¢"); + addEscapeStringSubstitute("frac14", "¼"); + addEscapeStringSubstitute("frac12", "½"); + addEscapeStringSubstitute("frac34", "¾"); + addEscapeStringSubstitute("iquest", "¿"); + addEscapeStringSubstitute("iexcl", "¡"); + addEscapeStringSubstitute("ETH", "Ð"); + addEscapeStringSubstitute("eth", "ð"); + addEscapeStringSubstitute("THORN", "Þ"); + addEscapeStringSubstitute("thorn", "þ"); + addEscapeStringSubstitute("AElig", "Æ"); + addEscapeStringSubstitute("aelig", "æ"); + 
addEscapeStringSubstitute("Oslash", "Ø"); + addEscapeStringSubstitute("curren", "¤"); + addEscapeStringSubstitute("Ccedil", "Ç"); + addEscapeStringSubstitute("ccedil", "ç"); + addEscapeStringSubstitute("szlig", "ß"); + addEscapeStringSubstitute("Ntilde", "Ñ"); + addEscapeStringSubstitute("ntilde", "ñ"); + addEscapeStringSubstitute("yen", "¥"); + addEscapeStringSubstitute("not", "¬"); + addEscapeStringSubstitute("ordf", "ª"); + addEscapeStringSubstitute("uml", "¨"); + addEscapeStringSubstitute("shy", ""); + addEscapeStringSubstitute("macr", "¯"); +*/ + setTokenCaseSensitive(true); + + addTokenSubstitute("/scripRef", " </a>"); + addTokenSubstitute("note", " <font color=\"#800000\"><small>("); + addTokenSubstitute("/note", ")</small></font> "); +} + + +bool ThMLHTML::handleToken(char **buf, const char *token, DualStringMap &userData) { + if (!substituteToken(buf, token)) { + // manually process if it wasn't a simple substitution + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + if (token[27] == 'H' || token[27] == 'G' || token[27] == 'A') { + pushString(buf, "<small><em>"); + for (const char *tok = token + 5; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + pushString(buf, "</em></small>"); + } + else if (token[27] == 'T') { + pushString(buf, "<small><i>"); + for (unsigned int i = 29; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, "</i></small>"); + } + } + else if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { + pushString(buf, "<small><em>"); + for (unsigned int i = 25; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, "</em></small>"); + } + else if (!strncmp(token, "sync type=\"lemma\" value=\"", 25)) { + pushString(buf, "<small><em>("); + for (unsigned int i = 25; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ")</em></small>"); + } + else if (!strncmp(token, "scripRef", 8)) { + pushString(buf, "<a href=\""); + for (const char *tok = token + 9; *tok; tok++) + if(*tok != '\"') + 
*(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + } + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + return false; + + *(*buf)++ = '<'; + for (const char *c = token; *c; c++) { + if (c == src) { + for (;((*c) && (*c != '"')); c++) + *(*buf)++ = *c; + + if (!*c) { c--; continue; } + + *(*buf)++ = '"'; + if (*(c+1) == '/') { + pushString(buf, "file:"); + pushString(buf, module->getConfigEntry("AbsoluteDataPath")); + if (*((*buf)-1) == '/') + c++; // skip '/' + } + continue; + } + *(*buf)++ = *c; + } + *(*buf)++ = '>'; + } + else if(!strncmp(token, "note", 4)) { + pushString(buf, " <font color=\"#800000\"><small>("); + } + + else { + return false; // we still didn't handle token + } + } + return true; +} + diff --git a/src/modules/filters/thmlhtmlhref.cpp b/src/modules/filters/thmlhtmlhref.cpp new file mode 100644 index 0000000..ce7e3fd --- /dev/null +++ b/src/modules/filters/thmlhtmlhref.cpp @@ -0,0 +1,269 @@ +/*************************************************************************** + thmlhtmlhref.cpp - ThML to HTML filter with hrefs + ------------------- + begin : 2001-09-03 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlhtmlhref.h> +#include <swmodule.h> + + +ThMLHTMLHREF::ThMLHTMLHREF() { + setTokenStart("<"); + setTokenEnd(">"); +/* + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("nbsp", " "); + addEscapeStringSubstitute("quot", "\""); + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("brvbar", "|"); + addEscapeStringSubstitute("sect", "§"); + addEscapeStringSubstitute("copy", "©"); + addEscapeStringSubstitute("laquo", "«"); + addEscapeStringSubstitute("reg", "®"); + addEscapeStringSubstitute("acute", "´"); + addEscapeStringSubstitute("para", "¶"); + addEscapeStringSubstitute("raquo", "»"); + + addEscapeStringSubstitute("Aacute", "Á"); + addEscapeStringSubstitute("Agrave", "À"); + addEscapeStringSubstitute("Acirc", "Â"); + addEscapeStringSubstitute("Auml", "Ä"); + addEscapeStringSubstitute("Atilde", "Ã"); + addEscapeStringSubstitute("Aring", "Å"); + addEscapeStringSubstitute("aacute", "á"); + addEscapeStringSubstitute("agrave", "à"); + addEscapeStringSubstitute("acirc", "â"); + addEscapeStringSubstitute("auml", "ä"); + addEscapeStringSubstitute("atilde", "ã"); + addEscapeStringSubstitute("aring", "å"); + addEscapeStringSubstitute("Eacute", "É"); + addEscapeStringSubstitute("Egrave", "È"); + addEscapeStringSubstitute("Ecirc", "Ê"); + addEscapeStringSubstitute("Euml", "Ë"); + addEscapeStringSubstitute("eacute", "é"); + addEscapeStringSubstitute("egrave", "è"); + addEscapeStringSubstitute("ecirc", "ê"); + addEscapeStringSubstitute("euml", "ë"); + addEscapeStringSubstitute("Iacute", "Í"); + addEscapeStringSubstitute("Igrave", "Ì"); + addEscapeStringSubstitute("Icirc", "Î"); + addEscapeStringSubstitute("Iuml", "Ï"); + addEscapeStringSubstitute("iacute", "í"); + 
addEscapeStringSubstitute("igrave", "ì"); + addEscapeStringSubstitute("icirc", "î"); + addEscapeStringSubstitute("iuml", "ï"); + addEscapeStringSubstitute("Oacute", "Ó"); + addEscapeStringSubstitute("Ograve", "Ò"); + addEscapeStringSubstitute("Ocirc", "Ô"); + addEscapeStringSubstitute("Ouml", "Ö"); + addEscapeStringSubstitute("Otilde", "Õ"); + addEscapeStringSubstitute("oacute", "ó"); + addEscapeStringSubstitute("ograve", "ò"); + addEscapeStringSubstitute("ocirc", "ô"); + addEscapeStringSubstitute("ouml", "ö"); + addEscapeStringSubstitute("otilde", "õ"); + addEscapeStringSubstitute("Uacute", "Ú"); + addEscapeStringSubstitute("Ugrave", "Ù"); + addEscapeStringSubstitute("Ucirc", "Û"); + addEscapeStringSubstitute("Uuml", "Ü"); + addEscapeStringSubstitute("uacute", "ú"); + addEscapeStringSubstitute("ugrave", "ù"); + addEscapeStringSubstitute("ucirc", "û"); + addEscapeStringSubstitute("uuml", "ü"); + addEscapeStringSubstitute("Yacute", "Ý"); + addEscapeStringSubstitute("yacute", "ý"); + addEscapeStringSubstitute("yuml", "ÿ"); + + addEscapeStringSubstitute("deg", "°"); + addEscapeStringSubstitute("plusmn", "±"); + addEscapeStringSubstitute("sup2", "²"); + addEscapeStringSubstitute("sup3", "³"); + addEscapeStringSubstitute("sup1", "¹"); + addEscapeStringSubstitute("nbsp", "º"); + addEscapeStringSubstitute("pound", "£"); + addEscapeStringSubstitute("cent", "¢"); + addEscapeStringSubstitute("frac14", "¼"); + addEscapeStringSubstitute("frac12", "½"); + addEscapeStringSubstitute("frac34", "¾"); + addEscapeStringSubstitute("iquest", "¿"); + addEscapeStringSubstitute("iexcl", "¡"); + addEscapeStringSubstitute("ETH", "Ð"); + addEscapeStringSubstitute("eth", "ð"); + addEscapeStringSubstitute("THORN", "Þ"); + addEscapeStringSubstitute("thorn", "þ"); + addEscapeStringSubstitute("AElig", "Æ"); + addEscapeStringSubstitute("aelig", "æ"); + addEscapeStringSubstitute("Oslash", "Ø"); + addEscapeStringSubstitute("curren", "¤"); + addEscapeStringSubstitute("Ccedil", "Ç"); + 
addEscapeStringSubstitute("ccedil", "ç"); + addEscapeStringSubstitute("szlig", "ß"); + addEscapeStringSubstitute("Ntilde", "Ñ"); + addEscapeStringSubstitute("ntilde", "ñ"); + addEscapeStringSubstitute("yen", "¥"); + addEscapeStringSubstitute("not", "¬"); + addEscapeStringSubstitute("ordf", "ª"); + addEscapeStringSubstitute("uml", "¨"); + addEscapeStringSubstitute("shy", ""); + addEscapeStringSubstitute("macr", "¯"); +*/ + setTokenCaseSensitive(true); + + addTokenSubstitute("note", " <font color=\"#800000\"><small>("); + addTokenSubstitute("/note", ")</small></font> "); + addTokenSubstitute("/scripture", "</i> "); +} + + +bool ThMLHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &userData) { + const char *tok; + if (!substituteToken(buf, token)) { + // manually process if it wasn't a simple substitution + if (!strncmp(token, "sync ", 5)) { + pushString(buf, "<a href=\""); + for (tok = token + 5; *(tok+1); tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + + //scan for value and add it to the buffer + for (tok = token + 5; *tok; tok++) { + if (!strncmp(tok, "value=\"", 7)) { + tok += 7; + for (;*tok != '\"'; tok++) + *(*buf)++ = *tok; + break; + } + } + pushString(buf, "</a>"); + } + + else if (!strncmp(token, "scripture ", 10)) { + userData["inscriptRef"] = "true"; + pushString(buf, "<i>"); + } + + else if (!strncmp(token, "scripRef p", 10) || !strncmp(token, "scripRef v", 10)) { + userData["inscriptRef"] = "true"; + pushString(buf, "<a href=\""); + for (const char *tok = token + 9; *(tok+1); tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + } + + // we're starting a scripRef like "<scripRef>John 3:16</scripRef>" + else if (!strcmp(token, "scripRef")) { + userData["inscriptRef"] = "false"; + // let's stop text from going to output + userData["suspendTextPassThru"] = "true"; + } + + // we've ended a scripRef + else if (!strcmp(token, "/scripRef")) { + if 
(userData["inscriptRef"] == "true") { // like "<scripRef passage="John 3:16">John 3:16</scripRef>" + userData["inscriptRef"] = "false"; + pushString(buf, "</a>"); + } + + else { // like "<scripRef>John 3:16</scripRef>" + pushString(buf, "<a href=\"passage="); + //char *strbuf = (char *)userData["lastTextNode"].c_str(); + pushString(buf, userData["lastTextNode"].c_str()); + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + pushString(buf, userData["lastTextNode"].c_str()); + // let's let text resume to output again + userData["suspendTextPassThru"] = "false"; + pushString(buf, "</a>"); + } + } + + else if (!strncmp(token, "div class=\"sechead\"", 19)) { + userData["SecHead"] = "true"; + pushString(buf, "<br /><b><i>"); + } + else if (!strncmp(token, "div class=\"title\"", 19)) { + userData["SecHead"] = "true"; + pushString(buf, "<br /><b><i>"); + } + else if (!strncmp(token, "/div", 4)) { + if (userData["SecHead"] == "true") { + pushString(buf, "</i></b><br />"); + userData["SecHead"] = "false"; + } + } + + else if (!strncmp(token, "sync type=\"Strongs\" value=\"T", 28)) { + pushString(buf, "<a href=\""); + for (tok = token + 5; *(tok+1); tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + for (tok = token + 29; *(tok+2); tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + pushString(buf, "</a>"); + } + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + return false; + + *(*buf)++ = '<'; + for (const char *c = token; *c; c++) { + if (c == src) { + for (;((*c) && (*c != '"')); c++) + *(*buf)++ = *c; + + if (!*c) { c--; continue; } + + *(*buf)++ = '"'; + if (*(c+1) == '/') { + pushString(buf, "file:"); + pushString(buf, module->getConfigEntry("AbsoluteDataPath")); + if (*((*buf)-1) == '/') + c++; // skip '/' + } + continue; + } + *(*buf)++ = *c; + } + *(*buf)++ = '>'; + } + else if (!strncmp(token, "note", 4)) { + pushString(buf, " <small><font color=\"#800000\">("); + } 
+ else { + *(*buf)++ = '<'; + for (const char *tok = token; *tok; tok++) + *(*buf)++ = *tok; + *(*buf)++ = '>'; + //return false; // we still didn't handle token + } + } + return true; +} + diff --git a/src/modules/filters/thmllemma.cpp b/src/modules/filters/thmllemma.cpp new file mode 100644 index 0000000..33856db --- /dev/null +++ b/src/modules/filters/thmllemma.cpp @@ -0,0 +1,97 @@ +/****************************************************************************** + * + * thmllemma - SWFilter decendant to hide or show lemmas + * in a ThML module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <thmllemma.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLLemma::on[] = "On"; +const char ThMLLemma::off[] = "Off"; +const char ThMLLemma::optName[] = "Lemmas"; +const char ThMLLemma::optTip[] = "Toggles Lemmas On and Off if they exist"; + +ThMLLemma::ThMLLemma() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLLemma::~ThMLLemma() { +} + +void ThMLLemma::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLLemma::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLLemma::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want lemmas + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "sync type=\"lemma\" ", 18)) { // Lemma + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + // if not a lemma token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlmorph.cpp b/src/modules/filters/thmlmorph.cpp new file mode 100644 index 0000000..f95bede --- /dev/null +++ b/src/modules/filters/thmlmorph.cpp @@ -0,0 +1,98 @@ +/****************************************************************************** + * + * thmlmorph - SWFilter decendant to hide or show morph tags + * in a ThML module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlmorph.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLMorph::on[] = "On"; +const char ThMLMorph::off[] = "Off"; +const char ThMLMorph::optName[] = "Morphological Tags"; +const char ThMLMorph::optTip[] = "Toggles Morphological Tags On and Off if they exist"; + + +ThMLMorph::ThMLMorph() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLMorph::~ThMLMorph() { +} + +void ThMLMorph::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLMorph::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want morph tags + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "sync type=\"morph\" ", 18)) { // Morph + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + // if not a morph tag token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + } + 
return 0; +} diff --git a/src/modules/filters/thmlolb.cpp b/src/modules/filters/thmlolb.cpp new file mode 100644 index 0000000..2b31fab --- /dev/null +++ b/src/modules/filters/thmlolb.cpp @@ -0,0 +1,243 @@ +/*************************************************************************** + thmlolb.cpp - ThML to OLB filter + ------------------- + begin : 2001-05-10 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlolb.h> + + +ThMLOLB::ThMLOLB() +{ +} + + +char ThMLOLB::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + bool ampersand = false; + int i; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) + { + if (*from == '<') { + intoken = true; + tokpos = 0; + memset(token, 0, 2048); + ampersand = false; + continue; + } + else if (*from == '&') { + intoken = true; + tokpos = 0; + memset(token, 0, 2048); + ampersand = true; + continue; + } + if (*from == ';' && ampersand) { + intoken = false; + + if (!strncmp("nbsp", token, 4)) *to++ = ' '; + else if (!strncmp("quot", token, 4)) *to++ = '"'; + else if (!strncmp("amp", token, 3)) *to++ = '&'; + else if (!strncmp("lt", token, 2)) *to++ = '<'; + else if (!strncmp("gt", token, 2)) *to++ = '>'; + 
else if (!strncmp("brvbar", token, 6)) *to++ = '|'; + else if (!strncmp("sect", token, 4)) *to++ = '§'; + else if (!strncmp("copy", token, 4)) *to++ = '©'; + else if (!strncmp("laquo", token, 5)) *to++ = '«'; + else if (!strncmp("reg", token, 3)) *to++ = '®'; + else if (!strncmp("acute", token, 5)) *to++ = '´'; + else if (!strncmp("para", token, 4)) *to++ = '¶'; + else if (!strncmp("raquo", token, 5)) *to++ = '»'; + + else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; + else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; + else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; + else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; + else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; + else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; + else if (!strncmp("aacute", token, 6)) *to++ = 'á'; + else if (!strncmp("agrave", token, 6)) *to++ = 'à'; + else if (!strncmp("acirc", token, 5)) *to++ = 'â'; + else if (!strncmp("auml", token, 4)) *to++ = 'ä'; + else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; + else if (!strncmp("aring", token, 5)) *to++ = 'å'; + else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; + else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; + else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; + else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; + else if (!strncmp("eacute", token, 6)) *to++ = 'é'; + else if (!strncmp("egrave", token, 6)) *to++ = 'è'; + else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; + else if (!strncmp("euml", token, 4)) *to++ = 'ë'; + else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; + else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; + else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; + else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; + else if (!strncmp("iacute", token, 6)) *to++ = 'í'; + else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; + else if (!strncmp("icirc", token, 5)) *to++ = 'î'; + else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; + else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; + else if (!strncmp("Ograve", 
token, 6)) *to++ = 'Ò'; + else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; + else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; + else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; + else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; + else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; + else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; + else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; + else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; + else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; + else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; + else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; + else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; + else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; + else if (!strncmp("ugrave", token, 6)) *to++ = 'ù'; + else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; + else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; + else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; + else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; + else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + + else if (!strncmp("deg", token, 3)) *to++ = '°'; + else if (!strncmp("plusmn", token, 6)) *to++ = '±'; + else if (!strncmp("sup2", token, 4)) *to++ = '²'; + else if (!strncmp("sup3", token, 4)) *to++ = '³'; + else if (!strncmp("sup1", token, 4)) *to++ = '¹'; + else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; + else if (!strncmp("pound", token, 5)) *to++ = '£'; + else if (!strncmp("cent", token, 4)) *to++ = '¢'; + else if (!strncmp("frac14", token, 6)) *to++ = '¼'; + else if (!strncmp("frac12", token, 6)) *to++ = '½'; + else if (!strncmp("frac34", token, 6)) *to++ = '¾'; + else if (!strncmp("iquest", token, 6)) *to++ = '¿'; + else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; + else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; + else if (!strncmp("eth", token, 3)) *to++ = 'ð'; + else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; + else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; + else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; + else if 
(!strncmp("aelig", token, 5)) *to++ = 'æ'; + else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; + else if (!strncmp("curren", token, 6)) *to++ = '¤'; + else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; + else if (!strncmp("ccedil", token, 6)) *to++ = 'ç'; + else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; + else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; + else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; + else if (!strncmp("yen", token, 3)) *to++ = '¥'; + else if (!strncmp("not", token, 3)) *to++ = '¬'; + else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; + else if (!strncmp("uml", token, 3)) *to++ = '¨'; + else if (!strncmp("shy", token, 3)) *to++ = ''; + else if (!strncmp("macr", token, 4)) *to++ = '¯'; + continue; + + } + else if (*from == '>' && !ampersand) + { + intoken = false; + // process desired tokens + if (!strncmp(token, "sync type=\"Strongs\" value=\"G", 27)) { + *to++ = '<'; + for (i = 28; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "sync type=\"Strongs\" value=\"H", 27)) { + *to++ = '<'; + for (i = 28; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "scripRef", 8)) { + *to++ = '#'; + continue; + } + else if (!strncmp(token, "/scripRef", 9)) { + *to++ = ' '; + continue; + } + else if (!strncmp(token, "note ", 5)) { + *to++ = '{'; + continue; + } + else if (!strncmp(token, "/note", 5)) { + *to++ = '}'; + continue; + } + else if (!strnicmp(token, "font", 4)) { + *to++ = '\\'; + *to++ = '\\'; + continue; + } + else if (!strnicmp(token, "/font", 5)) { + *to++ = '\\'; + *to++ = '\\'; + continue; + } + else switch(*token) { + case 'I': // font tags + case 'i': + *to++ = '\\'; + *to++ = '@'; + continue; + case 'B': // bold start + case 'b': + *to++ = '\\'; + *to++ = '$'; + continue; + case '/': + switch(token[1]) { + case 'I': + case 'i': // italic end + *to++ = '\\'; + *to++ = '@'; + continue; + case 'B': // bold start + case 'b': + 
*to++ = '\\'; + *to++ = '$'; + continue; + } + } + continue; + } + if (intoken) { + if (tokpos < 2047) + token[tokpos++] = *from; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/thmlosis.cpp b/src/modules/filters/thmlosis.cpp new file mode 100644 index 0000000..54cd002 --- /dev/null +++ b/src/modules/filters/thmlosis.cpp @@ -0,0 +1,332 @@ +/****************************************************************************** + * + * thmlstrongs - SWFilter decendant to hide or show strongs number + * in a ThML module. + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#include <thmlosis.h> +#include <swmodule.h> +#include <versekey.h> +#include <stdarg.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +ThMLOSIS::ThMLOSIS() { +} + + +ThMLOSIS::~ThMLOSIS() { +} + + +char ThMLOSIS::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char buf[128]; + char wordstr[5]; + char *valto; + char *ch; + char *textStart, *textEnd; + char *wordStart, *wordEnd; + bool newText = false; + bool newWord = false; + string tmp; + bool suspendTextPassThru = false; + bool keepToken = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + textStart = from; + wordStart = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = from-1; + wordEnd = to; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + keepToken = false; + suspendTextPassThru = false; + newWord = true; + + + while (wordStart < (text+maxlen)) { +// if (strchr(" ,;.?!()'\"", *wordStart)) + if (strchr(";,: .?!()'\"", *wordStart)) + wordStart++; + else break; + } + while (wordEnd > wordStart) { + if (strchr(" ,;:.?!()'\"", *wordEnd)) + wordEnd--; + else break; + } + + // Scripture Reference + if (!strncmp(token, "scripRef", 8)) { + // pushString(buf, "<reference work=\"Bible.KJV\" reference=\""); + suspendTextPassThru = true; + newText = true; + } + else if (!strncmp(token, "/scripRef", 9)) { + tmp = ""; + tmp.append(textStart, (int)(textEnd - textStart)+1); + pushString(&to, convertToOSIS(tmp.c_str(), key)); + suspendTextPassThru = false; + } + + // Footnote + if (!strcmp(token, "note")) { + // pushString(buf, "<reference work=\"Bible.KJV\" reference=\""); + suspendTextPassThru = true; + newText = true; + } + else if (!strcmp(token, "/note")) { + tmp = "<note type=\"x-StudyNote\"><notePart type=\"x-MainText\">"; + tmp.append(textStart, (int)(textEnd - textStart)+1); + tmp += "</notePart></note>"; + pushString(&to, tmp.c_str()); + 
suspendTextPassThru = false; + } + + // Figure + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + return false; + + pushString(&to, "<figure src=\""); + const char *c; + for (c = src;((*c) && (*c != '"')); c++); + + /* uncomment for SWORD absolute path logic + if (*(c+1) == '/') { + pushString(buf, "file:"); + pushString(buf, module->getConfigEntry("AbsoluteDataPath")); + if (*((*buf)-1) == '/') + c++; // skip '/' + } + end of uncomment for asolute path logic */ + + for (c++;((*c) && (*c != '"')); c++) + *to++ = *c; + + pushString(&to, "\" />"); + return true; + } + + // Strongs numbers + else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs + if (module->isProcessEntryAttributes()) { + valto = val; + for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) + *valto++ = token[i]; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + strstrip(val); + sprintf(buf, "<w lemma=\"x-Strong:%s\">", val); + memmove(wordStart+strlen(buf), wordStart, (to-wordStart)+1); + memcpy(wordStart, buf, strlen(buf)); + to+=strlen(buf); + pushString(&to, "</w>"); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; +// tmp = ""; +// tmp.append(textStart, (int)(wordEnd - wordStart)); +// module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; + } + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + } + + // Morphology + else if (!strncmp(token, "sync type=\"morph\"", 17)) { + for (ch = token+17; *ch; ch++) { + if (!strncmp(ch, "class=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + sprintf(wordstr, "%03d", word-1); + strstrip(val); + module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val; + } + if (!strncmp(ch, "value=\"", 7)) { + valto = val; + for (unsigned int i = 7; 
ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + sprintf(wordstr, "%03d", word-1); + strstrip(val); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + if (!strncmp(wordStart, "<w ", 3)) { + + const char *cls = "Unknown", *morph; + + if (module->getEntryAttributes()["Word"][wordstr]["Morph"].size() > 0) { + if (module->getEntryAttributes()["Word"][wordstr]["MorphClass"].size() > 0) + cls = module->getEntryAttributes()["Word"][wordstr]["MorphClass"].c_str(); + morph = module->getEntryAttributes()["Word"][wordstr]["Morph"].c_str(); + + sprintf(buf, "morph=\"x-%s:%s\" ", cls, morph); + memmove(wordStart+3+strlen(buf), wordStart+3, (to-wordStart)+1); + memcpy(wordStart+3, buf, strlen(buf)); + to+=strlen(buf); + } + } + } + + if (!keepToken) { // if we don't want strongs + if (strchr(" ,:;.?!()'\"", from[1])) { + if (lastspace) + to--; + } + if (newText) {textStart = from+1; newText = false; } +// if (newWord) {wordStart = to; newWord = false; } + continue; + } + // if not a strongs token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + if (newText) {textStart = to; newWord = false; } +// if (newWord) {wordStart = to; newWord = false; } + continue; + } + if (intoken) { + if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + } + else { + if (newWord && (*from != ' ')) {wordStart = to; newWord = false; memset(to, 0, 10); } + if (!suspendTextPassThru) { + *to++ = *from; + lastspace = (*from == ' '); + } + } + } + + VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key); + if (vkey) { + char ref[254]; + if (vkey->Verse()) + sprintf(ref, "<verseStart ref=\"%s\" />", vkey->getOSISRef()); + else if (vkey->Chapter()) + sprintf(ref, "<chapterStart ref=\"%s\" />", vkey->getOSISRef()); + else if (vkey->Book()) + sprintf(ref, "<bookStart ref=\"%s\" />", vkey->getOSISRef()); + else *ref = 0; + if (*ref) { + memmove(text+strlen(ref), 
text, maxlen-strlen(ref)-1); + memcpy(text, ref, strlen(ref)); + to+=strlen(ref); + if (vkey->Verse()) { + VerseKey tmp; + tmp = *vkey; + tmp.AutoNormalize(0); + tmp.Headings(1); + sprintf(ref, "<verseEnd ref=\"%s\" />", vkey->getOSISRef()); + pushString(&to, ref); + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Verse(0); + sprintf(ref, "<chapterEnd ref=\"%s\" />", tmp.getOSISRef()); + pushString(&to, ref); + tmp = MAXCHAPTER; + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Chapter(0); + tmp.Verse(0); + sprintf(ref, "<bookEnd ref=\"%s\" />", tmp.getOSISRef()); + pushString(&to, ref); + } + } + } + + else if (vkey->Chapter()) + sprintf(ref, "<chapterStart ref=\"%s\" />", vkey->getOSISRef()); + else sprintf(ref, "<bookStart ref=\"%s\" />", vkey->getOSISRef()); + } + } + *to++ = 0; + *to = 0; + return 0; +} + + +void ThMLOSIS::pushString(char **buf, const char *format, ...) { + va_list argptr; + + va_start(argptr, format); + *buf += vsprintf(*buf, format, argptr); + va_end(argptr); + +// *buf += strlen(*buf); +} + + +const char *ThMLOSIS::convertToOSIS(const char *inRef, const SWKey *key) { + static string outRef; + + outRef = ""; + + VerseKey defLanguage; + ListKey verses = defLanguage.ParseVerseList(inRef, (*key), true); + const char *startFrag = inRef; + for (int i = 0; i < verses.Count(); i++) { + VerseKey *element = SWDYNAMIC_CAST(VerseKey, verses.GetElement(i)); + char buf[5120]; + char frag[800]; + if (element) { + memmove(frag, startFrag, ((const char *)element->userData - startFrag) + 1); + frag[((const char *)element->userData - startFrag) + 1] = 0; + startFrag = (const char *)element->userData + 1; + sprintf(buf, "<reference refStart=\"KJV:%s\" refEnd=\"%s\">%s</reference>", element->LowerBound().getOSISRef(), element->UpperBound().getOSISRef(), frag); + } + else { + memmove(frag, startFrag, ((const char *)verses.GetElement(i)->userData - startFrag) + 1); + frag[((const char *)verses.GetElement(i)->userData - startFrag) + 1] = 0; + startFrag = (const char 
*)verses.GetElement(i)->userData + 1; + sprintf(buf, "<reference refStart=\"KJV:%s\">%s</reference>", VerseKey(*verses.GetElement(i)).getOSISRef(), frag); + } + outRef+=buf; + } + return outRef.c_str(); +} diff --git a/src/modules/filters/thmlplain.cpp b/src/modules/filters/thmlplain.cpp new file mode 100644 index 0000000..5609f16 --- /dev/null +++ b/src/modules/filters/thmlplain.cpp @@ -0,0 +1,201 @@ +/****************************************************************************** + * + * thmlplain - SWFilter decendant to strip out all ThML tags or convert to + * ASCII rendered symbols. + */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlplain.h> + + +ThMLPlain::ThMLPlain() { +} + + +char ThMLPlain::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + bool ampersand = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == 10 || *from == 13) + from++; + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = false; + continue; + } + else if (*from == '&') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = true; + continue; + } + if (*from == ';' && ampersand) { + intoken = false; + + if (!strncmp("nbsp", token, 4)) *to++ = ' '; + else if (!strncmp("quot", token, 4)) *to++ = '"'; + else if (!strncmp("amp", token, 3)) *to++ = '&'; + else if (!strncmp("lt", token, 2)) *to++ = '<'; + else if (!strncmp("gt", token, 2)) *to++ = '>'; + else if (!strncmp("brvbar", token, 6)) *to++ = '|'; + else if (!strncmp("sect", token, 4)) *to++ = '§'; + else if (!strncmp("copy", token, 4)) *to++ = '©'; + else if (!strncmp("laquo", token, 5)) *to++ = '«'; + else if (!strncmp("reg", 
token, 3)) *to++ = '®'; + else if (!strncmp("acute", token, 5)) *to++ = '´'; + else if (!strncmp("para", token, 4)) *to++ = '¶'; + else if (!strncmp("raquo", token, 5)) *to++ = '»'; + + else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; + else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; + else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; + else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; + else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; + else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; + else if (!strncmp("aacute", token, 6)) *to++ = 'á'; + else if (!strncmp("agrave", token, 6)) *to++ = 'à'; + else if (!strncmp("acirc", token, 5)) *to++ = 'â'; + else if (!strncmp("auml", token, 4)) *to++ = 'ä'; + else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; + else if (!strncmp("aring", token, 5)) *to++ = 'å'; + else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; + else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; + else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; + else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; + else if (!strncmp("eacute", token, 6)) *to++ = 'é'; + else if (!strncmp("egrave", token, 6)) *to++ = 'è'; + else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; + else if (!strncmp("euml", token, 4)) *to++ = 'ë'; + else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; + else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; + else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; + else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; + else if (!strncmp("iacute", token, 6)) *to++ = 'í'; + else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; + else if (!strncmp("icirc", token, 5)) *to++ = 'î'; + else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; + else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; + else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò'; + else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; + else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; + else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; + else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; + 
else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; + else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; + else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; + else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; + else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; + else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; + else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; + else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; + else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; + else if (!strncmp("ugrave", token, 6)) *to++ = 'ù'; + else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; + else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; + else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; + else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; + else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + + else if (!strncmp("deg", token, 3)) *to++ = '°'; + else if (!strncmp("plusmn", token, 6)) *to++ = '±'; + else if (!strncmp("sup2", token, 4)) *to++ = '²'; + else if (!strncmp("sup3", token, 4)) *to++ = '³'; + else if (!strncmp("sup1", token, 4)) *to++ = '¹'; + else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; + else if (!strncmp("pound", token, 5)) *to++ = '£'; + else if (!strncmp("cent", token, 4)) *to++ = '¢'; + else if (!strncmp("frac14", token, 6)) *to++ = '¼'; + else if (!strncmp("frac12", token, 6)) *to++ = '½'; + else if (!strncmp("frac34", token, 6)) *to++ = '¾'; + else if (!strncmp("iquest", token, 6)) *to++ = '¿'; + else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; + else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; + else if (!strncmp("eth", token, 3)) *to++ = 'ð'; + else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; + else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; + else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; + else if (!strncmp("aelig", token, 5)) *to++ = 'æ'; + else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; + else if (!strncmp("curren", token, 6)) *to++ = '¤'; + else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; + else if (!strncmp("ccedil", token, 
6)) *to++ = 'ç'; + else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; + else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; + else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; + else if (!strncmp("yen", token, 3)) *to++ = '¥'; + else if (!strncmp("not", token, 3)) *to++ = '¬'; + else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; + else if (!strncmp("uml", token, 3)) *to++ = '¨'; + else if (!strncmp("shy", token, 3)) *to++ = ''; + else if (!strncmp("macr", token, 4)) *to++ = '¯'; + continue; + + } + else if (*from == '>' && !ampersand) { + intoken = false; + // process desired tokens + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + *to++ = ' '; + *to++ = '<'; + for (unsigned int i = 27; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { + *to++ = ' '; + *to++ = '('; + for (unsigned int i = 25; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = ')'; + continue; + } + if (!strncmp("note", token, 4)) { + *to++ = ' '; + *to++ = '('; + } + else if (!strncmp("br", token, 2)) + *to++ = '\n'; + else if (!strncmp("/p", token, 2)) + *to++ = '\n'; + else if (!strncmp("/note", token, 5)) { + *to++ = ')'; + *to++ = ' '; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + + return 0; +} + + diff --git a/src/modules/filters/thmlrtf.cpp b/src/modules/filters/thmlrtf.cpp new file mode 100644 index 0000000..8b603b0 --- /dev/null +++ b/src/modules/filters/thmlrtf.cpp @@ -0,0 +1,222 @@ +/*************************************************************************** + thmlrtf.cpp - ThML to RTF filter + ------------------- + begin : 1999-10-27 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free 
software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlrtf.h> + + +ThMLRTF::ThMLRTF() +{ + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("nbsp", " "); + addEscapeStringSubstitute("quot", "\""); + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("brvbar", "|"); + addEscapeStringSubstitute("sect", "§"); + addEscapeStringSubstitute("copy", "©"); + addEscapeStringSubstitute("laquo", "«"); + addEscapeStringSubstitute("reg", "®"); + addEscapeStringSubstitute("acute", "´"); + addEscapeStringSubstitute("para", "¶"); + addEscapeStringSubstitute("raquo", "»"); + + addEscapeStringSubstitute("Aacute", "Á"); + addEscapeStringSubstitute("Agrave", "À"); + addEscapeStringSubstitute("Acirc", "Â"); + addEscapeStringSubstitute("Auml", "Ä"); + addEscapeStringSubstitute("Atilde", "Ã"); + addEscapeStringSubstitute("Aring", "Å"); + addEscapeStringSubstitute("aacute", "á"); + addEscapeStringSubstitute("agrave", "à"); + addEscapeStringSubstitute("acirc", "â"); + addEscapeStringSubstitute("auml", "ä"); + addEscapeStringSubstitute("atilde", "ã"); + addEscapeStringSubstitute("aring", "å"); + addEscapeStringSubstitute("Eacute", "É"); + addEscapeStringSubstitute("Egrave", "È"); + addEscapeStringSubstitute("Ecirc", "Ê"); + addEscapeStringSubstitute("Euml", "Ë"); + addEscapeStringSubstitute("eacute", "é"); + addEscapeStringSubstitute("egrave", "è"); + addEscapeStringSubstitute("ecirc", "ê"); + addEscapeStringSubstitute("euml", "ë"); + addEscapeStringSubstitute("Iacute", 
"Í"); + addEscapeStringSubstitute("Igrave", "Ì"); + addEscapeStringSubstitute("Icirc", "Î"); + addEscapeStringSubstitute("Iuml", "Ï"); + addEscapeStringSubstitute("iacute", "í"); + addEscapeStringSubstitute("igrave", "ì"); + addEscapeStringSubstitute("icirc", "î"); + addEscapeStringSubstitute("iuml", "ï"); + addEscapeStringSubstitute("Oacute", "Ó"); + addEscapeStringSubstitute("Ograve", "Ò"); + addEscapeStringSubstitute("Ocirc", "Ô"); + addEscapeStringSubstitute("Ouml", "Ö"); + addEscapeStringSubstitute("Otilde", "Õ"); + addEscapeStringSubstitute("oacute", "ó"); + addEscapeStringSubstitute("ograve", "ò"); + addEscapeStringSubstitute("ocirc", "ô"); + addEscapeStringSubstitute("ouml", "ö"); + addEscapeStringSubstitute("otilde", "õ"); + addEscapeStringSubstitute("Uacute", "Ú"); + addEscapeStringSubstitute("Ugrave", "Ù"); + addEscapeStringSubstitute("Ucirc", "Û"); + addEscapeStringSubstitute("Uuml", "Ü"); + addEscapeStringSubstitute("uacute", "ú"); + addEscapeStringSubstitute("ugrave", "ù"); + addEscapeStringSubstitute("ucirc", "û"); + addEscapeStringSubstitute("uuml", "ü"); + addEscapeStringSubstitute("Yacute", "Ý"); + addEscapeStringSubstitute("yacute", "ý"); + addEscapeStringSubstitute("yuml", "ÿ"); + + addEscapeStringSubstitute("deg", "°"); + addEscapeStringSubstitute("plusmn", "±"); + addEscapeStringSubstitute("sup2", "²"); + addEscapeStringSubstitute("sup3", "³"); + addEscapeStringSubstitute("sup1", "¹"); + addEscapeStringSubstitute("nbsp", "º"); + addEscapeStringSubstitute("pound", "£"); + addEscapeStringSubstitute("cent", "¢"); + addEscapeStringSubstitute("frac14", "¼"); + addEscapeStringSubstitute("frac12", "½"); + addEscapeStringSubstitute("frac34", "¾"); + addEscapeStringSubstitute("iquest", "¿"); + addEscapeStringSubstitute("iexcl", "¡"); + addEscapeStringSubstitute("ETH", "Ð"); + addEscapeStringSubstitute("eth", "ð"); + addEscapeStringSubstitute("THORN", "Þ"); + addEscapeStringSubstitute("thorn", "þ"); + addEscapeStringSubstitute("AElig", "Æ"); + 
addEscapeStringSubstitute("aelig", "æ"); + addEscapeStringSubstitute("Oslash", "Ø"); + addEscapeStringSubstitute("curren", "¤"); + addEscapeStringSubstitute("Ccedil", "Ç"); + addEscapeStringSubstitute("ccedil", "ç"); + addEscapeStringSubstitute("szlig", "ß"); + addEscapeStringSubstitute("Ntilde", "Ñ"); + addEscapeStringSubstitute("ntilde", "ñ"); + addEscapeStringSubstitute("yen", "¥"); + addEscapeStringSubstitute("not", "¬"); + addEscapeStringSubstitute("ordf", "ª"); + addEscapeStringSubstitute("uml", "¨"); + addEscapeStringSubstitute("shy", ""); + addEscapeStringSubstitute("macr", "¯"); + + setTokenCaseSensitive(true); + + addTokenSubstitute("/note", ") }"); + + addTokenSubstitute("br", "\\line "); + addTokenSubstitute("br /", "\\line "); + addTokenSubstitute("i", "{\\i1 "); + addTokenSubstitute("/i", "}"); + addTokenSubstitute("b", "{\\b1 "); + addTokenSubstitute("/b", "}"); + addTokenSubstitute("p", "\\par "); + + //we need uppercase forms for the moment to support a few early ThML modules that aren't XHTML compliant + addTokenSubstitute("BR", "\\line "); + addTokenSubstitute("I", "{\\i1 "); + addTokenSubstitute("/I", "}"); + addTokenSubstitute("B", "{\\b1 "); + addTokenSubstitute("/B", "}"); + addTokenSubstitute("P", "\\par "); +} + +bool ThMLRTF::handleToken(char **buf, const char *token, DualStringMap &userData) { + if (!substituteToken(buf, token)) { + // manually process if it wasn't a simple substitution + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + if (token[27] == 'H' || token[27] == 'G' || token[27] == 'A') { + pushString(buf, " {\\fs15 <"); + for (unsigned int i = 28; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ">}"); + } + else if (token[27] == 'T') { + pushString(buf, " {\\fs15 ("); + for (unsigned int i = 28; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ")}"); + } + } + else if (!strncmp(token, "sync type=\"morph\" ", 18)) { + pushString(buf, " {\\fs15 ("); + for (const char *tok = token + 
5; *tok; tok++) { + if (!strncmp(tok, "value=\"", 7)) { + tok += 7; + for (;*tok != '\"'; tok++) + *(*buf)++ = *tok; + break; + } + } + + pushString(buf, ")}"); + } + else if (!strncmp(token, "sync type=\"lemma\" value=\"", 25)) { + pushString(buf, "{\\fs15 ("); + for (unsigned int i = 25; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ")}"); + } + else if (!strncmp(token, "scripRef", 8)) { +// pushString(buf, "{\\cf2 #"); + pushString(buf, "<a href=\"\">"); + } + else if (!strncmp(token, "/scripRef", 9)) { + pushString(buf, "</a>"); + } + else if (!strncmp(token, "div", 3)) { + *(*buf)++ = '{'; + if (!strncmp(token, "div class=\"title\"", 17)) { + pushString(buf, "\\par\\i1\\b1 "); + userData["sechead"] = "true"; + } + else if (!strncmp(token, "div class=\"sechead\"", 19)) { + pushString(buf, "\\par\\i1\\b1 "); + userData["sechead"] = "true"; + } + } + else if (!strncmp(token, "/div", 4)) { + *(*buf)++ = '}'; + if (userData["sechead"] == "true") { + pushString(buf, "\\par "); + userData["sechead"] == "false"; + } + } + else if (!strncmp(token, "note", 4)) { + pushString(buf, " {\\i1\\fs15 ("); + } + + else { + return false; // we still didn't handle token + } + } + return true; +} + + diff --git a/src/modules/filters/thmlscripref.cpp b/src/modules/filters/thmlscripref.cpp new file mode 100644 index 0000000..23edd6d --- /dev/null +++ b/src/modules/filters/thmlscripref.cpp @@ -0,0 +1,103 @@ +/****************************************************************************** + * + * thmlscripref - SWFilter decendant to hide or show scripture references + * in a ThML module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlscripref.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLScripref::on[] = "On"; +const char ThMLScripref::off[] = "Off"; +const char ThMLScripref::optName[] = "Scripture Cross-references"; +const char ThMLScripref::optTip[] = "Toggles Scripture Cross-references On and Off if they exist"; + + +ThMLScripref::ThMLScripref() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLScripref::~ThMLScripref() { +} + +void ThMLScripref::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLScripref::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLScripref::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want scriprefs + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "scripRef", 8)) { + hide = true; + continue; + } + else if (!strnicmp(token, "/scripRef", 9)) { + hide = false; + continue; + } + + // if not a scripref token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlstrongs.cpp 
b/src/modules/filters/thmlstrongs.cpp new file mode 100644 index 0000000..ab5a3eb --- /dev/null +++ b/src/modules/filters/thmlstrongs.cpp @@ -0,0 +1,156 @@ +/****************************************************************************** + * + * thmlstrongs - SWFilter decendant to hide or show strongs number + * in a ThML module. + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <thmlstrongs.h> +#include <swmodule.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif +#include <ctype.h> + +const char ThMLStrongs::on[] = "On"; +const char ThMLStrongs::off[] = "Off"; +const char ThMLStrongs::optName[] = "Strong's Numbers"; +const char ThMLStrongs::optTip[] = "Toggles Strong's Numbers On and Off if they exist"; + + +ThMLStrongs::ThMLStrongs() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLStrongs::~ThMLStrongs() { +} + +void ThMLStrongs::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLStrongs::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char wordstr[5]; + char *valto; + char *ch; + char *textStart = text, *textEnd = 0; + string tmp; + bool newText = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = to; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs + if (module->isProcessEntryAttributes()) { + valto = val; + for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) + *valto++ = token[i]; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + sprintf(wordstr, "%03d", word++); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; + tmp = ""; + tmp.append(textStart, (int)(textEnd - textStart)); + module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; + newText = true; + } + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + + if (!option) { // if we don't want strongs + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + if (newText) {textStart = to; newText = false; } + continue; + } + } + if (module->isProcessEntryAttributes()) { + if (!strncmp(token, "sync type=\"morph\"", 17)) { + for (ch = token+17; *ch; ch++) { + if (!strncmp(ch, "class=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + 
sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val; + } + if (!strncmp(ch, "value=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + } + } + // if not a strongs token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + if (newText) {textStart = to; newText = false; } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/thmlvariants.cpp b/src/modules/filters/thmlvariants.cpp new file mode 100644 index 0000000..a6a52cf --- /dev/null +++ b/src/modules/filters/thmlvariants.cpp @@ -0,0 +1,185 @@ +/****************************************************************************** + * + * thmlvariants - SWFilter decendant to hide or show textual variants + * in a ThML module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlvariants.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLVariants::primary[] = "Primary Reading"; +const char ThMLVariants::secondary[] = "Secondary Reading"; +const char ThMLVariants::all[] = "All Readings"; + +const char ThMLVariants::optName[] = "Textual Variants"; +const char ThMLVariants::optTip[] = "Switch between Textual Variants modes"; + + +ThMLVariants::ThMLVariants() { + option = false; + options.push_back(primary); + options.push_back(secondary); + options.push_back(all); +} + + +ThMLVariants::~ThMLVariants() { +} + +void ThMLVariants::setOptionValue(const char *ival) +{ + if (!stricmp(ival, primary)) option = 0; + else if (!stricmp(ival, secondary)) option = 1; + else option = 2; +} + +const char *ThMLVariants::getOptionValue() +{ + if (option == 0) { + return primary; + } + else if (option == 1) { + return secondary; + } + else { + return all; + } +} + +char ThMLVariants::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (option == 0) { //we want primary only + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strncmp(token, "div type=\"variant\" class=\"2\"", 28)) { + hide = true; + continue; + } + else if (!strncmp(token, "/div", 4)) { + hide = false; + continue; + } + + // if not a footnote token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + + } + else if (option == 1) { //we want variant only + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strncmp(token, "div type=\"variant\" class=\"1\"", 28)) { + hide = true; + continue; + } + else if (!strncmp(token, "/div", 4)) { + hide = false; + continue; + } + + // if not a footnote token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + + } + return 0; +} + + + + + + diff --git a/src/modules/filters/unicodertf.cpp b/src/modules/filters/unicodertf.cpp new file mode 100644 index 0000000..6313792 --- /dev/null +++ b/src/modules/filters/unicodertf.cpp @@ -0,0 +1,75 @@ +/****************************************************************************** + * + * unicodertf - SWFilter decendant to convert a double byte unicode file + * to RTF tags + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <unicodertf.h> + +UnicodeRTF::UnicodeRTF() { +} + + +char UnicodeRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from, *maxto; + int len; + char digit[10]; + short ch; // must be signed per unicode spec (negative is ok for big numbers > 32768) + + len = strlenw(text) + 2; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else from = (unsigned 
char*)text; + maxto =(unsigned char*)text + maxlen; + + // ------------------------------- + bool lastUni = false; + for (to = (unsigned char*)text; *from && (to <= maxto); from++) { + ch = 0; + if ((*from & 128) != 128) { +// if ((*from == ' ') && (lastUni)) +// *to++ = ' '; + *to++ = *from; + lastUni = false; + continue; + } + if ((*from & 128) && ((*from & 64) != 64)) { + // error + *from = 'x'; + continue; + } + *from <<= 1; + int subsequent; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + ch <<= 6; + ch |= from[subsequent]; + } + subsequent--; + *from <<=1; + char significantFirstBits = 8 - (2+subsequent); + + ch |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + *to++ = '\\'; + *to++ = 'u'; + sprintf(digit, "%d", ch); + for (char *dig = digit; *dig; dig++) + *to++ = *dig; + *to++ = '?'; + lastUni = true; + } + + if (to != maxto) { + *to++ = 0; + } + *to = 0; + return 0; +} diff --git a/src/modules/filters/utf16utf8.cpp b/src/modules/filters/utf16utf8.cpp new file mode 100644 index 0000000..5a7719f --- /dev/null +++ b/src/modules/filters/utf16utf8.cpp @@ -0,0 +1,95 @@ +/****************************************************************************** + * + * UTF16UTF8 - SWFilter decendant to convert UTF-16 to UTF-8 + * + */ + +#include <stdlib.h> +#include <stdio.h> + +#include <utf16utf8.h> + +UTF16UTF8::UTF16UTF8() { +} + + +char UTF16UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned short *from; + unsigned char *to; + + int len; + unsigned long uchar; + unsigned short schar; + + len = 0; + from = (unsigned short*) text; + while (*from) { + len += 2; + from++; + } + + // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned short*)&text[maxlen - len]; + } + else + from = (unsigned short*)text; + + + // ------------------------------- + + for (to = (unsigned 
char*)text; *from; from++) { + uchar = 0; + + if (*from < 0xD800 || *from > 0xDFFF) { + uchar = *from; + } + else if (*from >= 0xD800 && *from <= 0xDBFF) { + uchar = *from; + schar = *(from+1); + if (uchar < 0xDC00 || uchar > 0xDFFF) { + //error, do nothing + continue; + } + uchar &= 0x03ff; + schar &= 0x03ff; + uchar <<= 10; + uchar |= schar; + uchar += 0x10000; + from++; + } + else { + //error, do nothing + continue; + } + + if (uchar < 0x80) { + *to++ = uchar; + } + else if (uchar < 0x800) { + *to++ = 0xc0 | (uchar >> 6); + *to++ = 0x80 | (uchar & 0x3f); + } + else if (uchar < 0x10000) { + *to++ = 0xe0 | (uchar >> 12); + *to++ = 0x80 | (uchar >> 6) & 0x3f; + *to++ = 0x80 | uchar & 0x3f; + } + else if (uchar < 0x200000) { + *to++ = 0xF0 | (uchar >> 18); + *to++ = 0x80 | (uchar >> 12) & 0x3F; + *to++ = 0x80 | (uchar >> 6) & 0x3F; + *to++ = 0x80 | uchar & 0x3F; + } + } + *to++ = 0; + *to = 0; + + return 0; +} + + + + diff --git a/src/modules/filters/utf8arshaping.cpp b/src/modules/filters/utf8arshaping.cpp new file mode 100644 index 0000000..5121f48 --- /dev/null +++ b/src/modules/filters/utf8arshaping.cpp @@ -0,0 +1,48 @@ +/****************************************************************************** +* +* utf8arshaping - SWFilter decendant to perform Arabic shaping on +* UTF-8 text +*/ + +#ifdef _ICU_ + +#include <stdlib.h> +#include <string.h> + +#ifdef __GNUC__ +#include <unixstr.h> +#endif + +#include <utf8arshaping.h> + +UTF8arShaping::UTF8arShaping() { + + conv = ucnv_open("UTF-8", &err); + +} + +UTF8arShaping::~UTF8arShaping() { + ucnv_close(conv); +} + +char UTF8arShaping::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + UChar *ustr, *ustr2; + + int32_t len = strlen(text); + ustr = new UChar[len]; + ustr2 = new UChar[len]; + + // Convert UTF-8 string to UTF-16 (UChars) + len = ucnv_toUChars(conv, ustr, len, text, -1, &err); + + len = u_shapeArabic(ustr, len, ustr2, len, U_SHAPE_LETTERS_SHAPE | U_SHAPE_DIGITS_EN2AN, &err); 
+ + ucnv_fromUChars(conv, text, maxlen, ustr2, len, &err); + + delete [] ustr2; + delete [] ustr; + return 0; +} + +#endif diff --git a/src/modules/filters/utf8bidireorder.cpp b/src/modules/filters/utf8bidireorder.cpp new file mode 100644 index 0000000..8fa7280 --- /dev/null +++ b/src/modules/filters/utf8bidireorder.cpp @@ -0,0 +1,55 @@ +/****************************************************************************** +* +* utf8cnormalizer - SWFilter decendant to perform reordering of UTF-8 +* text to visual order according to Unicode BiDi +*/ + +#ifdef _ICU_ + +#include <stdlib.h> +#include <string.h> + +#ifdef __GNUC__ +#include <unixstr.h> +#endif + +#include <utf8bidireorder.h> + +UTF8BiDiReorder::UTF8BiDiReorder() { + + conv = ucnv_open("UTF-8", &err); + +} + +UTF8BiDiReorder::~UTF8BiDiReorder() { + ucnv_close(conv); +} + +char UTF8BiDiReorder::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + UChar *ustr, *ustr2; + + int32_t len = strlen(text); + ustr = new UChar[len]; //each char could become a surrogate pair + + // Convert UTF-8 string to UTF-16 (UChars) + len = ucnv_toUChars(conv, ustr, len, text, -1, &err); + ustr2 = new UChar[len]; + + UBiDi* bidi = ubidi_openSized(len + 1, 0, &err); + ubidi_setPara(bidi, ustr, len, UBIDI_DEFAULT_RTL, NULL, &err); + len = ubidi_writeReordered(bidi, ustr2, len, + UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err); + ubidi_close(bidi); + +// len = ubidi_writeReverse(ustr, len, ustr2, len, +// UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err); + + ucnv_fromUChars(conv, text, maxlen, ustr2, len, &err); + + delete [] ustr2; + delete [] ustr; + return 0; +} + +#endif diff --git a/src/modules/filters/utf8cantillation.cpp b/src/modules/filters/utf8cantillation.cpp new file mode 100644 index 0000000..84cb513 --- /dev/null +++ b/src/modules/filters/utf8cantillation.cpp @@ -0,0 +1,64 @@ +/****************************************************************************** + * + * UTF8Cantillation - 
SWFilter decendant to remove UTF-8 Hebrew cantillation + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <utf8cantillation.h> + + +const char UTF8Cantillation::on[] = "On"; +const char UTF8Cantillation::off[] = "Off"; +const char UTF8Cantillation::optName[] = "Hebrew Cantillation"; +const char UTF8Cantillation::optTip[] = "Toggles Hebrew Cantillation Marks"; + +UTF8Cantillation::UTF8Cantillation() { + option = false; + options.push_back(on); + options.push_back(off); +} + +UTF8Cantillation::~UTF8Cantillation(){}; + +void UTF8Cantillation::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *UTF8Cantillation::getOptionValue() +{ + return (option) ? on:off; +} + +char UTF8Cantillation::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { + unsigned char *to, *from; + to = (unsigned char*)text; + //The UTF-8 range 0xD6 0x90 to 0xD6 0xAF and 0xD7 0x84 consist of Hebrew cantillation marks so block those out. 
+ for (from = (unsigned char*)text; *from; from++) { + if (*from != 0xD6) { + if (*from == 0xD7 && *(from + 1) == 0x84) { + from++; + } + else { + *to++ = *from; + } + } + else if (*(from + 1) < 0x90 || *(from + 1) > 0xAF) { + *to++ = *from; + from++; + *to++ = *from; + } + else { + from++; + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/utf8greekaccents.cpp b/src/modules/filters/utf8greekaccents.cpp new file mode 100644 index 0000000..b0e5dc8 --- /dev/null +++ b/src/modules/filters/utf8greekaccents.cpp @@ -0,0 +1,252 @@ +/****************************************************************************** + * + * UTF8GreekAccents - SWFilter decendant to remove UTF-8 Greek accents + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <utf8greekaccents.h> + + +const char UTF8GreekAccents::on[] = "On"; +const char UTF8GreekAccents::off[] = "Off"; +const char UTF8GreekAccents::optName[] = "Greek Accents"; +const char UTF8GreekAccents::optTip[] = "Toggles Greek Accents"; + +UTF8GreekAccents::UTF8GreekAccents() { + option = true; + options.push_back(on); + options.push_back(off); +} + +UTF8GreekAccents::~UTF8GreekAccents(){}; + +void UTF8GreekAccents::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *UTF8GreekAccents::getOptionValue() +{ + return (option) ? 
on:off; +} + +char UTF8GreekAccents::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { + unsigned char *to, *from; + + to = (unsigned char*)text; + for (from = (unsigned char*)text; *from; from++) { + //first just remove combining characters + if (*from == 0xE2 && *(from + 1) == 0x80 && *(from + 2) == 0x99) + from += 2; + else if (*from == 0xCC && *(from + 1)) { + if (*(from + 1) == 0x80 || *(from + 1) == 0x81 || *(from + 1) == 0x82 || *(from + 1) == 0x88 || *(from + 1) == 0x93 || *(from + 1) == 0x94) + from++; + } + else if (*from == 0xCD && *(from + 1) == 0xBA) + from++; + //now converted pre-composed characters to their alphabetic bases, discarding the accents + + //Greek + //capital alpha + else if ((*from == 0xCE && *(from + 1) == 0x86)) { + *to++ = 0xCE; + *to++ = 0x91; + from++; + } + //capital epsilon + else if ((*from == 0xCE && *(from + 1) == 0x88)) { + *to++ = 0xCE; + *to++ = 0x95; + from++; + } + //capital eta + else if ((*from == 0xCE && *(from + 1) == 0x89)) { + *to++ = 0xCE; + *to++ = 0x97; + from++; + } + //capital iota + else if ((*from == 0xCE && (*(from + 1) == 0x8A || *(from + 1) == 0xAA))) { + *to++ = 0xCE; + *to++ = 0x99; + from++; + } + //capital omicron + else if ((*from == 0xCE && *(from + 1) == 0x8C)) { + *to++ = 0xCE; + *to++ = 0x9F; + from++; + } + //capital upsilon + else if ((*from == 0xCE && (*(from + 1) == 0x8E || *(from + 1) == 0xAB))) { + *to++ = 0xCE; + *to++ = 0xA5; + from++; + } + //capital omega + else if ((*from == 0xCE && *(from + 1) == 0x8F)) { + *to++ = 0xCE; + *to++ = 0xA9; + from++; + } + + //alpha + else if ((*from == 0xCE && *(from + 1) == 0xAC)) { + *to++ = 0xCE; + *to++ = 0xB1; + from++; + } + //epsilon + else if ((*from == 0xCE && *(from + 1) == 0xAD)) { + *to++ = 0xCE; + *to++ = 0xB5; + from++; + } + //eta + else if ((*from == 0xCE && *(from + 1) == 0xAE)) { + *to++ = 0xCE; + *to++ = 0xB7; + from++; + } + //iota + else if ((*from == 0xCE && *(from + 1) == 0xAF) || 
(*from == 0xCF && *(from + 1) == 0x8A)) { + *to++ = 0xCE; + *to++ = 0xB9; + from++; + } + //omicron + else if ((*from == 0xCF && *(from + 1) == 0x8C)) { + *to++ = 0xCE; + *to++ = 0xBF; + from++; + } + //upsilon + else if ((*from == 0xCE && *(from + 1) == 0x88) || (*from == 0xCF && (*(from + 1) == 0x8B || *(from + 1) == 0x8D))) { + *to++ = 0xCF; + *to++ = 0x85; + from++; + } + //omega + else if ((*from == 0xCF && *(from + 1) == 0x8E)) { + *to++ = 0xCF; + *to++ = 0x89; + from++; + } + + //Extended Greek + //capital alpha + else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x88 && *(from + 2) <= 0x8F) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBC)) { + *to++ = 0xCE; + *to++ = 0x91; + from+=2; + } + //capital epsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x98 && *(from + 2) <= 0x9D) || (*(from + 1) == 0xBF && (*(from + 2) == 0x88 || *(from + 2) == 0x89)))) { + *to++ = 0xCE; + *to++ = 0x95; + from+=2; + } + //capital eta + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBE && *(from + 2) >= 0x98 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0x8A && *(from + 2) <= 0x8C))) { + *to++ = 0xCE; + *to++ = 0x97; + from+=2; + } + //capital iota + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBF) || (*(from + 1) == 0xBF && *(from + 2) >= 0x98 && *(from + 2) <= 0x9B))) { + *to++ = 0xCE; + *to++ = 0x99; + from+=2; + } + //capital omicron + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x88 && *(from + 2) <= 0x8D) || (*(from + 1) == 0xBF && *(from + 2) == 0xB8 || *(from + 2) == 0xB9))) { + *to++ = 0xCE; + *to++ = 0x9F; + from+=2; + } + //capital upsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x99 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAB))) { + *to++ = 0xCE; + 
*to++ = 0xA5; + from+=2; + } + //capital omega + else if (*from == 0xE1 && (((*(from + 1) == 0xBD || *(from + 1) == 0xBE) && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBF && *(from + 2) >= 0xBA && *(from + 2) <= 0xBC))) { + *to++ = 0xCE; + *to++ = 0xA9; + from+=2; + } + //capital rho + else if (*from == 0xE1 && *(from + 1) == 0xBF && *(from + 2) == 0xAC) { + *to++ = 0xCE; + *to++ = 0xA1; + from+=2; + } + + //alpha + else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x80 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB0 || *(from + 2) == 0xB1)) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7)) { + *to++ = 0xCE; + *to++ = 0xB1; + from+=2; + } + //epsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x90 && *(from + 2) <= 0x95) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB2 || *(from + 2) == 0xB3)))) { + *to++ = 0xCE; + *to++ = 0xB5; + from+=2; + } + //eta + else if (*from == 0xE1 && ((*(from + 1) == 0xBE && *(from + 2) >= 0x90 && *(from + 2) <= 0x97) || (*(from + 1) == 0xBC && *(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 1) == 0xBF && *(from + 2) >= 0x82 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB4 || *(from + 2) == 0xB5)))) { + *to++ = 0xCE; + *to++ = 0xB7; + from+=2; + } + //iota + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB6 || *(from + 2) == 0xB7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0x90 && *(from + 2) <= 0x97))) { + *to++ = 0xCE; + *to++ = 0xB9; + from+=2; + } + //omicron + else if (*from == 0xE1 && (*(from + 1) == 0xBD && ((*(from + 2) >= 0x80 && *(from + 2) <= 0x85) || (*(from + 2) == 0xB8 || *(from + 2) == 0xB9)))) { + *to++ = 0xCE; + *to++ = 0xBF; + from+=2; + } + //upsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0x90 && *(from + 2) <= 0x97) || *(from + 2) == 
0xBA || *(from + 2) == 0xBB)) || (*(from + 1) == 0xBF && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA3) || *(from + 2) == 0xA6 || *(from + 2) == 0xA7)))) { + *to++ = 0xCF; + *to++ = 0x85; + from+=2; + } + //omega + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 2) == 0xBC || *(from + 2) == 0xBD))) || (*(from + 1) == 0xBE && (*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0xB2 && *(from + 2) <= 0xB7))) { + *to++ = 0xCF; + *to++ = 0x89; + from+=2; + } + //rho + else if (*from == 0xE1 && *(from + 1) == 0xBF && (*(from + 2) == 0xA4 && *(from + 2) == 0xA5)) { + *to++ = 0xCF; + *to++ = 0x81; + from+=2; + } + else + *to++ = *from; + } + *to++ = 0; + *to = 0; + } + return 0; +} + + + + + + diff --git a/src/modules/filters/utf8hebrewpoints.cpp b/src/modules/filters/utf8hebrewpoints.cpp new file mode 100644 index 0000000..e5b50e1 --- /dev/null +++ b/src/modules/filters/utf8hebrewpoints.cpp @@ -0,0 +1,55 @@ +/****************************************************************************** + * + * UTF8HebrewPoints - SWFilter decendant to remove UTF-8 Hebrew vowel points + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <utf8hebrewpoints.h> + + +const char UTF8HebrewPoints::on[] = "On"; +const char UTF8HebrewPoints::off[] = "Off"; +const char UTF8HebrewPoints::optName[] = "Hebrew Vowel Points"; +const char UTF8HebrewPoints::optTip[] = "Toggles Hebrew Vowel Points"; + +UTF8HebrewPoints::UTF8HebrewPoints() { + option = true; + options.push_back(on); + options.push_back(off); +} + +UTF8HebrewPoints::~UTF8HebrewPoints(){}; + +void UTF8HebrewPoints::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *UTF8HebrewPoints::getOptionValue() +{ + return (option) ? 
on:off; +} + +char UTF8HebrewPoints::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { + unsigned char *to, *from; + + to = (unsigned char*)text; + //The UTF-8 range 0xD6 0xB0 to 0xD6 0xBF excluding 0xD6 0x consist of Hebrew cantillation marks so block those out. + for (from = (unsigned char*)text; *from; from++) { + if ((*from == 0xD6) && (*(from + 1) >= 0xB0 && *(from + 1) <= 0xBF) && (*(from + 1) != 0xBE)) { + from++; + } + else { + *to++ = *from; + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/utf8html.cpp b/src/modules/filters/utf8html.cpp new file mode 100644 index 0000000..7487815 --- /dev/null +++ b/src/modules/filters/utf8html.cpp @@ -0,0 +1,66 @@ +/****************************************************************************** + * + * utf8html - SWFilter decendant to convert a UTF-8 stream to HTML escapes + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <utf8html.h> + +UTF8HTML::UTF8HTML() { +} + + +char UTF8HTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + int len; + char digit[10]; + unsigned long ch; + + len = strlenw(text) + 2; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else from = (unsigned char*)text; + // ------------------------------- + for (to = (unsigned char*)text; *from; from++) { + ch = 0; + if ((*from & 128) != 128) { +// if (*from != ' ') + *to++ = *from; + continue; + } + if ((*from & 128) && ((*from & 64) != 64)) { + // error + *from = 'x'; + continue; + } + *from <<= 1; + int subsequent; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + ch <<= 6; + ch |= from[subsequent]; + } + subsequent--; + *from <<=1; + char significantFirstBits = 8 - (2+subsequent); + + ch |= (((short)*from) << 
(((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + *to++ = '&'; + *to++ = '#'; + sprintf(digit, "%d", ch); + for (char *dig = digit; *dig; dig++) + *to++ = *dig; + *to++ = ';'; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/utf8latin1.cpp b/src/modules/filters/utf8latin1.cpp new file mode 100644 index 0000000..6cc1acd --- /dev/null +++ b/src/modules/filters/utf8latin1.cpp @@ -0,0 +1,74 @@ +/****************************************************************************** + * + * UTF8Latin1 - SWFilter decendant to convert UTF-8 to Latin-1 + * + */ + +#include <stdlib.h> +#include <stdio.h> + +#include <utf8latin1.h> + +UTF8Latin1::UTF8Latin1(char rchar) : replacementChar(rchar) { +} + + +char UTF8Latin1::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *from; + unsigned short *to; + + int len; + unsigned long uchar; + unsigned char significantFirstBits, subsequent; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; + + + // ------------------------------- + + for (to = (unsigned short*)text; *from; from++) { + uchar = 0; + if ((*from & 128) != 128) { + // if (*from != ' ') + uchar = *from; + } + else if ((*from & 128) && ((*from & 64) != 64)) { + // error, do nothing + continue; + } + else { + *from <<= 1; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + uchar <<= 6; + uchar |= from[subsequent]; + } + subsequent--; + *from <<=1; + significantFirstBits = 8 - (2+subsequent); + + uchar |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + } + + if (uchar < 0xff) { + *to++ = (unsigned char)uchar; + } + else { + *to++ = replacementChar; + } + } + *to++ = 0; + *to = 0; + + return 0; +} + diff --git a/src/modules/filters/utf8nfc.cpp 
b/src/modules/filters/utf8nfc.cpp new file mode 100644 index 0000000..df9e090 --- /dev/null +++ b/src/modules/filters/utf8nfc.cpp @@ -0,0 +1,46 @@ +/****************************************************************************** +* +* utf8nfc - SWFilter decendant to perform NFC (canonical composition +* normalization) on UTF-8 text +*/ + +#ifdef _ICU_ + +#include <stdlib.h> +#include <string.h> + +#ifdef __GNUC__ +#include <unixstr.h> +#endif + +#include <utf8nfc.h> + +UTF8NFC::UTF8NFC() { + conv = ucnv_open("UTF-8", &err); +} + +UTF8NFC::~UTF8NFC() { + ucnv_close(conv); +} + +char UTF8NFC::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + int32_t len = strlen(text) * 2; + source = new UChar[len + 1]; //each char could become a surrogate pair + + // Convert UTF-8 string to UTF-16 (UChars) + len = ucnv_toUChars(conv, source, len, text, -1, &err); + target = new UChar[len + 1]; + + //canonical composition + unorm_normalize(source, len, UNORM_NFC, 0, target, len, &err); + + ucnv_fromUChars(conv, text, maxlen, target, -1, &err); + + delete [] source; + delete [] target; + + return 0; +} + +#endif diff --git a/src/modules/filters/utf8nfkd.cpp b/src/modules/filters/utf8nfkd.cpp new file mode 100644 index 0000000..450cbbf --- /dev/null +++ b/src/modules/filters/utf8nfkd.cpp @@ -0,0 +1,46 @@ +/****************************************************************************** +* +* utf8nfkd - SWFilter decendant to perform NFKD (compatability decomposition +* normalization) on UTF-8 text +*/ + +#ifdef _ICU_ + +#include <stdlib.h> +#include <string.h> + +#ifdef __GNUC__ +#include <unixstr.h> +#endif + +#include <utf8nfkd.h> + +UTF8NFKD::UTF8NFKD() { + conv = ucnv_open("UTF-8", &err); +} + +UTF8NFKD::~UTF8NFKD() { + ucnv_close(conv); +} + +char UTF8NFKD::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + int32_t len = strlen(text) * 2; + source = new UChar[len + 1]; //each char could become a surrogate pair + + // 
Convert UTF-8 string to UTF-16 (UChars) + len = ucnv_toUChars(conv, source, len, text, -1, &err); + target = new UChar[len + 1]; + + //compatability decomposition + unorm_normalize(source, len, UNORM_NFKD, 0, target, len, &err); + + ucnv_fromUChars(conv, text, maxlen, target, -1, &err); + + delete [] source; + delete [] target; + + return 0; +} + +#endif diff --git a/src/modules/filters/utf8transliterator.cpp b/src/modules/filters/utf8transliterator.cpp new file mode 100644 index 0000000..3686717 --- /dev/null +++ b/src/modules/filters/utf8transliterator.cpp @@ -0,0 +1,718 @@ +/****************************************************************************** +* +* utf8transliterators - SWFilter decendant to transliterate between +* ICU-supported scripts. +*/ + +#ifdef _ICU_ + +#include <stdlib.h> +#include <string.h> + +#ifdef __GNUC__ +#include <unixstr.h> +#endif + +#include <utf8transliterator.h> + +#ifndef _ICUSWORD_ +#include "unicode/resbund.h" +#endif +#include <swlog.h> + +#ifdef _ICU_ +class UnicodeCaster { + const UnicodeString &ustr; +public: + UnicodeCaster(const UnicodeString &ustr):ustr(ustr) {}; operator const char *() { return ""; }; +}; + +#endif +const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = { + "Off", + "Latin", + "Basic Latin", + "Beta", + "BGreek", +/* + "Greek", + "Hebrew", + "Cyrillic", + "Arabic", + "Syriac", + "Katakana", + "Hiragana", + "Jamo", + "Hangul", + "Devanagari", + "Tamil", + "Bengali", + "Gurmukhi", + "Gujarati", + "Oriya", + "Telugu", + "Kannada", + "Malayalam", + "Thai", + "Georgian", + "Armenian", + "Ethiopic", + "Gothic", + "Ugaritic", + "Coptic" + */ +}; + +const char UTF8Transliterator::optName[] = "Transliteration"; +const char UTF8Transliterator::optTip[] = "Transliterates between scripts"; + +SWTransMap UTF8Transliterator::transMap; + +#ifndef _ICUSWORD_ + +const char UTF8Transliterator::SW_RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs"; +const char UTF8Transliterator::SW_RB_RULE[] = "Rule"; 
+#ifdef SWICU_DATA +const char UTF8Transliterator::SW_RESDATA[] = SWICU_DATA; +#else +const char UTF8Transliterator::SW_RESDATA[] = "/usr/local/lib/sword/"; +#endif + +class SWCharString { + public: + inline SWCharString(const UnicodeString& str); + inline ~SWCharString(); + inline operator const char*() { return ptr; } + private: + char buf[128]; + char* ptr; +}; +SWCharString::SWCharString(const UnicodeString& str) { + // TODO This isn't quite right -- we should probably do + // preflighting here to determine the real length. + if (str.length() >= (int32_t)sizeof(buf)) { + ptr = new char[str.length() + 8]; + } else { + ptr = buf; + } + str.extract(0, 0x7FFFFFFF, ptr, ""); +} + +SWCharString::~SWCharString() { + if (ptr != buf) { + delete[] ptr; + } +} + +#endif // _ICUSWORD_ + + +UTF8Transliterator::UTF8Transliterator() { + option = 0; + unsigned long i; + for (i = 0; i < NUMTARGETSCRIPTS; i++) { + options.push_back(optionstring[i]); + } +#ifndef _ICUSWORD_ + utf8status = U_ZERO_ERROR; + Load(utf8status); +#endif +} + +void UTF8Transliterator::Load(UErrorCode &status) +{ +#ifndef _ICUSWORD_ + static const char translit_swordindex[] = "translit_swordindex"; + + UResourceBundle *bundle, *transIDs, *colBund; + bundle = ures_openDirect(SW_RESDATA, translit_swordindex, &status); + if (U_FAILURE(status)) { + SWLog::systemlog->LogError("no resource index to load"); + SWLog::systemlog->LogError("status %s", u_errorName(status)); + return; + } + + transIDs = ures_getByKey(bundle, SW_RB_RULE_BASED_IDS, 0, &status); + UParseError parseError; + + int32_t row, maxRows; + if (U_SUCCESS(status)) { + maxRows = ures_getSize(transIDs); + for (row = 0; row < maxRows; row++) { + colBund = ures_getByIndex(transIDs, row, 0, &status); + + if (U_SUCCESS(status) && ures_getSize(colBund) == 4) { + UnicodeString id = ures_getUnicodeStringByIndex(colBund, 0, &status); + UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0); + UnicodeString resString = 
ures_getUnicodeStringByIndex(colBund, 2, &status); + SWLog::systemlog->LogInformation("ok so far"); + + if (U_SUCCESS(status)) { + switch (type) { + case 0x66: // 'f' + case 0x69: // 'i' + // 'file' or 'internal'; + // row[2]=resource, row[3]=direction + { + UBool visible = (type == 0x0066 /*f*/); + UTransDirection dir = + (ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) == + 0x0046 /*F*/) ? + UTRANS_FORWARD : UTRANS_REVERSE; + //registry->put(id, resString, dir, visible); + SWLog::systemlog->LogInformation("instantiating %s ...", (const char *)(UnicodeCaster)resString); + registerTrans(id, resString, dir, status); + SWLog::systemlog->LogInformation("done."); + } + break; + case 0x61: // 'a' + // 'alias'; row[2]=createInstance argument + //registry->put(id, resString, TRUE); + break; + } + } + else SWLog::systemlog->LogError("Failed to get resString"); + } + else SWLog::systemlog->LogError("Failed to get row"); + + ures_close(colBund); + } + } + else + { + SWLog::systemlog->LogError("no resource index to load"); + SWLog::systemlog->LogError("status %s", u_errorName(status)); + } + + ures_close(transIDs); + ures_close(bundle); + +#endif // _ICUSWORD_ +} + +void UTF8Transliterator::registerTrans(const UnicodeString& ID, const UnicodeString& resource, + UTransDirection dir, UErrorCode &status ) +{ +#ifndef _ICUSWORD_ + SWLog::systemlog->LogInformation("registering ID locally %s", (const char *)(UnicodeCaster)ID); + SWTransData swstuff; + swstuff.resource = resource; + swstuff.dir = dir; + SWTransPair swpair; + swpair.first = ID; + swpair.second = swstuff; + transMap.insert(swpair); +#endif +} + +bool UTF8Transliterator::checkTrans(const UnicodeString& ID, UErrorCode &status ) +{ +#ifndef _ICUSWORD_ + Transliterator *trans = Transliterator::createInstance(ID, UTRANS_FORWARD, status); + if (!U_FAILURE(status)) + { + // already have it, clean up and return true + SWLog::systemlog->LogInformation("already have it %s", (const char *)(UnicodeCaster)ID); + delete 
trans; + return true; + } + status = U_ZERO_ERROR; + + SWTransMap::iterator swelement; + if ((swelement = transMap.find(ID)) != transMap.end()) + { + SWLog::systemlog->LogInformation("found element in map"); + SWTransData swstuff = (*swelement).second; + UParseError parseError; + //UErrorCode status; + //std::cout << "unregistering " << ID << std::endl; + //Transliterator::unregister(ID); + SWLog::systemlog->LogInformation("resource is %s", (const char *)(UnicodeCaster)swstuff.resource); + + // Get the rules + //std::cout << "importing: " << ID << ", " << resource << std::endl; + SWCharString ch(swstuff.resource); + UResourceBundle *bundle = ures_openDirect(SW_RESDATA, ch, &status); + const UnicodeString rules = ures_getUnicodeStringByKey(bundle, SW_RB_RULE, &status); + ures_close(bundle); + //parser.parse(rules, isReverse ? UTRANS_REVERSE : UTRANS_FORWARD, + // parseError, status); + if (U_FAILURE(status)) { + SWLog::systemlog->LogError("Failed to get rules"); + SWLog::systemlog->LogError("status %s", u_errorName(status)); + return false; + } + + + Transliterator *trans = Transliterator::createFromRules(ID, rules, swstuff.dir, + parseError,status); + if (U_FAILURE(status)) { + SWLog::systemlog->LogError("Failed to create transliterator"); + SWLog::systemlog->LogError("status %s", u_errorName(status)); + SWLog::systemlog->LogError("Parse error: line %s", parseError.line); + SWLog::systemlog->LogError("Parse error: offset %d", parseError.offset); + SWLog::systemlog->LogError("Parse error: preContext %s", *parseError.preContext); + SWLog::systemlog->LogError("Parse error: postContext %s", *parseError.postContext); + SWLog::systemlog->LogError("rules were"); +// SWLog::systemlog->LogError((const char *)rules); + return false; + } + + Transliterator::registerInstance(trans); + return true; + + //Transliterator *trans = instantiateTrans(ID, swstuff.resource, swstuff.dir, parseError, status); + //return trans; + } + else + { + return false; + } +#else +return true; 
+#endif // _ICUSWORD_ +} + +Transliterator * UTF8Transliterator::createTrans(const UnicodeString& preID, const UnicodeString& ID, + const UnicodeString& postID, UTransDirection dir, UErrorCode &status ) +{ + // extract id to check from ID xxx;id;xxx + if (checkTrans(ID, status)) { + UnicodeString fullID = preID; + fullID += ID; + fullID += postID; + Transliterator *trans = Transliterator::createInstance(fullID,UTRANS_FORWARD,status); + if (U_FAILURE(status)) { + delete trans; + return NULL; + } + else { + return trans; + } + } + else { + return NULL; + } +} + +void UTF8Transliterator::setOptionValue(const char *ival) +{ + unsigned char i = option = NUMTARGETSCRIPTS; + while (i && stricmp(ival, optionstring[i])) { + i--; + option = i; + } +} + +const char *UTF8Transliterator::getOptionValue() +{ + return (NUMTARGETSCRIPTS > option) ? optionstring[option] : 0; +} + +char UTF8Transliterator::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (option) { // if we want transliteration + unsigned long i, j; + UErrorCode err = U_ZERO_ERROR; + UConverter * conv = NULL; + conv = ucnv_open("UTF-8", &err); + + bool compat = false; + bool noNFC = false; + + if (option == SE_JAMO) { + noNFC = true; + } + + // Convert UTF-8 string to UTF-16 (UChars) + j = strlen(text); + int32_t len = (j * 2) + 1; + UChar *source = new UChar[len]; + err = U_ZERO_ERROR; + len = ucnv_toUChars(conv, source, len, text, j, &err); + source[len] = 0; + + // Figure out which scripts are used in the string + unsigned char scripts[NUMSCRIPTS]; + + for (i = 0; i < NUMSCRIPTS; i++) { + scripts[i] = false; + } + + for (i = 0; i < len; i++) { + j = ublock_getCode(source[i]); + switch (j) { + case UBLOCK_BASIC_LATIN: scripts[SE_LATIN] = true; break; + case UBLOCK_GREEK: scripts[SE_GREEK] = true; break; + case UBLOCK_HEBREW: scripts[SE_HEBREW] = true; break; + case UBLOCK_CYRILLIC: scripts[SE_CYRILLIC] = true; break; + case UBLOCK_ARABIC: scripts[SE_ARABIC] = true; break; + 
case UBLOCK_SYRIAC: scripts[SE_SYRIAC] = true; break; + case UBLOCK_KATAKANA: scripts[SE_KATAKANA] = true; break; + case UBLOCK_HIRAGANA: scripts[SE_HIRAGANA] = true; break; + case UBLOCK_HANGUL_SYLLABLES: scripts[SE_HANGUL] = true; break; + case UBLOCK_HANGUL_JAMO: scripts[SE_JAMO] = true; break; + case UBLOCK_DEVANAGARI: scripts[SE_DEVANAGARI] = true; break; + case UBLOCK_TAMIL: scripts[SE_TAMIL] = true; break; + case UBLOCK_BENGALI: scripts[SE_BENGALI] = true; break; + case UBLOCK_GURMUKHI: scripts[SE_GURMUKHI] = true; break; + case UBLOCK_GUJARATI: scripts[SE_GUJARATI] = true; break; + case UBLOCK_ORIYA: scripts[SE_ORIYA] = true; break; + case UBLOCK_TELUGU: scripts[SE_TELUGU] = true; break; + case UBLOCK_KANNADA: scripts[SE_KANNADA] = true; break; + case UBLOCK_MALAYALAM: scripts[SE_MALAYALAM] = true; break; + case UBLOCK_THAI: scripts[SE_THAI] = true; break; + case UBLOCK_GEORGIAN: scripts[SE_GEORGIAN] = true; break; + case UBLOCK_ARMENIAN: scripts[SE_ARMENIAN] = true; break; + case UBLOCK_ETHIOPIC: scripts[SE_ETHIOPIC] = true; break; + case UBLOCK_GOTHIC: scripts[SE_GOTHIC] = true; break; + // needs Unicode 3.2? or 4.0? 
support from ICU + //case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break; + case UBLOCK_CJK_RADICALS_SUPPLEMENT: + case UBLOCK_KANGXI_RADICALS: + case UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS: + case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: + case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A: + case UBLOCK_CJK_UNIFIED_IDEOGRAPHS: + scripts[SE_HAN] = true; + break; + case UBLOCK_CJK_COMPATIBILITY: + case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS: + case UBLOCK_CJK_COMPATIBILITY_FORMS: + scripts[SE_HAN] = true; + compat = true; + break; + case UBLOCK_HANGUL_COMPATIBILITY_JAMO: + scripts[SE_HANGUL] = true; + compat = true; + break; + + default: scripts[SE_LATIN] = true; + } + } + scripts[option] = false; //turn off the reflexive transliteration + + //return if we have no transliteration to do for this text + j = 0; + for (i = 0; !j && i < NUMSCRIPTS; i++) { + if (scripts[i]) j++; + } + if (!j) { + ucnv_close(conv); + return 0; + } + + UnicodeString preid; + if (compat) { + preid = UnicodeString("NFKD;"); + } + else { + preid = UnicodeString("NFD;"); + } + + //Simple X to Latin transliterators + UnicodeString id; + if (scripts[SE_GREEK]) { + if (option == SE_BETA) + id = UnicodeString("Greek-Beta"); + else if (option == SE_BGREEK) + id = UnicodeString("Greek-BGreek"); + else { + if (!strnicmp (((SWModule*)module)->Lang(), "cop", 3)) { + id = UnicodeString("Coptic-Latin"); + } + else { + id = UnicodeString("Greek-Latin"); + } + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_HEBREW]) { + if (option == SE_BETA) + id = UnicodeString("Hebrew-CCAT"); + else if (option == SE_SYRIAC) + id = UnicodeString("Hebrew-Syriac"); + else { + id = UnicodeString("Hebrew-Latin"); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_CYRILLIC]) { + id = UnicodeString("Cyrillic-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_ARABIC]) { + id = UnicodeString("Arabic-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_SYRIAC]) { + if (option == SE_BETA) + id = 
UnicodeString("Syriac-CCAT"); + else if (option == SE_HEBREW) + id = UnicodeString("Syriac-Hebrew"); + else { + id = UnicodeString("Syriac-Latin"); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_THAI]) { + id = UnicodeString("Thai-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_GEORGIAN]) { + id = UnicodeString("Georgian-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_ARMENIAN]) { + id = UnicodeString("Armenian-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_ETHIOPIC]) { + id = UnicodeString("Ethiopic-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_GOTHIC]) { + id = UnicodeString("Gothic-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_UGARITIC]) { + id = UnicodeString("Ugaritic-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_HAN]) { + if (!strnicmp (((SWModule*)module)->Lang(), "ja", 2)) { + id = UnicodeString("Kanji-OnRomaji"); + } + else { + id = UnicodeString("Han-Pinyin"); + } + scripts[SE_LATIN] = true; + } + + // Inter-Kana and Kana to Latin transliterators + if (option == SE_HIRAGANA && scripts[SE_KATAKANA]) { + id = UnicodeString("Katakana-Hiragana"); + scripts[SE_HIRAGANA] = true; + } + else if (option == SE_KATAKANA && scripts[SE_HIRAGANA]) { + id = UnicodeString("Hiragana-Katakana"); + scripts[SE_KATAKANA] = true; + } + else { + if (scripts[SE_KATAKANA]) { + id = UnicodeString("Katakana-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_HIRAGANA]) { + id = UnicodeString("Hiragana-Latin"); + scripts[SE_LATIN] = true; + } + } + + // Inter-Korean and Korean to Latin transliterators + if (option == SE_HANGUL && scripts[SE_JAMO]) { + noNFC = false; + scripts[SE_HANGUL] = true; + } + else if (option == SE_JAMO && scripts[SE_HANGUL]) { + noNFC = true; + scripts[SE_JAMO] = true; + } + else { + if (scripts[SE_HANGUL]) { + id = UnicodeString("Hangul-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_JAMO]) { + id = UnicodeString("Jamo-Latin"); + scripts[SE_LATIN] = true; + } + } + + // 
Indic-Latin + if (option < SE_DEVANAGARI || option > SE_MALAYALAM) { + // Indic to Latin + if (scripts[SE_TAMIL]) { + id = UnicodeString("Tamil-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_BENGALI]) { + id = UnicodeString("Bengali-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_GURMUKHI]) { + id = UnicodeString("Gurmukhi-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_GUJARATI]) { + id = UnicodeString("Gujarati-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_ORIYA]) { + id = UnicodeString("Oriya-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_TELUGU]) { + id = UnicodeString("Telugu-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_KANNADA]) { + id = UnicodeString("Kannada-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_MALAYALAM]) { + id = UnicodeString("Malayalam-Latin"); + scripts[SE_LATIN] = true; + } + } + else { + if (scripts[SE_LATIN]) { + id = UnicodeString("Latin-InterIndic"); + } + if (scripts[SE_DEVANAGARI]) { + id = UnicodeString("Devanagari-InterIndic"); + } + if (scripts[SE_TAMIL]) { + id = UnicodeString("Tamil-InterIndic"); + } + if (scripts[SE_BENGALI]) { + id = UnicodeString("Bengali-InterIndic"); + } + if (scripts[SE_GURMUKHI]) { + id = UnicodeString("Gurmurkhi-InterIndic"); + } + if (scripts[SE_GUJARATI]) { + id = UnicodeString("Gujarati-InterIndic"); + } + if (scripts[SE_ORIYA]) { + id = UnicodeString("Oriya-InterIndic"); + } + if (scripts[SE_TELUGU]) { + id = UnicodeString("Telugu-InterIndic"); + } + if (scripts[SE_KANNADA]) { + id = UnicodeString("Kannada-InterIndic"); + } + if (scripts[SE_MALAYALAM]) { + id = UnicodeString("Malayalam-InterIndic"); + } + + switch(option) { + case SE_DEVANAGARI: + id = UnicodeString("InterIndic-Devanagari"); + break; + case SE_TAMIL: + id = UnicodeString("InterIndic-Tamil"); + break; + case SE_BENGALI: + id = UnicodeString("InterIndic-Bengali"); + break; + case SE_GURMUKHI: + id = UnicodeString("InterIndic-Gurmukhi"); + break; + case 
SE_GUJARATI: + id = UnicodeString("InterIndic-Gujarati"); + break; + case SE_ORIYA: + id = UnicodeString("InterIndic-Oriya"); + break; + case SE_TELUGU: + id = UnicodeString("InterIndic-Telugu"); + break; + case SE_KANNADA: + id = UnicodeString("InterIndic-Kannada"); + break; + case SE_MALAYALAM: + id = UnicodeString("InterIndic-Malayalam"); + break; + default: + id = UnicodeString("InterIndic-Latin"); + scripts[SE_LATIN] = true; + break; + } + } + + if (scripts[SE_LATIN]) { + switch (option) { + case SE_GREEK: + id = UnicodeString("Latin-Greek"); + break; + case SE_HEBREW: + id = UnicodeString("Latin-Hebrew"); + break; + case SE_CYRILLIC: + id = UnicodeString("Latin-Cyrillic"); + break; + case SE_ARABIC: + id = UnicodeString("Latin-Arabic"); + break; + case SE_SYRIAC: + id = UnicodeString("Latin-Syriac"); + break; + case SE_THAI: + id = UnicodeString("Latin-Thai"); + break; + case SE_GEORGIAN: + id = UnicodeString("Latin-Georgian"); + break; + case SE_ARMENIAN: + id = UnicodeString("Latin-Armenian"); + break; + case SE_ETHIOPIC: + id = UnicodeString("Latin-Ethiopic"); + break; + case SE_GOTHIC: + id = UnicodeString("Latin-Gothic"); + break; + case SE_UGARITIC: + id = UnicodeString("Latin-Ugaritic"); + break; + case SE_COPTIC: + id = UnicodeString("Latin-Coptic"); + break; + case SE_KATAKANA: + id = UnicodeString("Latin-Katakana"); + break; + case SE_HIRAGANA: + id = UnicodeString("Latin-Hiragana"); + break; + case SE_JAMO: + id = UnicodeString("Latin-Jamo"); + break; + case SE_HANGUL: + id = UnicodeString("Latin-Hangul"); + break; + } + } + + if (option == SE_BASICLATIN) { + id = UnicodeString("Any-Latin1"); + } + UnicodeString postid; + if (noNFC) { + postid = UnicodeString(";NFD"); + } else { + postid = UnicodeString(";NFC"); + } + + //UParseError perr; + + err = U_ZERO_ERROR; + //Transliterator * trans = Transliterator::createInstance(id, UTRANS_FORWARD, perr, err); + Transliterator * trans = createTrans(preid, id, postid, UTRANS_FORWARD, err); + if (trans && 
!U_FAILURE(err)) { + UnicodeString target = UnicodeString(source); + trans->transliterate(target); + len = ucnv_fromUChars(conv, text, maxlen, target.getBuffer(), target.length(), &err); + if (len < maxlen) *(text + len) = 0; + else *(text + maxlen) = 0; + delete trans; + } + ucnv_close(conv); + } + return 0; +} +#endif + + + diff --git a/src/modules/filters/utf8utf16.cpp b/src/modules/filters/utf8utf16.cpp new file mode 100644 index 0000000..9aea6fe --- /dev/null +++ b/src/modules/filters/utf8utf16.cpp @@ -0,0 +1,79 @@ +/****************************************************************************** + * + * UTF8UTF16 - SWFilter decendant to convert UTF-8 to UTF-16 + * + */ + +#include <stdlib.h> +#include <stdio.h> + +#include <utf8utf16.h> + +UTF8UTF16::UTF8UTF16() { +} + + +char UTF8UTF16::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *from; + unsigned short *to; + + int len; + unsigned long uchar; + unsigned char significantFirstBits, subsequent; + unsigned short schar; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; + + + // ------------------------------- + + for (to = (unsigned short*)text; *from; from++) { + uchar = 0; + if ((*from & 128) != 128) { + // if (*from != ' ') + uchar = *from; + } + else if ((*from & 128) && ((*from & 64) != 64)) { + // error, do nothing + continue; + } + else { + *from <<= 1; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + uchar <<= 6; + uchar |= from[subsequent]; + } + subsequent--; + *from <<=1; + significantFirstBits = 8 - (2+subsequent); + + uchar |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + } + + if (uchar < 0x1ffff) { + *to++ = (unsigned short)uchar; + } + else { + uchar -= 0x10000; + schar = 0xD800 | (uchar & 
0x03ff); + uchar >>= 10; + uchar |= 0xDC00; + *to++ = (unsigned short)schar; + *to++ = (unsigned short)uchar; + } + } + *to = (unsigned short)0; + + return 0; +} + diff --git a/src/modules/genbook/Makefile b/src/modules/genbook/Makefile new file mode 100644 index 0000000..1a2d00d --- /dev/null +++ b/src/modules/genbook/Makefile @@ -0,0 +1,5 @@ + +root := ../../.. + +all: + make -C ${root} diff --git a/src/modules/genbook/Makefile.am b/src/modules/genbook/Makefile.am new file mode 100644 index 0000000..02f6ab1 --- /dev/null +++ b/src/modules/genbook/Makefile.am @@ -0,0 +1,5 @@ +genbookdir = $(top_srcdir)/src/modules/genbook + +libsword_la_SOURCES += $(genbookdir)/swgenbook.cpp + +include ../src/modules/genbook/rawgenbook/Makefile.am diff --git a/src/modules/genbook/rawgenbook/Makefile b/src/modules/genbook/rawgenbook/Makefile new file mode 100644 index 0000000..aab8056 --- /dev/null +++ b/src/modules/genbook/rawgenbook/Makefile @@ -0,0 +1,4 @@ +root := ../../../.. + +all: + make -C ${root} diff --git a/src/modules/genbook/rawgenbook/Makefile.am b/src/modules/genbook/rawgenbook/Makefile.am new file mode 100644 index 0000000..a176d75 --- /dev/null +++ b/src/modules/genbook/rawgenbook/Makefile.am @@ -0,0 +1,4 @@ +rawgenbookdir = $(top_srcdir)/src/modules/genbook/rawgenbook + +libsword_la_SOURCES += $(rawgenbookdir)/rawgenbook.cpp + diff --git a/src/modules/genbook/rawgenbook/rawgenbook.cpp b/src/modules/genbook/rawgenbook/rawgenbook.cpp new file mode 100644 index 0000000..e22532a --- /dev/null +++ b/src/modules/genbook/rawgenbook/rawgenbook.cpp @@ -0,0 +1,216 @@ +/****************************************************************************** + * rawtext.cpp - code for class 'RawGenBook'- a module that reads raw text files: + * ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawgenbook.h> +#include 
<rawstr.h> + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +/****************************************************************************** + * RawGenBook Constructor - Initializes data for instance of RawGenBook + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawGenBook::RawGenBook(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) + : SWGenBook(iname, idesc, idisp, enc, dir, mark, ilang) { + int fileMode = O_RDWR; + char *buf = new char [ strlen (ipath) + 20 ]; + + entryBuf = 0; + path = 0; + stdstr(&path, ipath); + + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + delete key; + key = CreateKey(); + + + sprintf(buf, "%s.bdt", path); + bdtfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + delete [] buf; + +} + + +/****************************************************************************** + * RawGenBook Destructor - Cleans up instance of RawGenBook + */ + +RawGenBook::~RawGenBook() { + + FileMgr::systemFileMgr.close(bdtfd); + + if (path) + delete [] path; + + if (entryBuf) + delete [] entryBuf; +} + + +/****************************************************************************** + * RawGenBook::getRawEntry - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +char *RawGenBook::getRawEntry() { + + __u32 offset = 0; + __u32 size = 0; + + TreeKeyIdx *key = 0; + try { + key = SWDYNAMIC_CAST(TreeKeyIdx, (this->key)); + } + catch ( ... 
) {} + + if (!key) { + key = (TreeKeyIdx *)CreateKey(); + (*key) = *(this->key); + } + + if (entryBuf) + delete [] entryBuf; + + int dsize; + key->getUserData(&dsize); + if (dsize > 7) { + memcpy(&offset, key->getUserData(), 4); + offset = swordtoarch32(offset); + + memcpy(&size, key->getUserData() + 4, 4); + size = swordtoarch32(size); + + entrySize = size; // support getEntrySize call + + entryBuf = new char [ (size + 2) * FILTERPAD ]; + *entryBuf = 0; + lseek(bdtfd->getFd(), offset, SEEK_SET); + read(bdtfd->getFd(), entryBuf, size); + + rawFilter(entryBuf, size, key); + + if (!isUnicode()) + RawStr::preptext(entryBuf); + } + else { + entryBuf = new char [2]; + entryBuf[0] = 0; + entryBuf[1] = 0; + entrySize = 0; + } + + if (key != this->key) // free our key if we created a VerseKey + delete key; + + return entryBuf; +} + + +void RawGenBook::setEntry(const char *inbuf, long len) { + + __u32 offset = archtosword32(lseek(bdtfd->getFd(), 0, SEEK_END)); + __u32 size = 0; + TreeKeyIdx *key = ((TreeKeyIdx *)this->key); + + char userData[8]; + + if (!len) + len = strlen(inbuf); + + write(bdtfd->getFd(), inbuf, len); + + size = archtosword32(len); + memcpy(userData, &offset, 4); + memcpy(userData+4, &size, 4); + key->setUserData(userData, 8); + key->save(); +} + + +void RawGenBook::linkEntry(const SWKey *inkey) { + TreeKeyIdx *srckey = 0; + TreeKeyIdx *key = ((TreeKeyIdx *)this->key); + // see if we have a VerseKey * or decendant + try { + srckey = SWDYNAMIC_CAST(TreeKeyIdx, inkey); + } + catch ( ... 
) {} + // if we don't have a VerseKey * decendant, create our own + if (!srckey) { + srckey = (TreeKeyIdx *)CreateKey(); + (*srckey) = *inkey; + } + + key->setUserData(srckey->getUserData(), 8); + key->save(); + + if (inkey != srckey) // free our key if we created a VerseKey + delete srckey; +} + + +/****************************************************************************** + * RawGenBook::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawGenBook::deleteEntry() { + TreeKeyIdx *key = ((TreeKeyIdx *)this->key); + key->remove(); +} + + +char RawGenBook::createModule(const char *ipath) { + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd; + signed char retval; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.bdt", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + retval = TreeKeyIdx::create(path); + delete [] path; + return retval; +} + + +SWKey *RawGenBook::CreateKey() { + TreeKeyIdx *newKey = new TreeKeyIdx(path); + return newKey; +} diff --git a/src/modules/genbook/swgenbook.cpp b/src/modules/genbook/swgenbook.cpp new file mode 100644 index 0000000..589b0b9 --- /dev/null +++ b/src/modules/genbook/swgenbook.cpp @@ -0,0 +1,27 @@ +/****************************************************************************** + * swld.cpp - code for base class 'SWLD'. SWLD is the basis for all + * types of Lexicon and Dictionary modules (hence the 'LD'). 
+ */ + +#include <swgenbook.h> + + +/****************************************************************************** + * SWLD Constructor - Initializes data for instance of SWLD + * + * ENT: imodname - Internal name for module + * imoddesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +SWGenBook::SWGenBook(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : SWModule(imodname, imoddesc, idisp, "Generic Books", enc, dir, mark, ilang) { +} + + +/****************************************************************************** + * SWLD Destructor - Cleans up instance of SWLD + */ + +SWGenBook::~SWGenBook() { +} + diff --git a/src/modules/lexdict/Makefile b/src/modules/lexdict/Makefile new file mode 100644 index 0000000..1a2d00d --- /dev/null +++ b/src/modules/lexdict/Makefile @@ -0,0 +1,5 @@ + +root := ../../.. + +all: + make -C ${root} diff --git a/src/modules/lexdict/Makefile.am b/src/modules/lexdict/Makefile.am new file mode 100644 index 0000000..8cfe68b --- /dev/null +++ b/src/modules/lexdict/Makefile.am @@ -0,0 +1,7 @@ +lexdictdir = $(top_srcdir)/src/modules/lexdict + +libsword_la_SOURCES += $(lexdictdir)/swld.cpp + +include ../src/modules/lexdict/rawld/Makefile.am +include ../src/modules/lexdict/rawld4/Makefile.am +include ../src/modules/lexdict/zld/Makefile.am diff --git a/src/modules/lexdict/rawld/Makefile b/src/modules/lexdict/rawld/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/lexdict/rawld/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. 
+ +all: + make -C ${root} diff --git a/src/modules/lexdict/rawld/Makefile.am b/src/modules/lexdict/rawld/Makefile.am new file mode 100644 index 0000000..2a2d996 --- /dev/null +++ b/src/modules/lexdict/rawld/Makefile.am @@ -0,0 +1,4 @@ +rawlddir = $(top_srcdir)/src/modules/lexdict/rawld + +libsword_la_SOURCES += $(rawlddir)/rawld.cpp + diff --git a/src/modules/lexdict/rawld/no13.c b/src/modules/lexdict/rawld/no13.c new file mode 100644 index 0000000..1e94846 --- /dev/null +++ b/src/modules/lexdict/rawld/no13.c @@ -0,0 +1,34 @@ +#include <fcntl.h> +#include <stdio.h> + +main(int argc, char **argv) +{ + int fd, loop; + char ch; + char breakcnt = 0; + + if (argc != 2) { + fprintf(stderr, "This program writes to stdout, so to be useful,\n\tit should be redirected (e.g no13 bla > bla.dat)\nusage: %s <filename>\n", argv[0]); + exit(1); + } + fd = open(argv[1], O_RDONLY); + while (read(fd, &ch, 1) == 1) { + if (ch == 0x0d) { // CR + breakcnt++; + continue; + } + if (ch == 0x1a) // Ctrl-Z + continue; + + if (ch != 0x0a) { // LF + if (breakcnt > 1) { + for (loop = breakcnt; loop > 0; loop--) + putchar(0x0d); + putchar(0x0a); + } + breakcnt=0; + } + putchar(ch); + } + close(fd); +} diff --git a/src/modules/lexdict/rawld/rawld.cpp b/src/modules/lexdict/rawld/rawld.cpp new file mode 100644 index 0000000..058679a --- /dev/null +++ b/src/modules/lexdict/rawld/rawld.cpp @@ -0,0 +1,173 @@ +/****************************************************************************** + * rawld.cpp - code for class 'RawLD'- a module that reads raw lexicon and + * dictionary files: *.dat *.idx + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawstr.h> +#include <rawld.h> + + + /****************************************************************************** + * RawLD Constructor - Initializes data for instance of RawLD + * + * ENT: ipath - path and 
filename of files (no extension) + * iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawLD::RawLD(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : RawStr(ipath), SWLD(iname, idesc, idisp, enc, dir, mark, ilang) +{ +} + + +/****************************************************************************** + * RawLD Destructor - Cleans up instance of RawLD + */ + +RawLD::~RawLD() +{ +} + + +/****************************************************************************** + * RawLD::strongsPad - Pads a key if it is 100% digits to 5 places + * + * ENT: buf - buffer to check and pad + */ + +void RawLD::strongsPad(char *buf) +{ + const char *check; + int size = 0; + int len = strlen(buf); + if ((len < 5) && (len > 0)) { + for (check = buf; *check; check++) { + if (!isdigit(*check)) + break; + else size++; + } + + if ((size == len) && size) + sprintf(buf, "%.5d", atoi(buf)); + } +} + + +/****************************************************************************** + * RawLD::getEntry - Looks up entry from data file. 'Snaps' to closest + * entry and sets 'entrybuf'. + * + * ENT: away - number of entries offset from key (default = 0) + * + * RET: error status + */ + +char RawLD::getEntry(long away) +{ + long start = 0; + unsigned short size = 0; + char *idxbuf = 0; + char retval = 0; + + char *buf = new char [ strlen(*key) + 6 ]; + strcpy(buf, *key); + + strongsPad(buf); + + if (!(retval = findoffset(buf, &start, &size, away))) { + readtext(start, &size, &idxbuf, &entrybuf); + entrySize = size; // support getEntrySize call + if (!key->Persist()) // If we have our own key + *key = idxbuf; // reset it to entry index buffer + + stdstr(&entkeytxt, idxbuf); // set entry key text that module 'snapped' to. 
+ delete [] idxbuf; + } + else { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ 5 ]; + *entrybuf = 0; + } + + delete [] buf; + return retval; +} + + +/****************************************************************************** + * RawLD::getRawEntry - Returns the correct entry when char * cast + * is requested + * + * RET: string buffer with entry + */ + +char *RawLD::getRawEntry() { + + char ret = getEntry(); + if (!ret) { + if (!isUnicode()) + preptext(entrybuf); + } + else error = ret; + + return entrybuf; +} + + +/****************************************************************************** + * RawLD::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void RawLD::increment(int steps) { + char tmperror; + + if (key->Traversable()) { + *key += steps; + error = key->Error(); + steps = 0; + } + + tmperror = (getEntry(steps)) ? KEYERR_OUTOFBOUNDS : 0; + error = (error)?error:tmperror; + *key = entkeytxt; +} + + +void RawLD::setEntry(const char *inbuf, long len) { + settext(*key, inbuf, len); +} + + +void RawLD::linkEntry(const SWKey *inkey) { + linkentry(*key, *inkey); +} + + +/****************************************************************************** + * RawFiles::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawLD::deleteEntry() { + settext(*key, ""); +} diff --git a/src/modules/lexdict/rawld/rawldidx.c b/src/modules/lexdict/rawld/rawldidx.c new file mode 100644 index 0000000..cc4709c --- /dev/null +++ b/src/modules/lexdict/rawld/rawldidx.c @@ -0,0 +1,96 @@ +/***************************************************************************** + * Bible dictionary index utility + */ + + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + + +char findbreak(int fd, long *offset, short *size) +{ + 
char buf[3]; + char rc = 1; + long offset2; + + memset(buf, 0, sizeof(buf)); + + while (read(fd, &buf[sizeof(buf)-1], 1) == 1) { + if ((buf[0] == 10) && (buf[1] == '$') && (buf[2] == '$')) { + while (read(fd, buf, 1) == 1) { + if (*buf == 10) { + if (read(fd, buf, 1) == 1) { + *offset = lseek(fd, 0, SEEK_CUR); + rc = 0; + if (size) { + if (!findbreak(fd, &offset2, 0)) + *size = offset2 - *offset - 13; + else *size = lseek(fd, 0, SEEK_END) - *offset; + lseek(fd, *offset, SEEK_SET); + } + break; + } + } + } + break; + } + memmove(buf, &buf[1], sizeof(buf)-1); + } + return rc; +} + + +void main(int argc, char **argv) +{ + int fd, ifd; + long offset; + short size; + char *buf; + + if (argc < 2) { + fprintf(stderr, "usage: %s <file to process (no .dat)>\n", argv[0]); + exit(1); + } + + buf = (char *) calloc(strlen(argv[1]) + 5, 1); + +#ifndef O_BINARY // O_BINARY is for Borland to be happy. If we're in GNU, just define it to a NULL mask +#define O_BINARY 0 +#endif + sprintf(buf, "%s.dat", argv[1]); + fd = open(buf, O_RDONLY|O_BINARY); + + sprintf(buf, "%s.idx", argv[1]); + ifd = open(buf, O_CREAT|O_WRONLY|O_BINARY); + + offset = 0; /* write offset for intro */ + write(ifd, &offset, 4); + findbreak(fd, &offset, 0); + lseek(fd, 0L, SEEK_SET); + size = offset - 12; + write(ifd, &size, 2); + + buf[3] = 0; /* delimit string for read below */ + + while(!findbreak(fd, &offset, &size)) { + write(ifd, &offset, 4); + write(ifd, &size, 2); + read(fd, buf, 3); + printf("Found: %s...(%ld:%d)\n", buf, offset, size); + } + + free(buf); + + close(ifd); + close(fd); +} diff --git a/src/modules/lexdict/rawld/strongsidx.c b/src/modules/lexdict/rawld/strongsidx.c new file mode 100644 index 0000000..61bcda3 --- /dev/null +++ b/src/modules/lexdict/rawld/strongsidx.c @@ -0,0 +1,90 @@ +/***************************************************************************** + * Bible dictionary index utility + */ + + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include 
<sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + + +char findbreak(int fd, long *offset, short *size) +{ + char buf[3]; + char rc = 1; + long offset2; + + memset(buf, 0, sizeof(buf)); + + while (read(fd, &buf[sizeof(buf)-1], 1) == 1) { + if ((buf[0] == 10) && (isdigit(buf[1])) && (isdigit(buf[2]))) { + if (read(fd, buf, 1) == 1) { + *offset = lseek(fd, 0, SEEK_CUR) - 3; + rc = 0; + if (size) { + if (!findbreak(fd, &offset2, 0)) + *size = offset2 - *offset; + else *size = lseek(fd, 0, SEEK_END) - *offset; + lseek(fd, *offset, SEEK_SET); + } + break; + } + break; + } + memmove(buf, &buf[1], sizeof(buf)-1); + } + return rc; +} + + +void main(int argc, char **argv) +{ + int fd, ifd; + long offset; + short size; + char *buf; + char entbuf[6]; + + if (argc < 2) { + fprintf(stderr, "usage: %s <file to process (no .dat)>\n", argv[0]); + exit(1); + } + + buf = (char *) calloc(strlen(argv[1]) + 5, 1); + + sprintf(buf, "%s.dat", argv[1]); + fd = open(buf, O_RDONLY); + + sprintf(buf, "%s.idx", argv[1]); + ifd = open(buf, O_CREAT|O_WRONLY); + + offset = 0; /* write offset for intro */ + write(ifd, &offset, 4); + findbreak(fd, &offset, 0); + lseek(fd, 0L, SEEK_SET); + size = offset - 12; + write(ifd, &size, 2); + + entbuf[5] = 0; /* delimit string for read below */ + + while(!findbreak(fd, &offset, &size)) { + write(ifd, &offset, 4); + write(ifd, &size, 2); + read(fd, entbuf, 5); + printf("Found: %s...(%ld:%d)\n", entbuf, offset, size); + } + + free(buf); + + close(ifd); + close(fd); +} diff --git a/src/modules/lexdict/rawld4/Makefile b/src/modules/lexdict/rawld4/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/lexdict/rawld4/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. 
+ +all: + make -C ${root} diff --git a/src/modules/lexdict/rawld4/Makefile.am b/src/modules/lexdict/rawld4/Makefile.am new file mode 100644 index 0000000..697e2e5 --- /dev/null +++ b/src/modules/lexdict/rawld4/Makefile.am @@ -0,0 +1,4 @@ +rawld4dir = $(top_srcdir)/src/modules/lexdict/rawld4 + +libsword_la_SOURCES += $(rawld4dir)/rawld4.cpp + diff --git a/src/modules/lexdict/rawld4/rawld4.cpp b/src/modules/lexdict/rawld4/rawld4.cpp new file mode 100644 index 0000000..1bdf22f --- /dev/null +++ b/src/modules/lexdict/rawld4/rawld4.cpp @@ -0,0 +1,172 @@ +/****************************************************************************** + * rawld.cpp - code for class 'RawLD'- a module that reads raw lexicon and + * dictionary files: *.dat *.idx + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawstr4.h> +#include <rawld4.h> + + + /****************************************************************************** + * RawLD Constructor - Initializes data for instance of RawLD + * + * ENT: ipath - path and filename of files (no extension) + * iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawLD4::RawLD4(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : RawStr4(ipath), SWLD(iname, idesc, idisp, enc, dir, mark, ilang) +{ +} + + +/****************************************************************************** + * RawLD Destructor - Cleans up instance of RawLD + */ + +RawLD4::~RawLD4() +{ +} + + +/****************************************************************************** + * RawLD4::strongsPad - Pads a key if it is 100% digits to 5 places + * + * ENT: buf - buffer to check and pad + */ + +void RawLD4::strongsPad(char *buf) +{ + const char 
*check; + long size = 0; + int len = strlen(buf); + if ((len < 5) && (len > 0)) { + for (check = buf; *check; check++) { + if (!isdigit(*check)) + break; + else size++; + } + + if ((size == len) && size) + sprintf(buf, "%.5d", atoi(buf)); + } +} + + +/****************************************************************************** + * RawLD4::getEntry - Looks up entry from data file. 'Snaps' to closest + * entry and sets 'entrybuf'. + * + * ENT: away - number of entries offset from key (default = 0) + * + * RET: error status + */ + +char RawLD4::getEntry(long away) +{ + long start = 0; + unsigned long size = 0; + char *idxbuf = 0; + char retval = 0; + + char *buf = new char [ strlen(*key) + 6 ]; + strcpy(buf, *key); + + strongsPad(buf); + + *entrybuf = 0; + if (!(retval = findoffset(buf, &start, &size, away))) { + readtext(start, &size, &idxbuf, &entrybuf); + entrySize = size; // support getEntrySize call + if (!key->Persist()) // If we have our own key + *key = idxbuf; // reset it to entry index buffer + + stdstr(&entkeytxt, idxbuf); // set entry key text that module 'snapped' to. 
+ delete [] idxbuf; + } + else { + entrybuf = new char [ 5 ]; + *entrybuf = 0; + } + + delete [] buf; + return retval; +} + + +/****************************************************************************** + * RawLD4::getRawEntry - Returns the correct entry when char * cast + * is requested + * + * RET: string buffer with entry + */ + +char *RawLD4::getRawEntry() { + + char ret = getEntry(); + if (!ret) { + if (!isUnicode()) + preptext(entrybuf); + } + else error = ret; + + return entrybuf; +} + + +/****************************************************************************** + * RawLD4::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void RawLD4::increment(int steps) { + char tmperror; + + if (key->Traversable()) { + *key += steps; + error = key->Error(); + steps = 0; + } + + tmperror = (getEntry(steps)) ? KEYERR_OUTOFBOUNDS : 0; + error = (error)?error:tmperror; + *key = entkeytxt; +} + + +void RawLD4::setEntry(const char *inbuf, long len) { + setText(*key, inbuf, len); +} + + +void RawLD4::linkEntry(const SWKey *inkey) { + linkentry(*key, *inkey); +} + + +/****************************************************************************** + * RawFiles::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawLD4::deleteEntry() { + setText(*key, ""); +} diff --git a/src/modules/lexdict/swld.cpp b/src/modules/lexdict/swld.cpp new file mode 100644 index 0000000..d28a5b8 --- /dev/null +++ b/src/modules/lexdict/swld.cpp @@ -0,0 +1,76 @@ +/****************************************************************************** + * swld.cpp - code for base class 'SWLD'. SWLD is the basis for all + * types of Lexicon and Dictionary modules (hence the 'LD'). 
+ */ + +#include <swld.h> + + +/****************************************************************************** + * SWLD Constructor - Initializes data for instance of SWLD + * + * ENT: imodname - Internal name for module + * imoddesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +SWLD::SWLD(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : SWModule(imodname, imoddesc, idisp, "Lexicons / Dictionaries", enc, dir, mark, ilang) +{ + delete key; + key = CreateKey(); + entkeytxt = new char [1]; + *entkeytxt = 0; +} + + +/****************************************************************************** + * SWLD Destructor - Cleans up instance of SWLD + */ + +SWLD::~SWLD() +{ + if (entkeytxt) + delete [] entkeytxt; +} + + +/****************************************************************************** + * SWLD::KeyText - Sets/gets module KeyText, getting from saved text if key is + * persistent + * + * ENT: ikeytext - value which to set keytext + * [0] - only get + * + * RET: pointer to keytext + */ + +const char *SWLD::KeyText(const char *ikeytext) +{ + if (key->Persist() && !ikeytext) { + getRawEntry(); // force module key to snap to entry + return entkeytxt; + } + else return SWModule::KeyText(ikeytext); +} + + +/****************************************************************************** + * SWLD::setPosition(SW_POSITION) - Positions this key if applicable + */ + +void SWLD::setPosition(SW_POSITION p) { + if (!key->Traversable()) { + switch (p) { + case POS_TOP: + *key = ""; + break; + case POS_BOTTOM: + *key = "zzzzzzzzz"; + break; + } + } + else *key = p; + getRawEntry(); +} + + diff --git a/src/modules/lexdict/zld/Makefile b/src/modules/lexdict/zld/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/lexdict/zld/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. 
+ +all: + make -C ${root} diff --git a/src/modules/lexdict/zld/Makefile.am b/src/modules/lexdict/zld/Makefile.am new file mode 100644 index 0000000..81e4d7c --- /dev/null +++ b/src/modules/lexdict/zld/Makefile.am @@ -0,0 +1,4 @@ +zlddir = $(top_srcdir)/src/modules/lexdict/zld + +libsword_la_SOURCES += $(zlddir)/zld.cpp + diff --git a/src/modules/lexdict/zld/zld.cpp b/src/modules/lexdict/zld/zld.cpp new file mode 100644 index 0000000..047effa --- /dev/null +++ b/src/modules/lexdict/zld/zld.cpp @@ -0,0 +1,172 @@ +/****************************************************************************** + * rawld.cpp - code for class 'RawLD'- a module that reads raw lexicon and + * dictionary files: *.dat *.idx + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <zstr.h> +#include <zld.h> + + + /****************************************************************************** + * RawLD Constructor - Initializes data for instance of RawLD + * + * ENT: ipath - path and filename of files (no extension) + * iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +zLD::zLD(const char *ipath, const char *iname, const char *idesc, long blockCount, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zStr(ipath, -1, blockCount, icomp), SWLD(iname, idesc, idisp, enc, dir, mark, ilang) { + +} + + +/****************************************************************************** + * RawLD Destructor - Cleans up instance of RawLD + */ + +zLD::~zLD() { + +} + + +/****************************************************************************** + * zLD::strongsPad - Pads a key if it is 100% digits to 5 places + * + * ENT: buf - buffer to check and pad + */ + +void zLD::strongsPad(char *buf) { + const char *check; + 
long size = 0; + int len = strlen(buf); + if ((len < 5) && (len > 0)) { + for (check = buf; *check; check++) { + if (!isdigit(*check)) + break; + else size++; + } + + if ((size == len) && size) + sprintf(buf, "%.5d", atoi(buf)); + } +} + + +/****************************************************************************** + * zLD::getEntry - Looks up entry from data file. 'Snaps' to closest + * entry and sets 'entrybuf'. + * + * ENT: away - number of entries offset from key (default = 0) + * + * RET: error status + */ + +char zLD::getEntry(long away) { + char *idxbuf = 0; + char *ebuf = 0; + char retval = 0; + long index; + unsigned long size; + char *buf = new char [ strlen(*key) + 6 ]; + strcpy(buf, *key); + + strongsPad(buf); + + *entrybuf = 0; + if (!(retval = findKeyIndex(buf, &index, away))) { + getText(index, &idxbuf, &ebuf); + size = strlen(ebuf) + 1; + entrybuf = new char [ size * FILTERPAD ]; + strcpy(entrybuf, ebuf); + + entrySize = size; // support getEntrySize call + if (!key->Persist()) // If we have our own key + *key = idxbuf; // reset it to entry index buffer + + stdstr(&entkeytxt, idxbuf); // set entry key text that module 'snapped' to. 
+ free(idxbuf); + free(ebuf); + } + else { + entrybuf = new char [ 5 ]; + entrybuf[0] = 0; + entrybuf[1] = 0; + } + + delete [] buf; + return retval; +} + + +/****************************************************************************** + * zLD::getRawEntry - Returns the correct entry when char * cast + * is requested + * + * RET: string buffer with entry + */ + +char *zLD::getRawEntry() { + if (!getEntry() && !isUnicode()) { + prepText(entrybuf); + } + + return entrybuf; +} + + +/****************************************************************************** + * zLD::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void zLD::increment(int steps) { + char tmperror; + + if (key->Traversable()) { + *key += steps; + error = key->Error(); + steps = 0; + } + + tmperror = (getEntry(steps)) ? KEYERR_OUTOFBOUNDS : 0; + error = (error)?error:tmperror; + *key = entkeytxt; +} + + +void zLD::setEntry(const char *inbuf, long len) { + setText(*key, inbuf, len); +} + + +void zLD::linkEntry(const SWKey *inkey) { + zStr::linkEntry(*key, *inkey); +} + + +/****************************************************************************** + * RawFiles::deleteEntry - deletes this entry + * + * RET: *this + */ + +void zLD::deleteEntry() { + setText(*key, ""); +} diff --git a/src/modules/readme b/src/modules/readme new file mode 100644 index 0000000..92cc99e --- /dev/null +++ b/src/modules/readme @@ -0,0 +1,9 @@ +This directory contains all different module types that are usable by the SWORD +API. 
+ + comments Commentaries + common common utility objects + lexdict Lexicons/Dictionaries + maps Maps + parsers Language Parsers + texts Scripture Texts diff --git a/src/modules/swmodule.cpp b/src/modules/swmodule.cpp new file mode 100644 index 0000000..f37df27 --- /dev/null +++ b/src/modules/swmodule.cpp @@ -0,0 +1,661 @@ +/****************************************************************************** + * swmodule.cpp -code for base class 'module'. Module is the basis for all + * types of modules (e.g. texts, commentaries, maps, lexicons, + * etc.) + */ + +#include <string.h> +#include <swmodule.h> +#include <utilfuns.h> +#include <regex.h> // GNU +#include <swfilter.h> +#include <versekey.h> // KLUDGE for Search +#ifndef _MSC_VER +#include <iostream> +#endif + +SWDisplay SWModule::rawdisp; +void SWModule::nullPercent(char percent, void *percentUserData) {} + +/****************************************************************************** + * SWModule Constructor - Initializes data for instance of SWModule + * + * ENT: imodname - Internal name for module + * imoddesc - Name to display to user for module + * idisp - Display object to use for displaying + * imodtype - Type of Module (All modules will be displayed with + * others of same type under their modtype heading + * unicode - if this module is unicode + */ + +SWModule::SWModule(const char *imodname, const char *imoddesc, SWDisplay *idisp, char *imodtype, SWTextEncoding encoding, SWTextDirection direction, SWTextMarkup markup, const char* imodlang) { + key = CreateKey(); + entrybuf = new char [1]; + *entrybuf = 0; + config = &ownConfig; + entrybufallocsize = 0; + modname = 0; + error = 0; + moddesc = 0; + modtype = 0; + modlang = 0; + this->encoding = encoding; + this->direction = direction; + this->markup = markup; + entrySize= -1; + disp = (idisp) ? 
idisp : &rawdisp; + stdstr(&modname, imodname); + stdstr(&moddesc, imoddesc); + stdstr(&modtype, imodtype); + stdstr(&modlang, imodlang); + stripFilters = new FilterList(); + rawFilters = new FilterList(); + renderFilters = new FilterList(); + optionFilters = new FilterList(); + encodingFilters = new FilterList(); + skipConsecutiveLinks = true; + procEntAttr = true; +} + + +/****************************************************************************** + * SWModule Destructor - Cleans up instance of SWModule + */ + +SWModule::~SWModule() +{ + if (entrybuf) + delete [] entrybuf; + if (modname) + delete [] modname; + if (moddesc) + delete [] moddesc; + if (modtype) + delete [] modtype; + if (modlang) + delete [] modlang; + + if (key) { + if (!key->Persist()) + delete key; + } + + stripFilters->clear(); + rawFilters->clear(); + renderFilters->clear(); + optionFilters->clear(); + encodingFilters->clear(); + entryAttributes.clear(); + + delete stripFilters; + delete rawFilters; + delete renderFilters; + delete optionFilters; + delete encodingFilters; +} + + +/****************************************************************************** + * SWModule::CreateKey - Allocates a key of specific type for module + * + * RET: pointer to allocated key + */ + +SWKey *SWModule::CreateKey() +{ + return new SWKey(); +} + + +/****************************************************************************** + * SWModule::Error - Gets and clears error status + * + * RET: error status + */ + +char SWModule::Error() +{ + char retval = error; + + error = 0; + return retval; +} + + +/****************************************************************************** + * SWModule::Name - Sets/gets module name + * + * ENT: imodname - value which to set modname + * [0] - only get + * + * RET: pointer to modname + */ + +char *SWModule::Name(const char *imodname) +{ + return stdstr(&modname, imodname); +} + + +/****************************************************************************** + * 
SWModule::Description - Sets/gets module description + * + * ENT: imoddesc - value which to set moddesc + * [0] - only get + * + * RET: pointer to moddesc + */ + +char *SWModule::Description(const char *imoddesc) +{ + return stdstr(&moddesc, imoddesc); +} + + +/****************************************************************************** + * SWModule::Type - Sets/gets module type + * + * ENT: imodtype - value which to set modtype + * [0] - only get + * + * RET: pointer to modtype + */ + +char *SWModule::Type(const char *imodtype) +{ + return stdstr(&modtype, imodtype); +} + +/****************************************************************************** + * SWModule::Direction - Sets/gets module direction + * + * ENT: newdir - value which to set direction + * [-1] - only get + * + * RET: char direction + */ +char SWModule::Direction(signed char newdir) { + if (newdir != -1) + direction = newdir; + return direction; +} + +/****************************************************************************** + * SWModule::Encoding - Sets/gets module encoding + * + * ENT: newdir - value which to set direction + * [-1] - only get + * + * RET: char encoding + */ +char SWModule::Encoding(signed char newenc) { + if (newenc != -1) + encoding = newenc; + return encoding; +} + +/****************************************************************************** + * SWModule::Markup - Sets/gets module markup + * + * ENT: newdir - value which to set direction + * [-1] - only get + * + * RET: char markup + */ +char SWModule::Markup(signed char newmark) { + if (newmark != -1) + markup = newmark; + return markup; +} + + +/****************************************************************************** + * SWModule::Lang - Sets/gets module language + * + * ENT: imodlang - value which to set modlang + * [0] - only get + * + * RET: pointer to modname + */ + +char *SWModule::Lang(const char *imodlang) +{ + return stdstr(&modlang, imodlang); +} + + 
+/****************************************************************************** + * SWModule::Disp - Sets/gets display driver + * + * ENT: idisp - value which to set disp + * [0] - only get + * + * RET: pointer to disp + */ + +SWDisplay *SWModule::Disp(SWDisplay *idisp) +{ + if (idisp) + disp = idisp; + + return disp; +} + + +/****************************************************************************** + * SWModule::Display - Calls this modules display object and passes itself + * + * RET: error status + */ + +char SWModule::Display() +{ + disp->Display(*this); + return 0; +} + + +/****************************************************************************** + * SWModule::SetKey - Sets a key to this module for position to a particular + * record or set of records + * + * ENT: ikey - key with which to set this module + * + * RET: error status + */ + +char SWModule::SetKey(const SWKey &ikey) { + return SetKey(&ikey); +} + +char SWModule::SetKey(const SWKey *ikey) +{ + SWKey *oldKey = 0; + + if (key) { + if (!key->Persist()) // if we have our own copy + oldKey = key; + } + + if (!ikey->Persist()) { // if we are to keep our own copy + key = CreateKey(); + *key = *ikey; + } + else key = (SWKey *)ikey; // if we are to just point to an external key + + if (oldKey) + delete oldKey; + + return 0; +} + + +/****************************************************************************** + * SWModule::KeyText - Sets/gets module KeyText + * + * ENT: ikeytext - value which to set keytext + * [0] - only get + * + * RET: pointer to keytext + */ + +const char *SWModule::KeyText(const char *ikeytext) +{ + if (ikeytext) + SetKey(ikeytext); + + return *key; +} + + +/****************************************************************************** + * SWModule::setPosition(SW_POSITION) - Positions this modules to an entry + * + * ENT: p - position (e.g. 
TOP, BOTTOM) + * + * RET: *this + */ + +void SWModule::setPosition(SW_POSITION p) { + *key = p; + char saveError = key->Error(); + + switch (p) { + case POS_TOP: + (*this)++; + (*this)--; + break; + + case POS_BOTTOM: + (*this)--; + (*this)++; + break; + } + + error = saveError; +} + + +/****************************************************************************** + * SWModule::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void SWModule::increment(int steps) { + (*key) += steps; + error = key->Error(); +} + + +/****************************************************************************** + * SWModule::decrement - Decrements module key a number of entries + * + * ENT: decrement - Number of entries to jump backward + * + * RET: *this + */ + +void SWModule::decrement(int steps) { + (*key) -= steps; + error = key->Error(); +} + + +/****************************************************************************** + * SWModule::Search - Searches a module for a string + * + * ENT: istr - string for which to search + * searchType - type of search to perform + * >=0 - regex + * -1 - phrase + * -2 - multiword + * flags - options flags for search + * justCheckIfSupported - if set, don't search, only tell if this + * function supports requested search. 
+ * + * RET: listkey set to verses that contain istr + */ + +ListKey &SWModule::Search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) +{ + SWKey *savekey = 0; + SWKey *searchkey = 0; + regex_t preg; + SWKey textkey; + char **words = 0; + char *wordBuf = 0; + int wordCount = 0; + const char *sres; + terminateSearch = false; + char perc = 1; + bool savePEA = isProcessEntryAttributes(); + + processEntryAttributes(false); + listkey.ClearList(); + + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + searchkey = (scope)?scope->clone():(key->Persist())?key->clone():0; + if (searchkey) { + searchkey->Persist(1); + SetKey(*searchkey); + } + + (*percent)(perc, percentUserData); + // MAJOR KLUDGE: VerseKey::Index still return index within testament. + // VerseKey::NewIndex should be moved to Index and Index should be some + // VerseKey specific name + VerseKey *vkcheck = 0; +#ifndef _WIN32_WCE + try { +#endif + vkcheck = SWDYNAMIC_CAST(VerseKey, key); +#ifndef _WIN32_WCE + } + catch (...) {} +#endif + // end MAJOR KLUDGE + + *this = BOTTOM; + // fix below when we find out the bug + long highIndex = (vkcheck)?32300/*vkcheck->NewIndex()*/:key->Index(); + if (!highIndex) + highIndex = 1; // avoid division by zero errors. 
+ *this = TOP; + if (searchType >= 0) { + flags |=searchType|REG_NOSUB|REG_EXTENDED; + regcomp(&preg, istr, flags); + } + + (*percent)(++perc, percentUserData); + if (searchType == -2) { + wordBuf = (char *)calloc(sizeof(char), strlen(istr) + 1); + strcpy(wordBuf, istr); + words = (char **)calloc(sizeof(char *), 10); + int allocWords = 10; + words[wordCount] = strtok(wordBuf, " "); + while (words[wordCount]) { + wordCount++; + if (wordCount == allocWords) { + allocWords+=10; + words = (char **)realloc(words, sizeof(char *)*allocWords); + } + words[wordCount] = strtok(NULL, " "); + } + } + + perc = 5; + (*percent)(perc, percentUserData); + + while (!Error() && !terminateSearch) { + + + long mindex = 0; + if (vkcheck) + mindex = vkcheck->NewIndex(); + else mindex = key->Index(); + float per = (float)mindex / highIndex; + per *= 93; + per += 5; + char newperc = (char)per; +// char newperc = (char)(5+(93*(((float)((vkcheck)?vkcheck->NewIndex():key->Index()))/highIndex))); + if (newperc > perc) { + perc = newperc; + (*percent)(perc, percentUserData); + } + else if (newperc < perc) { +#ifndef _MSC_VER + std::cerr << "Serious error: new percentage complete is less than previous value\n"; + std::cerr << "using vk? " << ((vkcheck)?"yes":"no") << "\n"; + std::cerr << "index: " << ((vkcheck)?vkcheck->NewIndex():key->Index()) << "\n"; + std::cerr << "highIndex: " << highIndex << "\n"; + std::cerr << "newperc ==" << (int)newperc << "%" << "is smaller than\n"; + std::cerr << "perc == " << (int )perc << "% \n"; +#endif + } + if (searchType >= 0) { + if (!regexec(&preg, StripText(), 0, 0, 0)) { + textkey = KeyText(); + listkey << textkey; + } + } + else { + if (searchType == -1) { + sres = ((flags & REG_ICASE) == REG_ICASE) ? 
stristr(StripText(), istr) : strstr(StripText(), istr); + if (sres) { + textkey = KeyText(); + listkey << textkey; + } + } + if (searchType == -2) { + int i; + const char *stripBuf = StripText(); + for (i = 0; i < wordCount; i++) { + sres = ((flags & REG_ICASE) == REG_ICASE) ? stristr(stripBuf, words[i]) : strstr(stripBuf, words[i]); + if (!sres) + break; + } + if (i == wordCount) { + textkey = KeyText(); + listkey << textkey; + } + + } + } + (*this)++; + } + if (searchType >= 0) + regfree(&preg); + + if (searchType == -2) { + free(words); + free(wordBuf); + } + + SetKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + if (searchkey) + delete searchkey; + + listkey = TOP; + processEntryAttributes(savePEA); + (*percent)(100, percentUserData); + + return listkey; +} + + +/****************************************************************************** + * SWModule::StripText() - calls all stripfilters on current text + * + * ENT: buf - buf to massage instead of this modules current text + * len - max len of buf + * + * RET: this module's text at specified key location massaged by Strip filters + */ + +const char *SWModule::StripText(char *buf, int len) +{ + return RenderText(buf, len, false); +} + + +/****************************************************************************** + * SWModule::RenderText - calls all renderfilters on current text + * + * ENT: buf - buffer to Render instead of current module position + * + * RET: listkey set to verses that contain istr + */ + + const char *SWModule::RenderText(char *buf, int len, bool render) { + entryAttributes.clear(); + char *tmpbuf = (buf) ? buf : getRawEntry(); + SWKey *key = 0; + static char *null = ""; + + if (tmpbuf) { + unsigned long size = (len < 0) ? ((getEntrySize()<0) ? 
strlen(tmpbuf) : getEntrySize()) * FILTERPAD : len; + if (size > 0) { + key = (SWKey *)*this; + + optionFilter(tmpbuf, size, key); + + if (render) { + renderFilter(tmpbuf, size, key); + encodingFilter(tmpbuf, size, key); + } + else stripFilter(tmpbuf, size, key); + } + } + else { + tmpbuf = null; + } + + return tmpbuf; +} + + +/****************************************************************************** + * SWModule::RenderText - calls all renderfilters on current text + * + * ENT: tmpKey - key to use to grab text + * + * RET: this module's text at specified key location massaged by RenderFilers + */ + + const char *SWModule::RenderText(SWKey *tmpKey) +{ + SWKey *savekey; + const char *retVal; + + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + SetKey(*tmpKey); + + retVal = RenderText(); + + SetKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + return retVal; +} + + +/****************************************************************************** + * SWModule::StripText - calls all StripTextFilters on current text + * + * ENT: tmpKey - key to use to grab text + * + * RET: this module's text at specified key location massaged by Strip filters + */ + +const char *SWModule::StripText(SWKey *tmpKey) +{ + SWKey *savekey; + const char *retVal; + + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + SetKey(*tmpKey); + + retVal = StripText(); + + SetKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + return retVal; +} + + +const char *SWModule::getConfigEntry(const char *key) const { + ConfigEntMap::iterator it = config->find(key); + return (it != config->end()) ? 
it->second.c_str() : 0; +} + + +void SWModule::setConfig(ConfigEntMap *config) { + this->config = config; +} diff --git a/src/modules/tests/Makefile b/src/modules/tests/Makefile new file mode 100644 index 0000000..81f7721 --- /dev/null +++ b/src/modules/tests/Makefile @@ -0,0 +1,4 @@ +root := ../../.. + +all: + make -C ${root} diff --git a/src/modules/tests/echomod.cpp b/src/modules/tests/echomod.cpp new file mode 100644 index 0000000..65e689b --- /dev/null +++ b/src/modules/tests/echomod.cpp @@ -0,0 +1,21 @@ +/****************************************************************************** + * echomod.cpp - code for class 'echomod'- a dummy test text module that just + * echos back the key + */ + +#include <echomod.h> + + +EchoMod::EchoMod() : SWText("echomod", "Echos back key") +{ +} + + +EchoMod::~EchoMod() { +} + + +EchoMod::operator const char*() +{ + return *key; +} diff --git a/src/modules/texts/Makefile b/src/modules/texts/Makefile new file mode 100644 index 0000000..1a2d00d --- /dev/null +++ b/src/modules/texts/Makefile @@ -0,0 +1,5 @@ + +root := ../../.. 
+ +all: + make -C ${root} diff --git a/src/modules/texts/Makefile.am b/src/modules/texts/Makefile.am new file mode 100644 index 0000000..b48d93e --- /dev/null +++ b/src/modules/texts/Makefile.am @@ -0,0 +1,7 @@ +textsdir = $(top_srcdir)/src/modules/texts + +libsword_la_SOURCES += $(textsdir)/swtext.cpp + +include ../src/modules/texts/rawtext/Makefile.am +include ../src/modules/texts/ztext/Makefile.am +include ../src/modules/texts/rawgbf/Makefile.am diff --git a/src/modules/texts/rawgbf/Gbf.c b/src/modules/texts/rawgbf/Gbf.c new file mode 100644 index 0000000..2b7f786 --- /dev/null +++ b/src/modules/texts/rawgbf/Gbf.c @@ -0,0 +1,485 @@ +/* Output from p2c, the Pascal-to-C translator */ +/* From input file "Gbf.pas" */ + + +#include <p2c/p2c.h> + + +typedef enum { + tokNull, tokEOF, tokHeader, tokContent, tokTail, tokStyle, tokWord, + tokSpace, tokSync, tokControl, tokChar, tokFont +} TToken; +typedef enum { + caBold, caSmallCaps, caItalic, caOTQuote, caRed, caSuperscript, caUnderline, + caSubscript +} TCharacterAttribute; +typedef long TCharAttribs; + + + +typedef struct TBookNameRec { + Char Name[256], Abbr[256]; + uchar Num; +} TBookNameRec; + +typedef TBookNameRec TBookAbbr[116]; + + +typedef struct TReadGBF { + /* private*/ + FILE *F; + Char FName[256], TokenLine[256]; + long TokenPos; + boolean fFileIsOpen, fParagraphEnd, fInTitle, fInPsalmBookTitle, + fInHebrewTitle, fInSectionTitle; + + /* public*/ + Char sBook[256], sChapter[256], sVerse[256], sMode[256]; + Char sContext[256]; /*// Last text type (header, body, or tail)*/ + Char sTitle[256]; /*// Title of this book of the Bible*/ + Char sPsalmBookTitle[256]; /*// Title of this Psalm book*/ + Char sHebrewTitle[256]; /*// Psalm Hebrew title*/ + Char sSectionTitle[256]; /*// Section headings*/ + Char sDate[256], sFontName[256]; + long iTotalWords; + Char chJustification, chDirection; + boolean fIndent, fPoetry; + TCharAttribs CharAttribs; + uchar bBk, bChap, bVs, bWd; + /* + function Init({const*/ + 
/*sFileName: string): boolean; + procedure Done; + function GetToken(var TokenKind: TToken): string; + */ +} TReadGBF; + +typedef struct TWriteGBF { + /* private*/ + FILE *F; + Char FName[256], LineOut[256]; + boolean fFileIsOpen; + uchar bBk, bChap, bVs, bWd; + + /* public*/ + /* + function Init({const*/ + /*sFileName: string): boolean; + function Done: boolean; + procedure Out({const*/ + /*s: string); +*/ +} TWriteGBF; + +/*implementation */ + + +/*//0*/ +/*//1*/ +/*//2*/ +/*//3*/ +/*//4*/ +/*//5*/ +/*//6*/ +/*//7*/ +/*//8*/ +/*//9*/ +/*//10*/ +/*//11*/ +/*//12*/ +/*//13*/ +/*//14*/ +/*//15*/ +/*//16*/ +/*//17*/ +/*//18*/ +/*//19*/ +/*//20*/ +/*//21*/ +/*//22*/ +/*//0*/ +/*//1*/ +/*//2*/ +/*//3*/ +/*//4*/ +/*//5*/ +/*//6*/ +/*//7*/ +/*//8*/ +/*//9*/ +/*//10*/ +/*//11*/ +/*//12*/ +/*//13*/ +/*//14*/ +/*//15*/ +/*//16*/ +/*//17*/ +/*//18*/ +/*//19*/ +/*//20*/ +/*//21*/ +/*//22*/ +/*//23*/ +/*//24*/ +/*//26*/ +/*//27*/ +/*//28*/ +/*//29*/ +/*//30*/ +/*//31*/ +/*//32*/ +/*//33*/ +/*//34*/ +/*//35*/ +/*//36*/ +/*//37*/ +/*//38*/ +/*//39*/ +/*//40*/ +/*//41*/ +/*//42*/ +/*//43*/ +/*//44*/ +/*//45*/ +/*//46*/ +/*//47*/ +/*//48*/ +/*//49*/ +/*//50*/ +/*//51*/ +/*//52*/ +/*//53*/ +/*//54*/ +/*//55*/ +/*//56*/ +/*//57*/ +/*//58*/ +/*//59*/ +/*//60*/ +/*//61*/ +/*//62*/ +/*//63*/ +/*//64*/ +/*//65*/ +/*//66*/ +/*//67*/ +/*//68*/ +/*//69*/ +/*//70*/ +/*//71*/ +/*//72*/ +/*//73*/ +/*//74*/ +/*//75*/ +/*//76*/ +/*//77*/ +/*//78*/ +/*//79*/ +/*//80*/ +/*//81*/ +/*//82*/ +/*//83*/ +/*//84*/ +/*//85*/ +/*//86*/ +/*//87*/ +/*//88*/ +/*//89*/ +/*//90*/ +/*//91*/ +/*//92*/ + +Static TBookAbbr BookAbbr = { + { "1 Chronicles", "1CH", 13 }, + { "1 Corinthians", "1CO", 70 }, + { "1 Esdras", "1E", 52 }, + { "1 John", "1J", 86 }, + { "1 Kings", "1K", 11 }, + { "1 Maccabees", "1M", 50 }, + { "1 Peter", "1P", 84 }, + { "1 Samuel", "1S", 9 }, + { "1 Thessalonians", "1TH", 76 }, + { "1 Timothy", "1TI", 78 }, + { "2 Chronicles", "2CH", 14 }, + { "2 Corinthians", "2CO", 71 }, + { "2 Esdras", 
"2E", 56 }, + { "2 John", "2J", 87 }, + { "2 Kings", "2K", 12 }, + { "2 Maccabees", "2M", 51 }, + { "2 Peter", "2P", 85 }, + { "2 Samuel", "2S", 10 }, + { "2 Thessalonians", "2TH", 77 }, + { "2 Timothy", "2TI", 79 }, + { "3 John", "3J", 88 }, + { "3 Maccabees", "3M", 55 }, + { "4 Maccabees", "4M", 57 }, + { "1 Chronicles", "1 CH", 13 }, + { "1 Corinthians", "1 CO", 70 }, + { "1 Esdras", "1 E", 52 }, + { "1 John", "1 J", 86 }, + { "1 Kings", "1 K", 11 }, + { "1 Maccabees", "1 M", 50 }, + { "1 Peter", "1 P", 84 }, + { "1 Samuel", "1 S", 9 }, + { "1 Thessalonians", "1 TH", 76 }, + { "1 Timothy", "1 TI", 78 }, + { "2 Chronicles", "2 CH", 14 }, + { "2 Corinthians", "2 CO", 71 }, + { "2 Esdras", "2 E", 56 }, + { "2 John", "2 J", 87 }, + { "2 Kings", "2 K", 12 }, + { "2 Maccabees", "2 M", 51 }, + { "2 Peter", "2 P", 85 }, + { "2 Samuel", "2 S", 10 }, + { "2 Thessalonians", "2 TH", 77 }, + { "2 Timothy", "2 TI", 79 }, + { "3 John", "3 J", 88 }, + { "3 Maccabees", "3 M", 55 }, + { "4 Maccabees", "4 M", 57 }, + { "Acts", "AC", 68 }, + { "Amos", "AM", 30 }, + { "Prayer of Asariah and the Song of the Three Jews", "AZ", 47 }, + { "Baruch", "BA", 45 }, + { "Bel and the Dragon", "BE", 49 }, + { "Colossians", "CO", 75 }, + { "Daniel", "DA", 27 }, + { "Deuteronomy", "DE", 5 }, + { "Deuteronomy", "DT", 5 }, + { "Ecclesiasties", "EC", 21 }, + { "Esther", "ES", 17 }, + { "Exodus", "EX", 2 }, + { "Ezekiel", "EZE", 26 }, + { "Ezra", "EZR", 15 }, + { "Galatians", "GA", 72 }, + { "Genesis", "GE", 1 }, + { "Genesis", "GN", 1 }, + { "Ephesians", "EP", 73 }, + { "Esther (Greek)", "GR", 42 }, + { "Habakkuk", "HAB", 35 }, + { "Haggai", "HAG", 37 }, + { "Hebrews", "HE", 82 }, + { "Hosea", "HO", 28 }, + { "Isaiah", "IS", 23 }, + { "James", "JA", 83 }, + { "Jeremiah", "JE", 24 }, + { "Job", "JOB", 18 }, + { "Joel", "JOE", 29 }, + { "John", "JOH", 67 }, + { "Jonah", "JON", 32 }, + { "Joshua", "JOS", 6 }, + { "Jude", "JUDE", 89 }, + { "Judges", "JUDG", 7 }, + { "Judith", "JUDI", 41 }, + { 
"Lamentations", "LA", 25 }, + { "Letter of Jeremiah", "LET", 46 }, + { "Leviticus", "LEV", 3 }, + { "Luke", "LK", 66 }, + { "Leviticus", "LV", 3 }, + { "Luke", "LU", 66 }, + { "Malachi", "MAL", 39 }, + { "Prayer of Manasseh", "MAN", 53 }, + { "Mark", "MAR", 65 }, + { "Matthew", "MAT", 64 }, + { "Micah", "MI", 33 }, + { "Nahum", "NA", 34 }, + { "Nehemiah", "NE", 16 }, + { "Numbers", "NU", 4 }, + { "Obadiah", "OB", 31 }, + { "Psalm 151", "P1", 54 }, + { "Philemon", "PHILE", 81 }, + { "Philippians", "PHILI", 74 }, + { "Philemon", "PHM", 81 }, + { "Philippians", "PHP", 74 }, + { "Proverbs", "PR", 20 }, + { "Psalms", "PS", 19 }, + { "Revelation", "RE", 90 }, + { "Romans", "RM", 69 }, + { "Romans", "RO", 69 }, + { "Ruth", "RU", 8 }, + { "Sirach", "SI", 44 }, + { "Song of Solomon", "SOL", 22 }, + { "Song of Solomon", "SON", 22 }, + { "Song of Solomon", "SS", 22 }, + { "Susanna", "SU", 48 }, + { "Titus", "TI", 80 }, + { "Tobit", "TO", 40 }, + { "Wisdom", "WI", 43 }, + { "Zechariah", "ZEC", 38 }, + { "Zephaniah", "ZEP", 36 } +}; + +/*// 0 - 7*/ +/*// 8 - 14*/ +/*// 15-20*/ +/*// 21-26*/ +/*// 27-33*/ +/*// 34-39*/ +/*// 40-45*/ +/*// 46-52*/ +/*// 53-63*/ +/*// 64-70*/ +/*// 71-78*/ +/*// 79-84*/ + +Static Char BookFileName[91][256] = { + "", "Genesis", "Exodus", "Lev", "Num", "Deut", "Joshua", "Judges", "Ruth", + "1Sam", "2Sam", "1Kings", "2Kings", "1Chron", "2Chron", "Ezra", "Nehemiah", + "Esther", "Job", "Psalms", "Proverbs", "Eccl", "Song", "Isaiah", "Jeremiah", + "Lament", "Ezekiel", "Daniel", "Hosea", "Joel", "Amos", "Obadiah", "Jonah", + "Micah", "Nahum", "Habakkuk", "Zeph", "Haggai", "Zech", "Malachi", "Tobit", + "Judith", "Esther", "Wisdom", "Sirach", "Baruch", "Let", "Azar", "Susanna", + "Bel", "1Mac", "2Mac", "1Esdras", "Man", "P1", "3Mac", "2Esdras", "4Mac", + "", "", "", "", "", "", "Matthew", "Mark", "Luke", "John", "Acts", "Romans", + "1Cor", "2Cor", "Gal", "Eph", "Philip", "Col", "1Thes", "2Thes", "1Tim", + "2Tim", "Titus", "Philemon", "Hebrews", "James", 
"1Peter", "2Peter", + "1John", "2John", "3John", "Jude", "Rev" +/* p2c: Gbf.pas, line 200: + * Note: Line breaker spent 0.0 seconds, 5000 tries on line 336 [251] */ +}; /*// 85-90*/ + + +Static boolean isletter(ch) +Char ch; +{ + /*const*/ + boolean Result; + + if (isupper(ch)) { + Result = true; + return Result; + } + if (islower(ch)) + Result = true; + else + Result = false; + return Result; +} + + +Static boolean isinword(ch) +Char ch; +{ + /*const*/ + boolean Result; + + switch (ch) { + + case '-': + Result = true; + break; + + default: + if (isupper(ch)) + Result = true; + else if (islower(ch)) + Result = true; + else + Result = false; + break; + } + return Result; +} + + +Static boolean IsUpper(ch) +Char ch; +{ + /*const*/ + boolean Result; + + if (isupper(ch)) + Result = true; + else + Result = false; + return Result; +} + + +Static boolean IsDigit(ch) +Char ch; +{ + /*const*/ + boolean Result; + + if (isdigit(ch)) + Result = true; + else + Result = false; + return Result; +} + + +Static boolean MatchAbbrev(sName, sAbbrev) +Char *sName, *sAbbrev; +{ + /*const*/ + long i; + boolean Result; + + if (strlen(sName) < strlen(sAbbrev)) { + Result = false; +/* p2c: Gbf.pas, line 245: Warning: Symbol 'RESULT' is not defined [221] */ + } else + Result = true; + i = 1; + while (i <= strlen(sAbbrev) && Result) { + if (toupper(sName[i - 1]) != sAbbrev[i - 1]) + Result = false; + i++; + } +} + + +Static uchar BookNameToNumber(sBookName) +Char *sBookName; +{ + /*const*/ + long Result; + + Result = 0; +/* p2c: Gbf.pas, line 259: Warning: Symbol 'RESULT' is not defined [221] */ + TRY(try1); + if (IsDigit(sBookName[strlen(sBookName) - 1]) & IsDigit(sBookName[0])) { + Result = StrToInt(sBookName); +/* p2c: Gbf.pas, line 262: + * Warning: Symbol 'STRTOINT' is not defined [221] */ + } + except(); +/* p2c: Gbf.pas, line 264: Warning: Symbol 'EXCEPT' is not defined [221] */ +/* p2c: Gbf.pas, line 264: + * Warning: Expected RECOVER, found 'Result' [227] */ + RECOVER(try1); + ; + 
ENDTRY(try1); +} + + +main(argc, argv) +int argc; +Char *argv[]; +{ /*// Yuk! Linear search.*/ + Char STR1[256]; + uchar Result; + +/* p2c: Gbf.pas, line 266: Warning: Expected BEGIN, found 'i' [227] */ + PASCAL_MAIN(argc, argv); + if (MatchAbbrev(sBookName, BookAbbr[i].Abbr)) { +/* p2c: Gbf.pas, line 269: + * Warning: Symbol 'SBOOKNAME' is not defined [221] */ +/* p2c: Gbf.pas, line 269: Warning: Mixing non-strings with strings [170] */ +/* p2c: Gbf.pas, line 269: Warning: Symbol 'I' is not defined [221] */ + Result = BookAbbr[i].Num; +/* p2c: Gbf.pas, line 271: Warning: Symbol 'I' is not defined [221] */ +/* p2c: Gbf.pas, line 271: Warning: Symbol 'RESULT' is not defined [221] */ + } +/* p2c: Gbf.pas, line 273: Warning: Symbol 'I' is not defined [221] */ + i++; + exit(EXIT_SUCCESS); +} +/* p2c: Gbf.pas, line 275: + * Warning: Junk at end of input file ignored [277] */ + + + +/* End. */ diff --git a/src/modules/texts/rawgbf/Gbf.pas b/src/modules/texts/rawgbf/Gbf.pas new file mode 100644 index 0000000..13826e3 --- /dev/null +++ b/src/modules/texts/rawgbf/Gbf.pas @@ -0,0 +1,735 @@ +type + TToken = (tokNull, tokEOF, tokHeader, tokContent, tokTail, tokStyle, + tokWord, tokSpace, tokSync, tokControl, tokChar, tokFont); + TCharacterAttribute = (caBold, caSmallCaps, caItalic, caOTQuote, caRed, + caSuperscript, caUnderline, caSubscript); + TCharAttribs = set of TCharacterAttribute; + + + TBookNameRec = record + Name, + Abbr: string; + Num: byte + end; + + TBookAbbr = array[0..115] of TBookNameRec; + +const + BookAbbr: TBookAbbr = ( + (Name: '1 Chronicles'; Abbr: '1CH'; Num: 13), {//0} + (Name: '1 Corinthians'; Abbr: '1CO'; Num: 70), {//1} + (Name: '1 Esdras'; Abbr: '1E'; Num: 52), {//2} + (Name: '1 John'; Abbr: '1J'; Num: 86), {//3} + (Name: '1 Kings'; Abbr: '1K'; Num: 11), {//4} + (Name: '1 Maccabees'; Abbr: '1M'; Num: 50), {//5} + (Name: '1 Peter'; Abbr: '1P'; Num: 84), {//6} + (Name: '1 Samuel'; Abbr: '1S'; Num: 9), {//7} + (Name: '1 Thessalonians'; Abbr: '1TH'; Num: 
76), {//8} + (Name: '1 Timothy'; Abbr: '1TI'; Num: 78), {//9} + (Name: '2 Chronicles'; Abbr: '2CH'; Num: 14), {//10} + (Name: '2 Corinthians'; Abbr: '2CO'; Num: 71), {//11} + (Name: '2 Esdras'; Abbr: '2E'; Num: 56), {//12} + (Name: '2 John'; Abbr: '2J'; Num: 87), {//13} + (Name: '2 Kings'; Abbr: '2K'; Num: 12), {//14} + (Name: '2 Maccabees'; Abbr: '2M'; Num: 51), {//15} + (Name: '2 Peter'; Abbr: '2P'; Num: 85), {//16} + (Name: '2 Samuel'; Abbr: '2S'; Num: 10), {//17} + (Name: '2 Thessalonians'; Abbr: '2TH'; Num: 77), {//18} + (Name: '2 Timothy'; Abbr: '2TI'; Num: 79), {//19} + (Name: '3 John'; Abbr: '3J'; Num: 88), {//20} + (Name: '3 Maccabees'; Abbr: '3M'; Num: 55), {//21} + (Name: '4 Maccabees'; Abbr: '4M'; Num: 57), {//22} + (Name: '1 Chronicles'; Abbr: '1 CH'; Num: 13), {//0} + (Name: '1 Corinthians'; Abbr: '1 CO'; Num: 70), {//1} + (Name: '1 Esdras'; Abbr: '1 E'; Num: 52), {//2} + (Name: '1 John'; Abbr: '1 J'; Num: 86), {//3} + (Name: '1 Kings'; Abbr: '1 K'; Num: 11), {//4} + (Name: '1 Maccabees'; Abbr: '1 M'; Num: 50), {//5} + (Name: '1 Peter'; Abbr: '1 P'; Num: 84), {//6} + (Name: '1 Samuel'; Abbr: '1 S'; Num: 9), {//7} + (Name: '1 Thessalonians'; Abbr: '1 TH'; Num: 76), {//8} + (Name: '1 Timothy'; Abbr: '1 TI'; Num: 78), {//9} + (Name: '2 Chronicles'; Abbr: '2 CH'; Num: 14), {//10} + (Name: '2 Corinthians'; Abbr: '2 CO'; Num: 71), {//11} + (Name: '2 Esdras'; Abbr: '2 E'; Num: 56), {//12} + (Name: '2 John'; Abbr: '2 J'; Num: 87), {//13} + (Name: '2 Kings'; Abbr: '2 K'; Num: 12), {//14} + (Name: '2 Maccabees'; Abbr: '2 M'; Num: 51), {//15} + (Name: '2 Peter'; Abbr: '2 P'; Num: 85), {//16} + (Name: '2 Samuel'; Abbr: '2 S'; Num: 10), {//17} + (Name: '2 Thessalonians'; Abbr: '2 TH'; Num: 77), {//18} + (Name: '2 Timothy'; Abbr: '2 TI'; Num: 79), {//19} + (Name: '3 John'; Abbr: '3 J'; Num: 88), {//20} + (Name: '3 Maccabees'; Abbr: '3 M'; Num: 55), {//21} + (Name: '4 Maccabees'; Abbr: '4 M'; Num: 57), {//22} + (Name: 'Acts'; Abbr: 'AC'; Num: 68), {//23} + (Name: 
'Amos'; Abbr: 'AM'; Num: 30), {//24} + (Name: 'Prayer of Asariah and the Song of the Three Jews'; Abbr: 'AZ'; Num: 47), + (Name: 'Baruch'; Abbr: 'BA'; Num: 45), {//26} + (Name: 'Bel and the Dragon';Abbr: 'BE'; Num: 49), {//27} + (Name: 'Colossians'; Abbr: 'CO'; Num: 75), {//28} + (Name: 'Daniel'; Abbr: 'DA'; Num: 27), {//29} + (Name: 'Deuteronomy'; Abbr: 'DE'; Num: 5), {//30} + (Name: 'Deuteronomy'; Abbr: 'DT'; Num: 5), {//31} + (Name: 'Ecclesiasties'; Abbr: 'EC'; Num: 21), {//32} + (Name: 'Esther'; Abbr: 'ES'; Num: 17), {//33} + (Name: 'Exodus'; Abbr: 'EX'; Num: 2), {//34} + (Name: 'Ezekiel'; Abbr: 'EZE'; Num: 26), {//35} + (Name: 'Ezra'; Abbr: 'EZR'; Num: 15), {//36} + (Name: 'Galatians'; Abbr: 'GA'; Num: 72), {//37} + (Name: 'Genesis'; Abbr: 'GE'; Num: 1), {//38} + (Name: 'Genesis'; Abbr: 'GN'; Num: 1), {//39} + (Name: 'Ephesians'; Abbr: 'EP'; Num: 73), {//40} + (Name: 'Esther (Greek)'; Abbr: 'GR'; Num: 42), {//41} + (Name: 'Habakkuk'; Abbr: 'HAB'; Num: 35), {//42} + (Name: 'Haggai'; Abbr: 'HAG'; Num: 37), {//43} + (Name: 'Hebrews'; Abbr: 'HE'; Num: 82), {//44} + (Name: 'Hosea'; Abbr: 'HO'; Num: 28), {//45} + (Name: 'Isaiah'; Abbr: 'IS'; Num: 23), {//46} + (Name: 'James'; Abbr: 'JA'; Num: 83), {//47} + (Name: 'Jeremiah'; Abbr: 'JE'; Num: 24), {//48} + (Name: 'Job'; Abbr: 'JOB'; Num: 18), {//49} + (Name: 'Joel'; Abbr: 'JOE'; Num: 29), {//50} + (Name: 'John'; Abbr: 'JOH'; Num: 67), {//51} + (Name: 'Jonah'; Abbr: 'JON'; Num: 32), {//52} + (Name: 'Joshua'; Abbr: 'JOS'; Num: 6), {//53} + (Name: 'Jude'; Abbr: 'JUDE'; Num: 89), {//54} + (Name: 'Judges'; Abbr: 'JUDG'; Num: 7), {//55} + (Name: 'Judith'; Abbr: 'JUDI'; Num: 41), {//56} + (Name: 'Lamentations'; Abbr: 'LA'; Num: 25), {//57} + (Name: 'Letter of Jeremiah';Abbr:'LET'; Num: 46), {//58} + (Name: 'Leviticus'; Abbr: 'LEV'; Num: 3), {//59} + (Name: 'Luke'; Abbr: 'LK'; Num: 66), {//60} + (Name: 'Leviticus'; Abbr: 'LV'; Num: 3), {//61} + (Name: 'Luke'; Abbr: 'LU'; Num: 66), {//62} + (Name: 'Malachi'; Abbr: 'MAL'; Num: 
39), {//63} + (Name: 'Prayer of Manasseh';Abbr:'MAN'; Num: 53), {//64} + (Name: 'Mark'; Abbr: 'MAR'; Num: 65), {//65} + (Name: 'Matthew'; Abbr: 'MAT'; Num: 64), {//66} + (Name: 'Micah'; Abbr: 'MI'; Num: 33), {//67} + (Name: 'Nahum'; Abbr: 'NA'; Num: 34), {//68} + (Name: 'Nehemiah'; Abbr: 'NE'; Num: 16), {//69} + (Name: 'Numbers'; Abbr: 'NU'; Num: 4), {//70} + (Name: 'Obadiah'; Abbr: 'OB'; Num: 31), {//71} + (Name: 'Psalm 151'; Abbr: 'P1'; Num: 54), {//72} + (Name: 'Philemon'; Abbr: 'PHILE'; Num: 81), {//73} + (Name: 'Philippians'; Abbr: 'PHILI'; Num: 74), {//74} + (Name: 'Philemon'; Abbr: 'PHM'; Num: 81), {//75} + (Name: 'Philippians'; Abbr: 'PHP'; Num: 74), {//76} + (Name: 'Proverbs'; Abbr: 'PR'; Num: 20), {//77} + (Name: 'Psalms'; Abbr: 'PS'; Num: 19), {//78} + (Name: 'Revelation'; Abbr: 'RE'; Num: 90), {//79} + (Name: 'Romans'; Abbr: 'RM'; Num: 69), {//80} + (Name: 'Romans'; Abbr: 'RO'; Num: 69), {//81} + (Name: 'Ruth'; Abbr: 'RU'; Num: 8), {//82} + (Name: 'Sirach'; Abbr: 'SI'; Num: 44), {//83} + (Name: 'Song of Solomon'; Abbr: 'SOL'; Num: 22), {//84} + (Name: 'Song of Solomon'; Abbr: 'SON'; Num: 22), {//85} + (Name: 'Song of Solomon'; Abbr: 'SS'; Num: 22), {//86} + (Name: 'Susanna'; Abbr: 'SU'; Num: 48), {//87} + (Name: 'Titus'; Abbr: 'TI'; Num: 80), {//88} + (Name: 'Tobit'; Abbr: 'TO'; Num: 40), {//89} + (Name: 'Wisdom'; Abbr: 'WI'; Num: 43), {//90} + (Name: 'Zechariah'; Abbr: 'ZEC'; Num: 38), {//91} + (Name: 'Zephaniah'; Abbr: 'ZEP'; Num: 36) {//92} + ); + + BookFileName: array[0..90] of string = ( + '','Genesis','Exodus','Lev','Num','Deut','Joshua','Judges', {// 0 - 7} + 'Ruth','1Sam','2Sam','1Kings','2Kings','1Chron','2Chron', {// 8 - 14} + 'Ezra','Nehemiah','Esther','Job','Psalms','Proverbs', {// 15-20} + 'Eccl','Song','Isaiah','Jeremiah','Lament','Ezekiel', {// 21-26} + 'Daniel','Hosea','Joel','Amos','Obadiah','Jonah','Micah', {// 27-33} + 'Nahum','Habakkuk','Zeph','Haggai','Zech','Malachi', {// 34-39} + 
'Tobit','Judith','Esther','Wisdom','Sirach','Baruch', {// 40-45} + 'Let','Azar','Susanna','Bel','1Mac','2Mac','1Esdras', {// 46-52} + 'Man','P1','3Mac','2Esdras','4Mac','','','','','','', {// 53-63} + 'Matthew','Mark','Luke','John','Acts','Romans','1Cor', {// 64-70} + '2Cor','Gal','Eph','Philip','Col','1Thes','2Thes','1Tim', {// 71-78} + '2Tim','Titus','Philemon','Hebrews','James','1Peter', {// 79-84} + '2Peter','1John','2John','3John','Jude','Rev'); {// 85-90} + +type + TReadGBF = record +{ private} + F: File; + FName, TokenLine: string; + TokenPos: integer; + fFileIsOpen, fParagraphEnd, fInTitle, fInPsalmBookTitle, fInHebrewTitle, + fInSectionTitle: boolean; + +{ public} + sBook, sChapter, sVerse, sMode: string; + sContext, {// Last text type (header, body, or tail)} + sTitle, {// Title of this book of the Bible} + sPsalmBookTitle, {// Title of this Psalm book} + sHebrewTitle, {// Psalm Hebrew title} + sSectionTitle, {// Section headings} + sDate, + sFontName: string; + iTotalWords: integer; + chJustification, + chDirection: char; + fIndent, fPoetry: boolean; + CharAttribs: TCharAttribs; + bBk, bChap, bVs, bWd: byte; +{ + function Init({const}{sFileName: string): boolean; + procedure Done; + function GetToken(var TokenKind: TToken): string; +} + end; + + TWriteGBF = record +{ private} + F: File; + FName, LineOut: string; + fFileIsOpen: boolean; + bBk, bChap, bVs, bWd: byte; + +{ public} +{ + function Init({const}{sFileName: string): boolean; + function Done: boolean; + procedure Out({const}{s: string); +} + end; + +{implementation } + +function isletter({const}ch: char): boolean; +begin + case ch of + 'A'..'Z': isletter := true; + 'a'..'z': isletter := true; + else + isletter := false; + end; +end; + +function isinword({const}ch: char): boolean; +begin + case ch of + '-': isinword := true; + 'A'..'Z': isinword := true; + 'a'..'z': isinword := true; + else + isinword := false; + end; +end; + +function IsUpper({const}ch: char): Boolean; +begin + case ch of + 
'A'..'Z': IsUpper := true; + else + IsUpper := false; + end; +end; + +function IsDigit({const}ch: char): Boolean; +begin + case ch of + '0'..'9': IsDigit := true; + else + IsDigit := false; + end; +end; + + +function MatchAbbrev({const}sName, sAbbrev: string): boolean; +var i: integer; +begin + if Length(sName) < Length(sAbbrev) then + Result := false + else + Result := true; + i := 1; + while (i <= Length(sAbbrev)) and Result do + begin + if UpCase(sName[i]) <> sAbbrev[i] then + Result := false; + inc(i); + end; +end; + +function BookNameToNumber({const}sBookName: string): byte; +var i: integer; +begin + Result := 0; + try + if IsDigit(sBookName[Length(sBookName)]) and IsDigit(sBookName[1]) then + Result := StrToInt(sBookName); + except + Result := 0; + end; + i := 0; + while (Result = 0) and (i <= 115) do {// Yuk! Linear search.} + begin + if MatchAbbrev(sBookName,BookAbbr[i].Abbr) then + begin + Result := BookAbbr[i].Num; + end; + inc(i); + end; +end; + +function BookNumberToName({const}bBookNum: byte): string; +begin + if bBookNum <= 115 then + Result := BookAbbr[bBookNum].Name + else + Result := ''; +end; + +function ConformCase({const}sPat, sSrc: string): string; +var i: integer; +begin + Result := sSrc; + if (Length(sPat) > 0) and (Length(sSrc) > 0) then + begin + Result := LowerCase(sSrc); + if IsUpper(sPat[1]) then + Result[1] := UpCase(Result[1]); + if (Length(sPat) > 1) and (Length(sSrc) > 1) then + begin + if IsUpper(sPat[2]) then + begin + for i := 2 to Length(Result) do + Result[i] := UpCase(Result[i]); + end; + end; + end; +end; + +function TReadGBF.Init({const}sFileName: string): boolean; +var s: string; + tok: TToken; +begin + try + fParagraphEnd := false; + bBk := 0; + bChap := 0; + bVs := 0; + bWd := 0; + iTotalWords := 0; + FName := sFileName; + Assign(F, FName); + reset(F); + readln(F, TokenLine); + TokenPos := 1; + fFileIsOpen := true; + repeat + s := GetToken(tok) + until (tok = tokEOF) or ((tok = tokHeader) and (s[3] = '0')); + Init := true; 
+ except + Init := false; + fFileIsOpen := false; + end +end; + +procedure TReadGBF.Done; +begin + if fFileIsOpen then + begin + closefile(F); + fFileIsOpen := false; + end; +end; + +function TReadGBF.GetToken(var TokenKind: TToken): string; +var m: integer; +begin + Result := ''; + TokenKind := tokNull; + if TokenPos = 0 then + begin + if (not fFileIsOpen) or EOF(F) then + TokenKind := tokEOF + else + begin + ReadLn(F,TokenLine); + TokenPos := 1; + end; + end; + if TokenKind <> tokEOF then + begin + m := Length(TokenLine); + if TokenPos > m then + begin + TokenKind := tokSpace; + if fParagraphEnd then + fParagraphEnd := false + else + Result := ' '; + TokenPos := 0; + end + else + begin + if (TokenLine[TokenPos] = '<') then + begin + fParagraphEnd := false; + repeat + Result := Result + TokenLine[TokenPos]; + inc(TokenPos); + until (TokenLine[TokenPos] = '>') or (TokenPos > m); + Result := Result + '>'; + inc(TokenPos); + case result[2] of + 'B': begin {// File body text type} + TokenKind := tokContent; + sContext := Result; + end; + 'C': begin {// Special characters} + TokenKind := tokControl; + if (Result[3] = 'M') or (Result[3] = 'L') then + fParagraphEnd := true; + end; + 'D': begin {// Direction} + TokenKind := tokControl; + chDirection := Result[3]; + end; + 'H': begin + TokenKind := tokHeader; + sContext := Result; + end; + 'F': begin {// Font attributes} + TokenKind := tokFont; + case Result[3] of + 'B': CharAttribs := CharAttribs + [caBold]; + 'C': CharAttribs := CharAttribs + [caSmallCaps]; + 'I': CharAttribs := CharAttribs + [caItalic]; + 'N': sFontName := copy(Result,4,Length(Result)-4); + 'O': CharAttribs := CharAttribs + [caOTQuote]; + 'R': CharAttribs := CharAttribs + [caRed]; + 'S': CharAttribs := CharAttribs + [caSuperscript]; + 'U': CharAttribs := CharAttribs + [caUnderline]; + 'V': CharAttribs := CharAttribs + [caSubscript]; + 'b': CharAttribs := CharAttribs - [caBold]; + 'c': CharAttribs := CharAttribs - [caSmallCaps]; + 'i': CharAttribs := 
CharAttribs - [caItalic]; + 'n': sFontName := ''; + 'o': CharAttribs := CharAttribs - [caOTQuote]; + 'r': CharAttribs := CharAttribs - [caRed]; + 's': CharAttribs := CharAttribs - [caSuperscript]; + 'u': CharAttribs := CharAttribs - [caUnderline]; + 'v': CharAttribs := CharAttribs - [caSubscript]; + + end; + end; + 'J': begin {// Justification} + TokenKind := tokStyle; + chJustification := Result[3]; + end; + 'P': begin {// Poetry/prose, indent} + TokenKind := tokControl; + case Result[3] of + 'I': fIndent := true; + 'P': fPoetry := true; + 'i': fIndent := false; + 'p': fPoetry := false; + end; + end; + 'R': begin {// References and footnotes} + TokenKind := tokControl; + end; + 'S': begin {// sync mark} + TokenKind := TokSync; + case Result[3] of + 'B': begin {// Book} + sBook := system.copy(Result, 4, length(Result)-4); + sPsalmBookTitle := ''; + if sBook = '' then + begin + inc(bBk); + sBook := BookNumberToName(bBk); + end + else + bBk := BookNameToNumber(sBook); + sTitle := sBook; + end; + 'C': begin {//chapter} + sChapter := system.copy(Result, 4, length(Result)-4); + if sChapter = '' then + begin + inc(bChap); + sChapter := IntToStr(bChap); + end + else + begin + try + bChap := StrToInt(sChapter); + except + showmessage('Non-numeric chapter: '+sBook+' '+sChapter); + end; + end; + sHebrewTitle := ''; + end; + 'V': begin {// Verse} + bWd := 0; + sVerse := system.copy(Result, 4, length(Result)-4); + if sVerse = '' then + begin + inc(bVs); + sVerse := IntToStr(bVs); + end + else + begin + try + bVs := StrToInt(sVerse); + except + showmessage('Non-numeric verse: '+sBook+' '+sChapter+':'+sVerse); + end; + end; + end; + 'D': begin {// Date} + sDate := system.copy(Result, 3, length(Result)-4); + end; + end; + end; + 'T': begin {// Titles} + TokenKind := TokContent; + case Result[3] of + 'B': + begin + sPsalmBookTitle := ''; + fInPsalmBookTitle := true; + end; + 'b': fInPsalmBookTitle := true; + 'H': + begin + sHebrewTitle := ''; + fInHebrewTitle := true; + end; + 
'h': fInHebrewTitle := false; + 'S': + begin + sSectionTitle := ''; + fInSectionTitle := true; + end; + 's': fInSectionTitle := false; + 'T': + begin + sTitle := ''; + fInTitle := true; + end; + 't': fInTitle := false; + end; + end; + 'Z': begin {// File tail} + TokenKind := tokTail; + sContext := Result; + if Result[3] = 'Z' then + done; + end; + else + TokenKind := TokControl; + + end; + end + else if isletter(TokenLine[TokenPos]) then + begin {Word} + fParagraphEnd := false; + TokenKind := tokWord; + repeat + Result := Result + TokenLine[TokenPos]; + inc(TokenPos); + until (TokenPos > m) or (not isinword(TokenLine[TokenPos])); + inc(bWd); + inc(iTotalWords); + end + else if ((TokenLine[TokenPos] = ' ') or (TokenLine[TokenPos] = #9)) then + begin + fParagraphEnd := false; + TokenKind := tokSpace; + Result := Result + TokenLine[TokenPos]; + inc(TokenPos); + end + else + begin + fParagraphEnd := false; + TokenKind := tokChar; + Result := Result + TokenLine[TokenPos]; + inc(TokenPos); + end + end; + end; + if ((TokenKind = tokWord) or (TokenKind = tokSpace) or + (TokenKind = tokChar)) then + begin + if fInTitle then + sTitle := sTitle + Result + else if fInPsalmBookTitle then + sPsalmBookTitle := sPsalmBookTitle + Result + else if fInHebrewTitle then + sHebrewTitle := sHebrewTitle + Result + else if fInSectionTitle then + sSectionTitle := sSectionTitle + Result; + end; +end; + +function TWriteGBF.Init({const}sFileName: string): boolean; +begin + try + bBk := 0; + bChap := 0; + bVs := 0; + bWd := 0; + LineOut := ''; + FName := sFileName; + Assign(F, FName); + filemode := 1; + rewrite(F); + fFileIsOpen := true; + Init := true; + except + Init := false; + fFileIsOpen := false; + end +end; + +function TWriteGBF.Done: boolean; +begin + try + if fFileIsOpen then + begin + if LineOut <> '' then + begin + WriteLn(F, LineOut); + LineOut := ''; + end; + CloseFile(F); + end; + Done := true; + except + Done := false; + end; +end; + +procedure TWriteGBF.Out({const}s: string); 
+var sPrint, sSave, sBook, sChapter, sVerse: string; + i: integer; + b: byte; +begin + if (Length(s) > 0) and IsLetter(s[1]) then + begin + inc(bWd); + LineOut := LineOut + s; + end + else if Length(s) > 3 then + begin + if (s[1] = '<') and (s[2] = 'S') then + begin + case s[3] of + 'B': begin {// Book} + sBook := system.copy(s, 4, length(s)-4); + if sBook = '' then + begin + inc(bBk); + LineOut := LineOut + s; + end + else + begin + b := bBk; + bBk := BookNameToNumber(sBook); + if b <> bBk then + LineOut := LineOut + s; + end; + end; + 'C': begin {//chapter} + sChapter := system.copy(s, 4, length(s)-4); + if sChapter = '' then + begin + inc(bChap); + LineOut := LineOut + s; + end + else + begin + try +{// b := bChap;} + bChap := StrToInt(sChapter); +{// if b <> bChap then} + LineOut := LineOut + s; + except + showmessage('Non-numeric chapter: '+sBook+' '+sChapter); + end; + end; + end; + 'V': begin {// Verse} + bWd := 0; + sVerse := system.copy(s, 4, length(s)-4); + if sVerse = '' then + begin + inc(bVs); + LineOut := LineOut + s; + end + else + begin + try +{// b := bVs;} + bVs := StrToInt(sVerse); +{// if b <> bVs then} + LineOut := LineOut + s; + except + showmessage('Non-numeric verse: '+sBook+' '+sChapter+':'+sVerse); + end; + end; + end; + else + LineOut := LineOut + s; + end + end + else + LineOut := LineOut + s; {// Not a sync mark} + end + else {// other token, space, or punctuation} + LineOut := LineOut + s; {// Length <= 3} + if ((s = '<CM>') or (s = '<CL>')) then + begin + if (Length(LineOut) > 78) then + begin + i := 78; + while (i > 0) and (LineOut[i] <> ' ') do + dec(i); + if i < 1 then + begin + WriteLn(F,LineOut); + LineOut := ''; + end + else + begin + sPrint := system.copy(LineOut,1,i-1); + sSave := system.copy(LineOut,i+1,Length(LineOut)-i); + WriteLn(F,sPrint); + WriteLn(F, sSave); + LineOut := ''; + end + end + else + begin + WriteLn(F, LineOut); + LineOut := ''; + end + end + else if (Length(LineOut) > 78) then + begin + i := 78; + while (i 
> 0) and (LineOut[i] <> ' ') do + dec(i); + if i < 1 then + begin + WriteLn(F,LineOut); + LineOut := ''; + end + else + begin + sPrint := system.copy(LineOut,1,i-1); + sSave := system.copy(LineOut,i+1,Length(LineOut)-i); + WriteLn(F,sPrint); + LineOut := sSave; + end + end +end; + +end. diff --git a/src/modules/texts/rawgbf/Gbfmain.pas b/src/modules/texts/rawgbf/Gbfmain.pas new file mode 100644 index 0000000..4377622 --- /dev/null +++ b/src/modules/texts/rawgbf/Gbfmain.pas @@ -0,0 +1,1267 @@ +unit GBFMain; + +interface + +uses + Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs, + Buttons, StdCtrls, ExtCtrls, GBF; + +const + sTitlePar = '\pard\plain \s1\fi432\sb240\sa60\keepn\widctlpar \b\f5\fs28\kerning28 '; + sNormalPar = '\pard\plain \fi432\widctlpar \f4 '; + sNormalQuotePar = '\pard\plain \s20\fi432\li432\widctlpar \f4 '; + sPoetryPar = '\pard\plain \s18\fi-432\li432\widctlpar \f4 '; + sPoetryQuotePar = '\pard\plain \s21\fi-432\li864\widctlpar \f4 '; + sHebrewTitlePar = '\pard\plain \s16\fi432\keep\keepn\widctlpar \f4\fs20 '; + sSelahPar = '\pard\plain \s19\qr\widctlpar \f4 '; + ANSI2OEM: array[0..255] of char = + ( #0, #1, #2, #3, #4, #5, #6, #7, + #8, #9, #10, #11, #12, #13, #14, #15, + #16, #17, #18, #19, #20, #21, #22, #23, + #24, #25, #26, #27, #28, #29, #30, #31, + #32, #33, #34, #35, #36, #37, #38, #39, + #40, #41, #42, #43, #44, #45, #46, #47, + #48, #49, #50, #51, #52, #53, #54, #55, + #56, #57, #58, #59, #60, #61, #62, #63, + #64, #65, #66, #67, #68, #69, #70, #71, + #72, #73, #74, #75, #76, #77, #78, #79, + #80, #81, #82, #83, #84, #85, #86, #87, + #88, #89, #90, #91, #92, #93, #94, #95, + #96, #97, #98, #99,#100,#101,#102,#103, + #104,#105,#106,#107,#108,#109,#110,#111, + #112,#113,#114,#115,#116,#117,#118,#119, + #120,#121,#122,#123,#124,#125,#126,#127, + #128,#129, ',', 'a', '"',#133,#197,#216, + '^', '%', 'S', '<',#140,#141,#142,#143, + #144, #96, #97, '"', '"',#249,#150,#151, + '~',#153, 's', '>',#156,#157,#158, 'Y', + ' 
',#173,#155,#156,#232,#157,#124, #21, + #168,#169, 'a',#174,#170, '-',#174, '_', + #167,#241,#253, '3', #39,#230, #20,#254, + ',', '1', 'o',#175,#172,#171,#190,#168, + 'A', 'A', 'A', 'A',#142,#143,#198,#128, + 'E',#144, 'E',#142, 'I', 'I', 'I', 'I', + 'D',#165, 'O', 'O', 'O', 'O',#153, 'x', + '0', 'U', 'U', 'U',#154, 'Y', 'b',#225, + #133,#130,#131, 'a',#132,#134,#230,#135, + #138,#130,#136,#137,#141,#161,#140,#139, + #148,#164,#149,#162,#147, 'o',#148,#246, + 'o',#151,#163,#150,#129, 'y', 'b',#152); + +type + TGBFConverterMainForm = class(TForm) + SourceEdit: TEdit; + Label1: TLabel; + BrowseButton: TButton; + SaveDialog1: TSaveDialog; + OpenDialog1: TOpenDialog; + DestEdit: TEdit; + Label2: TLabel; + BrowseDestButton: TButton; + FormatRadioGroup: TRadioGroup; + GoBitBtn: TBitBtn; + CloseBitBtn: TBitBtn; + Timer1: TTimer; + VerseLabel: TLabel; + ApocryphaCheckBox: TCheckBox; + WdLabel: TLabel; + Label3: TLabel; + Label4: TLabel; + WEBDraftCheckBox: TCheckBox; + QuickButton: TButton; + procedure CloseBitBtnClick(Sender: TObject); + procedure GoBitBtnClick(Sender: TObject); + procedure Timer1Timer(Sender: TObject); + procedure FormShow(Sender: TObject); + procedure FormatRadioGroupClick(Sender: TObject); + procedure QuickConversion; + procedure DoConversion; + procedure QuickButtonClick(Sender: TObject); + procedure FormActivate(Sender: TObject); + private + { Private declarations } + public + { Public declarations } + end; + +var + GBFConverterMainForm: TGBFConverterMainForm; + +implementation + +{$R *.DFM} + +var InFile: TReadGBF; + OutGBF: TWriteGBF; + OutFile: TextFile; + +function ANSIToOEM(s: string): string; +var i, j: integer; +begin + Result := s; + j := 1; + for i := 1 to length(s) do + begin + case s[i] of + #133: + begin + Result[j] := '.'; + inc(j); + Insert('..', Result, j); + inc(j); + end; + #140: + begin + Result[j] := 'O'; + inc(j); + Insert('E', Result, j); + end; + #150: + begin + Result[j] := '-'; + inc(j); + Insert('-', Result, j); + end; + 
#151: + begin + Result[j] := '-'; + inc(j); + Insert('-', Result, j); + end; + #153: + begin + Result[j] := '('; + inc(j); + Insert('TM)', Result, j); + inc(j,2); + end; + #156: + begin + Result[j] := 'o'; + inc(j); + Insert('e', Result, j); + end; + #169: + begin + Result[j] := '('; + inc(j); + Insert('C)',Result, j); + inc(j); + end; + #174: + begin + Result[j] := '('; + inc(j); + Insert('R)',Result, j); + inc(j); + end; + #198: + begin + Result[j] := 'A'; + inc(j); + Insert('E', Result, j); + end; + #230: + begin + Result[j] := 'a'; + inc(j); + Insert('e', Result, j); + end; + else + Result[j] := ANSI2OEM[ord(s[i])]; + end; + inc(j); + end; +end; + +procedure TGBFConverterMainForm.CloseBitBtnClick(Sender: TObject); +begin + Close; +end; + +procedure TGBFConverterMainForm.DoConversion; +var LastBook, wd, ParagraphAttributes, s, sLine, sPrint, sSave, + OutFileName: string; + LinePos, i, iFileNumber: integer; + tok: TToken; + fInclude, fProse, fSkip, fHTMLisOpen, fRed, fASCIIisOpen: boolean; + bLastBook, bChap: byte; + + procedure CheckEOL; + begin + if Length(sLine) > 65 then + begin + i := 65; + while (i > 0) and (sLine[i] <> ' ') do + dec(i); + if i < 1 then + begin + if fASCIIisOpen then WriteLn(OutFile,sLine); + if fProse then + sLine := '' + else + sLine := ' '; + end + else + begin + sPrint := system.copy(sLine,1,i-1); + if fProse then + sSave := system.copy(sLine,i+1,Length(sLine)-i) + else + sSave := ' '+system.copy(sLine,i+1,Length(sLine)-i); + if fASCIIisOpen then WriteLn(OutFile,sPrint); + sLine := sSave; + end + end; + end; + + procedure StartNewLine; + begin + if fInclude then + begin + CheckEol; + if fASCIIisOpen then WriteLn(OutFile, sLine); + sLine := ''; + end; + end; + + procedure CloseHTML; + begin + if fHTMLisOpen then + begin + WriteLn(OutFile,sLine); + sLine := ''; + WriteLn(OutFile,'</P>'); + WriteLn(OutFile,'<P></P><HR><A HREF="index.htm">[Index]</A> '); + WriteLn(OutFile,'<A HREF="http://www.ebible.net/bible/">[Home]</A>'); + 
WriteLn(OutFile,'</BODY></HTML>'); + CloseFile(OutFile); + fHTMLisOpen := false; + end; + end; + + procedure CloseASCII; + begin + if fASCIIisOpen then + begin + WriteLn(OutFile,sLine); + sLine := ''; + WriteLn(OutFile); + if WEBDraftCheckBox.Checked then + begin + WriteLn(OutFile,'______________________________________________________________'); + WriteLn(OutFile); + WriteLn(OutFile,'The above is from the public domain World English Bible (WEB).'); + WriteLn(OutFile,'See http://www.ebible.org/bible/WEB for more about this Bible.'); + WriteLn(OutFile,'Please report typos to mpj@ebible.org.'); + end; + CloseFile(OutFile); + fASCIIisOpen := false; + end; + end; + + procedure OpenHTML; + begin + if fHTMLisOpen then CloseHTML; + sLine := ''; + OutFileName := ExtractFilePath(DestEdit.Text)+BookFileName[InFile.bBk]+'.htm'; + AssignFile(OutFile,OutFileName); + Rewrite(OutFile); + WriteLn(OutFile,'<HTML>'); + WriteLn(OutFile,'<HEAD>'); + WriteLn(OutFile,'<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=windows-1252">'); + WriteLn(OutFile,'<META http-equiv="PICS-Label" content=''(PICS-1.1 "http://www.rsac.org/ratingsv01.html"'); + WriteLn(OutFile,' l gen true comment "RSACi North America Server" by "mpj@csn.net" for "http://www.csn.net/~mpj"'); + WriteLn(OutFile,' on "1996.08.29T12:42-0500" r (n 0 s 0 v 0 l 0))''>'); + WriteLn(OutFile,'<META NAME="description" CONTENT="'+InFile.sTitle+' from the World English Bible -- a Public Domain Modern English translation of the Holy Bible.">'); + WriteLn(OutFile,'<META NAME="keywords" CONTENT="'+BookFileName[InFile.bBk]+', '+InFile.sTitle+', Bible, Christian, Holy Bible, Bible search, WEB, World English Bible, Scriptures, Scripture, Bibles, Gospel, Gospels, bible">'); + WriteLn(OutFile,'<TITLE>'+InFile.sTitle+'</TITLE>'); + WriteLn(OutFile,'<LINK REL=Home HREF="http://www.ebible.org/bible">'); + WriteLn(OutFile,'<LINK REL=Glossary HREF="glossary.htm">'); + WriteLn(OutFile,'</HEAD>'); + WriteLn(OutFile,'<BODY 
BGCOLOR="#80ffff">'); + WriteLn(OutFile,'<H1>'); + WriteLn(OutFile,InFile.sTitle); + WriteLn(OutFile,'</H1><P>'); + fHTMLisOpen := true; + end; + + procedure OpenASCII; + begin + if fASCIIisOpen then CloseASCII; + if fProse then + sLine := ' ' + else + sLine := ''; + OutFileName := ExtractFilePath(DestEdit.Text)+BookFileName[InFile.bBk]+'.txt'; + AssignFile(OutFile,OutFileName); + Rewrite(OutFile); + WriteLn(OutFile); + WriteLn(OutFile,InFile.sTitle); + WriteLn(OutFile); + fASCIIisOpen := true; + end; + + procedure OpenNTChapter; + var s: string; + begin + if InFile.bBk >= 64 then + begin + if fASCIIisOpen then CloseASCII; + inc(iFileNumber); + s := IntToStr(iFileNumber); + if Length(s) < 3 then s := '0'+s; + if Length(s) < 3 then s := '0'+s; + OutFileName := ExtractFilePath(DestEdit.Text)+'n'+s+'.txt'; + AssignFile(OutFile,OutFileName); + Rewrite(OutFile); + WriteLn(OutFile,'Subject: '+BookFileName[InFile.bBk]+' '+InFile.sChapter+', World English Bible'); + if iFileNumber = 260 then + WriteLn(OutFile,'X-Reset: 1'); + WriteLn(OutFile); + WriteLn(OutFile); + WriteLn(OutFile,InFile.sTitle+', Chapter '+InFile.sChapter); + WriteLn(OutFile); + fASCIIisOpen := true; + if fProse then + sLine := ' ' + else + sLine := ''; + end + else + begin + inc(bChap); + if (bLastBook <> Infile.bBk) or ((bChap mod 3) = 1) then + begin + if (bLastBook <> Infile.bBk) then + begin + bLastBook := Infile.bBk; + bChap := 1; + end; + if fASCIIisOpen then CloseASCII; + inc(iFileNumber); + s := IntToStr(iFileNumber); + if Length(s) < 3 then s := '0'+s; + if Length(s) < 3 then s := '0'+s; + OutFileName := ExtractFilePath(DestEdit.Text)+s+'.txt'; + AssignFile(OutFile,OutFileName); + Rewrite(OutFile); + WriteLn(OutFile,'Subject: '+BookFileName[InFile.bBk]+' '+InFile.sChapter+', World English Bible'); + if (Infile.bBk = 39) and (bChap = 4) then + WriteLn(OutFile,'X-Reset: 1'); + WriteLn(OutFile); + WriteLn(OutFile); + WriteLn(OutFile,InFile.sTitle+', starting at chapter '+InFile.sChapter); + 
WriteLn(OutFile); + fASCIIisOpen := true; + if fProse then + sLine := ' ' + else + sLine := ''; + end; + end; + end; + + procedure CheckHTMLEOL; + begin + if Length(sLine) > 75 then + begin + i := 75; + while (i > 0) and (sLine[i] <> ' ') do + dec(i); + if i < 1 then + begin + if fHTMLisOpen then WriteLn(OutFile,sLine); + sLine := '' + end + else + begin + sPrint := system.copy(sLine,1,i-1); + sSave := system.copy(sLine,i+1,Length(sLine)-i); + if fHTMLisOpen then WriteLn(OutFile,sPrint); + sLine := sSave; + end + end; + end; + + procedure StartNewHTMLLine; + begin + if fInclude then + begin + CheckHTMLEOL; + if fHTMLisOpen then WriteLn(OutFile, sLine+'</P>'); + sLine := '<P>'; + end; + end; + + +begin + QuickButton.Enabled := false; + GoBitBtn.Enabled := false; + fInclude := false; + fSkip := false; + fProse := true; + fRed := false; + LastBook := ''; + ParagraphAttributes := sNormalPar; + try + InFile := TReadGBF.Create; + if InFile.Init(Trim(SourceEdit.Text)) then + begin + LinePos := 0; + case FormatRadioGroup.ItemIndex of + -1: showmessage('No destination format selected!'); + 0: begin + Label3.Caption := 'Converting to ASCII'; + AssignFile(OutFile, DestEdit.Text); + FileMode := 1; + Rewrite(OutFile); + fASCIIisOpen := true; + sLine := ''; + repeat + wd := ANSIToOEM(InFile.GetToken(tok)); + Application.ProcessMessages; + case tok of + tokWord: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokSpace: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokSync: + begin + if fInclude and (length(wd) > 3) then + begin + if wd[3] = 'V' then + begin + sLine := sLine + '{' + InFile.sChapter+':'+ + InFile.sVerse+'} '; + CheckEOL + end + else if (wd[3] = 'C') and (InFile.bBk = 19) then + begin + StartNewLine; + WriteLn(OutFile, 'Psalm '+InFile.sChapter); + WriteLn(OutFile); + end; + if wd[3] = 'B' then + fProse := true; + end; + end; + tokContent: + begin + if wd = 
'<BO>' then + fInclude := true + else if wd = '<BN>' then + fInclude := true + else if wd = '<BO>' then + fInclude := ApocryphaCheckBox.Checked + end; + tokControl: + begin + if wd = '<CM>' then + begin + StartNewLine; + if fProse then + begin + WriteLn(OutFile); + sLine := ' ' + end + end + else if wd = '<CL>' then + begin + StartNewLine; + sLine := ' '; + end + else if wd = '<Pp>' then + fProse := true + else if wd = '<PP>' then + fProse := false + else if wd = '<RF>' then + fSkip := true + else if wd = '<Rf>' then + fSkip := false + else if wd = '<RN>' then + fSkip := true + else if wd = '<Rn>' then + fSkip := false + else if wd = '<ZZ>' then + fInclude := false + end; + tokChar: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokFont: + begin + if wd = '<FI>' then + begin + if fInclude then + sLine := sLine + '['; + end + else if wd = '<Fi>' then + begin + if fInclude then + sLine := sLine + ']'; + end + end; + end + until tok = tokEOF; + writeln(OutFile, sLine); + CloseFile(OutFile); + fASCIIisOpen := false; + Label3.Caption := ''; + end; + 1: begin + Label3.Caption := 'Converting to ASCII (one file/book)'; + FileMode := 1; + fASCIIisOpen := false; + sLine := ''; + repeat + Application.ProcessMessages; + wd := ANSIToOEM(InFile.GetToken(tok)); + case tok of + tokEOF: + CloseASCII; + tokWord: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokSpace: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokSync: + begin + if fInclude and (length(wd) > 3) then + begin + if wd[3] = 'V' then + begin + sLine := sLine + '{' + InFile.sChapter+':'+ + InFile.sVerse+'} '; + CheckEOL + end + else if (wd[3] = 'C') and (InFile.bBk = 19) then + begin + StartNewLine; + WriteLn(OutFile, 'Psalm '+InFile.sChapter); + WriteLn(OutFile); + end; + if wd[3] = 'B' then + begin + fProse := true; + CloseASCII; + end; + end; + end; + 
tokContent: + begin + if wd = '<BO>' then + fInclude := true + else if wd = '<BN>' then + fInclude := true + else if wd = '<BO>' then + fInclude := ApocryphaCheckBox.Checked + else if wd = '<Tt>' then + OpenASCII; + end; + tokControl: + begin + if wd = '<CM>' then + begin + StartNewLine; + if fProse then + begin + if fASCIIisOpen then WriteLn(OutFile); + sLine := ' ' + end + end + else if wd = '<CL>' then + begin + StartNewLine; + sLine := ' '; + end + else if wd = '<Pp>' then + fProse := true + else if wd = '<PP>' then + fProse := false + else if wd = '<RF>' then + fSkip := true + else if wd = '<Rf>' then + fSkip := false + else if wd = '<RN>' then + fSkip := true + else if wd = '<Rn>' then + fSkip := false + else if wd = '<ZZ>' then + fInclude := false + end; + tokChar: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokFont: + begin + if wd = '<FI>' then + begin + if fInclude then + sLine := sLine + '['; + end + else if wd = '<Fi>' then + begin + if fInclude then + sLine := sLine + ']'; + end + end; + end + until tok = tokEOF; + if fASCIIisOpen then writeln(OutFile, sLine); + CloseASCII; + Label3.Caption := ''; + end; + 2: begin + Label3.Caption := 'Converting ASCII postings'; + bLastBook := 255; + bChap := 0; + FileMode := 1; + iFileNumber := 0; + fASCIIisOpen := false; + sLine := ''; + repeat + Application.ProcessMessages; + wd := ANSIToOEM(InFile.GetToken(tok)); + case tok of + tokEOF: + CloseASCII; + tokWord: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokSpace: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokSync: + begin + if fInclude and (length(wd) > 3) then + begin + if wd[3] = 'V' then + begin + sLine := sLine + '{' + InFile.sChapter+':'+ + InFile.sVerse+'} '; + CheckEOL + end + else if (wd[3] = 'C') then + begin + OpenNTChapter; + if (InFile.bBk = 19) then + begin + StartNewLine; + if 
fASCIIisOpen then + begin + WriteLn(OutFile, 'Psalm '+InFile.sChapter); + WriteLn(OutFile); + end; + end; + end; + if wd[3] = 'B' then + begin + fProse := true; + CloseASCII; + end; + end; + end; + tokContent: + begin + if wd = '<BO>' then + fInclude := true + else if wd = '<BN>' then + begin + fInclude := true; + iFileNumber := 0; + end + else if wd = '<BO>' then + fInclude := ApocryphaCheckBox.Checked + end; + tokControl: + begin + if wd = '<CM>' then + begin + StartNewLine; + if fProse then + begin + if fASCIIisOpen then WriteLn(OutFile); + sLine := ' ' + end + end + else if wd = '<CL>' then + begin + StartNewLine; + sLine := ' '; + end + else if wd = '<Pp>' then + fProse := true + else if wd = '<PP>' then + fProse := false + else if wd = '<RF>' then + fSkip := true + else if wd = '<Rf>' then + fSkip := false + else if wd = '<RN>' then + fSkip := true + else if wd = '<Rn>' then + fSkip := false + else if wd = '<ZZ>' then + fInclude := false + end; + tokChar: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokFont: + begin + if wd = '<FI>' then + begin + if fInclude then + sLine := sLine + '['; + end + else if wd = '<Fi>' then + begin + if fInclude then + sLine := sLine + ']'; + end + end; + end + until tok = tokEOF; + if fASCIIisOpen then writeln(OutFile, sLine); + CloseASCII; + Label3.Caption := ''; + end; + 3: begin + Label3.Caption := 'Converting to RTF'; + AssignFile(OutFile, DestEdit.Text); + FileMode := 1; + Rewrite(OutFile); + repeat + Application.ProcessMessages; + wd := InFile.GetToken(tok); + case tok of + tokWord: + begin + if fInclude then + begin + LinePos := LinePos + Length(wd); + write(OutFile,wd); + end; + end; + tokSpace: + begin + if fInclude then + begin + LinePos := LinePos + Length(wd); + if LinePos > 78 then + begin + WriteLn(OutFile,wd); + LinePos := 0; + end + else + write(OutFile,wd); + end + end; + tokSync: + begin + if length(wd) > 1 then + begin + case wd[2] of + 'B': begin + if 
InFile.sBook <> LastBook then + begin + LastBook := InFile.sBook; + WriteLn(OutFile,'\par '+sTitlePar+ + LastBook+'\par '+ParagraphAttributes); + LinePos := 0; + end; + end; + 'V': begin + s := '{\f5\super '+InFile.sChapter+':'+ + InFile.sVerse+'}'; + Write(OutFile,s); + LinePos := LinePos+Length(s); + end; + end; + end; + end; + tokControl: + begin + if length(wd) > 1 then + begin + case wd[2] of + 'A': fInclude := false; + 'E': begin + Write(OutFile,'{\b\cf1 '); + LinePos := LinePos + 8; + end; + 'F': fInclude := false; + 'H': begin + fInclude := true; + ParagraphAttributes := sHebrewTitlePar; + Write(OutFile,ParagraphAttributes); + LinePos := LinePos + Length(ParagraphAttributes); + end; + 'I' : begin + Write(OutFile,'{\i\cf1 '); + LinePos := LinePos + 7; + end; + 'J' : begin + Write(OutFile,'{\scaps '); + LinePos := LinePos + 8; + end; + 'K': fInclude := false; + 'M': begin + if fInclude then + begin + writeln(OutFile); + write(OutFile,'\par '+ParagraphAttributes); + LinePos := Length(ParagraphAttributes) + 5; + end; + end; + 'N': begin + fInclude := true; + ParagraphAttributes := sNormalPar; + Write(OutFile,ParagraphAttributes); + LinePos := LinePos + Length(ParagraphAttributes); + end; + 'P': begin + fInclude := true; + ParagraphAttributes := sPoetryPar; + Write(OutFile,ParagraphAttributes); + LinePos := LinePos + Length(ParagraphAttributes); + end; + 'Q': begin + fInclude := true; + ParagraphAttributes := sTitlePar; + Write(OutFile,ParagraphAttributes); + LinePos := LinePos + Length(ParagraphAttributes); + end; + 'R' : begin + Write(OutFile,'\cf6 '); + LinePos := LinePos + 4; + end; + 'S': begin + fInclude := true; + ParagraphAttributes := sSelahPar; + Write(OutFile,ParagraphAttributes); + LinePos := LinePos + Length(ParagraphAttributes); + end; + 'T': begin + fInclude := true; + ParagraphAttributes := sTitlePar; + Write(OutFile,ParagraphAttributes); + LinePos := LinePos + Length(ParagraphAttributes); + end; + 'U' : begin + Write(OutFile,'{\ul '); + LinePos 
:= LinePos + 4; + end; + 'W': begin + fInclude := true; + ParagraphAttributes := sNormalQuotePar; + Write(OutFile,ParagraphAttributes); + LinePos := LinePos + Length(ParagraphAttributes); + end; + 'X': fInclude := false; + 'Y': begin + fInclude := true; + ParagraphAttributes := sPoetryQuotePar; + Write(OutFile,ParagraphAttributes); + LinePos := LinePos + Length(ParagraphAttributes); + end; + 'Z': fInclude := false; + 'a': fInclude := false; + 'c': fInclude := false; + 'e': begin + Write(OutFile,'}'); + inc(LinePos); + end; + 'h': fInclude := false; + 'i': begin + Write(OutFile,'}'); + inc(LinePos); + end; + 'j': begin + Write(OutFile,'}'); + inc(LinePos); + end; + 'n': fInclude := false; + 'p': fInclude := false; + 'r': begin + Write(OutFile,'}'); + inc(LinePos); + end; + 'u': begin + Write(OutFile,'}'); + inc(LinePos); + end; + + end; + end; + end; + tokChar: + begin + if fInclude then + begin + write(OutFile,wd); + LinePos := LinePos + length(wd); + end; + end; + end; + until tok = tokEOF; + writeln(OutFile,'\par }'); + CloseFile(OutFile); + Label3.Caption := ''; + end; + 4: begin // GBF + Label3.Caption := 'Converting to GBF'; + OutGBF := TWriteGBF.Create; + OutGBF.Init(Trim(DestEdit.Text)); + OutGBF.Out('<H000>'); + repeat + Application.ProcessMessages; + wd := InFile.GetToken(tok); + if tok <> tokEOF then OutGBF.Out(wd); + until tok = tokEOF; + OutGBF.Done; + OutGBF.Free; + Label3.Caption := ''; + end; + 5: begin // HTML + Label3.Caption := 'Converting to HTML'; + fHTMLisOpen := false; + repeat + Application.ProcessMessages; + wd := Infile.GetToken(tok); + case tok of + tokEOF: + CloseHTML; + tokWord: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckHTMLEOL; + end; + end; + tokSpace: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckHTMLEOL; + end; + end; + tokSync: + begin + if fInclude and (length(wd) > 3) then + begin + if wd[3] = 'V' then + begin + if fRed then + sLine := sLine + '</FONT>'; + 
sLine := sLine + '<FONT COLOR="#0000ff"><FONT SIZE=-1><SUP>'+ + InFile.sChapter+':'+ + InFile.sVerse+'</SUP></FONT></FONT>'; + if fRed then + sLine := sLine + '<FONT COLOR="#ff0000">'; + CheckHTMLEOL + end + else if (wd[3] = 'C') and (InFile.bBk = 19) then + begin + StartNewHTMLLine; + if fHTMLisOpen then + begin + WriteLn(OutFile, '<P><H2>Psalm '+ + InFile.sChapter+'</H2>'); + WriteLn(OutFile); + end; + end; + if wd[3] = 'B' then + begin + fProse := true; + CloseHTML; + end; + end; + end; + tokContent: + begin + if wd = '<BO>' then + fInclude := true + else if wd = '<BN>' then + fInclude := true + else if wd = '<BO>' then + fInclude := ApocryphaCheckBox.Checked + else if wd = '<Tt>' then + OpenHTML; + end; + tokControl: + begin + if wd = '<CM>' then + begin + StartNewHTMLLine; + if not fProse then + begin + sLine := sLine + ' '; + end + end + else if wd = '<CL>' then + begin + StartNewHTMLLine; + sLine := sLine + ' ' + end + else if wd = '<Pp>' then + fProse := true + else if wd = '<PP>' then + fProse := false + else if wd = '<RF>' then + fSkip := true + else if wd = '<Rf>' then + fSkip := false + else if wd = '<RN>' then + fSkip := true + else if wd = '<Rn>' then + fSkip := false + else if wd = '<ZZ>' then + fInclude := false + end; + tokChar: + begin + if fInclude and (not fSkip) then + begin + if wd = '"' then + sLine := sLine + '"' + else + sLine := sLine + wd; + CheckHTMLEOL; + end; + end; + tokFont: + begin + if fInclude then + begin + if wd = '<FI>' then + sLine := sLine + '<I>' + else if wd = '<Fi>' then + sLine := sLine + '</I>' + else if wd = '<FR>' then + begin + if not fRed then + begin + sLine := sLine + '<FONT COLOR="#ff0000">'; + fRed := true + end + end + else if wd = '<Fr>' then + begin + if fRed then + begin + sLine := sLine + '</FONT>'; + fRed := false + end + end + end; + end; + end; + until tok = tokEOF; + Label3.Caption := ''; + end; + end; + InFile.Done; + end; + InFile.Free; + except + showmessage('Error!'); + end; + GoBitBtn.Enabled := 
true;
+  QuickButton.Enabled := true;
+end;
+
+{ Go button handler: runs a single conversion with the current form settings. }
+procedure TGBFConverterMainForm.GoBitBtnClick(Sender: TObject);
+begin
+  DoConversion;
+end;
+
+{ Timer handler: shows "<book> [<book number>] <chapter>:<verse>" taken from the
+  unit-level InFile reader in VerseLabel, or clears the label when InFile is nil.
+  NOTE(review): DoConversion calls InFile.Free without resetting InFile to nil,
+  so the nil test here may pass on a freed reader - confirm. }
+procedure TGBFConverterMainForm.Timer1Timer(Sender: TObject);
+var sWhere: string;
+begin
+  sWhere := '';
+  if InFile <> nil then
+    sWhere := InFile.sBook+' ['+IntToStr(InFile.bBk)+'] '+
+              InFile.sChapter+':'+InFile.sVerse;
+  VerseLabel.Caption := sWhere;
+end;
+
+{ Runs DoConversion three times in a row, selecting format indexes 1, 2 and 5
+  in FormatRadioGroup before each run (in that order). }
+procedure TGBFConverterMainForm.QuickConversion;
+const
+  QuickFormats: array[0..2] of integer = (1, 2, 5);
+var k: integer;
+begin
+  for k := Low(QuickFormats) to High(QuickFormats) do
+  begin
+    FormatRadioGroup.ItemIndex := QuickFormats[k];
+    DoConversion;
+  end;
+end;
+
+{ Clears both status labels when the form is shown. }
+procedure TGBFConverterMainForm.FormShow(Sender: TObject);
+begin
+  WdLabel.Caption := '';
+  VerseLabel.Caption := '';
+end;
+
+(*
+procedure TGBFConverterMainForm.TransformButtonClick(Sender: TObject);
+var apoc: textfile;
+    last, s, sBook, sChap, sVs: string;
+    blankcount, i: integer;
+begin
+  TransformButton.Enabled := false;
+  blankcount := 0;
+  assignfile(apoc, trim(sourceedit.text));
+  reset(apoc);
+  assignfile(outfile, trim(destedit.text));
+  rewrite(outfile);
+  last := '';
+  while not eof(apoc) do
+    begin
+      readln(apoc, s);
+      if s = '' then
+        begin
+          inc(blankcount);
+          if last <> '' then
+            begin
+              writeln(outfile, last, '~M');
+              last := '';
+            end;
+        end
+      else
+        begin
+          if blankcount >= 2 then
+            writeln(outfile, '~T',s,'~N~M') // book title
+          else if blankcount = 1 then
+            begin
+              sBook := '';
+              sChap := '';
+              sVs := '';
+              i := 1;
+              while (s[i] <> ' ') and (i <= Length(s)) do
+                begin
+                  sBook := sBook + s[i];
+                  inc(i);
+                end;
+              while (s[i] = ' ') and (i <= Length(s)) do
+                inc(i);
+              while (s[i] <> ':') and (i <= Length(s)) do
+                begin
+                  sChap := sChap + s[i];
+                  inc(i);
+                end;
+              inc(i);
+              while IsDigit(s[i]) and (i <= Length(s)) do
+                begin
+                  sVs := sVs + s[i];
+                  inc(i);
+                end;
+              write(outfile, '~B'+sBook+';~C'+sChap+';');
+              if sVs <> '' then
+                write(outfile, '~V'+sVs+';');
+            end
+          else
+            begin
+              if last <> '' then
+                begin
+                  writeln(outfile, last);
+                end;
+              last := s;
+            end;
+          blankcount := 0;
+        end;
+    end;
+  if last <> '' then
+    begin
+      writeln(outfile, last);
+      last := s;
+    end;
+  closefile(outfile);
+  closefile(apoc);
+  TransformButton.Enabled := true;
+end;
+*)
+
+{ Radio group handler: fills DestEdit with the default destination path for the
+  selected output format. Any index outside 0..5 (including -1, nothing
+  selected) leaves DestEdit unchanged. }
+procedure TGBFConverterMainForm.FormatRadioGroupClick(Sender: TObject);
+const
+  DefaultDest: array[0..5] of string = (
+    'pub\web.txt',        // 0: Plain ASCII (one file)
+    'pub\web.htm',        // 1: Plain ASCII (one file per book)
+    'pub\queue\web.txt',  // 2: Daily posts
+    'pub\web.rtf',        // 3: RTF
+    'pub\web.gbf',        // 4: GBF
+    'pub\htm\web.htm');   // 5: HTML
+var idx: integer;
+begin
+  idx := FormatRadioGroup.ItemIndex;
+  if (idx >= Low(DefaultDest)) and (idx <= High(DefaultDest)) then
+    DestEdit.Text := DefaultDest[idx];
+end;
+
+{ Quick button handler: runs the fixed batch of conversions. }
+procedure TGBFConverterMainForm.QuickButtonClick(Sender: TObject);
+begin
+  QuickConversion;
+end;
+
+{ On activation: when the first command-line parameter is exactly 'quick',
+  run the batch conversion and then close the form. }
+procedure TGBFConverterMainForm.FormActivate(Sender: TObject);
+begin
+  if (ParamCount > 0) and (ParamStr(1) = 'quick') then
+    begin
+      QuickConversion;
+      close;
+    end;
+end;
+
+end.
diff --git a/src/modules/texts/rawgbf/Makefile b/src/modules/texts/rawgbf/Makefile
new file mode 100644
index 0000000..35d6648
--- /dev/null
+++ b/src/modules/texts/rawgbf/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../../..
+ +all: + make -C ${root} diff --git a/src/modules/texts/rawgbf/Makefile.am b/src/modules/texts/rawgbf/Makefile.am new file mode 100644 index 0000000..ab6aa2e --- /dev/null +++ b/src/modules/texts/rawgbf/Makefile.am @@ -0,0 +1,4 @@ +rawgbfdir = $(top_srcdir)/src/modules/texts/rawgbf + +libsword_la_SOURCES += $(rawgbfdir)/rawgbf.cpp + diff --git a/src/modules/texts/rawgbf/gbf.cpp b/src/modules/texts/rawgbf/gbf.cpp new file mode 100644 index 0000000..dc67a1c --- /dev/null +++ b/src/modules/texts/rawgbf/gbf.cpp @@ -0,0 +1,735 @@ +enum TToken { +tokNull, tokEOF, tokHeader, tokContent, tokTail, tokStyle, + tokWord, tokSpace, tokSync, tokControl, tokChar, tokFont}; + +enum TCharacterAttribute { caBold, caSmallCaps, caItalic, caOTQuote, caRed, + caSuperscript, caUnderline, caSubscript}; + +// TCharAttribs = set of TCharacterAttribute; + + +struct TBookNameRec { + string Name, Abbr; + char Num; +} + +const struct TBookNameRec TBookAbbr[116] = { + {"1 Chronicles", "1CH", 13}, //0 + {"1 Corinthians", "1CO", 70}, //1 + {"1 Esdras", "1E", 52}, //2 + {"1 John", "1J", 86}, //3 + {"1 Kings", "1K", 11}, //4 + {"1 Maccabees", "1M", 50}, //5 + {"1 Peter", "1P", 84}, //6 + {"1 Samuel", "1S", 9}, //7 + {"1 Thessalonians", "1TH", 76}, //8 + {"1 Timothy", "1TI", 78}, //9 + {"2 Chronicles", "2CH", 14}, //10 + {"2 Corinthians", "2CO", 71}, //11 + {"2 Esdras", "2E", 56}, //12 + {"2 John", "2J", 87}, //13 + {"2 Kings", "2K", 12}, //14 + {"2 Maccabees", "2M", 51}, //15 + {"2 Peter", "2P", 85}, //16 + {"2 Samuel", "2S", 10}, //17 + {"2 Thessalonians", "2TH", 77}, //18 + {"2 Timothy", "2TI", 79}, //19 + {"3 John", "3J", 88}, //20 + {"3 Maccabees", "3M", 55}, //21 + {"4 Maccabees", "4M", 57}, //22 + {"1 Chronicles", "1 CH", 13}, //0 + {"1 Corinthians", "1 CO", 70}, //1 + {"1 Esdras", "1 E", 52}, //2 + {"1 John", "1 J", 86}, //3 + {"1 Kings", "1 K", 11}, //4 + {"1 Maccabees", "1 M", 50}, //5 + {"1 Peter", "1 P", 84}, //6 + {"1 Samuel", "1 S", 9}, //7 + {"1 Thessalonians", "1 TH", 76}, //8 + {"1 
Timothy", "1 TI", 78}, //9 + {"2 Chronicles", "2 CH", 14}, //10 + {"2 Corinthians", "2 CO", 71}, //11 + {"2 Esdras", "2 E", 56}, //12 + {"2 John", "2 J", 87}, //13 + {"2 Kings", "2 K", 12}, //14 + {"2 Maccabees", "2 M", 51}, //15 + {"2 Peter", "2 P", 85}, //16 + {"2 Samuel", "2 S", 10}, //17 + {"2 Thessalonians", "2 TH", 77}, //18 + {"2 Timothy", "2 TI", 79}, //19 + {"3 John", "3 J", 88}, //20 + {"3 Maccabees", "3 M", 55}, //21 + {"4 Maccabees", "4 M", 57}, //22 + {"Acts", "AC", 68}, //23 + {"Amos", "AM", 30}, //24 + {"Prayer of Asariah and the Song of the Three Jews", "AZ", 47}, + {"Baruch", "BA", 45}, //26 + {"Bel and the Dragon","BE", 49}, //27 + {"Colossians", "CO", 75}, //28 + {"Daniel", "DA", 27}, //29 + {"Deuteronomy", "DE", 5}, //30 + {"Deuteronomy", "DT", 5}, //31 + {"Ecclesiasties", "EC", 21}, //32 + {"Esther", "ES", 17}, //33 + {"Exodus", "EX", 2}, //34 + {"Ezekiel", "EZE", 26}, //35 + {"Ezra", "EZR", 15}, //36 + {"Galatians", "GA", 72}, //37 + {"Genesis", "GE", 1}, //38 + {"Genesis", "GN", 1}, //39 + {"Ephesians", "EP", 73}, //40 + {"Esther (Greek}", "GR", 42), //41 + {"Habakkuk", "HAB", 35}, //42 + {"Haggai", "HAG", 37}, //43 + {"Hebrews", "HE", 82}, //44 + {"Hosea", "HO", 28}, //45 + {"Isaiah", "IS", 23}, //46 + {"James", "JA", 83}, //47 + {"Jeremiah", "JE", 24}, //48 + {"Job", "JOB", 18}, //49 + {"Joel", "JOE", 29}, //50 + {"John", "JOH", 67}, //51 + {"Jonah", "JON", 32}, //52 + {"Joshua", "JOS", 6}, //53 + {"Jude", "JUDE", 89}, //54 + {"Judges", "JUDG", 7}, //55 + {"Judith", "JUDI", 41}, //56 + {"Lamentations", "LA", 25}, //57 + {"Letter of Jeremiah",Abbr:"LET", 46}, //58 + {"Leviticus", "LEV", 3}, //59 + {"Luke", "LK", 66}, //60 + {"Leviticus", "LV", 3}, //61 + {"Luke", "LU", 66}, //62 + {"Malachi", "MAL", 39}, //63 + {"Prayer of Manasseh",Abbr:"MAN", 53}, //64 + {"Mark", "MAR", 65}, //65 + {"Matthew", "MAT", 64}, //66 + {"Micah", "MI", 33}, //67 + {"Nahum", "NA", 34}, //68 + {"Nehemiah", "NE", 16}, //69 + {"Numbers", "NU", 4}, //70 + {"Obadiah", 
"OB", 31}, //71 + {"Psalm 151", "P1", 54}, //72 + {"Philemon", "PHILE", 81}, //73 + {"Philippians", "PHILI", 74}, //74 + {"Philemon", "PHM", 81}, //75 + {"Philippians", "PHP", 74}, //76 + {"Proverbs", "PR", 20}, //77 + {"Psalms", "PS", 19}, //78 + {"Revelation", "RE", 90}, //79 + {"Romans", "RM", 69}, //80 + {"Romans", "RO", 69}, //81 + {"Ruth", "RU", 8}, //82 + {"Sirach", "SI", 44}, //83 + {"Song of Solomon", "SOL", 22}, //84 + {"Song of Solomon", "SON", 22}, //85 + {"Song of Solomon", "SS", 22}, //86 + {"Susanna", "SU", 48}, //87 + {"Titus", "TI", 80}, //88 + {"Tobit", "TO", 40}, //89 + {"Wisdom", "WI", 43}, //90 + {"Zechariah", "ZEC", 38}, //91 + {"Zephaniah", "ZEP", 36} //92 + }, + +string BookFileName[91] = { + "","Genesis","Exodus","Lev","Num","Deut","Joshua","Judges", // 0 - 7 + "Ruth","1Sam","2Sam","1Kings","2Kings","1Chron","2Chron", // 8 - 14 + "Ezra","Nehemiah","Esther","Job","Psalms","Proverbs", // 15-20 + "Eccl","Song","Isaiah","Jeremiah","Lament","Ezekiel", // 21-26 + "Daniel","Hosea","Joel","Amos","Obadiah","Jonah","Micah", // 27-33 + "Nahum","Habakkuk","Zeph","Haggai","Zech","Malachi", // 34-39 + "Tobit","Judith","Esther","Wisdom","Sirach","Baruch", // 40-45 + "Let","Azar","Susanna","Bel","1Mac","2Mac","1Esdras", // 46-52 + "Man","P1","3Mac","2Esdras","4Mac","","","","","","", // 53-63 + "Matthew","Mark","Luke","John","Acts","Romans","1Cor", // 64-70 + "2Cor","Gal","Eph","Philip","Col","1Thes","2Thes","1Tim", // 71-78 + "2Tim","Titus","Philemon","Hebrews","James","1Peter", // 79-84 + "2Peter","1John","2John","3John","Jude","Rev"}; // 85-90 + +class TReadGBF { +private: + FILE *fp; + string FName, TokenLine; + int TokenPos; + bool fFileIsOpen, fParagraphEnd, fInTitle, fInPsalmBookTitle, fInHebrewTitle, fInSectionTitle; + +public: + string sBook, sChapter, sVerse, sMode; + string sContext; // Last text type (header, body, or tail) + string sTitle; // Title of this book of the Bible + string sPsalmBookTitle; // Title of this Psalm book + string 
sHebrewTitle; // Psalm Hebrew title
+	string sSectionTitle; // Section headings
+	string sDate;
+	string sFontName;
+	int iTotalWords;
+	char chJustification, chDirection;
+	bool fIndent, fPoetry;
+	int CharAttribs;
+	char bBk, bChap, bVs, bWd;
+
+	bool Init(const string sFileName);
+	void Done();
+	string GetToken(TToken &TokenKind);
+  end;
+
+class TWriteGBF {
+  private:
+    F: TextFile;
+    FName, LineOut: string;
+    fFileIsOpen: boolean;
+    bBk, bChap, bVs, bWd: byte;
+
+  public
+
+    function Init(const sFileName: string): boolean;
+    function Done: boolean;
+    procedure Out(const s: string);
+  end;
+
+function isletter(const ch: char): boolean;
+function isinword(const ch: char): boolean;
+function IsDigit(const ch: char): Boolean;
+function IsUpper(const ch: char): Boolean;
+function ConformCase(const sPat, sSrc: string): string;
+function BookNameToNumber(const sBookName: string): byte;
+
+implementation
+
+{ True when ch is an unaccented ASCII letter ('A'..'Z' or 'a'..'z'). }
+function isletter(const ch: char): boolean;
+begin
+  isletter := ch in ['A'..'Z', 'a'..'z'];
+end;
+
+{ True when ch may appear inside a word: an ASCII letter or a hyphen. }
+function isinword(const ch: char): boolean;
+begin
+  isinword := ch in ['-', 'A'..'Z', 'a'..'z'];
+end;
+
+{ True when ch is an ASCII upper-case letter. }
+function IsUpper(const ch: char): Boolean;
+begin
+  IsUpper := ch in ['A'..'Z'];
+end;
+
+{ True when ch is an ASCII decimal digit. }
+function IsDigit(const ch: char): Boolean;
+begin
+  IsDigit := ch in ['0'..'9'];
+end;
+
+
+{ True when sName starts with sAbbrev, comparing the upper-cased characters of
+  sName against sAbbrev as written (the table abbreviations are upper case).
+  A name shorter than the abbreviation never matches. }
+function MatchAbbrev(const sName, sAbbrev: string): boolean;
+var i: integer;
+begin
+  Result := Length(sName) >= Length(sAbbrev);
+  i := 1;
+  while (i <= Length(sAbbrev)) and Result do
+    begin
+      if UpCase(sName[i]) <> sAbbrev[i] then
+        Result := false;
+      inc(i);
+    end;
+end;
+
+{ Converts a book name, abbreviation, or decimal book number to its book
+  number. Returns 0 when nothing matches.
+  - A string whose first and last characters are digits is parsed with
+    StrToInt; a parse failure yields 0 and falls through to the table search.
+  - Otherwise the first BookAbbr entry whose Abbr is a prefix of sBookName
+    (see MatchAbbrev) supplies the number. }
+function BookNameToNumber(const sBookName: string): byte;
+var i: integer;
+begin
+  Result := 0;
+  // An empty name can match nothing; return early instead of indexing
+  // sBookName[0] and relying on the exception handler below.
+  if sBookName = '' then
+    Exit;
+  try
+    if IsDigit(sBookName[Length(sBookName)]) and IsDigit(sBookName[1]) then
+      Result := StrToInt(sBookName);
+  except
+    Result := 0;
+  end;
+  i := 0;
+  while (Result = 0) and (i <= 115) do // Yuk! Linear search.
+    begin
+      if MatchAbbrev(sBookName,BookAbbr[i].Abbr) then
+        begin
+          Result := BookAbbr[i].Num;
+        end;
+      inc(i);
+    end;
+end;
+
+{ Returns the full name of book number bBookNum, or '' when the table has no
+  entry with that number.
+  The abbreviation table is ordered by abbreviation, not by book number, so the
+  entry has to be found by its Num field; indexing the table directly with the
+  book number (as this routine used to) returns an unrelated book. }
+function BookNumberToName(const bBookNum: byte): string;
+var i: integer;
+begin
+  Result := '';
+  i := 0;
+  while (Result = '') and (i <= 115) do
+    begin
+      if BookAbbr[i].Num = bBookNum then
+        Result := BookAbbr[i].Name;
+      inc(i);
+    end;
+end;
+
+{ Returns sSrc re-cased to follow the capitalisation pattern of sPat:
+  all lower case by default, first letter upper-cased when sPat starts with an
+  upper-case letter, and everything from the second character on upper-cased
+  when the second character of sPat is upper case. If either string is empty,
+  sSrc is returned unchanged. }
+function ConformCase(const sPat, sSrc: string): string;
+var i: integer;
+begin
+  Result := sSrc;
+  if (Length(sPat) > 0) and (Length(sSrc) > 0) then
+    begin
+      Result := LowerCase(sSrc);
+      if IsUpper(sPat[1]) then
+        Result[1] := UpCase(Result[1]);
+      if (Length(sPat) > 1) and (Length(sSrc) > 1) then
+        begin
+          if IsUpper(sPat[2]) then
+            begin
+              for i := 2 to Length(Result) do
+                Result[i] := UpCase(Result[i]);
+            end;
+        end;
+    end;
+end;
+
+{ Opens sFileName for reading, resets the position counters, loads the first
+  line, then consumes tokens until end of file or until a header token whose
+  third character is '0' has been read. Returns true on success; any exception
+  while opening or reading yields false and leaves the reader marked closed. }
+function TReadGBF.Init(const sFileName: string): boolean;
+var s: string;
+    tok: TToken;
+begin
+  try
+    fParagraphEnd := false;
+    bBk := 0;
+    bChap := 0;
+    bVs := 0;
+    bWd := 0;
+    iTotalWords := 0;
+    FName := sFileName;
+    Assign(F, FName);
+    reset(F);
+    readln(F, TokenLine);
+    TokenPos := 1;
+    fFileIsOpen := true;
+    repeat
+      s := GetToken(tok)
+    until (tok = tokEOF) or ((tok = tokHeader) and (s[3] = '0'));
+    Init := true;
+  except
+    Init := false;
+    fFileIsOpen := false;
+  end
+end;
+
+{ Closes the input file if it is open; safe to call more than once. }
+procedure TReadGBF.Done;
+begin
+  if fFileIsOpen then
+    begin
+      closefile(F);
+      fFileIsOpen := false;
+    end;
+end;
+
+function TReadGBF.GetToken(var TokenKind: TToken): string;
+var m: integer;
+begin
+  Result := '';
+  TokenKind := tokNull;
+  if TokenPos = 0 then
+    begin
+      if (not fFileIsOpen) or EOF(F) then
+        TokenKind := tokEOF
+      else
+        begin
+          ReadLn(F,TokenLine);
+          TokenPos := 1;
+        end;
+    end;
+  if TokenKind <> tokEOF then
+    begin
+      m := Length(TokenLine);
+      if TokenPos > m then
+        begin
+          TokenKind := tokSpace;
+          if fParagraphEnd then
+            fParagraphEnd := false
+          else
+ Result := ' '; + TokenPos := 0; + end + else + begin + if (TokenLine[TokenPos] = '<') then + begin + fParagraphEnd := false; + repeat + Result := Result + TokenLine[TokenPos]; + inc(TokenPos); + until (TokenLine[TokenPos] = '>') or (TokenPos > m); + Result := Result + '>'; + inc(TokenPos); + case result[2] of + 'B': begin // File body text type + TokenKind := tokContent; + sContext := Result; + end; + 'C': begin // Special characters + TokenKind := tokControl; + if (Result[3] = 'M') or (Result[3] = 'L') then + fParagraphEnd := true; + end; + 'D': begin // Direction + TokenKind := tokControl; + chDirection := Result[3]; + end; + 'H': begin + TokenKind := tokHeader; + sContext := Result; + end; + 'F': begin // Font attributes + TokenKind := tokFont; + case Result[3] of + 'B': CharAttribs := CharAttribs + [caBold]; + 'C': CharAttribs := CharAttribs + [caSmallCaps]; + 'I': CharAttribs := CharAttribs + [caItalic]; + 'N': sFontName := copy(Result,4,Length(Result)-4); + 'O': CharAttribs := CharAttribs + [caOTQuote]; + 'R': CharAttribs := CharAttribs + [caRed]; + 'S': CharAttribs := CharAttribs + [caSuperscript]; + 'U': CharAttribs := CharAttribs + [caUnderline]; + 'V': CharAttribs := CharAttribs + [caSubscript]; + 'b': CharAttribs := CharAttribs - [caBold]; + 'c': CharAttribs := CharAttribs - [caSmallCaps]; + 'i': CharAttribs := CharAttribs - [caItalic]; + 'n': sFontName := ''; + 'o': CharAttribs := CharAttribs - [caOTQuote]; + 'r': CharAttribs := CharAttribs - [caRed]; + 's': CharAttribs := CharAttribs - [caSuperscript]; + 'u': CharAttribs := CharAttribs - [caUnderline]; + 'v': CharAttribs := CharAttribs - [caSubscript]; + + end; + end; + 'J': begin // Justification + TokenKind := tokStyle; + chJustification := Result[3]; + end; + 'P': begin // Poetry/prose, indent + TokenKind := tokControl; + case Result[3] of + 'I': fIndent := true; + 'P': fPoetry := true; + 'i': fIndent := false; + 'p': fPoetry := false; + end; + end; + 'R': begin // References and footnotes + 
TokenKind := tokControl; + end; + 'S': begin // sync mark + TokenKind := TokSync; + case Result[3] of + 'B': begin // Book + sBook := system.copy(Result, 4, length(Result)-4); + sPsalmBookTitle := ''; + if sBook = '' then + begin + inc(bBk); + sBook := BookNumberToName(bBk); + end + else + bBk := BookNameToNumber(sBook); + sTitle := sBook; + end; + 'C': begin //chapter + sChapter := system.copy(Result, 4, length(Result)-4); + if sChapter = '' then + begin + inc(bChap); + sChapter := IntToStr(bChap); + end + else + begin + try + bChap := StrToInt(sChapter); + except + showmessage('Non-numeric chapter: '+sBook+' '+sChapter); + end; + end; + sHebrewTitle := ''; + end; + 'V': begin // Verse + bWd := 0; + sVerse := system.copy(Result, 4, length(Result)-4); + if sVerse = '' then + begin + inc(bVs); + sVerse := IntToStr(bVs); + end + else + begin + try + bVs := StrToInt(sVerse); + except + showmessage('Non-numeric verse: '+sBook+' '+sChapter+':'+sVerse); + end; + end; + end; + 'D': begin // Date + sDate := system.copy(Result, 3, length(Result)-4); + end; + end; + end; + 'T': begin // Titles + TokenKind := TokContent; + case Result[3] of + 'B': + begin + sPsalmBookTitle := ''; + fInPsalmBookTitle := true; + end; + 'b': fInPsalmBookTitle := true; + 'H': + begin + sHebrewTitle := ''; + fInHebrewTitle := true; + end; + 'h': fInHebrewTitle := false; + 'S': + begin + sSectionTitle := ''; + fInSectionTitle := true; + end; + 's': fInSectionTitle := false; + 'T': + begin + sTitle := ''; + fInTitle := true; + end; + 't': fInTitle := false; + end; + end; + 'Z': begin // File tail + TokenKind := tokTail; + sContext := Result; + if Result[3] = 'Z' then + done; + end; + else + TokenKind := TokControl; + + end; + end + else if isletter(TokenLine[TokenPos]) then + begin {Word} + fParagraphEnd := false; + TokenKind := tokWord; + repeat + Result := Result + TokenLine[TokenPos]; + inc(TokenPos); + until (TokenPos > m) or (not isinword(TokenLine[TokenPos])); + inc(bWd); + inc(iTotalWords); + 
end + else if ((TokenLine[TokenPos] = ' ') or (TokenLine[TokenPos] = #9)) then + begin + fParagraphEnd := false; + TokenKind := tokSpace; + Result := Result + TokenLine[TokenPos]; + inc(TokenPos); + end + else + begin + fParagraphEnd := false; + TokenKind := tokChar; + Result := Result + TokenLine[TokenPos]; + inc(TokenPos); + end + end; + end; + if ((TokenKind = tokWord) or (TokenKind = tokSpace) or + (TokenKind = tokChar)) then + begin + if fInTitle then + sTitle := sTitle + Result + else if fInPsalmBookTitle then + sPsalmBookTitle := sPsalmBookTitle + Result + else if fInHebrewTitle then + sHebrewTitle := sHebrewTitle + Result + else if fInSectionTitle then + sSectionTitle := sSectionTitle + Result; + end; +end; + +function TWriteGBF.Init(const sFileName: string): boolean; +begin + try + bBk := 0; + bChap := 0; + bVs := 0; + bWd := 0; + LineOut := ''; + FName := sFileName; + Assign(F, FName); + filemode := 1; + rewrite(F); + fFileIsOpen := true; + Init := true; + except + Init := false; + fFileIsOpen := false; + end +end; + +function TWriteGBF.Done: boolean; +begin + try + if fFileIsOpen then + begin + if LineOut <> '' then + begin + WriteLn(F, LineOut); + LineOut := ''; + end; + CloseFile(F); + end; + Done := true; + except + Done := false; + end; +end; + +procedure TWriteGBF.Out(const s: string); +var sPrint, sSave, sBook, sChapter, sVerse: string; + i: integer; + b: byte; +begin + if (Length(s) > 0) and IsLetter(s[1]) then + begin + inc(bWd); + LineOut := LineOut + s; + end + else if Length(s) > 3 then + begin + if (s[1] = '<') and (s[2] = 'S') then + begin + case s[3] of + 'B': begin // Book + sBook := system.copy(s, 4, length(s)-4); + if sBook = '' then + begin + inc(bBk); + LineOut := LineOut + s; + end + else + begin + b := bBk; + bBk := BookNameToNumber(sBook); + if b <> bBk then + LineOut := LineOut + s; + end; + end; + 'C': begin //chapter + sChapter := system.copy(s, 4, length(s)-4); + if sChapter = '' then + begin + inc(bChap); + LineOut := LineOut + 
s; + end + else + begin + try +// b := bChap; + bChap := StrToInt(sChapter); +// if b <> bChap then + LineOut := LineOut + s; + except + showmessage('Non-numeric chapter: '+sBook+' '+sChapter); + end; + end; + end; + 'V': begin // Verse + bWd := 0; + sVerse := system.copy(s, 4, length(s)-4); + if sVerse = '' then + begin + inc(bVs); + LineOut := LineOut + s; + end + else + begin + try +// b := bVs; + bVs := StrToInt(sVerse); +// if b <> bVs then + LineOut := LineOut + s; + except + showmessage('Non-numeric verse: '+sBook+' '+sChapter+':'+sVerse); + end; + end; + end; + else + LineOut := LineOut + s; + end + end + else + LineOut := LineOut + s; // Not a sync mark + end + else // other token, space, or punctuation + LineOut := LineOut + s; // Length <= 3 + if ((s = '<CM>') or (s = '<CL>')) then + begin + if (Length(LineOut) > 78) then + begin + i := 78; + while (i > 0) and (LineOut[i] <> ' ') do + dec(i); + if i < 1 then + begin + WriteLn(F,LineOut); + LineOut := ''; + end + else + begin + sPrint := system.copy(LineOut,1,i-1); + sSave := system.copy(LineOut,i+1,Length(LineOut)-i); + WriteLn(F,sPrint); + WriteLn(F, sSave); + LineOut := ''; + end + end + else + begin + WriteLn(F, LineOut); + LineOut := ''; + end + end + else if (Length(LineOut) > 78) then + begin + i := 78; + while (i > 0) and (LineOut[i] <> ' ') do + dec(i); + if i < 1 then + begin + WriteLn(F,LineOut); + LineOut := ''; + end + else + begin + sPrint := system.copy(LineOut,1,i-1); + sSave := system.copy(LineOut,i+1,Length(LineOut)-i); + WriteLn(F,sPrint); + LineOut := sSave; + end + end +end; + +end. 
diff --git a/src/modules/texts/rawgbf/gbf.h b/src/modules/texts/rawgbf/gbf.h
new file mode 100644
index 0000000..b695759
--- /dev/null
+++ b/src/modules/texts/rawgbf/gbf.h
@@ -0,0 +1,67 @@
/* Header for module GBF, generated by p2c */
#ifndef GBF_H
#define GBF_H
/* p2c: Gbf.pas, line 5: Warning: Could not find module SYSUTILS [271] */


#include "sysutils.h"
/* p2c: Gbf.pas, line 5: Warning: Could not find module DIALOGS [271] */
#include "dialogs.h"


/* vextern expands to nothing when GBF_G is defined and to `extern` otherwise. */
#ifdef GBF_G
# define vextern
#else
# define vextern extern
#endif



/* Kinds of token a GBF reader can report. */
typedef enum {
  tokNull, tokEOF, tokHeader, tokContent, tokTail, tokStyle, tokWord,
  tokSpace, tokSync, tokControl, tokChar, tokFont
} TToken;
/* Individual character attributes. */
typedef enum {
  caBold, caSmallCaps, caItalic, caOTQuote, caRed, caSuperscript, caUnderline,
  caSubscript
} TCharacterAttribute;
/* NOTE(review): presumably p2c's bit-set of TCharacterAttribute values - confirm. */
typedef long TCharAttribs;



/* One book table entry: two 256-char strings (Name, Abbr) and a numeric id. */
typedef struct TBookNameRec {
  Char Name[256], Abbr[256];
  uchar Num;
} TBookNameRec;

/* 116 entries, i.e. valid indices are 0..115. */
typedef TBookNameRec TBookAbbr[116];
/* p2c: Gbf.pas, line 25:
 * Warning: Expected an expression, found a '/' [227] */
/* p2c: Gbf.pas, line 25:
 * Warning: Expected an expression, found a '/' [227] */
/* p2c: Gbf.pas, line 25: Warning: Division by zero [163] */
/* p2c: Gbf.pas, line 26: Warning: Division by zero [163] */
/* p2c: Gbf.pas, line 26: Warning: Expected a ')', found a '(' [227] */
/* p2c: Gbf.pas, line 144:
 * Warning: Expected an expression, found a '/' [227] */
/* p2c: Gbf.pas, line 144:
 * Warning: Expected an expression, found a '/' [227] */
/* p2c: Gbf.pas, line 144: Warning: Division by zero [163] */
/* p2c: Gbf.pas, line 144: Warning: Division by zero [163] */
/* p2c: Gbf.pas, line 145: Warning: Mixing non-strings with strings [170] */
/* p2c: Gbf.pas, line 145:
 * Warning: Expected a ')', found a string literal [227] */


extern TBookAbbr BookAbbr;

extern Char BookFileName[91][256];

vextern Char STR1[256];


#undef vextern

#endif /*GBF_H*/

/* End.
*/ diff --git a/src/modules/texts/rawgbf/gbfidx.cpp b/src/modules/texts/rawgbf/gbfidx.cpp new file mode 100644 index 0000000..8337d62 --- /dev/null +++ b/src/modules/texts/rawgbf/gbfidx.cpp @@ -0,0 +1,294 @@ +/***************************************************************************** + * + * This code wreaks but works (at least for WEB). Good luck! + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <fcntl.h> +#include <versekey.h> + + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size); +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size); +void openfiles(char *fname); +void checkparams(int argc, char **argv); + + +VerseKey key1, key2, key3; +int fp, vfp, cfp, bfp; +long chapoffset; +short chapsize; +char testmnt; + + +main(int argc, char **argv) +{ + long pos, offset; + int num1, num2, rangemax, curbook = 0, curchap = 0, curverse = 0; + char buf[127], startflag = 0; + short size, tmp; + + checkparams(argc, argv); + + openfiles(argv[1]); + + testmnt = key1.Testament(); + num1 = key1.Chapter(); + num2 = key1.Verse(); + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + while(!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) { + if (!startflag) { + startflag = 1; + } + else { + if (num2 < key2.Verse()) { // new chapter + if (num1 <= key2.Chapter()) { // new book + key2.Verse(1); + key2.Chapter(1); + key2.Book(key2.Book()+1); + } + printf("Found Chapter Break: %d ('%s')\n", num1, (const char *)key2); + chapoffset = offset; + chapsize = size; +// 
continue; + } + } + key2.Verse(1); + key2.Chapter(num1); + key2.Verse(num2); + + key3 = key2; +// key3 += (rangemax - key3.Verse()); + + writeidx(key1, key2, key3, offset, size); + } + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +/************************************************************************** + * ENT: key1 - current location of index + * key2 - minimum keyval for which this offset is valid + * key3 - maximum keyval for which this offset is valid + */ + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size) +{ + long pos; + short tmp; + + for (; ((key1 <= key3) && (key1.Error() != KEYERR_OUTOFBOUNDS) && (key1.Testament() == testmnt)); key1+=1) { + if (key1.Verse() == 1) { // new chapter + if (key1.Chapter() == 1) { // new book + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Book intro (vss) set to same as chap for now(it should be chap 1 which usually contains the book into anyway)*/ + write(vfp, &chapsize, 2); + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Chapter intro */ + write(vfp, &chapsize, 2); + } + if (key1 >= key2) { + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + else { + pos = 0; + tmp = 0; + write(vfp, &pos, 4); + write(vfp, &tmp, 2); + } + } +} + + +char startchap(char *buf) +{ + char loop; + + if (buf[0] != '<') + return 0; + if (buf[1] != 'S') + return 0; + if (buf[2] != 'C') + return 0; +/* + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } +*/ + return 1; +} + + +char startentry(char *buf) +{ + char loop; + + if (buf[0] != '<') + return 0; + if (buf[1] != 'S') + return 0; + if (buf[2] != 'V') + return 0; +/* + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; 
loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } +*/ + return 1; +} + + +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size) +{ + char buf[7]; + char buf2[20]; + char ch; + char loop; + long offset2; + int ch2, vs2, rm2; + bool flag; + long chapstart = 0; + + memset(buf, ' ', 7); + + while (1) { + if (startchap(buf)) { + chapstart = lseek(fp, 0, SEEK_CUR) - 7; + memset(buf, ' ', 3); + flag = false; + for (loop = 3; loop < 6; loop++) { + if (isdigit(buf[loop])) + flag = true; + else { + buf[loop] = 0; + break; + } + } + if (flag) + *num1 = atoi(buf); + else (*num1)++; + } + if (startentry(buf)) { + memset(buf, ' ', 3); + flag = false; + for (loop = 3; loop < 6; loop++) { + if (isdigit(buf[loop])) + flag = true; + else { + buf[loop] = 0; + break; + } + if (flag) + *num2 = atoi(buf); + else (*num2)++; + } + loop++; + if (size) + *offset = lseek(fp, 0, SEEK_CUR) - (7 - loop); + else *offset = (chapstart) ? chapstart : lseek(fp, 0, SEEK_CUR) - 7; + if (size) { + ch2 = *num1; + vs2 = *num2; + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + if (vs2) { + *size = (offset2 - (*offset)); + } + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + + +void openfiles(char *fname) +{ +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. 
+ char buf[255]; + + if ((fp = open(fname, O_RDONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", fname); + exit(1); + } + + sprintf(buf, "%s.vss", fname); + if ((vfp = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", fname); + if ((cfp = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", fname); + if ((bfp = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } +} + + +void checkparams(int argc, char **argv) +{ + if (argc < 2) { + fprintf(stderr, "usage: %s <file to process> [nt - for new testmt file]\n", argv[0]); + exit(1); + } + if (argc == 3) + key1 = key2 = key3 = "Matthew 1:1"; + else key1 = key2 = key3 = "Genesis 1:1"; +} diff --git a/src/modules/texts/rawgbf/rawgbf.cpp b/src/modules/texts/rawgbf/rawgbf.cpp new file mode 100644 index 0000000..dd2fd47 --- /dev/null +++ b/src/modules/texts/rawgbf/rawgbf.cpp @@ -0,0 +1,84 @@ +/****************************************************************************** + * rawgbf.cpp - code for class 'RawGBF'- a module that reads raw text files: + * ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawverse.h> +#include <rawgbf.h> + + +/****************************************************************************** + * RawGBF Constructor - Initializes data for instance of RawGBF + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawGBF::RawGBF(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp) : SWText(iname, idesc, idisp), RawVerse(ipath) +{ +} + + 
+/****************************************************************************** + * RawGBF Destructor - Cleans up instance of RawGBF + */ + +RawGBF::~RawGBF() +{ +} + + +/****************************************************************************** + * RawGBF::operator char * - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +RawGBF::operator char*() +{ + long start; + unsigned short size; + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + if (!key) + key = new VerseKey(this->key); + + + findoffset(key->Testament(), key->Index(), &start, &size); + + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ size * 3 ]; // extra for conversion to RTF or other. + + readtext(key->Testament(), start, size + 1, entrybuf); + preptext(entrybuf); + RenderText(entrybuf, size * 3); + + if (key != this->key) + delete key; + + return entrybuf; +} diff --git a/src/modules/texts/rawtext/Makefile b/src/modules/texts/rawtext/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/texts/rawtext/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. 
+ +all: + make -C ${root} diff --git a/src/modules/texts/rawtext/Makefile.am b/src/modules/texts/rawtext/Makefile.am new file mode 100644 index 0000000..d0e1d7e --- /dev/null +++ b/src/modules/texts/rawtext/Makefile.am @@ -0,0 +1,4 @@ +rawtextdir = $(top_srcdir)/src/modules/texts/rawtext + +libsword_la_SOURCES += $(rawtextdir)/rawtext.cpp + diff --git a/src/modules/texts/rawtext/kjvidx.cpp b/src/modules/texts/rawtext/kjvidx.cpp new file mode 100644 index 0000000..708a9e6 --- /dev/null +++ b/src/modules/texts/rawtext/kjvidx.cpp @@ -0,0 +1,169 @@ +#include <stdio.h> +#include <fcntl.h> +#include <versekey.h> + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fp, vfp, cfp, bfp; + long pos; + short size, tmp; + int num1, num2, offset, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + VerseKey mykey; + + if ((argc < 2) || (argc > 3)) { + fprintf(stderr, "usage: %s <file to process> [nt]\n", argv[0]); + exit(1); + } + + if ((fp = open(argv[1], O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + mykey = (argc == 3) ? 
"Matthew 1:1" : "Genesis 1:1"; + + while (!findbreak(fp, &offset, &num1, &num2, &size)) { + num1 = mykey.Chapter(); + num2 = mykey.Verse(); + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Book intro (vss) */ + write(vfp, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Chapter intro */ + write(vfp, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7d\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence\n"); + break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of sequence\n"); + break; + } + write(vfp, &offset, 4); + write(vfp, &size, 2); + mykey++; + } + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size) +{ + char buf[17]; + char buf2[7]; + char loop; + char offadj, inquotes, sizeadj; + int offset2, ch2, vs2; + + memset(buf, ' ', 17); + + while (1) { + offadj = -10; + inquotes = 0; + sizeadj = 0; + if ((!memcmp(buf, "\\widctlpar {\\b\\f0\\cf2 ", 16)) && (!size)) { + offadj = -1; +// inquotes = 1; + sizeadj = -18; + } + if (!memcmp(&buf[1], "\\f0\\fs16\\cf2\\up6", 15)) { + offadj = 0; + inquotes = 1; + sizeadj = (*buf == 10) ? 
-18:-17; + } + if (!memcmp(buf, "\\fi200\\widctlpar", 16)) { + offadj = -1; +// inquotes = 1; + sizeadj = -18; + } + if (offadj > -10) { + *offset = lseek(fp, 0, SEEK_CUR) + offadj; + if (size) { + (*offset)++; + while (inquotes) { + while (read(fp, buf2, 1) == 1) { + if (*buf2 == '}') + break; + (*offset)++; + } + inquotes--; + } + if (findbreak(fp, &offset2, &ch2, &vs2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + sprintf(buf2, "%d:%d", ch2, vs2); + *size = (offset2 - (*offset)); + } + lseek(fp, *offset+17, SEEK_SET); + } + else (*offset) += sizeadj; + return 0; + } + memmove(buf, &buf[1], 16); + if (read(fp, &buf[16], 1) != 1) + return 1; + } +} + diff --git a/src/modules/texts/rawtext/makebnds.c b/src/modules/texts/rawtext/makebnds.c new file mode 100644 index 0000000..44da447 --- /dev/null +++ b/src/modules/texts/rawtext/makebnds.c @@ -0,0 +1,86 @@ +#include <stdio.h> +#include <fcntl.h> + + +char *bnames[] = { + "Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy", + "Joshua", "Judges", "Ruth", "I Samual", "II Samuel", + "I Kings", "II Kings", "I Chronicles", "II Chronicles", "Ezra", + "Nehemiah", "Esther", "Job", "Psalms", "Proverbs", + "Ecclesiastes", "Song of Solomon", "Isaiah", "Jeremiah", "Lamentations", + "Ezekiel", "Daniel", "Hosea", "Joel", "Amos", + "Obadiah", "Jonah", "Micah", "Nahum", "Habakkuk", + "Zephaniah", "Haggai", "Zechariah", "Malachi", + "Matthew", "Mark", "Luke", "John", "Acts", + "Romans", "I Corinthians", "II Corinthians", "Galatians", "Ephesians", + "Philippians", "Colossians", "I Thessalonians", "II Thessalonians", "I Timothy", + "II Timothy", "Titus", "Philemon", "Hebrews", "James", + "I Peter", "II Peter", "I John", "II John", "III John", + "Jude", "Revelation of John"}; + + + +main(int argc, char **argv) +{ + int fp, vfp, cfp, bfp; + long pos; + int num1, num2, offset, offset2, chapmax, chapoff, chapoff2, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + + if (argc > 3) { + 
fprintf(stderr, "usage: %s <file to process> [NT?]\n", argv[0]); + exit(1); + } + + if (argc > 2) + curbook = 39; + sprintf(buf, "%s.vss", argv[1]); + if ((vfp = open(buf, O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfp = open(buf, O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfp = open(buf, O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + read(bfp, &offset2, sizeof(offset2)); + read(cfp, &chapoff2, sizeof(chapoff2)); + while (read(bfp, &offset, sizeof(offset)) == sizeof(offset)) { + chapmax = (offset - offset2) / sizeof(offset); + printf("\n\{\"%s\", %d}, \n// %s\n", bnames[curbook], chapmax, bnames[curbook]); + curbook++; + for (curchap = 0; curchap < chapmax; curchap++) { + read(cfp, &chapoff, sizeof(chapoff)); + printf("%d, ", (chapoff - chapoff2) / sizeof(chapoff)); + chapoff2 = chapoff; + } + offset2 = offset; + } + pos = lseek(cfp, 0, SEEK_CUR); + offset = (int) lseek(cfp, 0, SEEK_END); + chapmax = (offset - offset2) / sizeof(offset); + printf("\n\{\"%s\", %d}, \n// %s\n", bnames[curbook], chapmax, bnames[curbook]); + curbook++; + lseek(cfp, pos, SEEK_SET); + for (curchap = 0; curchap < chapmax - 1; curchap++) { + read(cfp, &chapoff, sizeof(chapoff)); + printf("%d, ", (chapoff - chapoff2) / sizeof(chapoff)); + chapoff2 = chapoff; + } + chapoff = (int) lseek(vfp, 0, SEEK_END); + printf("%d, ", (chapoff - chapoff2) / sizeof(chapoff)); + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} diff --git a/src/modules/texts/rawtext/nuidx.cpp b/src/modules/texts/rawtext/nuidx.cpp new file mode 100644 index 0000000..edf298d --- /dev/null +++ b/src/modules/texts/rawtext/nuidx.cpp @@ -0,0 +1,238 @@ +/***************************************************************************** + * + * This code wreaks but works (at least for MHC). Good luck! 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <fcntl.h> +#include <versekey.h> + + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size); +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size); +void openfiles(char *fname); +void checkparams(int argc, char **argv); + + +VerseKey key1, key2, key3; +int fp, vfp, cfp, bfp; +long chapoffset; +short chapsize; +char testmnt; + + +main(int argc, char **argv) +{ + long pos, offset; + int num1, num2, rangemax, curbook = 0, curchap = 0, curverse = 0; + char buf[127], startflag = 0; + short size, tmp; + + checkparams(argc, argv); + + openfiles(argv[1]); + + testmnt = key1.Testament(); + num1 = key1.Chapter(); + num2 = key1.Verse(); + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + while(!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) { + writeidx(key1, key2, key3, offset, size); + key2++; + key3 = key2; + } + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +/************************************************************************** + * ENT: key1 - current location of index + * key2 - minimum keyval for which this offset is valid + * key3 - maximum keyval for which this offset is valid + */ + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size) +{ + long pos; + short tmp; + + for (; ((key1 <= key3) && (key1.Error() != KEYERR_OUTOFBOUNDS) && (key1.Testament() == testmnt)); key1+=1) { + if (key1.Verse() == 1) { // new chapter + if (key1.Chapter() == 1) { // new 
book + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Book intro (vss) set to same as chap for now(it should be chap 1 which usually contains the book into anyway)*/ + write(vfp, &chapsize, 2); + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Chapter intro */ + write(vfp, &chapsize, 2); + } + if (key1 >= key2) { + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + else { + pos = 0; + tmp = 0; + write(vfp, &pos, 4); + write(vfp, &tmp, 2); + } + } +} + + +char startchap(char *buf) +{ + char loop; + + if (buf[0] != '<') + return 0; + if (buf[1] != 'S') + return 0; + if (buf[2] != 'C') + return 0; +/* + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } +*/ + return 1; +} + + +char startentry(char *buf) +{ + char loop; + + if (buf[0] != '<') + return 0; + if (buf[1] != 'S') + return 0; + if (buf[2] != 'V') + return 0; +/* + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } +*/ + return 1; +} + + +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size) +{ + char buf[7]; + char buf2[20]; + char ch; + char loop; + long offset2; + int ch2, vs2, rm2; + bool flag; + long chapstart = 0; + + memset(buf, ' ', 7); + + while (1) { + if (startentry(buf)) { + if (size) + *offset = lseek(fp, 0, SEEK_CUR) - 3; + else *offset = lseek(fp, 0, SEEK_CUR) - 7; + if (size) { + ch2 = *num1; + vs2 = *num2; + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + *size = (offset2 - (*offset)); + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + 
/*
 * Review annotations for the whitespace-collapsed patch text that follows
 * (each '+' marks the start of an original source line).  The code itself is
 * left untouched; "NOTE(review)" marks items to confirm or fix upstream.
 *
 * Leading statements (end of a scanning loop that begins earlier in the patch):
 *   shift the byte window left by one, read the next byte into buf[6], and
 *   return 1 once read() no longer delivers exactly one byte.
 *
 * openfiles(fname):
 *   Opens fname read-only into fp, then "<fname>.vss", "<fname>.cps" and
 *   "<fname>.bks" with O_CREAT|O_WRONLY into vfp, cfp and bfp.  Any failed
 *   open prints "Couldn't open file: ..." to stderr and calls exit(1).
 *   NOTE(review): open() is passed O_CREAT without a mode argument, and the
 *   file names are built with unbounded sprintf into char buf[255].
 *
 * checkparams(argc, argv):
 *   Prints a usage line and exits when argc < 2.  Sets key1 = key2 = key3 to
 *   "Matthew 1:1" when argc == 3, otherwise to "Genesis 1:1".
 *
 * ojbtxidx.c main(argc, argv):
 *   Requires exactly one argument (the text file).  Opens it plus the .vss,
 *   .cps and .bks outputs, defining O_BINARY as 0 when the platform lacks it.
 *   Writes a 4-byte 0 to .bks and a 4-byte 4 to .cps, then two zeroed
 *   (4-byte offset, 2-byte size) records to .vss for the module and testament
 *   intros.  For every break reported by findbreak():
 *     - num2 == 1 starts a chapter; if num1 == 1 as well it starts a book.
 *       A new book appends the current .cps position to .bks, the current
 *       .vss position to .cps and a zeroed intro record to .vss.  A new
 *       chapter appends the current .vss position to .cps and a zeroed
 *       chapter-intro record to .vss.
 *     - a chapter number different from curchap is reported on stderr and
 *       curchap is resynchronised to num1 (the "break" is commented out).
 *     - a verse number different from curverse is reported, curverse is set
 *       to num2 and one extra record with size 0 is written before the real one.
 *     - the verse's (offset, size) record is appended to .vss.
 *   Closes all four descriptors and returns 0.
 *   NOTE(review): every open() with O_CREAT lacks a mode argument; sprintf
 *   into char buf[127] is unbounded; write(fd, &pos, 4) and
 *   write(vfd, &offset, 4) emit only the first four bytes in memory of a
 *   `long`, which is the low-order half only on little-endian targets when
 *   long is wider than 4 bytes; exit/memset/strlen/isdigit are used although
 *   only stdio.h, fcntl.h and io.h/unistd.h are included here.
 *
 * ojbtxidx.c findbreak(fd, offset, num1, num2, size):
 *   Slides a 7-character window over the input one byte at a time and
 *   returns 1 when read() hits end of input.
 *     - Chapter marker: window starts with LF (10) and has '*' at index 2, 3
 *       or 4.  Characters up to the first digit after the '*' are blanked,
 *       the number is scanned into *num1, and scanning continues.
 *     - Verse marker: '|' followed by a digit.  The number is scanned into
 *       *num2, *offset is set to the current file position minus
 *       (4 - number of digits), the input is consumed up to the next '|' (or
 *       read failure), *size becomes (position - *offset) - 1, the file is
 *       stepped back one byte and 0 is returned.
 *   NOTE(review): in the verse branch `loop` is compared with '|' before the
 *   first read() assigns it, so the first test sees a stale or
 *   uninitialised value.
 *
 * The text closes with the opening of rawtext.cpp: its banner comment, its
 * include list and the O_BINARY fallback definition.
 */
memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + + +void openfiles(char *fname) +{ + char buf[255]; + + if ((fp = open(fname, O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", fname); + exit(1); + } + + sprintf(buf, "%s.vss", fname); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", fname); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", fname); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } +} + + +void checkparams(int argc, char **argv) +{ + if (argc < 2) { + fprintf(stderr, "usage: %s <file to process> [nt - for new testmt file]\n", argv[0]); + exit(1); + } + if (argc == 3) + key1 = key2 = key3 = "Matthew 1:1"; + else key1 = key2 = key3 = "Genesis 1:1"; +} diff --git a/src/modules/texts/rawtext/ojbtxidx.c b/src/modules/texts/rawtext/ojbtxidx.c new file mode 100644 index 0000000..f70cc01 --- /dev/null +++ b/src/modules/texts/rawtext/ojbtxidx.c @@ -0,0 +1,166 @@ +#include <stdio.h> +#include <fcntl.h> +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + + +char findbreak(int fd, long *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fd, vfd, cfd, bfd; + long pos, offset; + short size, tmp; + int num1, num2, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + + if (argc != 2) { + fprintf(stderr, "usage: %s <file to process>\n", argv[0]); + exit(1); + } +#ifndef O_BINARY // O_BINARY is for Borland to be happy. 
If we're in GNU, just define it to a NULL mask +#define O_BINARY 0 +#endif + if ((fd = open(argv[1], O_RDONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfd = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfd = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfd = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfd, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfd, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfd, &pos, 4); /* Module intro */ + write(vfd, &size, 2); + write(vfd, &pos, 4); /* Testament intro */ + write(vfd, &size, 2); + + while (!findbreak(fd, &offset, &num1, &num2, &size)) { + + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfd, 0, SEEK_CUR); + write(bfd, &pos, 4); + pos = lseek(vfd, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfd, &pos, 4); + pos = 0; + tmp = 0; + write(vfd, &pos, 4); /* Book intro (vss) */ + write(vfd, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfd, 0, SEEK_CUR); + write(cfd, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfd, &pos, 4); /* Chapter intro */ + write(vfd, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7ld\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence (%2d:%3d:%3d)\n", curbook, num1-1, num2); + curchap = num1; +// break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of 
sequence (%2d:%3d:%3d)\n", curbook, num1, num2-1); +// break; + tmp = 0; + curverse = num2; + write(vfd, &offset, 4); + write(vfd, &tmp, 2); + } + write(vfd, &offset, 4); + write(vfd, &size, 2); + } + + close(vfd); + close(cfd); + close(bfd); + close(fd); + return 0; +} + + +char findbreak(int fd, long *offset, int *num1, int *num2, short *size) +{ + char buf[8]; + char buf2[7]; + char loop, len, star; + + memset(buf, ' ', 7); + buf[7] = 0; + + while (1) { + + memmove(buf, &buf[1], 6); + if (read(fd, &buf[6], 1) != 1) + return 1; + + if ((buf[0] == 10) && ((buf[2] == '*') || (buf[3] == '*') || (buf[4] == '*'))) { + star = 0; + for (loop = 0; loop < 7; loop++) { + if (buf[loop] == '*') + star = 1; + if (isdigit(buf[loop])&&star) + break; + else buf[loop] = ' '; + } + if (loop < 7) { + sscanf(buf, "%d", num1); + continue; + } + } + + if ((buf[0] == '|') && (isdigit(buf[1]))) { + sscanf(&buf[1], "%d", num2); + sprintf(buf, "%d", *num2); + (*offset) = lseek(fd, 0, SEEK_CUR); + (*offset) -= (4-strlen(buf)); + + for (len = 1; len == 1; len = read(fd, &loop, 1)) { + if (loop == '|') + break; + } + + *size = (short)(lseek(fd, 0, SEEK_CUR) - *offset) - 1; + lseek(fd, -1, SEEK_CUR); + break; + } + } + return 0; +} + diff --git a/src/modules/texts/rawtext/rawtext.cpp b/src/modules/texts/rawtext/rawtext.cpp new file mode 100644 index 0000000..c2214f8 --- /dev/null +++ b/src/modules/texts/rawtext/rawtext.cpp @@ -0,0 +1,580 @@ +/****************************************************************************** + * rawtext.cpp - code for class 'RawText'- a module that reads raw text files: + * ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawverse.h> +#include <rawtext.h> + +#include <map> +#include <list> +#include <algorithm> +#include <regex.h> // GNU + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + 
/*
 * Review annotations for the whitespace-collapsed patch text that follows
 * (each '+' marks the start of an original source line).  The code itself is
 * left untouched; "NOTE(review)" marks items to confirm or fix upstream.
 *
 * ---- rawtext.cpp ----------------------------------------------------------
 * RawText::RawText(...)
 *   Appends "/" to path unless it already ends in '/' or '\'.  For index 0
 *   ("otwords") and 1 ("ntwords") it sets fastSearch[i] to 0 and creates a
 *   RawStr only when both the .dat and the .idx file pass access(name, 04).
 * RawText::~RawText()
 *   Deletes fastSearch[0] and fastSearch[1] when set.
 * RawText::createSearchFramework()
 *   Walks the module from TOP until Error(), tokenises StripText() with
 *   strtok, upper-cases each word and records the key index in a per-testament
 *   map<string, list<long>>.  The original key is restored afterwards.  Then,
 *   for each testament, it writes <path>/otwords|ntwords .dat and .idx: the
 *   .idx gets a 4-byte offset and a 2-byte size per word; the .dat gets the
 *   word, "\n" and one 4-byte index per occurrence.  Returns -1 when either
 *   file cannot be opened, otherwise 0.
 *   NOTE(review): StripText() is called twice per entry (strlen, then strcpy);
 *   the outputs are opened O_CREAT|O_WRONLY without O_TRUNC, so a longer
 *   pre-existing file keeps its old tail; list::unique() only removes
 *   adjacent duplicates; `size` is an unsigned short, so an entry larger than
 *   65535 bytes wraps; 4-byte writes take the first four bytes in memory of
 *   an unsigned long.
 * RawText::Search(...)
 *   The indexed path is taken only when both fastSearch entries exist,
 *   searchType is -2, REG_ICASE is set in flags and the scope (or key) casts
 *   to VerseKey.  It splits the upper-cased query on spaces and, per
 *   testament, intersects the index lists of all words whose database entry
 *   starts with the query word, then appends matches (filtered through scope
 *   when given) to listkey.  Any other case reports "unsupported" through
 *   justCheckIfSupported or defers to SWModule::Search.
 *   NOTE(review): the indexed path calls percent without a null check.
 * RawText::setEntry / linkEntry / deleteEntry
 *   Each obtains a VerseKey by casting this->key (and inkey for linkEntry) or
 *   by constructing a temporary, forwards to settext / linkentry, and deletes
 *   any temporary it created.  deleteEntry stores an empty string.
 * RawText::increment(steps)
 *   Moves the key one position at a time in the direction of steps.  A move
 *   counts as a step when the new entry's (start, size) differs from the
 *   previous one with start > 0 and a non-zero size, or when
 *   skipConsecutiveLinks is false.  On a key error the last good key is
 *   restored and error becomes KEYERR_OUTOFBOUNDS.
 *
 * ---- rawtxidx.c -----------------------------------------------------------
 * main(): same output layout as the other indexers in this patch (.bks/.cps
 *   offsets, 4-byte offset + 2-byte size records in .vss), but stops at the
 *   first out-of-sequence chapter or verse.
 *   NOTE(review): both error fprintf calls pass `buf` although the format has
 *   no conversion; open() with O_CREAT has no mode; main has no return.
 * findbreak(): slides a 7-byte window and treats ':' at index 3 as
 *   "chapter:verse".  When size is non-null it calls itself with size == 0 to
 *   locate the following break, derives *size from the distance (or from
 *   end of file), and seeks back to *offset.  Returns 1 at end of input.
 *
 * ---- rtfidx.cpp -----------------------------------------------------------
 * main(): takes an optional second argument selecting "Matthew 1:1" instead of
 *   "Genesis 1:1" as the starting VerseKey; chapter and verse numbers come
 *   from mykey, which is incremented after each record.
 * findbreak(): recognises "\f0\fs16\cf2\up6" at window index 1 and
 *   "\fi200\widctlpar" at index 0; never writes *num1 or *num2.
 *   NOTE(review): sprintf(buf2, "%d:%d", ch2, vs2) formats two ints that the
 *   nested call never assigns into a 7-byte buffer, and the result is unused.
 *
 * ---- svetxidx.c -----------------------------------------------------------
 * main(): same flow as rawtxidx.c main but resynchronises instead of stopping
 *   on out-of-sequence numbers; has no return statement.
 * findbreak(): a line starting with a digit is scanned as "<number> <word>";
 *   a word beginning "KAP" sets *num1 (chapter) and continues, anything else
 *   is a verse whose text runs to the next LF or CR.
 *   NOTE(review): `loop` is compared before the first read() assigns it.
 *
 * ---- vntidx.cpp -----------------------------------------------------------
 * main(): same flow as rtfidx.cpp main.
 * findbreak(): matches "\par " followed by LF (only when size is null), the
 *   first 16 bytes of "\par FIN DEL NUEVO TESTAMENTO", and "\par " compared
 *   over 6 bytes.
 *   NOTE(review): the 6-byte memcmp against the 5-character literal "\par "
 *   includes the literal's terminating NUL, so it matches only when buf[5]
 *   is 0 - confirm this is intended.
 *
 * ---- swtext.cpp -----------------------------------------------------------
 * SWText::SWText(...): passes "Biblical Texts" as the module type to SWModule,
 *   replaces key with CreateKey() and clears skipConsecutiveLinks.
 * SWText::CreateKey(): returns a new VerseKey.
 */
+/****************************************************************************** + * RawText Constructor - Initializes data for instance of RawText + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawText::RawText(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) + : SWText(iname, idesc, idisp, enc, dir, mark, ilang), + RawVerse(ipath) { + + string fname; + fname = path; + char ch = fname.c_str()[strlen(fname.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + fname += "/"; + + for (int loop = 0; loop < 2; loop++) { + fastSearch[loop] = 0; + string fastidxname =(fname + ((loop)?"ntwords.dat":"otwords.dat")); + if (!access(fastidxname.c_str(), 04)) { + fastidxname = (fname + ((loop)?"ntwords.idx":"otwords.idx")); + if (!access(fastidxname.c_str(), 04)) + fastSearch[loop] = new RawStr((fname + ((loop)?"ntwords":"otwords")).c_str()); + } + } +} + + +/****************************************************************************** + * RawText Destructor - Cleans up instance of RawText + */ + +RawText::~RawText() +{ + if (fastSearch[0]) + delete fastSearch[0]; + + if (fastSearch[1]) + delete fastSearch[1]; +} + + +/****************************************************************************** + * RawText::getRawEntry - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +char *RawText::getRawEntry() { + long start = 0; + unsigned short size = 0; + VerseKey *key = 0; + + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... 
) { } + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + entrySize = size; // support getEntrySize call + + unsigned long newsize = (size + 2) * FILTERPAD; + if (newsize > entrybufallocsize) { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ newsize ]; + entrybufallocsize = newsize; + } + *entrybuf = 0; + + readtext(key->Testament(), start, (size + 2), entrybuf); + + rawFilter(entrybuf, size, key); + + if (!isUnicode()) + preptext(entrybuf); + + if (this->key != key) // free our key if we created a VerseKey + delete key; + + return entrybuf; +} + + +signed char RawText::createSearchFramework() { + SWKey *savekey = 0; + SWKey *searchkey = 0; + SWKey textkey; + char *word = 0; + char *wordBuf = 0; + + // dictionary holds words associated with a list + // containing every module position that contains + // the word. [0] Old Testament; [1] NT + map < string, list<long> > dictionary[2]; + + + // save key information so as not to disrupt original + // module position + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + searchkey = (key->Persist())?key->clone():0; + if (searchkey) { + searchkey->Persist(1); + SetKey(*searchkey); + } + + // position module at the beginning + *this = TOP; + + VerseKey *lkey = (VerseKey *)key; + + // iterate thru each entry in module + while (!Error()) { + long index = lkey->Index(); + wordBuf = (char *)calloc(sizeof(char), strlen(StripText()) + 1); + strcpy(wordBuf, StripText()); + + // grab each word from the text + word = strtok(wordBuf, " !.,?;:()-=+/\\|{}[]\"<>"); + while (word) { + + // make word upper case + toupperstr(word); + + // lookup word in dictionary (or make entry in dictionary + // for this word) and add this module position (index) to + // the word's associated list of module positions + dictionary[lkey->Testament()-1][word].push_back(index); + 
word = strtok(NULL, " !.,?;:()-=+/\\|{}[]\"<>"); + } + free(wordBuf); + (*this)++; + } + + // reposition module back to where it was before we were called + SetKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + if (searchkey) + delete searchkey; + + + // --------- Let's output an index from our dictionary ----------- + int datfd; + int idxfd; + map < string, list<long> >::iterator it; + list<long>::iterator it2; + unsigned long offset, entryoff; + unsigned short size; + + string fname; + fname = path; + char ch = fname.c_str()[strlen(fname.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + fname += "/"; + + // for old and new testament do... + for (int loop = 0; loop < 2; loop++) { + if ((datfd = open((fname + ((loop)?"ntwords.dat":"otwords.dat")).c_str(), O_CREAT|O_WRONLY|O_BINARY, 00644 )) == -1) + return -1; + if ((idxfd = open((fname + ((loop)?"ntwords.idx":"otwords.idx")).c_str(), O_CREAT|O_WRONLY|O_BINARY, 00644 )) == -1) { + close(datfd); + return -1; + } + + // iterate thru each word in the dictionary + for (it = dictionary[loop].begin(); it != dictionary[loop].end(); it++) { + printf("%s: ", it->first.c_str()); + + // get our current offset in our word.dat file and write this as the start + // of the next entry in our database + offset = lseek(datfd, 0, SEEK_CUR); + write(idxfd, &offset, 4); + + // write our word out to the word.dat file, delineating with a \n + write(datfd, it->first.c_str(), strlen(it->first.c_str())); + write(datfd, "\n", 1); + + // force our mod position list for this word to be unique (remove + // duplicates that may exist if the word was found more than once + // in the verse + it->second.unique(); + + // iterate thru each mod position for this word and output it to + // our database + unsigned short count = 0; + for (it2 = it->second.begin(); it2 != it->second.end(); it2++) { + entryoff= *it2; + write(datfd, &entryoff, 4); + count++; + } + + // now see what our new position is in our word.dat file and + // determine 
the size of this database entry + size = lseek(datfd, 0, SEEK_CUR) - offset; + + // store the size of this database entry + write(idxfd, &size, 2); + printf("%d entries (size: %d)\n", count, size); + } + close(datfd); + close(idxfd); + } + return 0; +} + + +/****************************************************************************** + * SWModule::Search - Searches a module for a string + * + * ENT: istr - string for which to search + * searchType - type of search to perform + * >=0 - regex + * -1 - phrase + * -2 - multiword + * flags - options flags for search + * justCheckIfSupported - if set, don't search, only tell if this + * function supports requested search. + * + * RET: listkey set to verses that contain istr + */ + +ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) +{ + listkey.ClearList(); + + if ((fastSearch[0]) && (fastSearch[1])) { + + switch (searchType) { + case -2: { + + if ((flags & REG_ICASE) != REG_ICASE) // if haven't chosen to + // ignore case + break; // can't handle fast case sensitive searches + + // test to see if our scope for this search is bounded by a + // VerseKey + VerseKey *testKeyType = 0; + try { + testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key)); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant we can't handle + // because of scope. + // In the future, add bool SWKey::isValid(const char *tryString); + if (!testKeyType) + break; + + + // check if we just want to see if search is supported. + // If we've gotten this far, then it is supported. 
+ if (justCheckIfSupported) { + *justCheckIfSupported = true; + return listkey; + } + + SWKey saveKey = *testKeyType; // save current place + + char error = 0; + char **words = 0; + char *wordBuf = 0; + int wordCount = 0; + long start; + unsigned short size; + char *idxbuf = 0; + char *datbuf = 0; + list <long> indexes; + list <long> indexes2; + VerseKey vk; + vk = TOP; + + (*percent)(10, percentUserData); + + // toupper our copy of search string + stdstr(&wordBuf, istr); + toupperstr(wordBuf); + + // get list of individual words + words = (char **)calloc(sizeof(char *), 10); + int allocWords = 10; + words[wordCount] = strtok(wordBuf, " "); + while (words[wordCount]) { + wordCount++; + if (wordCount == allocWords) { + allocWords+=10; + words = (char **)realloc(words, sizeof(char *)*allocWords); + } + words[wordCount] = strtok(NULL, " "); + } + + (*percent)(20, percentUserData); + + // clear our result set + indexes.erase(indexes.begin(), indexes.end()); + + // search both old and new testament indexes + for (int j = 0; j < 2; j++) { + // iterate thru each word the user passed to us. 
+ for (int i = 0; i < wordCount; i++) { + + // clear this word's result set + indexes2.erase(indexes2.begin(), indexes2.end()); + error = 0; + + // iterate thru every word in the database that starts + // with our search word + for (int away = 0; !error; away++) { + idxbuf = 0; + + // find our word in the database and jump ahead _away_ + error = fastSearch[j]->findoffset(words[i], &start, &size, away); + + // get the word from the database + fastSearch[j]->getidxbufdat(start, &idxbuf); + + // check to see if it starts with our target word + if (strlen(idxbuf) > strlen(words[i])) + idxbuf[strlen(words[i])] = 0; +// else words[i][strlen(idxbuf)] = 0; + if (!strcmp(idxbuf, words[i])) { + + // get data for this word from database + free(idxbuf); + idxbuf = 0; + datbuf = 0; + fastSearch[j]->readtext(start, &size, &idxbuf, &datbuf); + + // we know that the data consists of sizof(long) + // records each a valid module position that constains + // this word + // + // iterate thru each of these module positions + long *keyindex = (long *)datbuf; + while (keyindex < (long *)(datbuf + size - (strlen(idxbuf) + 1))) { + if (i) { // if we're not on our first word + + // check to see if this word is already in the result set. 
+ // This is our AND functionality + if (find(indexes.begin(), indexes.end(), *keyindex) != indexes.end()) + // add to new result set + indexes2.push_back(*keyindex); + } + else indexes2.push_back(*keyindex); + keyindex++; + } + free(datbuf); + } + else error = 1; // no more matches + free(idxbuf); + } + + // make new result set final result set + indexes = indexes2; + + percent((char)(20 + (float)((j*wordCount)+i)/(wordCount * 2) * 78), percentUserData); + } + + // indexes contains our good verses, lets return them in a listkey + indexes.sort(); + + // iterate thru each good module position that meets the search + for (list <long>::iterator it = indexes.begin(); it != indexes.end(); it++) { + + // set a temporary verse key to this module position + vk.Testament(j+1); + vk.Error(); + vk.Index(*it); + + // check scope + // Try to set our scope key to this verse key + if (scope) { + *testKeyType = vk; + + // check to see if it set ok and if so, add to our return list + if (*testKeyType == vk) + listkey << (const char *) vk; + } + else listkey << (const char*) vk; + } + } + (*percent)(98, percentUserData); + + free(words); + free(wordBuf); + + *testKeyType = saveKey; // set current place back to original + + listkey = TOP; + (*percent)(100, percentUserData); + return listkey; + } + + default: + break; + } + } + + // check if we just want to see if search is supported + if (justCheckIfSupported) { + *justCheckIfSupported = false; + return listkey; + } + + // if we don't support this search, fall back to base class + return SWModule::Search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData); +} + + +void RawText::setEntry(const char *inbuf, long len) { + VerseKey *key = 0; + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... 
) {} + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), inbuf, len); + + if (this->key != key) // free our key if we created a VerseKey + delete key; +} + + +void RawText::linkEntry(const SWKey *inkey) { + VerseKey *destkey = 0; + const VerseKey *srckey = 0; + // see if we have a VerseKey * or decendant + try { + destkey = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!destkey) + destkey = new VerseKey(this->key); + + // see if we have a VerseKey * or decendant + try { + srckey = SWDYNAMIC_CAST(VerseKey, inkey); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!srckey) + srckey = new VerseKey(inkey); + + linkentry(destkey->Testament(), destkey->Index(), srckey->Index()); + + if (this->key != destkey) // free our key if we created a VerseKey + delete destkey; + + if (inkey != srckey) // free our key if we created a VerseKey + delete srckey; +} + + +/****************************************************************************** + * RawText::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawText::deleteEntry() { + + VerseKey *key = 0; + + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), ""); + + if (key != this->key) + delete key; +} + +/****************************************************************************** + * RawText::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void RawText::increment(int steps) { + long start; + unsigned short size; + VerseKey *tmpkey = 0; + + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... 
) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + + SWKey lastgood = *tmpkey; + while (steps) { + long laststart = start; + unsigned short lastsize = size; + SWKey lasttry = *tmpkey; + (steps > 0) ? (*key)++ : (*key)--; + if (tmpkey != key) + delete tmpkey; + tmpkey = 0; + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... ) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + if ((error = key->Error())) { + *key = lastgood; + break; + } + long index = tmpkey->Index(); + findoffset(tmpkey->Testament(), index, &start, &size); + if ( + (((laststart != start) || (lastsize != size)) // we're a different entry + && (start > 0) && (size)) // and we actually have a size + ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links + steps += (steps < 0) ? 1 : -1; + lastgood = *tmpkey; + } + } + error = (error) ? KEYERR_OUTOFBOUNDS : 0; + + if (tmpkey != key) + delete tmpkey; +} diff --git a/src/modules/texts/rawtext/rawtxidx.c b/src/modules/texts/rawtext/rawtxidx.c new file mode 100644 index 0000000..311103e --- /dev/null +++ b/src/modules/texts/rawtext/rawtxidx.c @@ -0,0 +1,146 @@ +#include <stdio.h> +#include <fcntl.h> + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fp, vfp, cfp, bfp; + long pos; + short size, tmp; + int num1, num2, offset, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + + if (argc != 2) { + fprintf(stderr, "usage: %s <file to process>\n", argv[0]); + exit(1); + } + + if ((fp = open(argv[1], O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + 
exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + while (!findbreak(fp, &offset, &num1, &num2, &size)) { + + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Book intro (vss) */ + write(vfp, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Chapter intro */ + write(vfp, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7d\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence\n", buf); + break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of sequence\n", buf); + break; + } + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size) +{ + char buf[7]; + char buf2[7]; + char loop; + int offset2, ch2, vs2; + + memset(buf, ' ', 7); + + while (1) { + if (buf[3] == ':') { + memcpy(buf2, buf, 7); + for (loop = 0; loop < 7; loop++) { + if (!isdigit(buf2[loop])) + buf2[loop] = ' '; + } + buf2[3] = 0; + *num1 = atoi(buf2); + *num2 = atoi(&buf2[4]); + if (*num1 && *num2) { + *offset = lseek(fp, 0, 
SEEK_CUR); + sprintf(buf2, "%d", *num2); + *offset -= 2 - strlen(buf2); + if (size) { + if (findbreak(fp, &offset2, &ch2, &vs2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + sprintf(buf2, "%d:%d", ch2, vs2); + *size = (offset2 - (*offset)) - (strlen(buf2) + 2); + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + } + memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + diff --git a/src/modules/texts/rawtext/rtfidx.cpp b/src/modules/texts/rawtext/rtfidx.cpp new file mode 100644 index 0000000..9fdb305 --- /dev/null +++ b/src/modules/texts/rawtext/rtfidx.cpp @@ -0,0 +1,164 @@ +#include <stdio.h> +#include <fcntl.h> +#include <versekey.h> + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fp, vfp, cfp, bfp; + long pos; + short size, tmp; + int num1, num2, offset, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + VerseKey mykey; + + if ((argc < 2) || (argc > 3)) { + fprintf(stderr, "usage: %s <file to process> [nt]\n", argv[0]); + exit(1); + } + + if ((fp = open(argv[1], O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, 
&pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + mykey = (argc == 3) ? "Matthew 1:1" : "Genesis 1:1"; + + while (!findbreak(fp, &offset, &num1, &num2, &size)) { + num1 = mykey.Chapter(); + num2 = mykey.Verse(); + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Book intro (vss) */ + write(vfp, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Chapter intro */ + write(vfp, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7d\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence\n"); + break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of sequence\n"); + break; + } + write(vfp, &offset, 4); + write(vfp, &size, 2); + mykey++; + } + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size) +{ + char buf[17]; + char buf2[7]; + char loop; + char offadj, inquotes, sizeadj; + int offset2, ch2, vs2; + + memset(buf, ' ', 17); + + while (1) { + offadj = -10; + inquotes = 0; + sizeadj = 0; + if (!memcmp(&buf[1], "\\f0\\fs16\\cf2\\up6", 15)) { + offadj = 0; + inquotes = 1; + sizeadj = (*buf == 10) ? 
-19:-17; + } + if (!memcmp(buf, "\\fi200\\widctlpar", 16)) { + offadj = -1; +// inquotes = 1; + sizeadj = -18; + } + if (offadj > -10) { + *offset = lseek(fp, 0, SEEK_CUR) + offadj; + if (size) { + (*offset)++; + while (inquotes) { + while (read(fp, buf2, 1) == 1) { + if (*buf2 == '}') + break; + (*offset)++; + } + inquotes--; + } + if (findbreak(fp, &offset2, &ch2, &vs2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + sprintf(buf2, "%d:%d", ch2, vs2); + *size = (offset2 - (*offset)); + } + lseek(fp, *offset+17, SEEK_SET); + } + else (*offset) += sizeadj; + return 0; + } + memmove(buf, &buf[1], 16); + if (read(fp, &buf[16], 1) != 1) + return 1; + } +} + diff --git a/src/modules/texts/rawtext/svetxidx.c b/src/modules/texts/rawtext/svetxidx.c new file mode 100644 index 0000000..26e67fd --- /dev/null +++ b/src/modules/texts/rawtext/svetxidx.c @@ -0,0 +1,153 @@ +#include <stdio.h> +#include <fcntl.h> +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + + +char findbreak(int fd, long *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fd, vfd, cfd, bfd; + long pos, offset; + short size, tmp; + int num1, num2, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + + if (argc != 2) { + fprintf(stderr, "usage: %s <file to process>\n", argv[0]); + exit(1); + } +#ifndef O_BINARY // O_BINARY is for Borland to be happy. 
If we're in GNU, just define it to a NULL mask +#define O_BINARY 0 +#endif + if ((fd = open(argv[1], O_RDONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfd = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfd = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfd = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfd, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfd, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfd, &pos, 4); /* Module intro */ + write(vfd, &size, 2); + write(vfd, &pos, 4); /* Testament intro */ + write(vfd, &size, 2); + + while (!findbreak(fd, &offset, &num1, &num2, &size)) { + + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfd, 0, SEEK_CUR); + write(bfd, &pos, 4); + pos = lseek(vfd, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfd, &pos, 4); + pos = 0; + tmp = 0; + write(vfd, &pos, 4); /* Book intro (vss) */ + write(vfd, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfd, 0, SEEK_CUR); + write(cfd, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfd, &pos, 4); /* Chapter intro */ + write(vfd, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7ld\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence (%2d:%3d:%3d)\n", curbook, num1-1, num2); + curchap = num1; +// break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of 
sequence (%2d:%3d:%3d)\n", curbook, num1, num2-1); +// break; + tmp = 0; + curverse = num2; + write(vfd, &offset, 4); + write(vfd, &tmp, 2); + } + write(vfd, &offset, 4); + write(vfd, &size, 2); + } + + close(vfd); + close(cfd); + close(bfd); + close(fd); +} + + +char findbreak(int fd, long *offset, int *num1, int *num2, short *size) +{ + char buf[8]; + char buf2[7]; + char loop, len; + + memset(buf, ' ', 7); + buf[7] = 0; + + while (1) { + + memmove(buf, &buf[1], 6); + if (read(fd, &buf[6], 1) != 1) + return 1; + + if ((buf[0] == 10) && (isdigit(buf[1]))) { + sscanf(buf, "%d %s", num2, buf2); + if (!strncmp(buf2, "KAP", 3)) { + *num1 = *num2; + continue; + } + sprintf(buf, "%d", *num2); + (*offset) = lseek(fd, 0, SEEK_CUR); + (*offset) -= (5-strlen(buf)); + for (len = 1; len == 1; len = read(fd, &loop, 1)) { + if ((loop == 10) || (loop == 13)) + break; + } + + *size = (short)(lseek(fd, 0, SEEK_CUR) - *offset) - 1; + lseek(fd, -1, SEEK_CUR); + break; + } + } + return 0; +} + diff --git a/src/modules/texts/rawtext/vntidx.cpp b/src/modules/texts/rawtext/vntidx.cpp new file mode 100644 index 0000000..bbb4a9e --- /dev/null +++ b/src/modules/texts/rawtext/vntidx.cpp @@ -0,0 +1,185 @@ +#include <stdio.h> +#include <fcntl.h> +#include <versekey.h> + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fp, vfp, cfp, bfp; + long pos; + short size, tmp; + int num1, num2, offset, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + VerseKey mykey; + + if ((argc < 2) || (argc > 3)) { + fprintf(stderr, "usage: %s <file to process> [nt]\n", argv[0]); + exit(1); + } + + if ((fp = open(argv[1], O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfp = open(buf, 
O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + mykey = (argc == 3) ? "Matthew 1:1" : "Genesis 1:1"; + + while (!findbreak(fp, &offset, &num1, &num2, &size)) { + num1 = mykey.Chapter(); + num2 = mykey.Verse(); + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Book intro (vss) */ + write(vfp, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Chapter intro */ + write(vfp, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7d\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence\n"); + break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of sequence\n"); + break; + } + write(vfp, &offset, 4); + write(vfp, &size, 2); + mykey++; + } + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size) +{ + char buf[17]; + char buf2[7]; + char buf3[7]; + char loop; + char offadj, inquotes, sizeadj; + int offset2, ch2, vs2; + + strcpy (buf3, "\\par "); + buf3[5] = 10; + 
memset(buf, ' ', 17); + + while (1) { + offadj = -100; + inquotes = 0; + sizeadj = 0; + if (!memcmp(buf, "\\par FIN DEL NUEVO TESTAMENTO", 16)) { + offadj = -11; +// inquotes = 1; + sizeadj = -7; + } + + if ((!memcmp(buf, buf3, 6)) && (!size)) { + offadj = -11; +// inquotes = 1; + sizeadj = -7; + } + if (!memcmp(buf, "\\par ", 6)) { + if (isdigit(buf[6])) { + for (loop = 7; loop < 10; loop++) { + if (!isdigit(buf[loop])) + break; + } + offadj = -(11 - (loop - 6)); + // inquotes = 1; + sizeadj = -7; + } + } +/* + if (!memcmp(buf, "\\fi200\\widctlpar", 16)) { + offadj = -1; +// inquotes = 1; + sizeadj = -18; + } +*/ + if (offadj > -100) { + *offset = lseek(fp, 0, SEEK_CUR) + offadj; + if (size) { + (*offset)++; + while (inquotes) { + while (read(fp, buf2, 1) == 1) { + if (*buf2 == '}') + break; + (*offset)++; + } + inquotes--; + } + if (findbreak(fp, &offset2, &ch2, &vs2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + *size = (offset2 - (*offset)); + } + lseek(fp, *offset-sizeadj, SEEK_SET); + } + else (*offset) += sizeadj; + return 0; + } + memmove(buf, &buf[1], 16); + if (read(fp, &buf[16], 1) != 1) + return 1; + } +} + diff --git a/src/modules/texts/swtext.cpp b/src/modules/texts/swtext.cpp new file mode 100644 index 0000000..5f6b424 --- /dev/null +++ b/src/modules/texts/swtext.cpp @@ -0,0 +1,40 @@ +/****************************************************************************** + * swtext.cpp - code for base class 'SWText'- The basis for all text modules + */ + +#include <swtext.h> +#include <listkey.h> + + +/****************************************************************************** + * SWText Constructor - Initializes data for instance of SWText + * + * ENT: imodname - Internal name for module + * imoddesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +SWText::SWText(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup 
mark, const char* ilang): SWModule(imodname, imoddesc, idisp, "Biblical Texts", enc, dir, mark, ilang) +{ + delete key; + key = CreateKey(); + skipConsecutiveLinks = false; +} + + +/****************************************************************************** + * SWText Destructor - Cleans up instance of SWText + */ + +SWText::~SWText() { +} + + +/****************************************************************************** + * SWText CreateKey - Create the correct key (VerseKey) for use with SWText + */ + +SWKey *SWText::CreateKey() +{ + return new VerseKey(); +} diff --git a/src/modules/texts/ztext/Makefile b/src/modules/texts/ztext/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/texts/ztext/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. + +all: + make -C ${root} diff --git a/src/modules/texts/ztext/Makefile.am b/src/modules/texts/ztext/Makefile.am new file mode 100644 index 0000000..2b78db6 --- /dev/null +++ b/src/modules/texts/ztext/Makefile.am @@ -0,0 +1,4 @@ +ztextdir = $(top_srcdir)/src/modules/texts/ztext + +libsword_la_SOURCES += $(ztextdir)/ztext.cpp + diff --git a/src/modules/texts/ztext/gbfidx.cpp b/src/modules/texts/ztext/gbfidx.cpp new file mode 100644 index 0000000..e7a9530 --- /dev/null +++ b/src/modules/texts/ztext/gbfidx.cpp @@ -0,0 +1,661 @@ +/*****************************************************************************
+ *
+ * This code reeks but works (sometimes). Good luck!
+ * Modified for zText purposes
+ */
+
+//#include <stdio.h>
+#include <iostream>
+#include <fstream>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <assert.h>
+
+//#ifndef __GNUC__
+#include <io.h>
+//#else
+//#include <unistd.h>
+//#endif
+
+#include <fcntl.h>
+#include <versekey.h>
+
+
+void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size); // writes index records for key1..key3
+char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, unsigned short *size); // scans fp for the next verse marker; returns 1 at end of input
+void openfiles(char *fname); // opens the input, the three index outputs and the log
+void checkparams(int argc, char **argv); // validates argv and seeds key1/key2/key3
+
+
+VerseKey key1, key2, key3; // all three are set to the testament's first verse by checkparams(); main passes them to writeidx()
+int fp=0, vfp=0, cfp=0, bfp=0; // descriptors set by openfiles(): input text, then the ".vss", ".cps" and ".bks" outputs
+long chapoffset=0; // file position of the most recent chapter marker (set in findbreak)
+unsigned short chapsize=0; // length computed for that chapter marker (set in findbreak)
+long bookoffset=0; // file position of the most recent book marker (set in findbreak)
+unsigned short booksize=0; // length computed for that book marker; adjusted in writeidx
+long testoffset=0; // file position of the most recent testament marker (set in findbreak)
+unsigned short testsize=0; // length computed for that testament marker; adjusted in writeidx
+long verseoffset=0; // offset of a verse record whose write writeidx has deferred
+unsigned short versesize=0; // size of that deferred verse record
+long nextoffset=0; // offset writeidx expects the next record to start at (checked with assert)
+char testmnt=0; // key1.Testament() as read in main; starttest() treats 2 as the "<BN" case
+int deadcount = 0; // only referenced from commented-out code in this listing
+int chapmark=-4, bookmark=-1; // incremented by findbreak on chapter/book markers, reset to 0 by writeidx
+ofstream cfile; // log stream, opened on "gbfidx.log" by openfiles()
+
+
+/*
+ * main: builds the .vss/.cps/.bks index files for the GBF text named by argv[1].
+ *
+ * checkparams() validates the arguments and seeds key1/key2/key3, openfiles()
+ * opens every descriptor, and then each verse marker reported by findbreak()
+ * is turned into index records by writeidx().
+ *
+ * RET: 0 once the whole input has been processed (fatal errors exit() from
+ *      the helpers with a non-zero status).
+ */
+int main(int argc, char **argv)
+{
+    /* Initialised so that nothing indeterminate reaches writeidx() when
+     * findbreak() leaves an output parameter untouched. */
+    long pos = 0, offset = 0;
+    int num1 = 0, num2 = 0, rangemax = 0;
+    bool firstentry = true;
+    unsigned short size = 0;
+
+    checkparams(argc, argv);
+
+    openfiles(argv[1]);
+
+    testmnt = key1.Testament();
+    cfile << "testament" << (int) testmnt << "\n";
+    num1 = key1.Chapter();
+    num2 = key1.Verse();
+    pos = 0;
+    write(bfp, &pos, 4); /* Book offset for testament intros */
+    pos = 4;
+    write(cfp, &pos, 4); /* Chapter offset for testament intro */
+
+    /* The module and testament intro records are written by writeidx(). */
+    cout << "GBFIDX Running\n";
+    cout.flush();
+    while (!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) {
+        if (firstentry) {
+            /* key2 still holds the starting verse; nothing to roll over yet */
+            firstentry = false;
+        }
+        else if (num2 < key2.Verse()) { // new chapter
+            if (num1 <= key2.Chapter()) { // new book
+                key2.Verse(1);
+                key2.Chapter(1);
+                key2.Book(key2.Book()+1);
+            }
+            cfile << "Found Chapter Break: " << num1 << " ('" << (const char *)key2 << "')\n";
+        }
+        /* Verse is reset first so the chapter change is applied to a verse
+         * number that exists in every chapter. */
+        key2.Verse(1);
+        key2.Chapter(num1);
+        key2.Verse(num2);
+
+        key3 = key2;
+
+        writeidx(key1, key2, key3, offset, size);
+    }
+    close(vfp);
+    close(cfp);
+    close(bfp);
+    close(fp);
+
+    /* Was "return 1", which reported failure to the shell after a clean run. */
+    return 0;
+}
+
+
+/**************************************************************************
+ * writeidx: key1 - current location of index
+ * key2 - minimum keyval for which this offset is valid
+ * key3 - maximum keyval for which this offset is valid
+ */
+
+void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size)
+{ /* Advances key1 one verse at a time while key1 <= key3, no KEYERR_OUTOFBOUNDS
+   * is reported and key1 stays in testament testmnt. Each step may write, in
+   * this order: a previously deferred verse record, the module / testament /
+   * book / chapter intro records, and then either the verse record
+   * (key1 >= key2) or an all-zero "blank" record. Every record written to vfp
+   * is a 4-byte offset followed by a 2-byte size; bfp and cfp receive 4-byte
+   * positions taken from lseek().
+   * NOTE(review): the 4-byte writes pass the address of a long; this looks
+   * like it assumes a 32-bit or little-endian long -- confirm for the target.
+   */
+ long pos;
+ unsigned short tmp;
+
+ for (; ((key1 <= key3) && (key1.Error() != KEYERR_OUTOFBOUNDS) && (key1.Testament() == testmnt)); key1+=1) {
+ if (chapmark>=2) // findbreak has bumped chapmark at least twice since the last reset below
+ {
+ if (bookmark==2)
+ {
+ //booksize = booksize - chapsize + 7;
+ cfile << "versesize " << versesize << " booksize " << booksize << " chapsize " << chapsize << " size " << size << "\n";
+ //cfile.flush();
+ //assert(chapsize < size);
+ //if (chapsize > size) // At start of Psalms gets chapsize rather than chapsize+size ???
+ //{
+ // versesize = versesize - (booksize - (chapsize - size) + 7);
+ //}
+ //else
+ //{
+ versesize = versesize - (booksize - (chapsize) + 7); // presumably trims the book heading off the deferred verse -- TODO confirm
+ //}
+ cfile << "Last verse in book\n";
+ }
+ //chapsize = chapsize - size;
+ cfile << "versesize " << versesize << " chapsize " << chapsize << " size " << size<< "\n";
+ cfile.flush();
+ //assert(chapsize > size);
+ //if (chapsize > size) // At start of Psalms gets chapsize rather than chapsize+size ???
+ //{
+ // versesize = versesize - (chapsize - size);
+ //}
+ //else
+ //{
+ versesize = versesize - (chapsize); // presumably trims the chapter heading off the deferred verse -- TODO confirm
+ //}
+ cfile << "Last verse in chapter\n";
+ }
+ if (chapmark>=2 && bookmark!=1) // flush the deferred verse record and reset both counters
+ {
+ cfile << "prev verse pos{" << verseoffset << "} size{" << versesize << "} nextoffset{" << nextoffset << "}\n";
+ cfile.flush();
+ assert(verseoffset==nextoffset);
+ write(vfp, &verseoffset, 4);
+ write(vfp, &versesize, 2);
+ nextoffset = verseoffset+versesize;
+ bookmark = 0;
+ chapmark = 0;
+ }
+ if (key1.Verse() == 1) { // new chapter
+ cfile << "size??? " << size << "\n";
+ cfile.flush();
+ //assert(chapsize > size || key1.Chapter()==1);
+ //assert(chapsize > size);
+ //if (chapsize > size) // At start of books gets chapsize rather than chapsize+size
+ //{
+ // chapsize = chapsize - size;
+ //}
+ if (key1.Chapter() == 1) { // new book
+ booksize = booksize - chapsize + 7;
+ if (key1.Book() == 1) // first book: also emit the module and testament intro records
+ {
+ pos = 0;
+ //tmp = testoffset;
+ tmp = 0; // better just remember that it goes up to the testament intro to avoid 64k limit
+ // AV exceeds that anyway!
+ write(vfp, &pos, 4); /* Module intro */
+ write(vfp, &tmp, 2);
+ assert(nextoffset==0);
+ cfile << "modintro pos{" << pos << "} size{" << tmp << "}\n";
+ testsize = testsize - booksize - chapsize + 7;
+ if (testsize > 10000)
+ {
+ cerr << "Error: testament too big " << testsize << "\n";
+ exit(-1);
+ }
+ //assert(testoffset==nextoffset);
+ write(vfp, &testoffset, 4); /* Testament intro (vss) */
+ write(vfp, &testsize, 2);
+ nextoffset = testoffset+testsize;
+ cfile << "test intro pos{" << testoffset << "} size{" << testsize << "}\n";
+ }
+ pos = lseek(cfp, 0, SEEK_CUR); // current end of the chapter index goes into the book index
+ write(bfp, &pos, 4);
+ pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */
+ write(cfp, &pos, 4);
+ if (booksize > 10000)
+ {
+ cerr << "Error: book too big " << booksize << "\n";
+ exit(-1);
+ }
+ assert(bookoffset==nextoffset);
+ write(vfp, &bookoffset, 4); /* Book intro (vss) */
+ write(vfp, &booksize, 2);
+ nextoffset = bookoffset+booksize;
+ cfile << "book intro pos{" << bookoffset << "} size{" << booksize << "}\n";
+ //offset += booksize;
+ //bookmark = false;
+ }
+ pos = lseek(vfp, 0, SEEK_CUR); // current end of the verse index goes into the chapter index
+ write(cfp, &pos, 4);
+ assert(chapsize < 10000);
+ write(vfp, &chapoffset, 4); /* Chapter intro */
+ write(vfp, &chapsize, 2);
+ nextoffset = chapoffset+chapsize;
+ cfile << "chapter intro pos{" << chapoffset << "} size{" << chapsize << "}\n";
+ //offset += chapsize;
+ //size -= chapsize;
+ //chapmark = false;
+ }
+ if (key1 >= key2) { // key1 has reached the verse this offset/size belongs to
+ if (size > 10000)
+ {
+ cerr << "Error: verse too big " << size << "\n";
+ exit(-1);
+ }
+ if (!chapmark && !bookmark) // no marker pending: the record can be written immediately
+ {
+ write(vfp, &offset, 4);
+ write(vfp, &size, 2);
+ cfile << "verse pos{" << offset << "} size{" << size << "}\n";
+ cfile.flush();
+ assert(offset==nextoffset);
+ nextoffset = offset+size;
+ //cfile << "bookmark " << bookmark << " chapmark " << chapmark << "\n";
+ }
+ else // a marker is pending: keep the record until the flush at the top of a later iteration
+ {
+ verseoffset = offset;
+ versesize = size;
+ cfile << "saving verse pos{" << offset << "} size{" << size << "}\n";
+ cfile << "bookmark " << bookmark << " chapmark " << chapmark << "\n";
+ }
+ }
+ else { // key1 is still before key2: fill the gap with an empty record
+ pos = 0;
+ tmp = 0;
+ write(vfp, &pos, 4);
+ write(vfp, &tmp, 2);
+ cfile << "blank pos{" << pos << "} size{" << tmp << "}\n";
+ }
+ }
+}
+
+/*
+ * startmod: tells whether buf begins with the three characters "<H0".
+ *
+ * ENT: buf - at least three readable characters
+ * RET: 1 on a match, 0 otherwise
+ */
+char startmod(char *buf)
+{
+    const bool matches = (buf[0] == '<') && (buf[1] == 'H') && (buf[2] == '0');
+
+    return matches ? 1 : 0;
+}
+
+
+/*
+ * starttest: tells whether buf begins with the testament marker for the
+ * testament currently being indexed: "<BN" when testmnt is 2, "<BO" otherwise.
+ *
+ * ENT: buf - at least three readable characters
+ * RET: 1 on a match, 0 otherwise
+ */
+char starttest(char *buf)
+{
+    if (buf[0] != '<' || buf[1] != 'B')
+        return 0;
+
+    const char wanted = (testmnt == 2) ? 'N' : 'O';
+
+    return (buf[2] == wanted) ? 1 : 0;
+}
+
+
+/*
+ * startbook: tells whether buf begins with the three characters "<SB".
+ *
+ * ENT: buf - at least three readable characters
+ * RET: 1 on a match, 0 otherwise
+ */
+char startbook(char *buf)
+{
+    if (buf[0] == '<' && buf[1] == 'S' && buf[2] == 'B')
+        return 1;
+
+    return 0;
+}
+
+
+/*
+ * startchap: tells whether buf begins with the three characters "<SC".
+ *
+ * ENT: buf - at least three readable characters
+ * RET: 1 on a match, 0 otherwise
+ */
+char startchap(char *buf)
+{
+    const bool isChapterTag = buf[0] == '<' && buf[1] == 'S' && buf[2] == 'C';
+
+    return isChapterTag ? 1 : 0;
+}
+
+
+/*
+ * startentry: tells whether buf begins with the three characters "<SV".
+ *
+ * ENT: buf - at least three readable characters
+ * RET: 1 on a match, 0 otherwise
+ */
+char startentry(char *buf)
+{
+    static const char tag[3] = { '<', 'S', 'V' };
+
+    /* compared left to right; the scan stops at the first mismatch */
+    for (int n = 0; n < 3; n++) {
+        if (buf[n] != tag[n])
+            return 0;
+    }
+    return 1;
+}
+
+
+char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, unsigned short *size)
+{ /* Reads fp one byte at a time through a 7-byte sliding window (buf[0..6]),
+   * testing the window for testament, book, chapter and verse markers.
+   * On a verse marker it stores the marker's file position in *offset and
+   * returns 0; it returns 1 as soon as read() cannot deliver another byte.
+   * When size is non-null, each marker branch calls findbreak() again with
+   * size == 0 to locate the following verse marker, derives a length from the
+   * two positions and then seeks back so the outer scan can continue.
+   * Globals written: testoffset/testsize, bookoffset/booksize,
+   * chapoffset/chapsize, bookmark and chapmark (the two counters are also
+   * incremented while a look-ahead call is running).
+   * rangemax is not referenced by this function.
+   */
+ char buf[8];
+ //char buf2[20];
+ //char ch;
+ char loop=0;
+ long offset2;
+ int ch2, vs2, rm2;
+ bool flag;
+ long versestart = 0;
+ long chapstart = 0;
+ long bookstart = 0;
+ long teststart = 0;
+
+ memset(buf, ' ', 8);
+
+ while (1) {
+ //cfile << "#" << buf << "#";
+ //if (lseek(fp, 0, SEEK_CUR) > 2000000)
+ //{
+ // cfile << lseek(fp, 0, SEEK_CUR) << "\n";
+ //}
+ if (starttest(buf)) {
+ cfile << "\n{start of testament}\n";
+ //chapstart = lseek(fp, 0, SEEK_CUR) - 7;
+ teststart = lseek(fp, 0, SEEK_CUR) - 7; // the window holds the last 7 bytes read, so this is where the marker begins
+ testoffset = teststart;
+ memset(buf, ' ', 3); // blank the tag so the window no longer matches on the next pass
+ flag = false; // flag is set below but not read again in this branch
+ for (loop = 3; loop < 6; loop++) {
+ if (buf[loop]!='>')
+ flag = true;
+ else {
+ buf[loop] = 0;
+ break;
+ }
+ }
+ ch2 = *num1;
+ vs2 = 1;
+ if (size) {
+ if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { // look-ahead hit end of input
+ testsize = (unsigned short) (lseek(fp, 0, SEEK_END) - teststart-7);
+ }
+ else {
+ if (vs2) {
+ testsize = (offset2 - teststart - 7);
+ }
+ }
+ lseek(fp, teststart+7, SEEK_SET); // rewind to just after the window that matched
+ cfile << "\nGot testsize " << testsize << "\n";
+ }
+ }
+
+
+ if (startbook(buf)) {
+ cfile << "\n{start of book}\n";
+ bookmark++;
+ //chapstart = lseek(fp, 0, SEEK_CUR) - 7;
+ bookstart = lseek(fp, 0, SEEK_CUR) - 7;
+ bookoffset = bookstart;
+ memset(buf, ' ', 3);
+ flag = false; // flag is set below but not read again in this branch
+ for (loop = 3; loop < 6; loop++) {
+ if (buf[loop]!='>')
+ flag = true;
+ else {
+ buf[loop] = 0;
+ break;
+ }
+ }
+ if (size) {
+ ch2 = *num1;
+ vs2 = 1;
+ if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { // look-ahead hit end of input
+ booksize = (unsigned short) (lseek(fp, 0, SEEK_END) - bookstart - 7);
+ }
+ else {
+ if (vs2) {
+ booksize = (offset2 - bookstart - 7);
+ }
+ }
+ lseek(fp, bookstart+7, SEEK_SET);
+ cfile << "\nGot booksize " << booksize << "\n";
+ }
+ }
+
+ if (startchap(buf)) {
+ cfile << "{start of chapter}";
+ chapmark++;
+ //chapstart = lseek(fp, 0, SEEK_CUR) - 7;
+ chapstart = lseek(fp, 0, SEEK_CUR) - 7;
+ chapoffset = chapstart;
+ memset(buf, ' ', 3); // tag replaced by blanks so atoi() below sees only the digits
+ flag = false;
+ for (loop = 3; loop < 6; loop++) {
+ if (isdigit(buf[loop]))
+ flag = true;
+ else {
+ buf[loop] = 0;
+ break;
+ }
+ }
+ if (flag)
+ *num1 = atoi(buf); // explicit chapter number in the marker
+ else (*num1)++; // no digits: assume the next chapter
+
+ if (size) {
+ ch2 = *num1;
+ vs2 = 1;
+ lseek(fp, chapstart, SEEK_SET); // unlike the other branches the look-ahead restarts at the marker itself
+ if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) {
+ chapsize = (unsigned short) (lseek(fp, 0, SEEK_END) - chapstart);
+ cfile << "getting chapsizeend{" << chapsize << "} = " << lseek(fp, 0, SEEK_END) << " - " << chapstart << "\n";
+ }
+ else {
+ if (vs2) {
+ chapsize = (offset2 - chapstart);
+ cfile << "getting chapsize{" << chapsize << "} = " << offset2 << " - " << chapstart << "\n";
+ }
+ }
+ lseek(fp, chapstart + 7, SEEK_SET);
+ cfile << "\nGot chapsize " << chapsize << " loop{" << (int) loop << "}\n";
+ }
+ //return 0;
+
+ }
+ if (startentry(buf)) {
+ //cfile << "{start of verse}";
+ memset(buf, ' ', 3);
+ flag = false;
+ for (loop = 3; loop < 6; loop++) {
+ if (isdigit(buf[loop]))
+ flag = true;
+ else {
+ buf[loop] = 0;
+ break;
+ }
+ if (flag) // NOTE(review): unlike the chapter branch this test sits inside the loop, so the else below can never run (break happens first) -- confirm intent
+ *num2 = atoi(buf);
+ else (*num2)++;
+ }
+ loop++;
+ /*
+ if (size)
+ {
+ // *offset = lseek(fp, 0, SEEK_CUR) - (7 - loop);
+ *offset = lseek(fp, 0, SEEK_CUR) - 7;
+ }
+ //else *offset = (chapstart) ? chapstart : lseek(fp, 0, SEEK_CUR) - 7;
+ else *offset = (chapstart) ? chapstart : lseek(fp, 0, SEEK_CUR) - 7;
+ */
+ /*if (chapstart)
+ {
+ chapsize = *offset-chapstart;
+ }
+ else
+ {
+ chapsize = 0;
+ }*/
+ *offset = lseek(fp, 0, SEEK_CUR) - 7; // file position where the verse marker begins
+ versestart = *offset;
+ if (size) {
+ ch2 = *num1;
+ vs2 = *num2;
+ if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { // no further verse: the entry runs to end of file
+ *size = (unsigned short) (lseek(fp, 0, SEEK_END) - versestart);
+ cfile << "getting sizeend{" << *size << "} = " << lseek(fp, 0, SEEK_END) << " - " << versestart << "\n";
+ }
+ else {
+ if (vs2) { // *size is left untouched when the look-ahead reports verse 0
+ *size = (offset2 - versestart);
+ cfile << "getting size{" << *size << "} = " << offset2 << " - " << versestart << "\n";
+ }
+ }
+ lseek(fp, *offset+1, SEEK_SET); // resume one byte past the start of this marker
+ }
+ else
+ {
+ cfile << "got offset{" << *offset << "}\n";
+ }
+ return 0;
+ }
+ //cfile << "{ng}";
+ //deadcount++;
+ //if (deadcount==1000) exit(-1);
+ //if (!size)
+ //{
+ // cfile << "not bound offset{" << *offset << "}\n";
+ //}
+ memmove(buf, &buf[1], 6); // slide the window left by one byte
+ if (read(fp, &buf[6], 1) != 1) // and append the next byte; failure ends the scan
+ return 1;
+ }
+}
+
+
+/*
+ * openfiles: opens every file the indexer works on and stores the descriptors
+ * in the globals fp (input), vfp, cfp and bfp (outputs); also opens the log
+ * stream cfile on "gbfidx.log".
+ *
+ * ENT: fname - path of the text to index; the outputs are fname + ".vss",
+ *              ".cps" and ".bks", created or truncated
+ *
+ * Any failure prints a message and terminates the program.
+ */
+void openfiles(char *fname)
+{
+#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53
+#define O_BINARY 0 // If it hasn't been defined than we probably
+#endif // don't need it.
+    static const char *const suffix[3] = { ".vss", ".cps", ".bks" };
+    int *const target[3] = { &vfp, &cfp, &bfp };
+    char buf[255];
+    int n;
+
+    /* Every suffix is 4 characters; refuse names that would overflow buf
+     * instead of letting sprintf() write past its end. */
+    if (strlen(fname) + 4 + 1 > sizeof(buf)) {
+        fprintf(stderr, "File name too long: %s\n", fname);
+        exit(1);
+    }
+
+    if ((fp = open(fname, O_RDONLY|O_BINARY)) == -1) {
+        fprintf(stderr, "Couldn't open file: %s\n", fname);
+        exit(1);
+    }
+
+    for (n = 0; n < 3; n++) {
+        sprintf(buf, "%s%s", fname, suffix[n]);
+        /* O_CREAT requires the third (mode) argument; without it the
+         * permission bits of a newly created file are indeterminate. */
+        *target[n] = open(buf, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC, 0644);
+        if (*target[n] == -1) {
+            fprintf(stderr, "Couldn't open file: %s\n", buf);
+            exit(1);
+        }
+    }
+
+    cfile.open("gbfidx.log", ios::out);
+    if (!cfile.is_open())
+    {
+        cerr << "Failed to open log file\n";
+        exit(-1);
+    }
+}
+
+
+/*
+ * checkparams: validates the command line and positions key1, key2 and key3
+ * on the first verse of the requested testament.
+ *
+ * ENT: argc, argv - as passed to main; argv[1] must be exactly "nt" or "ot"
+ *
+ * Prints a message and terminates the program when the arguments are unusable.
+ */
+void checkparams(int argc, char **argv)
+{
+    const char *firstverse = 0;
+
+    if (argc < 2) {
+        fprintf(stderr, "usage: %s <file to process> [nt - for new testmt file]\n", argv[0]);
+        exit(1);
+    }
+
+    if (strcmp(argv[1], "nt") == 0)
+        firstverse = "Matthew 1:1";
+    else if (strcmp(argv[1], "ot") == 0)
+        firstverse = "Genesis 1:1";
+
+    if (firstverse == 0) {
+        cerr << "File must be ot or nt\n";
+        exit(-1);
+    }
+
+    key1 = key2 = key3 = firstverse;
+}
diff --git a/src/modules/texts/ztext/makeidx.c b/src/modules/texts/ztext/makeidx.c new file mode 100644 index 0000000..311103e --- /dev/null +++ b/src/modules/texts/ztext/makeidx.c @@ -0,0 +1,146 @@ +#include <stdio.h> +#include <fcntl.h> + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fp, vfp, cfp, bfp; + long pos; + short size, tmp; + int num1, num2, offset, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + + if (argc != 2) { + fprintf(stderr, "usage: %s <file to process>\n", argv[0]); + exit(1); + } + + if ((fp = open(argv[1], O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + while (!findbreak(fp, &offset, &num1, &num2, &size)) { + + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Book intro (vss) */ + write(vfp, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfp, 0, SEEK_CUR); + 
write(cfp, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Chapter intro */ + write(vfp, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7d\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence\n", buf); + break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of sequence\n", buf); + break; + } + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size) +{ + char buf[7]; + char buf2[7]; + char loop; + int offset2, ch2, vs2; + + memset(buf, ' ', 7); + + while (1) { + if (buf[3] == ':') { + memcpy(buf2, buf, 7); + for (loop = 0; loop < 7; loop++) { + if (!isdigit(buf2[loop])) + buf2[loop] = ' '; + } + buf2[3] = 0; + *num1 = atoi(buf2); + *num2 = atoi(&buf2[4]); + if (*num1 && *num2) { + *offset = lseek(fp, 0, SEEK_CUR); + sprintf(buf2, "%d", *num2); + *offset -= 2 - strlen(buf2); + if (size) { + if (findbreak(fp, &offset2, &ch2, &vs2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + sprintf(buf2, "%d:%d", ch2, vs2); + *size = (offset2 - (*offset)) - (strlen(buf2) + 2); + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + } + memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + diff --git a/src/modules/texts/ztext/nasb.cpp b/src/modules/texts/ztext/nasb.cpp new file mode 100644 index 0000000..51e08b4 --- /dev/null +++ b/src/modules/texts/ztext/nasb.cpp @@ -0,0 +1,107 @@ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <swcomprs.h> + +class FileCompress: public SWCompress { + int ifd; + int ofd; + int ufd; + int zfd; +public: + FileCompress(char *); + ~FileCompress(); + int GetChars(char *, int len); + int 
SendChars(char *, int len); + void Encode(); + void Decode(); +}; + + +FileCompress::FileCompress(char *fname) +{ + char buf[256]; + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + ufd = open(fname, O_RDWR|O_CREAT|O_BINARY); + + sprintf(buf, "%s.zzz", fname); + zfd = open(buf, O_RDWR|O_CREAT|O_BINARY); +} + + +FileCompress::~FileCompress(char *fname) +{ + close(ufd); + close(zfd); +} + + +int FileCompress::GetChars(char *buf, int len) +{ + return read(ifd, buf, len); +} + + +int FileCompress::SendChars(char *buf, int len) +{ + return write(ofd, buf, len); +} + + +void FileCompress::Encode() +{ + ifd = ufd; + ofd = zfd; + + SWCompress::Encode(); +} + + +void FileCompress::Decode() +{ + ifd = zfd; + ofd = ufd; + + SWCompress::Decode(); +} + + +main(int argc, char **argv) +{ + int decomp = 0; + SWCompress *fobj; + + if (argc != 2) { + fprintf(stderr, "usage: %s <filename|filename.zzz>\n", argv[0]); + exit(1); + } + + if (strlen(argv[1]) > 4) { + if (!strcmp(&argv[1][strlen(argv[1])-4], ".zzz")) { + argv[1][strlen(argv[1])-4] = 0; + decomp = 1; + } + } + + fobj = new FileCompress(argv[1]); + + if (decomp) + fobj->Decode(); + else fobj->Encode(); + + delete fobj; +} diff --git a/src/modules/texts/ztext/rawtxt2z.cpp b/src/modules/texts/ztext/rawtxt2z.cpp new file mode 100644 index 0000000..7eafe2a --- /dev/null +++ b/src/modules/texts/ztext/rawtxt2z.cpp @@ -0,0 +1,457 @@ +// Compression on variable granularity
+
+#include <fcntl.h>
+#include <iostream>
+#include <fstream>
+#include <string>
+
+#ifndef __GNUC__
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+#include <zlib.h>
+#include <versekey.h>
+
+int iBufSize, ulBuffNum; // iBufSize: block size from argv[3] (1 when absent); ulBuffNum: number of the block currently being filled
+ofstream cfile; // log stream, opened on "raw2z.log" in main
+ofstream cfile2; // second log stream, opened on "raw2z.lg2" in main
+
+int ofd[2], oxfd[2], ovxfd[2]; // per-testament outputs: compressed data, block index, verse index
+int ifd[2], ixfd[2]; // per-testament inputs: raw text ("ot"/"nt") and its ".vss" index
+int itestfd[2], itestxfd[2]; // per-testament ".tst"/".tdx" outputs: uncompressed block text and a printable dump of the index entries read
+unsigned long ulIOff=0, ulCOff=0, ulFOff=0, ulNone=0; // ulIOff: offset inside the current block; ulCOff: offset in the compressed file; ulFOff: offset handed to the boundary test; ulNone: stays 0, written for empty entries
+string currbuff=""; // uncompressed text collected for the current block
+
+
+int openreadfile(char *buffer, char *path, const char *filename)
+{
+ int filenum;
+ sprintf(buffer, "%s/%s", path, filename);
+ cfile << buffer << "\n";
+ filenum = open(buffer, O_RDONLY|O_BINARY);
+ if (filenum > 0)
+ {
+ return filenum;
+ }
+ else
+ {
+ cerr << "failed to open file to read\n";
+ exit(-1);
+ }
+}
+
+int openwritefile(char *buffer, char *path, const char *filename)
+{
+ int filenum;
+ sprintf(buffer, "%s/%s", path, filename);
+ cfile << buffer << "\n";
+ filenum = open(buffer, O_WRONLY|O_BINARY|O_CREAT|O_TRUNC);
+ if (filenum > 0)
+ {
+ return filenum;
+ }
+ else
+ {
+ cerr << "failed to open file to read\n";
+ exit(-1);
+ }
+}
+
+/*
+ * bytebound: block test for byte-sized blocks. Logs the key, then reports
+ * whether offset lies beyond iBufSize * (ulBuffNum + 1).
+ *
+ * RET: 1 when offset exceeds that limit, 0 otherwise
+ */
+int bytebound(unsigned long offset, VerseKey &thekey)
+{
+    cfile << "byteboundtest " << thekey << "\n";
+
+    const unsigned long limit = iBufSize * (ulBuffNum+1);
+
+    return (offset > limit) ? 1 : 0;
+}
+
+/*
+ * versebound: block test for verse-sized blocks. Logs the key and always
+ * answers 1; offset takes no part in the decision.
+ */
+int versebound(unsigned long offset, VerseKey &thekey)
+{
+    const int always = 1;
+
+    cfile << "verseboundtest " << thekey << "\n";
+    return always;
+}
+
+/*
+ * chapterbound: block test for chapter-sized blocks.
+ *
+ * RET: 1 when the verse after thekey has verse number 1, or when thekey
+ *      compares equal to "Revelation of John 22:21"; 0 otherwise.
+ *      offset takes no part in the decision.
+ */
+int chapterbound(unsigned long offset, VerseKey &thekey)
+{
+    VerseKey following;
+    following = thekey;
+    following++;
+
+    if (following.Verse() == 1)
+        return 1;
+
+    return (thekey.compare("Revelation of John 22:21") == 0) ? 1 : 0;
+}
+
+/*
+ * bookbound: block test for book-sized blocks.
+ *
+ * RET: 1 when thekey is the last verse of its book, i.e. the following verse
+ *      is chapter 1 verse 1, or when thekey compares equal to
+ *      "Revelation of John 22:21"; 0 otherwise.
+ *      offset takes no part in the decision.
+ *
+ * The previous test looked at Chapter()==1 alone, which is also true for
+ * every verse whose successor is still inside chapter 1, so a block was
+ * closed after each verse of a book's first chapter.
+ */
+int bookbound(unsigned long offset, VerseKey &thekey)
+{
+    VerseKey testkey;
+    testkey = thekey;
+    cfile << "bookboundtest " << testkey << "\n";
+    testkey++;
+
+    const bool nextStartsBook = (testkey.Chapter() == 1) && (testkey.Verse() == 1);
+    if (nextStartsBook || (!thekey.compare("Revelation of John 22:21")))
+    {
+        return 1;
+    }
+    return 0;
+}
+
+
+typedef int (*boundfunc)(unsigned long offset, VerseKey &thekey); // signature shared by bytebound/versebound/chapterbound/bookbound; main indexes a table of these by iType-1
+
+/*
+ * writeblock: compresses the text collected in currbuff and appends it to the
+ * output files of testament i.
+ *
+ * ENT: i - testament slot (index into ofd/oxfd/itestfd)
+ * RET: always 1; a compression failure terminates the program
+ *
+ * Writes the uncompressed text to itestfd[i], one 12-byte entry to oxfd[i]
+ * (offset in the compressed file, compressed size, uncompressed size) and the
+ * compressed bytes to ofd[i]. Afterwards currbuff is emptied, ulBuffNum is
+ * incremented and ulIOff is reset to 0.
+ * NOTE(review): the 4-byte writes pass the address of an unsigned long; this
+ * looks like it assumes a 32-bit or little-endian long -- confirm for the target.
+ */
+int writeblock(int i)
+{
+    cfile << "compressing block\n";
+
+    // keep an uncompressed copy of the block
+    unsigned long buffsize = currbuff.length();
+    write(itestfd[i], currbuff.c_str(), buffsize);
+
+    // compress current buffer
+    unsigned long compsize = (unsigned long) (buffsize*1.01)+20; // at least 1% bigger than buffer + 12 bytes
+    char *destbuff = new char[compsize];
+    if (compress((Bytef*)destbuff, &compsize, (const Bytef*)currbuff.c_str(), buffsize)!=Z_OK)
+    {
+        cerr << "Could not compress buffer: exiting\n";
+        delete[] destbuff;
+        exit(-1);
+    }
+
+    // write to compressed file index
+    ulCOff = lseek(ofd[i], 0, SEEK_END);
+    write(oxfd[i], &ulCOff, 4); // offset in compressed file
+    write(oxfd[i], &compsize, 4); // compressed size
+    write(oxfd[i], &buffsize, 4); // uncompressed size
+    cfile << buffsize << " -> " << compsize << "\n";
+
+    // destbuff holds binary data without a terminating NUL, so it must be
+    // logged by length; streaming it as a C string read past the allocation.
+    cfile2 << "Compressed{" << compsize << "}\n";
+    cfile2.write(destbuff, compsize);
+    cfile2 << "\n";
+    cfile2.flush();
+
+    //write compressed buffer to file
+    write(ofd[i], destbuff, compsize);
+
+    delete[] destbuff;
+
+    // start a fresh block
+    currbuff = "";
+    ulBuffNum++;
+    ulIOff = 0;
+    return 1;
+}
+
+
+
+int main(int argc, char **argv)
+{ /* Converts the raw module found in directory argv[1] into compressed form.
+   * argv[2] selects the block granularity (1 bytes, 2 verses, 3 chapters,
+   * 4 books; default 2) and argv[3] the buffer size (default 1).
+   * For each testament it walks the ".vss" index entry by entry, appends the
+   * referenced text to currbuff, writes a verse index entry and calls
+   * writeblock() whenever the selected boundary function reports a boundary.
+   * NOTE(review): returns 1 at the end of a normal run -- confirm whether
+   * callers expect 0 for success.
+   */
+ VerseKey key1, key2, key3;
+ int i;
+ char xbuff[64];
+ unsigned long offset;
+ unsigned short size=0;
+ unsigned long ulsize=0;
+ char *tmpbuf=NULL;
+ int iType;
+ boundfunc blockbound[4] = {bytebound, versebound, chapterbound, bookbound};
+ bool newbook=true, newchapter=true, newtestament = true, newmodule = true, lasttodo=true;
+
+ if ((argc < 2) || (argc > 4)) {
+ cerr << "usage: " << argv[0] << " datapath [compression type [buffer size]]\n";
+ exit(1);
+ }
+
+ if (argc>2)
+ {
+ iType = atoi(argv[2]);
+ if (argc==4)
+ {
+ iBufSize = atoi(argv[3]);
+ }
+ else
+ {
+ iBufSize = 1;
+ }
+ }
+ else
+ {
+ iType = 2;
+ iBufSize = 1;
+ }
+
+ cfile.open("raw2z.log", ios::out);
+ if (!cfile.is_open())
+ {
+ cerr << "Failed to open log file\n";
+ exit(-1);
+ }
+ cfile2.open("raw2z.lg2", ios::out);
+ if (!cfile2.is_open())
+ {
+ cerr << "Failed to open log file\n";
+ exit(-1);
+ }
+ cfile << iType << " " << iBufSize << "\n";
+
+ if ((iType<=0) || (iType > 4) || !iBufSize || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help") || !strcmp(argv[1], "/?") || !strcmp(argv[1], "-help"))
+ { // NOTE(review): the help text below goes to the log stream cfile, not to the terminal -- confirm intent
+ cfile << argv[0] << " - a tool to create compressed Sword modules\n";
+ cfile << "version 0.1\n\n";
+ cfile << "usage: "<< argv[0] << " datapath [compression type [buffer size]]\n\n";
+ cfile << "datapath: the directory in which to find the raw module\n";
+ cfile << "compression type: (default 2)\n" << " 1 - bytes\n" << " 2 - verses\n" << " 3 - chapters\n" << " 4 - books\n";
+ cfile << "buffer size (default 1): the number of the compression type in each block\n";
+ exit(1);
+ }
+
+ //zobj = new SWCompress();
+ //rawdrv = new RawVerse(argv[1]);
+
+#ifndef O_BINARY // NOTE(review): this fallback appears after openreadfile()/openwritefile(), which already use O_BINARY
+#define O_BINARY 0
+#endif
+ cfile << "opening files\n";
+
+ tmpbuf = new char [ strlen(argv[1]) + 11 ]; // room for the path, '/', the longest file name used below and the terminator
+
+ //original files
+ ifd[0] = openreadfile(tmpbuf, argv[1], "ot");
+ ixfd[0] = openreadfile(tmpbuf, argv[1], "ot.vss");
+ ifd[1] = openreadfile(tmpbuf, argv[1], "nt");
+ ixfd[1] = openreadfile(tmpbuf, argv[1], "nt.vss");
+
+switch ( iType) { // the output file extensions depend on the block type: r, v, c or b followed by zz / zs / zv
+ case 1 :
+ ofd[0] = openwritefile(tmpbuf, argv[1], "ot.rzz");
+ oxfd[0] = openwritefile(tmpbuf, argv[1], "ot.rzs");
+ ovxfd[0] = openwritefile(tmpbuf, argv[1], "ot.rzv");
+ ofd[1] = openwritefile(tmpbuf, argv[1], "nt.rzz");
+ oxfd[1] = openwritefile(tmpbuf, argv[1], "nt.rzs");
+ ovxfd[1] = openwritefile(tmpbuf, argv[1], "nt.rzv");
+ //boundfunc = bytebound;
+ break;
+ case 2 :
+ ofd[0] = openwritefile(tmpbuf, argv[1], "ot.vzz");
+ oxfd[0] = openwritefile(tmpbuf, argv[1], "ot.vzs");
+ ovxfd[0] = openwritefile(tmpbuf, argv[1], "ot.vzv");
+ ofd[1] = openwritefile(tmpbuf, argv[1], "nt.vzz");
+ oxfd[1] = openwritefile(tmpbuf, argv[1], "nt.vzs");
+ ovxfd[1] = openwritefile(tmpbuf, argv[1], "nt.vzv");
+ break;
+ case 3 :
+ ofd[0] = openwritefile(tmpbuf, argv[1], "ot.czz");
+ oxfd[0] = openwritefile(tmpbuf, argv[1], "ot.czs");
+ ovxfd[0] = openwritefile(tmpbuf, argv[1], "ot.czv");
+ ofd[1] = openwritefile(tmpbuf, argv[1], "nt.czz");
+ oxfd[1] = openwritefile(tmpbuf, argv[1], "nt.czs");
+ ovxfd[1] = openwritefile(tmpbuf, argv[1], "nt.czv");
+ break;
+ case 4 :
+ ofd[0] = openwritefile(tmpbuf, argv[1], "ot.bzz");
+ oxfd[0] = openwritefile(tmpbuf, argv[1], "ot.bzs");
+ ovxfd[0] = openwritefile(tmpbuf, argv[1], "ot.bzv");
+ ofd[1] = openwritefile(tmpbuf, argv[1], "nt.bzz");
+ oxfd[1] = openwritefile(tmpbuf, argv[1], "nt.bzs");
+ ovxfd[1] = openwritefile(tmpbuf, argv[1], "nt.bzv");
+ break;
+ default:
+ cerr << "Unknown compression type\n";
+ exit(-1);
+}
+ itestfd[0] = openwritefile(tmpbuf, argv[1], "ot.tst");
+ itestfd[1] = openwritefile(tmpbuf, argv[1], "nt.tst");
+ itestxfd[0] = openwritefile(tmpbuf, argv[1], "ot.tdx");
+ itestxfd[1] = openwritefile(tmpbuf, argv[1], "nt.tdx");
+
+
+ delete [] tmpbuf;
+
+ //cfile << "about to start\n";
+
+for ( i=0; i<2; i++) // i == 0: Old Testament files, i == 1: New Testament files
+{
+ ulIOff=0, ulBuffNum=0;
+ currbuff = "";
+ key1 = (i == 1) ? "Matthew 1:1" : "Genesis 1:1";
+ key2 = key3 = key1;
+ newtestament = true;
+
+ cfile << "key: " << key1 << " Testament {" << key1.Testament()-1 << "}\n";
+ //cfile << "Chapter {" << key.Chapter() << "}\n";
+ //cfile << "Verse {" << key.Verse() << "}\n";
+ //cfile << key.compare("Revelation of John 22:21") << "\n";
+ //cfile << key.compare("Genesis 1:1") << "\n";
+ do
+ {
+ //cfile << "ok";
+ // read current verse offset
+ if (read(ixfd[i], &offset, 4) != 4) // NOTE(review): offset is an uninitialised unsigned long and only 4 bytes are filled -- any remaining bytes stay indeterminate where long is wider
+ {
+ cfile << "Failed to read input verse offsets?\n";
+ break;
+ }
+ if (read(ixfd[i], &size, 2) != 2)
+ {
+ cfile << "Failed to read input verse sizes?\n";
+ break;
+ }
+ cfile << "key:" << key1 << " offset:" << offset << " size:" << size << "\n";
+ sprintf(xbuff, "key{%s} offset{%ld} size{%d}\n", (const char *)key1, offset, size); // NOTE(review): xbuff is 64 bytes and the write is unbounded -- confirm the longest key text fits
+ write(itestxfd[i], &xbuff, strlen(xbuff));
+ ulsize = size;
+ if (!offset && !size)
+ {
+ //Check for module header
+ if (read(ixfd[i], &ulIOff, 4) != 4)
+ {
+ cfile << "Failed to read input verse offsets?\n";
+ break;
+ }
+ ulsize = ulIOff; // the next entry's offset is taken as the length of the text starting at 0
+ ulIOff = 0;
+ lseek(ixfd[i], 6, SEEK_SET); // NOTE(review): seeks to absolute position 6 (just past the first 6-byte entry) wherever the empty entry was found -- confirm intent
+ }
+
+ if (ulsize)
+ {
+ // read current verse and add to current buffer
+ tmpbuf = (char *) calloc(ulsize + 1, 1);
+ lseek(ifd[i], offset, SEEK_SET);
+ read(ifd[i], tmpbuf, ulsize);
+ currbuff += tmpbuf;
+ //cfile << currbuff << "\n";
+
+ // write to verse index into compressed
+ write(ovxfd[i], &ulBuffNum, 4); // current buffer number
+ write(ovxfd[i], &ulIOff, 4); // offset within the buffer
+ write(ovxfd[i], &size, 2); // verse size
+
+ ulFOff = lseek(ofd[i], 0, SEEK_CUR) + size;
+ if (key1.compare("Revelation of John 22:21")!=-1)
+ {
+ lasttodo = false;
+ }
+ if (blockbound[iType-1](ulFOff, key1)/*at block boudary*/)
+ {
+ writeblock(i);
+ /* (commented-out inline duplicate of the writeblock() body omitted here) */
+ }
+ else
+ {
+ ulIOff += ulsize;
+ }
+ free(tmpbuf);
+
+ if (newmodule) // the new* flags make key1 stay put for one entry each; key1 only advances in the final else
+ {
+ newmodule = false;
+ cfile << "had a new module " << (const char *) key1 << "{" << offset << "}\n";
+ writeblock(i);
+ }
+ else if (newtestament)
+ {
+ newtestament = false;
+ cfile << "had a new testament " << (const char *) key1 << "{" << offset << "}\n";
+ }
+ else if (newbook)
+ {
+ newbook = false;
+ cfile << "had a new book " << (const char *) key1 << "{" << offset << "}\n";
+ }
+ else if (newchapter)
+ {
+ newchapter = false;
+ cfile << "had a new chapter " << (const char *) key1 << "{" << offset << "}\n";
+ }
+ else
+ {
+ key1++;
+ }
+
+ if (key1.Chapter()!=key2.Chapter() || (key1.Book()!=key2.Book()))
+ {
+ newchapter = true;
+ cfile << "got a new chapter " << (const char *) key1 << "\n";
+ }
+ if (key1.Book()!=key2.Book())
+ {
+ newbook = true;
+ cfile << "got a new book " << (const char *) key1 << "\n";
+ }
+ key2 = key1;
+
+ }
+ else
+ {
+ cfile << "empty offset\n";
+ // write to verse index into compressed
+ write(ovxfd[i], &ulNone, 4); // current buffer number
+ write(ovxfd[i], &size, 2); // verse size
+ write(ovxfd[i], &ulNone, 4); // offset within the buffer
+ }
+ }
+ while ( (key1.Testament()==i+1) && ((key1.compare("Revelation of John 22:21")==-1) || (lasttodo)));
+
+ close(ifd[i]);
+ close(ofd[i]);
+ close(ixfd[i]);
+ close(oxfd[i]);
+ close(ovxfd[i]);
+ close(itestfd[i]);
+ close(itestxfd[i]);
+}
+ return 1;
+}
diff --git a/src/modules/texts/ztext/ztext.cpp b/src/modules/texts/ztext/ztext.cpp new file mode 100644 index 0000000..c774693 --- /dev/null +++ b/src/modules/texts/ztext/ztext.cpp @@ -0,0 +1,309 @@
+/******************************************************************************
+ * ztext.cpp - code for class 'zText'- a module that reads compressed text
+ * files: ot and nt using indexes ??.vss
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#ifndef __GNUC__
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+#include <string.h>
+#include <utilfuns.h>
+//#include <rawverse.h>
+#include <ztext.h>
+//#include <zlib.h>
+
+
+/******************************************************************************
+ * zText Constructor - Initializes data for instance of zText
+ *
+ * ENT: ipath - path to data files
+ * iname - Internal name for module
+ * idesc - Name to display to user for module
+ * iblockType - verse, chapter, book, etc. of index chunks
+ * icomp - Compressor object
+ * idisp - Display object to use for displaying
+ * enc, dir, mark, ilang - forwarded unchanged to the SWText base class
+ */
+
+zText::zText(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zVerse(ipath, -1, iblockType, icomp), SWText(iname, idesc, idisp, enc, dir, mark, ilang)/*, SWCompress()*/
+{
+	blockType = iblockType;
+	lastWriteKey = 0;	// no entry has been written yet (checked by setEntry)
+}
+
+
+/******************************************************************************
+ * zText Destructor - Cleans up instance of zText
+ *
+ * Calls flushCache() and then releases the key cloned by the last setEntry().
+ */
+
+zText::~zText()
+{
+	flushCache();
+
+	if (lastWriteKey)
+		delete lastWriteKey;
+}
+
+
+/******************************************************************************
+ * zText::getRawEntry - Returns the current verse buffer
+ *
+ * RET: buffer with verse. The buffer is the member 'entrybuf', which is
+ * reused (and possibly reallocated) on every call, so the returned pointer
+ * is only good until the next call.
+ */
+
+char *zText::getRawEntry()
+{
+/*
+	long start;
+	unsigned long size;
+	unsigned long destsize;
+	char *tmpbuf;
+	char *dest;
+	VerseKey *lkey = (VerseKey *) SWModule::key;
+	char sizebuf[3];
+
+	lkey->Verse(0);
+	if (chapcache != lkey->Index()) {
+		findoffset(lkey->Testament(), lkey->Index(), &start, &((unsigned short) size));
+		readtext(lkey->Testament(), start, 3, sizebuf);
+		memcpy(&size, sizebuf, 2);
+		tmpbuf = new char [ size + 1 ];
+		readtext(lkey->Testament(), start + 2, size + 1 , tmpbuf);
+		//zBuf(&size, tmpbuf);
+		dest = new char [ (size*4) + 1 ];
+		uncompress((Bytef *)dest, &destsize, (Bytef *) tmpbuf, size);
+		chapcache = lkey->Index();
+		delete [] tmpbuf;
+	}
+
+	//findoffset(key->Testament(), key->Index(), &start, &size);
+	findoffset(lkey->Testament(), lkey->Index(), &start, &((unsigned short) size));
+
+	if (versebuf)
+		delete [] versebuf;
+	versebuf = new char [ size + 1 ];
+	//memcpy(versebuf, Buf(), size);
+	memcpy(versebuf, dest, destsize);
+	delete [] dest;
+
+	preptext(versebuf);
+
+	return versebuf;
+*/
+
+	long start = 0;
+	unsigned short size = 0;
+	VerseKey *key = 0;
+
+	//printf ("zText char *\n");
+
+	// see if we have a VerseKey * or descendant
+	try {
+		key = SWDYNAMIC_CAST(VerseKey, this->key);
+	}
+	catch ( ... ) {}
+	// if we don't have a VerseKey * descendant, create our own
+	if (!key)
+		key = new VerseKey(this->key);
+
+	//printf ("checking cache\n");
+	//printf ("finding offset\n");
+	findoffset(key->Testament(), key->Index(), &start, &size);
+	entrySize = size; // support getEntrySize call
+
+	//printf ("deleting previous buffer\n");
+	// grow the shared buffer only when this entry needs more than is allocated
+	unsigned long newsize = (size + 2) * FILTERPAD;
+	if (newsize > entrybufallocsize) {
+		if (entrybuf)
+			delete [] entrybuf;
+		entrybuf = new char [ newsize ];
+		entrybufallocsize = newsize;
+	}
+	*entrybuf = 0;
+
+	//printf ("getting text\n");
+	zreadtext(key->Testament(), start, (size + 2), entrybuf);
+	//printf ("got text\n");
+
+	rawFilter(entrybuf, size, key);
+
+	//printf ("preparing text\n");
+	if (!isUnicode())
+		preptext(entrybuf);
+
+	if (this->key != key) // free our key if we created a VerseKey
+		delete key;
+
+	//printf ("returning text\n");
+	return entrybuf;
+
+}
+
+
+/******************************************************************************
+ * zText::sameBlock - tells whether two keys fall in the same block
+ *
+ * Keys in different testaments are never in the same block. Otherwise the
+ * comparison starts at the case matching blockType and, because no case ends
+ * in a break, continues through every coarser check below it.
+ * NOTE(review): the fall-through looks deliberate (verse blocks must also
+ * match chapter and book) -- confirm.
+ *
+ * RET: true if no checked field differs
+ */
+bool zText::sameBlock(VerseKey *k1, VerseKey *k2) {
+	if (k1->Testament() != k2->Testament())
+		return false;
+
+	switch (blockType) {
+	case VERSEBLOCKS:
+		if (k1->Verse() != k2->Verse())
+			return false;
+	case CHAPTERBLOCKS:
+		if (k1->Chapter() != k2->Chapter())
+			return false;
+	case BOOKBLOCKS:
+		if (k1->Book() != k2->Book())
+			return false;
+	}
+	return true;
+}
+
+
+/******************************************************************************
+ * zText::setEntry - writes inbuf as the entry at the current key
+ *
+ * ENT: inbuf - text to store
+ * len - passed through to settext()
+ *
+ * If the previous write was in a different block (see sameBlock) the cache
+ * is flushed first. A clone of the key used is remembered in lastWriteKey.
+ */
+void zText::setEntry(const char *inbuf, long len) {
+	VerseKey *key = 0;
+	// see if we have a VerseKey * or descendant
+	try {
+		key = SWDYNAMIC_CAST(VerseKey, this->key);
+	}
+	catch ( ... ) {}
+	// if we don't have a VerseKey * descendant, create our own
+	if (!key)
+		key = new VerseKey(this->key);
+
+
+	// see if we've jumped across blocks since last write
+	if (lastWriteKey) {
+		if (!sameBlock(lastWriteKey, key)) {
+			flushCache();
+		}
+		delete lastWriteKey;
+	}
+
+	settext(key->Testament(), key->Index(), inbuf, len);
+
+	lastWriteKey = (VerseKey *)key->clone(); // must delete
+
+	if (this->key != key) // free our key if we created a VerseKey
+		delete key;
+}
+
+
+/******************************************************************************
+ * zText::linkEntry - links the entry at the current key to the entry at inkey
+ *
+ * ENT: inkey - key of the entry to link to; only its Index() is passed on
+ *
+ * Both keys are resolved to VerseKeys (temporaries are created and freed
+ * here when the cast yields nothing) and handed to linkentry().
+ */
+void zText::linkEntry(const SWKey *inkey) {
+	VerseKey *destkey = 0;
+	const VerseKey *srckey = 0;
+	// see if we have a VerseKey * or descendant
+	try {
+		destkey = SWDYNAMIC_CAST(VerseKey, this->key);
+	}
+	catch ( ... ) {}
+	// if we don't have a VerseKey * descendant, create our own
+	if (!destkey)
+		destkey = new VerseKey(this->key);
+
+	// see if we have a VerseKey * or descendant
+	try {
+		srckey = (const VerseKey *) SWDYNAMIC_CAST(VerseKey, inkey);
+	}
+	catch ( ... ) {
+	}
+	// if we don't have a VerseKey * descendant, create our own
+	if (!srckey)
+		srckey = new VerseKey(inkey);
+
+	linkentry(destkey->Testament(), destkey->Index(), srckey->Index());
+
+	if (this->key != destkey) // free our key if we created a VerseKey
+		delete destkey;
+
+	if (inkey != srckey) // free our key if we created a VerseKey
+		delete srckey;
+}
+
+
+/******************************************************************************
+ * zText::deleteEntry - deletes this entry
+ *
+ * Implemented by writing an empty string at the current key via settext().
+ */
+
+void zText::deleteEntry() {
+
+	VerseKey *key = 0;
+
+	try {
+		key = SWDYNAMIC_CAST(VerseKey, this->key);
+	}
+	catch ( ... ) {}
+	if (!key)
+		key = new VerseKey(this->key);
+
+	settext(key->Testament(), key->Index(), "");
+
+	if (key != this->key)
+		delete key;
+}
+
+
+/******************************************************************************
+ * zText::increment - Increments module key a number of entries
+ *
+ * ENT: steps - Number of entries to jump; a negative value moves backward
+ *
+ * A step is only counted when the new position has a different offset/size
+ * than the previous one and is non-empty, or when skipConsecutiveLinks is
+ * off. On a key error the key is restored to the last counted position and
+ * error is set to KEYERR_OUTOFBOUNDS.
+ */
+
+void zText::increment(int steps) {
+	long start;
+	unsigned short size;
+	VerseKey *tmpkey = 0;
+
+	try {
+		tmpkey = SWDYNAMIC_CAST(VerseKey, key);
+	}
+	catch ( ... ) {}
+	if (!tmpkey)
+		tmpkey = new VerseKey(key);
+
+	findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
+
+	SWKey lastgood = *tmpkey;
+	while (steps) {
+		long laststart = start;
+		unsigned short lastsize = size;
+		SWKey lasttry = *tmpkey;	// NOTE(review): never read again in this function
+		(steps > 0) ? (*key)++ : (*key)--;
+		// the key moved, so the VerseKey view of it is rebuilt from scratch
+		if (tmpkey != key)
+			delete tmpkey;
+		tmpkey = 0;
+		try {
+			tmpkey = SWDYNAMIC_CAST(VerseKey, key);
+		}
+		catch ( ... ) {}
+		if (!tmpkey)
+			tmpkey = new VerseKey(key);
+
+		if ((error = key->Error())) {
+			*key = lastgood;
+			break;
+		}
+		long index = tmpkey->Index();
+		findoffset(tmpkey->Testament(), index, &start, &size);
+
+		if (
+			(((laststart != start) || (lastsize != size)) // we're a different entry
+				&& (start > 0) && (size)) // and we actually have a size
+				||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links
+			steps += (steps < 0) ? 1 : -1;
+			lastgood = *tmpkey;
+		}
+	}
+	error = (error) ? KEYERR_OUTOFBOUNDS : 0;
+
+	if (tmpkey != key)
+		delete tmpkey;
+}
diff --git a/src/utilfuns/Greek2Greek.cpp b/src/utilfuns/Greek2Greek.cpp new file mode 100644 index 0000000..7e81f0f --- /dev/null +++ b/src/utilfuns/Greek2Greek.cpp @@ -0,0 +1,901 @@
+//*****************************************************************************
+// Author : William Dicks ***
+// Date Created: 10 February 1998 ***
+// Purpose : Implementation for Greek to b-Greek conversion and vice ***
+// : versa.
*** +// File Name : Greek2Greek.cpp *** +// *** +// Author info : ---------------------------------------------------------- *** +// Address : 23 Tieroogpark *** +// : Hoewe Str *** +// : Elarduspark X3 *** +// : 0181 *** +// : South Africa *** +// Home Tel: +27 (0)12 345 3166 *** +// Cell No : +27 (0)82 577 4424 *** +// e-mail : wd@isis.co.za *** +// Church WWW : http://www.hatfield.co.za *** +// *** +// Bugfix info : ---------------------------------------------------------- *** +// Bug #1 : Greek Font character 197 converted to b-Greek "6" *** +// Date Fixed : 23 February 1998 *** +//***************************************************************************** + +#include <stdio.h> +#include <string.h> +#include <ctype.h> + +#include "Greek2Greek.h" +#include "GreekChars.h" + +//***************************************************************************** +// Used to convert a string created by using the Greek font supplied with the +// Sword Project to a string that conforms to the b-Greek discussion list +// method of transliteration. +//***************************************************************************** + +unsigned char Greek2bGreek( + unsigned char *sResult, + unsigned char *sGreekText, + int nMaxResultBuflen) +{ + char error; + + unsigned int NoOfChars = ParseGreek(sResult, sGreekText, nMaxResultBuflen); + + if (NoOfChars < strlen((char *)sGreekText)) + error = 1; + else + error = 0; + + return error; +} + +//***************************************************************************** +// Used to convert a string created by using the b-Greek method of +// transliteration to a string that can be converted to a Greek-font readable +// string. 
+//***************************************************************************** + +unsigned char bGreek2Greek( + unsigned char *sResult, + unsigned char *sGreekText, + int nMaxResultBuflen) +{ + unsigned char error; + + unsigned int NoOfChars = ParsebGreek(sResult, sGreekText, nMaxResultBuflen); + + if (NoOfChars < strlen((char *)sGreekText)) + error = 1; + else + error = 0; + + return error; +} + +//***************************************************************************** +// Parse a Greek font created string and return the b-Greek equivalent +//***************************************************************************** + +int ParseGreek( + unsigned char *sResult, + unsigned char *sGreekText, + int nMaxResultBuflen) +{ + int characters = 0; + int index = 0; + unsigned char tmp; + bool iota; // true = IOTA subscript; false = No IOTA + bool breathing; // true = add breathing; false = no breathing + bool rough; // true = rough breathing; false = smooth + + // While text is not equal to NULL pointer + + while (sGreekText[index] && characters < nMaxResultBuflen) + { + iota = breathing = rough = false; + tmp = Font2char(sGreekText[index++], iota, breathing, rough); + + if (breathing) + { + if (rough) // Rough breathing + { + sResult[characters++] = ROUGH; // Add rough breathing "h" + sResult[characters++] = tmp; // Insert char + } + else + sResult[characters++] = tmp; // Insert char + } + else + { + if (iota) // IOTA subscript + { + sResult[characters++] = tmp; // Insert char + sResult[characters++] = IOTA_SUB; // Add Iota subscript + } + else + sResult[characters++] = tmp; // Insert char + } + } + sResult[characters] = 0; // Terminate the string + + return index; +} + +//***************************************************************************** +// Parse a b-Greek string and return the Greek font equivalent +//***************************************************************************** +int ParsebGreek( + unsigned char *sResult, + unsigned char *sGreekText, 
+ int nMaxResultBuflen) +{ + int characters = 0; + int index = 0; + bool iota = false; // true = IOTA subscript; false = No IOTA + bool breathing = false; // true = add breathing; false = no breathing + bool rough = false; // true = rough breathing; false = smooth + bool fSigma = false; // Final sigma flag + bool nChar = true; // New char flag + + // While text is not equal to NULL pointer + + while (*sGreekText || characters < nMaxResultBuflen) + { + if (nChar) + { + if (*sGreekText == (unsigned char)ROUGH) + { + rough = true; + breathing = true; + } + else + { + rough = false; + breathing = true; + } + + nChar = false; + } + else if (isPunctSpace(*(sGreekText + 1))) + { + fSigma = true; + } + else if (*(sGreekText + 1) == (unsigned char)IOTA_SUB) + { + iota = true; + } + + if (*sGreekText != (unsigned char)IOTA_SUB) + { + if (*sGreekText == (unsigned char)' ') + { + nChar = true; + } + + if (breathing) + { + if (rough) + { + // When we read a rough breather we want to increment the pointer + // to the right character before char2Font is called. 
+ + sResult[index++] = + char2Font(*++sGreekText, fSigma, iota, breathing, rough); + + sGreekText++; + characters++; + } + else + { + sResult[index++] = + char2Font(*sGreekText++, fSigma, iota, breathing, rough); + characters++; + } + } + else + { + sResult[index++] = + char2Font(*sGreekText++, fSigma, iota, breathing, rough); + characters++; + } + } + else + { + sGreekText++; + characters++; + } + + fSigma = iota = breathing = rough = false; + } + + sResult[index] = 0; // Terminate the string + + return characters; +} + + +//***************************************************************************** +// Convert a character to a GREEK font character +//***************************************************************************** +unsigned char char2Font( + unsigned char letter, // bGreek letter to convert to Font letter + bool finalSigma, // Is it a final SIGMA + bool iota, // true = IOTA subscript; false = No IOTA + bool breathing, // true = add breathing; false = no breathing + bool rough) // true = rough breathing; false = smooth +{ + unsigned char charFont = 0; + + switch (letter) + { + case ALPHA: // A + if (breathing) + { + if (rough) + { + charFont = (unsigned char)gROUGH_ALPHA; + } + else + charFont = (unsigned char)gNON_ROUGH_ALPHA; + } + else + { + if (iota) + { + charFont = (unsigned char)gIOTA_ALPHA; + } + else + charFont = (unsigned char)gALPHA; + } + + break; + + case BETA: // B + charFont = (unsigned char)gBETA; + + break; + + case CHI: // C + charFont = (unsigned char)gCHI; + + break; + + case DELTA: // D + charFont = (unsigned char)gDELTA; + + break; + + case EPSILON: // E + if (breathing) + { + if (rough) + { + charFont = (unsigned char)gROUGH_EPSILON; + } + else + charFont = (unsigned char)gNON_ROUGH_EPSILON; + } + else + { + charFont = (unsigned char)gEPSILON; + } + + break; + + case PHI: // F + charFont = (unsigned char)gPHI; + + break; + + case GAMMA: // G + charFont = (unsigned char)gGAMMA; + + break; + + case ETA: // H + if (breathing) + { 
+ if (rough) + { + charFont = (unsigned char)gROUGH_ETA; + } + else + charFont = (unsigned char)gNON_ROUGH_ETA; + } + else + { + if (iota) + { + charFont = (unsigned char)gIOTA_ETA; + } + else + charFont = (unsigned char)gETA; + } + + break; + + case IOTA: // I + if (breathing) + { + if (rough) + { + charFont = (unsigned char)gROUGH_IOTA; + } + else + charFont = (unsigned char)gNON_ROUGH_IOTA; + } + else + { + charFont = (unsigned char)gIOTA; + } + + break; + + case KAPPA: // K + charFont = (unsigned char)gKAPPA; + + break; + + case LAMBDA: // L + charFont = (unsigned char)gLAMBDA; + + break; + + case MU: // M + charFont = (unsigned char)gMU; + + break; + + case NU: // N + charFont = (unsigned char)gNU; + + break; + + case OMICRON: // O + if (breathing) + { + if (rough) + { + charFont = (unsigned char)gROUGH_OMICRON; + } + else + charFont = (unsigned char)gNON_ROUGH_OMICRON; + } + else + { + charFont = (unsigned char)gOMICRON; + } + + break; + + case PI: // P + charFont = (unsigned char)gPI; + + break; + + case THETA: // Q + charFont = (unsigned char)gTHETA; + + break; + + case RHO: // R + if (breathing) + { + if (rough) + { + charFont = (unsigned char)gROUGH_RHO; + } + else + charFont = (unsigned char)gNON_ROUGH_RHO; + } + else + { + charFont = (unsigned char)gRHO; + } + + break; + + case SIGMA: // S + if (finalSigma) + charFont = (unsigned char)gSIGMA_END; + else + charFont = (unsigned char)gSIGMA; + + break; + + case TAU: // T + charFont = (unsigned char)gTAU; + + break; + + case UPSILON: // U + if (breathing) + { + if (rough) + { + charFont = (unsigned char)gROUGH_UPSILON; + } + else + charFont = (unsigned char)gNON_ROUGH_UPSILON; + } + else + { + charFont = (unsigned char)gUPSILON; + } + + break; + + case OMEGA: // W + if (breathing) + { + if (rough) + { + charFont = (unsigned char)gROUGH_OMEGA; + } + else + charFont = (unsigned char)gNON_ROUGH_OMEGA; + } + else + { + if (iota) + { + charFont = (unsigned char)gIOTA_OMEGA; + } + else + charFont = (unsigned 
char)gOMEGA; + } + + break; + + case XI: // X + charFont = (unsigned char)gXI; + + break; + + case PSI: // Y + charFont = (unsigned char)gPSI; + + break; + + case ZETA: // Z + charFont = (unsigned char)gZETA; + + break; + + default: + if (ispunct(letter) || isspace(letter)) + { + charFont = getGreekPunct(letter); + } + + if (isdigit(letter)) + charFont = letter; + + break; + } + + return charFont; +} + + +//***************************************************************************** +// Convert a GREEK font character to a character +//***************************************************************************** +unsigned char Font2char( + unsigned char letter, // bGreek letter to convert to Font letter + bool &iota, // true = IOTA subscript; false = No IOTA + bool &breathing, // true = add breathing; false = no breathing + bool &rough) // true = rough breathing; false = smooth +{ + unsigned char character = 0; + + if (getSpecialChar(letter, letter)) + { + switch (letter) + { + case gROUGH_ALPHA: // hA + case gIOTA_ALPHA: // Ai + case gNON_ROUGH_ALPHA: // hA + character = ALPHA; + + if (letter == gIOTA_ALPHA) + iota = true; + else + iota = false; + + if (letter == gROUGH_ALPHA) + { + breathing = true; + rough = true; + } + else + { + breathing = false; + rough = false; + } + + break; + + case gROUGH_EPSILON: // hE + case gNON_ROUGH_EPSILON: // hE + character = EPSILON; + iota = false; + + if (letter == gROUGH_EPSILON) + { + breathing = true; + rough = true; + } + else + { + breathing = false; + rough = false; + } + + break; + + case gROUGH_ETA: // hH + case gIOTA_ETA: // Ei + case gNON_ROUGH_ETA: // hH + character = ETA; + + if (letter == gIOTA_ETA) + iota = true; + else + iota = false; + + if (letter == gROUGH_ETA) + { + breathing = true; + rough = true; + } + else + { + breathing = false; + rough = false; + } + + break; + + case gROUGH_IOTA: // hH + case gNON_ROUGH_IOTA: // hH + character = IOTA; + iota = false; + + if (letter == gROUGH_IOTA) + { + breathing = 
true; + rough = true; + } + else + { + breathing = false; + rough = false; + } + + break; + + case gROUGH_OMICRON: // hH + case gNON_ROUGH_OMICRON: // hH + character = OMICRON; + iota = false; + + if (letter == gROUGH_OMICRON) + { + breathing = true; + rough = true; + } + else + { + breathing = false; + rough = false; + } + + break; + + case gROUGH_RHO: // hR + case gNON_ROUGH_RHO: // hR + character = RHO; + iota = false; + + if (letter == gROUGH_RHO) + { + breathing = true; + rough = true; + } + else + { + breathing = false; + rough = false; + } + + break; + + case gROUGH_UPSILON: // hU + case gNON_ROUGH_UPSILON: // hU + character = UPSILON; + iota = false; + + if (letter == gROUGH_UPSILON) + { + breathing = true; + rough = true; + } + else + { + breathing = false; + rough = false; + } + + break; + + case gROUGH_OMEGA: // hW + case gIOTA_OMEGA: // Wi + case gNON_ROUGH_OMEGA: // hW + character = OMEGA; + + if (letter == gIOTA_OMEGA) + iota = true; + else + iota = false; + + if (letter == gROUGH_OMEGA) + { + breathing = true; + rough = true; + } + else + { + breathing = false; + rough = false; + } + + break; + } + } // if (letter > SPECIAL_GREEK) + else + { + if (letter == gSIGMA_END) + { + character = SIGMA; + } + else if (ispunct(letter) || isspace(letter)) + { + character = getbGreekPunct(letter); + } + else if (isdigit(letter)) + { + character = letter; + } + else + { + character = letter - 32; + } + } + + return character; +} + +//***************************************************************************** +// Identify and return a bGreek letter from a special font char +//***************************************************************************** +bool getSpecialChar(unsigned char Font, unsigned char &letter) +{ + bool Yes = false; + letter = Font; + + if (Font >= 133 && Font <= 144) + { + letter = gIOTA; + Font = gIOTA; + } + + if (Font >= 154 && Font <= 159) + { + letter = gEPSILON; + Font = gEPSILON; + } + + if (Font >= 163 && Font <= 171) + { + letter = 
gALPHA; + Font = gALPHA; + } + + if (Font >= 172 && Font <= 182) + { + letter = gIOTA_ALPHA; + Font = gIOTA_ALPHA; + Yes = true; + } + + if (Font >= 187 && Font <= 195) + { + letter = gETA; + Font = gETA; + } + + if (Font >= 197 && Font <= 207) + { + letter = gIOTA_ETA; + Font = gIOTA_ETA; + Yes = true; + } + + if ((Font >= 210 && Font <= 215) || Font == 253) + { + letter = gOMICRON; + Font = gOMICRON; + } + + if (Font >= 218 && Font <= 229) + { + letter = gUPSILON; + Font = gUPSILON; + } + + if (Font >= 232 && Font <= 240) + { + letter = gOMEGA; + Font = gOMEGA; + } + + if (Font >= 241 && Font <= 251) + { + letter = gIOTA_OMEGA; + Font = gIOTA_OMEGA; + Yes = true; + } + + Yes = SpecialGreek(Font); + + return Yes; +} + + +//***************************************************************************** +// true if the font character is a special character; false it isn't +//***************************************************************************** + +bool SpecialGreek(unsigned char Font) +{ + bool res = false; + + switch (Font) + { + case gROUGH_ALPHA: + case gROUGH_EPSILON: + case gROUGH_ETA: + case gROUGH_IOTA: + case gROUGH_OMICRON: + case gROUGH_RHO: + case gROUGH_UPSILON: + case gROUGH_OMEGA: + case gIOTA_ALPHA: + case gIOTA_ETA: + case gIOTA_OMEGA: + case gNON_ROUGH_ALPHA: + case gNON_ROUGH_EPSILON: + case gNON_ROUGH_ETA: + case gNON_ROUGH_IOTA: + case gNON_ROUGH_OMICRON: + case gNON_ROUGH_RHO: + case gNON_ROUGH_UPSILON: + case gNON_ROUGH_OMEGA: + res = true; + + break; + } + + return res; +} + + +//***************************************************************************** +// Return Greek font puntuation from bGreek punstuation +//***************************************************************************** + +unsigned char getGreekPunct(unsigned char bGreek) +{ + unsigned char Font; + + switch (bGreek) + { + case COMMA: + Font = gCOMMA; + break; + + case STOP: + Font = gSTOP; + break; + + case SEMI_COLON: + Font = gSEMI_COLON; + break; + + case 
QUESTION: + Font = gQUESTION; + break; + + default: + Font = ' '; + break; + } + + return Font; +} + + +//***************************************************************************** +// Return bGreek puntuation from Greek font punstuation +//***************************************************************************** + +unsigned char getbGreekPunct(unsigned char Greek) +{ + unsigned char character; + + switch (Greek) + { + case gCOMMA: + character = COMMA; + break; + + case gSTOP: + character = STOP; + break; + + case gSEMI_COLON: + character = SEMI_COLON; + break; + + case gQUESTION: + character = QUESTION; + break; + + default: + character = ' '; + break; + } + + return character; +} + + +//***************************************************************************** +// Is the character punctuation or a space: true it is, false it isn't +//***************************************************************************** + +bool isPunctSpace(unsigned char c) +{ + return (ispunct(c) || isspace(c) || c == 0) ? true : false; +} + +#ifdef __TEST + +int main() +{ + unsigned char *sGreekText = (unsigned char *) + "1„£kwboj qeoà kaˆ kur…ou „hsoà cristoà doàloj ta‹j dèdeka fula‹j ta‹j ™n tÍ diaspor´ ca…rein."; + unsigned char *sResult = new unsigned char[100]; + + char result = Greek2bGreek( + sResult, + sGreekText, + 100); + + strset((char *)sResult, 0); + strset((char *)sGreekText, 0); + + sGreekText = (unsigned char *)"18 EIS AFESIN TWN hAMARTWN hUMWN?"; + result = bGreek2Greek( + sResult, + sGreekText, + 33); + + //delete[] sGreekText; + delete[] sResult; +} + +#endif // __TEST diff --git a/src/utilfuns/Makefile b/src/utilfuns/Makefile new file mode 100644 index 0000000..339f87a --- /dev/null +++ b/src/utilfuns/Makefile @@ -0,0 +1,4 @@ +root := ../.. 
+ +all: + make -C ${root} diff --git a/src/utilfuns/Makefile.am b/src/utilfuns/Makefile.am new file mode 100644 index 0000000..e7b2258 --- /dev/null +++ b/src/utilfuns/Makefile.am @@ -0,0 +1,19 @@ + +utilfunsdir = $(top_srcdir)/src/utilfuns +libsword_la_SOURCES += $(utilfunsdir)/Greek2Greek.cpp +libsword_la_SOURCES += $(utilfunsdir)/utilstr.cpp +libsword_la_SOURCES += $(utilfunsdir)/unixstr.cpp +libsword_la_SOURCES += $(utilfunsdir)/swunicod.cpp +libsword_la_SOURCES += $(utilfunsdir)/swversion.cpp + +if MINGW +SWREGEX = $(utilfunsdir)/regex.c +else +SWREGEX = +endif +libsword_la_SOURCES += $(SWREGEX) + + +libsword_la_SOURCES += $(utilfunsdir)/roman.c + + diff --git a/src/utilfuns/greekpatch b/src/utilfuns/greekpatch new file mode 100644 index 0000000..3a6d20c --- /dev/null +++ b/src/utilfuns/greekpatch @@ -0,0 +1,19 @@ +--- Greek2Greek.cpp.orig Thu Feb 26 15:19:39 1998 ++++ Greek2Greek.cpp Thu Feb 26 03:33:09 1998 +@@ -118,6 +118,7 @@ + sResult[characters++] = tmp; // Insert char + } + } ++ sResult[characters] = 0; // Terminate the string + + return index; + } +@@ -209,6 +210,8 @@ + + fSigma = iota = breathing = rough = false; + } ++ ++ sResult[index] = 0; // Terminate the string + + return characters; + } diff --git a/src/utilfuns/regex.c b/src/utilfuns/regex.c new file mode 100644 index 0000000..239b0dd --- /dev/null +++ b/src/utilfuns/regex.c @@ -0,0 +1,5725 @@ +/* Extended regular expression matching and search library, + version 0.12. + (Implements POSIX draft P1003.2/D11.2, except for some of the + internationalization features.) + + Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc. + + the C library, however. The master source lives in /gd/gnu/lib. + +NOTE: The canonical source of this file is maintained with the +GNU C Library. Bugs can be reported to bug-glibc@prep.ai.mit.edu. 
+ +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software Foundation, +Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +/* AIX requires this to be the first thing in the file. */ +#if defined (_AIX) && !defined (REGEX_MALLOC) + #pragma alloca +#endif + +#undef _GNU_SOURCE +#define _GNU_SOURCE + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#if defined(STDC_HEADERS) && !defined(emacs) +#include <stddef.h> +#else +/* We need this for `regex.h', and perhaps for the Emacs include files. */ +#include <sys/types.h> +#endif +#include <stdlib.h> // sword + +/* For platform which support the ISO C amendement 1 functionality we + support user defined character classes. */ +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +# include <wctype.h> +# include <wchar.h> +#endif + +/* This is for other GNU distributions with internationalized messages. */ +#if HAVE_LIBINTL_H || defined (_LIBC) +# include <libintl.h> +#else +# define gettext(msgid) (msgid) +#endif + +#ifndef gettext_noop +/* This define is so xgettext can find the internationalizable + strings. */ +#define gettext_noop(String) String +#endif + +/* The `emacs' switch turns on certain matching commands + that make sense only in Emacs. 
*/ +#ifdef emacs + +#include "lisp.h" +#include "buffer.h" +#include "syntax.h" + +#else /* not emacs */ + +/* If we are not linking with Emacs proper, + we can't use the relocating allocator + even if config.h says that we can. */ +#undef REL_ALLOC + +#if defined (STDC_HEADERS) || defined (_LIBC) +#include <stdlib.h> +#else +//sword char *malloc (); +//sword char *realloc (); +#endif + +/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. + If nothing else has been done, use the method below. */ +#ifdef INHIBIT_STRING_HEADER +#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY)) +#if !defined (bzero) && !defined (bcopy) +#undef INHIBIT_STRING_HEADER +#endif +#endif +#endif + +/* This is the normal way of making sure we have a bcopy and a bzero. + This is used in most programs--a few other programs avoid this + by defining INHIBIT_STRING_HEADER. */ +#define HAVE_STRING_H // for SWORD Project +#ifndef INHIBIT_STRING_HEADER +#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC) +#include <string.h> +#ifndef bcmp +#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) +#endif +#ifndef bcopy +#define bcopy(s, d, n) memcpy ((d), (s), (n)) +#endif +#ifndef bzero +#define bzero(s, n) memset ((s), 0, (n)) +#endif +#else +#include <strings.h> +#endif +#endif + +/* Define the syntax stuff for \<, \>, etc. */ + +/* This must be nonzero for the wordchar and notwordchar pattern + commands in re_match_2. */ +#ifndef Sword +#define Sword 1 +#endif + +#ifdef SWITCH_ENUM_BUG +#define SWITCH_ENUM_CAST(x) ((int)(x)) +#else +#define SWITCH_ENUM_CAST(x) (x) +#endif + +#ifdef SYNTAX_TABLE + +extern char *re_syntax_table; + +#else /* not SYNTAX_TABLE */ + +/* How many characters in the character set. 
*/ +#define CHAR_SET_SIZE 256 + +static char re_syntax_table[CHAR_SET_SIZE]; + +static void +init_syntax_once () +{ + register int c; + static int done = 0; + + if (done) + return; + + bzero (re_syntax_table, sizeof re_syntax_table); + + for (c = 'a'; c <= 'z'; c++) + re_syntax_table[c] = Sword; + + for (c = 'A'; c <= 'Z'; c++) + re_syntax_table[c] = Sword; + + for (c = '0'; c <= '9'; c++) + re_syntax_table[c] = Sword; + + re_syntax_table['_'] = Sword; + + done = 1; +} + +#endif /* not SYNTAX_TABLE */ + +#define SYNTAX(c) re_syntax_table[c] + +#endif /* not emacs */ + +/* Get the interface, including the syntax bits. */ +#include "regex.h" + +/* isalpha etc. are used for the character classes. */ +#include <ctype.h> + +/* Jim Meyering writes: + + "... Some ctype macros are valid only for character codes that + isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when + using /bin/cc or gcc but without giving an ansi option). So, all + ctype uses should be through macros like ISPRINT... If + STDC_HEADERS is defined, then autoconf has verified that the ctype + macros don't need to be guarded with references to isascii. ... + Defining isascii to 1 should let any compiler worth its salt + eliminate the && through constant folding." 
*/ + +#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) +#define ISASCII(c) 1 +#else +#define ISASCII(c) isascii(c) +#endif + +/* ISBLANK and ISGRAPH fall back to hand-written tests when isblank or + isgraph is not defined as a macro. */ +#ifdef isblank +#define ISBLANK(c) (ISASCII (c) && isblank (c)) +#else +#define ISBLANK(c) ((c) == ' ' || (c) == '\t') +#endif +#ifdef isgraph +#define ISGRAPH(c) (ISASCII (c) && isgraph (c)) +#else +#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) +#endif + +/* The remaining classification macros guard each ctype test with ISASCII. */ +#define ISPRINT(c) (ISASCII (c) && isprint (c)) +#define ISDIGIT(c) (ISASCII (c) && isdigit (c)) +#define ISALNUM(c) (ISASCII (c) && isalnum (c)) +#define ISALPHA(c) (ISASCII (c) && isalpha (c)) +#define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) +#define ISLOWER(c) (ISASCII (c) && islower (c)) +#define ISPUNCT(c) (ISASCII (c) && ispunct (c)) +#define ISSPACE(c) (ISASCII (c) && isspace (c)) +#define ISUPPER(c) (ISASCII (c) && isupper (c)) +#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) + +#ifndef NULL +#define NULL (void *)0 +#endif + +/* We remove any previous definition of `SIGN_EXTEND_CHAR', + since ours (we hope) works properly with all combinations of + machines, compilers, `char' and `unsigned char' argument types. + (Per Bothner suggested the basic approach.) */ +#undef SIGN_EXTEND_CHAR +#if __STDC__ +#define SIGN_EXTEND_CHAR(c) ((signed char) (c)) +#else /* not __STDC__ */ +/* As in Harbison and Steele. */ +#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) +#endif + +/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we + use `alloca' instead of `malloc'. This is because using malloc in + re_search* or re_match* could cause memory leaks when C-g is used in + Emacs; also, malloc is slower and causes storage fragmentation. On + the other hand, malloc is more portable, and easier to debug. + + Because we sometimes use alloca, some routines have to be macros, + not functions -- `alloca'-allocated space disappears at the end of the + function it is called in.
*/ + +#ifdef REGEX_MALLOC + +/* With malloc, reallocation is a plain realloc; OSIZE is not used. */ +#define REGEX_ALLOCATE malloc +#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) +#define REGEX_FREE free + +#else /* not REGEX_MALLOC */ + +/* Emacs already defines alloca, sometimes. */ +#ifndef alloca + +/* Make alloca work the best possible way. */ +#ifdef __GNUC__ +#define alloca __builtin_alloca +#else /* not __GNUC__ */ +#if HAVE_ALLOCA_H +#include <alloca.h> +#else /* not __GNUC__ or HAVE_ALLOCA_H */ +#if 0 /* It is a bad idea to declare alloca. We always cast the result. */ +#ifndef _AIX /* Already did AIX, up at the top. */ +char *alloca (); +#endif /* not _AIX */ +#endif +#endif /* not HAVE_ALLOCA_H */ +#endif /* not __GNUC__ */ + +#endif /* not alloca */ + +#define REGEX_ALLOCATE alloca + +/* Assumes a `char *destination' variable. Allocates NSIZE bytes with + alloca and copies the first OSIZE bytes of SOURCE into them; SOURCE + itself is not released. */ +#define REGEX_REALLOCATE(source, osize, nsize) \ + (destination = (char *) alloca (nsize), \ + bcopy (source, destination, osize), \ + destination) + +/* No need to do anything to free, after alloca. */ +#define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ + +#endif /* not REGEX_MALLOC */ + +/* Define how to allocate the failure stack. */ + +#if defined (REL_ALLOC) && defined (REGEX_MALLOC) + +#define REGEX_ALLOCATE_STACK(size) \ + r_alloc (&failure_stack_ptr, (size)) +#define REGEX_REALLOCATE_STACK(source, osize, nsize) \ + r_re_alloc (&failure_stack_ptr, (nsize)) +#define REGEX_FREE_STACK(ptr) \ + r_alloc_free (&failure_stack_ptr) + +#else /* not using relocating allocator */ + +#ifdef REGEX_MALLOC + +#define REGEX_ALLOCATE_STACK malloc +#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) +#define REGEX_FREE_STACK free + +#else /* not REGEX_MALLOC */ + +#define REGEX_ALLOCATE_STACK alloca + +#define REGEX_REALLOCATE_STACK(source, osize, nsize) \ + REGEX_REALLOCATE (source, osize, nsize) +/* No need to explicitly free anything.
*/ +#define REGEX_FREE_STACK(arg) + +#endif /* not REGEX_MALLOC */ +#endif /* not using relocating allocator */ + + +/* True if `size1' is nonzero and PTR is pointing anywhere inside + `string1' or just past its end. This works if PTR is NULL, which is + a good thing. */ +#define FIRST_STRING_P(ptr) \ + (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) + +/* (Re)Allocate N items of type T using malloc, or fail. + RETALLOC_IF expands to an unbraced if/else statement, not an expression. */ +#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) +#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) +#define RETALLOC_IF(addr, n, t) \ + if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) +#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) + +#define BYTEWIDTH 8 /* In bits. */ + +#define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) + +#undef MAX +#undef MIN +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +typedef char boolean; +#define false 0 +#define true 1 + +static int re_match_2_internal (); + +/* These are the command codes that appear in compiled regular + expressions. Some opcodes are followed by argument bytes. A + command code can specify any interpretation whatsoever for its + arguments. Zero bytes may appear in the compiled regular expression. */ + +typedef enum +{ + no_op = 0, + + /* Succeed right away--no more backtracking. */ + succeed, + + /* Followed by one byte giving n, then by n literal bytes. */ + exactn, + + /* Matches any (more or less) character. */ + anychar, + + /* Matches any one char belonging to specified set. First + following byte is number of bitmap bytes. Then come bytes + for a bitmap saying which chars are in. Bits in each byte + are ordered low-bit-first. A character is in the set if its + bit is 1. A character too large to have a bit in the map is + automatically not in the set. */ + charset, + + /* Same parameters as charset, but match any character that is + not one of those specified.
*/ + charset_not, + + /* Start remembering the text that is matched, for storing in a + register. Followed by one byte with the register number, in + the range 0 to one less than the pattern buffer's re_nsub + field. Then followed by one byte with the number of groups + inner to this one. (This last has to be part of the + start_memory only because we need it in the on_failure_jump + of re_match_2.) */ + start_memory, + + /* Stop remembering the text that is matched and store it in a + memory register. Followed by one byte with the register + number, in the range 0 to one less than `re_nsub' in the + pattern buffer, and one byte with the number of inner groups, + just like `start_memory'. (We need the number of inner + groups here because we don't have any easy way of finding the + corresponding start_memory when we're at a stop_memory.) */ + stop_memory, + + /* Match a duplicate of something remembered. Followed by one + byte containing the register number. */ + duplicate, + + /* Fail unless at beginning of line. */ + begline, + + /* Fail unless at end of line. */ + endline, + + /* Succeeds if at beginning of buffer (if emacs) or at beginning + of string to be matched (if not). */ + begbuf, + + /* Analogously, for end of buffer/string. */ + endbuf, + + /* Followed by a two-byte relative address to which to jump. */ + jump, + + /* Same as jump, but marks the end of an alternative. */ + jump_past_alt, + + /* Followed by two-byte relative address of place to resume at + in case of failure. */ + on_failure_jump, + + /* Like on_failure_jump, but pushes a placeholder instead of the + current string position when executed. */ + on_failure_keep_string_jump, + + /* Throw away latest failure point and then jump to following + two-byte relative address. */ + pop_failure_jump, + + /* Change to pop_failure_jump if we know we won't have to backtrack to + match; otherwise change to jump. This is used to jump + back to the beginning of a repeat.
If what follows this jump + clearly won't match what the repeat does, such that we can be + sure that there is no use backtracking out of repetitions + already matched, then we change it to a pop_failure_jump. + Followed by a two-byte address. */ + maybe_pop_jump, + + /* Jump to following two-byte address, and push a dummy failure + point. This failure point will be thrown away if an attempt + is made to use it for a failure. A `+' construct makes this + before the first repeat. Also used as an intermediary kind + of jump when compiling an alternative. */ + dummy_failure_jump, + + /* Push a dummy failure point and continue. Used at the end of + alternatives. */ + push_dummy_failure, + + /* Followed by two-byte relative address and two-byte number n. + After matching N times, jump to the address upon failure. */ + succeed_n, + + /* Followed by two-byte relative address, and two-byte number n. + Jump to the address N times, then fail. */ + jump_n, + + /* Set the following two-byte relative address to the + subsequent two-byte number. The address *includes* the two + bytes of number. */ + set_number_at, + + wordchar, /* Matches any word-constituent character. */ + notwordchar, /* Matches any char that is not a word-constituent. */ + + wordbeg, /* Succeeds if at word beginning. */ + wordend, /* Succeeds if at word end. */ + + wordbound, /* Succeeds if at a word boundary. */ + notwordbound /* Succeeds if not at a word boundary. */ + +#ifdef emacs + ,before_dot, /* Succeeds if before point. */ + at_dot, /* Succeeds if at point. */ + after_dot, /* Succeeds if after point. */ + + /* Matches any character whose syntax is specified. Followed by + a byte which contains a syntax code, e.g., Sword. */ + syntaxspec, + + /* Matches any character whose syntax is not that specified. */ + notsyntaxspec +#endif /* emacs */ +} re_opcode_t; + +/* Common operations on the compiled pattern. */ + +/* Store NUMBER in two contiguous bytes starting at DESTINATION.
*/ +/* The low-order byte is stored first, then NUMBER shifted right by 8. */ + +#define STORE_NUMBER(destination, number) \ + do { \ + (destination)[0] = (number) & 0377; \ + (destination)[1] = (number) >> 8; \ + } while (0) + +/* Same as STORE_NUMBER, except increment DESTINATION to + the byte after where the number is stored. Therefore, DESTINATION + must be an lvalue. */ + +#define STORE_NUMBER_AND_INCR(destination, number) \ + do { \ + STORE_NUMBER (destination, number); \ + (destination) += 2; \ + } while (0) + +/* Put into DESTINATION a number stored in two contiguous bytes starting + at SOURCE. The first byte supplies the low-order 8 bits; the second + byte is sign-extended and supplies the high-order bits. */ + +#define EXTRACT_NUMBER(destination, source) \ + do { \ + (destination) = *(source) & 0377; \ + (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ + } while (0) + +#ifdef DEBUG +/* Function form of EXTRACT_NUMBER, compiled only when DEBUG is defined; + the result is stored through DEST. */ +static void extract_number _RE_ARGS ((int *dest, unsigned char *source)); +static void +extract_number (dest, source) + int *dest; + unsigned char *source; +{ + int temp = SIGN_EXTEND_CHAR (*(source + 1)); + *dest = *source & 0377; + *dest += temp << 8; +} + +#ifndef EXTRACT_MACROS /* To debug the macros. */ +#undef EXTRACT_NUMBER +#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) +#endif /* not EXTRACT_MACROS */ + +#endif /* DEBUG */ + +/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. + SOURCE must be an lvalue.
*/ + +#define EXTRACT_NUMBER_AND_INCR(destination, source) \ + do { \ + EXTRACT_NUMBER (destination, source); \ + (source) += 2; \ + } while (0) + +#ifdef DEBUG +/* Function form of EXTRACT_NUMBER_AND_INCR, compiled only when DEBUG is + defined: stores the number through DESTINATION and advances *SOURCE by + two bytes. */ +static void extract_number_and_incr _RE_ARGS ((int *destination, + unsigned char **source)); +static void +extract_number_and_incr (destination, source) + int *destination; + unsigned char **source; +{ + extract_number (destination, *source); + *source += 2; +} + +#ifndef EXTRACT_MACROS +#undef EXTRACT_NUMBER_AND_INCR +#define EXTRACT_NUMBER_AND_INCR(dest, src) \ + extract_number_and_incr (&dest, &src) +#endif /* not EXTRACT_MACROS */ + +#endif /* DEBUG */ + +/* If DEBUG is defined, Regex prints many voluminous messages about what + it is doing (if the variable `debug' is nonzero). If linked with the + main program in `iregex.c', you can enter patterns and strings + interactively. And if linked with the main program in `main.c' and + the other test files, you can run the already-written tests. */ + +#ifdef DEBUG + +/* We use standard I/O for debugging. */ +#include <stdio.h> + +/* It is useful to test things that ``must'' be true when debugging. */ +#include <assert.h> + +static int debug = 0; + +/* DEBUG_STATEMENT (E) expands to E. Each DEBUG_PRINT macro calls its + printing routine only when `debug' is nonzero and expands to an + unbraced `if' statement. DEBUG_PRINT_COMPILED_PATTERN does not use + its first argument. */ +#define DEBUG_STATEMENT(e) e +#define DEBUG_PRINT1(x) if (debug) printf (x) +#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) +#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) +#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) +#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ + if (debug) print_partial_compiled_pattern (s, e) +#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ + if (debug) print_double_string (w, s1, sz1, s2, sz2) + + +/* Print the fastmap in human-readable form.
*/ + +void +print_fastmap (fastmap) + char *fastmap; +{ + unsigned was_a_range = 0; + unsigned i = 0; + + while (i < (1 << BYTEWIDTH)) + { + if (fastmap[i++]) + { + was_a_range = 0; + putchar (i - 1); + while (i < (1 << BYTEWIDTH) && fastmap[i]) + { + was_a_range = 1; + i++; + } + if (was_a_range) + { + printf ("-"); + putchar (i - 1); + } + } + } + putchar ('\n'); +} + + +/* Print a compiled pattern string in human-readable form, starting at + the START pointer into it and ending just before the pointer END. + Each command is printed on its own line, prefixed by its offset from + START. Prints "(null)" when START is NULL. */ + +void +print_partial_compiled_pattern (start, end) + unsigned char *start; + unsigned char *end; +{ + int mcnt, mcnt2; + unsigned char *p1; + unsigned char *p = start; + unsigned char *pend = end; + + if (start == NULL) + { + printf ("(null)\n"); + return; + } + + /* Loop over pattern commands. */ + while (p < pend) + { + printf ("%d:\t", p - start); + + switch ((re_opcode_t) *p++) + { + case no_op: + printf ("/no_op"); + break; + + case exactn: + mcnt = *p++; + printf ("/exactn/%d", mcnt); + do + { + putchar ('/'); + putchar (*p++); + } + while (--mcnt); + break; + + case start_memory: + mcnt = *p++; + printf ("/start_memory/%d/%d", mcnt, *p++); + break; + + case stop_memory: + mcnt = *p++; + printf ("/stop_memory/%d/%d", mcnt, *p++); + break; + + case duplicate: + printf ("/duplicate/%d", *p++); + break; + + case anychar: + printf ("/anychar"); + break; + + case charset: + case charset_not: + { + register int c, last = -100; + register int in_range = 0; + + printf ("/charset [%s", + (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); + + assert (p + *p < pend); + + for (c = 0; c < 256; c++) + if (c / 8 < *p + && (p[1 + (c/8)] & (1 << (c % 8)))) + { + /* Are we starting a range? */ + if (last + 1 == c && ! in_range) + { + putchar ('-'); + in_range = 1; + } + /* Have we broken a range? */ + else if (last + 1 != c && in_range) + { + putchar (last); + in_range = 0; + } + + if (!
in_range) + putchar (c); + + last = c; + } + + if (in_range) + putchar (last); + + putchar (']'); + + p += 1 + *p; + } + break; + + case begline: + printf ("/begline"); + break; + + case endline: + printf ("/endline"); + break; + + case on_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/on_failure_jump to %d", p + mcnt - start); + break; + + case on_failure_keep_string_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); + break; + + case dummy_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/dummy_failure_jump to %d", p + mcnt - start); + break; + + case push_dummy_failure: + printf ("/push_dummy_failure"); + break; + + case maybe_pop_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/maybe_pop_jump to %d", p + mcnt - start); + break; + + case pop_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/pop_failure_jump to %d", p + mcnt - start); + break; + + case jump_past_alt: + extract_number_and_incr (&mcnt, &p); + printf ("/jump_past_alt to %d", p + mcnt - start); + break; + + case jump: + extract_number_and_incr (&mcnt, &p); + printf ("/jump to %d", p + mcnt - start); + break; + + case succeed_n: + extract_number_and_incr (&mcnt, &p); + p1 = p + mcnt; + extract_number_and_incr (&mcnt2, &p); + printf ("/succeed_n to %d, %d times", p1 - start, mcnt2); + break; + + case jump_n: + extract_number_and_incr (&mcnt, &p); + p1 = p + mcnt; + extract_number_and_incr (&mcnt2, &p); + printf ("/jump_n to %d, %d times", p1 - start, mcnt2); + break; + + case set_number_at: + extract_number_and_incr (&mcnt, &p); + p1 = p + mcnt; + extract_number_and_incr (&mcnt2, &p); + printf ("/set_number_at location %d to %d", p1 - start, mcnt2); + break; + + case wordbound: + printf ("/wordbound"); + break; + + case notwordbound: + printf ("/notwordbound"); + break; + + case wordbeg: + printf ("/wordbeg"); + break; + + case wordend: + printf ("/wordend"); + break; + +#ifdef emacs + case
before_dot: + printf ("/before_dot"); + break; + + case at_dot: + printf ("/at_dot"); + break; + + case after_dot: + printf ("/after_dot"); + break; + + case syntaxspec: + printf ("/syntaxspec"); + mcnt = *p++; + printf ("/%d", mcnt); + break; + + case notsyntaxspec: + printf ("/notsyntaxspec"); + mcnt = *p++; + printf ("/%d", mcnt); + break; +#endif /* emacs */ + + case wordchar: + printf ("/wordchar"); + break; + + case notwordchar: + printf ("/notwordchar"); + break; + + case begbuf: + printf ("/begbuf"); + break; + + case endbuf: + printf ("/endbuf"); + break; + + default: + printf ("?%d", *(p-1)); + } + + putchar ('\n'); + } + + printf ("%d:\tend of pattern.\n", p - start); +} + + +/* Print the compiled pattern held in BUFP, its used and allocated sizes, + its fastmap (only when fastmap_accurate is set and a fastmap exists) + and its remaining fields. */ +void +print_compiled_pattern (bufp) + struct re_pattern_buffer *bufp; +{ + unsigned char *buffer = bufp->buffer; + + print_partial_compiled_pattern (buffer, buffer + bufp->used); + printf ("%ld bytes used/%ld bytes allocated.\n", + bufp->used, bufp->allocated); + + if (bufp->fastmap_accurate && bufp->fastmap) + { + printf ("fastmap: "); + print_fastmap (bufp->fastmap); + } + + printf ("re_nsub: %d\t", bufp->re_nsub); + printf ("regs_alloc: %d\t", bufp->regs_allocated); + printf ("can_be_null: %d\t", bufp->can_be_null); + printf ("newline_anchor: %d\n", bufp->newline_anchor); + printf ("no_sub: %d\t", bufp->no_sub); + printf ("not_bol: %d\t", bufp->not_bol); + printf ("not_eol: %d\t", bufp->not_eol); + printf ("syntax: %lx\n", bufp->syntax); + /* Perhaps we should print the translate table?
*/ +} + + +/* Print the part of the two-piece subject text that starts at WHERE: the + rest of STRING1 followed by all of STRING2 when WHERE lies in STRING1, + otherwise the rest of STRING2. Prints "(null)" when WHERE is NULL. */ +void +print_double_string (where, string1, size1, string2, size2) + const char *where; + const char *string1; + const char *string2; + int size1; + int size2; +{ + int this_char; + + if (where == NULL) + printf ("(null)"); + else + { + if (FIRST_STRING_P (where)) + { + for (this_char = where - string1; this_char < size1; this_char++) + putchar (string1[this_char]); + + where = string2; + } + + for (this_char = where - string2; this_char < size2; this_char++) + putchar (string2[this_char]); + } +} + +/* Write the character C to stderr. */ +void +printchar (c) + int c; +{ + putc (c, stderr); +} + +#else /* not DEBUG */ + +#undef assert +#define assert(e) + +#define DEBUG_STATEMENT(e) +#define DEBUG_PRINT1(x) +#define DEBUG_PRINT2(x1, x2) +#define DEBUG_PRINT3(x1, x2, x3) +#define DEBUG_PRINT4(x1, x2, x3, x4) +#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) +#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) + +#endif /* not DEBUG */ + +/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can + also be assigned to arbitrarily: each pattern buffer stores its own + syntax, so it can be changed between regex compilations. */ +/* This has no initializer because initialized variables in Emacs + become read-only after dumping. */ +reg_syntax_t re_syntax_options; + + +/* Specify the precise syntax of regexps for compilation. This provides + for compatibility for various utilities which historically have + different, incompatible syntaxes. + + The argument SYNTAX is a bit mask comprised of the various bits + defined in regex.h. We return the old syntax. + + When DEBUG is defined, the RE_DEBUG bit of SYNTAX also switches the + `debug' flag on or off. */ + +reg_syntax_t +re_set_syntax (syntax) + reg_syntax_t syntax; +{ + reg_syntax_t ret = re_syntax_options; + + re_syntax_options = syntax; +#ifdef DEBUG + if (syntax & RE_DEBUG) + debug = 1; + else if (debug) /* was on but now is not */ + debug = 0; +#endif /* DEBUG */ + return ret; +} + +/* This table gives an error message for each of the error codes listed + in regex.h. Obviously the order here has to be same as there.
+ POSIX doesn't require that we do anything for REG_NOERROR, + but why not be nice? */ + +/* Every message is wrapped in gettext_noop; the trailing comment on each + entry names the error code the message belongs to. */ +static const char *re_error_msgid[] = + { + gettext_noop ("Success"), /* REG_NOERROR */ + gettext_noop ("No match"), /* REG_NOMATCH */ + gettext_noop ("Invalid regular expression"), /* REG_BADPAT */ + gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */ + gettext_noop ("Invalid character class name"), /* REG_ECTYPE */ + gettext_noop ("Trailing backslash"), /* REG_EESCAPE */ + gettext_noop ("Invalid back reference"), /* REG_ESUBREG */ + gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */ + gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */ + gettext_noop ("Unmatched \\{"), /* REG_EBRACE */ + gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */ + gettext_noop ("Invalid range end"), /* REG_ERANGE */ + gettext_noop ("Memory exhausted"), /* REG_ESPACE */ + gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */ + gettext_noop ("Premature end of regular expression"), /* REG_EEND */ + gettext_noop ("Regular expression too big"), /* REG_ESIZE */ + gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */ + }; + +/* Avoiding alloca during matching, to placate r_alloc. */ + +/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the + searching and matching functions do not call alloca. On some + systems, alloca is implemented in terms of malloc, and if we're + using the relocating allocator routines, then malloc could cause a + relocation, which might (if the strings being searched are in the + ralloc heap) shift the data out from underneath the regexp + routines. + + Here's another reason to avoid allocation: Emacs + processes input from X in a signal handler; processing X input may + call malloc; if input arrives while a matching routine is calling + malloc, then we're scrod. But Emacs can't just block input while + calling matching routines; then we don't notice interrupts when + they come in.
So, Emacs blocks input around all regexp calls + except the matching calls, which it leaves unprotected, in the + faith that they will not malloc. */ + +/* Normally, this is fine. */ +#define MATCH_MAY_ALLOCATE + +/* When using GNU C, we are not REALLY using the C alloca, no matter + what config.h may say. So don't take precautions for it. */ +#ifdef __GNUC__ +#undef C_ALLOCA +#endif + +/* The match routines may not allocate if (1) they would do it with malloc + and (2) it's not safe for them to use malloc. + Note that if REL_ALLOC is defined, matching would not use malloc for the + failure stack, but we would still use it for the register vectors; + so REL_ALLOC should not affect this. */ +#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs) +#undef MATCH_MAY_ALLOCATE +#endif + + +/* Failure stack declarations and macros; both re_compile_fastmap and + re_match_2 use a failure stack. These have to be macros because of + REGEX_ALLOCATE_STACK. */ + + +/* Number of failure points for which to initially allocate space + when matching. If this number is exceeded, we allocate more + space, so it is not a hard limit. (INIT_FAIL_STACK uses it as the + initial number of stack elements.) */ +#ifndef INIT_FAILURE_ALLOC +#define INIT_FAILURE_ALLOC 5 +#endif + +/* Roughly the maximum number of failure points on the stack. Would be + exactly that if we always used MAX_FAILURE_ITEMS items each time we failed. + This is a variable only so users of regex can assign to it; we never + change it ourselves. */ + +#ifdef INT_IS_16BIT + +#if defined (MATCH_MAY_ALLOCATE) +/* 4400 was enough to cause a crash on Alpha OSF/1, + whose default stack limit is 2mb. */ +long int re_max_failures = 4000; +#else +long int re_max_failures = 2000; +#endif + +/* One element of the failure stack: either a pointer or an integer. */ +union fail_stack_elt +{ + unsigned char *pointer; + long int integer; +}; + +typedef union fail_stack_elt fail_stack_elt_t; + +typedef struct +{ + fail_stack_elt_t *stack; + unsigned long int size; + unsigned long int avail; /* Offset of next open position.
*/ +} fail_stack_type; + +#else /* not INT_IS_16BIT */ + +#if defined (MATCH_MAY_ALLOCATE) +/* 4400 was enough to cause a crash on Alpha OSF/1, + whose default stack limit is 2mb. */ +int re_max_failures = 20000; +#else +int re_max_failures = 2000; +#endif + +/* One element of the failure stack: either a pointer or an integer. */ +union fail_stack_elt +{ + unsigned char *pointer; + int integer; +}; + +typedef union fail_stack_elt fail_stack_elt_t; + +typedef struct +{ + fail_stack_elt_t *stack; + unsigned size; + unsigned avail; /* Offset of next open position. */ +} fail_stack_type; + +#endif /* INT_IS_16BIT */ + +/* Tests on the failure stack. FAIL_STACK_EMPTY and FAIL_STACK_FULL use + the variable `fail_stack'; FAIL_STACK_PTR_EMPTY uses the pointer + `fail_stack_ptr'. */ +#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) +#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) +#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) + + +/* Define macros to initialize and free the failure stack. + Do `return -2' if the alloc fails. */ + +#ifdef MATCH_MAY_ALLOCATE +#define INIT_FAIL_STACK() \ + do { \ + fail_stack.stack = (fail_stack_elt_t *) \ + REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ + \ + if (fail_stack.stack == NULL) \ + return -2; \ + \ + fail_stack.size = INIT_FAILURE_ALLOC; \ + fail_stack.avail = 0; \ + } while (0) + +#define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) +#else +#define INIT_FAIL_STACK() \ + do { \ + fail_stack.avail = 0; \ + } while (0) + +#define RESET_FAIL_STACK() +#endif + + +/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. + The current size is compared with re_max_failures * MAX_FAILURE_ITEMS + before doubling. + + Return 1 if succeeds, and 0 if either ran out of memory + allocating space for it or it was already too large. + + REGEX_REALLOCATE_STACK requires `destination' be declared. */ + +#define DOUBLE_FAIL_STACK(fail_stack) \ + ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \ + ? 0 \ + : ((fail_stack).stack = (fail_stack_elt_t *) \ + REGEX_REALLOCATE_STACK ((fail_stack).stack, \ + (fail_stack).size * sizeof (fail_stack_elt_t), \ + ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ + \ + (fail_stack).stack == NULL \ + ?
0 \ + : ((fail_stack).size <<= 1, \ + 1))) + + +/* Push pointer POINTER on FAIL_STACK. + Return 1 if we were able to do so and 0 if we ran out of memory allocating + space to do so. Note that the fullness test, FAIL_STACK_FULL, looks at + the variable `fail_stack' rather than at the FAIL_STACK argument. */ +#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ + ((FAIL_STACK_FULL () \ + && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ + ? 0 \ + : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ + 1)) + +/* Push a pointer value onto the failure stack. + Assumes the variable `fail_stack'. Probably should only + be called from within `PUSH_FAILURE_POINT'. + Like the other PUSH_FAILURE_... macros, it does not check for room. */ +#define PUSH_FAILURE_POINTER(item) \ + fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) + +/* This pushes an integer-valued item onto the failure stack. + Assumes the variable `fail_stack'. Probably should only + be called from within `PUSH_FAILURE_POINT'. */ +#define PUSH_FAILURE_INT(item) \ + fail_stack.stack[fail_stack.avail++].integer = (item) + +/* Push a fail_stack_elt_t value onto the failure stack. + Assumes the variable `fail_stack'. Probably should only + be called from within `PUSH_FAILURE_POINT'. */ +#define PUSH_FAILURE_ELT(item) \ + fail_stack.stack[fail_stack.avail++] = (item) + +/* These three POP... operations complement the three PUSH... operations. + All assume that `fail_stack' is nonempty. */ +#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer +#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer +#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] + +/* Used to omit pushing failure point id's when we're not debugging. */ +#ifdef DEBUG +#define DEBUG_PUSH PUSH_FAILURE_INT +#define DEBUG_POP(item_addr) (item_addr)->integer = POP_FAILURE_INT () +#else +#define DEBUG_PUSH(item) +#define DEBUG_POP(item_addr) +#endif + + +/* Push the information about the state we will need + if we ever fail back to it. + + Requires variables fail_stack, regstart, regend, reg_info, and + num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be + declared.
+ + Does `return FAILURE_CODE' if it runs out of memory. */ +/* The items are pushed in this order: for each active register its start, + its end and its info word; then the lowest and highest active register + numbers, the pattern position, the string position and, through + DEBUG_PUSH, the failure id. */ + +#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ + do { \ + char *destination; \ + /* Must be int, so when we don't save any registers, the arithmetic \ + of 0 + -1 isn't done as unsigned. */ \ + /* Can't be int, since there is not a shred of a guarantee that int \ + is wide enough to hold a value of something to which pointer can \ + be assigned */ \ + s_reg_t this_reg; \ + \ + DEBUG_STATEMENT (failure_id++); \ + DEBUG_STATEMENT (nfailure_points_pushed++); \ + DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ + DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ + DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ + \ + DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ + DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ + \ + /* Ensure we have enough space allocated for what we will push. */ \ + while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ + { \ + if (!DOUBLE_FAIL_STACK (fail_stack)) \ + return failure_code; \ + \ + DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ + (fail_stack).size); \ + DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ + } \ + \ + /* Push the info, starting with the registers.
*/ \ + DEBUG_PRINT1 ("\n"); \ + \ + if (1) \ + for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ + this_reg++) \ + { \ + DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \ + DEBUG_STATEMENT (num_regs_pushed++); \ + \ + DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ + PUSH_FAILURE_POINTER (regstart[this_reg]); \ + \ + DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ + PUSH_FAILURE_POINTER (regend[this_reg]); \ + \ + DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \ + DEBUG_PRINT2 (" match_null=%d", \ + REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ + DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ + DEBUG_PRINT2 (" matched_something=%d", \ + MATCHED_SOMETHING (reg_info[this_reg])); \ + DEBUG_PRINT2 (" ever_matched=%d", \ + EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ + DEBUG_PRINT1 ("\n"); \ + PUSH_FAILURE_ELT (reg_info[this_reg].word); \ + } \ + \ + DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\ + PUSH_FAILURE_INT (lowest_active_reg); \ + \ + DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ + PUSH_FAILURE_INT (highest_active_reg); \ + \ + DEBUG_PRINT2 (" Pushing pattern 0x%x:\n", pattern_place); \ + DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ + PUSH_FAILURE_POINTER (pattern_place); \ + \ + DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \ + DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ + size2); \ + DEBUG_PRINT1 ("'\n"); \ + PUSH_FAILURE_POINTER (string_place); \ + \ + DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ + DEBUG_PUSH (failure_id); \ + } while (0) + +/* This is the number of items that are pushed and popped on the stack + for each register (its start, its end and its info word). */ +#define NUM_REG_ITEMS 3 + +/* Individual items aside from the registers. */ +#ifdef DEBUG +#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ +#else +#define NUM_NONREG_ITEMS 4 +#endif + +/* We push at most this many items on the stack.
*/ +/* We used to use (num_regs - 1), which is the number of registers + this regexp will save; but that was changed to 5 + to avoid stack overflow for a regexp with lots of parens. */ +#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) + +/* We actually push this many items. */ +#define NUM_FAILURE_ITEMS \ + (((0 \ + ? 0 : highest_active_reg - lowest_active_reg + 1) \ + * NUM_REG_ITEMS) \ + + NUM_NONREG_ITEMS) + +/* How many items can still be added to the stack without overflowing it. */ +#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) + + +/* Pops what PUSH_FAILURE_POINT pushes. + + We restore into the parameters, all of which should be lvalues: + STR -- the saved data position. + PAT -- the saved pattern position. + LOW_REG, HIGH_REG -- the lowest and highest active registers. + REGSTART, REGEND -- arrays of string positions. + REG_INFO -- array of information about each subexpression. + + Also assumes the variables `fail_stack' and (if debugging), `bufp', + `pend', `string1', `size1', `string2', and `size2'. + + The macro finishes by clearing `set_regs_matched_done'. The `else' + block that follows `if (1)' pairs with that `if', so it never runs. */ + +#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ +{ \ + DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \ + s_reg_t this_reg; \ + const unsigned char *string_temp; \ + \ + assert (!FAIL_STACK_EMPTY ()); \ + \ + /* Remove failure points and point to how many regs pushed. */ \ + DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ + DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ + DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ + \ + assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ + \ + DEBUG_POP (&failure_id); \ + DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ + \ + /* If the saved string location is NULL, it came from an \ + on_failure_keep_string_jump opcode, and we want to throw away the \ + saved NULL, thus retaining our current position in the string.
*/ \ + string_temp = POP_FAILURE_POINTER (); \ + if (string_temp != NULL) \ + str = (const char *) string_temp; \ + \ + DEBUG_PRINT2 (" Popping string 0x%x: `", str); \ + DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ + DEBUG_PRINT1 ("'\n"); \ + \ + pat = (unsigned char *) POP_FAILURE_POINTER (); \ + DEBUG_PRINT2 (" Popping pattern 0x%x:\n", pat); \ + DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ + \ + /* Restore register info. */ \ + high_reg = (active_reg_t) POP_FAILURE_INT (); \ + DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ + \ + low_reg = (active_reg_t) POP_FAILURE_INT (); \ + DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ + \ + if (1) \ + for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ + { \ + DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ + \ + reg_info[this_reg].word = POP_FAILURE_ELT (); \ + DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \ + \ + regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \ + DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ + \ + regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \ + DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ + } \ + else \ + { \ + for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \ + { \ + reg_info[this_reg].word.integer = 0; \ + regend[this_reg] = 0; \ + regstart[this_reg] = 0; \ + } \ + highest_active_reg = high_reg; \ + } \ + \ + set_regs_matched_done = 0; \ + DEBUG_STATEMENT (nfailure_points_popped++); \ +} /* POP_FAILURE_POINT */ + + + +/* Structure for per-register (a.k.a. per-group) information. + Other register information, such as the + starting and ending positions (which are addresses), and the list of + inner groups (which is a bits list) are maintained in separate + variables.
+ + We are making a (strictly speaking) nonportable assumption here: that + the compiler will pack our bit fields into something that fits into + the type of `word', i.e., is something that fits into one item on the + failure stack. */ + + +/* Declarations and macros for re_match_2. */ + +typedef union +{ + fail_stack_elt_t word; + struct + { + /* This field is one if this group can match the empty string, + zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ +#define MATCH_NULL_UNSET_VALUE 3 + unsigned match_null_string_p : 2; + unsigned is_active : 1; + unsigned matched_something : 1; + unsigned ever_matched_something : 1; + } bits; +} register_info_type; + +#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) +#define IS_ACTIVE(R) ((R).bits.is_active) +#define MATCHED_SOMETHING(R) ((R).bits.matched_something) +#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) + + +/* Call this when we have matched a real character; it sets `matched' flags + for the subexpressions which we are currently inside. Also records + that those subexprs have matched. */ +#define SET_REGS_MATCHED() \ + do \ + { \ + if (!set_regs_matched_done) \ + { \ + active_reg_t r; \ + set_regs_matched_done = 1; \ + for (r = lowest_active_reg; r <= highest_active_reg; r++) \ + { \ + MATCHED_SOMETHING (reg_info[r]) \ + = EVER_MATCHED_SOMETHING (reg_info[r]) \ + = 1; \ + } \ + } \ + } \ + while (0) + +/* Registers are set to a sentinel (the address of `reg_unset_dummy') + when they haven't yet matched. */ +static char reg_unset_dummy; +#define REG_UNSET_VALUE (&reg_unset_dummy) +#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) + +/* Subroutine declarations and macros for regex_compile.
*/ + +static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size, + reg_syntax_t syntax, + struct re_pattern_buffer *bufp)); +static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg)); +static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, + int arg1, int arg2)); +static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, + int arg, unsigned char *end)); +static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, + int arg1, int arg2, unsigned char *end)); +static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p, + reg_syntax_t syntax)); +static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend, + reg_syntax_t syntax)); +static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr, + const char *pend, + char *translate, + reg_syntax_t syntax, + unsigned char *b)); + +/* Fetch the next character in the uncompiled pattern---translating it + if necessary. Also cast from a signed character in the constant + string passed to us by the user to an unsigned char that we can use + as an array index (in, e.g., `translate'). */ +#ifndef PATFETCH +#define PATFETCH(c) \ + do {if (p == pend) return REG_EEND; \ + c = (unsigned char) *p++; \ + if (translate) c = (unsigned char) translate[c]; \ + } while (0) +#endif + +/* Fetch the next character in the uncompiled pattern, with no + translation. */ +#define PATFETCH_RAW(c) \ + do {if (p == pend) return REG_EEND; \ + c = (unsigned char) *p++; \ + } while (0) + +/* Go backwards one character in the pattern. */ +#define PATUNFETCH p-- + + +/* If `translate' is non-null, return translate[D], else just D. We + cast the subscript to translate because some data is declared as + `char *', to avoid warnings when a string constant is passed. But + when we use a character as a subscript we must make it unsigned. */ +#ifndef TRANSLATE +#define TRANSLATE(d) \ + (translate ? 
(char) translate[(unsigned char) (d)] : (d)) +#endif + + +/* Macros for outputting the compiled pattern into `buffer'. */ + +/* If the buffer isn't allocated when it comes in, use this. */ +#define INIT_BUF_SIZE 32 + +/* Make sure we have at least N more bytes of space in buffer. */ +#define GET_BUFFER_SPACE(n) \ + while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \ + EXTEND_BUFFER () + +/* Make sure we have one more byte of buffer space and then add C to it. */ +#define BUF_PUSH(c) \ + do { \ + GET_BUFFER_SPACE (1); \ + *b++ = (unsigned char) (c); \ + } while (0) + + +/* Ensure we have two more bytes of buffer space and then append C1 and C2. */ +#define BUF_PUSH_2(c1, c2) \ + do { \ + GET_BUFFER_SPACE (2); \ + *b++ = (unsigned char) (c1); \ + *b++ = (unsigned char) (c2); \ + } while (0) + + +/* As with BUF_PUSH_2, except for three bytes. */ +#define BUF_PUSH_3(c1, c2, c3) \ + do { \ + GET_BUFFER_SPACE (3); \ + *b++ = (unsigned char) (c1); \ + *b++ = (unsigned char) (c2); \ + *b++ = (unsigned char) (c3); \ + } while (0) + + +/* Store a jump with opcode OP at LOC to location TO. We store a + relative address offset by the three bytes the jump itself occupies. */ +#define STORE_JUMP(op, loc, to) \ + store_op1 (op, loc, (int) ((to) - (loc) - 3)) + +/* Likewise, for a two-argument jump. */ +#define STORE_JUMP2(op, loc, to, arg) \ + store_op2 (op, loc, (int) ((to) - (loc) - 3), arg) + +/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ +#define INSERT_JUMP(op, loc, to) \ + insert_op1 (op, loc, (int) ((to) - (loc) - 3), b) + +/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ +#define INSERT_JUMP2(op, loc, to, arg) \ + insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b) + + +/* This is not an arbitrary limit: the arguments which represent offsets + into the pattern are two bytes long. So if 2^16 bytes turns out to + be too small, many things would have to change. 
*/ +/* Any other compiler which, like MSC, has allocation limit below 2^16 + bytes will have to use approach similar to what was done below for + MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up + reallocating to 0 bytes. Such thing is not going to work too well. + You have been warned!! */ +#if defined(_MSC_VER) && !defined(WIN32) +/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes. + The REALLOC define eliminates a flurry of conversion warnings, + but is not required. */ +#define MAX_BUF_SIZE 65500L +#define REALLOC(p,s) realloc ((p), (size_t) (s)) +#else +#define MAX_BUF_SIZE (1L << 16) +#define REALLOC(p,s) realloc ((p), (s)) +#endif + +/* Extend the buffer by twice its current size via realloc and + reset the pointers that pointed into the old block to point to the + correct places in the new one. If extending the buffer results in it + being larger than MAX_BUF_SIZE, then flag memory exhausted. */ +#define EXTEND_BUFFER() \ + do { \ + unsigned char *old_buffer = bufp->buffer; \ + if (bufp->allocated == MAX_BUF_SIZE) \ + return REG_ESIZE; \ + bufp->allocated <<= 1; \ + if (bufp->allocated > MAX_BUF_SIZE) \ + bufp->allocated = MAX_BUF_SIZE; \ + bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\ + if (bufp->buffer == NULL) \ + return REG_ESPACE; \ + /* If the buffer moved, move all the pointers into it. */ \ + if (old_buffer != bufp->buffer) \ + { \ + b = (b - old_buffer) + bufp->buffer; \ + begalt = (begalt - old_buffer) + bufp->buffer; \ + if (fixup_alt_jump) \ + fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ + if (laststart) \ + laststart = (laststart - old_buffer) + bufp->buffer; \ + if (pending_exact) \ + pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ + } \ + } while (0) + + +/* Since we have one byte reserved for the register number argument to + {start,stop}_memory, the maximum number of groups we can report + things about is what fits in that byte. 
*/ +#define MAX_REGNUM 255 + +/* But patterns can have more than `MAX_REGNUM' registers. We just + ignore the excess. */ +typedef unsigned regnum_t; + + +/* Macros for the compile stack. */ + +/* Since offsets can go either forwards or backwards, this type needs to + be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ +/* int may not be enough when sizeof(int) == 2. */ +typedef long pattern_offset_t; + +typedef struct +{ + pattern_offset_t begalt_offset; + pattern_offset_t fixup_alt_jump; + pattern_offset_t inner_group_offset; + pattern_offset_t laststart_offset; + regnum_t regnum; +} compile_stack_elt_t; + + +typedef struct +{ + compile_stack_elt_t *stack; + unsigned size; + unsigned avail; /* Offset of next open position. */ +} compile_stack_type; + + +#define INIT_COMPILE_STACK_SIZE 32 + +#define COMPILE_STACK_EMPTY (compile_stack.avail == 0) +#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) + +/* The next available element. */ +#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) + + +/* Set the bit for character C in a list. C is parenthesized at both uses so that an expression argument is cast as a whole. */ +#define SET_LIST_BIT(c) \ + (b[((unsigned char) (c)) / BYTEWIDTH] \ + |= 1 << (((unsigned char) (c)) % BYTEWIDTH)) + + +/* Get the next unsigned number in the uncompiled pattern. */ +#define GET_UNSIGNED_NUMBER(num) \ + { if (p != pend) \ + { \ + PATFETCH (c); \ + while (ISDIGIT (c)) \ + { \ + if (num < 0) \ + num = 0; \ + num = num * 10 + c - '0'; \ + if (p == pend) \ + break; \ + PATFETCH (c); \ + } \ + } \ + } + +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +/* The GNU C library provides support for user-defined character classes + and the functions from ISO C amendment 1. */ +# ifdef CHARCLASS_NAME_MAX +# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX +# else +/* This shouldn't happen but some implementation might still have this + problem. Use a reasonable default value.
*/ +# define CHAR_CLASS_MAX_LENGTH 256 +# endif + +# define IS_CHAR_CLASS(string) wctype (string) +#else +# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ + +# define IS_CHAR_CLASS(string) \ + (STREQ (string, "alpha") || STREQ (string, "upper") \ + || STREQ (string, "lower") || STREQ (string, "digit") \ + || STREQ (string, "alnum") || STREQ (string, "xdigit") \ + || STREQ (string, "space") || STREQ (string, "print") \ + || STREQ (string, "punct") || STREQ (string, "graph") \ + || STREQ (string, "cntrl") || STREQ (string, "blank")) +#endif + +#ifndef MATCH_MAY_ALLOCATE + +/* If we cannot allocate large objects within re_match_2_internal, + we make the fail stack and register vectors global. + The fail stack, we grow to the maximum size when a regexp + is compiled. + The register vectors, we adjust in size each time we + compile a regexp, according to the number of registers it needs. */ + +static fail_stack_type fail_stack; + +/* Size with which the following vectors are currently allocated. + That is so we can make them bigger as needed, + but never make them smaller. */ +static int regs_allocated_size; + +static const char ** regstart, ** regend; +static const char ** old_regstart, ** old_regend; +static const char **best_regstart, **best_regend; +static register_info_type *reg_info; +static const char **reg_dummy; +static register_info_type *reg_info_dummy; + +/* Make the register vectors big enough for NUM_REGS registers, + but don't make them smaller. 
*/ + +static +regex_grow_registers (num_regs) + int num_regs; +{ + if (num_regs > regs_allocated_size) + { + RETALLOC_IF (regstart, num_regs, const char *); + RETALLOC_IF (regend, num_regs, const char *); + RETALLOC_IF (old_regstart, num_regs, const char *); + RETALLOC_IF (old_regend, num_regs, const char *); + RETALLOC_IF (best_regstart, num_regs, const char *); + RETALLOC_IF (best_regend, num_regs, const char *); + RETALLOC_IF (reg_info, num_regs, register_info_type); + RETALLOC_IF (reg_dummy, num_regs, const char *); + RETALLOC_IF (reg_info_dummy, num_regs, register_info_type); + + regs_allocated_size = num_regs; + } +} + +#endif /* not MATCH_MAY_ALLOCATE */ + +static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type + compile_stack, + regnum_t regnum)); + +/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. + Returns one of error codes defined in `regex.h', or zero for success. + + Assumes the `allocated' (and perhaps `buffer') and `translate' + fields are set in BUFP on entry. + + If it succeeds, results are put in BUFP (if it returns an error, the + contents of BUFP are undefined): + `buffer' is the compiled pattern; + `syntax' is set to SYNTAX; + `used' is set to the length of the compiled pattern; + `fastmap_accurate' is zero; + `re_nsub' is the number of subexpressions in PATTERN; + `not_bol' and `not_eol' are zero; + + The `fastmap' and `newline_anchor' fields are neither + examined nor set. */ + +/* Return, freeing storage we allocated. */ +#define FREE_STACK_RETURN(value) \ + return (free (compile_stack.stack), value) + +static reg_errcode_t +regex_compile (pattern, size, syntax, bufp) + const char *pattern; + size_t size; + reg_syntax_t syntax; + struct re_pattern_buffer *bufp; +{ + /* We fetch characters from PATTERN here. Even though PATTERN is + `char *' (i.e., signed), we declare these variables as unsigned, so + they can be reliably used as array indices. 
*/ + register unsigned char c, c1; + + /* A random temporary spot in PATTERN. */ + const char *p1; + + /* Points to the end of the buffer, where we should append. */ + register unsigned char *b; + + /* Keeps track of unclosed groups. */ + compile_stack_type compile_stack; + + /* Points to the current (ending) position in the pattern. */ + const char *p = pattern; + const char *pend = pattern + size; + + /* How to translate the characters in the pattern. */ + RE_TRANSLATE_TYPE translate = bufp->translate; + + /* Address of the count-byte of the most recently inserted `exactn' + command. This makes it possible to tell if a new exact-match + character can be added to that command or if the character requires + a new `exactn' command. */ + unsigned char *pending_exact = 0; + + /* Address of start of the most recently finished expression. + This tells, e.g., postfix * where to find the start of its + operand. Reset at the beginning of groups and alternatives. */ + unsigned char *laststart = 0; + + /* Address of beginning of regexp, or inside of last group. */ + unsigned char *begalt; + + /* Place in the uncompiled pattern (i.e., the {) to + which to go back if the interval is invalid. */ + const char *beg_interval; + + /* Address of the place where a forward jump should go to the end of + the containing expression. Each alternative of an `or' -- except the + last -- ends with a forward jump of this sort. */ + unsigned char *fixup_alt_jump = 0; + + /* Counts open-groups as they are encountered. Remembered for the + matching close-group on the compile stack, so the same register + number is put in the stop_memory as the start_memory. */ + regnum_t regnum = 0; + +#ifdef DEBUG + DEBUG_PRINT1 ("\nCompiling pattern: "); + if (debug) + { + unsigned debug_count; + + for (debug_count = 0; debug_count < size; debug_count++) + putchar (pattern[debug_count]); + putchar ('\n'); + } +#endif /* DEBUG */ + + /* Initialize the compile stack. 
*/ + compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); + if (compile_stack.stack == NULL) + return REG_ESPACE; + + compile_stack.size = INIT_COMPILE_STACK_SIZE; + compile_stack.avail = 0; + + /* Initialize the pattern buffer. */ + bufp->syntax = syntax; + bufp->fastmap_accurate = 0; + bufp->not_bol = bufp->not_eol = 0; + + /* Set `used' to zero, so that if we return an error, the pattern + printer (for debugging) will think there's no pattern. We reset it + at the end. */ + bufp->used = 0; + + /* Always count groups, whether or not bufp->no_sub is set. */ + bufp->re_nsub = 0; + +#if !defined (emacs) && !defined (SYNTAX_TABLE) + /* Initialize the syntax table. */ + init_syntax_once (); +#endif + + if (bufp->allocated == 0) + { + if (bufp->buffer) + { /* If zero allocated, but buffer is non-null, try to realloc + enough space. This loses if buffer's address is bogus, but + that is the user's responsibility. */ + RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); + } + else + { /* Caller did not allocate a buffer. Do it for them. */ + bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); + } + if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); + + bufp->allocated = INIT_BUF_SIZE; + } + + begalt = b = bufp->buffer; + + /* Loop through the uncompiled pattern until we're at the end. */ + while (p != pend) + { + PATFETCH (c); + + switch (c) + { + case '^': + { + if ( /* If at start of pattern, it's an operator. */ + p == pattern + 1 + /* If context independent, it's an operator. */ + || syntax & RE_CONTEXT_INDEP_ANCHORS + /* Otherwise, depends on what's come before. */ + || at_begline_loc_p (pattern, p, syntax)) + BUF_PUSH (begline); + else + goto normal_char; + } + break; + + + case '$': + { + if ( /* If at end of pattern, it's an operator. */ + p == pend + /* If context independent, it's an operator. */ + || syntax & RE_CONTEXT_INDEP_ANCHORS + /* Otherwise, depends on what's next. 
*/ + || at_endline_loc_p (p, pend, syntax)) + BUF_PUSH (endline); + else + goto normal_char; + } + break; + + + case '+': + case '?': + if ((syntax & RE_BK_PLUS_QM) + || (syntax & RE_LIMITED_OPS)) + goto normal_char; + handle_plus: + case '*': + /* If there is no previous pattern... */ + if (!laststart) + { + if (syntax & RE_CONTEXT_INVALID_OPS) + FREE_STACK_RETURN (REG_BADRPT); + else if (!(syntax & RE_CONTEXT_INDEP_OPS)) + goto normal_char; + } + + { + /* Are we optimizing this jump? */ + boolean keep_string_p = false; + + /* 1 means zero (many) matches is allowed. */ + char zero_times_ok = 0, many_times_ok = 0; + + /* If there is a sequence of repetition chars, collapse it + down to just one (the right one). We can't combine + interval operators with these because of, e.g., `a{2}*', + which should only match an even number of `a's. */ + + for (;;) + { + zero_times_ok |= c != '+'; + many_times_ok |= c != '?'; + + if (p == pend) + break; + + PATFETCH (c); + + if (c == '*' + || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) + ; + + else if (syntax & RE_BK_PLUS_QM && c == '\\') + { + if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); + + PATFETCH (c1); + if (!(c1 == '+' || c1 == '?')) + { + PATUNFETCH; + PATUNFETCH; + break; + } + + c = c1; + } + else + { + PATUNFETCH; + break; + } + + /* If we get here, we found another repeat character. */ + } + + /* Star, etc. applied to an empty pattern is equivalent + to an empty pattern. */ + if (!laststart) + break; + + /* Now we know whether or not zero matches is allowed + and also whether or not two or more matches is allowed. */ + if (many_times_ok) + { /* More than one repetition is allowed, so put in at the + end a backward relative jump from `b' to before the next + jump we're going to put in below (which jumps from + laststart to after this jump). + + But if we are at the `*' in the exact sequence `.*\n', + insert an unconditional jump backwards to the ., + instead of the beginning of the loop. 
This way we only + push a failure point once, instead of every time + through the loop. */ + assert (p - 1 > pattern); + + /* Allocate the space for the jump. */ + GET_BUFFER_SPACE (3); + + /* We know we are not at the first character of the pattern, + because laststart was nonzero. And we've already + incremented `p', by the way, to be the character after + the `*'. Do we have to do something analogous here + for null bytes, because of RE_DOT_NOT_NULL? */ + if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') + && zero_times_ok + && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') + && !(syntax & RE_DOT_NEWLINE)) + { /* We have .*\n. */ + STORE_JUMP (jump, b, laststart); + keep_string_p = true; + } + else + /* Anything else. */ + STORE_JUMP (maybe_pop_jump, b, laststart - 3); + + /* We've added more stuff to the buffer. */ + b += 3; + } + + /* On failure, jump from laststart to b + 3, which will be the + end of the buffer after this jump is inserted. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump + : on_failure_jump, + laststart, b + 3); + pending_exact = 0; + b += 3; + + if (!zero_times_ok) + { + /* At least one repetition is required, so insert a + `dummy_failure_jump' before the initial + `on_failure_jump' instruction of the loop. This + effects a skip over that instruction the first time + we hit that loop. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); + b += 3; + } + } + break; + + + case '.': + laststart = b; + BUF_PUSH (anychar); + break; + + + case '[': + { + boolean had_char_class = false; + + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + /* Ensure that we have enough space to push a charset: the + opcode, the length count, and the bitset; 34 bytes in all. */ + GET_BUFFER_SPACE (34); + + laststart = b; + + /* We test `*p == '^' twice, instead of using an if + statement, so we only need one BUF_PUSH. */ + BUF_PUSH (*p == '^' ? 
charset_not : charset); + if (*p == '^') + p++; + + /* Remember the first position in the bracket expression. */ + p1 = p; + + /* Push the number of bytes in the bitmap. */ + BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); + + /* Clear the whole map. */ + bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); + + /* charset_not matches newline according to a syntax bit. */ + if ((re_opcode_t) b[-2] == charset_not + && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) + SET_LIST_BIT ('\n'); + + /* Read in characters and ranges, setting map bits. */ + for (;;) + { + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + PATFETCH (c); + + /* \ might escape characters inside [...] and [^...]. */ + if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') + { + if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); + + PATFETCH (c1); + SET_LIST_BIT (c1); + continue; + } + + /* Could be the end of the bracket expression. If it's + not (i.e., when the bracket expression is `[]' so + far), the ']' character bit gets set way below. */ + if (c == ']' && p != p1 + 1) + break; + + /* Look ahead to see if it's a range when the last thing + was a character class. */ + if (had_char_class && c == '-' && *p != ']') + FREE_STACK_RETURN (REG_ERANGE); + + /* Look ahead to see if it's a range when the last thing + was a character: if this is a hyphen not at the + beginning or the end of a list, then it's the range + operator. */ + if (c == '-' + && !(p - 2 >= pattern && p[-2] == '[') + && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') + && *p != ']') + { + reg_errcode_t ret + = compile_range (&p, pend, translate, syntax, b); + if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); + } + + else if (p[0] == '-' && p[1] != ']') + { /* This handles ranges made up of characters only. */ + reg_errcode_t ret; + + /* Move past the `-'. */ + PATFETCH (c1); + + ret = compile_range (&p, pend, translate, syntax, b); + if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); + } + + /* See if we're at the beginning of a possible character + class. 
*/ + + else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') + { /* Leave room for the null. */ + char str[CHAR_CLASS_MAX_LENGTH + 1]; + + PATFETCH (c); + c1 = 0; + + /* If pattern is `[[:'. */ + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + for (;;) + { + PATFETCH (c); + if (c == ':' || c == ']' || p == pend + || c1 == CHAR_CLASS_MAX_LENGTH) + break; + str[c1++] = c; + } + str[c1] = '\0'; + + /* If isn't a word bracketed by `[:' and:`]': + undo the ending character, the letters, and leave + the leading `:' and `[' (but set bits for them). */ + if (c == ':' && *p == ']') + { +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) + boolean is_lower = STREQ (str, "lower"); + boolean is_upper = STREQ (str, "upper"); + wctype_t wt; + int ch; + + wt = wctype (str); + if (wt == 0) + FREE_STACK_RETURN (REG_ECTYPE); + + /* Throw away the ] at the end of the character + class. */ + PATFETCH (c); + + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) + { + if (iswctype (btowc (ch), wt)) + SET_LIST_BIT (ch); + + if (translate && (is_upper || is_lower) + && (ISUPPER (ch) || ISLOWER (ch))) + SET_LIST_BIT (ch); + } + + had_char_class = true; +#else + int ch; + boolean is_alnum = STREQ (str, "alnum"); + boolean is_alpha = STREQ (str, "alpha"); + boolean is_blank = STREQ (str, "blank"); + boolean is_cntrl = STREQ (str, "cntrl"); + boolean is_digit = STREQ (str, "digit"); + boolean is_graph = STREQ (str, "graph"); + boolean is_lower = STREQ (str, "lower"); + boolean is_print = STREQ (str, "print"); + boolean is_punct = STREQ (str, "punct"); + boolean is_space = STREQ (str, "space"); + boolean is_upper = STREQ (str, "upper"); + boolean is_xdigit = STREQ (str, "xdigit"); + + if (!IS_CHAR_CLASS (str)) + FREE_STACK_RETURN (REG_ECTYPE); + + /* Throw away the ] at the end of the character + class. 
*/ + PATFETCH (c); + + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + for (ch = 0; ch < (1 << BYTEWIDTH); ch++) + { + /* This was split into 3 if's to + avoid an arbitrary limit in some compiler. */ + if ( (is_alnum && ISALNUM (ch)) + || (is_alpha && ISALPHA (ch)) + || (is_blank && ISBLANK (ch)) + || (is_cntrl && ISCNTRL (ch))) + SET_LIST_BIT (ch); + if ( (is_digit && ISDIGIT (ch)) + || (is_graph && ISGRAPH (ch)) + || (is_lower && ISLOWER (ch)) + || (is_print && ISPRINT (ch))) + SET_LIST_BIT (ch); + if ( (is_punct && ISPUNCT (ch)) + || (is_space && ISSPACE (ch)) + || (is_upper && ISUPPER (ch)) + || (is_xdigit && ISXDIGIT (ch))) + SET_LIST_BIT (ch); + if ( translate && (is_upper || is_lower) + && (ISUPPER (ch) || ISLOWER (ch))) + SET_LIST_BIT (ch); + } + had_char_class = true; +#endif /* libc || wctype.h */ + } + else + { + c1++; + while (c1--) + PATUNFETCH; + SET_LIST_BIT ('['); + SET_LIST_BIT (':'); + had_char_class = false; + } + } + else + { + had_char_class = false; + SET_LIST_BIT (c); + } + } + + /* Discard any (non)matching list bytes that are all 0 at the + end of the map. Decrease the map-length byte too. */ + while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) + b[-1]--; + b += b[-1]; + } + break; + + + case '(': + if (syntax & RE_NO_BK_PARENS) + goto handle_open; + else + goto normal_char; + + + case ')': + if (syntax & RE_NO_BK_PARENS) + goto handle_close; + else + goto normal_char; + + + case '\n': + if (syntax & RE_NEWLINE_ALT) + goto handle_alt; + else + goto normal_char; + + + case '|': + if (syntax & RE_NO_BK_VBAR) + goto handle_alt; + else + goto normal_char; + + + case '{': + if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) + goto handle_interval; + else + goto normal_char; + + + case '\\': + if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); + + /* Do not translate the character after the \, so that we can + distinguish, e.g., \B from \b, even if we normally would + translate, e.g., B to b. 
*/ + PATFETCH_RAW (c); + + switch (c) + { + case '(': + if (syntax & RE_NO_BK_PARENS) + goto normal_backslash; + + handle_open: + bufp->re_nsub++; + regnum++; + + if (COMPILE_STACK_FULL) + { + RETALLOC (compile_stack.stack, compile_stack.size << 1, + compile_stack_elt_t); + if (compile_stack.stack == NULL) return REG_ESPACE; + + compile_stack.size <<= 1; + } + + /* These are the values to restore when we hit end of this + group. They are all relative offsets, so that if the + whole pattern moves because of realloc, they will still + be valid. */ + COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; + COMPILE_STACK_TOP.fixup_alt_jump + = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; + COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; + COMPILE_STACK_TOP.regnum = regnum; + + /* We will eventually replace the 0 with the number of + groups inner to this one. But do not push a + start_memory for groups beyond the last one we can + represent in the compiled pattern. */ + if (regnum <= MAX_REGNUM) + { + COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; + BUF_PUSH_3 (start_memory, regnum, 0); + } + + compile_stack.avail++; + + fixup_alt_jump = 0; + laststart = 0; + begalt = b; + /* If we've reached MAX_REGNUM groups, then this open + won't actually generate any code, so we'll have to + clear pending_exact explicitly. */ + pending_exact = 0; + break; + + + case ')': + if (syntax & RE_NO_BK_PARENS) goto normal_backslash; + + if (COMPILE_STACK_EMPTY) { + if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) + goto normal_backslash; + } + else FREE_STACK_RETURN (REG_ERPAREN); + + handle_close: + if (fixup_alt_jump) + { /* Push a dummy failure point at the end of the + alternative for a possible future + `pop_failure_jump' to pop. See comments at + `push_dummy_failure' in `re_match_2'. */ + BUF_PUSH (push_dummy_failure); + + /* We allocated space for this jump when we assigned + to `fixup_alt_jump', in the `handle_alt' case below. 
*/ + STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); + } + + /* See similar code for backslashed left paren above. */ + if (COMPILE_STACK_EMPTY) { + if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) + goto normal_char; + } + else FREE_STACK_RETURN (REG_ERPAREN); + + /* Since we just checked for an empty stack above, this + ``can't happen''. */ + assert (compile_stack.avail != 0); + { + /* We don't just want to restore into `regnum', because + later groups should continue to be numbered higher, + as in `(ab)c(de)' -- the second group is #2. */ + regnum_t this_group_regnum; + + compile_stack.avail--; + begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; + fixup_alt_jump + = COMPILE_STACK_TOP.fixup_alt_jump + ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 + : 0; + laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; + this_group_regnum = COMPILE_STACK_TOP.regnum; + /* If we've reached MAX_REGNUM groups, then this open + won't actually generate any code, so we'll have to + clear pending_exact explicitly. */ + pending_exact = 0; + + /* We're at the end of the group, so now we know how many + groups were inside this one. */ + if (this_group_regnum <= MAX_REGNUM) + { + unsigned char *inner_group_loc + = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; + + *inner_group_loc = regnum - this_group_regnum; + BUF_PUSH_3 (stop_memory, this_group_regnum, + regnum - this_group_regnum); + } + } + break; + + + case '|': /* `\|'. */ + if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) + goto normal_backslash; + handle_alt: + if (syntax & RE_LIMITED_OPS) + goto normal_char; + + /* Insert before the previous alternative a jump which + jumps to this alternative if the former fails. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (on_failure_jump, begalt, b + 6); + pending_exact = 0; + b += 3; + + /* The alternative before this one has a jump after it + which gets executed if it gets matched. 
Adjust that + jump so it will jump to this alternative's analogous + jump (put in below, which in turn will jump to the next + (if any) alternative's such jump, etc.). The last such + jump jumps to the correct final destination. A picture: + _____ _____ + | | | | + | v | v + a | b | c + + If we are at `b', then fixup_alt_jump right now points to a + three-byte space after `a'. We'll put in the jump, set + fixup_alt_jump to right after `b', and leave behind three + bytes which we'll fill in when we get to after `c'. */ + + if (fixup_alt_jump) + STORE_JUMP (jump_past_alt, fixup_alt_jump, b); + + /* Mark and leave space for a jump after this alternative, + to be filled in later either by next alternative or + when know we're at the end of a series of alternatives. */ + fixup_alt_jump = b; + GET_BUFFER_SPACE (3); + b += 3; + + laststart = 0; + begalt = b; + break; + + + case '{': + /* If \{ is a literal. */ + if (!(syntax & RE_INTERVALS) + /* If we're at `\{' and it's not the open-interval + operator. */ + || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + || (p - 2 == pattern && p == pend)) + goto normal_backslash; + + handle_interval: + { + /* If got here, then the syntax allows intervals. */ + + /* At least (most) this many matches must be made. */ + int lower_bound = -1, upper_bound = -1; + + beg_interval = p - 1; + + if (p == pend) + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + FREE_STACK_RETURN (REG_EBRACE); + } + + GET_UNSIGNED_NUMBER (lower_bound); + + if (c == ',') + { + GET_UNSIGNED_NUMBER (upper_bound); + if (upper_bound < 0) upper_bound = RE_DUP_MAX; + } + else + /* Interval such as `{1}' => match exactly once. 
*/ + upper_bound = lower_bound; + + if (lower_bound < 0 || upper_bound > RE_DUP_MAX + || lower_bound > upper_bound) + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + FREE_STACK_RETURN (REG_BADBR); + } + + if (!(syntax & RE_NO_BK_BRACES)) + { + if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); + + PATFETCH (c); + } + + if (c != '}') + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + FREE_STACK_RETURN (REG_BADBR); + } + + /* We just parsed a valid interval. */ + + /* If it's invalid to have no preceding re. */ + if (!laststart) + { + if (syntax & RE_CONTEXT_INVALID_OPS) + FREE_STACK_RETURN (REG_BADRPT); + else if (syntax & RE_CONTEXT_INDEP_OPS) + laststart = b; + else + goto unfetch_interval; + } + + /* If the upper bound is zero, don't want to succeed at + all; jump from `laststart' to `b + 3', which will be + the end of the buffer after we insert the jump. */ + if (upper_bound == 0) + { + GET_BUFFER_SPACE (3); + INSERT_JUMP (jump, laststart, b + 3); + b += 3; + } + + /* Otherwise, we have a nontrivial interval. When + we're all done, the pattern will look like: + set_number_at <jump count> <upper bound> + set_number_at <succeed_n count> <lower bound> + succeed_n <after jump addr> <succeed_n count> + <body of loop> + jump_n <succeed_n addr> <jump count> + (The upper bound and `jump_n' are omitted if + `upper_bound' is 1, though.) */ + else + { /* If the upper bound is > 1, we need to insert + more at the end of the loop. */ + unsigned nbytes = 10 + (upper_bound > 1) * 10; + + GET_BUFFER_SPACE (nbytes); + + /* Initialize lower bound of the `succeed_n', even + though it will be set during matching by its + attendant `set_number_at' (inserted next), + because `re_compile_fastmap' needs to know. + Jump to the `jump_n' we might insert below. */ + INSERT_JUMP2 (succeed_n, laststart, + b + 5 + (upper_bound > 1) * 5, + lower_bound); + b += 5; + + /* Code to initialize the lower bound. Insert + before the `succeed_n'. 
The `5' is the last two + bytes of this `set_number_at', plus 3 bytes of + the following `succeed_n'. */ + insert_op2 (set_number_at, laststart, 5, lower_bound, b); + b += 5; + + if (upper_bound > 1) + { /* More than one repetition is allowed, so + append a backward jump to the `succeed_n' + that starts this interval. + + When we've reached this during matching, + we'll have matched the interval once, so + jump back only `upper_bound - 1' times. */ + STORE_JUMP2 (jump_n, b, laststart + 5, + upper_bound - 1); + b += 5; + + /* The location we want to set is the second + parameter of the `jump_n'; that is `b-2' as + an absolute address. `laststart' will be + the `set_number_at' we're about to insert; + `laststart+3' the number to set, the source + for the relative address. But we are + inserting into the middle of the pattern -- + so everything is getting moved up by 5. + Conclusion: (b - 2) - (laststart + 3) + 5, + i.e., b - laststart. + + We insert this at the beginning of the loop + so that if we fail during matching, we'll + reinitialize the bounds. */ + insert_op2 (set_number_at, laststart, b - laststart, + upper_bound - 1, b); + b += 5; + } + } + pending_exact = 0; + beg_interval = NULL; + } + break; + + unfetch_interval: + /* If an invalid interval, match the characters as literals. */ + assert (beg_interval); + p = beg_interval; + beg_interval = NULL; + + /* normal_char and normal_backslash need `c'. */ + PATFETCH (c); + + if (!(syntax & RE_NO_BK_BRACES)) + { + if (p > pattern && p[-1] == '\\') + goto normal_backslash; + } + goto normal_char; + +#ifdef emacs + /* There is no way to specify the before_dot and after_dot + operators. rms says this is ok. 
--karl */ + case '=': + BUF_PUSH (at_dot); + break; + + case 's': + laststart = b; + PATFETCH (c); + BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); + break; + + case 'S': + laststart = b; + PATFETCH (c); + BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); + break; +#endif /* emacs */ + + + case 'w': + if (re_syntax_options & RE_NO_GNU_OPS) + goto normal_char; + laststart = b; + BUF_PUSH (wordchar); + break; + + + case 'W': + if (re_syntax_options & RE_NO_GNU_OPS) + goto normal_char; + laststart = b; + BUF_PUSH (notwordchar); + break; + + + case '<': + if (re_syntax_options & RE_NO_GNU_OPS) + goto normal_char; + BUF_PUSH (wordbeg); + break; + + case '>': + if (re_syntax_options & RE_NO_GNU_OPS) + goto normal_char; + BUF_PUSH (wordend); + break; + + case 'b': + if (re_syntax_options & RE_NO_GNU_OPS) + goto normal_char; + BUF_PUSH (wordbound); + break; + + case 'B': + if (re_syntax_options & RE_NO_GNU_OPS) + goto normal_char; + BUF_PUSH (notwordbound); + break; + + case '`': + if (re_syntax_options & RE_NO_GNU_OPS) + goto normal_char; + BUF_PUSH (begbuf); + break; + + case '\'': + if (re_syntax_options & RE_NO_GNU_OPS) + goto normal_char; + BUF_PUSH (endbuf); + break; + + case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + if (syntax & RE_NO_BK_REFS) + goto normal_char; + + c1 = c - '0'; + + if (c1 > regnum) + FREE_STACK_RETURN (REG_ESUBREG); + + /* Can't back reference to a subexpression if inside of it. */ + if (group_in_compile_stack (compile_stack, (regnum_t) c1)) + goto normal_char; + + laststart = b; + BUF_PUSH_2 (duplicate, c1); + break; + + + case '+': + case '?': + if (syntax & RE_BK_PLUS_QM) + goto handle_plus; + else + goto normal_backslash; + + default: + normal_backslash: + /* You might think it would be useful for \ to mean + not to translate; but if we don't translate it + it will never match anything. */ + c = TRANSLATE (c); + goto normal_char; + } + break; + + + default: + /* Expects the character in `c'. 
*/ + normal_char: + /* If no exactn currently being built. */ + if (!pending_exact + + /* If last exactn not at current position. */ + || pending_exact + *pending_exact + 1 != b + + /* We have only one byte following the exactn for the count. */ + || *pending_exact == (1 << BYTEWIDTH) - 1 + + /* If followed by a repetition operator. */ + || *p == '*' || *p == '^' + || ((syntax & RE_BK_PLUS_QM) + ? *p == '\\' && (p[1] == '+' || p[1] == '?') + : (*p == '+' || *p == '?')) + || ((syntax & RE_INTERVALS) + && ((syntax & RE_NO_BK_BRACES) + ? *p == '{' + : (p[0] == '\\' && p[1] == '{')))) + { + /* Start building a new exactn. */ + + laststart = b; + + BUF_PUSH_2 (exactn, 0); + pending_exact = b - 1; + } + + BUF_PUSH (c); + (*pending_exact)++; + break; + } /* switch (c) */ + } /* while p != pend */ + + + /* Through the pattern now. */ + + if (fixup_alt_jump) + STORE_JUMP (jump_past_alt, fixup_alt_jump, b); + + if (!COMPILE_STACK_EMPTY) + FREE_STACK_RETURN (REG_EPAREN); + + /* If we don't want backtracking, force success + the first time we reach the end of the compiled pattern. */ + if (syntax & RE_NO_POSIX_BACKTRACKING) + BUF_PUSH (succeed); + + free (compile_stack.stack); + + /* We have succeeded; set the length of the buffer. */ + bufp->used = b - bufp->buffer; + +#ifdef DEBUG + if (debug) + { + DEBUG_PRINT1 ("\nCompiled pattern: \n"); + print_compiled_pattern (bufp); + } +#endif /* DEBUG */ + +#ifndef MATCH_MAY_ALLOCATE + /* Initialize the failure stack to the largest possible stack. This + isn't necessary unless we're trying to avoid calling alloca in + the search and match routines. */ + { + int num_regs = bufp->re_nsub + 1; + + /* Since DOUBLE_FAIL_STACK refuses to double only if the current size + is strictly greater than re_max_failures, the largest possible stack + is 2 * re_max_failures failure points. */ + if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) + { + fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); + +#ifdef emacs + if (! 
fail_stack.stack) + fail_stack.stack + = (fail_stack_elt_t *) xmalloc (fail_stack.size + * sizeof (fail_stack_elt_t)); + else + fail_stack.stack + = (fail_stack_elt_t *) xrealloc (fail_stack.stack, + (fail_stack.size + * sizeof (fail_stack_elt_t))); +#else /* not emacs */ + if (! fail_stack.stack) + fail_stack.stack + = (fail_stack_elt_t *) malloc (fail_stack.size + * sizeof (fail_stack_elt_t)); + else + fail_stack.stack + = (fail_stack_elt_t *) realloc (fail_stack.stack, + (fail_stack.size + * sizeof (fail_stack_elt_t))); +#endif /* not emacs */ + } + + regex_grow_registers (num_regs); + } +#endif /* not MATCH_MAY_ALLOCATE */ + + return REG_NOERROR; +} /* regex_compile */ + +/* Subroutines for `regex_compile'. */ + +/* Store OP at LOC followed by two-byte integer parameter ARG. */ + +static void +store_op1 (op, loc, arg) + re_opcode_t op; + unsigned char *loc; + int arg; +{ + *loc = (unsigned char) op; + STORE_NUMBER (loc + 1, arg); +} + + +/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ + +static void +store_op2 (op, loc, arg1, arg2) + re_opcode_t op; + unsigned char *loc; + int arg1, arg2; +{ + *loc = (unsigned char) op; + STORE_NUMBER (loc + 1, arg1); + STORE_NUMBER (loc + 3, arg2); +} + + +/* Copy the bytes from LOC to END to open up three bytes of space at LOC + for OP followed by two-byte integer parameter ARG. */ + +static void +insert_op1 (op, loc, arg, end) + re_opcode_t op; + unsigned char *loc; + int arg; + unsigned char *end; +{ + register unsigned char *pfrom = end; + register unsigned char *pto = end + 3; + + while (pfrom != loc) + *--pto = *--pfrom; + + store_op1 (op, loc, arg); +} + + +/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. 
*/ + +static void +insert_op2 (op, loc, arg1, arg2, end) + re_opcode_t op; + unsigned char *loc; + int arg1, arg2; + unsigned char *end; +{ + register unsigned char *pfrom = end; + register unsigned char *pto = end + 5; + + while (pfrom != loc) + *--pto = *--pfrom; + + store_op2 (op, loc, arg1, arg2); +} + + +/* P points to just after a ^ in PATTERN. Return true if that ^ comes + after an alternative or a begin-subexpression. We assume there is at + least one character before the ^. */ + +static boolean +at_begline_loc_p (pattern, p, syntax) + const char *pattern, *p; + reg_syntax_t syntax; +{ + const char *prev = p - 2; + boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; + + return + /* After a subexpression? */ + (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) + /* After an alternative? */ + || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); +} + + +/* The dual of at_begline_loc_p. This one is for $. We assume there is + at least one character after the $, i.e., `P < PEND'. */ + +static boolean +at_endline_loc_p (p, pend, syntax) + const char *p, *pend; + reg_syntax_t syntax; +{ + const char *next = p; + boolean next_backslash = *next == '\\'; + const char *next_next = p + 1 < pend ? p + 1 : 0; + + return + /* Before a subexpression? */ + (syntax & RE_NO_BK_PARENS ? *next == ')' + : next_backslash && next_next && *next_next == ')') + /* Before an alternative? */ + || (syntax & RE_NO_BK_VBAR ? *next == '|' + : next_backslash && next_next && *next_next == '|'); +} + + +/* Returns true if REGNUM is in one of COMPILE_STACK's elements and + false if it's not. 
*/ + +static boolean +group_in_compile_stack (compile_stack, regnum) + compile_stack_type compile_stack; + regnum_t regnum; +{ + int this_element; + + for (this_element = compile_stack.avail - 1; + this_element >= 0; + this_element--) + if (compile_stack.stack[this_element].regnum == regnum) + return true; + + return false; +} + + +/* Read the ending character of a range (in a bracket expression) from the + uncompiled pattern *P_PTR (which ends at PEND). We assume the + starting character is in `P[-2]'. (`P[-1]' is the character `-'.) + Then we set the translation of all bits between the starting and + ending characters (inclusive) in the compiled pattern B. + + Return an error code. + + We use these short variable names so we can use the same macros as + `regex_compile' itself. */ + +static reg_errcode_t +compile_range (p_ptr, pend, translate, syntax, b) + const char **p_ptr, *pend; + RE_TRANSLATE_TYPE translate; + reg_syntax_t syntax; + unsigned char *b; +{ + unsigned this_char; + + const char *p = *p_ptr; + unsigned int range_start, range_end; + + if (p == pend) + return REG_ERANGE; + + /* Even though the pattern is a signed `char *', we need to fetch + with unsigned char *'s; if the high bit of the pattern character + is set, the range endpoints will be negative if we fetch using a + signed char *. + + We also want to fetch the endpoints without translating them; the + appropriate translation is done in the bit-setting loop below. */ + /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ + range_start = ((const unsigned char *) p)[-2]; + range_end = ((const unsigned char *) p)[0]; + + /* Have to increment the pointer into the pattern string, so the + caller isn't still at the ending character. */ + (*p_ptr)++; + + /* If the start is after the end, the range is empty. */ + if (range_start > range_end) + return syntax & RE_NO_EMPTY_RANGES ? 
REG_ERANGE : REG_NOERROR; + + /* Here we see why `this_char' has to be larger than an `unsigned + char' -- the range is inclusive, so if `range_end' == 0xff + (assuming 8-bit characters), we would otherwise go into an infinite + loop, since all characters <= 0xff. */ + for (this_char = range_start; this_char <= range_end; this_char++) + { + SET_LIST_BIT (TRANSLATE (this_char)); + } + + return REG_NOERROR; +} + +/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in + BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible + characters can start a string that matches the pattern. This fastmap + is used by re_search to skip quickly over impossible starting points. + + The caller must supply the address of a (1 << BYTEWIDTH)-byte data + area as BUFP->fastmap. + + We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in + the pattern buffer. + + Returns 0 if we succeed, -2 if an internal error. */ + +int +re_compile_fastmap (bufp) + struct re_pattern_buffer *bufp; +{ + int j, k; +#ifdef MATCH_MAY_ALLOCATE + fail_stack_type fail_stack; +#endif +#ifndef REGEX_MALLOC + char *destination; +#endif + /* We don't push any register information onto the failure stack. */ +//sword unsigned num_regs = 0; + + register char *fastmap = bufp->fastmap; + unsigned char *pattern = bufp->buffer; + unsigned char *p = pattern; + register unsigned char *pend = pattern + bufp->used; + +#ifdef REL_ALLOC + /* This holds the pointer to the failure stack, when + it is allocated relocatably. */ + fail_stack_elt_t *failure_stack_ptr; +#endif + + /* Assume that each path through the pattern can be null until + proven otherwise. We set this false at the bottom of switch + statement, to which we get only if a particular path doesn't + match the empty string. */ + boolean path_can_be_null = true; + + /* We aren't doing a `succeed_n' to begin with. 
*/ + boolean succeed_n_p = false; + + assert (fastmap != NULL && p != NULL); + + INIT_FAIL_STACK (); + bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ + bufp->fastmap_accurate = 1; /* It will be when we're done. */ + bufp->can_be_null = 0; + + while (1) + { + if (p == pend || *p == succeed) + { + /* We have reached the (effective) end of pattern. */ + if (!FAIL_STACK_EMPTY ()) + { + bufp->can_be_null |= path_can_be_null; + + /* Reset for next path. */ + path_can_be_null = true; + + p = fail_stack.stack[--fail_stack.avail].pointer; + + continue; + } + else + break; + } + + /* We should never be about to go beyond the end of the pattern. */ + assert (p < pend); + + switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) + { + + /* I guess the idea here is to simply not bother with a fastmap + if a backreference is used, since it's too hard to figure out + the fastmap for the corresponding group. Setting + `can_be_null' stops `re_search_2' from using the fastmap, so + that is all we do. */ + case duplicate: + bufp->can_be_null = 1; + goto done; + + + /* Following are the cases which match a character. These end + with `break'. */ + + case exactn: + fastmap[p[1]] = 1; + break; + + + case charset: + for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) + if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) + fastmap[j] = 1; + break; + + + case charset_not: + /* Chars beyond end of map must be allowed. */ + for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) + fastmap[j] = 1; + + for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) + if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) + fastmap[j] = 1; + break; + + + case wordchar: + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) == Sword) + fastmap[j] = 1; + break; + + + case notwordchar: + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) != Sword) + fastmap[j] = 1; + break; + + + case anychar: + { + int fastmap_newline = fastmap['\n']; + + /* `.' matches anything ... 
*/ + for (j = 0; j < (1 << BYTEWIDTH); j++) + fastmap[j] = 1; + + /* ... except perhaps newline. */ + if (!(bufp->syntax & RE_DOT_NEWLINE)) + fastmap['\n'] = fastmap_newline; + + /* Return if we have already set `can_be_null'; if we have, + then the fastmap is irrelevant. Something's wrong here. */ + else if (bufp->can_be_null) + goto done; + + /* Otherwise, have to check alternative paths. */ + break; + } + +#ifdef emacs + case syntaxspec: + k = *p++; + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) == (enum syntaxcode) k) + fastmap[j] = 1; + break; + + + case notsyntaxspec: + k = *p++; + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) != (enum syntaxcode) k) + fastmap[j] = 1; + break; + + + /* All cases after this match the empty string. These end with + `continue'. */ + + + case before_dot: + case at_dot: + case after_dot: + continue; +#endif /* emacs */ + + + case no_op: + case begline: + case endline: + case begbuf: + case endbuf: + case wordbound: + case notwordbound: + case wordbeg: + case wordend: + case push_dummy_failure: + continue; + + + case jump_n: + case pop_failure_jump: + case maybe_pop_jump: + case jump: + case jump_past_alt: + case dummy_failure_jump: + EXTRACT_NUMBER_AND_INCR (j, p); + p += j; + if (j > 0) + continue; + + /* Jump backward implies we just went through the body of a + loop and matched nothing. Opcode jumped to should be + `on_failure_jump' or `succeed_n'. Just treat it like an + ordinary jump. For a * loop, it has pushed its failure + point already; if so, discard that as redundant. */ + if ((re_opcode_t) *p != on_failure_jump + && (re_opcode_t) *p != succeed_n) + continue; + + p++; + EXTRACT_NUMBER_AND_INCR (j, p); + p += j; + + /* If what's on the stack is where we are now, pop it. 
*/ + if (!FAIL_STACK_EMPTY () + && fail_stack.stack[fail_stack.avail - 1].pointer == p) + fail_stack.avail--; + + continue; + + + case on_failure_jump: + case on_failure_keep_string_jump: + handle_on_failure_jump: + EXTRACT_NUMBER_AND_INCR (j, p); + + /* For some patterns, e.g., `(a?)?', `p+j' here points to the + end of the pattern. We don't want to push such a point, + since when we restore it above, entering the switch will + increment `p' past the end of the pattern. We don't need + to push such a point since we obviously won't find any more + fastmap entries beyond `pend'. Such a pattern can match + the null string, though. */ + if (p + j < pend) + { + if (!PUSH_PATTERN_OP (p + j, fail_stack)) + { + RESET_FAIL_STACK (); + return -2; + } + } + else + bufp->can_be_null = 1; + + if (succeed_n_p) + { + EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ + succeed_n_p = false; + } + + continue; + + + case succeed_n: + /* Get to the number of times to succeed. */ + p += 2; + + /* Increment p past the n for when k != 0. */ + EXTRACT_NUMBER_AND_INCR (k, p); + if (k == 0) + { + p -= 4; + succeed_n_p = true; /* Spaghetti code alert. */ + goto handle_on_failure_jump; + } + continue; + + + case set_number_at: + p += 4; + continue; + + + case start_memory: + case stop_memory: + p += 2; + continue; + + + default: + abort (); /* We have listed all the cases. */ + } /* switch *p++ */ + + /* Getting here means we have found the possible starting + characters for one path of the pattern -- and that the empty + string does not match. We need not follow this path further. + Instead, look at the next alternative (remembered on the + stack), or quit if no more. The test at the top of the loop + does these things. */ + path_can_be_null = false; + p = pend; + } /* while p */ + + /* Set `can_be_null' for the last path (also the first path, if the + pattern is empty). 
*/ + bufp->can_be_null |= path_can_be_null; + + done: + RESET_FAIL_STACK (); + return 0; +} /* re_compile_fastmap */ + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use + this memory for recording register information. STARTS and ENDS + must be allocated using the malloc library routine, and must each + be at least NUM_REGS * sizeof (regoff_t) bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ + +void +re_set_registers (bufp, regs, num_regs, starts, ends) + struct re_pattern_buffer *bufp; + struct re_registers *regs; + unsigned num_regs; + regoff_t *starts, *ends; +{ + if (num_regs) + { + bufp->regs_allocated = REGS_REALLOCATE; + regs->num_regs = num_regs; + regs->start = starts; + regs->end = ends; + } + else + { + bufp->regs_allocated = REGS_UNALLOCATED; + regs->num_regs = 0; + regs->start = regs->end = (regoff_t *) 0; + } +} + +/* Searching routines. */ + +/* Like re_search_2, below, but only one string is specified, and + doesn't let you say where to stop matching. */ + +int +re_search (bufp, string, size, startpos, range, regs) + struct re_pattern_buffer *bufp; + const char *string; + int size, startpos, range; + struct re_registers *regs; +{ + return re_search_2 (bufp, NULL, 0, string, size, startpos, range, + regs, size); +} + + +/* Using the compiled pattern in BUFP->buffer, first tries to match the + virtual concatenation of STRING1 and STRING2, starting first at index + STARTPOS, then at STARTPOS + 1, and so on. + + STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. + + RANGE is how far to scan while trying to match. RANGE = 0 means try + only at STARTPOS; in general, the last start tried is STARTPOS + + RANGE. 
+ + In REGS, return the indices of the virtual concatenation of STRING1 + and STRING2 that matched the entire BUFP->buffer and its contained + subexpressions. + + Do not consider matching one past the index STOP in the virtual + concatenation of STRING1 and STRING2. + + We return either the position in the strings at which the match was + found, -1 if no match, or -2 if error (such as failure + stack overflow). */ + +int +re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int size1, size2; + int startpos; + int range; + struct re_registers *regs; + int stop; +{ + int val; + register char *fastmap = bufp->fastmap; + register RE_TRANSLATE_TYPE translate = bufp->translate; + int total_size = size1 + size2; + int endpos = startpos + range; + + /* Check for out-of-range STARTPOS. */ + if (startpos < 0 || startpos > total_size) + return -1; + + /* Fix up RANGE if it might eventually take us outside + the virtual concatenation of STRING1 and STRING2. + Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */ + if (endpos < 0) + range = 0 - startpos; + else if (endpos > total_size) + range = total_size - startpos; + + /* If the search isn't to be a backwards one, don't waste time in a + search for a pattern that must be anchored. */ + if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) + { + if (startpos > 0) + return -1; + else + range = 1; + } + +#ifdef emacs + /* In a forward search for something that starts with \=. + don't keep searching past point. */ + if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) + { + range = PT - startpos; + if (range <= 0) + return -1; + } +#endif /* emacs */ + + /* Update the fastmap now if not correct already. */ + if (fastmap && !bufp->fastmap_accurate) + if (re_compile_fastmap (bufp) == -2) + return -2; + + /* Loop through the string, looking for a place to start matching. 
*/ + for (;;) + { + /* If a fastmap is supplied, skip quickly over characters that + cannot be the start of a match. If the pattern can match the + null string, however, we don't need to skip characters; we want + the first null string. */ + if (fastmap && startpos < total_size && !bufp->can_be_null) + { + if (range > 0) /* Searching forwards. */ + { + register const char *d; + register int lim = 0; + int irange = range; + + if (startpos < size1 && startpos + range >= size1) + lim = range - (size1 - startpos); + + d = (startpos >= size1 ? string2 - size1 : string1) + startpos; + + /* Written out as an if-else to avoid testing `translate' + inside the loop. */ + if (translate) + while (range > lim + && !fastmap[(unsigned char) + translate[(unsigned char) *d++]]) + range--; + else + while (range > lim && !fastmap[(unsigned char) *d++]) + range--; + + startpos += irange - range; + } + else /* Searching backwards. */ + { + register char c = (size1 == 0 || startpos >= size1 + ? string2[startpos - size1] + : string1[startpos]); + + if (!fastmap[(unsigned char) TRANSLATE (c)]) + goto advance; + } + } + + /* If can't match the null string, and that's all we have left, fail. */ + if (range >= 0 && startpos == total_size && fastmap + && !bufp->can_be_null) + return -1; + + val = re_match_2_internal (bufp, string1, size1, string2, size2, + startpos, regs, stop); +#ifndef REGEX_MALLOC +#ifdef C_ALLOCA + alloca (0); +#endif +#endif + + if (val >= 0) + return startpos; + + if (val == -2) + return -2; + + advance: + if (!range) + break; + else if (range > 0) + { + range--; + startpos++; + } + else + { + range++; + startpos--; + } + } + return -1; +} /* re_search_2 */ + +/* This converts PTR, a pointer into one of the search strings `string1' + and `string2' into an offset from the beginning of that string. */ +#define POINTER_TO_OFFSET(ptr) \ + (FIRST_STRING_P (ptr) \ + ? 
((regoff_t) ((ptr) - string1)) \ + : ((regoff_t) ((ptr) - string2 + size1))) + +/* Macros for dealing with the split strings in re_match_2. */ + +#define MATCHING_IN_FIRST_STRING (dend == end_match_1) + +/* Call before fetching a character with *d. This switches over to + string2 if necessary. */ +#define PREFETCH() \ + while (d == dend) \ + { \ + /* End of string2 => fail. */ \ + if (dend == end_match_2) \ + goto fail; \ + /* End of string1 => advance to string2. */ \ + d = string2; \ + dend = end_match_2; \ + } + + +/* Test if at very beginning or at very end of the virtual concatenation + of `string1' and `string2'. If only one string, it's `string2'. */ +#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) +#define AT_STRINGS_END(d) ((d) == end2) + + +/* Test if D points to a character which is word-constituent. We have + two special cases to check for: if past the end of string1, look at + the first character in string2; and if before the beginning of + string2, look at the last character in string1. */ +#define WORDCHAR_P(d) \ + (SYNTAX ((d) == end1 ? *string2 \ + : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ + == Sword) + +/* Disabled due to a compiler bug -- see comment at case wordbound */ +#if 0 +/* Test if the character before D and the one at D differ with respect + to being word-constituent. */ +#define AT_WORD_BOUNDARY(d) \ + (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ + || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) +#endif + +/* Free everything we malloc. */ +#ifdef MATCH_MAY_ALLOCATE +#define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL +#define FREE_VARIABLES() \ + do { \ + REGEX_FREE_STACK (fail_stack.stack); \ + FREE_VAR (regstart); \ + FREE_VAR (regend); \ + FREE_VAR (old_regstart); \ + FREE_VAR (old_regend); \ + FREE_VAR (best_regstart); \ + FREE_VAR (best_regend); \ + FREE_VAR (reg_info); \ + FREE_VAR (reg_dummy); \ + FREE_VAR (reg_info_dummy); \ + } while (0) +#else +#define FREE_VARIABLES() ((void)0) /* Do nothing! 
But inhibit gcc warning. */ +#endif /* not MATCH_MAY_ALLOCATE */ + +/* These values must meet several constraints. They must not be valid + register values; since we have a limit of 255 registers (because + we use only one byte in the pattern for the register number), we can + use numbers larger than 255. They must differ by 1, because of + NUM_FAILURE_ITEMS above. And the value for the lowest register must + be larger than the value for the highest register, so we do not try + to actually save any registers when none are active. */ +#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) +#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) + +/* Matching routines. */ + +#ifndef emacs /* Emacs never uses this. */ +/* re_match is like re_match_2 except it takes only a single string. */ + +int +re_match (bufp, string, size, pos, regs) + struct re_pattern_buffer *bufp; + const char *string; + int size, pos; + struct re_registers *regs; +{ + int result = re_match_2_internal (bufp, NULL, 0, string, size, + pos, regs, size); +#ifndef REGEX_MALLOC +#ifdef C_ALLOCA + alloca (0); +#endif +#endif + return result; +} +#endif /* not emacs */ + +static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p, + unsigned char *end, + register_info_type *reg_info)); +static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p, + unsigned char *end, + register_info_type *reg_info)); +static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p, + unsigned char *end, + register_info_type *reg_info)); +static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2, + int len, char *translate)); + +/* re_match_2 matches the compiled pattern in BUFP against the + the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 + and SIZE2, respectively). We start matching at POS, and stop + matching at STOP. + + If REGS is non-null and the `no_sub' field of BUFP is nonzero, we + store offsets for the substring each group matched in REGS. 
See the + documentation for exactly how many groups we fill. + + We return -1 if no match, -2 if an internal error (such as the + failure stack overflowing). Otherwise, we return the length of the + matched substring. */ + +int +re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int size1, size2; + int pos; + struct re_registers *regs; + int stop; +{ + int result = re_match_2_internal (bufp, string1, size1, string2, size2, + pos, regs, stop); +#ifndef REGEX_MALLOC +#ifdef C_ALLOCA + alloca (0); +#endif +#endif + return result; +} + +/* This is a separate function so that we can force an alloca cleanup + afterwards. */ +static int +re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int size1, size2; + int pos; + struct re_registers *regs; + int stop; +{ + /* General temporaries. */ + int mcnt; + unsigned char *p1; + + /* Just past the end of the corresponding string. */ + const char *end1, *end2; + + /* Pointers into string1 and string2, just past the last characters in + each to consider matching. */ + const char *end_match_1, *end_match_2; + + /* Where we are in the data, and the end of the current string. */ + const char *d, *dend; + + /* Where we are in the pattern, and the end of the pattern. */ + unsigned char *p = bufp->buffer; + register unsigned char *pend = p + bufp->used; + + /* Mark the opcode just after a start_memory, so we can test for an + empty subpattern when we get to the stop_memory. */ + unsigned char *just_past_start_mem = 0; + + /* We use this to map every character in the string. */ + RE_TRANSLATE_TYPE translate = bufp->translate; + + /* Failure point stack. Each place that can handle a failure further + down the line pushes a failure point on this stack. 
It consists of + restart, regend, and reg_info for all registers corresponding to + the subexpressions we're currently inside, plus the number of such + registers, and, finally, two char *'s. The first char * is where + to resume scanning the pattern; the second one is where to resume + scanning the strings. If the latter is zero, the failure point is + a ``dummy''; if a failure happens and the failure point is a dummy, + it gets discarded and the next next one is tried. */ +#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ + fail_stack_type fail_stack; +#endif +#ifdef DEBUG + static unsigned failure_id = 0; + unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; +#endif + +#ifdef REL_ALLOC + /* This holds the pointer to the failure stack, when + it is allocated relocatably. */ + fail_stack_elt_t *failure_stack_ptr; +#endif + + /* We fill all the registers internally, independent of what we + return, for use in backreferences. The number here includes + an element for register zero. */ + size_t num_regs = bufp->re_nsub + 1; + + /* The currently active registers. */ + active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; + active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; + + /* Information on the contents of registers. These are pointers into + the input strings; they record just what was matched (on this + attempt) by a subexpression part of the pattern, that is, the + regnum-th regstart pointer points to where in the pattern we began + matching and the regnum-th regend points to right after where we + stopped matching the regnum-th subexpression. (The zeroth register + keeps track of what the whole pattern matches.) */ +#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. 
*/ + const char **regstart, **regend; +#endif + + /* If a group that's operated upon by a repetition operator fails to + match anything, then the register for its start will need to be + restored because it will have been set to wherever in the string we + are when we last see its open-group operator. Similarly for a + register's end. */ +#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ + const char **old_regstart, **old_regend; +#endif + + /* The is_active field of reg_info helps us keep track of which (possibly + nested) subexpressions we are currently in. The matched_something + field of reg_info[reg_num] helps us tell whether or not we have + matched any of the pattern so far this time through the reg_num-th + subexpression. These two fields get reset each time through any + loop their register is in. */ +#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ + register_info_type *reg_info; +#endif + + /* The following record the register info as found in the above + variables when we find a match better than any we've seen before. + This happens as we backtrack through the failure points, which in + turn happens only if we have not yet matched the entire string. */ + unsigned best_regs_set = false; +#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ + const char **best_regstart, **best_regend; +#endif + + /* Logically, this is `best_regend[0]'. But we don't want to have to + allocate space for that if we're not allocating space for anything + else (see below). Also, we never need info about register 0 for + any of the other register vectors, and it seems rather a kludge to + treat `best_regend' differently than the rest. So we keep track of + the end of the best match so far in a separate variable. We + initialize this to NULL so that when we backtrack the first time + and need to test it, it's not garbage. */ + const char *match_end = NULL; + + /* This helps SET_REGS_MATCHED avoid doing redundant work. 
*/ + int set_regs_matched_done = 0; + + /* Used when we pop values we don't care about. */ +#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ + const char **reg_dummy; + register_info_type *reg_info_dummy; +#endif + +#ifdef DEBUG + /* Counts the total number of registers pushed. */ + unsigned num_regs_pushed = 0; +#endif + + DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); + + INIT_FAIL_STACK (); + +#ifdef MATCH_MAY_ALLOCATE + /* Do not bother to initialize all the register variables if there are + no groups in the pattern, as it takes a fair amount of time. If + there are groups, we include space for register 0 (the whole + pattern), even though we never use it, since it simplifies the + array indexing. We should fix this. */ + if (bufp->re_nsub) + { + regstart = REGEX_TALLOC (num_regs, const char *); + regend = REGEX_TALLOC (num_regs, const char *); + old_regstart = REGEX_TALLOC (num_regs, const char *); + old_regend = REGEX_TALLOC (num_regs, const char *); + best_regstart = REGEX_TALLOC (num_regs, const char *); + best_regend = REGEX_TALLOC (num_regs, const char *); + reg_info = REGEX_TALLOC (num_regs, register_info_type); + reg_dummy = REGEX_TALLOC (num_regs, const char *); + reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); + + if (!(regstart && regend && old_regstart && old_regend && reg_info + && best_regstart && best_regend && reg_dummy && reg_info_dummy)) + { + FREE_VARIABLES (); + return -2; + } + } + else + { + /* We must initialize all our variables to NULL, so that + `FREE_VARIABLES' doesn't try to free them. */ + regstart = regend = old_regstart = old_regend = best_regstart + = best_regend = reg_dummy = NULL; + reg_info = reg_info_dummy = (register_info_type *) NULL; + } +#endif /* MATCH_MAY_ALLOCATE */ + + /* The starting position is bogus. 
*/ + if (pos < 0 || pos > size1 + size2) + { + FREE_VARIABLES (); + return -1; + } + + /* Initialize subexpression text positions to -1 to mark ones that no + start_memory/stop_memory has been seen for. Also initialize the + register information struct. */ + for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) + { + regstart[mcnt] = regend[mcnt] + = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; + + REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; + IS_ACTIVE (reg_info[mcnt]) = 0; + MATCHED_SOMETHING (reg_info[mcnt]) = 0; + EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; + } + + /* We move `string1' into `string2' if the latter's empty -- but not if + `string1' is null. */ + if (size2 == 0 && string1 != NULL) + { + string2 = string1; + size2 = size1; + string1 = 0; + size1 = 0; + } + end1 = string1 + size1; + end2 = string2 + size2; + + /* Compute where to stop matching, within the two strings. */ + if (stop <= size1) + { + end_match_1 = string1 + stop; + end_match_2 = string2; + } + else + { + end_match_1 = end1; + end_match_2 = string2 + stop - size1; + } + + /* `p' scans through the pattern as `d' scans through the data. + `dend' is the end of the input string that `d' points within. `d' + is advanced into the following input string whenever necessary, but + this happens before fetching; therefore, at the beginning of the + loop, `d' can be pointing at the end of a string, but it cannot + equal `string2'. */ + if (size1 > 0 && pos <= size1) + { + d = string1 + pos; + dend = end_match_1; + } + else + { + d = string2 + pos - size1; + dend = end_match_2; + } + + DEBUG_PRINT1 ("The compiled pattern is:\n"); + DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); + DEBUG_PRINT1 ("The string to match is: `"); + DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); + DEBUG_PRINT1 ("'\n"); + + /* This loops over pattern commands. 
It exits by returning from the + function if the match is complete, or it drops through if the match + fails at this starting point in the input data. */ + for (;;) + { +#ifdef _LIBC + DEBUG_PRINT2 ("\n%p: ", p); +#else + DEBUG_PRINT2 ("\n0x%x: ", p); +#endif + + if (p == pend) + { /* End of pattern means we might have succeeded. */ + DEBUG_PRINT1 ("end of pattern ... "); + + /* If we haven't matched the entire string, and we want the + longest match, try backtracking. */ + if (d != end_match_2) + { + /* 1 if this match ends in the same string (string1 or string2) + as the best previous match. */ + boolean same_str_p = (FIRST_STRING_P (match_end) + == MATCHING_IN_FIRST_STRING); + /* 1 if this match is the best seen so far. */ + boolean best_match_p; + + /* AIX compiler got confused when this was combined + with the previous declaration. */ + if (same_str_p) + best_match_p = d > match_end; + else + best_match_p = !MATCHING_IN_FIRST_STRING; + + DEBUG_PRINT1 ("backtracking.\n"); + + if (!FAIL_STACK_EMPTY ()) + { /* More failure points to try. */ + + /* If exceeds best match so far, save it. */ + if (!best_regs_set || best_match_p) + { + best_regs_set = true; + match_end = d; + + DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); + + for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) + { + best_regstart[mcnt] = regstart[mcnt]; + best_regend[mcnt] = regend[mcnt]; + } + } + goto fail; + } + + /* If no failure points, don't restore garbage. And if + last match is real best match, don't restore second + best one. */ + else if (best_regs_set && !best_match_p) + { + restore_best_regs: + /* Restore best match. It may happen that `dend == + end_match_1' while the restored d is in string2. + For example, the pattern `x.*y.*z' against the + strings `x-' and `y-z-', if the two strings are + not consecutive in memory. */ + DEBUG_PRINT1 ("Restoring best registers.\n"); + + d = match_end; + dend = ((d >= string1 && d <= end1) + ? 
end_match_1 : end_match_2); + + for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) + { + regstart[mcnt] = best_regstart[mcnt]; + regend[mcnt] = best_regend[mcnt]; + } + } + } /* d != end_match_2 */ + + succeed_label: + DEBUG_PRINT1 ("Accepting match.\n"); + + /* If caller wants register contents data back, do it. */ + if (regs && !bufp->no_sub) + { + /* Have the register data arrays been allocated? */ + if (bufp->regs_allocated == REGS_UNALLOCATED) + { /* No. So allocate them with malloc. We need one + extra element beyond `num_regs' for the `-1' marker + GNU code uses. */ + regs->num_regs = MAX (RE_NREGS, num_regs + 1); + regs->start = TALLOC (regs->num_regs, regoff_t); + regs->end = TALLOC (regs->num_regs, regoff_t); + if (regs->start == NULL || regs->end == NULL) + { + FREE_VARIABLES (); + return -2; + } + bufp->regs_allocated = REGS_REALLOCATE; + } + else if (bufp->regs_allocated == REGS_REALLOCATE) + { /* Yes. If we need more elements than were already + allocated, reallocate them. If we need fewer, just + leave it alone. */ + if (regs->num_regs < num_regs + 1) + { + regs->num_regs = num_regs + 1; + RETALLOC (regs->start, regs->num_regs, regoff_t); + RETALLOC (regs->end, regs->num_regs, regoff_t); + if (regs->start == NULL || regs->end == NULL) + { + FREE_VARIABLES (); + return -2; + } + } + } + else + { + /* These braces fend off a "empty body in an else-statement" + warning under GCC when assert expands to nothing. */ + assert (bufp->regs_allocated == REGS_FIXED); + } + + /* Convert the pointer data in `regstart' and `regend' to + indices. Register zero has to be set differently, + since we haven't kept track of any info for it. */ + if (regs->num_regs > 0) + { + regs->start[0] = pos; + regs->end[0] = (MATCHING_IN_FIRST_STRING + ? ((regoff_t) (d - string1)) + : ((regoff_t) (d - string2 + size1))); + } + + /* Go through the first `min (num_regs, regs->num_regs)' + registers, since that is all we initialized. 
*/ + for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs); + mcnt++) + { + if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) + regs->start[mcnt] = regs->end[mcnt] = -1; + else + { + regs->start[mcnt] + = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); + regs->end[mcnt] + = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); + } + } + + /* If the regs structure we return has more elements than + were in the pattern, set the extra elements to -1. If + we (re)allocated the registers, this is the case, + because we always allocate enough to have at least one + -1 at the end. */ + for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++) + regs->start[mcnt] = regs->end[mcnt] = -1; + } /* regs && !bufp->no_sub */ + + DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", + nfailure_points_pushed, nfailure_points_popped, + nfailure_points_pushed - nfailure_points_popped); + DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); + + mcnt = d - pos - (MATCHING_IN_FIRST_STRING + ? string1 + : string2 - size1); + + DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); + + FREE_VARIABLES (); + return mcnt; + } + + /* Otherwise match next pattern command. */ + switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) + { + /* Ignore these. Used to ignore the n of succeed_n's which + currently have n == 0. */ + case no_op: + DEBUG_PRINT1 ("EXECUTING no_op.\n"); + break; + + case succeed: + DEBUG_PRINT1 ("EXECUTING succeed.\n"); + goto succeed_label; + + /* Match the next n pattern characters exactly. The following + byte in the pattern defines n, and the n bytes after that + are the characters to match. */ + case exactn: + mcnt = *p++; + DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); + + /* This is written out as an if-else so we don't waste time + testing `translate' inside the loop. 
*/ + if (translate) + { + do + { + PREFETCH (); + if ((unsigned char) translate[(unsigned char) *d++] + != (unsigned char) *p++) + goto fail; + } + while (--mcnt); + } + else + { + do + { + PREFETCH (); + if (*d++ != (char) *p++) goto fail; + } + while (--mcnt); + } + SET_REGS_MATCHED (); + break; + + + /* Match any character except possibly a newline or a null. */ + case anychar: + DEBUG_PRINT1 ("EXECUTING anychar.\n"); + + PREFETCH (); + + if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') + || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) + goto fail; + + SET_REGS_MATCHED (); + DEBUG_PRINT2 (" Matched `%d'.\n", *d); + d++; + break; + + + case charset: + case charset_not: + { + register unsigned char c; + boolean not = (re_opcode_t) *(p - 1) == charset_not; + + DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); + + PREFETCH (); + c = TRANSLATE (*d); /* The character to match. */ + + /* Cast to `unsigned' instead of `unsigned char' in case the + bit list is a full 32 bytes long. */ + if (c < (unsigned) (*p * BYTEWIDTH) + && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) + not = !not; + + p += 1 + *p; + + if (!not) goto fail; + + SET_REGS_MATCHED (); + d++; + break; + } + + + /* The beginning of a group is represented by start_memory. + The arguments are the register number in the next byte, and the + number of groups inner to this one in the next. The text + matched within the group is recorded (in the internal + registers data structure) under the register number. */ + case start_memory: + DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); + + /* Find out if this group can match the empty string. */ + p1 = p; /* To send to group_match_null_string_p. 
*/ + + if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) + REG_MATCH_NULL_STRING_P (reg_info[*p]) + = group_match_null_string_p (&p1, pend, reg_info); + + /* Save the position in the string where we were the last time + we were at this open-group operator in case the group is + operated upon by a repetition operator, e.g., with `(a*)*b' + against `ab'; then we want to ignore where we are now in + the string in case this attempt to match fails. */ + old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) + ? REG_UNSET (regstart[*p]) ? d : regstart[*p] + : regstart[*p]; + DEBUG_PRINT2 (" old_regstart: %d\n", + POINTER_TO_OFFSET (old_regstart[*p])); + + regstart[*p] = d; + DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); + + IS_ACTIVE (reg_info[*p]) = 1; + MATCHED_SOMETHING (reg_info[*p]) = 0; + + /* Clear this whenever we change the register activity status. */ + set_regs_matched_done = 0; + + /* This is the new highest active register. */ + highest_active_reg = *p; + + /* If nothing was active before, this is the new lowest active + register. */ + if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) + lowest_active_reg = *p; + + /* Move past the register number and inner group count. */ + p += 2; + just_past_start_mem = p; + + break; + + + /* The stop_memory opcode represents the end of a group. Its + arguments are the same as start_memory's: the register + number, and the number of inner groups. */ + case stop_memory: + DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); + + /* We need to save the string position the last time we were at + this close-group operator in case the group is operated + upon by a repetition operator, e.g., with `((a*)*(b*)*)*' + against `aba'; then we want to ignore where we are now in + the string in case this attempt to match fails. */ + old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) + ? REG_UNSET (regend[*p]) ? 
d : regend[*p] + : regend[*p]; + DEBUG_PRINT2 (" old_regend: %d\n", + POINTER_TO_OFFSET (old_regend[*p])); + + regend[*p] = d; + DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); + + /* This register isn't active anymore. */ + IS_ACTIVE (reg_info[*p]) = 0; + + /* Clear this whenever we change the register activity status. */ + set_regs_matched_done = 0; + + /* If this was the only register active, nothing is active + anymore. */ + if (lowest_active_reg == highest_active_reg) + { + lowest_active_reg = NO_LOWEST_ACTIVE_REG; + highest_active_reg = NO_HIGHEST_ACTIVE_REG; + } + else + { /* We must scan for the new highest active register, since + it isn't necessarily one less than now: consider + (a(b)c(d(e)f)g). When group 3 ends, after the f), the + new highest active register is 1. */ + unsigned char r = *p - 1; + while (r > 0 && !IS_ACTIVE (reg_info[r])) + r--; + + /* If we end up at register zero, that means that we saved + the registers as the result of an `on_failure_jump', not + a `start_memory', and we jumped to past the innermost + `stop_memory'. For example, in ((.)*) we save + registers 1 and 2 as a result of the *, but when we pop + back to the second ), we are at the stop_memory 1. + Thus, nothing is active. */ + if (r == 0) + { + lowest_active_reg = NO_LOWEST_ACTIVE_REG; + highest_active_reg = NO_HIGHEST_ACTIVE_REG; + } + else + highest_active_reg = r; + } + + /* If just failed to match something this time around with a + group that's operated on by a repetition operator, try to + force exit from the ``loop'', and restore the register + information for this group that we had before trying this + last match. 
*/ + if ((!MATCHED_SOMETHING (reg_info[*p]) + || just_past_start_mem == p - 1) + && (p + 2) < pend) + { + boolean is_a_jump_n = false; + + p1 = p + 2; + mcnt = 0; + switch ((re_opcode_t) *p1++) + { + case jump_n: + is_a_jump_n = true; + case pop_failure_jump: + case maybe_pop_jump: + case jump: + case dummy_failure_jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + if (is_a_jump_n) + p1 += 2; + break; + + default: + /* do nothing */ ; + } + p1 += mcnt; + + /* If the next operation is a jump backwards in the pattern + to an on_failure_jump right before the start_memory + corresponding to this stop_memory, exit from the loop + by forcing a failure after pushing on the stack the + on_failure_jump's jump in the pattern, and d. */ + if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump + && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) + { + /* If this group ever matched anything, then restore + what its registers were before trying this last + failed match, e.g., with `(a*)*b' against `ab' for + regstart[1], and, e.g., with `((a*)*(b*)*)*' + against `aba' for regend[3]. + + Also restore the registers for inner groups for, + e.g., `((a*)(b*))*' against `aba' (register 3 would + otherwise get trashed). */ + + if (EVER_MATCHED_SOMETHING (reg_info[*p])) + { + unsigned r; + + EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; + + /* Restore this and inner groups' (if any) registers. */ + for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1); + r++) + { + regstart[r] = old_regstart[r]; + + /* xx why this test? */ + if (old_regend[r] >= regstart[r]) + regend[r] = old_regend[r]; + } + } + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + PUSH_FAILURE_POINT (p1 + mcnt, d, -2); + + goto fail; + } + } + + /* Move past the register number and the inner group count. */ + p += 2; + break; + + + /* \<digit> has been turned into a `duplicate' command which is + followed by the numeric value of <digit> as the register number. 
*/ + case duplicate: + { + register const char *d2, *dend2; + int regno = *p++; /* Get which register to match against. */ + DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); + + /* Can't back reference a group which we've never matched. */ + if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) + goto fail; + + /* Where in input to try to start matching. */ + d2 = regstart[regno]; + + /* Where to stop matching; if both the place to start and + the place to stop matching are in the same string, then + set to the place to stop, otherwise, for now have to use + the end of the first string. */ + + dend2 = ((FIRST_STRING_P (regstart[regno]) + == FIRST_STRING_P (regend[regno])) + ? regend[regno] : end_match_1); + for (;;) + { + /* If necessary, advance to next segment in register + contents. */ + while (d2 == dend2) + { + if (dend2 == end_match_2) break; + if (dend2 == regend[regno]) break; + + /* End of string1 => advance to string2. */ + d2 = string2; + dend2 = regend[regno]; + } + /* At end of register contents => success */ + if (d2 == dend2) break; + + /* If necessary, advance to next segment in data. */ + PREFETCH (); + + /* How many characters left in this segment to match. */ + mcnt = dend - d; + + /* Want how many consecutive characters we can match in + one shot, so, if necessary, adjust the count. */ + if (mcnt > dend2 - d2) + mcnt = dend2 - d2; + + /* Compare that many; failure if mismatch, else move + past them. */ + if (translate + ? bcmp_translate (d, d2, mcnt, translate) + : bcmp (d, d2, mcnt)) + goto fail; + d += mcnt, d2 += mcnt; + + /* Do this because we've match some characters. */ + SET_REGS_MATCHED (); + } + } + break; + + + /* begline matches the empty string at the beginning of the string + (unless `not_bol' is set in `bufp'), and, if + `newline_anchor' is set, after newlines. 
*/ + case begline: + DEBUG_PRINT1 ("EXECUTING begline.\n"); + + if (AT_STRINGS_BEG (d)) + { + if (!bufp->not_bol) break; + } + else if (d[-1] == '\n' && bufp->newline_anchor) + { + break; + } + /* In all other cases, we fail. */ + goto fail; + + + /* endline is the dual of begline. */ + case endline: + DEBUG_PRINT1 ("EXECUTING endline.\n"); + + if (AT_STRINGS_END (d)) + { + if (!bufp->not_eol) break; + } + + /* We have to ``prefetch'' the next character. */ + else if ((d == end1 ? *string2 : *d) == '\n' + && bufp->newline_anchor) + { + break; + } + goto fail; + + + /* Match at the very beginning of the data. */ + case begbuf: + DEBUG_PRINT1 ("EXECUTING begbuf.\n"); + if (AT_STRINGS_BEG (d)) + break; + goto fail; + + + /* Match at the very end of the data. */ + case endbuf: + DEBUG_PRINT1 ("EXECUTING endbuf.\n"); + if (AT_STRINGS_END (d)) + break; + goto fail; + + + /* on_failure_keep_string_jump is used to optimize `.*\n'. It + pushes NULL as the value for the string on the stack. Then + `pop_failure_point' will keep the current value for the + string, instead of restoring it. To see why, consider + matching `foo\nbar' against `.*\n'. The .* matches the foo; + then the . fails against the \n. But the next thing we want + to do is match the \n against the \n; if we restored the + string value, we would be back at the foo. + + Because this is used only in specific cases, we don't need to + check all the things that `on_failure_jump' does, to make + sure the right things get saved on the stack. Hence we don't + share its code. The only reason to push anything on the + stack at all is that otherwise we would have to change + `anychar's code to do something besides goto fail in this + case; that seems worse than this. 
*/ + case on_failure_keep_string_jump: + DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); + + EXTRACT_NUMBER_AND_INCR (mcnt, p); +#ifdef _LIBC + DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt); +#else + DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); +#endif + + PUSH_FAILURE_POINT (p + mcnt, NULL, -2); + break; + + + /* Uses of on_failure_jump: + + Each alternative starts with an on_failure_jump that points + to the beginning of the next alternative. Each alternative + except the last ends with a jump that in effect jumps past + the rest of the alternatives. (They really jump to the + ending jump of the following alternative, because tensioning + these jumps is a hassle.) + + Repeats start with an on_failure_jump that points past both + the repetition text and either the following jump or + pop_failure_jump back to this on_failure_jump. */ + case on_failure_jump: + on_failure: + DEBUG_PRINT1 ("EXECUTING on_failure_jump"); + + EXTRACT_NUMBER_AND_INCR (mcnt, p); +#ifdef _LIBC + DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt); +#else + DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); +#endif + + /* If this on_failure_jump comes right before a group (i.e., + the original * applied to a group), save the information + for that group and all inner ones, so that if we fail back + to this point, the group's information will be correct. + For example, in \(a*\)*\1, we need the preceding group, + and in \(zz\(a*\)b*\)\2, we need the inner group. */ + + /* We can't use `p' to check ahead because we push + a failure point to `p + mcnt' after we do this. */ + p1 = p; + + /* We need to skip no_op's before we look for the + start_memory in case this on_failure_jump is happening as + the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 + against aba. */ + while (p1 < pend && (re_opcode_t) *p1 == no_op) + p1++; + + if (p1 < pend && (re_opcode_t) *p1 == start_memory) + { + /* We have a new highest active register now. 
This will + get reset at the start_memory we are about to get to, + but we will have saved all the registers relevant to + this repetition op, as described above. */ + highest_active_reg = *(p1 + 1) + *(p1 + 2); + if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) + lowest_active_reg = *(p1 + 1); + } + + DEBUG_PRINT1 (":\n"); + PUSH_FAILURE_POINT (p + mcnt, d, -2); + break; + + + /* A smart repeat ends with `maybe_pop_jump'. + We change it to either `pop_failure_jump' or `jump'. */ + case maybe_pop_jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p); + DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); + { + register unsigned char *p2 = p; + + /* Compare the beginning of the repeat with what in the + pattern follows its end. If we can establish that there + is nothing that they would both match, i.e., that we + would have to backtrack because of (as in, e.g., `a*a') + then we can change to pop_failure_jump, because we'll + never have to backtrack. + + This is not true in the case of alternatives: in + `(a|ab)*' we do need to backtrack to the `ab' alternative + (e.g., if the string was `ab'). But instead of trying to + detect that here, the alternative has put on a dummy + failure point which is what we will end up popping. */ + + /* Skip over open/close-group commands. + If what follows this loop is a ...+ construct, + look at what begins its body, since we will have to + match at least one of that. */ + while (1) + { + if (p2 + 2 < pend + && ((re_opcode_t) *p2 == stop_memory + || (re_opcode_t) *p2 == start_memory)) + p2 += 3; + else if (p2 + 6 < pend + && (re_opcode_t) *p2 == dummy_failure_jump) + p2 += 6; + else + break; + } + + p1 = p + mcnt; + /* p1[0] ... p1[2] are the `on_failure_jump' corresponding + to the `maybe_finalize_jump' of this case. Examine what + follows. */ + + /* If we're at the end of the pattern, we can change. */ + if (p2 == pend) + { + /* Consider what happens when matching ":\(.*\)" + against ":/". I don't really understand this code + yet. 
*/ + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 + (" End of pattern: change to `pop_failure_jump'.\n"); + } + + else if ((re_opcode_t) *p2 == exactn + || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) + { + register unsigned char c + = *p2 == (unsigned char) endline ? '\n' : p2[2]; + + if ((re_opcode_t) p1[3] == exactn && p1[5] != c) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", + c, p1[5]); + } + + else if ((re_opcode_t) p1[3] == charset + || (re_opcode_t) p1[3] == charset_not) + { + int not = (re_opcode_t) p1[3] == charset_not; + + if (c < (unsigned char) (p1[4] * BYTEWIDTH) + && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) + not = !not; + + /* `not' is equal to 1 if c would match, which means + that we can't change to pop_failure_jump. */ + if (!not) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); + } + } + } + else if ((re_opcode_t) *p2 == charset) + { +#ifdef DEBUG + register unsigned char c + = *p2 == (unsigned char) endline ? '\n' : p2[2]; +#endif + +#if 0 + if ((re_opcode_t) p1[3] == exactn + && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] + && (p2[2 + p1[5] / BYTEWIDTH] + & (1 << (p1[5] % BYTEWIDTH))))) +#else + if ((re_opcode_t) p1[3] == exactn + && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4] + && (p2[2 + p1[4] / BYTEWIDTH] + & (1 << (p1[4] % BYTEWIDTH))))) +#endif + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", + c, p1[5]); + } + + else if ((re_opcode_t) p1[3] == charset_not) + { + int idx; + /* We win if the charset_not inside the loop + lists every character listed in the charset after. */ + for (idx = 0; idx < (int) p2[1]; idx++) + if (! 
(p2[2 + idx] == 0 + || (idx < (int) p1[4] + && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) + break; + + if (idx == p2[1]) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); + } + } + else if ((re_opcode_t) p1[3] == charset) + { + int idx; + /* We win if the charset inside the loop + has no overlap with the one after the loop. */ + for (idx = 0; + idx < (int) p2[1] && idx < (int) p1[4]; + idx++) + if ((p2[2 + idx] & p1[5 + idx]) != 0) + break; + + if (idx == p2[1] || idx == p1[4]) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); + } + } + } + } + p -= 2; /* Point at relative address again. */ + if ((re_opcode_t) p[-1] != pop_failure_jump) + { + p[-1] = (unsigned char) jump; + DEBUG_PRINT1 (" Match => jump.\n"); + goto unconditional_jump; + } + /* Note fall through. */ + + + /* The end of a simple repeat has a pop_failure_jump back to + its matching on_failure_jump, where the latter will push a + failure point. The pop_failure_jump takes off failure + points put on by this pop_failure_jump's matching + on_failure_jump; we got through the pattern to here from the + matching on_failure_jump, so didn't fail. */ + case pop_failure_jump: + { + /* We need to pass separate storage for the lowest and + highest registers, even though we don't care about the + actual values. Otherwise, we will restore only one + register from the stack, since lowest will == highest in + `pop_failure_point'. */ + active_reg_t dummy_low_reg, dummy_high_reg; + unsigned char *pdummy; + const char *sdummy; + + DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); + POP_FAILURE_POINT (sdummy, pdummy, + dummy_low_reg, dummy_high_reg, + reg_dummy, reg_dummy, reg_info_dummy); + } + /* Note fall through. */ + + unconditional_jump: +#ifdef _LIBC + DEBUG_PRINT2 ("\n%p: ", p); +#else + DEBUG_PRINT2 ("\n0x%x: ", p); +#endif + /* Note fall through. */ + + /* Unconditionally jump (without popping any failure points). 
*/ + case jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ + DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); + p += mcnt; /* Do the jump. */ +#ifdef _LIBC + DEBUG_PRINT2 ("(to %p).\n", p); +#else + DEBUG_PRINT2 ("(to 0x%x).\n", p); +#endif + break; + + + /* We need this opcode so we can detect where alternatives end + in `group_match_null_string_p' et al. */ + case jump_past_alt: + DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); + goto unconditional_jump; + + + /* Normally, the on_failure_jump pushes a failure point, which + then gets popped at pop_failure_jump. We will end up at + pop_failure_jump, also, and with a pattern of, say, `a+', we + are skipping over the on_failure_jump, so we have to push + something meaningless for pop_failure_jump to pop. */ + case dummy_failure_jump: + DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); + /* It doesn't matter what we push for the string here. What + the code at `fail' tests is the value for the pattern. */ + PUSH_FAILURE_POINT (0, 0, -2); + goto unconditional_jump; + + + /* At the end of an alternative, we need to push a dummy failure + point in case we are followed by a `pop_failure_jump', because + we don't want the failure point for the alternative to be + popped. For example, matching `(a|ab)*' against `aab' + requires that we match the `ab' alternative. */ + case push_dummy_failure: + DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); + /* See comments just above at `dummy_failure_jump' about the + two zeroes. */ + PUSH_FAILURE_POINT (0, 0, -2); + break; + + /* Have to succeed matching what follows at least n times. + After that, handle like `on_failure_jump'. */ + case succeed_n: + EXTRACT_NUMBER (mcnt, p + 2); + DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); + + assert (mcnt >= 0); + /* Originally, this is how many times we HAVE to succeed. 
*/ + if (mcnt > 0) + { + mcnt--; + p += 2; + STORE_NUMBER_AND_INCR (p, mcnt); +#ifdef _LIBC + DEBUG_PRINT3 (" Setting %p to %d.\n", p - 2, mcnt); +#else + DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - 2, mcnt); +#endif + } + else if (mcnt == 0) + { +#ifdef _LIBC + DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", p+2); +#else + DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); +#endif + p[2] = (unsigned char) no_op; + p[3] = (unsigned char) no_op; + goto on_failure; + } + break; + + case jump_n: + EXTRACT_NUMBER (mcnt, p + 2); + DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); + + /* Originally, this is how many times we CAN jump. */ + if (mcnt) + { + mcnt--; + STORE_NUMBER (p + 2, mcnt); +#ifdef _LIBC + DEBUG_PRINT3 (" Setting %p to %d.\n", p + 2, mcnt); +#else + DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + 2, mcnt); +#endif + goto unconditional_jump; + } + /* If don't have to jump any more, skip over the rest of command. */ + else + p += 4; + break; + + case set_number_at: + { + DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); + + EXTRACT_NUMBER_AND_INCR (mcnt, p); + p1 = p + mcnt; + EXTRACT_NUMBER_AND_INCR (mcnt, p); +#ifdef _LIBC + DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt); +#else + DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); +#endif + STORE_NUMBER (p1, mcnt); + break; + } + +#if 0 + /* The DEC Alpha C compiler 3.x generates incorrect code for the + test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of + AT_WORD_BOUNDARY, so this code is disabled. Expanding the + macro and introducing temporary variables works around the bug. 
*/ + + case wordbound: + DEBUG_PRINT1 ("EXECUTING wordbound.\n"); + if (AT_WORD_BOUNDARY (d)) + break; + goto fail; + + case notwordbound: + DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); + if (AT_WORD_BOUNDARY (d)) + goto fail; + break; +#else + case wordbound: + { + boolean prevchar, thischar; + + DEBUG_PRINT1 ("EXECUTING wordbound.\n"); + if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) + break; + + prevchar = WORDCHAR_P (d - 1); + thischar = WORDCHAR_P (d); + if (prevchar != thischar) + break; + goto fail; + } + + case notwordbound: + { + boolean prevchar, thischar; + + DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); + if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) + goto fail; + + prevchar = WORDCHAR_P (d - 1); + thischar = WORDCHAR_P (d); + if (prevchar != thischar) + goto fail; + break; + } +#endif + + case wordbeg: + DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); + if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) + break; + goto fail; + + case wordend: + DEBUG_PRINT1 ("EXECUTING wordend.\n"); + if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) + && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) + break; + goto fail; + +#ifdef emacs + case before_dot: + DEBUG_PRINT1 ("EXECUTING before_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) >= point) + goto fail; + break; + + case at_dot: + DEBUG_PRINT1 ("EXECUTING at_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) != point) + goto fail; + break; + + case after_dot: + DEBUG_PRINT1 ("EXECUTING after_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) <= point) + goto fail; + break; + + case syntaxspec: + DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); + mcnt = *p++; + goto matchsyntax; + + case wordchar: + DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); + mcnt = (int) Sword; + matchsyntax: + PREFETCH (); + /* Can't use *d++ here; SYNTAX may be an unsafe macro. 
*/ + d++; + if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt) + goto fail; + SET_REGS_MATCHED (); + break; + + case notsyntaxspec: + DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); + mcnt = *p++; + goto matchnotsyntax; + + case notwordchar: + DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); + mcnt = (int) Sword; + matchnotsyntax: + PREFETCH (); + /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ + d++; + if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt) + goto fail; + SET_REGS_MATCHED (); + break; + +#else /* not emacs */ + case wordchar: + DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); + PREFETCH (); + if (!WORDCHAR_P (d)) + goto fail; + SET_REGS_MATCHED (); + d++; + break; + + case notwordchar: + DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); + PREFETCH (); + if (WORDCHAR_P (d)) + goto fail; + SET_REGS_MATCHED (); + d++; + break; +#endif /* not emacs */ + + default: + abort (); + } + continue; /* Successfully executed one pattern command; keep going. */ + + + /* We goto here if a matching operation fails. */ + fail: + if (!FAIL_STACK_EMPTY ()) + { /* A restart point is known. Restore to that state. */ + DEBUG_PRINT1 ("\nFAIL:\n"); + POP_FAILURE_POINT (d, p, + lowest_active_reg, highest_active_reg, + regstart, regend, reg_info); + + /* If this failure point is a dummy, try the next one. */ + if (!p) + goto fail; + + /* If we failed to the end of the pattern, don't examine *p. */ + assert (p <= pend); + if (p < pend) + { + boolean is_a_jump_n = false; + + /* If failed to a backwards jump that's part of a repetition + loop, need to pop this failure point and use the next one. 
*/ + switch ((re_opcode_t) *p) + { + case jump_n: + is_a_jump_n = true; + case maybe_pop_jump: + case pop_failure_jump: + case jump: + p1 = p + 1; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + p1 += mcnt; + + if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) + || (!is_a_jump_n + && (re_opcode_t) *p1 == on_failure_jump)) + goto fail; + break; + default: + /* do nothing */ ; + } + } + + if (d >= string1 && d <= end1) + dend = end_match_1; + } + else + break; /* Matching at this starting point really fails. */ + } /* for (;;) */ + + if (best_regs_set) + goto restore_best_regs; + + FREE_VARIABLES (); + + return -1; /* Failure to match. */ +} /* re_match_2 */ + +/* Subroutine definitions for re_match_2. */ + + +/* We are passed P pointing to a register number after a start_memory. + + Return true if the pattern up to the corresponding stop_memory can + match the empty string, and false otherwise. + + If we find the matching stop_memory, sets P to point to one past its number. + Otherwise, sets P to an undefined byte less than or equal to END. + + We don't handle duplicates properly (yet). */ + +static boolean +group_match_null_string_p (p, end, reg_info) + unsigned char **p, *end; + register_info_type *reg_info; +{ + int mcnt; + /* Point to after the args to the start_memory. */ + unsigned char *p1 = *p + 2; + + while (p1 < end) + { + /* Skip over opcodes that can match nothing, and return true or + false, as appropriate, when we get to one that can't, or to the + matching stop_memory. */ + + switch ((re_opcode_t) *p1) + { + /* Could be either a loop or a series of alternatives. */ + case on_failure_jump: + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + + /* If the next operation is not a jump backwards in the + pattern. */ + + if (mcnt >= 0) + { + /* Go through the on_failure_jumps of the alternatives, + seeing if any of the alternatives cannot match nothing. 
+ The last alternative starts with only a jump, + whereas the rest start with on_failure_jump and end + with a jump, e.g., here is the pattern for `a|b|c': + + /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 + /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 + /exactn/1/c + + So, we have to first go through the first (n-1) + alternatives and then deal with the last one separately. */ + + + /* Deal with the first (n-1) alternatives, which start + with an on_failure_jump (see above) that jumps to right + past a jump_past_alt. */ + + while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) + { + /* `mcnt' holds how many bytes long the alternative + is, including the ending `jump_past_alt' and + its number. */ + + if (!alt_match_null_string_p (p1, p1 + mcnt - 3, + reg_info)) + return false; + + /* Move to right after this alternative, including the + jump_past_alt. */ + p1 += mcnt; + + /* Break if it's the beginning of an n-th alternative + that doesn't begin with an on_failure_jump. */ + if ((re_opcode_t) *p1 != on_failure_jump) + break; + + /* Still have to check that it's not an n-th + alternative that starts with an on_failure_jump. */ + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) + { + /* Get to the beginning of the n-th alternative. */ + p1 -= 3; + break; + } + } + + /* Deal with the last alternative: go back and get number + of the `jump_past_alt' just before it. `mcnt' contains + the length of the alternative. */ + EXTRACT_NUMBER (mcnt, p1 - 2); + + if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) + return false; + + p1 += mcnt; /* Get past the n-th alternative. 
*/ + } /* if mcnt > 0 */ + break; + + + case stop_memory: + assert (p1[1] == **p); + *p = p1 + 2; + return true; + + + default: + if (!common_op_match_null_string_p (&p1, end, reg_info)) + return false; + } + } /* while p1 < end */ + + return false; +} /* group_match_null_string_p */ + + +/* Similar to group_match_null_string_p, but doesn't deal with alternatives: + It expects P to be the first byte of a single alternative and END one + byte past the last. The alternative can contain groups. */ + +static boolean +alt_match_null_string_p (p, end, reg_info) + unsigned char *p, *end; + register_info_type *reg_info; +{ + int mcnt; + unsigned char *p1 = p; + + while (p1 < end) + { + /* Skip over opcodes that can match nothing, and break when we get + to one that can't. */ + + switch ((re_opcode_t) *p1) + { + /* It's a loop. */ + case on_failure_jump: + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + p1 += mcnt; + break; + + default: + if (!common_op_match_null_string_p (&p1, end, reg_info)) + return false; + } + } /* while p1 < end */ + + return true; +} /* alt_match_null_string_p */ + + +/* Deals with the ops common to group_match_null_string_p and + alt_match_null_string_p. + + Sets P to one after the op and its arguments, if any. */ + +static boolean +common_op_match_null_string_p (p, end, reg_info) + unsigned char **p, *end; + register_info_type *reg_info; +{ + int mcnt; + boolean ret; + int reg_no; + unsigned char *p1 = *p; + + switch ((re_opcode_t) *p1++) + { + case no_op: + case begline: + case endline: + case begbuf: + case endbuf: + case wordbeg: + case wordend: + case wordbound: + case notwordbound: +#ifdef emacs + case before_dot: + case at_dot: + case after_dot: +#endif + break; + + case start_memory: + reg_no = *p1; + assert (reg_no > 0 && reg_no <= MAX_REGNUM); + ret = group_match_null_string_p (&p1, end, reg_info); + + /* Have to set this here in case we're checking a group which + contains a group and a back reference to it. 
*/ + + if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) + REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; + + if (!ret) + return false; + break; + + /* If this is an optimized succeed_n for zero times, make the jump. */ + case jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + if (mcnt >= 0) + p1 += mcnt; + else + return false; + break; + + case succeed_n: + /* Get to the number of times to succeed. */ + p1 += 2; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + + if (mcnt == 0) + { + p1 -= 4; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + p1 += mcnt; + } + else + return false; + break; + + case duplicate: + if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) + return false; + break; + + case set_number_at: + p1 += 4; + + default: + /* All other opcodes mean we cannot match the empty string. */ + return false; + } + + *p = p1; + return true; +} /* common_op_match_null_string_p */ + + +/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN + bytes; nonzero otherwise. */ + +static int +bcmp_translate (s1, s2, len, translate) + const char *s1, *s2; + register int len; + RE_TRANSLATE_TYPE translate; +{ + register const unsigned char *p1 = (const unsigned char *) s1; + register const unsigned char *p2 = (const unsigned char *) s2; + while (len) + { + if (translate[*p1++] != translate[*p2++]) return 1; + len--; + } + return 0; +} + +/* Entry points for GNU code. */ + +/* re_compile_pattern is the GNU regular expression compiler: it + compiles PATTERN (of length SIZE) and puts the result in BUFP. + Returns 0 if the pattern was valid, otherwise an error string. + + Assumes the `allocated' (and perhaps `buffer') and `translate' fields + are set in BUFP on entry. + + We call regex_compile to do the actual compilation. 
*/ + +const char * +re_compile_pattern (pattern, length, bufp) + const char *pattern; + size_t length; + struct re_pattern_buffer *bufp; +{ + reg_errcode_t ret; + + /* GNU code is written to assume at least RE_NREGS registers will be set + (and at least one extra will be -1). */ + bufp->regs_allocated = REGS_UNALLOCATED; + + /* And GNU code determines whether or not to get register information + by passing null for the REGS argument to re_match, etc., not by + setting no_sub. */ + bufp->no_sub = 0; + + /* Match anchors at newline. */ + bufp->newline_anchor = 1; + + ret = regex_compile (pattern, length, re_syntax_options, bufp); + + if (!ret) + return NULL; + return gettext (re_error_msgid[(int) ret]); +} + +/* Entry points compatible with 4.2 BSD regex library. We don't define + them unless specifically requested. */ + +#if defined (_REGEX_RE_COMP) || defined (_LIBC) + +/* BSD has one and only one pattern buffer. */ +static struct re_pattern_buffer re_comp_buf; + +char * +#ifdef _LIBC +/* Make these definitions weak in libc, so POSIX programs can redefine + these names if they don't use our functions, and still use + regcomp/regexec below without link errors. */ +weak_function +#endif +re_comp (s) + const char *s; +{ + reg_errcode_t ret; + + if (!s) + { + if (!re_comp_buf.buffer) + return gettext ("No previous regular expression"); + return 0; + } + + if (!re_comp_buf.buffer) + { + re_comp_buf.buffer = (unsigned char *) malloc (200); + if (re_comp_buf.buffer == NULL) + return gettext (re_error_msgid[(int) REG_ESPACE]); + re_comp_buf.allocated = 200; + + re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); + if (re_comp_buf.fastmap == NULL) + return gettext (re_error_msgid[(int) REG_ESPACE]); + } + + /* Since `re_exec' always passes NULL for the `regs' argument, we + don't need to initialize the pattern buffer fields which affect it. */ + + /* Match anchors at newlines. 
*/ + re_comp_buf.newline_anchor = 1; + + ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); + + if (!ret) + return NULL; + + /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ + return (char *) gettext (re_error_msgid[(int) ret]); +} + + +int +#ifdef _LIBC +weak_function +#endif +re_exec (s) + const char *s; +{ + const int len = strlen (s); + return + 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); +} + +#endif /* _REGEX_RE_COMP */ + +/* POSIX.2 functions. Don't define these for Emacs. */ + +#ifndef emacs + +/* regcomp takes a regular expression as a string and compiles it. + + PREG is a regex_t *. We do not expect any fields to be initialized, + since POSIX says we shouldn't. Thus, we set + + `buffer' to the compiled pattern; + `used' to the length of the compiled pattern; + `syntax' to RE_SYNTAX_POSIX_EXTENDED if the + REG_EXTENDED bit in CFLAGS is set; otherwise, to + RE_SYNTAX_POSIX_BASIC; + `newline_anchor' to REG_NEWLINE being set in CFLAGS; + `fastmap' and `fastmap_accurate' to zero; + `re_nsub' to the number of subexpressions in PATTERN. + + PATTERN is the address of the pattern string. + + CFLAGS is a series of bits which affect compilation. + + If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we + use POSIX basic syntax. + + If REG_NEWLINE is set, then . and [^...] don't match newline. + Also, regexec will try a match beginning after every newline. + + If REG_ICASE is set, then we considers upper- and lowercase + versions of letters to be equivalent when matching. + + If REG_NOSUB is set, then when PREG is passed to regexec, that + routine will report only success or failure, and nothing about the + registers. + + It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for + the return codes and their meanings.) 
*/ + +int +regcomp (preg, pattern, cflags) + regex_t *preg; + const char *pattern; + int cflags; +{ + reg_errcode_t ret; + reg_syntax_t syntax + = (cflags & REG_EXTENDED) ? + RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; + + /* regex_compile will allocate the space for the compiled pattern. */ + preg->buffer = 0; + preg->allocated = 0; + preg->used = 0; + + /* Don't bother to use a fastmap when searching. This simplifies the + REG_NEWLINE case: if we used a fastmap, we'd have to put all the + characters after newlines into the fastmap. This way, we just try + every character. */ + preg->fastmap = 0; + + if (cflags & REG_ICASE) + { + unsigned i; + + preg->translate + = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE + * sizeof (*(RE_TRANSLATE_TYPE)0)); + if (preg->translate == NULL) + return (int) REG_ESPACE; + + /* Map uppercase characters to corresponding lowercase ones. */ + for (i = 0; i < CHAR_SET_SIZE; i++) + preg->translate[i] = ISUPPER (i) ? tolower (i) : i; + } + else + preg->translate = NULL; + + /* If REG_NEWLINE is set, newlines are treated differently. */ + if (cflags & REG_NEWLINE) + { /* REG_NEWLINE implies neither . nor [^...] match newline. */ + syntax &= ~RE_DOT_NEWLINE; + syntax |= RE_HAT_LISTS_NOT_NEWLINE; + /* It also changes the matching behavior. */ + preg->newline_anchor = 1; + } + else + preg->newline_anchor = 0; + + preg->no_sub = !!(cflags & REG_NOSUB); + + /* POSIX says a null character in the pattern terminates it, so we + can use strlen here in compiling the pattern. */ + ret = regex_compile (pattern, strlen (pattern), syntax, preg); + + /* POSIX doesn't distinguish between an unmatched open-group and an + unmatched close-group: both are REG_EPAREN. */ + if (ret == REG_ERPAREN) ret = REG_EPAREN; + + return (int) ret; +} + + +/* regexec searches for a given pattern, specified by PREG, in the + string STRING. + + If NMATCH is zero or REG_NOSUB was set in the cflags argument to + `regcomp', we ignore PMATCH. 
Otherwise, we assume PMATCH has at + least NMATCH elements, and we set them to the offsets of the + corresponding matched substrings. + + EFLAGS specifies `execution flags' which affect matching: if + REG_NOTBOL is set, then ^ does not match at the beginning of the + string; if REG_NOTEOL is set, then $ does not match at the end. + + We return 0 if we find a match and REG_NOMATCH if not. */ + +int +regexec (preg, string, nmatch, pmatch, eflags) + const regex_t *preg; + const char *string; + size_t nmatch; + regmatch_t pmatch[]; + int eflags; +{ + int ret; + struct re_registers regs; + regex_t private_preg; + int len = strlen (string); + boolean want_reg_info = !preg->no_sub && nmatch > 0; + + private_preg = *preg; + + private_preg.not_bol = !!(eflags & REG_NOTBOL); + private_preg.not_eol = !!(eflags & REG_NOTEOL); + + /* The user has told us exactly how many registers to return + information about, via `nmatch'. We have to pass that on to the + matching routines. */ + private_preg.regs_allocated = REGS_FIXED; + + if (want_reg_info) + { + regs.num_regs = nmatch; + regs.start = TALLOC (nmatch, regoff_t); + regs.end = TALLOC (nmatch, regoff_t); + if (regs.start == NULL || regs.end == NULL) + return (int) REG_NOMATCH; + } + + /* Perform the searching operation. */ + ret = re_search (&private_preg, string, len, + /* start: */ 0, /* range: */ len, + want_reg_info ? ®s : (struct re_registers *) 0); + + /* Copy the register information to the POSIX structure. */ + if (want_reg_info) + { + if (ret >= 0) + { + unsigned r; + + for (r = 0; r < nmatch; r++) + { + pmatch[r].rm_so = regs.start[r]; + pmatch[r].rm_eo = regs.end[r]; + } + } + + /* If we needed the temporary register info, free the space now. */ + free (regs.start); + free (regs.end); + } + + /* We want zero return to mean success, unlike `re_search'. */ + return ret >= 0 ? 
(int) REG_NOERROR : (int) REG_NOMATCH; +} + + +/* Returns a message corresponding to an error code, ERRCODE, returned + from either regcomp or regexec. We don't use PREG here. */ + +size_t +regerror (errcode, preg, errbuf, errbuf_size) + int errcode; + const regex_t *preg; + char *errbuf; + size_t errbuf_size; +{ + const char *msg; + size_t msg_size; + + if (errcode < 0 + || errcode >= (int) (sizeof (re_error_msgid) + / sizeof (re_error_msgid[0]))) + /* Only error codes returned by the rest of the code should be passed + to this routine. If we are given anything else, or if other regex + code generates an invalid error code, then the program has a bug. + Dump core so we can fix it. */ + abort (); + + msg = gettext (re_error_msgid[errcode]); + + msg_size = strlen (msg) + 1; /* Includes the null. */ + + if (errbuf_size != 0) + { + if (msg_size > errbuf_size) + { + strncpy (errbuf, msg, errbuf_size - 1); + errbuf[errbuf_size - 1] = 0; + } + else + strcpy (errbuf, msg); + } + + return msg_size; +} + + +/* Free dynamically allocated space used by PREG. */ + +void +regfree (preg) + regex_t *preg; +{ + if (preg->buffer != NULL) + free (preg->buffer); + preg->buffer = NULL; + + preg->allocated = 0; + preg->used = 0; + + if (preg->fastmap != NULL) + free (preg->fastmap); + preg->fastmap = NULL; + preg->fastmap_accurate = 0; + + if (preg->translate != NULL) + free (preg->translate); + preg->translate = NULL; +} + +#endif /* not emacs */ diff --git a/src/utilfuns/roman.c b/src/utilfuns/roman.c new file mode 100644 index 0000000..3c6d190 --- /dev/null +++ b/src/utilfuns/roman.c @@ -0,0 +1,82 @@ +/* + * roman.c + * Copyright 2001 by CrossWire Bible Society + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
/* Return 1 if STR consists only of Roman-numeral letters (either case)
   and spaces, 0 otherwise.  An empty string yields 1.  */
char isroman (const char* str) {
	const char * ch;
	for (ch = str; *ch; ch++)
		if (!strchr ("IVXLCDMivxlcdm ", *ch))
			return 0;
	return 1;
}

/* Value of a single Roman-numeral letter (either case); 0 for any
   other character.  */
static int roman_digit_value (char c) {
	switch (c) {
	case 'i': case 'I': return 1;
	case 'v': case 'V': return 5;
	case 'x': case 'X': return 10;
	case 'l': case 'L': return 50;
	case 'c': case 'C': return 100;
	case 'd': case 'D': return 500;
	case 'm': case 'M': return 1000;
	default:            return 0;
	}
}

/* Convert the Roman numeral in STR to an integer.
 *
 * Characters that are not Roman-numeral letters count as 0.  Whenever a
 * symbol is larger than the (already reduced) symbol before it, the pair
 * is treated as subtractive: the earlier value is taken off the later
 * one and contributes nothing by itself ("IV" -> 4, "IXC" -> 91).
 *
 * The scan keeps only the running total and the one still-pending
 * symbol, so no scratch array (and no allocation that could fail) is
 * needed.  An empty string yields 0.
 */
int from_rom(const char* str) {
	int total = 0;		/* sum of symbols that can no longer change */
	int pending;		/* value of the previous symbol, maybe reduced */

	if (!*str)
		return 0;

	pending = roman_digit_value(*str);
	for (++str; *str; ++str) {
		int cur = roman_digit_value(*str);
		if (cur > pending)
			cur -= pending;		/* subtractive pair: previous symbol is used up */
		else
			total += pending;	/* previous symbol stands on its own */
		pending = cur;
	}
	return total + pending;
}
argv[1])) { + printf("\n%s", argv[2]); + memset(buf, 0, size); + continue; + } + if (*buf) { + printf("%c", *buf); + } + memmove(buf, &buf[1], size); + } + buf[size - 1] = 0; + printf("%s", buf); +} diff --git a/src/utilfuns/swunicod.cpp b/src/utilfuns/swunicod.cpp new file mode 100644 index 0000000..f42fd86 --- /dev/null +++ b/src/utilfuns/swunicod.cpp @@ -0,0 +1,139 @@ +/* + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include "swunicod.h" +unsigned char* UTF32to8 (unsigned long utf32, unsigned char * utf8) { + unsigned int i; + for (i = 0; i < 6; i++) utf8[i] = 0; + + if (utf32 < 0x80) { + utf8[0] = (char)utf32; + } + else if (utf32 < 0x800) { + i = utf32 & 0x3f; + utf8[1] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x1f; + utf8[0] = 0xc0 | i; + } + else if (utf32 < 0x10000) { + i = utf32 & 0x3f; + utf8[2] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x3f; + utf8[1] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x0f; + utf8[0] = 0xe0 | i; + } + else if (utf32 < 0x200000) { + i = utf32 & 0x3f; + utf8[3] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x3f; + utf8[2] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x3f; + utf8[1] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x07; + utf8[0] = 0xf0 | i; + } + else if (utf32 < 0x4000000) { + i = utf32 & 0x3f; + utf8[4] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x3f; + utf8[3] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x3f; + utf8[2] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x3f; + 
/** Converts a UTF-32 value into a 1-6 byte UTF-8 sequence
 * @param utf32 the code point value to encode
 * @param utf8 pointer to an array of at least 6 unsigned chars; all 6 bytes
 *        are cleared first, then the sequence is written starting at utf8[0]
 * @return utf8. Values of 0x80000000 and above cannot be encoded and leave
 *         the buffer zeroed.
 */
unsigned char* UTF32to8 (unsigned long utf32, unsigned char * utf8) {
	unsigned int i;
	unsigned int trailing;	// number of 10xxxxxx continuation bytes
	unsigned char lead;	// marker bits of the first byte

	for (i = 0; i < 6; i++) utf8[i] = 0;

	if (utf32 < 0x80) {
		utf8[0] = (unsigned char)utf32;
		return utf8;
	}

	if (utf32 < 0x800)		{ trailing = 1; lead = 0xc0; }
	else if (utf32 < 0x10000)	{ trailing = 2; lead = 0xe0; }
	else if (utf32 < 0x200000)	{ trailing = 3; lead = 0xf0; }
	else if (utf32 < 0x4000000)	{ trailing = 4; lead = 0xf8; }
	else if (utf32 < 0x80000000UL)	{ trailing = 5; lead = 0xfc; }
	else return utf8;

	// fill the continuation bytes from the last one backwards, 6 bits each
	for (i = trailing; i > 0; i--) {
		utf8[i] = (unsigned char)(0x80 | (utf32 & 0x3f));
		utf32 >>= 6;
	}
	// the range checks above guarantee the remaining bits fit beside the marker
	utf8[0] = (unsigned char)(lead | utf32);

	return utf8;
}

/** Converts a UTF-8 encoded 1-6 byte array into a 32-bit unsigned integer UTF-32 value
 * @param utf8 pointer to an array of 6 unsigned chars containing the UTF-8 value, starting in the utf8[0]
 * @return the UTF-32 Unicode code point value, or 0xffff if utf8[0] is not a
 *         valid lead byte or one of the following bytes is not a 10xxxxxx
 *         continuation byte
 */
unsigned long UTF8to32 (unsigned char * utf8) {

	unsigned char lead = utf8[0];
	unsigned int count = 0;		// leading 1 bits == total length of the sequence

	// shift the marker bits out; what remains is the payload, moved left by count
	for (; lead & 0x80; count++) lead <<= 1;

	if (!count) {
		return utf8[0];		// plain 7-bit character
	}
	if (count == 1 || count > 6) {
		return 0xffff;		// stray continuation byte, or 0xfe/0xff
	}

	unsigned long utf32 = lead >> count;	// undo the shift: payload of the lead byte
	for (unsigned int i = 1; i < count; i++) {
		if ((utf8[i] & 0xc0) != 0x80) {
			return 0xffff;
		}
		utf32 <<= 6;
		utf32 |= (utf8[i] & 0x3f);
	}
	return utf32;
}
+ if (tok) + minor2 = atoi(tok); + tok = strtok(0, "."); + if (tok) + minor3 = atoi(tok); + delete [] buf; +} + + +/****************************************************************************** + * compare - compares this version to another version + * + * ENT: vi - other version with which to compare + * + * RET: = 0 if equal; + * < 0 if this version is less than other version; + * > 0 if this version is greater than other version + */ + +int SWVersion::compare(const SWVersion &vi) const { + if (major == vi.major) + if (minor == vi.minor) + if (minor2 == vi.minor2) + if (minor3 == vi.minor3) + return 0; + else return minor3 - vi.minor3; + else return minor2 - vi.minor2; + else return minor - vi.minor; + else return major - vi.major; +} + + +const char *SWVersion::getText() const { + + // 255 is safe because there is no way 4 integers (plus 3 '.'s) can have + // a string representation that will overrun this buffer + static char buf[255]; + + if (minor > -1) { + if (minor2 > -1) { + if (minor3 > -1) { + sprintf(buf, "%d.%d.%d.%d", major, minor, minor2, minor3); + } + else sprintf(buf, "%d.%d.%d", major, minor, minor2); + } + else sprintf(buf, "%d.%d", major, minor); + } + else sprintf(buf, "%d", major); + + return buf; +} diff --git a/src/utilfuns/unixstr.cpp b/src/utilfuns/unixstr.cpp new file mode 100644 index 0000000..7a975a2 --- /dev/null +++ b/src/utilfuns/unixstr.cpp @@ -0,0 +1,7 @@ +// Include only if your UNIX compiler does not include stricmp but does include strcasecmp + +#include <unixstr.h> + +int stricmp(const char *s1, const char *s2) { + return strcasecmp(s1, s2); +} diff --git a/src/utilfuns/utilconf.cpp b/src/utilfuns/utilconf.cpp new file mode 100644 index 0000000..5a32ca0 --- /dev/null +++ b/src/utilfuns/utilconf.cpp @@ -0,0 +1,19 @@ +#include <string.h> +#include <utilstr.h> + + +/****************************************************************************** + * getconfent - Get the value of an entry in a configuration file + * + * ENT: 
/******************************************************************************
 * stdstr - Sets/gets a string
 *
 * ENT:	ipstr	- pointer to a string pointer to set if necessary
 *	istr	- string to set to *ipstr
 *			0 - only get
 *
 * RET:	*ipstr
 *
 * The new buffer is twice as large as the copied string needs.  The copy is
 * made before the old buffer is released, so istr may point at (or into)
 * the string currently held in *ipstr.
 */

char *stdstr(char **ipstr, const char *istr) {
	if (istr) {
		int len = strlen(istr) + 1;
		char *fresh = new char [ len*2 ];	// *2 buffer for unicode manipulations
		memcpy(fresh, istr, len);
		if (*ipstr)
			delete [] *ipstr;
		*ipstr = fresh;
	}
	return *ipstr;
}


// the characters strstrip() removes: space, tab, LF (10) and CR (13)
static inline bool isStripChar(char c) {
	return (c == ' ') || (c == '\t') || (c == 10) || (c == 13);
}


/******************************************************************************
 * strstrip - Removes leading and trailing spaces from a string
 *
 * ENT:	istr	- string pointer to strip (modified in place)
 *
 * RET:	istr
 *
 * Spaces, tabs, line feeds and carriage returns are stripped.
 */

char *strstrip(char *istr) {
	int len = strlen(istr);
	if (len < 1)
		return istr;

	// cut trailing whitespace; the scan never steps in front of istr, so
	// the first character is left for the leading scan below
	char *last = istr + (len - 1);
	while ((last > istr) && isStripChar(*last))
		*(last--) = 0;

	// skip leading whitespace
	char *first = istr;
	while (isStripChar(*first))
		first++;

	// slide the kept part to the front; for an all-whitespace string
	// first ends up one past last and the length below is 0
	memmove(istr, first, (last - first) + 1);
	istr[(last - first) + 1] = 0;

	return istr;
}
string for the occurrence of a given substring, no case + * + * ENT: scans s1 for the first occurrence of the substring s2, ingnoring case + * + * RET: a pointer to the element in s1, where s2 begins (points to s2 in s1). + * If s2 does not occur in s1, returns null. + */ + +const char *stristr(const char *s1, const char *s2) { + int tLen = strlen(s2); + int cLen = strlen(s1); + char *target = new char [ tLen + 1 ]; + int i, j; + const char *retVal = 0; + + strcpy(target, s2); + for (i = 0; i < tLen; i++) + target[i] = SW_toupper(target[i]); + + for (i = 0; i < (cLen - tLen)+1; i++) { + if (SW_toupper(s1[i]) == (unsigned char)*target) { + for (j = 1; j < tLen; j++) { + if (SW_toupper(s1[i+j]) != (unsigned char)target[j]) + break; + } + if (j == tLen) { + retVal = s1+i; + break; + } + } + } + delete [] target; + return retVal; +} + +/****************************************************************************** + * strnicmp - compares the first n bytes of 2 string ignoring case + * + * ENT: compares s1 to s2 comparing the first n byte ingnoring case + * + * RET: same as strcmp + */ + +const char strnicmp(const char *s1, const char *s2, int len) { + int tLen = strlen(s2); + int cLen = strlen(s1); + char diff; + int i; + for (i = 0; ((i < len) && (i < tLen) && (i < cLen)); i++) { + if ((diff = SW_toupper(*s1) - SW_toupper(*s2))) + return diff; + s1++; + s2++; + } + return (i < len) ? cLen - tLen : 0; +} + +/****************************************************************************** + * strlenw - Scans a string for trailing 0x0000 and return size in BYTES + * + * ENT: target - string for which to determine size + * + * RET: length in BYTES + * If s2 does not occur in s1, returns null. 
+ */ + +unsigned int strlenw(const char *s1) { + return strlen(s1); +// utf8 says no null in string except terminator, so below code is overkill +/* + const char *ch = s1; + if (!*ch) + ch++; + while (*ch) { + ch++; + if (!*ch) + ch++; + } + return (unsigned int)(ch - s1) - 1; +*/ +} + + +/****************************************************************************** + * toupperstr - converts a string to uppercase string, in place + * + * ENT: buf - string to convert + * + * RET: buf + */ + +char *toupperstr(char *buf) { + char *ret = buf; + + /* advance in the loop header: reading and incrementing buf inside one assignment is not well defined */ + for (; *buf; buf++) + *buf = SW_toupper(*buf); + + return ret; +} + + +/****************************************************************************** + * toupperstr_utf8 - converts a UTF-8 string to uppercase string, in place + * + * ENT: buf - string to convert + * + * RET: buf + */ + +char *toupperstr_utf8(char *buf) { + char *ret = buf; + +#ifndef _ICU_ + // try to decide if it's worth trying to toupper. Do we have more + // characters that are probably lower latin than not? + long performOp = 0; + for (const char *ch = buf; *ch; ch++) + performOp += ((*ch & 0x80) == 0) ? 1 : -1; /* high bit clear == 7-bit ASCII, whatever the signedness of char */ + + if (performOp > 0) { /* only when ASCII bytes outnumber the others */ + for (; *buf; buf++) + *buf = SW_toupper(*buf); + } +#else + /* NOTE(review): err is never checked, and the extract capacity strlen(ret)*2 assumes buf has that much room (stdstr allocates twice the string length) -- confirm for other callers */ + UErrorCode err = U_ZERO_ERROR; + UConverter *conv = ucnv_open("UTF-8", &err); + UnicodeString str(buf, -1, conv, err); + UnicodeString ustr = str.toUpper(); + ustr.extract(ret, strlen(ret)*2, conv, err); + ucnv_close(conv); +#endif + + return ret; +} diff --git a/src/utilfuns/win32/dirent.cpp b/src/utilfuns/win32/dirent.cpp new file mode 100644 index 0000000..150bf10 --- /dev/null +++ b/src/utilfuns/win32/dirent.cpp @@ -0,0 +1,131 @@ +/* + + Implementation of POSIX directory browsing functions and types for Win32. + + Kevlin Henney (mailto:kevlin@acm.org), March 1997. + + Copyright Kevlin Henney, 1997. All rights reserved.
+ + Permission to use, copy, modify, and distribute this software and its + documentation for any purpose is hereby granted without fee, provided + that this copyright and permissions notice appear in all copies and + derivatives, and that no charge may be made for the software and its + documentation except to cover cost of distribution. + + This software is supplied "as is" without express or implied warranty. + + But that said, if there are any problems please get in touch. + +*/ + +#include <dirent.h> +#include <errno.h> +#include <io.h> +#include <stdlib.h> +#include <string.h> + /* State of one open directory stream. */ +struct DIR +{ + long handle; /* -1 for failed rewind; NOTE(review): holds the _findfirst result in a long -- confirm the handle fits on 64-bit Windows targets */ + struct _finddata_t info; + struct dirent result; /* d_name null iff first time */ + char *name; /* NTBS; the search pattern handed to _findfirst, owned by this struct */ +}; + /* Opens a directory stream for name. Builds the search pattern from name plus "*" (preceded by a slash unless name already ends in a slash or backslash) and starts the search with _findfirst. Returns 0 on failure: errno is EINVAL for a null or empty name and ENOMEM when an allocation fails; this function does not set errno itself when _findfirst fails. */ +DIR *opendir(const char *name) +{ + DIR *dir = 0; + + if(name && name[0]) + { + size_t base_length = strlen(name); + const char *all = /* the root directory is a special case... */ + strchr("/\\", name[base_length - 1]) ? "*" : "/*"; + + if((dir = (DIR *) malloc(sizeof *dir)) != 0 && + (dir->name = (char *) malloc(base_length + strlen(all) + 1)) != 0) + { + strcat(strcpy(dir->name, name), all); + + if((dir->handle = _findfirst(dir->name, &dir->info)) != -1) + { + dir->result.d_name = 0; + } + else /* rollback */ + { + free(dir->name); + free(dir); + dir = 0; + } + } + else /* rollback */ + { + free(dir); + dir = 0; + errno = ENOMEM; + } + } + else + { + errno = EINVAL; + } + + return dir; +} + /* Ends the search if a handle is open and frees the DIR. Returns the _findclose result, or -1 with errno = EBADF when dir is null or its handle is -1 (the memory is still freed in the latter case). */ +int closedir(DIR *dir) +{ + int result = -1; + + if(dir) + { + if(dir->handle != -1) + { + result = _findclose(dir->handle); + } + + free(dir->name); + free(dir); + } + + if(result == -1) /* map all errors to EBADF */ + { + errno = EBADF; + } + + return result; +} + /* Returns the next entry, or 0 when _findnext reports no further entry. The first call after opendir or rewinddir returns the entry already fetched by _findfirst (d_name is still null then). d_name points into dir->info, so the next call overwrites it. Sets errno = EBADF when dir is null or its handle is -1. */ +struct dirent *readdir(DIR *dir) +{ + struct dirent *result = 0; + + if(dir && dir->handle != -1) + { + if(!dir->result.d_name || _findnext(dir->handle, &dir->info) != -1) + { + result = &dir->result; + result->d_name = dir->info.name; + } + } + else + { + errno
= EBADF; + } + + return result; +} + /* Restarts the search at the first entry by closing the find handle and calling _findfirst again; if that call fails, handle stays -1 and later readdir calls report EBADF. */ +void rewinddir(DIR *dir) +{ + if(dir && dir->handle != -1) + { + _findclose(dir->handle); + dir->handle = _findfirst(dir->name, &dir->info); + dir->result.d_name = 0; + } + else + { + errno = EBADF; + } +} diff --git a/src/utilfuns/win32/dirent.h b/src/utilfuns/win32/dirent.h new file mode 100644 index 0000000..ba22833 --- /dev/null +++ b/src/utilfuns/win32/dirent.h @@ -0,0 +1,32 @@ +/* + + Declaration of POSIX directory browsing functions and types for Win32. + + Kevlin Henney (mailto:kevlin@acm.org), March 1997. + + Copyright Kevlin Henney, 1997. All rights reserved. + + Permission to use, copy, modify, and distribute this software and its + documentation for any purpose is hereby granted without fee, provided + that this copyright and permissions notice appear in all copies and + derivatives, and that no charge may be made for the software and its + documentation except to cover cost of distribution. + +*/ + +#ifndef DIRENT_INCLUDED +#define DIRENT_INCLUDED + +typedef struct DIR DIR; + /* Directory entry: only the name is provided by this implementation. */ +struct dirent +{ + char *d_name; +}; + +DIR *opendir(const char *); +int closedir(DIR *); +struct dirent *readdir(DIR *); +void rewinddir(DIR *); + +#endif diff --git a/src/utilfuns/zlib/adler32.c b/src/utilfuns/zlib/adler32.c new file mode 100644 index 0000000..14e3abd --- /dev/null +++ b/src/utilfuns/zlib/adler32.c @@ -0,0 +1,48 @@ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id: adler32.c,v 1.1 2001/03/23 09:00:15 scribe Exp $ */ + +#include "zlib.h" + +#define BASE 65521L /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {s1 += buf[i]; s2 += s1;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
+#define DO16(buf) DO8(buf,0); DO8(buf,8); + +/* ========================================================================= */ +uLong ZEXPORT adler32(adler, buf, len) + uLong adler; + const Bytef *buf; + uInt len; +{ + unsigned long s1 = adler & 0xffff; + unsigned long s2 = (adler >> 16) & 0xffff; + int k; + + if (buf == Z_NULL) return 1L; + + while (len > 0) { + k = len < NMAX ? len : NMAX; + len -= k; + while (k >= 16) { + DO16(buf); + buf += 16; + k -= 16; + } + if (k != 0) do { + s1 += *buf++; + s2 += s1; + } while (--k); + s1 %= BASE; + s2 %= BASE; + } + return (s2 << 16) | s1; +} diff --git a/src/utilfuns/zlib/compress.c b/src/utilfuns/zlib/compress.c new file mode 100644 index 0000000..df5fca8 --- /dev/null +++ b/src/utilfuns/zlib/compress.c @@ -0,0 +1,68 @@ +/* compress.c -- compress a memory buffer + * Copyright (C) 1995-1998 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id: compress.c,v 1.1 2001/03/23 09:00:15 scribe Exp $ */ + +#include "zlib.h" + +/* =========================================================================== + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least 0.1% larger than sourceLen plus + 12 bytes. Upon exit, destLen is the actual size of the compressed buffer. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. 
+*/ +int ZEXPORT compress2 (dest, destLen, source, sourceLen, level) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; + int level; +{ + z_stream stream; + int err; + + stream.next_in = (Bytef*)source; + stream.avail_in = (uInt)sourceLen; +#ifdef MAXSEG_64K + /* Check for source > 64K on 16-bit machine: */ + if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR; +#endif + stream.next_out = dest; + stream.avail_out = (uInt)*destLen; + if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR; + + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = deflateInit(&stream, level); + if (err != Z_OK) return err; + + err = deflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + deflateEnd(&stream); + return err == Z_OK ? Z_BUF_ERROR : err; + } + *destLen = stream.total_out; + + err = deflateEnd(&stream); + return err; +} + +/* =========================================================================== + */ +int ZEXPORT compress (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; +{ + return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION); +} diff --git a/src/utilfuns/zlib/crc32.c b/src/utilfuns/zlib/crc32.c new file mode 100644 index 0000000..fe80e8a --- /dev/null +++ b/src/utilfuns/zlib/crc32.c @@ -0,0 +1,162 @@ +/* crc32.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id: crc32.c,v 1.1 2001/03/23 09:00:15 scribe Exp $ */ + +#include "zlib.h" + +#define local static + +#ifdef DYNAMIC_CRC_TABLE + +local int crc_table_empty = 1; +local uLongf crc_table[256]; +local void make_crc_table OF((void)); + +/* + Generate a table for a byte-wise 32-bit CRC calculation on the polynomial: + x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. 
+ + Polynomials over GF(2) are represented in binary, one bit per coefficient, + with the lowest powers in the most significant bit. Then adding polynomials + is just exclusive-or, and multiplying a polynomial by x is a right shift by + one. If we call the above polynomial p, and represent a byte as the + polynomial q, also with the lowest power in the most significant bit (so the + byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, + where a mod b means the remainder after dividing a by b. + + This calculation is done using the shift-register method of multiplying and + taking the remainder. The register is initialized to zero, and for each + incoming bit, x^32 is added mod p to the register if the bit is a one (where + x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by + x (which is shifting right by one and adding x^32 mod p if the bit shifted + out is a one). We start with the highest power (least significant bit) of + q and repeat for all eight bits of q. + + The table is simply the CRC of all possible eight bit values. This is all + the information needed to generate CRC's on data a byte at a time for all + combinations of CRC register values and incoming bytes. +*/ +local void make_crc_table() +{ + uLong c; + int n, k; + uLong poly; /* polynomial exclusive-or pattern */ + /* terms of polynomial defining this crc (except x^32): */ + static const Byte p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; + + /* make exclusive-or pattern from polynomial (0xedb88320L) */ + poly = 0L; + for (n = 0; n < sizeof(p)/sizeof(Byte); n++) + poly |= 1L << (31 - p[n]); + + for (n = 0; n < 256; n++) + { + c = (uLong)n; + for (k = 0; k < 8; k++) + c = c & 1 ? 
poly ^ (c >> 1) : c >> 1; + crc_table[n] = c; + } + crc_table_empty = 0; +} +#else +/* ======================================================================== + * Table of CRC-32's of all single-byte values (made by make_crc_table) + */ +local const uLongf crc_table[256] = { + 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, + 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, + 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, + 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, + 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, + 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, + 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, + 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, + 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, + 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, + 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, + 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, + 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, + 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, + 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, + 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, + 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, + 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, + 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, + 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, + 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, + 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, + 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, + 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, + 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, + 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 
0xedb88320L, 0x9abfb3b6L, + 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, + 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, + 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, + 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, + 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, + 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, + 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, + 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, + 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, + 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, + 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, + 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, + 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, + 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, + 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, + 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, + 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, + 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, + 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, + 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, + 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, + 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, + 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, + 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, + 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, + 0x2d02ef8dL +}; +#endif + +/* ========================================================================= + * This function can be used by asm versions of crc32() + */ +const uLongf * ZEXPORT get_crc_table() +{ +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) make_crc_table(); +#endif + return 
(const uLongf *)crc_table; +} + +/* ========================================================================= */ +#define DO1(buf) crc = crc_table[((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8); +#define DO2(buf) DO1(buf); DO1(buf); +#define DO4(buf) DO2(buf); DO2(buf); +#define DO8(buf) DO4(buf); DO4(buf); + +/* ========================================================================= */ +uLong ZEXPORT crc32(crc, buf, len) + uLong crc; + const Bytef *buf; + uInt len; +{ + if (buf == Z_NULL) return 0L; +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif + crc = crc ^ 0xffffffffL; + while (len >= 8) + { + DO8(buf); + len -= 8; + } + if (len) do { + DO1(buf); + } while (--len); + return crc ^ 0xffffffffL; +} diff --git a/src/utilfuns/zlib/deflate.c b/src/utilfuns/zlib/deflate.c new file mode 100644 index 0000000..a232eea --- /dev/null +++ b/src/utilfuns/zlib/deflate.c @@ -0,0 +1,1350 @@ +/* deflate.c -- compress data using the deflation algorithm + * Copyright (C) 1995-1998 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process depends on being able to identify portions + * of the input text which are identical to earlier input (within a + * sliding window trailing behind the input currently being processed). + * + * The most straightforward technique turns out to be the fastest for + * most input files: try all possible matches and select the longest. + * The key feature of this algorithm is that insertions into the string + * dictionary are very simple and thus fast, and deletions are avoided + * completely. Insertions are performed at each input character, whereas + * string matches are performed only when the previous match ends. So it + * is preferable to spend more time in matches to allow very fast string + * insertions and avoid deletions. The matching algorithm for small + * strings is inspired from that of Rabin & Karp. 
A brute force approach + * is used to find longer strings when a small match has been found. + * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze + * (by Leonid Broukhis). + * A previous version of this file used a more sophisticated algorithm + * (by Fiala and Greene) which is guaranteed to run in linear amortized + * time, but has a larger average cost, uses more memory and is patented. + * However the F&G algorithm may be faster for some highly redundant + * files if the parameter max_chain_length (described below) is too large. + * + * ACKNOWLEDGEMENTS + * + * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and + * I found it in 'freeze' written by Leonid Broukhis. + * Thanks to many people for bug reports and testing. + * + * REFERENCES + * + * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". + * Available in ftp://ds.internic.net/rfc/rfc1951.txt + * + * A description of the Rabin and Karp algorithm is given in the book + * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. + * + * Fiala,E.R., and Greene,D.H. + * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 + * + */ + +/* @(#) $Id: deflate.c,v 1.1 2001/03/23 09:00:15 scribe Exp $ */ + +#include "deflate.h" + +const char deflate_copyright[] = + " deflate 1.1.3 Copyright 1995-1998 Jean-loup Gailly "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* =========================================================================== + * Function prototypes. 
+ */ +typedef enum { + need_more, /* block not completed, need more input or more output */ + block_done, /* block flush performed */ + finish_started, /* finish started, need only more output at next deflate */ + finish_done /* finish done, accept no more input or output */ +} block_state; + +typedef block_state (*compress_func) OF((deflate_state *s, int flush)); +/* Compression function. Returns the block state after the call. */ + +local void fill_window OF((deflate_state *s)); +local block_state deflate_stored OF((deflate_state *s, int flush)); +local block_state deflate_fast OF((deflate_state *s, int flush)); +local block_state deflate_slow OF((deflate_state *s, int flush)); +local void lm_init OF((deflate_state *s)); +local void putShortMSB OF((deflate_state *s, uInt b)); +local void flush_pending OF((z_streamp strm)); +local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); +#ifdef ASMV + void match_init OF((void)); /* asm code initialization */ + uInt longest_match OF((deflate_state *s, IPos cur_match)); +#else +local uInt longest_match OF((deflate_state *s, IPos cur_match)); +#endif + +#ifdef DEBUG +local void check_match OF((deflate_state *s, IPos start, IPos match, + int length)); +#endif + +/* =========================================================================== + * Local data + */ + +#define NIL 0 +/* Tail of hash chains */ + +#ifndef TOO_FAR +# define TOO_FAR 4096 +#endif +/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +/* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). The values given below have been tuned to + * exclude worst case performance for pathological files. Better values may be + * found for specific files. 
+ */ +typedef struct config_s { + ush good_length; /* reduce lazy search above this match length */ + ush max_lazy; /* do not perform lazy search above this match length */ + ush nice_length; /* quit search above this match length */ + ush max_chain; + compress_func func; +} config; + +local const config configuration_table[10] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}, /* maximum speed, no lazy matches */ +/* 2 */ {4, 5, 16, 8, deflate_fast}, +/* 3 */ {4, 6, 32, 32, deflate_fast}, + +/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ +/* 5 */ {8, 16, 32, 32, deflate_slow}, +/* 6 */ {8, 16, 128, 128, deflate_slow}, +/* 7 */ {8, 32, 128, 256, deflate_slow}, +/* 8 */ {32, 128, 258, 1024, deflate_slow}, +/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* maximum compression */ + +/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 + * For deflate_fast() (levels <= 3) good is ignored and lazy has a different + * meaning. + */ + +#define EQUAL 0 +/* result of memcmp for equal strings */ + +struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ + +/* =========================================================================== + * Update a hash value with the given input byte + * IN assertion: all calls to to UPDATE_HASH are made with consecutive + * input characters, so that a running hash key can be computed from the + * previous key instead of complete recalculation each time. + */ +#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) + + +/* =========================================================================== + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. 
+ * If this file is compiled with -DFASTEST, the compression level is forced + * to 1, and no hash chains are maintained. + * IN assertion: all calls to to INSERT_STRING are made with consecutive + * input characters and the first MIN_MATCH bytes of str are valid + * (except for the last MIN_MATCH-1 bytes of the input file). + */ +#ifdef FASTEST +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#else +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + s->prev[(str) & s->w_mask] = match_head = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#endif + +/* =========================================================================== + * Initialize the hash table (avoiding 64K overflow for 16 bit systems). + * prev[] will be initialized on the fly. + */ +#define CLEAR_HASH(s) \ + s->head[s->hash_size-1] = NIL; \ + zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); + +/* ========================================================================= */ +int ZEXPORT deflateInit_(strm, level, version, stream_size) + z_streamp strm; + int level; + const char *version; + int stream_size; +{ + return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY, version, stream_size); + /* To do: ignore strm->next_in if we use it as window */ +} + +/* ========================================================================= */ +int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + version, stream_size) + z_streamp strm; + int level; + int method; + int windowBits; + int memLevel; + int strategy; + const char *version; + int stream_size; +{ + deflate_state *s; + int noheader = 0; + static const char* my_version = ZLIB_VERSION; + + ushf *overlay; + /* We overlay pending_buf and d_buf+l_buf. 
This works since the average + * output size for (length,distance) codes is <= 24 bits. + */ + + if (version == Z_NULL || version[0] != my_version[0] || + stream_size != sizeof(z_stream)) { + return Z_VERSION_ERROR; + } + if (strm == Z_NULL) return Z_STREAM_ERROR; + + strm->msg = Z_NULL; + if (strm->zalloc == Z_NULL) { + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; + } + if (strm->zfree == Z_NULL) strm->zfree = zcfree; + + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#ifdef FASTEST + level = 1; +#endif + + if (windowBits < 0) { /* undocumented feature: suppress zlib header */ + noheader = 1; + windowBits = -windowBits; + } + /* NOTE(review): upstream zlib 1.1.4 and later stopped accepting windowBits == 8 here because of an encoder problem with 256-byte windows -- confirm against the zlib ChangeLog before relying on that window size */ + if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || + windowBits < 8 || windowBits > 15 || level < 0 || level > 9 || + strategy < 0 || strategy > Z_HUFFMAN_ONLY) { + return Z_STREAM_ERROR; + } + s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); + if (s == Z_NULL) return Z_MEM_ERROR; + strm->state = (struct internal_state FAR *)s; + s->strm = strm; + + s->noheader = noheader; + s->w_bits = windowBits; + s->w_size = 1 << s->w_bits; + s->w_mask = s->w_size - 1; + + s->hash_bits = memLevel + 7; + s->hash_size = 1 << s->hash_bits; + s->hash_mask = s->hash_size - 1; + s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); + + s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); + s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); + s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + + overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); + s->pending_buf = (uchf *) overlay; + s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); + + if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || + s->pending_buf == Z_NULL) { + /* s->status is not assigned before deflateReset(); deflateEnd() returns without freeing unless status holds a known state */ + s->status = FINISH_STATE; + strm->msg = (char*)ERR_MSG(Z_MEM_ERROR); + deflateEnd (strm); + return Z_MEM_ERROR; + } + s->d_buf = overlay + s->lit_bufsize/sizeof(ush); + s->l_buf =
s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; + + s->level = level; + s->strategy = strategy; + s->method = (Byte)method; + + return deflateReset(strm); +} + +/* ========================================================================= */ +int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) + z_streamp strm; + const Bytef *dictionary; + uInt dictLength; +{ + deflate_state *s; + uInt length = dictLength; + uInt n; + IPos hash_head = 0; + + if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL || + strm->state->status != INIT_STATE) return Z_STREAM_ERROR; + + s = strm->state; + strm->adler = adler32(strm->adler, dictionary, dictLength); + + if (length < MIN_MATCH) return Z_OK; + if (length > MAX_DIST(s)) { + length = MAX_DIST(s); +#ifndef USE_DICT_HEAD + dictionary += dictLength - length; /* use the tail of the dictionary */ +#endif + } + zmemcpy(s->window, dictionary, length); + s->strstart = length; + s->block_start = (long)length; + + /* Insert all strings in the hash table (except for the last two bytes). + * s->lookahead stays null, so s->ins_h will be recomputed at the next + * call of fill_window. 
+ */ + s->ins_h = s->window[0]; + UPDATE_HASH(s, s->ins_h, s->window[1]); + for (n = 0; n <= length - MIN_MATCH; n++) { + INSERT_STRING(s, n, hash_head); + } + if (hash_head) hash_head = 0; /* to make compiler happy */ + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateReset (strm) + z_streamp strm; +{ + deflate_state *s; + + if (strm == Z_NULL || strm->state == Z_NULL || + strm->zalloc == Z_NULL || strm->zfree == Z_NULL) return Z_STREAM_ERROR; + + strm->total_in = strm->total_out = 0; + strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ + strm->data_type = Z_UNKNOWN; + + s = (deflate_state *)strm->state; + s->pending = 0; + s->pending_out = s->pending_buf; + + if (s->noheader < 0) { + s->noheader = 0; /* was set to -1 by deflate(..., Z_FINISH); */ + } + s->status = s->noheader ? BUSY_STATE : INIT_STATE; + strm->adler = 1; + s->last_flush = Z_NO_FLUSH; + + _tr_init(s); + lm_init(s); + + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateParams(strm, level, strategy) + z_streamp strm; + int level; + int strategy; +{ + deflate_state *s; + compress_func func; + int err = Z_OK; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + s = strm->state; + + if (level == Z_DEFAULT_COMPRESSION) { + level = 6; + } + if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) { + return Z_STREAM_ERROR; + } + func = configuration_table[s->level].func; + + if (func != configuration_table[level].func && strm->total_in != 0) { + /* Flush the last buffer: */ + err = deflate(strm, Z_PARTIAL_FLUSH); + } + if (s->level != level) { + s->level = level; + s->max_lazy_match = configuration_table[level].max_lazy; + s->good_match = configuration_table[level].good_length; + s->nice_match = configuration_table[level].nice_length; + s->max_chain_length = configuration_table[level].max_chain; + } + 
s->strategy = strategy;
+    return err;
+}
+
+/* =========================================================================
+ * Put a short in the pending buffer. The 16-bit value is put in MSB order.
+ * IN assertion: the stream state is correct and there is enough room in
+ * pending_buf.
+ */
+local void putShortMSB (s, b)
+    deflate_state *s;
+    uInt b;
+{
+    put_byte(s, (Byte)(b >> 8));   /* high byte first */
+    put_byte(s, (Byte)(b & 0xff)); /* then low byte */
+}
+
+/* =========================================================================
+ * Flush as much pending output as possible. All deflate() output goes
+ * through this function so some applications may wish to modify it
+ * to avoid allocating a large strm->next_out buffer and copying into it.
+ * (See also read_buf()).
+ * At most min(state->pending, avail_out) bytes are copied; next_out,
+ * total_out, avail_out, pending_out and pending are advanced accordingly.
+ */
+local void flush_pending(strm)
+    z_streamp strm;
+{
+    unsigned len = strm->state->pending;
+
+    if (len > strm->avail_out) len = strm->avail_out;
+    if (len == 0) return;
+
+    zmemcpy(strm->next_out, strm->state->pending_out, len);
+    strm->next_out  += len;
+    strm->state->pending_out  += len;
+    strm->total_out += len;
+    strm->avail_out  -= len;
+    strm->state->pending -= len;
+    if (strm->state->pending == 0) {
+        /* everything was written: rewind to the start of pending_buf */
+        strm->state->pending_out = strm->state->pending_buf;
+    }
+}
+
+/* =========================================================================
+ * Compress as much as possible from strm->next_in into strm->next_out.
+ *
+ * Steps, in order: validate the stream and the flush value; on the first
+ * call (status == INIT_STATE) emit the two-byte zlib header (plus the
+ * dictionary adler32 when strstart != 0); drain pending output; run the
+ * block function selected by configuration_table[s->level]; after a
+ * Z_FINISH, append the adler32 trailer unless s->noheader is set.
+ *
+ * Returns Z_STREAM_ERROR directly when strm or strm->state is Z_NULL or
+ * flush is outside 0..Z_FINISH. Other failures go through ERR_RETURN with
+ * Z_STREAM_ERROR or Z_BUF_ERROR. Returns Z_OK while further calls are
+ * needed and Z_STREAM_END once a Z_FINISH request has been fully flushed.
+ */
+int ZEXPORT deflate (strm, flush)
+    z_streamp strm;
+    int flush;
+{
+    int old_flush; /* value of flush param for previous deflate call */
+    deflate_state *s;
+
+    if (strm == Z_NULL || strm->state == Z_NULL ||
+        flush > Z_FINISH || flush < 0) {
+        return Z_STREAM_ERROR;
+    }
+    s = strm->state;
+
+    if (strm->next_out == Z_NULL ||
+        (strm->next_in == Z_NULL && strm->avail_in != 0) ||
+        (s->status == FINISH_STATE && flush != Z_FINISH)) {
+        ERR_RETURN(strm, Z_STREAM_ERROR);
+    }
+    if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR);
+
+    s->strm = strm; /* just in case */
+    old_flush = s->last_flush;
+    s->last_flush = flush;
+
+    /* Write the zlib header */
+    if (s->status == INIT_STATE) {
+
+        uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
+        uInt level_flags = (s->level-1) >> 1;
+
+        if (level_flags > 3) level_flags = 3;
+        header |= (level_flags << 6);
+        if (s->strstart != 0) header |= PRESET_DICT;
+        header += 31 - (header % 31); /* round header up to a multiple of 31 */
+
+        s->status = BUSY_STATE;
+        putShortMSB(s, header);
+
+        /* Save the adler32 of the preset dictionary: */
+        if (s->strstart != 0) {
+            putShortMSB(s, (uInt)(strm->adler >> 16));
+            putShortMSB(s, (uInt)(strm->adler & 0xffff));
+        }
+        strm->adler = 1L;
+    }
+
+    /* Flush as much pending output as possible */
+    if (s->pending != 0) {
+        flush_pending(strm);
+        if (strm->avail_out == 0) {
+            /* Since avail_out is 0, deflate will be called again with
+             * more output space, but possibly with both pending and
+             * avail_in equal to zero. There won't be anything to do,
+             * but this is not an error situation so make sure we
+             * return OK instead of BUF_ERROR at next call of deflate:
+             */
+            s->last_flush = -1;
+            return Z_OK;
+        }
+
+    /* Make sure there is something to do and avoid duplicate consecutive
+     * flushes. For repeated and useless calls with Z_FINISH, we keep
+     * returning Z_STREAM_END instead of Z_BUF_ERROR.
+     */
+    } else if (strm->avail_in == 0 && flush <= old_flush &&
+               flush != Z_FINISH) {
+        ERR_RETURN(strm, Z_BUF_ERROR);
+    }
+
+    /* User must not provide more input after the first FINISH: */
+    if (s->status == FINISH_STATE && strm->avail_in != 0) {
+        ERR_RETURN(strm, Z_BUF_ERROR);
+    }
+
+    /* Start a new block or continue the current one.
+     */
+    if (strm->avail_in != 0 || s->lookahead != 0 ||
+        (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
+        block_state bstate;
+
+        bstate = (*(configuration_table[s->level].func))(s, flush);
+
+        if (bstate == finish_started || bstate == finish_done) {
+            s->status = FINISH_STATE;
+        }
+        if (bstate == need_more || bstate == finish_started) {
+            if (strm->avail_out == 0) {
+                s->last_flush = -1; /* avoid BUF_ERROR next call, see above */
+            }
+            return Z_OK;
+            /* If flush != Z_NO_FLUSH && avail_out == 0, the next call
+             * of deflate should use the same flush parameter to make sure
+             * that the flush is complete. So we don't have to output an
+             * empty block here, this will be done at next call. This also
+             * ensures that for a very small output buffer, we emit at most
+             * one empty block.
+             */
+        }
+        if (bstate == block_done) {
+            if (flush == Z_PARTIAL_FLUSH) {
+                _tr_align(s);
+            } else { /* FULL_FLUSH or SYNC_FLUSH */
+                _tr_stored_block(s, (char*)0, 0L, 0);
+                /* For a full flush, this empty block will be recognized
+                 * as a special marker by inflate_sync().
+                 */
+                if (flush == Z_FULL_FLUSH) {
+                    CLEAR_HASH(s);             /* forget history */
+                }
+            }
+            flush_pending(strm);
+            if (strm->avail_out == 0) {
+                s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */
+                return Z_OK;
+            }
+        }
+    }
+    Assert(strm->avail_out > 0, "bug2");
+
+    if (flush != Z_FINISH) return Z_OK;
+    if (s->noheader) return Z_STREAM_END; /* also true once noheader == -1 below */
+
+    /* Write the zlib trailer (adler32) */
+    putShortMSB(s, (uInt)(strm->adler >> 16));
+    putShortMSB(s, (uInt)(strm->adler & 0xffff));
+    flush_pending(strm);
+    /* If avail_out is zero, the application will call deflate again
+     * to flush the rest.
+     */
+    s->noheader = -1; /* write the trailer only once! */
+    return s->pending != 0 ? Z_OK : Z_STREAM_END;
+}
+
+/* =========================================================================
+ * Release the compression state attached to strm.
+ * Returns Z_STREAM_ERROR when strm or strm->state is Z_NULL, or when the
+ * status is not INIT_STATE, BUSY_STATE or FINISH_STATE. Otherwise the four
+ * work buffers and the state itself are freed, strm->state is set to
+ * Z_NULL, and the result is Z_DATA_ERROR if the status was BUSY_STATE,
+ * else Z_OK.
+ */
+int ZEXPORT deflateEnd (strm)
+    z_streamp strm;
+{
+    int status;
+
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+
+    status = strm->state->status;
+    if (status != INIT_STATE && status != BUSY_STATE &&
+        status != FINISH_STATE) {
+      return Z_STREAM_ERROR;
+    }
+
+    /* Deallocate in reverse order of allocations: */
+    TRY_FREE(strm, strm->state->pending_buf);
+    TRY_FREE(strm, strm->state->head);
+    TRY_FREE(strm, strm->state->prev);
+    TRY_FREE(strm, strm->state->window);
+
+    ZFREE(strm, strm->state);
+    strm->state = Z_NULL;
+
+    return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
+}
+
+/* =========================================================================
+ * Copy the source state to the destination state.
+ * To simplify the source, this is not supported for 16-bit MSDOS (which
+ * doesn't have enough memory anyway to duplicate compression states).
+ * Returns Z_STREAM_ERROR when MAXSEG_64K is defined or when source, dest
+ * or source->state is Z_NULL; Z_MEM_ERROR when an allocation fails;
+ * Z_OK otherwise.
+ */
+int ZEXPORT deflateCopy (dest, source)
+    z_streamp dest;
+    z_streamp source;
+{
+#ifdef MAXSEG_64K
+    return Z_STREAM_ERROR;
+#else
+    deflate_state *ds;
+    deflate_state *ss;
+    ushf *overlay;
+
+
+    if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) {
+        return Z_STREAM_ERROR;
+    }
+
+    ss = source->state;
+
+    *dest = *source;
+
+    ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state));
+    if (ds == Z_NULL) return Z_MEM_ERROR; /* NOTE(review): *dest = *source
+        * above already copied source->state into dest->state, so on this
+        * path dest still aliases the source state -- confirm callers never
+        * call deflateEnd(dest) after this Z_MEM_ERROR. */
+    dest->state = (struct internal_state FAR *) ds;
+    *ds = *ss;
+    ds->strm = dest;
+
+    ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
+    ds->prev   = (Posf *)  ZALLOC(dest, ds->w_size, sizeof(Pos));
+    ds->head   = (Posf *)  ZALLOC(dest, ds->hash_size, sizeof(Pos));
+    overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
+    ds->pending_buf = (uchf *) overlay;
+
+    if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
+        ds->pending_buf == Z_NULL) {
+        deflateEnd (dest);
+        return Z_MEM_ERROR;
+    }
+    /* following zmemcpy do not work for 16-bit MSDOS */
+    zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
+    zmemcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos));
+    zmemcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos));
+    zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
+
+    /* re-base the interior pointers onto the freshly allocated buffers */
+    ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
+    ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
+    ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
+
+    ds->l_desc.dyn_tree = ds->dyn_ltree;
+    ds->d_desc.dyn_tree = ds->dyn_dtree;
+    ds->bl_desc.dyn_tree = ds->bl_tree;
+
+    return Z_OK;
+#endif
+}
+
+/* ===========================================================================
+ * Read a new buffer from the current input stream, update the adler32
+ * and total number of bytes read. All deflate() input goes through
+ * this function so some applications may wish to modify it to avoid
+ * allocating a large strm->next_in buffer and copying from it.
+ * (See also flush_pending()).
+ */
+local int read_buf(strm, buf, size)
+    z_streamp strm;
+    Bytef *buf;
+    unsigned size;
+{
+    unsigned len = strm->avail_in;
+
+    if (len > size) len = size; /* copy at most size bytes */
+    if (len == 0) return 0;     /* no input available, or size == 0 */
+
+    strm->avail_in  -= len;
+
+    if (!strm->state->noheader) { /* checksum is skipped when noheader is set */
+        strm->adler = adler32(strm->adler, strm->next_in, len);
+    }
+    zmemcpy(buf, strm->next_in, len);
+    strm->next_in  += len;
+    strm->total_in += len;
+
+    return (int)len; /* number of bytes copied into buf */
+}
+
+/* ===========================================================================
+ * Initialize the "longest match" routines for a new zlib stream
+ * Sets window_size to twice w_size, clears the hash table, loads the tuning
+ * parameters for s->level from configuration_table, and resets the window
+ * position and match bookkeeping fields.
+ */
+local void lm_init (s)
+    deflate_state *s;
+{
+    s->window_size = (ulg)2L*s->w_size;
+
+    CLEAR_HASH(s);
+
+    /* Set the default configuration parameters:
+     */
+    s->max_lazy_match   = configuration_table[s->level].max_lazy;
+    s->good_match       = configuration_table[s->level].good_length;
+    s->nice_match       = configuration_table[s->level].nice_length;
+    s->max_chain_length = configuration_table[s->level].max_chain;
+
+    s->strstart = 0;
+    s->block_start = 0L;
+    s->lookahead = 0;
+    s->match_length = s->prev_length = MIN_MATCH-1;
+    s->match_available = 0;
+    s->ins_h = 0;
+#ifdef ASMV
+    match_init(); /* initialize the asm code */
+#endif
+}
+
+/* ===========================================================================
+ * Set match_start to the longest match starting at the given string and
+ * return its length. Matches shorter or equal to prev_length are discarded,
+ * in which case the result is equal to prev_length and match_start is
+ * garbage.
+ * IN assertions: cur_match is the head of the hash chain for the current
+ *   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
+ * OUT assertion: the match length is not greater than s->lookahead.
+ */
+#ifndef ASMV
+/* For 80x86 and 680x0, an optimized version will be provided in match.asm or
+ * match.S. The code will be functionally equivalent.
+ */
+#ifndef FASTEST
+local uInt longest_match(s, cur_match)
+    deflate_state *s;
+    IPos cur_match;                             /* current match */
+{
+    unsigned chain_length = s->max_chain_length;/* max hash chain length */
+    register Bytef *scan = s->window + s->strstart; /* current string */
+    register Bytef *match;                       /* matched string */
+    register int len;                           /* length of current match */
+    int best_len = s->prev_length;              /* best match length so far */
+    int nice_match = s->nice_match;             /* stop if match long enough */
+    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+        s->strstart - (IPos)MAX_DIST(s) : NIL;
+    /* Stop when cur_match becomes <= limit. To simplify the code,
+     * we prevent matches with the string of window index 0.
+     */
+    Posf *prev = s->prev;
+    uInt wmask = s->w_mask;
+
+#ifdef UNALIGNED_OK
+    /* Compare two bytes at a time. Note: this is not always beneficial.
+     * Try with and without -DUNALIGNED_OK to check.
+     */
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1;
+    register ush scan_start = *(ushf*)scan;
+    register ush scan_end   = *(ushf*)(scan+best_len-1);
+#else
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH;
+    register Byte scan_end1  = scan[best_len-1];
+    register Byte scan_end   = scan[best_len];
+#endif
+
+    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+     * It is easy to get rid of this optimization if necessary.
+     */
+    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+    /* Do not waste too much time if we already have a good match: */
+    if (s->prev_length >= s->good_match) {
+        chain_length >>= 2; /* walk only a quarter of the usual chain */
+    }
+    /* Do not look for matches beyond the end of the input. This is necessary
+     * to make deflate deterministic.
+     */
+    if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
+
+    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+    do {
+        Assert(cur_match < s->strstart, "no future");
+        match = s->window + cur_match;
+
+        /* Skip to next match if the match length cannot increase
+         * or if the match length is less than 2:
+         */
+#if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
+        /* This code assumes sizeof(unsigned short) == 2. Do not use
+         * UNALIGNED_OK if your compiler uses a different size.
+         */
+        if (*(ushf*)(match+best_len-1) != scan_end ||
+            *(ushf*)match != scan_start) continue;
+
+        /* It is not necessary to compare scan[2] and match[2] since they are
+         * always equal when the other bytes match, given that the hash keys
+         * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
+         * strstart+3, +5, ... up to strstart+257. We check for insufficient
+         * lookahead only every 4th comparison; the 128th check will be made
+         * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
+         * necessary to put more guard bytes at the end of the window, or
+         * to check more often for insufficient lookahead.
+         */
+        Assert(scan[2] == match[2], "scan[2]?");
+        scan++, match++;
+        do {
+        } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 scan < strend);
+        /* The funny "do {}" generates better code on most compilers */
+
+        /* Here, scan <= window+strstart+257 */
+        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+        if (*scan == *match) scan++;
+
+        len = (MAX_MATCH - 1) - (int)(strend-scan);
+        scan = strend - (MAX_MATCH-1); /* rewind scan to the string start */
+
+#else /* UNALIGNED_OK */
+
+        if (match[best_len]   != scan_end  ||
+            match[best_len-1] != scan_end1 ||
+            *match            != *scan     ||
+            *++match          != scan[1])      continue;
+
+        /* The check at best_len-1 can be removed because it will be made
+         * again later. (This heuristic is not always a win.)
+         * It is not necessary to compare scan[2] and match[2] since they
+         * are always equal when the other bytes match, given that
+         * the hash keys are equal and that HASH_BITS >= 8.
+         */
+        scan += 2, match++;
+        Assert(*scan == *match, "match[2]?");
+
+        /* We check for insufficient lookahead only every 8th comparison;
+         * the 256th check will be made at strstart+258.
+         */
+        do {
+        } while (*++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 scan < strend);
+
+        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+
+        len = MAX_MATCH - (int)(strend - scan);
+        scan = strend - MAX_MATCH; /* rewind scan to the string start */
+
+#endif /* UNALIGNED_OK */
+
+        if (len > best_len) {
+            s->match_start = cur_match;
+            best_len = len;
+            if (len >= nice_match) break;
+#ifdef UNALIGNED_OK
+            scan_end = *(ushf*)(scan+best_len-1);
+#else
+            scan_end1  = scan[best_len-1];
+            scan_end   = scan[best_len];
+#endif
+        }
+    } while ((cur_match = prev[cur_match & wmask]) > limit
+             && --chain_length != 0);
+
+    /* never report more than the available lookahead */
+    if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
+    return s->lookahead;
+}
+
+#else /* FASTEST */
+/* ---------------------------------------------------------------------------
+ * Optimized version for level == 1 only
+ * Only the single candidate cur_match is examined; no hash chain is walked.
+ * Returns MIN_MATCH-1 when the first two bytes differ or the match is
+ * shorter than MIN_MATCH. Otherwise sets s->match_start to cur_match and
+ * returns the match length, limited to s->lookahead.
+ */
+local uInt longest_match(s, cur_match)
+    deflate_state *s;
+    IPos cur_match;                             /* current match */
+{
+    register Bytef *scan = s->window + s->strstart; /* current string */
+    register Bytef *match;                       /* matched string */
+    register int len;                           /* length of current match */
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH;
+
+    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+     * It is easy to get rid of this optimization if necessary.
+     */
+    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+    Assert(cur_match < s->strstart, "no future");
+
+    match = s->window + cur_match;
+
+    /* Return failure if the match length is less than 2:
+     */
+    if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1;
+
+    /* The check at best_len-1 can be removed because it will be made
+     * again later. (This heuristic is not always a win.)
+     * It is not necessary to compare scan[2] and match[2] since they
+     * are always equal when the other bytes match, given that
+     * the hash keys are equal and that HASH_BITS >= 8.
+     */
+    scan += 2, match += 2;
+    Assert(*scan == *match, "match[2]?");
+
+    /* We check for insufficient lookahead only every 8th comparison;
+     * the 256th check will be made at strstart+258.
+     */
+    do {
+    } while (*++scan == *++match && *++scan == *++match &&
+             *++scan == *++match && *++scan == *++match &&
+             *++scan == *++match && *++scan == *++match &&
+             *++scan == *++match && *++scan == *++match &&
+             scan < strend);
+
+    Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+
+    len = MAX_MATCH - (int)(strend - scan);
+
+    if (len < MIN_MATCH) return MIN_MATCH - 1;
+
+    s->match_start = cur_match;
+    return len <= s->lookahead ? len : s->lookahead;
+}
+#endif /* FASTEST */
+#endif /* ASMV */
+
+#ifdef DEBUG
+/* ===========================================================================
+ * Check that the match at match_start is indeed a match.
+ */
+local void check_match(s, start, match, length)
+    deflate_state *s;
+    IPos start, match;
+    int length;
+{
+    /* check that the match is indeed a match */
+    if (zmemcmp(s->window + match,
+                s->window + start, length) != EQUAL) {
+        fprintf(stderr, " start %u, match %u, length %d\n",
+                start, match, length);
+        do { /* dump the two strings as interleaved byte pairs */
+            fprintf(stderr, "%c%c", s->window[match++], s->window[start++]);
+        } while (--length != 0);
+        z_error("invalid match");
+    }
+    if (z_verbose > 1) {
+        fprintf(stderr,"\\[%d,%d]", start-match, length);
+        do { putc(s->window[start++], stderr); } while (--length != 0);
+    }
+}
+#else
+#  define check_match(s, start, match, length) /* expands to nothing without DEBUG */
+#endif
+
+/* ===========================================================================
+ * Fill the window when the lookahead becomes insufficient.
+ * Updates strstart and lookahead.
+ *
+ * IN assertion: lookahead < MIN_LOOKAHEAD
+ * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
+ *    At least one byte has been read, or avail_in == 0; reads are
+ *    performed for at least two bytes (required for the zip translate_eol
+ *    option -- not supported here).
+ *
+ * When strstart reaches w_size+MAX_DIST, the upper half of the window is
+ * copied onto the lower half and match_start, strstart, block_start and
+ * the head table (plus prev unless FASTEST) are rebased by w_size.
+ */
+local void fill_window(s)
+    deflate_state *s;
+{
+    register unsigned n, m;
+    register Posf *p;
+    unsigned more;    /* Amount of free space at the end of the window. */
+    uInt wsize = s->w_size;
+
+    do {
+        more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
+
+        /* Deal with !@#$% 64K limit: */
+        if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
+            more = wsize;
+
+        } else if (more == (unsigned)(-1)) {
+            /* Very unlikely, but possible on 16 bit machine if strstart == 0
+             * and lookahead == 1 (input done one byte at time)
+             */
+            more--;
+
+        /* If the window is almost full and there is insufficient lookahead,
+         * move the upper half to the lower one to make room in the upper half.
+         */
+        } else if (s->strstart >= wsize+MAX_DIST(s)) {
+
+            zmemcpy(s->window, s->window+wsize, (unsigned)wsize);
+            s->match_start -= wsize;
+            s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
+            s->block_start -= (long) wsize;
+
+            /* Slide the hash table (could be avoided with 32 bit values
+               at the expense of memory usage). We slide even when level == 0
+               to keep the hash table consistent if we switch back to level > 0
+               later. (Using level 0 permanently is not an optimal usage of
+               zlib, so we don't care about this pathological case.)
+             */
+            n = s->hash_size;
+            p = &s->head[n];
+            do {
+                m = *--p;
+                *p = (Pos)(m >= wsize ? m-wsize : NIL);
+            } while (--n);
+
+            n = wsize;
+#ifndef FASTEST
+            p = &s->prev[n];
+            do {
+                m = *--p;
+                *p = (Pos)(m >= wsize ? m-wsize : NIL);
+                /* If n is not on any hash chain, prev[n] is garbage but
+                 * its value will never be used.
+                 */
+            } while (--n);
+#endif
+            more += wsize;
+        }
+        if (s->strm->avail_in == 0) return; /* no input left to read */
+
+        /* If there was no sliding:
+         *    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
+         *    more == window_size - lookahead - strstart
+         * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
+         * => more >= window_size - 2*WSIZE + 2
+         * In the BIG_MEM or MMAP case (not yet supported),
+         *   window_size == input_size + MIN_LOOKAHEAD  &&
+         *   strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
+         * Otherwise, window_size == 2*WSIZE so more >= 2.
+         * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
+         */
+        Assert(more >= 2, "more < 2");
+
+        n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
+        s->lookahead += n; /* n is the number of bytes read_buf copied */
+
+        /* Initialize the hash value now that we have some input: */
+        if (s->lookahead >= MIN_MATCH) {
+            s->ins_h = s->window[s->strstart];
+            UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
+#if MIN_MATCH != 3
+            Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+        }
+        /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
+         * but this is not important since only literal bytes will be emitted.
+         */
+
+    } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
+}
+
+/* ===========================================================================
+ * Flush the current block, with given end-of-file flag.
+ * IN assertion: strstart is set to the end of the current match.
+ * The block data pointer passed to _tr_flush_block is Z_NULL when
+ * block_start is negative. Afterwards block_start is set to strstart and
+ * pending output is drained through flush_pending().
+ */
+#define FLUSH_BLOCK_ONLY(s, eof) { \
+   _tr_flush_block(s, (s->block_start >= 0L ? \
+                   (charf *)&s->window[(unsigned)s->block_start] : \
+                   (charf *)Z_NULL), \
+                (ulg)((long)s->strstart - s->block_start), \
+                (eof)); \
+   s->block_start = s->strstart; \
+   flush_pending(s->strm); \
+   Tracev((stderr,"[FLUSH]")); \
+}
+
+/* Same but force premature exit if necessary.
+ * This macro contains a return statement: when the output buffer is full
+ * after the flush, the ENCLOSING function returns finish_started (eof set)
+ * or need_more. Use it only inside functions returning block_state.
+ */
+#define FLUSH_BLOCK(s, eof) { \
+   FLUSH_BLOCK_ONLY(s, eof); \
+   if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \
+}
+
+/* ===========================================================================
+ * Copy without compression as much as possible from the input stream, return
+ * the current block state.
+ * This function does not insert new strings in the dictionary since
+ * uncompressible data is probably not useful. This function is used
+ * only for the level=0 compression option.
+ * NOTE: this function should be optimized to avoid extra copying from
+ * window to pending_buf.
+ */
+local block_state deflate_stored(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    /* Stored blocks are limited to 0xffff bytes, pending_buf is limited
+     * to pending_buf_size, and each stored block has a 5 byte header:
+     */
+    ulg max_block_size = 0xffff;
+    ulg max_start; /* window index at which the current block must be emitted */
+
+    if (max_block_size > s->pending_buf_size - 5) {
+        max_block_size = s->pending_buf_size - 5;
+    }
+
+    /* Copy as much as possible from input to output: */
+    for (;;) {
+        /* Fill the window as much as possible: */
+        if (s->lookahead <= 1) {
+
+            Assert(s->strstart < s->w_size+MAX_DIST(s) ||
+                   s->block_start >= (long)s->w_size, "slide too late");
+
+            fill_window(s);
+            if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more;
+
+            if (s->lookahead == 0) break; /* flush the current block */
+        }
+        Assert(s->block_start >= 0L, "block gone");
+
+        s->strstart += s->lookahead; /* consume the whole lookahead at once */
+        s->lookahead = 0;
+
+        /* Emit a stored block if pending_buf will be full: */
+        max_start = s->block_start + max_block_size;
+        if (s->strstart == 0 || (ulg)s->strstart >= max_start) {
+            /* strstart == 0 is possible when wraparound on 16-bit machine */
+            s->lookahead = (uInt)(s->strstart - max_start); /* give back the excess */
+            s->strstart = (uInt)max_start;
+            FLUSH_BLOCK(s, 0); /* may return need_more from this function */
+        }
+        /* Flush if we may have to slide, otherwise block_start may become
+         * negative and the data will be gone:
+         */
+        if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) {
+            FLUSH_BLOCK(s, 0);
+        }
+    }
+    FLUSH_BLOCK(s, flush == Z_FINISH); /* may return finish_started or need_more */
+    return flush == Z_FINISH ? finish_done : block_done;
+}
+
+/* ===========================================================================
+ * Compress as much as possible from the input stream, return the current
+ * block state.
+ * This function does not perform lazy evaluation of matches and inserts
+ * new strings in the dictionary only for unmatched strings or for short
+ * matches. It is used only for the fast compression options.
+ */
+local block_state deflate_fast(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    IPos hash_head = NIL; /* head of the hash chain */
+    int bflush;           /* set if current block must be flushed */
+
+    for (;;) {
+        /* Make sure that we always have enough lookahead, except
+         * at the end of the input file. We need MAX_MATCH bytes
+         * for the next match, plus MIN_MATCH bytes to insert the
+         * string following the next match.
+         */
+        if (s->lookahead < MIN_LOOKAHEAD) {
+            fill_window(s);
+            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+                return need_more;
+            }
+            if (s->lookahead == 0) break; /* flush the current block */
+        }
+
+        /* Insert the string window[strstart .. strstart+2] in the
+         * dictionary, and set hash_head to the head of the hash chain:
+         */
+        if (s->lookahead >= MIN_MATCH) {
+            INSERT_STRING(s, s->strstart, hash_head);
+        }
+
+        /* Find the longest match, discarding those <= prev_length.
+         * At this point we have always match_length < MIN_MATCH
+         */
+        if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) {
+            /* To simplify the code, we prevent matches with the string
+             * of window index 0 (in particular we have to avoid a match
+             * of the string with itself at the start of the input file).
+             */
+            if (s->strategy != Z_HUFFMAN_ONLY) {
+                s->match_length = longest_match (s, hash_head);
+            }
+            /* longest_match() sets match_start */
+        }
+        if (s->match_length >= MIN_MATCH) {
+            check_match(s, s->strstart, s->match_start, s->match_length);
+
+            _tr_tally_dist(s, s->strstart - s->match_start,
+                           s->match_length - MIN_MATCH, bflush);
+
+            s->lookahead -= s->match_length;
+
+            /* Insert new strings in the hash table only if the match length
+             * is not too large. This saves time but degrades compression.
+             */
+#ifndef FASTEST
+            if (s->match_length <= s->max_insert_length &&
+                s->lookahead >= MIN_MATCH) {
+                s->match_length--; /* string at strstart already in hash table */
+                do {
+                    s->strstart++;
+                    INSERT_STRING(s, s->strstart, hash_head);
+                    /* strstart never exceeds WSIZE-MAX_MATCH, so there are
+                     * always MIN_MATCH bytes ahead.
+                     */
+                } while (--s->match_length != 0);
+                s->strstart++;
+            } else
+#endif
+            {
+                s->strstart += s->match_length; /* skip the match without hashing it */
+                s->match_length = 0;
+                s->ins_h = s->window[s->strstart];
+                UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
+#if MIN_MATCH != 3
+                Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+                /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
+                 * matter since it will be recomputed at next deflate call.
+                 */
+            }
+        } else {
+            /* No match, output a literal byte */
+            Tracevv((stderr,"%c", s->window[s->strstart]));
+            _tr_tally_lit (s, s->window[s->strstart], bflush);
+            s->lookahead--;
+            s->strstart++;
+        }
+        if (bflush) FLUSH_BLOCK(s, 0); /* may return need_more from this function */
+    }
+    FLUSH_BLOCK(s, flush == Z_FINISH); /* may return finish_started or need_more */
+    return flush == Z_FINISH ? finish_done : block_done;
+}
+
+/* ===========================================================================
+ * Same as above, but achieves better compression. We use a lazy
+ * evaluation for matches: a match is finally adopted only if there is
+ * no better match at the next window position.
+ * Returns need_more when more input or output space is required,
+ * finish_started when the final flush filled the output buffer, and
+ * otherwise finish_done (flush == Z_FINISH) or block_done.
+ */
+local block_state deflate_slow(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    IPos hash_head = NIL;    /* head of hash chain */
+    int bflush;              /* set if current block must be flushed */
+
+    /* Process the input block. */
+    for (;;) {
+        /* Make sure that we always have enough lookahead, except
+         * at the end of the input file. We need MAX_MATCH bytes
+         * for the next match, plus MIN_MATCH bytes to insert the
+         * string following the next match.
+         */
+        if (s->lookahead < MIN_LOOKAHEAD) {
+            fill_window(s);
+            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+                return need_more;
+            }
+            if (s->lookahead == 0) break; /* flush the current block */
+        }
+
+        /* Insert the string window[strstart .. strstart+2] in the
+         * dictionary, and set hash_head to the head of the hash chain:
+         */
+        if (s->lookahead >= MIN_MATCH) {
+            INSERT_STRING(s, s->strstart, hash_head);
+        }
+
+        /* Find the longest match, discarding those <= prev_length.
+         */
+        s->prev_length = s->match_length, s->prev_match = s->match_start;
+        s->match_length = MIN_MATCH-1;
+
+        if (hash_head != NIL && s->prev_length < s->max_lazy_match &&
+            s->strstart - hash_head <= MAX_DIST(s)) {
+            /* To simplify the code, we prevent matches with the string
+             * of window index 0 (in particular we have to avoid a match
+             * of the string with itself at the start of the input file).
+             */
+            if (s->strategy != Z_HUFFMAN_ONLY) {
+                s->match_length = longest_match (s, hash_head);
+            }
+            /* longest_match() sets match_start */
+
+            if (s->match_length <= 5 && (s->strategy == Z_FILTERED ||
+                 (s->match_length == MIN_MATCH &&
+                  s->strstart - s->match_start > TOO_FAR))) {
+
+                /* If prev_match is also MIN_MATCH, match_start is garbage
+                 * but we will ignore the current match anyway.
+                 */
+                s->match_length = MIN_MATCH-1;
+            }
+        }
+        /* If there was a match at the previous step and the current
+         * match is not better, output the previous match:
+         */
+        if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
+            uInt max_insert = s->strstart + s->lookahead - MIN_MATCH;
+            /* Do not insert strings in hash table beyond this. */
+
+            check_match(s, s->strstart-1, s->prev_match, s->prev_length);
+
+            _tr_tally_dist(s, s->strstart -1 - s->prev_match,
+                           s->prev_length - MIN_MATCH, bflush);
+
+            /* Insert in hash table all strings up to the end of the match.
+             * strstart-1 and strstart are already inserted. If there is not
+             * enough lookahead, the last two strings are not inserted in
+             * the hash table.
+             */
+            s->lookahead -= s->prev_length-1;
+            s->prev_length -= 2;
+            do {
+                if (++s->strstart <= max_insert) {
+                    INSERT_STRING(s, s->strstart, hash_head);
+                }
+            } while (--s->prev_length != 0);
+            s->match_available = 0;
+            s->match_length = MIN_MATCH-1;
+            s->strstart++;
+
+            if (bflush) FLUSH_BLOCK(s, 0); /* may return need_more from this function */
+
+        } else if (s->match_available) {
+            /* If there was no match at the previous position, output a
+             * single literal. If there was a match but the current match
+             * is longer, truncate the previous match to a single literal.
+             */
+            Tracevv((stderr,"%c", s->window[s->strstart-1]));
+            _tr_tally_lit(s, s->window[s->strstart-1], bflush);
+            if (bflush) {
+                FLUSH_BLOCK_ONLY(s, 0);
+            }
+            s->strstart++; /* advance before the avail_out check below */
+            s->lookahead--;
+            if (s->strm->avail_out == 0) return need_more;
+        } else {
+            /* There is no previous match to compare with, wait for
+             * the next step to decide.
+             */
+            s->match_available = 1;
+            s->strstart++;
+            s->lookahead--;
+        }
+    }
+    Assert (flush != Z_NO_FLUSH, "no flush?");
+    if (s->match_available) { /* emit the literal still held back by lazy matching */
+        Tracevv((stderr,"%c", s->window[s->strstart-1]));
+        _tr_tally_lit(s, s->window[s->strstart-1], bflush);
+        s->match_available = 0;
+    }
+    FLUSH_BLOCK(s, flush == Z_FINISH);
+    return flush == Z_FINISH ? finish_done : block_done;
+}
diff --git a/src/utilfuns/zlib/deflate.h b/src/utilfuns/zlib/deflate.h
new file mode 100644
index 0000000..e55d52a
--- /dev/null
+++ b/src/utilfuns/zlib/deflate.h
@@ -0,0 +1,318 @@
+/* deflate.h -- internal compression state
+ * Copyright (C) 1995-1998 Jean-loup Gailly
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* @(#) $Id: deflate.h,v 1.1 2001/03/23 09:00:15 scribe Exp $ */
+
+#ifndef _DEFLATE_H
+#define _DEFLATE_H
+
+#include "zutil.h"
+
+/* ===========================================================================
+ * Internal compression state.
+ */
+
+#define LENGTH_CODES 29
+/* number of length codes, not counting the special END_BLOCK code */
+
+#define LITERALS  256
+/* number of literal bytes 0..255 */
+
+#define L_CODES (LITERALS+1+LENGTH_CODES)
+/* number of Literal or Length codes, including the END_BLOCK code */
+
+#define D_CODES   30
+/* number of distance codes */
+
+#define BL_CODES  19
+/* number of codes used to transfer the bit lengths */
+
+#define HEAP_SIZE (2*L_CODES+1)
+/* maximum heap size */
+
+#define MAX_BITS 15
+/* All codes must not exceed MAX_BITS bits */
+
+#define INIT_STATE    42
+#define BUSY_STATE   113
+#define FINISH_STATE 666
+/* Stream status: the values stored in the status field of internal_state */
+
+
+/* Data structure describing a single value and its code string. */
+typedef struct ct_data_s {
+    union {
+        ush  freq;       /* frequency count */
+        ush  code;       /* bit string */
+    } fc;
+    union {
+        ush  dad;        /* father node in Huffman tree */
+        ush  len;        /* length of bit string */
+    } dl;
+} FAR ct_data;
+
+#define Freq fc.freq
+#define Code fc.code
+#define Dad  dl.dad
+#define Len  dl.len
+
+typedef struct static_tree_desc_s  static_tree_desc;
+
+typedef struct tree_desc_s {
+    ct_data *dyn_tree;           /* the dynamic tree */
+    int     max_code;            /* largest code with non zero frequency */
+    static_tree_desc *stat_desc; /* the corresponding static tree */
+} FAR tree_desc;
+
+typedef ush Pos;
+typedef Pos FAR Posf;
+typedef unsigned IPos;
+
+/* A Pos is an index in the character window. We use short instead of int to
+ * save space in the various tables. IPos is used only for parameter passing.
+ */
+
+typedef struct internal_state {
+    z_streamp strm;      /* pointer back to this zlib stream */
+    int   status;        /* INIT_STATE, BUSY_STATE or FINISH_STATE */
+    Bytef *pending_buf;  /* output still pending */
+    ulg   pending_buf_size; /* size of pending_buf */
+    Bytef *pending_out;  /* next pending byte to output to the stream */
+    int   pending;       /* nb of bytes in the pending buffer */
+    int   noheader;      /* suppress zlib header and adler32 */
+    Byte  data_type;     /* UNKNOWN, BINARY or ASCII */
+    Byte  method;        /* STORED (for zip only) or DEFLATED */
+    int   last_flush;    /* value of flush param for previous deflate call */
+
+                /* used by deflate.c: */
+
+    uInt  w_size;        /* LZ77 window size (32K by default) */
+    uInt  w_bits;        /* log2(w_size)  (8..16) */
+    uInt  w_mask;        /* w_size - 1 */
+
+    Bytef *window;
+    /* Sliding window. Input bytes are read into the second half of the window,
+     * and move to the first half later to keep a dictionary of at least wSize
+     * bytes. With this organization, matches are limited to a distance of
+     * wSize-MAX_MATCH bytes, but this ensures that IO is always
+     * performed with a length multiple of the block size. Also, it limits
+     * the window size to 64K, which is quite useful on MSDOS.
+     * To do: use the user input buffer as sliding window.
+     */
+
+    ulg window_size;
+    /* Actual size of window: 2*wSize, except when the user input buffer
+     * is directly used as sliding window.
+     */
+
+    Posf *prev;
+    /* Link to older string with same hash index. To limit the size of this
+     * array to 64K, this link is maintained only for the last 32K strings.
+     * An index in this array is thus a window index modulo 32K.
+     */
+
+    Posf *head; /* Heads of the hash chains or NIL. */
+
+    uInt  ins_h;          /* hash index of string to be inserted */
+    uInt  hash_size;      /* number of elements in hash table */
+    uInt  hash_bits;      /* log2(hash_size) */
+    uInt  hash_mask;      /* hash_size-1 */
+
+    uInt  hash_shift;
+    /* Number of bits by which ins_h must be shifted at each input
+     * step. It must be such that after MIN_MATCH steps, the oldest
+     * byte no longer takes part in the hash key, that is:
+     *   hash_shift * MIN_MATCH >= hash_bits
+     */
+
+    long block_start;
+    /* Window position at the beginning of the current output block. Gets
+     * negative when the window is moved backwards.
+     */
+
+    uInt match_length;           /* length of best match */
+    IPos prev_match;             /* previous match */
+    int match_available;         /* set if previous match exists */
+    uInt strstart;               /* start of string to insert */
+    uInt match_start;            /* start of matching string */
+    uInt lookahead;              /* number of valid bytes ahead in window */
+
+    uInt prev_length;
+    /* Length of the best match at previous step. Matches not greater than this
+     * are discarded. This is used in the lazy match evaluation.
+     */
+
+    uInt max_chain_length;
+    /* To speed up deflation, hash chains are never searched beyond this
+     * length.  A higher limit improves compression ratio but degrades the
+     * speed.
+     */
+
+    uInt max_lazy_match;
+    /* Attempt to find a better match only when the current match is strictly
+     * smaller than this value. This mechanism is used only for compression
+     * levels >= 4.
+     */
+#   define max_insert_length  max_lazy_match
+    /* Insert new strings in the hash table only if the match length is not
+     * greater than this length. This saves time but degrades compression.
+     * max_insert_length is used only for compression levels <= 3.
+     */
+
+    int level;    /* compression level (1..9); NOTE(review): the deflate.c
+                   * comments also describe a level 0 served by
+                   * deflate_stored -- confirm the documented range */
+    int strategy; /* favor or force Huffman coding*/
+
+    uInt good_match;
+    /* Use a faster search when the previous match is longer than this */
+
+    int nice_match; /* Stop searching when current match exceeds this */
+
+                /* used by trees.c: */
+    /* Didn't use ct_data typedef below to suppress compiler warning */
+    struct ct_data_s dyn_ltree[HEAP_SIZE];   /* literal and length tree */
+    struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
+    struct ct_data_s bl_tree[2*BL_CODES+1];  /* Huffman tree for bit lengths */
+
+    struct tree_desc_s l_desc;               /* desc. for literal tree */
+    struct tree_desc_s d_desc;               /* desc. for distance tree */
+    struct tree_desc_s bl_desc;              /* desc. for bit length tree */
+
+    ush bl_count[MAX_BITS+1];
+    /* number of codes at each bit length for an optimal tree */
+
+    int heap[2*L_CODES+1];      /* heap used to build the Huffman trees */
+    int heap_len;               /* number of elements in the heap */
+    int heap_max;               /* element of largest frequency */
+    /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
+     * The same heap array is used to build all trees.
+     */
+
+    uch depth[2*L_CODES+1];
+    /* Depth of each subtree used as tie breaker for trees of equal frequency
+     */
+
+    uchf *l_buf;          /* buffer for literals or lengths */
+
+    uInt  lit_bufsize;
+    /* Size of match buffer for literals/lengths.  There are 4 reasons for
+     * limiting lit_bufsize to 64K:
+     *   - frequencies can be kept in 16 bit counters
+     *   - if compression is not successful for the first block, all input
+     *     data is still in the window so we can still emit a stored block even
+     *     when input comes from standard input.  (This can also be done for
+     *     all blocks if lit_bufsize is not greater than 32K.)
+     *   - if compression is not successful for a file smaller than 64K, we can
+     *     even emit a stored file instead of a stored block (saving 5 bytes).
+     *     This is applicable only for zip (not gzip or zlib).
+     *   - creating new Huffman trees less frequently may not provide fast
+     *     adaptation to changes in the input data statistics. (Take for
+     *     example a binary file with poorly compressible code followed by
+     *     a highly compressible string table.) Smaller buffer sizes give
+     *     fast adaptation but have of course the overhead of transmitting
+     *     trees more frequently.
+     *   - I can't count above 4
+     */
+
+    uInt last_lit;      /* running index in l_buf */
+
+    ushf *d_buf;
+    /* Buffer for distances. To simplify the code, d_buf and l_buf have
+     * the same number of elements. To use different lengths, an extra flag
+     * array would be necessary.
+     */
+
+    ulg opt_len;        /* bit length of current block with optimal trees */
+    ulg static_len;     /* bit length of current block with static trees */
+    uInt matches;       /* number of string matches in current block */
+    int last_eob_len;   /* bit length of EOB code for last block */
+
+#ifdef DEBUG
+    ulg compressed_len; /* total bit length of compressed file mod 2^32 */
+    ulg bits_sent;      /* bit length of compressed data sent mod 2^32 */
+#endif
+
+    ush bi_buf;
+    /* Output buffer. bits are inserted starting at the bottom (least
+     * significant bits).
+     */
+    int bi_valid;
+    /* Number of valid bits in bi_buf.  All bits above the last valid bit
+     * are always zero.
+     */
+
+} FAR deflate_state;
+
+/* Output a byte on the stream.
+ * IN assertion: there is enough room in pending_buf.
+ * The byte is appended at pending_buf[pending] and pending is incremented;
+ * no bounds check is performed here.
+ */
+#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);}
+
+
+#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
+/* Minimum amount of lookahead, except at the end of the input file.
+ * See deflate.c for comments about the MIN_MATCH+1.
+ */
+
+#define MAX_DIST(s)  ((s)->w_size-MIN_LOOKAHEAD)
+/* In order to simplify the code, particularly on 16 bit machines, match
+ * distances are limited to MAX_DIST instead of WSIZE.
+ */ + + /* in trees.c */ +void _tr_init OF((deflate_state *s)); +int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); +void _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len, + int eof)); +void _tr_align OF((deflate_state *s)); +void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len, + int eof)); + +#define d_code(dist) \ + ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) +/* Mapping from a distance to a distance code. dist is the distance - 1 and + * must not have side effects. _dist_code[256] and _dist_code[257] are never + * used. + */ + +#ifndef DEBUG +/* Inline versions of _tr_tally for speed: */ + +#if defined(GEN_TREES_H) || !defined(STDC) + extern uch _length_code[]; + extern uch _dist_code[]; +#else + extern const uch _length_code[]; + extern const uch _dist_code[]; +#endif + +# define _tr_tally_lit(s, c, flush) \ + { uch cc = (c); \ + s->d_buf[s->last_lit] = 0; \ + s->l_buf[s->last_lit++] = cc; \ + s->dyn_ltree[cc].Freq++; \ + flush = (s->last_lit == s->lit_bufsize-1); \ + } +# define _tr_tally_dist(s, distance, length, flush) \ + { uch len = (length); \ + ush dist = (distance); \ + s->d_buf[s->last_lit] = dist; \ + s->l_buf[s->last_lit++] = len; \ + dist--; \ + s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ + s->dyn_dtree[d_code(dist)].Freq++; \ + flush = (s->last_lit == s->lit_bufsize-1); \ + } +#else +# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) +# define _tr_tally_dist(s, distance, length, flush) \ + flush = _tr_tally(s, distance, length) +#endif + +#endif diff --git a/src/utilfuns/zlib/gzio.c b/src/utilfuns/zlib/gzio.c new file mode 100644 index 0000000..a2c5b58 --- /dev/null +++ b/src/utilfuns/zlib/gzio.c @@ -0,0 +1,875 @@ +/* gzio.c -- IO on .gz files + * Copyright (C) 1995-1998 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Compile this file with -DNO_DEFLATE to avoid the compression code. 
+ */ + +/* @(#) $Id: gzio.c,v 1.1 2001/03/23 09:00:15 scribe Exp $ */ + +#include <stdio.h> + +#include "zutil.h" + +struct internal_state {int dummy;}; /* for buggy compilers */ + +#ifndef Z_BUFSIZE +# ifdef MAXSEG_64K +# define Z_BUFSIZE 4096 /* minimize memory usage for 16-bit DOS */ +# else +# define Z_BUFSIZE 16384 +# endif +#endif +#ifndef Z_PRINTF_BUFSIZE +# define Z_PRINTF_BUFSIZE 4096 +#endif + +#define ALLOC(size) malloc(size) +#define TRYFREE(p) {if (p) free(p);} + +static int gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */ + +/* gzip flag byte */ +#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ +#define HEAD_CRC 0x02 /* bit 1 set: header CRC present */ +#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ +#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ +#define COMMENT 0x10 /* bit 4 set: file comment present */ +#define RESERVED 0xE0 /* bits 5..7: reserved */ + +typedef struct gz_stream { + z_stream stream; + int z_err; /* error code for last stream operation */ + int z_eof; /* set if end of input file */ + FILE *file; /* .gz file */ + Byte *inbuf; /* input buffer */ + Byte *outbuf; /* output buffer */ + uLong crc; /* crc32 of uncompressed data */ + char *msg; /* error message */ + char *path; /* path name for debugging only */ + int transparent; /* 1 if input file is not a .gz file */ + char mode; /* 'w' or 'r' */ + long startpos; /* start of compressed data in file (header skipped) */ +} gz_stream; + + +local gzFile gz_open OF((const char *path, const char *mode, int fd)); +local int do_flush OF((gzFile file, int flush)); +local int get_byte OF((gz_stream *s)); +local void check_header OF((gz_stream *s)); +local int destroy OF((gz_stream *s)); +local void putLong OF((FILE *file, uLong x)); +local uLong getLong OF((gz_stream *s)); + +/* =========================================================================== + Opens a gzip (.gz) file for reading or writing. 
The mode parameter + is as in fopen ("rb" or "wb"). The file is given either by file descriptor + or path name (if fd == -1). + gz_open return NULL if the file could not be opened or if there was + insufficient memory to allocate the (de)compression state; errno + can be checked to distinguish the two cases (if errno is zero, the + zlib error is Z_MEM_ERROR). +*/ +local gzFile gz_open (path, mode, fd) + const char *path; + const char *mode; + int fd; +{ + int err; + int level = Z_DEFAULT_COMPRESSION; /* compression level */ + int strategy = Z_DEFAULT_STRATEGY; /* compression strategy */ + char *p = (char*)mode; + gz_stream *s; + char fmode[80]; /* copy of mode, without the compression level */ + char *m = fmode; + + if (!path || !mode) return Z_NULL; + + s = (gz_stream *)ALLOC(sizeof(gz_stream)); + if (!s) return Z_NULL; + + s->stream.zalloc = (alloc_func)0; + s->stream.zfree = (free_func)0; + s->stream.opaque = (voidpf)0; + s->stream.next_in = s->inbuf = Z_NULL; + s->stream.next_out = s->outbuf = Z_NULL; + s->stream.avail_in = s->stream.avail_out = 0; + s->file = NULL; + s->z_err = Z_OK; + s->z_eof = 0; + s->crc = crc32(0L, Z_NULL, 0); + s->msg = NULL; + s->transparent = 0; + + s->path = (char*)ALLOC(strlen(path)+1); + if (s->path == NULL) { + return destroy(s), (gzFile)Z_NULL; + } + strcpy(s->path, path); /* do this early for debugging */ + + s->mode = '\0'; + do { + if (*p == 'r') s->mode = 'r'; + if (*p == 'w' || *p == 'a') s->mode = 'w'; + if (*p >= '0' && *p <= '9') { + level = *p - '0'; + } else if (*p == 'f') { + strategy = Z_FILTERED; + } else if (*p == 'h') { + strategy = Z_HUFFMAN_ONLY; + } else { + *m++ = *p; /* copy the mode */ + } + } while (*p++ && m != fmode + sizeof(fmode)); + if (s->mode == '\0') return destroy(s), (gzFile)Z_NULL; + + if (s->mode == 'w') { +#ifdef NO_DEFLATE + err = Z_STREAM_ERROR; +#else + err = deflateInit2(&(s->stream), level, + Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, strategy); + /* windowBits is passed < 0 to suppress zlib 
header */ + + s->stream.next_out = s->outbuf = (Byte*)ALLOC(Z_BUFSIZE); +#endif + if (err != Z_OK || s->outbuf == Z_NULL) { + return destroy(s), (gzFile)Z_NULL; + } + } else { + s->stream.next_in = s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); + + err = inflateInit2(&(s->stream), -MAX_WBITS); + /* windowBits is passed < 0 to tell that there is no zlib header. + * Note that in this case inflate *requires* an extra "dummy" byte + * after the compressed stream in order to complete decompression and + * return Z_STREAM_END. Here the gzip CRC32 ensures that 4 bytes are + * present after the compressed stream. + */ + if (err != Z_OK || s->inbuf == Z_NULL) { + return destroy(s), (gzFile)Z_NULL; + } + } + s->stream.avail_out = Z_BUFSIZE; + + errno = 0; + s->file = fd < 0 ? F_OPEN(path, fmode) : (FILE*)fdopen(fd, fmode); + + if (s->file == NULL) { + return destroy(s), (gzFile)Z_NULL; + } + if (s->mode == 'w') { + /* Write a very simple .gz header: + */ + fprintf(s->file, "%c%c%c%c%c%c%c%c%c%c", gz_magic[0], gz_magic[1], + Z_DEFLATED, 0 /*flags*/, 0,0,0,0 /*time*/, 0 /*xflags*/, OS_CODE); + s->startpos = 10L; + /* We use 10L instead of ftell(s->file) to because ftell causes an + * fflush on some systems. This version of the library doesn't use + * startpos anyway in write mode, so this initialization is not + * necessary. + */ + } else { + check_header(s); /* skip the .gz header */ + s->startpos = (ftell(s->file) - s->stream.avail_in); + } + + return (gzFile)s; +} + +/* =========================================================================== + Opens a gzip (.gz) file for reading or writing. +*/ +gzFile ZEXPORT gzopen (path, mode) + const char *path; + const char *mode; +{ + return gz_open (path, mode, -1); +} + +/* =========================================================================== + Associate a gzFile with the file descriptor fd. fd is not dup'ed here + to mimic the behavio(u)r of fdopen. 
+*/ +gzFile ZEXPORT gzdopen (fd, mode) + int fd; + const char *mode; +{ + char name[20]; + + if (fd < 0) return (gzFile)Z_NULL; + sprintf(name, "<fd:%d>", fd); /* for debugging */ + + return gz_open (name, mode, fd); +} + +/* =========================================================================== + * Update the compression level and strategy + */ +int ZEXPORT gzsetparams (file, level, strategy) + gzFile file; + int level; + int strategy; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; + + /* Make room to allow flushing */ + if (s->stream.avail_out == 0) { + + s->stream.next_out = s->outbuf; + if (fwrite(s->outbuf, 1, Z_BUFSIZE, s->file) != Z_BUFSIZE) { + s->z_err = Z_ERRNO; + } + s->stream.avail_out = Z_BUFSIZE; + } + + return deflateParams (&(s->stream), level, strategy); +} + +/* =========================================================================== + Read a byte from a gz_stream; update next_in and avail_in. Return EOF + for end of file. + IN assertion: the stream s has been sucessfully opened for reading. +*/ +local int get_byte(s) + gz_stream *s; +{ + if (s->z_eof) return EOF; + if (s->stream.avail_in == 0) { + errno = 0; + s->stream.avail_in = fread(s->inbuf, 1, Z_BUFSIZE, s->file); + if (s->stream.avail_in == 0) { + s->z_eof = 1; + if (ferror(s->file)) s->z_err = Z_ERRNO; + return EOF; + } + s->stream.next_in = s->inbuf; + } + s->stream.avail_in--; + return *(s->stream.next_in)++; +} + +/* =========================================================================== + Check the gzip header of a gz_stream opened for reading. Set the stream + mode to transparent if the gzip magic header is not present; set s->err + to Z_DATA_ERROR if the magic header is present but the rest of the header + is incorrect. + IN assertion: the stream s has already been created sucessfully; + s->stream.avail_in is zero for the first time, but may be non-zero + for concatenated .gz files. 
+*/ +local void check_header(s) + gz_stream *s; +{ + int method; /* method byte */ + int flags; /* flags byte */ + uInt len; + int c; + + /* Check the gzip magic header */ + for (len = 0; len < 2; len++) { + c = get_byte(s); + if (c != gz_magic[len]) { + if (len != 0) s->stream.avail_in++, s->stream.next_in--; + if (c != EOF) { + s->stream.avail_in++, s->stream.next_in--; + s->transparent = 1; + } + s->z_err = s->stream.avail_in != 0 ? Z_OK : Z_STREAM_END; + return; + } + } + method = get_byte(s); + flags = get_byte(s); + if (method != Z_DEFLATED || (flags & RESERVED) != 0) { + s->z_err = Z_DATA_ERROR; + return; + } + + /* Discard time, xflags and OS code: */ + for (len = 0; len < 6; len++) (void)get_byte(s); + + if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */ + len = (uInt)get_byte(s); + len += ((uInt)get_byte(s))<<8; + /* len is garbage if EOF but the loop below will quit anyway */ + while (len-- != 0 && get_byte(s) != EOF) ; + } + if ((flags & ORIG_NAME) != 0) { /* skip the original file name */ + while ((c = get_byte(s)) != 0 && c != EOF) ; + } + if ((flags & COMMENT) != 0) { /* skip the .gz file comment */ + while ((c = get_byte(s)) != 0 && c != EOF) ; + } + if ((flags & HEAD_CRC) != 0) { /* skip the header crc */ + for (len = 0; len < 2; len++) (void)get_byte(s); + } + s->z_err = s->z_eof ? Z_DATA_ERROR : Z_OK; +} + + /* =========================================================================== + * Cleanup then free the given gz_stream. Return a zlib error code. + Try freeing in the reverse order of allocations. 
+ */ +local int destroy (s) + gz_stream *s; +{ + int err = Z_OK; + + if (!s) return Z_STREAM_ERROR; + + TRYFREE(s->msg); + + if (s->stream.state != NULL) { + if (s->mode == 'w') { +#ifdef NO_DEFLATE + err = Z_STREAM_ERROR; +#else + err = deflateEnd(&(s->stream)); +#endif + } else if (s->mode == 'r') { + err = inflateEnd(&(s->stream)); + } + } + if (s->file != NULL && fclose(s->file)) { +#ifdef ESPIPE + if (errno != ESPIPE) /* fclose is broken for pipes in HP/UX */ +#endif + err = Z_ERRNO; + } + if (s->z_err < 0) err = s->z_err; + + TRYFREE(s->inbuf); + TRYFREE(s->outbuf); + TRYFREE(s->path); + TRYFREE(s); + return err; +} + +/* =========================================================================== + Reads the given number of uncompressed bytes from the compressed file. + gzread returns the number of bytes actually read (0 for end of file). +*/ +int ZEXPORT gzread (file, buf, len) + gzFile file; + voidp buf; + unsigned len; +{ + gz_stream *s = (gz_stream*)file; + Bytef *start = (Bytef*)buf; /* starting point for crc computation */ + Byte *next_out; /* == stream.next_out but not forced far (for MSDOS) */ + + if (s == NULL || s->mode != 'r') return Z_STREAM_ERROR; + + if (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO) return -1; + if (s->z_err == Z_STREAM_END) return 0; /* EOF */ + + next_out = (Byte*)buf; + s->stream.next_out = (Bytef*)buf; + s->stream.avail_out = len; + + while (s->stream.avail_out != 0) { + + if (s->transparent) { + /* Copy first the lookahead bytes: */ + uInt n = s->stream.avail_in; + if (n > s->stream.avail_out) n = s->stream.avail_out; + if (n > 0) { + zmemcpy(s->stream.next_out, s->stream.next_in, n); + next_out += n; + s->stream.next_out = next_out; + s->stream.next_in += n; + s->stream.avail_out -= n; + s->stream.avail_in -= n; + } + if (s->stream.avail_out > 0) { + s->stream.avail_out -= fread(next_out, 1, s->stream.avail_out, + s->file); + } + len -= s->stream.avail_out; + s->stream.total_in += (uLong)len; + s->stream.total_out += 
(uLong)len; + if (len == 0) s->z_eof = 1; + return (int)len; + } + if (s->stream.avail_in == 0 && !s->z_eof) { + + errno = 0; + s->stream.avail_in = fread(s->inbuf, 1, Z_BUFSIZE, s->file); + if (s->stream.avail_in == 0) { + s->z_eof = 1; + if (ferror(s->file)) { + s->z_err = Z_ERRNO; + break; + } + } + s->stream.next_in = s->inbuf; + } + s->z_err = inflate(&(s->stream), Z_NO_FLUSH); + + if (s->z_err == Z_STREAM_END) { + /* Check CRC and original size */ + s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start)); + start = s->stream.next_out; + + if (getLong(s) != s->crc) { + s->z_err = Z_DATA_ERROR; + } else { + (void)getLong(s); + /* The uncompressed length returned by above getlong() may + * be different from s->stream.total_out) in case of + * concatenated .gz files. Check for such files: + */ + check_header(s); + if (s->z_err == Z_OK) { + uLong total_in = s->stream.total_in; + uLong total_out = s->stream.total_out; + + inflateReset(&(s->stream)); + s->stream.total_in = total_in; + s->stream.total_out = total_out; + s->crc = crc32(0L, Z_NULL, 0); + } + } + } + if (s->z_err != Z_OK || s->z_eof) break; + } + s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start)); + + return (int)(len - s->stream.avail_out); +} + + +/* =========================================================================== + Reads one byte from the compressed file. gzgetc returns this byte + or -1 in case of end of file or error. +*/ +int ZEXPORT gzgetc(file) + gzFile file; +{ + unsigned char c; + + return gzread(file, &c, 1) == 1 ? c : -1; +} + + +/* =========================================================================== + Reads bytes from the compressed file until len-1 characters are + read, or a newline character is read and transferred to buf, or an + end-of-file condition is encountered. The string is then terminated + with a null character. + gzgets returns buf, or Z_NULL in case of error. + + The current implementation is not optimized at all. 
+*/ +char * ZEXPORT gzgets(file, buf, len) + gzFile file; + char *buf; + int len; +{ + char *b = buf; + if (buf == Z_NULL || len <= 0) return Z_NULL; + + while (--len > 0 && gzread(file, buf, 1) == 1 && *buf++ != '\n') ; + *buf = '\0'; + return b == buf && len > 0 ? Z_NULL : b; +} + + +#ifndef NO_DEFLATE +/* =========================================================================== + Writes the given number of uncompressed bytes into the compressed file. + gzwrite returns the number of bytes actually written (0 in case of error). +*/ +int ZEXPORT gzwrite (file, buf, len) + gzFile file; + const voidp buf; + unsigned len; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; + + s->stream.next_in = (Bytef*)buf; + s->stream.avail_in = len; + + while (s->stream.avail_in != 0) { + + if (s->stream.avail_out == 0) { + + s->stream.next_out = s->outbuf; + if (fwrite(s->outbuf, 1, Z_BUFSIZE, s->file) != Z_BUFSIZE) { + s->z_err = Z_ERRNO; + break; + } + s->stream.avail_out = Z_BUFSIZE; + } + s->z_err = deflate(&(s->stream), Z_NO_FLUSH); + if (s->z_err != Z_OK) break; + } + s->crc = crc32(s->crc, (const Bytef *)buf, len); + + return (int)(len - s->stream.avail_in); +} + +/* =========================================================================== + Converts, formats, and writes the args to the compressed file under + control of the format string, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written (0 in case of error). +*/ +#ifdef STDC +#include <stdarg.h> + +int ZEXPORTVA gzprintf (gzFile file, const char *format, /* args */ ...) 
+{ + char buf[Z_PRINTF_BUFSIZE]; + va_list va; + int len; + + va_start(va, format); +#ifdef HAS_vsnprintf + (void)vsnprintf(buf, sizeof(buf), format, va); +#else + (void)vsprintf(buf, format, va); +#endif + va_end(va); + len = strlen(buf); /* some *sprintf don't return the nb of bytes written */ + if (len <= 0) return 0; + + return gzwrite(file, buf, (unsigned)len); +} +#else /* not ANSI C */ + +int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, + a11, a12, a13, a14, a15, a16, a17, a18, a19, a20) + gzFile file; + const char *format; + int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, + a11, a12, a13, a14, a15, a16, a17, a18, a19, a20; +{ + char buf[Z_PRINTF_BUFSIZE]; + int len; + +#ifdef HAS_snprintf + snprintf(buf, sizeof(buf), format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); +#else + sprintf(buf, format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); +#endif + len = strlen(buf); /* old sprintf doesn't return the nb of bytes written */ + if (len <= 0) return 0; + + return gzwrite(file, buf, len); +} +#endif + +/* =========================================================================== + Writes c, converted to an unsigned char, into the compressed file. + gzputc returns the value that was written, or -1 in case of error. +*/ +int ZEXPORT gzputc(file, c) + gzFile file; + int c; +{ + unsigned char cc = (unsigned char) c; /* required for big endian systems */ + + return gzwrite(file, &cc, 1) == 1 ? (int)cc : -1; +} + + +/* =========================================================================== + Writes the given null-terminated string to the compressed file, excluding + the terminating null character. + gzputs returns the number of characters written, or -1 in case of error. 
+*/ +int ZEXPORT gzputs(file, s) + gzFile file; + const char *s; +{ + return gzwrite(file, (char*)s, (unsigned)strlen(s)); +} + + +/* =========================================================================== + Flushes all pending output into the compressed file. The parameter + flush is as in the deflate() function. +*/ +local int do_flush (file, flush) + gzFile file; + int flush; +{ + uInt len; + int done = 0; + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; + + s->stream.avail_in = 0; /* should be zero already anyway */ + + for (;;) { + len = Z_BUFSIZE - s->stream.avail_out; + + if (len != 0) { + if ((uInt)fwrite(s->outbuf, 1, len, s->file) != len) { + s->z_err = Z_ERRNO; + return Z_ERRNO; + } + s->stream.next_out = s->outbuf; + s->stream.avail_out = Z_BUFSIZE; + } + if (done) break; + s->z_err = deflate(&(s->stream), flush); + + /* Ignore the second of two consecutive flushes: */ + if (len == 0 && s->z_err == Z_BUF_ERROR) s->z_err = Z_OK; + + /* deflate has finished flushing only when it hasn't used up + * all the available space in the output buffer: + */ + done = (s->stream.avail_out != 0 || s->z_err == Z_STREAM_END); + + if (s->z_err != Z_OK && s->z_err != Z_STREAM_END) break; + } + return s->z_err == Z_STREAM_END ? Z_OK : s->z_err; +} + +int ZEXPORT gzflush (file, flush) + gzFile file; + int flush; +{ + gz_stream *s = (gz_stream*)file; + int err = do_flush (file, flush); + + if (err) return err; + fflush(s->file); + return s->z_err == Z_STREAM_END ? Z_OK : s->z_err; +} +#endif /* NO_DEFLATE */ + +/* =========================================================================== + Sets the starting position for the next gzread or gzwrite on the given + compressed file. The offset represents a number of bytes in the + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error. + SEEK_END is not implemented, returns error. 
+ In this version of the library, gzseek can be extremely slow. +*/ +z_off_t ZEXPORT gzseek (file, offset, whence) + gzFile file; + z_off_t offset; + int whence; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || whence == SEEK_END || + s->z_err == Z_ERRNO || s->z_err == Z_DATA_ERROR) { + return -1L; + } + + if (s->mode == 'w') { +#ifdef NO_DEFLATE + return -1L; +#else + if (whence == SEEK_SET) { + offset -= s->stream.total_in; + } + if (offset < 0) return -1L; + + /* At this point, offset is the number of zero bytes to write. */ + if (s->inbuf == Z_NULL) { + s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); /* for seeking */ + zmemzero(s->inbuf, Z_BUFSIZE); + } + while (offset > 0) { + uInt size = Z_BUFSIZE; + if (offset < Z_BUFSIZE) size = (uInt)offset; + + size = gzwrite(file, s->inbuf, size); + if (size == 0) return -1L; + + offset -= size; + } + return (z_off_t)s->stream.total_in; +#endif + } + /* Rest of function is for reading only */ + + /* compute absolute position */ + if (whence == SEEK_CUR) { + offset += s->stream.total_out; + } + if (offset < 0) return -1L; + + if (s->transparent) { + /* map to fseek */ + s->stream.avail_in = 0; + s->stream.next_in = s->inbuf; + if (fseek(s->file, offset, SEEK_SET) < 0) return -1L; + + s->stream.total_in = s->stream.total_out = (uLong)offset; + return offset; + } + + /* For a negative seek, rewind and use positive seek */ + if ((uLong)offset >= s->stream.total_out) { + offset -= s->stream.total_out; + } else if (gzrewind(file) < 0) { + return -1L; + } + /* offset is now the number of bytes to skip. */ + + if (offset != 0 && s->outbuf == Z_NULL) { + s->outbuf = (Byte*)ALLOC(Z_BUFSIZE); + } + while (offset > 0) { + int size = Z_BUFSIZE; + if (offset < Z_BUFSIZE) size = (int)offset; + + size = gzread(file, s->outbuf, (uInt)size); + if (size <= 0) return -1L; + offset -= size; + } + return (z_off_t)s->stream.total_out; +} + +/* =========================================================================== + Rewinds input file. 
+*/ +int ZEXPORT gzrewind (file) + gzFile file; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'r') return -1; + + s->z_err = Z_OK; + s->z_eof = 0; + s->stream.avail_in = 0; + s->stream.next_in = s->inbuf; + s->crc = crc32(0L, Z_NULL, 0); + + if (s->startpos == 0) { /* not a compressed file */ + rewind(s->file); + return 0; + } + + (void) inflateReset(&s->stream); + return fseek(s->file, s->startpos, SEEK_SET); +} + +/* =========================================================================== + Returns the starting position for the next gzread or gzwrite on the + given compressed file. This position represents a number of bytes in the + uncompressed data stream. +*/ +z_off_t ZEXPORT gztell (file) + gzFile file; +{ + return gzseek(file, 0L, SEEK_CUR); +} + +/* =========================================================================== + Returns 1 when EOF has previously been detected reading the given + input stream, otherwise zero. +*/ +int ZEXPORT gzeof (file) + gzFile file; +{ + gz_stream *s = (gz_stream*)file; + + return (s == NULL || s->mode != 'r') ? 0 : s->z_eof; +} + +/* =========================================================================== + Outputs a long in LSB order to the given file +*/ +local void putLong (file, x) + FILE *file; + uLong x; +{ + int n; + for (n = 0; n < 4; n++) { + fputc((int)(x & 0xff), file); + x >>= 8; + } +} + +/* =========================================================================== + Reads a long in LSB order from the given gz_stream. Sets z_err in case + of error. 
+*/ +local uLong getLong (s) + gz_stream *s; +{ + uLong x = (uLong)get_byte(s); + int c; + + x += ((uLong)get_byte(s))<<8; + x += ((uLong)get_byte(s))<<16; + c = get_byte(s); + if (c == EOF) s->z_err = Z_DATA_ERROR; + x += ((uLong)c)<<24; + return x; +} + +/* =========================================================================== + Flushes all pending output if necessary, closes the compressed file + and deallocates all the (de)compression state. +*/ +int ZEXPORT gzclose (file) + gzFile file; +{ + int err; + gz_stream *s = (gz_stream*)file; + + if (s == NULL) return Z_STREAM_ERROR; + + if (s->mode == 'w') { +#ifdef NO_DEFLATE + return Z_STREAM_ERROR; +#else + err = do_flush (file, Z_FINISH); + if (err != Z_OK) return destroy((gz_stream*)file); + + putLong (s->file, s->crc); + putLong (s->file, s->stream.total_in); +#endif + } + return destroy((gz_stream*)file); +} + +/* =========================================================================== + Returns the error message for the last error which occured on the + given compressed file. errnum is set to zlib error number. If an + error occured in the file system and not in the compression library, + errnum is set to Z_ERRNO and the application may consult errno + to get the exact error code. +*/ +const char* ZEXPORT gzerror (file, errnum) + gzFile file; + int *errnum; +{ + char *m; + gz_stream *s = (gz_stream*)file; + + if (s == NULL) { + *errnum = Z_STREAM_ERROR; + return (const char*)ERR_MSG(Z_STREAM_ERROR); + } + *errnum = s->z_err; + if (*errnum == Z_OK) return (const char*)""; + + m = (char*)(*errnum == Z_ERRNO ? 
zstrerror(errno) : s->stream.msg); + + if (m == NULL || *m == '\0') m = (char*)ERR_MSG(s->z_err); + + TRYFREE(s->msg); + s->msg = (char*)ALLOC(strlen(s->path) + strlen(m) + 3); + strcpy(s->msg, s->path); + strcat(s->msg, ": "); + strcat(s->msg, m); + return (const char*)s->msg; +} diff --git a/src/utilfuns/zlib/infblock.c b/src/utilfuns/zlib/infblock.c new file mode 100644 index 0000000..f4920fa --- /dev/null +++ b/src/utilfuns/zlib/infblock.c @@ -0,0 +1,398 @@ +/* infblock.c -- interpret and process block types to last block + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "infblock.h" +#include "inftrees.h" +#include "infcodes.h" +#include "infutil.h" + +struct inflate_codes_state {int dummy;}; /* for buggy compilers */ + +/* simplify the use of the inflate_huft type with some defines */ +#define exop word.what.Exop +#define bits word.what.Bits + +/* Table for deflate from PKZIP's appnote.txt. */ +local const uInt border[] = { /* Order of the bit length code lengths */ + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + +/* + Notes beyond the 1.93a appnote.txt: + + 1. Distance pointers never point before the beginning of the output + stream. + 2. Distance pointers can point back across blocks, up to 32k away. + 3. There is an implied maximum of 7 bits for the bit length table and + 15 bits for the actual data. + 4. If only one code exists, then it is encoded using one bit. (Zero + would be more efficient, but perhaps a little confusing.) If two + codes exist, they are coded using one bit each (0 and 1). + 5. There is no way of sending zero distance codes--a dummy must be + sent if there are none. (History: a pre 2.0 version of PKZIP would + store blocks with no distance codes, but this was discovered to be + too harsh a criterion.) Valid only for 1.93a. 2.04c does allow + zero distance codes, which is sent as one code of zero bits in + length. 
+ 6. There are up to 286 literal/length codes. Code 256 represents the + end-of-block. Note however that the static length tree defines + 288 codes just to fill out the Huffman codes. Codes 286 and 287 + cannot be used though, since there is no length base or extra bits + defined for them. Similarily, there are up to 30 distance codes. + However, static trees define 32 codes (all 5 bits) to fill out the + Huffman codes, but the last two had better not show up in the data. + 7. Unzip can check dynamic Huffman blocks for complete code sets. + The exception is that a single code would not be complete (see #4). + 8. The five bits following the block type is really the number of + literal codes sent minus 257. + 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits + (1+6+6). Therefore, to output three times the length, you output + three codes (1+1+1), whereas to output four times the same length, + you only need two codes (1+3). Hmm. + 10. In the tree reconstruction algorithm, Code = Code + Increment + only if BitLength(i) is not zero. (Pretty obvious.) + 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19) + 12. Note: length code 284 can represent 227-258, but length code 285 + really is 258. The last length deserves its own, short code + since it gets used a lot in very redundant files. The length + 258 is special since 258 - 3 (the min match length) is 255. + 13. The literal/length and distance code bit lengths are read as a + single stream of lengths. It is possible (and advantageous) for + a repeat code (16, 17, or 18) to go across the boundary between + the two sets of lengths. 
+ */ + + +void inflate_blocks_reset(s, z, c) +inflate_blocks_statef *s; +z_streamp z; +uLongf *c; +{ + if (c != Z_NULL) + *c = s->check; + if (s->mode == BTREE || s->mode == DTREE) + ZFREE(z, s->sub.trees.blens); + if (s->mode == CODES) + inflate_codes_free(s->sub.decode.codes, z); + s->mode = TYPE; + s->bitk = 0; + s->bitb = 0; + s->read = s->write = s->window; + if (s->checkfn != Z_NULL) + z->adler = s->check = (*s->checkfn)(0L, (const Bytef *)Z_NULL, 0); + Tracev((stderr, "inflate: blocks reset\n")); +} + + +inflate_blocks_statef *inflate_blocks_new(z, c, w) +z_streamp z; +check_func c; +uInt w; +{ + inflate_blocks_statef *s; + + if ((s = (inflate_blocks_statef *)ZALLOC + (z,1,sizeof(struct inflate_blocks_state))) == Z_NULL) + return s; + if ((s->hufts = + (inflate_huft *)ZALLOC(z, sizeof(inflate_huft), MANY)) == Z_NULL) + { + ZFREE(z, s); + return Z_NULL; + } + if ((s->window = (Bytef *)ZALLOC(z, 1, w)) == Z_NULL) + { + ZFREE(z, s->hufts); + ZFREE(z, s); + return Z_NULL; + } + s->end = s->window + w; + s->checkfn = c; + s->mode = TYPE; + Tracev((stderr, "inflate: blocks allocated\n")); + inflate_blocks_reset(s, z, Z_NULL); + return s; +} + + +int inflate_blocks(s, z, r) +inflate_blocks_statef *s; +z_streamp z; +int r; +{ + uInt t; /* temporary storage */ + uLong b; /* bit buffer */ + uInt k; /* bits in bit buffer */ + Bytef *p; /* input data pointer */ + uInt n; /* bytes available there */ + Bytef *q; /* output window write pointer */ + uInt m; /* bytes to end of window or read pointer */ + + /* copy input/output information to locals (UPDATE macro restores) */ + LOAD + + /* process input based on current state */ + while (1) switch (s->mode) + { + case TYPE: + NEEDBITS(3) + t = (uInt)b & 7; + s->last = t & 1; + switch (t >> 1) + { + case 0: /* stored */ + Tracev((stderr, "inflate: stored block%s\n", + s->last ? 
" (last)" : "")); + DUMPBITS(3) + t = k & 7; /* go to byte boundary */ + DUMPBITS(t) + s->mode = LENS; /* get length of stored block */ + break; + case 1: /* fixed */ + Tracev((stderr, "inflate: fixed codes block%s\n", + s->last ? " (last)" : "")); + { + uInt bl, bd; + inflate_huft *tl, *td; + + inflate_trees_fixed(&bl, &bd, &tl, &td, z); + s->sub.decode.codes = inflate_codes_new(bl, bd, tl, td, z); + if (s->sub.decode.codes == Z_NULL) + { + r = Z_MEM_ERROR; + LEAVE + } + } + DUMPBITS(3) + s->mode = CODES; + break; + case 2: /* dynamic */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + s->last ? " (last)" : "")); + DUMPBITS(3) + s->mode = TABLE; + break; + case 3: /* illegal */ + DUMPBITS(3) + s->mode = BAD; + z->msg = (char*)"invalid block type"; + r = Z_DATA_ERROR; + LEAVE + } + break; + case LENS: + NEEDBITS(32) + if ((((~b) >> 16) & 0xffff) != (b & 0xffff)) + { + s->mode = BAD; + z->msg = (char*)"invalid stored block lengths"; + r = Z_DATA_ERROR; + LEAVE + } + s->sub.left = (uInt)b & 0xffff; + b = k = 0; /* dump bits */ + Tracev((stderr, "inflate: stored length %u\n", s->sub.left)); + s->mode = s->sub.left ? STORED : (s->last ? DRY : TYPE); + break; + case STORED: + if (n == 0) + LEAVE + NEEDOUT + t = s->sub.left; + if (t > n) t = n; + if (t > m) t = m; + zmemcpy(q, p, t); + p += t; n -= t; + q += t; m -= t; + if ((s->sub.left -= t) != 0) + break; + Tracev((stderr, "inflate: stored end, %lu total out\n", + z->total_out + (q >= s->read ? q - s->read : + (s->end - s->read) + (q - s->window)))); + s->mode = s->last ? 
DRY : TYPE; + break; + case TABLE: + NEEDBITS(14) + s->sub.trees.table = t = (uInt)b & 0x3fff; +#ifndef PKZIP_BUG_WORKAROUND + if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29) + { + s->mode = BAD; + z->msg = (char*)"too many length or distance symbols"; + r = Z_DATA_ERROR; + LEAVE + } +#endif + t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f); + if ((s->sub.trees.blens = (uIntf*)ZALLOC(z, t, sizeof(uInt))) == Z_NULL) + { + r = Z_MEM_ERROR; + LEAVE + } + DUMPBITS(14) + s->sub.trees.index = 0; + Tracev((stderr, "inflate: table sizes ok\n")); + s->mode = BTREE; + case BTREE: + while (s->sub.trees.index < 4 + (s->sub.trees.table >> 10)) + { + NEEDBITS(3) + s->sub.trees.blens[border[s->sub.trees.index++]] = (uInt)b & 7; + DUMPBITS(3) + } + while (s->sub.trees.index < 19) + s->sub.trees.blens[border[s->sub.trees.index++]] = 0; + s->sub.trees.bb = 7; + t = inflate_trees_bits(s->sub.trees.blens, &s->sub.trees.bb, + &s->sub.trees.tb, s->hufts, z); + if (t != Z_OK) + { + ZFREE(z, s->sub.trees.blens); + r = t; + if (r == Z_DATA_ERROR) + s->mode = BAD; + LEAVE + } + s->sub.trees.index = 0; + Tracev((stderr, "inflate: bits tree ok\n")); + s->mode = DTREE; + case DTREE: + while (t = s->sub.trees.table, + s->sub.trees.index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f)) + { + inflate_huft *h; + uInt i, j, c; + + t = s->sub.trees.bb; + NEEDBITS(t) + h = s->sub.trees.tb + ((uInt)b & inflate_mask[t]); + t = h->bits; + c = h->base; + if (c < 16) + { + DUMPBITS(t) + s->sub.trees.blens[s->sub.trees.index++] = c; + } + else /* c == 16..18 */ + { + i = c == 18 ? 7 : c - 14; + j = c == 18 ? 11 : 3; + NEEDBITS(t + i) + DUMPBITS(t) + j += (uInt)b & inflate_mask[i]; + DUMPBITS(i) + i = s->sub.trees.index; + t = s->sub.trees.table; + if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) || + (c == 16 && i < 1)) + { + ZFREE(z, s->sub.trees.blens); + s->mode = BAD; + z->msg = (char*)"invalid bit length repeat"; + r = Z_DATA_ERROR; + LEAVE + } + c = c == 16 ? 
s->sub.trees.blens[i - 1] : 0; + do { + s->sub.trees.blens[i++] = c; + } while (--j); + s->sub.trees.index = i; + } + } + s->sub.trees.tb = Z_NULL; + { + uInt bl, bd; + inflate_huft *tl, *td; + inflate_codes_statef *c; + + bl = 9; /* must be <= 9 for lookahead assumptions */ + bd = 6; /* must be <= 9 for lookahead assumptions */ + t = s->sub.trees.table; + t = inflate_trees_dynamic(257 + (t & 0x1f), 1 + ((t >> 5) & 0x1f), + s->sub.trees.blens, &bl, &bd, &tl, &td, + s->hufts, z); + ZFREE(z, s->sub.trees.blens); + if (t != Z_OK) + { + if (t == (uInt)Z_DATA_ERROR) + s->mode = BAD; + r = t; + LEAVE + } + Tracev((stderr, "inflate: trees ok\n")); + if ((c = inflate_codes_new(bl, bd, tl, td, z)) == Z_NULL) + { + r = Z_MEM_ERROR; + LEAVE + } + s->sub.decode.codes = c; + } + s->mode = CODES; + case CODES: + UPDATE + if ((r = inflate_codes(s, z, r)) != Z_STREAM_END) + return inflate_flush(s, z, r); + r = Z_OK; + inflate_codes_free(s->sub.decode.codes, z); + LOAD + Tracev((stderr, "inflate: codes end, %lu total out\n", + z->total_out + (q >= s->read ? q - s->read : + (s->end - s->read) + (q - s->window)))); + if (!s->last) + { + s->mode = TYPE; + break; + } + s->mode = DRY; + case DRY: + FLUSH + if (s->read != s->write) + LEAVE + s->mode = DONE; + case DONE: + r = Z_STREAM_END; + LEAVE + case BAD: + r = Z_DATA_ERROR; + LEAVE + default: + r = Z_STREAM_ERROR; + LEAVE + } +} + + +int inflate_blocks_free(s, z) +inflate_blocks_statef *s; +z_streamp z; +{ + inflate_blocks_reset(s, z, Z_NULL); + ZFREE(z, s->window); + ZFREE(z, s->hufts); + ZFREE(z, s); + Tracev((stderr, "inflate: blocks freed\n")); + return Z_OK; +} + + +void inflate_set_dictionary(s, d, n) +inflate_blocks_statef *s; +const Bytef *d; +uInt n; +{ + zmemcpy(s->window, d, n); + s->read = s->write = s->window + n; +} + + +/* Returns true if inflate is currently at the end of a block generated + * by Z_SYNC_FLUSH or Z_FULL_FLUSH. 
+ * IN assertion: s != Z_NULL + */ +int inflate_blocks_sync_point(s) +inflate_blocks_statef *s; +{ + return s->mode == LENS; +} diff --git a/src/utilfuns/zlib/infblock.h b/src/utilfuns/zlib/infblock.h new file mode 100644 index 0000000..bd25c80 --- /dev/null +++ b/src/utilfuns/zlib/infblock.h @@ -0,0 +1,39 @@ +/* infblock.h -- header to use infblock.c + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +struct inflate_blocks_state; +typedef struct inflate_blocks_state FAR inflate_blocks_statef; + +extern inflate_blocks_statef * inflate_blocks_new OF(( + z_streamp z, + check_func c, /* check function */ + uInt w)); /* window size */ + +extern int inflate_blocks OF(( + inflate_blocks_statef *, + z_streamp , + int)); /* initial return code */ + +extern void inflate_blocks_reset OF(( + inflate_blocks_statef *, + z_streamp , + uLongf *)); /* check value on output */ + +extern int inflate_blocks_free OF(( + inflate_blocks_statef *, + z_streamp)); + +extern void inflate_set_dictionary OF(( + inflate_blocks_statef *s, + const Bytef *d, /* dictionary */ + uInt n)); /* dictionary length */ + +extern int inflate_blocks_sync_point OF(( + inflate_blocks_statef *s)); diff --git a/src/utilfuns/zlib/infcodes.c b/src/utilfuns/zlib/infcodes.c new file mode 100644 index 0000000..cfd0807 --- /dev/null +++ b/src/utilfuns/zlib/infcodes.c @@ -0,0 +1,260 @@ +/* infcodes.c -- process literals and length/distance pairs + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" +#include "infblock.h" +#include "infcodes.h" +#include "infutil.h" +#include "inffast.h" + +/* simplify the use of the inflate_huft type with some 
defines */ +#define exop word.what.Exop +#define bits word.what.Bits + +typedef enum { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */ + START, /* x: set up for LEN */ + LEN, /* i: get length/literal/eob next */ + LENEXT, /* i: getting length extra (have base) */ + DIST, /* i: get distance next */ + DISTEXT, /* i: getting distance extra */ + COPY, /* o: copying bytes in window, waiting for space */ + LIT, /* o: got literal, waiting for output space */ + WASH, /* o: got eob, possibly still output waiting */ + END, /* x: got eob and all data flushed */ + BADCODE} /* x: got error */ +inflate_codes_mode; + +/* inflate codes private state */ +struct inflate_codes_state { + + /* mode */ + inflate_codes_mode mode; /* current inflate_codes mode */ + + /* mode dependent information */ + uInt len; + union { + struct { + inflate_huft *tree; /* pointer into tree */ + uInt need; /* bits needed */ + } code; /* if LEN or DIST, where in tree */ + uInt lit; /* if LIT, literal */ + struct { + uInt get; /* bits to get for extra */ + uInt dist; /* distance back to copy from */ + } copy; /* if EXT or COPY, where and how much */ + } sub; /* submode */ + + /* mode independent information */ + Byte lbits; /* ltree bits decoded per branch */ + Byte dbits; /* dtree bits decoder per branch */ + inflate_huft *ltree; /* literal/length/eob tree */ + inflate_huft *dtree; /* distance tree */ + +}; + + +inflate_codes_statef *inflate_codes_new(bl, bd, tl, td, z) +uInt bl, bd; +inflate_huft *tl; +inflate_huft *td; /* need separate declaration for Borland C++ */ +z_streamp z; +{ + inflate_codes_statef *c; + + if ((c = (inflate_codes_statef *) + ZALLOC(z,1,sizeof(struct inflate_codes_state))) != Z_NULL) + { + c->mode = START; + c->lbits = (Byte)bl; + c->dbits = (Byte)bd; + c->ltree = tl; + c->dtree = td; + Tracev((stderr, "inflate: codes new\n")); + } + return c; +} + + +int inflate_codes(s, z, r) +inflate_blocks_statef *s; +z_streamp z; +int r; +{ + uInt j; /* temporary storage */ + 
inflate_huft *t; /* temporary pointer */ + uInt e; /* extra bits or operation */ + uLong b; /* bit buffer */ + uInt k; /* bits in bit buffer */ + Bytef *p; /* input data pointer */ + uInt n; /* bytes available there */ + Bytef *q; /* output window write pointer */ + uInt m; /* bytes to end of window or read pointer */ + Bytef *f; /* pointer to copy strings from */ + inflate_codes_statef *c = s->sub.decode.codes; /* codes state */ + long tryF; +// f = q + /* copy input/output information to locals (UPDATE macro restores) */ + LOAD + + /* process input and output based on current state */ + while (1) switch (c->mode) + { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */ + case START: /* x: set up for LEN */ +#ifndef SLOW + if (m >= 258 && n >= 10) + { + UPDATE + r = inflate_fast(c->lbits, c->dbits, c->ltree, c->dtree, s, z); + LOAD + if (r != Z_OK) + { + c->mode = r == Z_STREAM_END ? WASH : BADCODE; + break; + } + } +#endif /* !SLOW */ + c->sub.code.need = c->lbits; + c->sub.code.tree = c->ltree; + c->mode = LEN; + case LEN: /* i: get length/literal/eob next */ + j = c->sub.code.need; + NEEDBITS(j) + t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); + DUMPBITS(t->bits) + e = (uInt)(t->exop); + if (e == 0) /* literal */ + { + c->sub.lit = t->base; + Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? 
+ "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", t->base)); + c->mode = LIT; + break; + } + if (e & 16) /* length */ + { + c->sub.copy.get = e & 15; + c->len = t->base; + c->mode = LENEXT; + break; + } + if ((e & 64) == 0) /* next table */ + { + c->sub.code.need = e; + c->sub.code.tree = t + t->base; + break; + } + if (e & 32) /* end of block */ + { + Tracevv((stderr, "inflate: end of block\n")); + c->mode = WASH; + break; + } + c->mode = BADCODE; /* invalid code */ + z->msg = (char*)"invalid literal/length code"; + r = Z_DATA_ERROR; + LEAVE + case LENEXT: /* i: getting length extra (have base) */ + j = c->sub.copy.get; + NEEDBITS(j) + c->len += (uInt)b & inflate_mask[j]; + DUMPBITS(j) + c->sub.code.need = c->dbits; + c->sub.code.tree = c->dtree; + Tracevv((stderr, "inflate: length %u\n", c->len)); + c->mode = DIST; + case DIST: /* i: get distance next */ + j = c->sub.code.need; + NEEDBITS(j) + t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); + DUMPBITS(t->bits) + e = (uInt)(t->exop); + if (e & 16) /* distance */ + { + c->sub.copy.get = e & 15; + c->sub.copy.dist = t->base; + c->mode = DISTEXT; + break; + } + if ((e & 64) == 0) /* next table */ + { + c->sub.code.need = e; + c->sub.code.tree = t + t->base; + break; + } + c->mode = BADCODE; /* invalid code */ + z->msg = (char*)"invalid distance code"; + r = Z_DATA_ERROR; + LEAVE + case DISTEXT: /* i: getting distance extra */ + j = c->sub.copy.get; + NEEDBITS(j) + c->sub.copy.dist += (uInt)b & inflate_mask[j]; + DUMPBITS(j) + Tracevv((stderr, "inflate: distance %u\n", c->sub.copy.dist)); + c->mode = COPY; + case COPY: /* o: copying bytes in window, waiting for space */ +#ifndef __TURBOC__ /* Turbo C bug for following expression */ + f = (uInt)(q - s->window) < c->sub.copy.dist ? 
+ s->end - (c->sub.copy.dist - (q - s->window)) : + q - c->sub.copy.dist; +#else + tryF = (long)q - c->sub.copy.dist; +// f = q - c->sub.copy.dist; + if ((uInt)(q - s->window) < c->sub.copy.dist) + f = s->end - (c->sub.copy.dist - (uInt)(q - s->window)); + else f = (Bytef *)tryF; +#endif + while (c->len) + { + NEEDOUT + OUTBYTE(*f++) + if (f == s->end) + f = s->window; + c->len--; + } + c->mode = START; + break; + case LIT: /* o: got literal, waiting for output space */ + NEEDOUT + OUTBYTE(c->sub.lit) + c->mode = START; + break; + case WASH: /* o: got eob, possibly more output */ + if (k > 7) /* return unused byte, if any */ + { + Assert(k < 16, "inflate_codes grabbed too many bytes") + k -= 8; + n++; + p--; /* can always return one */ + } + FLUSH + if (s->read != s->write) + LEAVE + c->mode = END; + case END: + r = Z_STREAM_END; + LEAVE + case BADCODE: /* x: got error */ + r = Z_DATA_ERROR; + LEAVE + default: + r = Z_STREAM_ERROR; + LEAVE + } +#ifdef NEED_DUMMY_RETURN + return Z_STREAM_ERROR; /* Some dumb compilers complain without this */ +#endif +} + + +void inflate_codes_free(c, z) +inflate_codes_statef *c; +z_streamp z; +{ + ZFREE(z, c); + Tracev((stderr, "inflate: codes free\n")); +} diff --git a/src/utilfuns/zlib/infcodes.h b/src/utilfuns/zlib/infcodes.h new file mode 100644 index 0000000..6c750d8 --- /dev/null +++ b/src/utilfuns/zlib/infcodes.h @@ -0,0 +1,27 @@ +/* infcodes.h -- header to use infcodes.c + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. 
+ */ + +struct inflate_codes_state; +typedef struct inflate_codes_state FAR inflate_codes_statef; + +extern inflate_codes_statef *inflate_codes_new OF(( + uInt, uInt, + inflate_huft *, inflate_huft *, + z_streamp )); + +extern int inflate_codes OF(( + inflate_blocks_statef *, + z_streamp , + int)); + +extern void inflate_codes_free OF(( + inflate_codes_statef *, + z_streamp )); + diff --git a/src/utilfuns/zlib/inffast.c b/src/utilfuns/zlib/inffast.c new file mode 100644 index 0000000..61a78ee --- /dev/null +++ b/src/utilfuns/zlib/inffast.c @@ -0,0 +1,170 @@ +/* inffast.c -- process literals and length/distance pairs fast + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" +#include "infblock.h" +#include "infcodes.h" +#include "infutil.h" +#include "inffast.h" + +struct inflate_codes_state {int dummy;}; /* for buggy compilers */ + +/* simplify the use of the inflate_huft type with some defines */ +#define exop word.what.Exop +#define bits word.what.Bits + +/* macros for bit input with no checking and for returning unused bytes */ +#define GRABBITS(j) {while(k<(j)){b|=((uLong)NEXTBYTE)<<k;k+=8;}} +#define UNGRAB {c=z->avail_in-n;c=(k>>3)<c?k>>3:c;n+=c;p-=c;k-=c<<3;} + +/* Called with number of bytes left to write in window at least 258 + (the maximum string length) and number of input bytes available + at least ten. The ten bytes are six bytes for the longest length/ + distance pair plus four bytes for overloading the bit buffer. 
*/ + +int inflate_fast(bl, bd, tl, td, s, z) +uInt bl, bd; +inflate_huft *tl; +inflate_huft *td; /* need separate declaration for Borland C++ */ +inflate_blocks_statef *s; +z_streamp z; +{ + inflate_huft *t; /* temporary pointer */ + uInt e; /* extra bits or operation */ + uLong b; /* bit buffer */ + uInt k; /* bits in bit buffer */ + Bytef *p; /* input data pointer */ + uInt n; /* bytes available there */ + Bytef *q; /* output window write pointer */ + uInt m; /* bytes to end of window or read pointer */ + uInt ml; /* mask for literal/length tree */ + uInt md; /* mask for distance tree */ + uInt c; /* bytes to copy */ + uInt d; /* distance back to copy from */ + Bytef *r; /* copy source pointer */ + + /* load input, output, bit values */ + LOAD + + /* initialize masks */ + ml = inflate_mask[bl]; + md = inflate_mask[bd]; + + /* do until not enough input or output space for fast loop */ + do { /* assume called with m >= 258 && n >= 10 */ + /* get literal/length code */ + GRABBITS(20) /* max bits for literal/length code */ + if ((e = (t = tl + ((uInt)b & ml))->exop) == 0) + { + DUMPBITS(t->bits) + Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? 
+ "inflate: * literal '%c'\n" : + "inflate: * literal 0x%02x\n", t->base)); + *q++ = (Byte)t->base; + m--; + continue; + } + do { + DUMPBITS(t->bits) + if (e & 16) + { + /* get extra bits for length */ + e &= 15; + c = t->base + ((uInt)b & inflate_mask[e]); + DUMPBITS(e) + Tracevv((stderr, "inflate: * length %u\n", c)); + + /* decode distance base of block to copy */ + GRABBITS(15); /* max bits for distance code */ + e = (t = td + ((uInt)b & md))->exop; + do { + DUMPBITS(t->bits) + if (e & 16) + { + /* get extra bits to add to distance base */ + e &= 15; + GRABBITS(e) /* get extra bits (up to 13) */ + d = t->base + ((uInt)b & inflate_mask[e]); + DUMPBITS(e) + Tracevv((stderr, "inflate: * distance %u\n", d)); + + /* do the copy */ + m -= c; + if ((uInt)(q - s->window) >= d) /* offset before dest */ + { /* just copy */ + r = q - d; + *q++ = *r++; c--; /* minimum count is three, */ + *q++ = *r++; c--; /* so unroll loop a little */ + } + else /* else offset after destination */ + { + e = d - (uInt)(q - s->window); /* bytes from offset to end */ + r = s->end - e; /* pointer to offset */ + if (c > e) /* if source crosses, */ + { + c -= e; /* copy to end of window */ + do { + *q++ = *r++; + } while (--e); + r = s->window; /* copy rest from start of window */ + } + } + do { /* copy all or what's left */ + *q++ = *r++; + } while (--c); + break; + } + else if ((e & 64) == 0) + { + t += t->base; + e = (t += ((uInt)b & inflate_mask[e]))->exop; + } + else + { + z->msg = (char*)"invalid distance code"; + UNGRAB + UPDATE + return Z_DATA_ERROR; + } + } while (1); + break; + } + if ((e & 64) == 0) + { + t += t->base; + if ((e = (t += ((uInt)b & inflate_mask[e]))->exop) == 0) + { + DUMPBITS(t->bits) + Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? 
+ "inflate: * literal '%c'\n" : + "inflate: * literal 0x%02x\n", t->base)); + *q++ = (Byte)t->base; + m--; + break; + } + } + else if (e & 32) + { + Tracevv((stderr, "inflate: * end of block\n")); + UNGRAB + UPDATE + return Z_STREAM_END; + } + else + { + z->msg = (char*)"invalid literal/length code"; + UNGRAB + UPDATE + return Z_DATA_ERROR; + } + } while (1); + } while (m >= 258 && n >= 10); + + /* not enough input or output--restore pointers and return */ + UNGRAB + UPDATE + return Z_OK; +} diff --git a/src/utilfuns/zlib/inffast.h b/src/utilfuns/zlib/inffast.h new file mode 100644 index 0000000..8facec5 --- /dev/null +++ b/src/utilfuns/zlib/inffast.h @@ -0,0 +1,17 @@ +/* inffast.h -- header to use inffast.c + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +extern int inflate_fast OF(( + uInt, + uInt, + inflate_huft *, + inflate_huft *, + inflate_blocks_statef *, + z_streamp )); diff --git a/src/utilfuns/zlib/inffixed.h b/src/utilfuns/zlib/inffixed.h new file mode 100644 index 0000000..77f7e76 --- /dev/null +++ b/src/utilfuns/zlib/inffixed.h @@ -0,0 +1,151 @@ +/* inffixed.h -- table for decoding fixed codes + * Generated automatically by the maketree.c program + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. 
+ */ + +local uInt fixed_bl = 9; +local uInt fixed_bd = 5; +local inflate_huft fixed_tl[] = { + {{{96,7}},256}, {{{0,8}},80}, {{{0,8}},16}, {{{84,8}},115}, + {{{82,7}},31}, {{{0,8}},112}, {{{0,8}},48}, {{{0,9}},192}, + {{{80,7}},10}, {{{0,8}},96}, {{{0,8}},32}, {{{0,9}},160}, + {{{0,8}},0}, {{{0,8}},128}, {{{0,8}},64}, {{{0,9}},224}, + {{{80,7}},6}, {{{0,8}},88}, {{{0,8}},24}, {{{0,9}},144}, + {{{83,7}},59}, {{{0,8}},120}, {{{0,8}},56}, {{{0,9}},208}, + {{{81,7}},17}, {{{0,8}},104}, {{{0,8}},40}, {{{0,9}},176}, + {{{0,8}},8}, {{{0,8}},136}, {{{0,8}},72}, {{{0,9}},240}, + {{{80,7}},4}, {{{0,8}},84}, {{{0,8}},20}, {{{85,8}},227}, + {{{83,7}},43}, {{{0,8}},116}, {{{0,8}},52}, {{{0,9}},200}, + {{{81,7}},13}, {{{0,8}},100}, {{{0,8}},36}, {{{0,9}},168}, + {{{0,8}},4}, {{{0,8}},132}, {{{0,8}},68}, {{{0,9}},232}, + {{{80,7}},8}, {{{0,8}},92}, {{{0,8}},28}, {{{0,9}},152}, + {{{84,7}},83}, {{{0,8}},124}, {{{0,8}},60}, {{{0,9}},216}, + {{{82,7}},23}, {{{0,8}},108}, {{{0,8}},44}, {{{0,9}},184}, + {{{0,8}},12}, {{{0,8}},140}, {{{0,8}},76}, {{{0,9}},248}, + {{{80,7}},3}, {{{0,8}},82}, {{{0,8}},18}, {{{85,8}},163}, + {{{83,7}},35}, {{{0,8}},114}, {{{0,8}},50}, {{{0,9}},196}, + {{{81,7}},11}, {{{0,8}},98}, {{{0,8}},34}, {{{0,9}},164}, + {{{0,8}},2}, {{{0,8}},130}, {{{0,8}},66}, {{{0,9}},228}, + {{{80,7}},7}, {{{0,8}},90}, {{{0,8}},26}, {{{0,9}},148}, + {{{84,7}},67}, {{{0,8}},122}, {{{0,8}},58}, {{{0,9}},212}, + {{{82,7}},19}, {{{0,8}},106}, {{{0,8}},42}, {{{0,9}},180}, + {{{0,8}},10}, {{{0,8}},138}, {{{0,8}},74}, {{{0,9}},244}, + {{{80,7}},5}, {{{0,8}},86}, {{{0,8}},22}, {{{192,8}},0}, + {{{83,7}},51}, {{{0,8}},118}, {{{0,8}},54}, {{{0,9}},204}, + {{{81,7}},15}, {{{0,8}},102}, {{{0,8}},38}, {{{0,9}},172}, + {{{0,8}},6}, {{{0,8}},134}, {{{0,8}},70}, {{{0,9}},236}, + {{{80,7}},9}, {{{0,8}},94}, {{{0,8}},30}, {{{0,9}},156}, + {{{84,7}},99}, {{{0,8}},126}, {{{0,8}},62}, {{{0,9}},220}, + {{{82,7}},27}, {{{0,8}},110}, {{{0,8}},46}, {{{0,9}},188}, + {{{0,8}},14}, {{{0,8}},142}, 
{{{0,8}},78}, {{{0,9}},252}, + {{{96,7}},256}, {{{0,8}},81}, {{{0,8}},17}, {{{85,8}},131}, + {{{82,7}},31}, {{{0,8}},113}, {{{0,8}},49}, {{{0,9}},194}, + {{{80,7}},10}, {{{0,8}},97}, {{{0,8}},33}, {{{0,9}},162}, + {{{0,8}},1}, {{{0,8}},129}, {{{0,8}},65}, {{{0,9}},226}, + {{{80,7}},6}, {{{0,8}},89}, {{{0,8}},25}, {{{0,9}},146}, + {{{83,7}},59}, {{{0,8}},121}, {{{0,8}},57}, {{{0,9}},210}, + {{{81,7}},17}, {{{0,8}},105}, {{{0,8}},41}, {{{0,9}},178}, + {{{0,8}},9}, {{{0,8}},137}, {{{0,8}},73}, {{{0,9}},242}, + {{{80,7}},4}, {{{0,8}},85}, {{{0,8}},21}, {{{80,8}},258}, + {{{83,7}},43}, {{{0,8}},117}, {{{0,8}},53}, {{{0,9}},202}, + {{{81,7}},13}, {{{0,8}},101}, {{{0,8}},37}, {{{0,9}},170}, + {{{0,8}},5}, {{{0,8}},133}, {{{0,8}},69}, {{{0,9}},234}, + {{{80,7}},8}, {{{0,8}},93}, {{{0,8}},29}, {{{0,9}},154}, + {{{84,7}},83}, {{{0,8}},125}, {{{0,8}},61}, {{{0,9}},218}, + {{{82,7}},23}, {{{0,8}},109}, {{{0,8}},45}, {{{0,9}},186}, + {{{0,8}},13}, {{{0,8}},141}, {{{0,8}},77}, {{{0,9}},250}, + {{{80,7}},3}, {{{0,8}},83}, {{{0,8}},19}, {{{85,8}},195}, + {{{83,7}},35}, {{{0,8}},115}, {{{0,8}},51}, {{{0,9}},198}, + {{{81,7}},11}, {{{0,8}},99}, {{{0,8}},35}, {{{0,9}},166}, + {{{0,8}},3}, {{{0,8}},131}, {{{0,8}},67}, {{{0,9}},230}, + {{{80,7}},7}, {{{0,8}},91}, {{{0,8}},27}, {{{0,9}},150}, + {{{84,7}},67}, {{{0,8}},123}, {{{0,8}},59}, {{{0,9}},214}, + {{{82,7}},19}, {{{0,8}},107}, {{{0,8}},43}, {{{0,9}},182}, + {{{0,8}},11}, {{{0,8}},139}, {{{0,8}},75}, {{{0,9}},246}, + {{{80,7}},5}, {{{0,8}},87}, {{{0,8}},23}, {{{192,8}},0}, + {{{83,7}},51}, {{{0,8}},119}, {{{0,8}},55}, {{{0,9}},206}, + {{{81,7}},15}, {{{0,8}},103}, {{{0,8}},39}, {{{0,9}},174}, + {{{0,8}},7}, {{{0,8}},135}, {{{0,8}},71}, {{{0,9}},238}, + {{{80,7}},9}, {{{0,8}},95}, {{{0,8}},31}, {{{0,9}},158}, + {{{84,7}},99}, {{{0,8}},127}, {{{0,8}},63}, {{{0,9}},222}, + {{{82,7}},27}, {{{0,8}},111}, {{{0,8}},47}, {{{0,9}},190}, + {{{0,8}},15}, {{{0,8}},143}, {{{0,8}},79}, {{{0,9}},254}, + {{{96,7}},256}, {{{0,8}},80}, 
{{{0,8}},16}, {{{84,8}},115}, + {{{82,7}},31}, {{{0,8}},112}, {{{0,8}},48}, {{{0,9}},193}, + {{{80,7}},10}, {{{0,8}},96}, {{{0,8}},32}, {{{0,9}},161}, + {{{0,8}},0}, {{{0,8}},128}, {{{0,8}},64}, {{{0,9}},225}, + {{{80,7}},6}, {{{0,8}},88}, {{{0,8}},24}, {{{0,9}},145}, + {{{83,7}},59}, {{{0,8}},120}, {{{0,8}},56}, {{{0,9}},209}, + {{{81,7}},17}, {{{0,8}},104}, {{{0,8}},40}, {{{0,9}},177}, + {{{0,8}},8}, {{{0,8}},136}, {{{0,8}},72}, {{{0,9}},241}, + {{{80,7}},4}, {{{0,8}},84}, {{{0,8}},20}, {{{85,8}},227}, + {{{83,7}},43}, {{{0,8}},116}, {{{0,8}},52}, {{{0,9}},201}, + {{{81,7}},13}, {{{0,8}},100}, {{{0,8}},36}, {{{0,9}},169}, + {{{0,8}},4}, {{{0,8}},132}, {{{0,8}},68}, {{{0,9}},233}, + {{{80,7}},8}, {{{0,8}},92}, {{{0,8}},28}, {{{0,9}},153}, + {{{84,7}},83}, {{{0,8}},124}, {{{0,8}},60}, {{{0,9}},217}, + {{{82,7}},23}, {{{0,8}},108}, {{{0,8}},44}, {{{0,9}},185}, + {{{0,8}},12}, {{{0,8}},140}, {{{0,8}},76}, {{{0,9}},249}, + {{{80,7}},3}, {{{0,8}},82}, {{{0,8}},18}, {{{85,8}},163}, + {{{83,7}},35}, {{{0,8}},114}, {{{0,8}},50}, {{{0,9}},197}, + {{{81,7}},11}, {{{0,8}},98}, {{{0,8}},34}, {{{0,9}},165}, + {{{0,8}},2}, {{{0,8}},130}, {{{0,8}},66}, {{{0,9}},229}, + {{{80,7}},7}, {{{0,8}},90}, {{{0,8}},26}, {{{0,9}},149}, + {{{84,7}},67}, {{{0,8}},122}, {{{0,8}},58}, {{{0,9}},213}, + {{{82,7}},19}, {{{0,8}},106}, {{{0,8}},42}, {{{0,9}},181}, + {{{0,8}},10}, {{{0,8}},138}, {{{0,8}},74}, {{{0,9}},245}, + {{{80,7}},5}, {{{0,8}},86}, {{{0,8}},22}, {{{192,8}},0}, + {{{83,7}},51}, {{{0,8}},118}, {{{0,8}},54}, {{{0,9}},205}, + {{{81,7}},15}, {{{0,8}},102}, {{{0,8}},38}, {{{0,9}},173}, + {{{0,8}},6}, {{{0,8}},134}, {{{0,8}},70}, {{{0,9}},237}, + {{{80,7}},9}, {{{0,8}},94}, {{{0,8}},30}, {{{0,9}},157}, + {{{84,7}},99}, {{{0,8}},126}, {{{0,8}},62}, {{{0,9}},221}, + {{{82,7}},27}, {{{0,8}},110}, {{{0,8}},46}, {{{0,9}},189}, + {{{0,8}},14}, {{{0,8}},142}, {{{0,8}},78}, {{{0,9}},253}, + {{{96,7}},256}, {{{0,8}},81}, {{{0,8}},17}, {{{85,8}},131}, + {{{82,7}},31}, {{{0,8}},113}, 
{{{0,8}},49}, {{{0,9}},195}, + {{{80,7}},10}, {{{0,8}},97}, {{{0,8}},33}, {{{0,9}},163}, + {{{0,8}},1}, {{{0,8}},129}, {{{0,8}},65}, {{{0,9}},227}, + {{{80,7}},6}, {{{0,8}},89}, {{{0,8}},25}, {{{0,9}},147}, + {{{83,7}},59}, {{{0,8}},121}, {{{0,8}},57}, {{{0,9}},211}, + {{{81,7}},17}, {{{0,8}},105}, {{{0,8}},41}, {{{0,9}},179}, + {{{0,8}},9}, {{{0,8}},137}, {{{0,8}},73}, {{{0,9}},243}, + {{{80,7}},4}, {{{0,8}},85}, {{{0,8}},21}, {{{80,8}},258}, + {{{83,7}},43}, {{{0,8}},117}, {{{0,8}},53}, {{{0,9}},203}, + {{{81,7}},13}, {{{0,8}},101}, {{{0,8}},37}, {{{0,9}},171}, + {{{0,8}},5}, {{{0,8}},133}, {{{0,8}},69}, {{{0,9}},235}, + {{{80,7}},8}, {{{0,8}},93}, {{{0,8}},29}, {{{0,9}},155}, + {{{84,7}},83}, {{{0,8}},125}, {{{0,8}},61}, {{{0,9}},219}, + {{{82,7}},23}, {{{0,8}},109}, {{{0,8}},45}, {{{0,9}},187}, + {{{0,8}},13}, {{{0,8}},141}, {{{0,8}},77}, {{{0,9}},251}, + {{{80,7}},3}, {{{0,8}},83}, {{{0,8}},19}, {{{85,8}},195}, + {{{83,7}},35}, {{{0,8}},115}, {{{0,8}},51}, {{{0,9}},199}, + {{{81,7}},11}, {{{0,8}},99}, {{{0,8}},35}, {{{0,9}},167}, + {{{0,8}},3}, {{{0,8}},131}, {{{0,8}},67}, {{{0,9}},231}, + {{{80,7}},7}, {{{0,8}},91}, {{{0,8}},27}, {{{0,9}},151}, + {{{84,7}},67}, {{{0,8}},123}, {{{0,8}},59}, {{{0,9}},215}, + {{{82,7}},19}, {{{0,8}},107}, {{{0,8}},43}, {{{0,9}},183}, + {{{0,8}},11}, {{{0,8}},139}, {{{0,8}},75}, {{{0,9}},247}, + {{{80,7}},5}, {{{0,8}},87}, {{{0,8}},23}, {{{192,8}},0}, + {{{83,7}},51}, {{{0,8}},119}, {{{0,8}},55}, {{{0,9}},207}, + {{{81,7}},15}, {{{0,8}},103}, {{{0,8}},39}, {{{0,9}},175}, + {{{0,8}},7}, {{{0,8}},135}, {{{0,8}},71}, {{{0,9}},239}, + {{{80,7}},9}, {{{0,8}},95}, {{{0,8}},31}, {{{0,9}},159}, + {{{84,7}},99}, {{{0,8}},127}, {{{0,8}},63}, {{{0,9}},223}, + {{{82,7}},27}, {{{0,8}},111}, {{{0,8}},47}, {{{0,9}},191}, + {{{0,8}},15}, {{{0,8}},143}, {{{0,8}},79}, {{{0,9}},255} + }; +local inflate_huft fixed_td[] = { + {{{80,5}},1}, {{{87,5}},257}, {{{83,5}},17}, {{{91,5}},4097}, + {{{81,5}},5}, {{{89,5}},1025}, {{{85,5}},65}, 
{{{93,5}},16385}, + {{{80,5}},3}, {{{88,5}},513}, {{{84,5}},33}, {{{92,5}},8193}, + {{{82,5}},9}, {{{90,5}},2049}, {{{86,5}},129}, {{{192,5}},24577}, + {{{80,5}},2}, {{{87,5}},385}, {{{83,5}},25}, {{{91,5}},6145}, + {{{81,5}},7}, {{{89,5}},1537}, {{{85,5}},97}, {{{93,5}},24577}, + {{{80,5}},4}, {{{88,5}},769}, {{{84,5}},49}, {{{92,5}},12289}, + {{{82,5}},13}, {{{90,5}},3073}, {{{86,5}},193}, {{{192,5}},24577} + }; diff --git a/src/utilfuns/zlib/inflate.c b/src/utilfuns/zlib/inflate.c new file mode 100644 index 0000000..32e9b8d --- /dev/null +++ b/src/utilfuns/zlib/inflate.c @@ -0,0 +1,366 @@ +/* inflate.c -- zlib interface to inflate modules + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "infblock.h" + +struct inflate_blocks_state {int dummy;}; /* for buggy compilers */ + +typedef enum { + METHOD, /* waiting for method byte */ + FLAG, /* waiting for flag byte */ + DICT4, /* four dictionary check bytes to go */ + DICT3, /* three dictionary check bytes to go */ + DICT2, /* two dictionary check bytes to go */ + DICT1, /* one dictionary check byte to go */ + DICT0, /* waiting for inflateSetDictionary */ + BLOCKS, /* decompressing blocks */ + CHECK4, /* four check bytes to go */ + CHECK3, /* three check bytes to go */ + CHECK2, /* two check bytes to go */ + CHECK1, /* one check byte to go */ + DONE, /* finished check, done */ + BAD} /* got an error--stay here */ +inflate_mode; + +/* inflate private state */ +struct internal_state { + + /* mode */ + inflate_mode mode; /* current inflate mode */ + + /* mode dependent information */ + union { + uInt method; /* if FLAGS, method byte */ + struct { + uLong was; /* computed check value */ + uLong need; /* stream check value */ + } check; /* if CHECK, check values to compare */ + uInt marker; /* if BAD, inflateSync's marker bytes count */ + } sub; /* submode */ + + /* mode independent information */ + int nowrap; /* flag for 
no wrapper */ + uInt wbits; /* log2(window size) (8..15, defaults to 15) */ + inflate_blocks_statef + *blocks; /* current inflate_blocks state */ + +}; + + +int ZEXPORT inflateReset(z) +z_streamp z; +{ + if (z == Z_NULL || z->state == Z_NULL) + return Z_STREAM_ERROR; + z->total_in = z->total_out = 0; + z->msg = Z_NULL; + z->state->mode = z->state->nowrap ? BLOCKS : METHOD; + inflate_blocks_reset(z->state->blocks, z, Z_NULL); + Tracev((stderr, "inflate: reset\n")); + return Z_OK; +} + + +int ZEXPORT inflateEnd(z) +z_streamp z; +{ + if (z == Z_NULL || z->state == Z_NULL || z->zfree == Z_NULL) + return Z_STREAM_ERROR; + if (z->state->blocks != Z_NULL) + inflate_blocks_free(z->state->blocks, z); + ZFREE(z, z->state); + z->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} + + +int ZEXPORT inflateInit2_(z, w, version, stream_size) +z_streamp z; +int w; +const char *version; +int stream_size; +{ + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != sizeof(z_stream)) + return Z_VERSION_ERROR; + + /* initialize state */ + if (z == Z_NULL) + return Z_STREAM_ERROR; + z->msg = Z_NULL; + if (z->zalloc == Z_NULL) + { + z->zalloc = zcalloc; + z->opaque = (voidpf)0; + } + if (z->zfree == Z_NULL) z->zfree = zcfree; + if ((z->state = (struct internal_state FAR *) + ZALLOC(z,1,sizeof(struct internal_state))) == Z_NULL) + return Z_MEM_ERROR; + z->state->blocks = Z_NULL; + + /* handle undocumented nowrap option (no zlib header or check) */ + z->state->nowrap = 0; + if (w < 0) + { + w = - w; + z->state->nowrap = 1; + } + + /* set window size */ + if (w < 8 || w > 15) + { + inflateEnd(z); + return Z_STREAM_ERROR; + } + z->state->wbits = (uInt)w; + + /* create inflate_blocks state */ + if ((z->state->blocks = + inflate_blocks_new(z, z->state->nowrap ? 
Z_NULL : adler32, (uInt)1 << w)) + == Z_NULL) + { + inflateEnd(z); + return Z_MEM_ERROR; + } + Tracev((stderr, "inflate: allocated\n")); + + /* reset state */ + inflateReset(z); + return Z_OK; +} + + +int ZEXPORT inflateInit_(z, version, stream_size) +z_streamp z; +const char *version; +int stream_size; +{ + return inflateInit2_(z, DEF_WBITS, version, stream_size); +} + + +#define NEEDBYTE {if(z->avail_in==0)return r;r=f;} +#define NEXTBYTE (z->avail_in--,z->total_in++,*z->next_in++) + +int ZEXPORT inflate(z, f) +z_streamp z; +int f; +{ + int r; + uInt b; + + if (z == Z_NULL || z->state == Z_NULL || z->next_in == Z_NULL) + return Z_STREAM_ERROR; + f = f == Z_FINISH ? Z_BUF_ERROR : Z_OK; + r = Z_BUF_ERROR; + while (1) switch (z->state->mode) + { + case METHOD: + NEEDBYTE + if (((z->state->sub.method = NEXTBYTE) & 0xf) != Z_DEFLATED) + { + z->state->mode = BAD; + z->msg = (char*)"unknown compression method"; + z->state->sub.marker = 5; /* can't try inflateSync */ + break; + } + if ((z->state->sub.method >> 4) + 8 > z->state->wbits) + { + z->state->mode = BAD; + z->msg = (char*)"invalid window size"; + z->state->sub.marker = 5; /* can't try inflateSync */ + break; + } + z->state->mode = FLAG; + case FLAG: + NEEDBYTE + b = NEXTBYTE; + if (((z->state->sub.method << 8) + b) % 31) + { + z->state->mode = BAD; + z->msg = (char*)"incorrect header check"; + z->state->sub.marker = 5; /* can't try inflateSync */ + break; + } + Tracev((stderr, "inflate: zlib header ok\n")); + if (!(b & PRESET_DICT)) + { + z->state->mode = BLOCKS; + break; + } + z->state->mode = DICT4; + case DICT4: + NEEDBYTE + z->state->sub.check.need = (uLong)NEXTBYTE << 24; + z->state->mode = DICT3; + case DICT3: + NEEDBYTE + z->state->sub.check.need += (uLong)NEXTBYTE << 16; + z->state->mode = DICT2; + case DICT2: + NEEDBYTE + z->state->sub.check.need += (uLong)NEXTBYTE << 8; + z->state->mode = DICT1; + case DICT1: + NEEDBYTE + z->state->sub.check.need += (uLong)NEXTBYTE; + z->adler = 
z->state->sub.check.need; + z->state->mode = DICT0; + return Z_NEED_DICT; + case DICT0: + z->state->mode = BAD; + z->msg = (char*)"need dictionary"; + z->state->sub.marker = 0; /* can try inflateSync */ + return Z_STREAM_ERROR; + case BLOCKS: + r = inflate_blocks(z->state->blocks, z, r); + if (r == Z_DATA_ERROR) + { + z->state->mode = BAD; + z->state->sub.marker = 0; /* can try inflateSync */ + break; + } + if (r == Z_OK) + r = f; + if (r != Z_STREAM_END) + return r; + r = f; + inflate_blocks_reset(z->state->blocks, z, &z->state->sub.check.was); + if (z->state->nowrap) + { + z->state->mode = DONE; + break; + } + z->state->mode = CHECK4; + case CHECK4: + NEEDBYTE + z->state->sub.check.need = (uLong)NEXTBYTE << 24; + z->state->mode = CHECK3; + case CHECK3: + NEEDBYTE + z->state->sub.check.need += (uLong)NEXTBYTE << 16; + z->state->mode = CHECK2; + case CHECK2: + NEEDBYTE + z->state->sub.check.need += (uLong)NEXTBYTE << 8; + z->state->mode = CHECK1; + case CHECK1: + NEEDBYTE + z->state->sub.check.need += (uLong)NEXTBYTE; + + if (z->state->sub.check.was != z->state->sub.check.need) + { + z->state->mode = BAD; + z->msg = (char*)"incorrect data check"; + z->state->sub.marker = 5; /* can't try inflateSync */ + break; + } + Tracev((stderr, "inflate: zlib check ok\n")); + z->state->mode = DONE; + case DONE: + return Z_STREAM_END; + case BAD: + return Z_DATA_ERROR; + default: + return Z_STREAM_ERROR; + } +#ifdef NEED_DUMMY_RETURN + return Z_STREAM_ERROR; /* Some dumb compilers complain without this */ +#endif +} + + +int ZEXPORT inflateSetDictionary(z, dictionary, dictLength) +z_streamp z; +const Bytef *dictionary; +uInt dictLength; +{ + uInt length = dictLength; + + if (z == Z_NULL || z->state == Z_NULL || z->state->mode != DICT0) + return Z_STREAM_ERROR; + + if (adler32(1L, dictionary, dictLength) != z->adler) return Z_DATA_ERROR; + z->adler = 1L; + + if (length >= ((uInt)1<<z->state->wbits)) + { + length = (1<<z->state->wbits)-1; + dictionary += dictLength - length; + } + 
inflate_set_dictionary(z->state->blocks, dictionary, length); + z->state->mode = BLOCKS; + return Z_OK; +} + + +int ZEXPORT inflateSync(z) +z_streamp z; +{ + uInt n; /* number of bytes to look at */ + Bytef *p; /* pointer to bytes */ + uInt m; /* number of marker bytes found in a row */ + uLong r, w; /* temporaries to save total_in and total_out */ + + /* set up */ + if (z == Z_NULL || z->state == Z_NULL) + return Z_STREAM_ERROR; + if (z->state->mode != BAD) + { + z->state->mode = BAD; + z->state->sub.marker = 0; + } + if ((n = z->avail_in) == 0) + return Z_BUF_ERROR; + p = z->next_in; + m = z->state->sub.marker; + + /* search */ + while (n && m < 4) + { + static const Byte mark[4] = {0, 0, 0xff, 0xff}; + if (*p == mark[m]) + m++; + else if (*p) + m = 0; + else + m = 4 - m; + p++, n--; + } + + /* restore */ + z->total_in += p - z->next_in; + z->next_in = p; + z->avail_in = n; + z->state->sub.marker = m; + + /* return no joy or set up to restart on a new block */ + if (m != 4) + return Z_DATA_ERROR; + r = z->total_in; w = z->total_out; + inflateReset(z); + z->total_in = r; z->total_out = w; + z->state->mode = BLOCKS; + return Z_OK; +} + + +/* Returns true if inflate is currently at the end of a block generated + * by Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP + * implementation to provide an additional safety check. PPP uses Z_SYNC_FLUSH + * but removes the length bytes of the resulting empty stored block. When + * decompressing, PPP checks that at the end of input packet, inflate is + * waiting for these length bytes. 
+ */ +int ZEXPORT inflateSyncPoint(z) +z_streamp z; +{ + if (z == Z_NULL || z->state == Z_NULL || z->state->blocks == Z_NULL) + return Z_STREAM_ERROR; + return inflate_blocks_sync_point(z->state->blocks); +} diff --git a/src/utilfuns/zlib/inftrees.c b/src/utilfuns/zlib/inftrees.c new file mode 100644 index 0000000..ef1e0b6 --- /dev/null +++ b/src/utilfuns/zlib/inftrees.c @@ -0,0 +1,455 @@ +/* inftrees.c -- generate Huffman trees for efficient decoding + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" + +#if !defined(BUILDFIXED) && !defined(STDC) +# define BUILDFIXED /* non ANSI compilers may not accept inffixed.h */ +#endif + +const char inflate_copyright[] = + " inflate 1.1.3 Copyright 1995-1998 Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ +struct internal_state {int dummy;}; /* for buggy compilers */ + +/* simplify the use of the inflate_huft type with some defines */ +#define exop word.what.Exop +#define bits word.what.Bits + + +local int huft_build OF(( + uIntf *, /* code lengths in bits */ + uInt, /* number of codes */ + uInt, /* number of "simple" codes */ + const uIntf *, /* list of base values for non-simple codes */ + const uIntf *, /* list of extra bits for non-simple codes */ + inflate_huft * FAR*,/* result: starting table */ + uIntf *, /* maximum lookup bits (returns actual) */ + inflate_huft *, /* space for trees */ + uInt *, /* hufts used in space */ + uIntf * )); /* space for values */ + +/* Tables for deflate from PKZIP's appnote.txt. 
*/ +local const uInt cplens[31] = { /* Copy lengths for literal codes 257..285 */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + /* see note #13 above about 258 */ +local const uInt cplext[31] = { /* Extra bits for literal codes 257..285 */ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 112, 112}; /* 112==invalid */ +local const uInt cpdist[30] = { /* Copy offsets for distance codes 0..29 */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577}; +local const uInt cpdext[30] = { /* Extra bits for distance codes */ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, + 12, 12, 13, 13}; + +/* + Huffman code decoding is performed using a multi-level table lookup. + The fastest way to decode is to simply build a lookup table whose + size is determined by the longest code. However, the time it takes + to build this table can also be a factor if the data being decoded + is not very long. The most common codes are necessarily the + shortest codes, so those codes dominate the decoding time, and hence + the speed. The idea is you can have a shorter table that decodes the + shorter, more probable codes, and then point to subsidiary tables for + the longer codes. The time it costs to decode the longer codes is + then traded against the time it takes to make longer tables. + + This results of this trade are in the variables lbits and dbits + below. lbits is the number of bits the first level table for literal/ + length codes can decode in one step, and dbits is the same thing for + the distance codes. Subsequent tables are also less than or equal to + those sizes. 
These values may be adjusted either when all of the + codes are shorter than that, in which case the longest code length in + bits is used, or when the shortest code is *longer* than the requested + table size, in which case the length of the shortest code in bits is + used. + + There are two different values for the two tables, since they code a + different number of possibilities each. The literal/length table + codes 286 possible values, or in a flat code, a little over eight + bits. The distance table codes 30 possible values, or a little less + than five bits, flat. The optimum values for speed end up being + about one bit more than those, so lbits is 8+1 and dbits is 5+1. + The optimum values may differ though from machine to machine, and + possibly even between compilers. Your mileage may vary. + */ + + +/* If BMAX needs to be larger than 16, then h and x[] should be uLong. */ +#define BMAX 15 /* maximum bit length of any code */ + +local int huft_build(b, n, s, d, e, t, m, hp, hn, v) +uIntf *b; /* code lengths in bits (all assumed <= BMAX) */ +uInt n; /* number of codes (assumed <= 288) */ +uInt s; /* number of simple-valued codes (0..s-1) */ +const uIntf *d; /* list of base values for non-simple codes */ +const uIntf *e; /* list of extra bits for non-simple codes */ +inflate_huft * FAR *t; /* result: starting table */ +uIntf *m; /* maximum lookup bits, returns actual */ +inflate_huft *hp; /* space for trees */ +uInt *hn; /* hufts used in space */ +uIntf *v; /* working area: values in order of bit length */ +/* Given a list of code lengths and a maximum table size, make a set of + tables to decode that set of codes. Return Z_OK on success, Z_BUF_ERROR + if the given code set is incomplete (the tables are still built in this + case), Z_DATA_ERROR if the input is invalid (an over-subscribed set of + lengths), or Z_MEM_ERROR if not enough memory. 
*/ +{ + + uInt a; /* counter for codes of length k */ + uInt c[BMAX+1]; /* bit length count table */ + uInt f; /* i repeats in table every f entries */ + int g; /* maximum code length */ + int h; /* table level */ + register uInt i; /* counter, current code */ + register uInt j; /* counter */ + register int k; /* number of bits in current code */ + int l; /* bits per table (returned in m) */ + uInt mask; /* (1 << w) - 1, to avoid cc -O bug on HP */ + register uIntf *p; /* pointer into c[], b[], or v[] */ + inflate_huft *q; /* points to current table */ + struct inflate_huft_s r; /* table entry for structure assignment */ + inflate_huft *u[BMAX]; /* table stack */ + register int w; /* bits before this table == (l * h) */ + uInt x[BMAX+1]; /* bit offsets, then code stack */ + uIntf *xp; /* pointer into x */ + int y; /* number of dummy codes added */ + uInt z; /* number of entries in current table */ + + + /* Generate counts for each bit length */ + p = c; +#define C0 *p++ = 0; +#define C2 C0 C0 C0 C0 +#define C4 C2 C2 C2 C2 + C4 /* clear c[]--assume BMAX+1 is 16 */ + p = b; i = n; + do { + c[*p++]++; /* assume all entries <= BMAX */ + } while (--i); + if (c[0] == n) /* null input--all zero length codes */ + { + *t = (inflate_huft *)Z_NULL; + *m = 0; + return Z_OK; + } + + + /* Find minimum and maximum length, bound *m by those */ + l = *m; + for (j = 1; j <= BMAX; j++) + if (c[j]) + break; + k = j; /* minimum code length */ + if ((uInt)l < j) + l = j; + for (i = BMAX; i; i--) + if (c[i]) + break; + g = i; /* maximum code length */ + if ((uInt)l > i) + l = i; + *m = l; + + + /* Adjust last length count to fill out codes, if needed */ + for (y = 1 << j; j < i; j++, y <<= 1) + if ((y -= c[j]) < 0) + return Z_DATA_ERROR; + if ((y -= c[i]) < 0) + return Z_DATA_ERROR; + c[i] += y; + + + /* Generate starting offsets into the value table for each length */ + x[1] = j = 0; + p = c + 1; xp = x + 2; + while (--i) { /* note that i == g from above */ + *xp++ = (j += *p++); + } + 
+ + /* Make a table of values in order of bit lengths */ + p = b; i = 0; + do { + if ((j = *p++) != 0) + v[x[j]++] = i; + } while (++i < n); + n = x[g]; /* set n to length of v */ + + + /* Generate the Huffman codes and for each, make the table entries */ + x[0] = i = 0; /* first Huffman code is zero */ + p = v; /* grab values in bit order */ + h = -1; /* no tables yet--level -1 */ + w = -l; /* bits decoded == (l * h) */ + u[0] = (inflate_huft *)Z_NULL; /* just to keep compilers happy */ + q = (inflate_huft *)Z_NULL; /* ditto */ + z = 0; /* ditto */ + + /* go through the bit lengths (k already is bits in shortest code) */ + for (; k <= g; k++) + { + a = c[k]; + while (a--) + { + /* here i is the Huffman code of length k bits for value *p */ + /* make tables up to required level */ + while (k > w + l) + { + h++; + w += l; /* previous table always l bits */ + + /* compute minimum size table less than or equal to l bits */ + z = g - w; + z = z > (uInt)l ? l : z; /* table size upper limit */ + if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */ + { /* too few codes for k-w bit table */ + f -= a + 1; /* deduct codes from patterns left */ + xp = c + k; + if (j < z) + while (++j < z) /* try smaller tables up to z bits */ + { + if ((f <<= 1) <= *++xp) + break; /* enough codes to use up j bits */ + f -= *xp; /* else deduct codes from patterns */ + } + } + z = 1 << j; /* table entries for j-bit table */ + + /* allocate new table */ + if (*hn + z > MANY) /* (note: doesn't matter for fixed) */ + return Z_MEM_ERROR; /* not enough memory */ + u[h] = q = hp + *hn; + *hn += z; + + /* connect to last table, if there is one */ + if (h) + { + x[h] = i; /* save pattern for backing up */ + r.bits = (Byte)l; /* bits to dump before this table */ + r.exop = (Byte)j; /* bits in this table */ + j = i >> (w - l); + r.base = (uInt)(q - u[h-1] - j); /* offset to this table */ + u[h-1][j] = r; /* connect to last table */ + } + else + *t = q; /* first table is returned result */ + } + 
+ /* set up table entry in r */ + r.bits = (Byte)(k - w); + if (p >= v + n) + r.exop = 128 + 64; /* out of values--invalid code */ + else if (*p < s) + { + r.exop = (Byte)(*p < 256 ? 0 : 32 + 64); /* 256 is end-of-block */ + r.base = *p++; /* simple code is just the value */ + } + else + { + r.exop = (Byte)(e[*p - s] + 16 + 64);/* non-simple--look up in lists */ + r.base = d[*p++ - s]; + } + + /* fill code-like entries with r */ + f = 1 << (k - w); + for (j = i >> w; j < z; j += f) + q[j] = r; + + /* backwards increment the k-bit code i */ + for (j = 1 << (k - 1); i & j; j >>= 1) + i ^= j; + i ^= j; + + /* backup over finished tables */ + mask = (1 << w) - 1; /* needed on HP, cc -O bug */ + while ((i & mask) != x[h]) + { + h--; /* don't need to update q */ + w -= l; + mask = (1 << w) - 1; + } + } + } + + + /* Return Z_BUF_ERROR if we were given an incomplete table */ + return y != 0 && g != 1 ? Z_BUF_ERROR : Z_OK; +} + + +int inflate_trees_bits(c, bb, tb, hp, z) +uIntf *c; /* 19 code lengths */ +uIntf *bb; /* bits tree desired/actual depth */ +inflate_huft * FAR *tb; /* bits tree result */ +inflate_huft *hp; /* space for trees */ +z_streamp z; /* for messages */ +{ + int r; + uInt hn = 0; /* hufts used in space */ + uIntf *v; /* work area for huft_build */ + + if ((v = (uIntf*)ZALLOC(z, 19, sizeof(uInt))) == Z_NULL) + return Z_MEM_ERROR; + r = huft_build(c, 19, 19, (uIntf*)Z_NULL, (uIntf*)Z_NULL, + tb, bb, hp, &hn, v); + if (r == Z_DATA_ERROR) + z->msg = (char*)"oversubscribed dynamic bit lengths tree"; + else if (r == Z_BUF_ERROR || *bb == 0) + { + z->msg = (char*)"incomplete dynamic bit lengths tree"; + r = Z_DATA_ERROR; + } + ZFREE(z, v); + return r; +} + + +int inflate_trees_dynamic(nl, nd, c, bl, bd, tl, td, hp, z) +uInt nl; /* number of literal/length codes */ +uInt nd; /* number of distance codes */ +uIntf *c; /* that many (total) code lengths */ +uIntf *bl; /* literal desired/actual bit depth */ +uIntf *bd; /* distance desired/actual bit depth */ 
+inflate_huft * FAR *tl; /* literal/length tree result */ +inflate_huft * FAR *td; /* distance tree result */ +inflate_huft *hp; /* space for trees */ +z_streamp z; /* for messages */ +{ + int r; + uInt hn = 0; /* hufts used in space */ + uIntf *v; /* work area for huft_build */ + + /* allocate work area */ + if ((v = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL) + return Z_MEM_ERROR; + + /* build literal/length tree */ + r = huft_build(c, nl, 257, cplens, cplext, tl, bl, hp, &hn, v); + if (r != Z_OK || *bl == 0) + { + if (r == Z_DATA_ERROR) + z->msg = (char*)"oversubscribed literal/length tree"; + else if (r != Z_MEM_ERROR) + { + z->msg = (char*)"incomplete literal/length tree"; + r = Z_DATA_ERROR; + } + ZFREE(z, v); + return r; + } + + /* build distance tree */ + r = huft_build(c + nl, nd, 0, cpdist, cpdext, td, bd, hp, &hn, v); + if (r != Z_OK || (*bd == 0 && nl > 257)) + { + if (r == Z_DATA_ERROR) + z->msg = (char*)"oversubscribed distance tree"; + else if (r == Z_BUF_ERROR) { +#ifdef PKZIP_BUG_WORKAROUND + r = Z_OK; + } +#else + z->msg = (char*)"incomplete distance tree"; + r = Z_DATA_ERROR; + } + else if (r != Z_MEM_ERROR) + { + z->msg = (char*)"empty distance tree with lengths"; + r = Z_DATA_ERROR; + } + ZFREE(z, v); + return r; +#endif + } + + /* done */ + ZFREE(z, v); + return Z_OK; +} + + +/* build fixed tables only once--keep them here */ +#ifdef BUILDFIXED +local int fixed_built = 0; +#define FIXEDH 544 /* number of hufts used by fixed tables */ +local inflate_huft fixed_mem[FIXEDH]; +local uInt fixed_bl; +local uInt fixed_bd; +local inflate_huft *fixed_tl; +local inflate_huft *fixed_td; +#else +#include "inffixed.h" +#endif + + +int inflate_trees_fixed(bl, bd, tl, td, z) +uIntf *bl; /* literal desired/actual bit depth */ +uIntf *bd; /* distance desired/actual bit depth */ +inflate_huft * FAR *tl; /* literal/length tree result */ +inflate_huft * FAR *td; /* distance tree result */ +z_streamp z; /* for memory allocation */ +{ +#ifdef BUILDFIXED + /* 
build fixed tables if not already */ + if (!fixed_built) + { + int k; /* temporary variable */ + uInt f = 0; /* number of hufts used in fixed_mem */ + uIntf *c; /* length list for huft_build */ + uIntf *v; /* work area for huft_build */ + + /* allocate memory */ + if ((c = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL) + return Z_MEM_ERROR; + if ((v = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL) + { + ZFREE(z, c); + return Z_MEM_ERROR; + } + + /* literal table */ + for (k = 0; k < 144; k++) + c[k] = 8; + for (; k < 256; k++) + c[k] = 9; + for (; k < 280; k++) + c[k] = 7; + for (; k < 288; k++) + c[k] = 8; + fixed_bl = 9; + huft_build(c, 288, 257, cplens, cplext, &fixed_tl, &fixed_bl, + fixed_mem, &f, v); + + /* distance table */ + for (k = 0; k < 30; k++) + c[k] = 5; + fixed_bd = 5; + huft_build(c, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd, + fixed_mem, &f, v); + + /* done */ + ZFREE(z, v); + ZFREE(z, c); + fixed_built = 1; + } +#endif + *bl = fixed_bl; + *bd = fixed_bd; + *tl = fixed_tl; + *td = fixed_td; + return Z_OK; +} diff --git a/src/utilfuns/zlib/inftrees.h b/src/utilfuns/zlib/inftrees.h new file mode 100644 index 0000000..85853e0 --- /dev/null +++ b/src/utilfuns/zlib/inftrees.h @@ -0,0 +1,58 @@ +/* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Huffman code lookup table entry--this entry is four bytes for machines + that have 16-bit pointers (e.g. PC's in the small or medium model). 
*/ + +typedef struct inflate_huft_s FAR inflate_huft; + +struct inflate_huft_s { + union { + struct { + Byte Exop; /* number of extra bits or operation */ + Byte Bits; /* number of bits in this code or subcode */ + } what; + uInt pad; /* pad structure to a power of 2 (4 bytes for */ + } word; /* 16-bit, 8 bytes for 32-bit int's) */ + uInt base; /* literal, length base, distance base, + or table offset */ +}; + +/* Maximum size of dynamic tree. The maximum found in a long but non- + exhaustive search was 1004 huft structures (850 for length/literals + and 154 for distances, the latter actually the result of an + exhaustive search). The actual maximum is not known, but the + value below is more than safe. */ +#define MANY 1440 + +extern int inflate_trees_bits OF(( + uIntf *, /* 19 code lengths */ + uIntf *, /* bits tree desired/actual depth */ + inflate_huft * FAR *, /* bits tree result */ + inflate_huft *, /* space for trees */ + z_streamp)); /* for messages */ + +extern int inflate_trees_dynamic OF(( + uInt, /* number of literal/length codes */ + uInt, /* number of distance codes */ + uIntf *, /* that many (total) code lengths */ + uIntf *, /* literal desired/actual bit depth */ + uIntf *, /* distance desired/actual bit depth */ + inflate_huft * FAR *, /* literal/length tree result */ + inflate_huft * FAR *, /* distance tree result */ + inflate_huft *, /* space for trees */ + z_streamp)); /* for messages */ + +extern int inflate_trees_fixed OF(( + uIntf *, /* literal desired/actual bit depth */ + uIntf *, /* distance desired/actual bit depth */ + inflate_huft * FAR *, /* literal/length tree result */ + inflate_huft * FAR *, /* distance tree result */ + z_streamp)); /* for memory allocation */ diff --git a/src/utilfuns/zlib/infutil.c b/src/utilfuns/zlib/infutil.c new file mode 100644 index 0000000..824dab5 --- /dev/null +++ b/src/utilfuns/zlib/infutil.c @@ -0,0 +1,87 @@ +/* inflate_util.c -- data and routines common to blocks and codes + * Copyright (C) 1995-1998 
Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "infblock.h" +#include "inftrees.h" +#include "infcodes.h" +#include "infutil.h" + +struct inflate_codes_state {int dummy;}; /* for buggy compilers */ + +/* And'ing with mask[n] masks the lower n bits */ +uInt inflate_mask[17] = { + 0x0000, + 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, + 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff +}; + + +/* copy as much as possible from the sliding window to the output area */ +int inflate_flush(s, z, r) +inflate_blocks_statef *s; +z_streamp z; +int r; +{ + uInt n; + Bytef *p; + Bytef *q; + + /* local copies of source and destination pointers */ + p = z->next_out; + q = s->read; + + /* compute number of bytes to copy as far as end of window */ + n = (uInt)((q <= s->write ? s->write : s->end) - q); + if (n > z->avail_out) n = z->avail_out; + if (n && r == Z_BUF_ERROR) r = Z_OK; + + /* update counters */ + z->avail_out -= n; + z->total_out += n; + + /* update check information */ + if (s->checkfn != Z_NULL) + z->adler = s->check = (*s->checkfn)(s->check, q, n); + + /* copy as far as end of window */ + zmemcpy(p, q, n); + p += n; + q += n; + + /* see if more to copy at beginning of window */ + if (q == s->end) + { + /* wrap pointers */ + q = s->window; + if (s->write == s->end) + s->write = s->window; + + /* compute bytes to copy */ + n = (uInt)(s->write - q); + if (n > z->avail_out) n = z->avail_out; + if (n && r == Z_BUF_ERROR) r = Z_OK; + + /* update counters */ + z->avail_out -= n; + z->total_out += n; + + /* update check information */ + if (s->checkfn != Z_NULL) + z->adler = s->check = (*s->checkfn)(s->check, q, n); + + /* copy */ + zmemcpy(p, q, n); + p += n; + q += n; + } + + /* update pointers */ + z->next_out = p; + s->read = q; + + /* done */ + return r; +} diff --git a/src/utilfuns/zlib/infutil.h b/src/utilfuns/zlib/infutil.h new file mode 100644 index 
0000000..99d1135 --- /dev/null +++ b/src/utilfuns/zlib/infutil.h @@ -0,0 +1,98 @@ +/* infutil.h -- types and macros common to blocks and codes + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +#ifndef _INFUTIL_H +#define _INFUTIL_H + +typedef enum { + TYPE, /* get type bits (3, including end bit) */ + LENS, /* get lengths for stored */ + STORED, /* processing stored block */ + TABLE, /* get table lengths */ + BTREE, /* get bit lengths tree for a dynamic block */ + DTREE, /* get length, distance trees for a dynamic block */ + CODES, /* processing fixed or dynamic block */ + DRY, /* output remaining window bytes */ + DONE, /* finished last block, done */ + BAD} /* got a data error--stuck here */ +inflate_block_mode; + +/* inflate blocks semi-private state */ +struct inflate_blocks_state { + + /* mode */ + inflate_block_mode mode; /* current inflate_block mode */ + + /* mode dependent information */ + union { + uInt left; /* if STORED, bytes left to copy */ + struct { + uInt table; /* table lengths (14 bits) */ + uInt index; /* index into blens (or border) */ + uIntf *blens; /* bit lengths of codes */ + uInt bb; /* bit length tree depth */ + inflate_huft *tb; /* bit length decoding tree */ + } trees; /* if DTREE, decoding info for trees */ + struct { + inflate_codes_statef + *codes; + } decode; /* if CODES, current state */ + } sub; /* submode */ + uInt last; /* true if this block is the last block */ + + /* mode independent information */ + uInt bitk; /* bits in bit buffer */ + uLong bitb; /* bit buffer */ + inflate_huft *hufts; /* single malloc for tree space */ + Bytef *window; /* sliding window */ + Bytef *end; /* one byte after sliding window */ + Bytef *read; /* window read pointer */ + Bytef *write; 
/* window write pointer */ + check_func checkfn; /* check function */ + uLong check; /* check on output */ + +}; + + +/* defines for inflate input/output */ +/* update pointers and return */ +#define UPDBITS {s->bitb=b;s->bitk=k;} +#define UPDIN {z->avail_in=n;z->total_in+=p-z->next_in;z->next_in=p;} +#define UPDOUT {s->write=q;} +#define UPDATE {UPDBITS UPDIN UPDOUT} +#define LEAVE {UPDATE return inflate_flush(s,z,r);} +/* get bytes and bits */ +#define LOADIN {p=z->next_in;n=z->avail_in;b=s->bitb;k=s->bitk;} +#define NEEDBYTE {if(n)r=Z_OK;else LEAVE} +#define NEXTBYTE (n--,*p++) +#define NEEDBITS(j) {while(k<(j)){NEEDBYTE;b|=((uLong)NEXTBYTE)<<k;k+=8;}} +#define DUMPBITS(j) {b>>=(j);k-=(j);} +/* output bytes */ +#define WAVAIL (uInt)(q<s->read?s->read-q-1:s->end-q) +#define LOADOUT {q=s->write;m=(uInt)WAVAIL;} +#define WRAP {if(q==s->end&&s->read!=s->window){q=s->window;m=(uInt)WAVAIL;}} +#define FLUSH {UPDOUT r=inflate_flush(s,z,r); LOADOUT} +#define NEEDOUT {if(m==0){WRAP if(m==0){FLUSH WRAP if(m==0) LEAVE}}r=Z_OK;} +#define OUTBYTE(a) {*q++=(Byte)(a);m--;} +/* load local pointers */ +#define LOAD {LOADIN LOADOUT} + +/* masks for lower bits (size given to avoid silly warnings with Visual C++) */ +extern uInt inflate_mask[17]; + +/* copy as much as possible from the sliding window to the output area */ +extern int inflate_flush OF(( + inflate_blocks_statef *, + z_streamp , + int)); + +struct internal_state {int dummy;}; /* for buggy compilers */ + +#endif diff --git a/src/utilfuns/zlib/maketree.c b/src/utilfuns/zlib/maketree.c new file mode 100644 index 0000000..949d786 --- /dev/null +++ b/src/utilfuns/zlib/maketree.c @@ -0,0 +1,85 @@ +/* maketree.c -- make inffixed.h table for decoding fixed codes + * Copyright (C) 1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. 
Applications should only use zlib.h.
+ */
+
+/* This program is included in the distribution for completeness.
+   You do not need to compile or run this program since inffixed.h
+   is already included in the distribution. To use this program
+   you need to compile zlib with BUILDFIXED defined and then compile
+   and link this program with the zlib library. Then the output of
+   this program can be piped to inffixed.h. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "zutil.h"
+#include "inftrees.h"
+
+/* simplify the use of the inflate_huft type with some defines */
+#define exop word.what.Exop
+#define bits word.what.Bits
+
+/* Print the (1 << b) entries of table t on stdout as a C initializer list,
+ * four entries per row.
+ *   b: log2 of the number of entries in t
+ *   t: table to print
+ * Exits with status 1 if an entry is a table pointer, because links to
+ * sub-tables cannot be written as a static initializer.
+ */
+void maketree(uInt b, inflate_huft *t)
+{
+    int i, e;
+
+    i = 0;
+    while (1)
+    {
+        e = t[i].exop;
+        if (e && (e & (16+64)) == 0)        /* table pointer */
+        {
+            fprintf(stderr, "maketree: cannot initialize sub-tables!\n");
+            exit(1);
+        }
+        if (i % 4 == 0)
+            printf("\n ");
+        /* %u expects unsigned int: convert explicitly instead of relying on
+         * the default promotion of the narrower field types */
+        printf(" {{{%u,%u}},%u}", (unsigned)t[i].exop, (unsigned)t[i].bits,
+               (unsigned)t[i].base);
+        if (++i == (1<<b))
+            break;
+        putchar(',');
+    }
+    puts("");
+}
+
+/* Create the fixed tables in C initialization syntax on stdout.
+ * Returns 0 on success, 1 if inflate_trees_fixed() reports an error, so that
+ * a build script piping the output to inffixed.h can detect the failure.
+ */
+int main(void)
+{
+    int r;
+    uInt bl, bd;
+    inflate_huft *tl, *td;
+    z_stream z;
+
+    z.zalloc = zcalloc;
+    z.opaque = (voidpf)0;
+    z.zfree = zcfree;
+    r = inflate_trees_fixed(&bl, &bd, &tl, &td, &z);
+    if (r)
+    {
+        fprintf(stderr, "inflate_trees_fixed error %d\n", r);
+        return 1;
+    }
+    puts("/* inffixed.h -- table for decoding fixed codes");
+    puts(" * Generated automatically by the maketree.c program");
+    puts(" */");
+    puts("");
+    puts("/* WARNING: this file should *not* be used by applications. It is");
+    puts(" part of the implementation of the compression library and is");
+    puts(" subject to change. Applications should only use zlib.h.");
+    puts(" */");
+    puts("");
+    printf("local uInt fixed_bl = %u;\n", (unsigned)bl);
+    printf("local uInt fixed_bd = %u;\n", (unsigned)bd);
+    printf("local inflate_huft fixed_tl[] = {");
+    maketree(bl, tl);
+    puts(" };");
+    printf("local inflate_huft fixed_td[] = {");
+    maketree(bd, td);
+    puts(" };");
+    return 0;
+}
diff --git a/src/utilfuns/zlib/trees.c b/src/utilfuns/zlib/trees.c new file mode 100644 index 0000000..4d59d6e --- /dev/null +++ b/src/utilfuns/zlib/trees.c @@ -0,0 +1,1214 @@
+/* trees.c -- output deflated data using Huffman coding
+ * Copyright (C) 1995-1998 Jean-loup Gailly
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+ *  ALGORITHM
+ *
+ *      The "deflation" process uses several Huffman trees. The more
+ *      common source values are represented by shorter bit sequences.
+ *
+ *      Each code tree is stored in a compressed form which is itself
+ *      a Huffman encoding of the lengths of all the code strings (in
+ *      ascending order by source values). The actual code strings are
+ *      reconstructed from the lengths in the inflate process, as described
+ *      in the deflate specification.
+ *
+ *  REFERENCES
+ *
+ *      Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
+ *      Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
+ *
+ *      Storer, James A.
+ *          Data Compression: Methods and Theory, pp. 49-50.
+ *          Computer Science Press, 1988. ISBN 0-7167-8156-5.
+ *
+ *      Sedgewick, R.
+ *          Algorithms, p290.
+ *          Addison-Wesley, 1983. ISBN 0-201-06672-6.
+ */
+
+/* @(#) $Id: trees.c,v 1.1 2001/03/23 09:00:15 scribe Exp $ */
+
+/* #define GEN_TREES_H */
+
+#include "deflate.h"
+
+#ifdef DEBUG
+#  include <ctype.h>
+#endif
+
+/* ===========================================================================
+ * Constants
+ */
+
+#define MAX_BL_BITS 7
+/* Bit length codes must not exceed MAX_BL_BITS bits */
+
+#define END_BLOCK 256
+/* end of block literal code */
+
+#define REP_3_6      16
+/* bit length tree symbol: repeat previous bit length 3-6 times
+ * (2 bits of repeat count)
+ */
+
+#define REPZ_3_10    17
+/* bit length tree symbol: repeat a zero length 3-10 times
+ * (3 bits of repeat count)
+ */
+
+#define REPZ_11_138  18
+/* bit length tree symbol: repeat a zero length 11-138 times
+ * (7 bits of repeat count)
+ */
+
+local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
+   = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
+
+local const int extra_dbits[D_CODES] /* extra bits for each distance code */
+   = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
+   = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
+
+local const uch bl_order[BL_CODES]
+   = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
+/* The lengths of the bit length codes are sent in order of decreasing
+ * probability, to avoid transmitting the lengths for unused bit length codes.
+ */
+
+#define Buf_size (8 * 2*sizeof(char))
+/* Number of bits used within bi_buf. The expression evaluates to 16, since
+ * sizeof(char) is 1. (bi_buf might be implemented on more than 16 bits on
+ * some systems.)
+ */
+
+/* ===========================================================================
+ * Local data. These are initialized only once.
+ */
+
+#define DIST_CODE_LEN  512 /* size of the array _dist_code defined below */
+
+#if defined(GEN_TREES_H) || !defined(STDC)
+/* non ANSI compilers may not accept trees.h */
+
+local ct_data static_ltree[L_CODES+2];
+/* The static literal tree.
Since the bit lengths are imposed, there is no
+ * need for the L_CODES extra codes used during heap construction. However
+ * The codes 286 and 287 are needed to build a canonical tree (see _tr_init
+ * below).
+ */
+
+local ct_data static_dtree[D_CODES];
+/* The static distance tree. (Actually a trivial tree since all codes use
+ * 5 bits.)
+ */
+
+uch _dist_code[DIST_CODE_LEN];
+/* Distance codes. The first 256 values correspond to the distances
+ * 3 .. 258, the last 256 values correspond to the top 8 bits of
+ * the 15 bit distances.
+ */
+
+uch _length_code[MAX_MATCH-MIN_MATCH+1];
+/* length code for each normalized match length (0 == MIN_MATCH) */
+
+local int base_length[LENGTH_CODES];
+/* First normalized length for each code (0 = MIN_MATCH) */
+
+local int base_dist[D_CODES];
+/* First normalized distance for each code (0 = distance of 1) */
+
+#else
+#  include "trees.h"
+#endif /* defined(GEN_TREES_H) || !defined(STDC) */
+
+struct static_tree_desc_s {
+    const ct_data *static_tree;  /* static tree or NULL */
+    const intf *extra_bits;      /* extra bits for each code or NULL */
+    int     extra_base;          /* base index for extra_bits: gen_bitlen
+                                  * reads extra_bits[n - extra_base] for every
+                                  * code n >= extra_base */
+    int     elems;               /* max number of elements in the tree */
+    int     max_length;          /* max bit length for the codes */
+};
+
+local static_tree_desc  static_l_desc =
+{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; /* literal/length tree */
+
+local static_tree_desc  static_d_desc =
+{static_dtree, extra_dbits, 0,          D_CODES, MAX_BITS}; /* distance tree */
+
+local static_tree_desc  static_bl_desc =
+{(const ct_data *)0, extra_blbits, 0,   BL_CODES, MAX_BL_BITS}; /* bit length tree: no static tree */
+
+/* ===========================================================================
+ * Local (static) routines in this file.
+ */
+
+local void tr_static_init OF((void));
+local void init_block     OF((deflate_state *s));
+local void pqdownheap     OF((deflate_state *s, ct_data *tree, int k));
+local void gen_bitlen     OF((deflate_state *s, tree_desc *desc));
+local void gen_codes      OF((ct_data *tree, int max_code, ushf *bl_count));
+local void build_tree     OF((deflate_state *s, tree_desc *desc));
+local void scan_tree      OF((deflate_state *s, ct_data *tree, int max_code));
+local void send_tree      OF((deflate_state *s, ct_data *tree, int max_code));
+local int  build_bl_tree  OF((deflate_state *s));
+local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
+                              int blcodes));
+local void compress_block OF((deflate_state *s, ct_data *ltree,
+                              ct_data *dtree));
+local void set_data_type  OF((deflate_state *s));
+local unsigned bi_reverse OF((unsigned value, int length));
+local void bi_windup      OF((deflate_state *s));
+local void bi_flush       OF((deflate_state *s));
+local void copy_block     OF((deflate_state *s, charf *buf, unsigned len,
+                              int header));
+
+#ifdef GEN_TREES_H
+local void gen_trees_header OF((void));
+#endif
+
+#ifndef DEBUG
+#  define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
+   /* Send a code of the given tree. c and tree must not have side effects
+    * because the expansion evaluates tree[c] twice.
+    */
+
+#else /* DEBUG */
+#  define send_code(s, c, tree) \
+     { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
+       send_bits(s, tree[c].Code, tree[c].Len); }
+#endif
+
+/* ===========================================================================
+ * Output a short LSB first on the stream: the low byte of w is written
+ * first, then the high byte. w is evaluated twice.
+ * IN assertion: there is enough room in pendingBuf.
+ */
+#define put_short(s, w) { \
+    put_byte(s, (uch)((w) & 0xff)); \
+    put_byte(s, (uch)((ush)(w) >> 8)); \
+}
+
+/* ===========================================================================
+ * Send a value on a given number of bits.
+ * IN assertion: length <= 16 and value fits in length bits.
+ */
+#ifdef DEBUG
+local void send_bits      OF((deflate_state *s, int value, int length));
+
+/* Debug build: send_bits is a real function so that every call can be traced
+ * and checked, and so that bits_sent can be accumulated.
+ */
+local void send_bits(s, value, length)
+    deflate_state *s;
+    int value;  /* value to send */
+    int length; /* number of bits */
+{
+    Tracevv((stderr," l %2d v %4x ", length, value));
+    Assert(length > 0 && length <= 15, "invalid length");
+    s->bits_sent += (ulg)length;
+
+    /* If not enough room in bi_buf, use (valid) bits from bi_buf and
+     * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
+     * unused bits in value.
+     */
+    if (s->bi_valid > (int)Buf_size - length) {
+        s->bi_buf |= (value << s->bi_valid);
+        put_short(s, s->bi_buf);
+        s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
+        s->bi_valid += length - Buf_size;
+    } else {
+        s->bi_buf |= value << s->bi_valid;
+        s->bi_valid += length;
+    }
+}
+#else /* !DEBUG */
+
+/* Release build: same logic as the debug function, expanded inline.
+ * The expansion declares block-local variables named len and val, so a
+ * caller must not pass expressions that refer to its own variables with
+ * those names. The value and length arguments are parenthesized where they
+ * are used, so compound expressions expand as intended.
+ */
+#define send_bits(s, value, length) \
+{ int len = (length);\
+  if (s->bi_valid > (int)Buf_size - len) {\
+    int val = (value);\
+    s->bi_buf |= (val << s->bi_valid);\
+    put_short(s, s->bi_buf);\
+    s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
+    s->bi_valid += len - Buf_size;\
+  } else {\
+    s->bi_buf |= (value) << s->bi_valid;\
+    s->bi_valid += len;\
+  }\
+}
+#endif /* DEBUG */
+
+
+#define MAX(a,b) ((a) >= (b) ? (a) : (b))
+/* Larger of a and b. Each argument is parenthesized so that operands built
+ * from lower-precedence operators expand correctly. The arguments are still
+ * evaluated more than once and must not have side effects.
+ */
+
+/* ===========================================================================
+ * Initialize the various 'constant' tables.
+ */
+local void tr_static_init()
+{
+#if defined(GEN_TREES_H) || !defined(STDC)
+    static int static_init_done = 0;
+    int n;        /* iterates over tree elements */
+    int bits;     /* bit counter */
+    int length;   /* length value */
+    int code;     /* code value */
+    int dist;     /* distance index */
+    ush bl_count[MAX_BITS+1];
+    /* number of codes at each bit length for an optimal tree */
+
+    if (static_init_done) return;
+
+    /* For some embedded targets, global variables are not initialized: */
+    static_l_desc.static_tree = static_ltree;
+    static_l_desc.extra_bits = extra_lbits;
+    static_d_desc.static_tree = static_dtree;
+    static_d_desc.extra_bits = extra_dbits;
+    static_bl_desc.extra_bits = extra_blbits;
+
+    /* Initialize the mapping length (0..255) -> length code (0..28).
+     * The loop stops one code short of LENGTH_CODES, so base_length[] of the
+     * last code is not written here.
+     */
+    length = 0;
+    for (code = 0; code < LENGTH_CODES-1; code++) {
+        base_length[code] = length;
+        for (n = 0; n < (1<<extra_lbits[code]); n++) {
+            _length_code[length++] = (uch)code;
+        }
+    }
+    Assert (length == 256, "tr_static_init: length != 256");
+    /* Note that the length 255 (match length 258) can be represented
+     * in two different ways: code 284 + 5 bits or code 285, so we
+     * overwrite length_code[255] to use the best encoding:
+     */
+    _length_code[length-1] = (uch)code;
+
+    /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
+    dist = 0;
+    for (code = 0 ; code < 16; code++) {
+        base_dist[code] = dist;
+        for (n = 0; n < (1<<extra_dbits[code]); n++) {
+            _dist_code[dist++] = (uch)code;
+        }
+    }
+    Assert (dist == 256, "tr_static_init: dist != 256");
+    dist >>= 7; /* from now on, all distances are divided by 128 */
+    for ( ; code < D_CODES; code++) {
+        base_dist[code] = dist << 7;
+        for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
+            _dist_code[256 + dist++] = (uch)code;
+        }
+    }
+    Assert (dist == 256, "tr_static_init: 256+dist != 512");
+
+    /* Construct the codes of the static literal tree */
+    for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
+    n = 0;
+    while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
+    while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
+    while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
+    while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
+    /* Codes 286 and 287 do not exist, but we must include them in the
+     * tree construction to get a canonical Huffman tree (longest code
+     * all ones)
+     */
+    gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
+
+    /* The static distance tree is trivial: */
+    for (n = 0; n < D_CODES; n++) {
+        static_dtree[n].Len = 5;
+        static_dtree[n].Code = bi_reverse((unsigned)n, 5);
+    }
+    static_init_done = 1;
+
+#  ifdef GEN_TREES_H
+    gen_trees_header();
+#  endif
+#endif /* defined(GEN_TREES_H) || !defined(STDC) */
+}
+
+/* ===========================================================================
+ * Generate the file trees.h describing the static trees.
+ */
+#ifdef GEN_TREES_H
+#  ifndef DEBUG
+#    include <stdio.h>
+#  endif
+
+/* Text to print after element i of a table whose final index is last:
+ * the closing brace after the final element, otherwise a comma, with a
+ * line break after every width elements.
+ */
+#  define SEPARATOR(i, last, width) \
+      ((i) == (last)? "\n};\n\n" :    \
+       ((i) % (width) == (width)-1 ? ",\n" : ", "))
+
+/* Write the tables built by tr_static_init() to the file trees.h in the
+ * current directory, as C initializers. If the file cannot be opened, a
+ * message is printed on stderr and nothing is written.
+ */
+local void gen_trees_header()
+{
+    FILE *header = fopen("trees.h", "w");
+    int i;
+
+    Assert (header != NULL, "Can't open trees.h");
+    if (header == NULL) {
+        /* NOTE(review): Assert is presumably compiled out when DEBUG is not
+         * defined (confirm in zutil.h), so do not rely on it to keep a null
+         * stream away from fprintf.
+         */
+        fprintf(stderr, "gen_trees_header: can't open trees.h\n");
+        return;
+    }
+    fprintf(header,
+            "/* header created automatically with -DGEN_TREES_H */\n\n");
+
+    fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n");
+    for (i = 0; i < L_CODES+2; i++) {
+        fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code,
+                static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5));
+    }
+
+    fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n");
+    for (i = 0; i < D_CODES; i++) {
+        fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code,
+                static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
+    }
+
+    fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n");
+    for (i = 0; i < DIST_CODE_LEN; i++) {
+        fprintf(header, "%2u%s", _dist_code[i],
+                SEPARATOR(i, DIST_CODE_LEN-1, 20));
+    }
+
+    fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
+    for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {
+        fprintf(header, "%2u%s", _length_code[i],
+                SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
+    }
+
+    fprintf(header, "local const int base_length[LENGTH_CODES] = {\n");
+    for (i = 0; i < LENGTH_CODES; i++) {
+        fprintf(header, "%1u%s", base_length[i],
+                SEPARATOR(i, LENGTH_CODES-1, 20));
+    }
+
+    fprintf(header, "local const int base_dist[D_CODES] = {\n");
+    for (i = 0; i < D_CODES; i++) {
+        fprintf(header, "%5u%s", base_dist[i],
+                SEPARATOR(i, D_CODES-1, 10));
+    }
+
+    fclose(header);
+}
+#endif /* GEN_TREES_H */
+
+/* ===========================================================================
+ * Initialize the tree data structures for a new zlib stream.
+ */
+void _tr_init(s)
+    deflate_state *s;
+{
+    tr_static_init();
+
+    /* Start with an empty bit buffer. */
+    s->bi_valid = 0;
+    s->bi_buf = 0;
+    s->last_eob_len = 8; /* enough lookahead for inflate */
+#ifdef DEBUG
+    s->bits_sent = 0L;
+    s->compressed_len = 0L;
+#endif
+
+    /* Attach each dynamic tree to its static descriptor. */
+    s->l_desc.stat_desc = &static_l_desc;
+    s->l_desc.dyn_tree = s->dyn_ltree;
+
+    s->d_desc.stat_desc = &static_d_desc;
+    s->d_desc.dyn_tree = s->dyn_dtree;
+
+    s->bl_desc.stat_desc = &static_bl_desc;
+    s->bl_desc.dyn_tree = s->bl_tree;
+
+    /* Prepare the counters for the first block. */
+    init_block(s);
+}
+
+/* ===========================================================================
+ * Reset the per-block state: all frequency counts, the block length
+ * estimates and the literal/match counters.
+ */
+local void init_block(s)
+    deflate_state *s;
+{
+    int i; /* index into the tree being cleared */
+
+    /* Clear every frequency count of the three dynamic trees. */
+    i = L_CODES;
+    while (i-- > 0) s->dyn_ltree[i].Freq = 0;
+    i = D_CODES;
+    while (i-- > 0) s->dyn_dtree[i].Freq = 0;
+    i = BL_CODES;
+    while (i-- > 0) s->bl_tree[i].Freq = 0;
+
+    /* The end of block code is counted once from the start. */
+    s->dyn_ltree[END_BLOCK].Freq = 1;
+
+    s->static_len = 0L;
+    s->opt_len = 0L;
+    s->matches = 0;
+    s->last_lit = 0;
+}
+
+#define SMALLEST 1
+/* Heap index of the least frequent node in the Huffman tree */
+
+
+/* ===========================================================================
+ * Store the smallest heap element in top, move the last heap element to the
+ * root and sift it down. Updates heap and heap_len.
+ */
+#define pqremove(s, tree, top) \
+{\
+    top = s->heap[SMALLEST]; \
+    s->heap[SMALLEST] = s->heap[s->heap_len--]; \
+    pqdownheap(s, tree, SMALLEST); \
+}
+
+/* ===========================================================================
+ * Compares two subtrees, using the tree depth as tie breaker when
+ * the subtrees have equal frequency. This minimizes the worst case length.
+ */
+#define smaller(tree, n, m, depth) \
+   (tree[n].Freq < tree[m].Freq || \
+   (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))
+
+/* ===========================================================================
+ * Sift the node stored at heap position k down the heap: while one of its
+ * sons is smaller, move the smaller son up and continue from that son's
+ * position. Stops when the node is smaller than both sons or has no sons.
+ */
+local void pqdownheap(s, tree, k)
+    deflate_state *s;
+    ct_data *tree;  /* the tree to restore */
+    int k;          /* node to move down */
+{
+    int moving = s->heap[k]; /* node being sifted down */
+    int son;                 /* heap position of the son under examination */
+
+    for (son = k << 1; son <= s->heap_len; son <<= 1) {
+        /* Prefer the right son when it exists and is the smaller one. */
+        if (son < s->heap_len &&
+            smaller(tree, s->heap[son+1], s->heap[son], s->depth)) {
+            son++;
+        }
+        /* The moving node already precedes both sons: it stays at k. */
+        if (smaller(tree, moving, s->heap[son], s->depth)) break;
+
+        /* Pull the smaller son up one level and descend to its old slot. */
+        s->heap[k] = s->heap[son];
+        k = son;
+    }
+    s->heap[k] = moving;
+}
+
+/* ===========================================================================
+ * Compute the optimal bit lengths for a tree and update the total bit length
+ * for the current block.
+ * IN assertion: the fields freq and dad are set, heap[heap_max] and
+ *    above are the tree nodes sorted by increasing frequency.
+ * OUT assertions: the field len is set to the optimal bit length, the
+ *     array bl_count contains the frequencies for each bit length.
+ *     The length opt_len is updated; static_len is also updated if stree is
+ *     not null.
+ */
+local void gen_bitlen(s, desc)
+    deflate_state *s;
+    tree_desc *desc;    /* the tree descriptor */
+{
+    ct_data *tree        = desc->dyn_tree;
+    int max_code         = desc->max_code;
+    const ct_data *stree = desc->stat_desc->static_tree;
+    const intf *extra    = desc->stat_desc->extra_bits;
+    int base             = desc->stat_desc->extra_base;
+    int max_length       = desc->stat_desc->max_length;
+    int h;              /* heap index */
+    int n, m;           /* iterate over the tree elements */
+    int bits;           /* bit length */
+    int xbits;          /* extra bits */
+    ush f;              /* frequency */
+    int overflow = 0;   /* number of elements with bit length too large */
+
+    for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
+
+    /* In a first pass, compute the optimal bit lengths (which may
+     * overflow in the case of the bit length tree). Each node gets the
+     * length of its father plus one, clamped to max_length.
+     */
+    tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
+
+    for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
+        n = s->heap[h];
+        bits = tree[tree[n].Dad].Len + 1;
+        if (bits > max_length) bits = max_length, overflow++;
+        tree[n].Len = (ush)bits;
+        /* We overwrite tree[n].Dad which is no longer needed */
+
+        if (n > max_code) continue; /* not a leaf node */
+
+        s->bl_count[bits]++;
+        xbits = 0;
+        if (n >= base) xbits = extra[n-base];
+        f = tree[n].Freq;
+        s->opt_len += (ulg)f * (bits + xbits);
+        if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits);
+    }
+    if (overflow == 0) return; /* no length was clamped: nothing to repair */
+
+    Trace((stderr,"\nbit length overflow\n"));
+    /* This happens for example on obj2 and pic of the Calgary corpus */
+
+    /* Find the first bit length which could increase: */
+    do {
+        bits = max_length-1;
+        while (s->bl_count[bits] == 0) bits--;
+        s->bl_count[bits]--;      /* move one leaf down the tree */
+        s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
+        s->bl_count[max_length]--;
+        /* The brother of the overflow item also moves one step up,
+         * but this does not affect bl_count[max_length]
+         */
+        overflow -= 2;
+    } while (overflow > 0);
+
+    /* Now recompute all bit lengths, scanning in increasing frequency.
+     * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
+     * lengths instead of fixing only the wrong ones. This idea is taken
+     * from 'ar' written by Haruhiko Okumura.)
+     */
+    for (bits = max_length; bits != 0; bits--) {
+        n = s->bl_count[bits];
+        while (n != 0) {
+            m = s->heap[--h];
+            if (m > max_code) continue; /* internal node: does not use up a count */
+            if (tree[m].Len != (unsigned) bits) {
+                Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
+                s->opt_len += ((long)bits - (long)tree[m].Len)
+                              *(long)tree[m].Freq;
+                tree[m].Len = (ush)bits;
+            }
+            n--;
+        }
+    }
+}
+
+/* ===========================================================================
+ * Generate the codes for a given tree and bit counts (which need not be
+ * optimal).
+ * IN assertion: the array bl_count contains the bit length statistics for
+ * the given tree and the field len is set for all tree elements.
+ * OUT assertion: the field code is set for all tree elements of non
+ *     zero code length.
+ */
+local void gen_codes (tree, max_code, bl_count)
+    ct_data *tree;             /* the tree to decorate */
+    int max_code;              /* largest code with non zero frequency */
+    ushf *bl_count;            /* number of codes at each bit length */
+{
+    ush next_code[MAX_BITS+1]; /* next code value for each bit length */
+    ush code = 0;              /* running code value */
+    int bits;                  /* bit index */
+    int n;                     /* code index */
+
+    /* The distribution counts are first used to generate the code values
+     * without bit reversal: the first code of each length is the first code
+     * of the previous length plus the number of codes of that length,
+     * shifted left by one.
+     */
+    for (bits = 1; bits <= MAX_BITS; bits++) {
+        next_code[bits] = code = (code + bl_count[bits-1]) << 1;
+    }
+    /* Check that the bit counts in bl_count are consistent. The last code
+     * must be all ones.
+     */
+    Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
+            "inconsistent bit counts");
+    Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
+
+    for (n = 0;  n <= max_code; n++) {
+        int len = tree[n].Len;
+        if (len == 0) continue; /* unused element: no code assigned */
+        /* Now reverse the bits */
+        tree[n].Code = bi_reverse(next_code[len]++, len);
+
+        Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
+             n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
+    }
+}
+
+/* ===========================================================================
+ * Construct one Huffman tree and assigns the code bit strings and lengths.
+ * Update the total bit length for the current block.
+ * IN assertion: the field freq is set for all tree elements.
+ * OUT assertions: the fields len and code are set to the optimal bit length
+ *     and corresponding code. The length opt_len is updated; static_len is
+ *     also updated if stree is not null. The field max_code is set.
+ */
+local void build_tree(s, desc)
+    deflate_state *s;
+    tree_desc *desc; /* the tree descriptor */
+{
+    ct_data *tree         = desc->dyn_tree;
+    const ct_data *stree  = desc->stat_desc->static_tree;
+    int elems             = desc->stat_desc->elems;
+    int n, m;          /* iterate over heap elements */
+    int max_code = -1; /* largest code with non zero frequency */
+    int node;          /* new node being created */
+
+    /* Construct the initial heap, with least frequent element in
+     * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
+     * heap[0] is not used.
+     */
+    s->heap_len = 0, s->heap_max = HEAP_SIZE;
+
+    for (n = 0; n < elems; n++) {
+        if (tree[n].Freq != 0) {
+            s->heap[++(s->heap_len)] = max_code = n;
+            s->depth[n] = 0;
+        } else {
+            tree[n].Len = 0;
+        }
+    }
+
+    /* The pkzip format requires that at least one distance code exists,
+     * and that at least one bit should be sent even if there is only one
+     * possible code. So to avoid special checks later on we force at least
+     * two codes of non zero frequency.
+     */
+    while (s->heap_len < 2) {
+        node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
+        tree[node].Freq = 1;
+        s->depth[node] = 0;
+        s->opt_len--; if (stree) s->static_len -= stree[node].Len;
+        /* node is 0 or 1 so it does not have extra bits */
+    }
+    desc->max_code = max_code;
+
+    /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
+     * establish sub-heaps of increasing lengths:
+     */
+    for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
+
+    /* Construct the Huffman tree by repeatedly combining the least two
+     * frequent nodes.
+     */
+    node = elems;              /* next internal node of the tree */
+    do {
+        pqremove(s, tree, n);  /* n = node of least frequency */
+        m = s->heap[SMALLEST]; /* m = node of next least frequency */
+
+        s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
+        s->heap[--(s->heap_max)] = m;
+
+        /* Create a new node father of n and m */
+        tree[node].Freq = tree[n].Freq + tree[m].Freq;
+        s->depth[node] = (uch) (MAX(s->depth[n], s->depth[m]) + 1);
+        tree[n].Dad = tree[m].Dad = (ush)node;
+#ifdef DUMP_BL_TREE
+        if (tree == s->bl_tree) {
+            fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
+                    node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
+        }
+#endif
+        /* and insert the new node in the heap (it replaces m at the root) */
+        s->heap[SMALLEST] = node++;
+        pqdownheap(s, tree, SMALLEST);
+
+    } while (s->heap_len >= 2);
+
+    s->heap[--(s->heap_max)] = s->heap[SMALLEST];
+
+    /* At this point, the fields freq and dad are set. We can now
+     * generate the bit lengths.
+     */
+    gen_bitlen(s, (tree_desc *)desc);
+
+    /* The field len is now set, we can generate the bit codes */
+    gen_codes ((ct_data *)tree, max_code, s->bl_count);
+}
+
+/* ===========================================================================
+ * Scan a literal or distance tree to determine the frequencies of the codes
+ * in the bit length tree.
+ */
+local void scan_tree (s, tree, max_code)
+    deflate_state *s;
+    ct_data *tree;   /* the tree to be scanned */
+    int max_code;    /* and its largest code of non zero frequency */
+{
+    int n;                     /* iterates over all tree elements */
+    int prevlen = -1;          /* last emitted length */
+    int curlen;                /* length of current code */
+    int nextlen = tree[0].Len; /* length of next code */
+    int count = 0;             /* repeat count of the current code */
+    int max_count = 7;         /* max repeat count */
+    int min_count = 4;         /* min repeat count */
+
+    if (nextlen == 0) max_count = 138, min_count = 3;
+    tree[max_code+1].Len = (ush)0xffff; /* guard: sentinel length stored one past max_code */
+
+    for (n = 0; n <= max_code; n++) {
+        curlen = nextlen; nextlen = tree[n+1].Len;
+        if (++count < max_count && curlen == nextlen) {
+            continue; /* the run of equal lengths goes on */
+        } else if (count < min_count) {
+            s->bl_tree[curlen].Freq += count; /* run too short: counted one by one */
+        } else if (curlen != 0) {
+            if (curlen != prevlen) s->bl_tree[curlen].Freq++;
+            s->bl_tree[REP_3_6].Freq++;
+        } else if (count <= 10) {
+            s->bl_tree[REPZ_3_10].Freq++;
+        } else {
+            s->bl_tree[REPZ_11_138].Freq++;
+        }
+        count = 0; prevlen = curlen;
+        if (nextlen == 0) {
+            max_count = 138, min_count = 3;
+        } else if (curlen == nextlen) {
+            max_count = 6, min_count = 3;
+        } else {
+            max_count = 7, min_count = 4;
+        }
+    }
+}
+
+/* ===========================================================================
+ * Send a literal or distance tree in compressed form, using the codes in
+ * bl_tree.
+ */
+local void send_tree (s, tree, max_code)
+    deflate_state *s;
+    ct_data *tree; /* the tree to be scanned */
+    int max_code;       /* and its largest code of non zero frequency */
+{
+    int n;                     /* iterates over all tree elements */
+    int prevlen = -1;          /* last emitted length */
+    int curlen;                /* length of current code */
+    int nextlen = tree[0].Len; /* length of next code */
+    int count = 0;             /* repeat count of the current code */
+    int max_count = 7;         /* max repeat count */
+    int min_count = 4;         /* min repeat count */
+
+    /* tree[max_code+1].Len = -1; */  /* guard already set */
+    if (nextlen == 0) max_count = 138, min_count = 3;
+
+    for (n = 0; n <= max_code; n++) {
+        curlen = nextlen; nextlen = tree[n+1].Len;
+        if (++count < max_count && curlen == nextlen) {
+            continue; /* the run of equal lengths goes on */
+        } else if (count < min_count) {
+            /* run too short for a repeat code: send each length itself */
+            do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
+
+        } else if (curlen != 0) {
+            if (curlen != prevlen) {
+                send_code(s, curlen, s->bl_tree); count--;
+            }
+            Assert(count >= 3 && count <= 6, " 3_6?");
+            send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
+
+        } else if (count <= 10) {
+            send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
+
+        } else {
+            send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
+        }
+        count = 0; prevlen = curlen;
+        if (nextlen == 0) {
+            max_count = 138, min_count = 3;
+        } else if (curlen == nextlen) {
+            max_count = 6, min_count = 3;
+        } else {
+            max_count = 7, min_count = 4;
+        }
+    }
+}
+
+/* ===========================================================================
+ * Construct the Huffman tree for the bit lengths and return the index in
+ * bl_order of the last bit length code to send.
+ */
+local int build_bl_tree(s)
+    deflate_state *s;
+{
+    int last = BL_CODES-1; /* index in bl_order of the last code to send */
+
+    /* Tally the bit length symbols needed by the literal tree and by the
+     * distance tree, then build the bit length tree from those counts.
+     */
+    scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
+    scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
+    build_tree(s, (tree_desc *)(&(s->bl_desc)));
+    /* opt_len now includes the length of the tree representations, except
+     * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
+     */
+
+    /* Skip the trailing unused codes in bl_order. The pkzip format requires
+     * that at least 4 bit length codes be sent (appnote.txt says 3 but the
+     * actual value used is 4), so the index never goes below 3 while an
+     * earlier code is in use.
+     */
+    while (last >= 3 && s->bl_tree[bl_order[last]].Len == 0) {
+        last--;
+    }
+
+    /* Account for the bit length code lengths (3 bits each) and the counts */
+    s->opt_len += 3*(last+1) + 5+5+4;
+    Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
+            s->opt_len, s->static_len));
+
+    return last;
+}
+
+/* ===========================================================================
+ * Send the header for a block using dynamic Huffman trees: the counts, the
+ * lengths of the bit length codes, the literal tree and the distance tree.
+ * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
+ */
+local void send_all_trees(s, lcodes, dcodes, blcodes)
+    deflate_state *s;
+    int lcodes, dcodes, blcodes; /* number of codes for each tree */
+{
+    int idx = 0; /* position in bl_order */
+
+    Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
+    Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
+            "too many codes");
+    Tracev((stderr, "\nbl counts: "));
+
+    /* The three counts, each stored relative to its minimum value. */
+    send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
+    send_bits(s, dcodes-1,   5);
+    send_bits(s, blcodes-4,  4); /* not -3 as stated in appnote.txt */
+
+    /* The lengths of the bit length codes, 3 bits each, in bl_order. */
+    while (idx < blcodes) {
+        int sym = bl_order[idx];
+        Tracev((stderr, "\nbl code %2d ", sym));
+        send_bits(s, s->bl_tree[sym].Len, 3);
+        idx++;
+    }
+    Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
+
+    /* The literal tree, then the distance tree. */
+    send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1);
+    Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
+
+    send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1);
+    Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
+}
+
+/* ===========================================================================
+ * Send a stored block: the 3-bit block header, then the block itself copied
+ * by copy_block with its header argument set to 1.
+ */
+void _tr_stored_block(s, buf, stored_len, eof)
+    deflate_state *s;
+    charf *buf;       /* input block */
+    ulg stored_len;   /* length of input block */
+    int eof;          /* true if this is the last block for a file */
+{
+    int block_type = (STORED_BLOCK<<1)+eof; /* block type with the eof flag */
+
+    send_bits(s, block_type, 3);
+#ifdef DEBUG
+    s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
+    s->compressed_len += (stored_len + 4) << 3;
+#endif
+    copy_block(s, buf, (unsigned)stored_len, 1); /* with header */
+}
+
+/* ===========================================================================
+ * Send one empty static block to give enough lookahead for inflate.
+ * This takes 10 bits, of which 7 may remain in the bit buffer.
+ * The current inflate code requires 9 bits of lookahead.
If the
+ * last two codes for the previous block (real code plus EOB) were coded
+ * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode
+ * the last real code. In this case we send two empty static blocks instead
+ * of one. (There are no problems if the previous block is stored or fixed.)
+ * To simplify the code, we assume the worst case of last real code encoded
+ * on one bit only.
+ */
+void _tr_align(s)
+    deflate_state *s;
+{
+    int remaining = 2; /* at most two empty static blocks are sent */
+
+    do {
+        /* One empty static block: 3 bits of block type, 7 bits of EOB. */
+        send_bits(s, STATIC_TREES<<1, 3);
+        send_code(s, END_BLOCK, static_ltree);
+#ifdef DEBUG
+        s->compressed_len += 10L;
+#endif
+        bi_flush(s);
+        remaining--;
+        /* Of the 10 bits for the empty block, we have already sent
+         * (10 - bi_valid) bits. The lookahead for the last real code (before
+         * the EOB of the previous block) was thus at least one plus the
+         * length of the EOB plus what we have just sent of the empty static
+         * block. A second block is sent when that is less than 9 bits.
+         */
+    } while (remaining != 0 && 1 + s->last_eob_len + 10 - s->bi_valid < 9);
+
+    s->last_eob_len = 7;
+}
+
+/* ===========================================================================
+ * Determine the best encoding for the current block: dynamic trees, static
+ * trees or store, and output the encoded block to the zip file.
+ */
+void _tr_flush_block(s, buf, stored_len, eof)
+    deflate_state *s;
+    charf *buf;       /* input block, or NULL if too old */
+    ulg stored_len;   /* length of input block */
+    int eof;          /* true if this is the last block for a file */
+{
+    ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
+    int max_blindex = 0;  /* index of last bit length code of non zero freq */
+
+    /* Build the Huffman trees unless a stored block is forced */
+    if (s->level > 0) {
+
+        /* Check if the file is ascii or binary */
+        if (s->data_type == Z_UNKNOWN) set_data_type(s);
+
+        /* Construct the literal and distance trees */
+        build_tree(s, (tree_desc *)(&(s->l_desc)));
+        Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
+                s->static_len));
+
+        build_tree(s, (tree_desc *)(&(s->d_desc)));
+        Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
+                s->static_len));
+        /* At this point, opt_len and static_len are the total bit lengths of
+         * the compressed block data, excluding the tree representations.
+         */
+
+        /* Build the bit length tree for the above two trees, and get the index
+         * in bl_order of the last bit length code to send.
+         */
+        max_blindex = build_bl_tree(s);
+
+        /* Determine the best encoding. Compute first the block length in
+         * bytes: 3 bits of block header are added and the total is rounded
+         * up to a whole byte.
+         */
+        opt_lenb = (s->opt_len+3+7)>>3;
+        static_lenb = (s->static_len+3+7)>>3;
+
+        Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
+                opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
+                s->last_lit));
+
+        /* From here on opt_lenb is the smaller of the two sizes */
+        if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
+
+    } else {
+        Assert(buf != (char*)0, "lost buf");
+        opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
+    }
+
+#ifdef FORCE_STORED
+    if (buf != (char*)0) { /* force stored block */
+#else
+    if (stored_len+4 <= opt_lenb && buf != (char*)0) {
+                       /* 4: two words for the lengths */
+#endif
+        /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
+         * Otherwise we can't have processed more than WSIZE input bytes since
+         * the last block flush, because compression would have been
+         * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
+         * transform a block into a stored block.
+         */
+        _tr_stored_block(s, buf, stored_len, eof);
+
+#ifdef FORCE_STATIC
+    } else if (static_lenb >= 0) { /* force static trees */
+#else
+    } else if (static_lenb == opt_lenb) { /* static trees are not larger than dynamic ones */
+#endif
+        send_bits(s, (STATIC_TREES<<1)+eof, 3);
+        compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree);
+#ifdef DEBUG
+        s->compressed_len += 3 + s->static_len;
+#endif
+    } else {
+        send_bits(s, (DYN_TREES<<1)+eof, 3);
+        send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
+                       max_blindex+1);
+        compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree);
+#ifdef DEBUG
+        s->compressed_len += 3 + s->opt_len;
+#endif
+    }
+    Assert (s->compressed_len == s->bits_sent, "bad compressed size");
+    /* The above check is made mod 2^32, for files larger than 512 MB
+     * and uLong implemented on 32 bits.
+     */
+    init_block(s); /* reset the frequency counts for the next block */
+
+    if (eof) {
+        bi_windup(s);
+#ifdef DEBUG
+        s->compressed_len += 7;  /* align on byte boundary */
+#endif
+    }
+    Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
+           s->compressed_len-7*eof));
+}
+
+/* ===========================================================================
+ * Save the match info and tally the frequency counts. Return true if
+ * the current block must be flushed.
+ */ +int _tr_tally (s, dist, lc) + deflate_state *s; + unsigned dist; /* distance of matched string */ + unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ +{ + s->d_buf[s->last_lit] = (ush)dist; + s->l_buf[s->last_lit++] = (uch)lc; + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; + } else { + s->matches++; + /* Here, lc is the match length - MIN_MATCH */ + dist--; /* dist = match distance - 1 */ + Assert((ush)dist < (ush)MAX_DIST(s) && + (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && + (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); + + s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } + +#ifdef TRUNCATE_BLOCK + /* Try to guess if it is profitable to stop the current block here */ + if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { + /* Compute an upper bound for the compressed length */ + ulg out_length = (ulg)s->last_lit*8L; + ulg in_length = (ulg)((long)s->strstart - s->block_start); + int dcode; + for (dcode = 0; dcode < D_CODES; dcode++) { + out_length += (ulg)s->dyn_dtree[dcode].Freq * + (5L+extra_dbits[dcode]); + } + out_length >>= 3; + Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", + s->last_lit, in_length, out_length, + 100L - out_length*100L/in_length)); + if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; + } +#endif + return (s->last_lit == s->lit_bufsize-1); + /* We avoid equality with lit_bufsize because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. 
+ */ +} + +/* =========================================================================== + * Send the block data compressed using the given Huffman trees + */ +local void compress_block(s, ltree, dtree) + deflate_state *s; + ct_data *ltree; /* literal tree */ + ct_data *dtree; /* distance tree */ +{ + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ + unsigned lx = 0; /* running index in l_buf */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + + if (s->last_lit != 0) do { + dist = s->d_buf[lx]; + lc = s->l_buf[lx++]; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); + } else { + /* Here, lc is the match length - MIN_MATCH */ + code = _length_code[lc]; + send_code(s, code+LITERALS+1, ltree); /* send the length code */ + extra = extra_lbits[code]; + if (extra != 0) { + lc -= base_length[code]; + send_bits(s, lc, extra); /* send the extra length bits */ + } + dist--; /* dist is now the match distance - 1 */ + code = d_code(dist); + Assert (code < D_CODES, "bad d_code"); + + send_code(s, code, dtree); /* send the distance code */ + extra = extra_dbits[code]; + if (extra != 0) { + dist -= base_dist[code]; + send_bits(s, dist, extra); /* send the extra distance bits */ + } + } /* literal or match pair ? */ + + /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ + Assert(s->pending < s->lit_bufsize + 2*lx, "pendingBuf overflow"); + + } while (lx < s->last_lit); + + send_code(s, END_BLOCK, ltree); + s->last_eob_len = ltree[END_BLOCK].Len; +} + +/* =========================================================================== + * Set the data type to ASCII or BINARY, using a crude approximation: + * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise. 
+ * IN assertion: the fields freq of dyn_ltree are set and the total of all + * frequencies does not exceed 64K (to fit in an int on 16 bit machines). + */ +local void set_data_type(s) + deflate_state *s; +{ + int n = 0; + unsigned ascii_freq = 0; + unsigned bin_freq = 0; + while (n < 7) bin_freq += s->dyn_ltree[n++].Freq; + while (n < 128) ascii_freq += s->dyn_ltree[n++].Freq; + while (n < LITERALS) bin_freq += s->dyn_ltree[n++].Freq; + s->data_type = (Byte)(bin_freq > (ascii_freq >> 2) ? Z_BINARY : Z_ASCII); +} + +/* =========================================================================== + * Reverse the first len bits of a code, using straightforward code (a faster + * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +local unsigned bi_reverse(code, len) + unsigned code; /* the value to invert */ + int len; /* its bit length */ +{ + register unsigned res = 0; + do { + res |= code & 1; + code >>= 1, res <<= 1; + } while (--len > 0); + return res >> 1; +} + +/* =========================================================================== + * Flush the bit buffer, keeping at most 7 bits in it. + */ +local void bi_flush(s) + deflate_state *s; +{ + if (s->bi_valid == 16) { + put_short(s, s->bi_buf); + s->bi_buf = 0; + s->bi_valid = 0; + } else if (s->bi_valid >= 8) { + put_byte(s, (Byte)s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; + } +} + +/* =========================================================================== + * Flush the bit buffer and align the output on a byte boundary + */ +local void bi_windup(s) + deflate_state *s; +{ + if (s->bi_valid > 8) { + put_short(s, s->bi_buf); + } else if (s->bi_valid > 0) { + put_byte(s, (Byte)s->bi_buf); + } + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef DEBUG + s->bits_sent = (s->bits_sent+7) & ~7; +#endif +} + +/* =========================================================================== + * Copy a stored block, storing first the length and its + * one's complement if requested. 
+ */ +local void copy_block(s, buf, len, header) + deflate_state *s; + charf *buf; /* the input data */ + unsigned len; /* its length */ + int header; /* true if block header must be written */ +{ + bi_windup(s); /* align on byte boundary */ + s->last_eob_len = 8; /* enough lookahead for inflate */ + + if (header) { + put_short(s, (ush)len); + put_short(s, (ush)~len); +#ifdef DEBUG + s->bits_sent += 2*16; +#endif + } +#ifdef DEBUG + s->bits_sent += (ulg)len<<3; +#endif + while (len--) { + put_byte(s, *buf++); + } +} diff --git a/src/utilfuns/zlib/trees.h b/src/utilfuns/zlib/trees.h new file mode 100644 index 0000000..72facf9 --- /dev/null +++ b/src/utilfuns/zlib/trees.h @@ -0,0 +1,128 @@ +/* header created automatically with -DGEN_TREES_H */ + +local const ct_data static_ltree[L_CODES+2] = { +{{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}}, +{{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}}, +{{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}}, +{{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}}, +{{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}}, +{{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}}, +{{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}}, +{{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}}, +{{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}}, +{{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}}, +{{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}}, +{{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}}, +{{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}}, +{{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 8}}, +{{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}}, +{{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}}, +{{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 
33},{ 8}}, +{{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}}, +{{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}}, +{{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}}, +{{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}}, +{{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}}, +{{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}}, +{{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}}, +{{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}}, +{{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}}, +{{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}}, +{{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}}, +{{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}}, +{{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}}, +{{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}}, +{{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}}, +{{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}}, +{{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}}, +{{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}}, +{{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}}, +{{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}}, +{{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}}, +{{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}}, +{{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}}, +{{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}}, +{{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}}, +{{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}}, +{{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}}, +{{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}}, 
+{{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}}, +{{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}}, +{{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}}, +{{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}}, +{{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}}, +{{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}}, +{{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}}, +{{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}}, +{{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}}, +{{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}}, +{{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}}, +{{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}}, +{{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}} +}; + +local const ct_data static_dtree[D_CODES] = { +{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}}, +{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}}, +{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}}, +{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}}, +{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}}, +{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}} +}; + +const uch _dist_code[DIST_CODE_LEN] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, +10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, +11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, +12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, +18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 +}; + +const uch _length_code[MAX_MATCH-MIN_MATCH+1]= { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, +13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, +17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, +19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, +21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, +22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 +}; + +local const int base_length[LENGTH_CODES] = { +0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, +64, 80, 96, 112, 128, 160, 192, 224, 0 +}; + +local const int base_dist[D_CODES] = { + 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, + 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, + 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576 +}; + diff --git a/src/utilfuns/zlib/uncompr.c b/src/utilfuns/zlib/uncompr.c new file mode 100644 index 0000000..0c1051a --- /dev/null +++ b/src/utilfuns/zlib/uncompr.c @@ -0,0 +1,58 @@ +/* uncompr.c -- decompress a memory buffer + * Copyright (C) 1995-1998 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id: uncompr.c,v 1.1 2001/03/23 09:00:15 scribe Exp $ */ + +#include "zlib.h" + +/* =========================================================================== + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. (The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the compressed buffer. + This function can be used to decompress a whole file at once if the + input file is mmap'ed. 
+ + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted. +*/ +int ZEXPORT uncompress (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; +{ + z_stream stream; + int err; + + stream.next_in = (Bytef*)source; + stream.avail_in = (uInt)sourceLen; + /* Check for source > 64K on 16-bit machine: */ + if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR; + + stream.next_out = dest; + stream.avail_out = (uInt)*destLen; + if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR; + + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + + err = inflateInit(&stream); + if (err != Z_OK) return err; + + err = inflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + inflateEnd(&stream); + return err == Z_OK ? Z_BUF_ERROR : err; + } + *destLen = stream.total_out; + + err = inflateEnd(&stream); + return err; +} diff --git a/src/utilfuns/zlib/untgz.c b/src/utilfuns/zlib/untgz.c new file mode 100644 index 0000000..2919656 --- /dev/null +++ b/src/utilfuns/zlib/untgz.c @@ -0,0 +1,436 @@ +/* + * untgz.c -- Display contents and/or extract file from + * a gzip'd TAR file + * written by "Pedro A. 
Aranda Guti\irrez" <paag@tid.es> + * adaptation to Unix by Jean-loup Gailly <jloup@gzip.org> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <errno.h> +#include <fcntl.h> +#ifdef unix +# include <unistd.h> +#else +# include <direct.h> +# include <io.h> +#endif + +#include "zlib.h" + +#ifdef WIN32 +# include <windows.h> +# ifndef F_OK +# define F_OK (0) +# endif +# ifdef _MSC_VER +# define mkdir(dirname,mode) _mkdir(dirname) +# define strdup(str) _strdup(str) +# define unlink(fn) _unlink(fn) +# define access(path,mode) _access(path,mode) +# else +# define mkdir(dirname,mode) _mkdir(dirname) +# endif +#else +# include <utime.h> +#endif + + +/* Values used in typeflag field. */ + +#define REGTYPE '0' /* regular file */ +#define AREGTYPE '\0' /* regular file */ +#define LNKTYPE '1' /* link */ +#define SYMTYPE '2' /* reserved */ +#define CHRTYPE '3' /* character special */ +#define BLKTYPE '4' /* block special */ +#define DIRTYPE '5' /* directory */ +#define FIFOTYPE '6' /* FIFO special */ +#define CONTTYPE '7' /* reserved */ + +#define BLOCKSIZE 512 + +struct tar_header +{ /* byte offset */ + char name[100]; /* 0 */ + char mode[8]; /* 100 */ + char uid[8]; /* 108 */ + char gid[8]; /* 116 */ + char size[12]; /* 124 */ + char mtime[12]; /* 136 */ + char chksum[8]; /* 148 */ + char typeflag; /* 156 */ + char linkname[100]; /* 157 */ + char magic[6]; /* 257 */ + char version[2]; /* 263 */ + char uname[32]; /* 265 */ + char gname[32]; /* 297 */ + char devmajor[8]; /* 329 */ + char devminor[8]; /* 337 */ + char prefix[155]; /* 345 */ + /* 500 */ +}; + +union tar_buffer { + char buffer[BLOCKSIZE]; + struct tar_header header; +}; + +enum { TGZ_EXTRACT = 0, TGZ_LIST }; + +static char *TGZfname OF((const char *)); +void TGZnotfound OF((const char *)); + +int getoct OF((char *, int)); +char *strtime OF((time_t *)); +int ExprMatch OF((char *,char *)); + +int makedir OF((char *)); +int matchname OF((int,int,char **,char *)); + +void 
error OF((const char *)); +int tar OF((gzFile, int, int, int, char **)); + +void help OF((int)); +int main OF((int, char **)); + +char *prog; + +/* This will give a benign warning */ + +static char *TGZprefix[] = { "\0", ".tgz", ".tar.gz", ".tar", NULL }; + +/* Return the real name of the TGZ archive */ +/* or NULL if it does not exist. */ + +static char *TGZfname OF((const char *fname)) +{ + static char buffer[1024]; + int origlen,i; + + strcpy(buffer,fname); + origlen = strlen(buffer); + + for (i=0; TGZprefix[i]; i++) + { + strcpy(buffer+origlen,TGZprefix[i]); + if (access(buffer,F_OK) == 0) + return buffer; + } + return NULL; +} + +/* error message for the filename */ + +void TGZnotfound OF((const char *fname)) +{ + int i; + + fprintf(stderr,"%s : couldn't find ",prog); + for (i=0;TGZprefix[i];i++) + fprintf(stderr,(TGZprefix[i+1]) ? "%s%s, " : "or %s%s\n", + fname, + TGZprefix[i]); + exit(1); +} + + +/* help functions */ + +int getoct(char *p,int width) +{ + int result = 0; + char c; + + while (width --) + { + c = *p++; + if (c == ' ') + continue; + if (c == 0) + break; + result = result * 8 + (c - '0'); + } + return result; +} + +char *strtime (time_t *t) +{ + struct tm *local; + static char result[32]; + + local = localtime(t); + sprintf(result,"%2d/%02d/%4d %02d:%02d:%02d", + local->tm_mday, local->tm_mon+1, local->tm_year+1900, + local->tm_hour, local->tm_min, local->tm_sec); + return result; +} + + +/* regular expression matching */ + +#define ISSPECIAL(c) (((c) == '*') || ((c) == '/')) + +int ExprMatch(char *string,char *expr) +{ + while (1) + { + if (ISSPECIAL(*expr)) + { + if (*expr == '/') + { + if (*string != '\\' && *string != '/') + return 0; + string ++; expr++; + } + else if (*expr == '*') + { + if (*expr ++ == 0) + return 1; + while (*++string != *expr) + if (*string == 0) + return 0; + } + } + else + { + if (*string != *expr) + return 0; + if (*expr++ == 0) + return 1; + string++; + } + } +} + +/* recursive make directory */ +/* abort if you get 
an ENOENT errno somewhere in the middle */ +/* e.g. ignore error "mkdir on existing directory" */ +/* */ +/* return 1 if OK */ +/* 0 on error */ + +int makedir (char *newdir) +{ + char *buffer = strdup(newdir); + char *p; + int len = strlen(buffer); + + if (len <= 0) { + free(buffer); + return 0; + } + if (buffer[len-1] == '/') { + buffer[len-1] = '\0'; + } + if (mkdir(buffer, 0775) == 0) + { + free(buffer); + return 1; + } + + p = buffer+1; + while (1) + { + char hold; + + while(*p && *p != '\\' && *p != '/') + p++; + hold = *p; + *p = 0; + if ((mkdir(buffer, 0775) == -1) && (errno == ENOENT)) + { + fprintf(stderr,"%s: couldn't create directory %s\n",prog,buffer); + free(buffer); + return 0; + } + if (hold == 0) + break; + *p++ = hold; + } + free(buffer); + return 1; +} + +int matchname (int arg,int argc,char **argv,char *fname) +{ + if (arg == argc) /* no arguments given (untgz tgzarchive) */ + return 1; + + while (arg < argc) + if (ExprMatch(fname,argv[arg++])) + return 1; + + return 0; /* ignore this for the moment being */ +} + + +/* Tar file list or extract */ + +int untar (gzFile in, const char *dest) { + union tar_buffer buffer; + int len; + int err; + int getheader = 1; + int remaining = 0; + FILE *outfile = NULL; + char fname[BLOCKSIZE]; + time_t tartime; + + while (1) { + len = gzread(in, &buffer, BLOCKSIZE); + if (len < 0) + error (gzerror(in, &err)); + /* + * Always expect complete blocks to process + * the tar information. 
+ */ + if (len != BLOCKSIZE) + error("gzread: incomplete block read"); + + /* + * If we have to get a tar header + */ + if (getheader == 1) { + /* + * if we met the end of the tar + * or the end-of-tar block, + * we are done + */ + if ((len == 0) || (buffer.header.name[0]== 0)) break; + + tartime = (time_t)getoct(buffer.header.mtime,12); + strcpy(fname, dest); + if ((fname[strlen(fname)-1] != '/') && (fname[strlen(fname)-1] != '\\')) + strcat(fname, "/"); + strcat(fname, buffer.header.name); + + switch (buffer.header.typeflag) { + case DIRTYPE: + makedir(fname); + break; + case REGTYPE: + case AREGTYPE: + remaining = getoct(buffer.header.size,12); + if (remaining) { + outfile = fopen(fname,"wb"); + if (outfile == NULL) { + // try creating directory + char *p = strrchr(fname, '/'); + if (p != NULL) { + *p = '\0'; + makedir(fname); + *p = '/'; + outfile = fopen(fname,"wb"); + } + } +/* + fprintf(stderr, + "%s %s\n", + (outfile) ? "Extracting" : "Couldn't create", + fname); +*/ + } + else + outfile = NULL; + /* + * could have no contents + */ + getheader = (remaining) ? 0 : 1; + break; + default: + break; + } + } + else { + unsigned int bytes = (remaining > BLOCKSIZE) ? 
BLOCKSIZE : remaining; + + if (outfile != NULL) { + if (fwrite(&buffer,sizeof(char),bytes,outfile) != bytes) { + fprintf(stderr,"%s : error writing %s skipping...\n",prog,fname); + fclose(outfile); + unlink(fname); + } + } + remaining -= bytes; + if (remaining == 0) { + getheader = 1; + if (outfile != NULL) { +#ifdef WIN32 + HANDLE hFile; + FILETIME ftm,ftLocal; + SYSTEMTIME st; + struct tm localt; + + fclose(outfile); + + localt = *localtime(&tartime); + + hFile = CreateFile(fname, GENERIC_READ | GENERIC_WRITE, + 0, NULL, OPEN_EXISTING, 0, NULL); + + st.wYear = (WORD)localt.tm_year+1900; + st.wMonth = (WORD)localt.tm_mon; + st.wDayOfWeek = (WORD)localt.tm_wday; + st.wDay = (WORD)localt.tm_mday; + st.wHour = (WORD)localt.tm_hour; + st.wMinute = (WORD)localt.tm_min; + st.wSecond = (WORD)localt.tm_sec; + st.wMilliseconds = 0; + SystemTimeToFileTime(&st,&ftLocal); + LocalFileTimeToFileTime(&ftLocal,&ftm); + SetFileTime(hFile,&ftm,NULL,&ftm); + CloseHandle(hFile); + + outfile = NULL; +#else + struct utimbuf settime; + + settime.actime = settime.modtime = tartime; + + fclose(outfile); + outfile = NULL; + utime(fname,&settime); +#endif + } + } + } + } + return 0; +} + + +/* =========================================================== */ + +void help(int exitval) +{ + fprintf(stderr, + "untgz v 0.1\n" + " an sample application of zlib 1.0.4\n\n" + "Usage : untgz TGZfile to extract all files\n" + " untgz TGZfile fname ... 
to extract selected files\n" + " untgz -l TGZfile to list archive contents\n" + " untgz -h to display this help\n\n"); + exit(exitval); +} + +void error(const char *msg) +{ + fprintf(stderr, "%s: %s\n", prog, msg); + exit(1); +} + + +int untargz(int fd, const char *dest) { + gzFile *f; + + f = gzdopen(fd, "rb"); + if (f == NULL) { + fprintf(stderr,"%s: Couldn't gzopen file\n", prog); + return 1; + } + + return untar(f, dest); +} diff --git a/src/utilfuns/zlib/zutil.c b/src/utilfuns/zlib/zutil.c new file mode 100644 index 0000000..2eb7b46 --- /dev/null +++ b/src/utilfuns/zlib/zutil.c @@ -0,0 +1,225 @@ +/* zutil.c -- target dependent utility functions for the compression library + * Copyright (C) 1995-1998 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id: zutil.c,v 1.1 2001/03/23 09:00:15 scribe Exp $ */ + +#include "zutil.h" + +struct internal_state {int dummy;}; /* for buggy compilers */ + +#ifndef STDC +extern void exit OF((int)); +#endif + +const char *z_errmsg[10] = { +"need dictionary", /* Z_NEED_DICT 2 */ +"stream end", /* Z_STREAM_END 1 */ +"", /* Z_OK 0 */ +"file error", /* Z_ERRNO (-1) */ +"stream error", /* Z_STREAM_ERROR (-2) */ +"data error", /* Z_DATA_ERROR (-3) */ +"insufficient memory", /* Z_MEM_ERROR (-4) */ +"buffer error", /* Z_BUF_ERROR (-5) */ +"incompatible version",/* Z_VERSION_ERROR (-6) */ +""}; + + +const char * ZEXPORT zlibVersion() +{ + return ZLIB_VERSION; +} + +#ifdef DEBUG + +# ifndef verbose +# define verbose 0 +# endif +int z_verbose = verbose; + +void z_error (m) + char *m; +{ + fprintf(stderr, "%s\n", m); + exit(1); +} +#endif + +/* exported to allow conversion of error code to string for compress() and + * uncompress() + */ +const char * ZEXPORT zError(err) + int err; +{ + return ERR_MSG(err); +} + + +#ifndef HAVE_MEMCPY + +void zmemcpy(dest, source, len) + Bytef* dest; + const Bytef* source; + uInt len; +{ + if (len == 0) return; + do { + *dest++ = *source++; /* ??? 
to be unrolled */ + } while (--len != 0); +} + +int zmemcmp(s1, s2, len) + const Bytef* s1; + const Bytef* s2; + uInt len; +{ + uInt j; + + for (j = 0; j < len; j++) { + if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1; + } + return 0; +} + +void zmemzero(dest, len) + Bytef* dest; + uInt len; +{ + if (len == 0) return; + do { + *dest++ = 0; /* ??? to be unrolled */ + } while (--len != 0); +} +#endif + +#ifdef __TURBOC__ +#if (defined( __BORLANDC__) || !defined(SMALL_MEDIUM)) && !defined(__32BIT__) +/* Small and medium model in Turbo C are for now limited to near allocation + * with reduced MAX_WBITS and MAX_MEM_LEVEL + */ +# define MY_ZCALLOC + +/* Turbo C malloc() does not allow dynamic allocation of 64K bytes + * and farmalloc(64K) returns a pointer with an offset of 8, so we + * must fix the pointer. Warning: the pointer must be put back to its + * original form in order to free it, use zcfree(). + */ + +#define MAX_PTR 10 +/* 10*64K = 640K */ + +local int next_ptr = 0; + +typedef struct ptr_table_s { + voidpf org_ptr; + voidpf new_ptr; +} ptr_table; + +local ptr_table table[MAX_PTR]; +/* This table is used to remember the original form of pointers + * to large buffers (64K). Such pointers are normalized with a zero offset. + * Since MSDOS is not a preemptive multitasking OS, this table is not + * protected from concurrent access. This hack doesn't work anyway on + * a protected system like OS/2. Use Microsoft C instead. + */ + +voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) +{ + voidpf buf = opaque; /* just to make some compilers happy */ + ulg bsize = (ulg)items*size; + + /* If we allocate less than 65520 bytes, we assume that farmalloc + * will return a usable pointer which doesn't have to be normalized. 
+ */ + if (bsize < 65520L) { + buf = farmalloc(bsize); + if (*(ush*)&buf != 0) return buf; + } else { + buf = farmalloc(bsize + 16L); + } + if (buf == NULL || next_ptr >= MAX_PTR) return NULL; + table[next_ptr].org_ptr = buf; + + /* Normalize the pointer to seg:0 */ + *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4; + *(ush*)&buf = 0; + table[next_ptr++].new_ptr = buf; + return buf; +} + +void zcfree (voidpf opaque, voidpf ptr) +{ + int n; + if (*(ush*)&ptr != 0) { /* object < 64K */ + farfree(ptr); + return; + } + /* Find the original pointer */ + for (n = 0; n < next_ptr; n++) { + if (ptr != table[n].new_ptr) continue; + + farfree(table[n].org_ptr); + while (++n < next_ptr) { + table[n-1] = table[n]; + } + next_ptr--; + return; + } + ptr = opaque; /* just to make some compilers happy */ + Assert(0, "zcfree: ptr not found"); +} +#endif +#endif /* __TURBOC__ */ + + +#if defined(M_I86) && !defined(__32BIT__) +/* Microsoft C in 16-bit mode */ + +# define MY_ZCALLOC + +#if (!defined(_MSC_VER) || (_MSC_VER <= 600)) +# define _halloc halloc +# define _hfree hfree +#endif + +voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) +{ + if (opaque) opaque = 0; /* to make compiler happy */ + return _halloc((long)items, size); +} + +void zcfree (voidpf opaque, voidpf ptr) +{ + if (opaque) opaque = 0; /* to make compiler happy */ + _hfree(ptr); +} + +#endif /* MSC */ + + +#ifndef MY_ZCALLOC /* Any system without a special alloc function */ + +#ifndef STDC +extern voidp calloc OF((uInt items, uInt size)); +extern void free OF((voidpf ptr)); +#endif + +voidpf zcalloc (opaque, items, size) + voidpf opaque; + unsigned items; + unsigned size; +{ + if (opaque) items += size - size; /* make compiler happy */ + return (voidpf)calloc(items, size); +} + +void zcfree (opaque, ptr) + voidpf opaque; + voidpf ptr; +{ + free(ptr); + if (opaque) return; /* make compiler happy */ +} + +#endif /* MY_ZCALLOC */ diff --git a/src/utilfuns/zlib/zutil.h b/src/utilfuns/zlib/zutil.h new file 
mode 100644 index 0000000..53278ba --- /dev/null +++ b/src/utilfuns/zlib/zutil.h @@ -0,0 +1,220 @@ +/* zutil.h -- internal interface and configuration of the compression library + * Copyright (C) 1995-1998 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id: zutil.h,v 1.1 2001/03/23 09:00:15 scribe Exp $ */ + +#ifndef _Z_UTIL_H +#define _Z_UTIL_H + +#include "zlib.h" + +#ifdef STDC +# include <stddef.h> +# include <string.h> +# include <stdlib.h> +#endif +#ifdef NO_ERRNO_H + extern int errno; +#else +# include <errno.h> +#endif + +#ifndef local +# define local static +#endif +/* compile with -Dlocal if your debugger can't find static symbols */ + +/* Short aliases for the unsigned types; uchf and ushf are the FAR-qualified forms */ +typedef unsigned char uch; +typedef uch FAR uchf; +typedef unsigned short ush; +typedef ush FAR ushf; +typedef unsigned long ulg; + +extern const char *z_errmsg[10]; /* indexed by 2-zlib_error */ +/* (size given to avoid silly warnings with Visual C++) */ + +/* ERR_MSG maps an error code to its z_errmsg entry (index Z_NEED_DICT - err) */ +#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] + +/* ERR_RETURN stores the message for err in strm->msg, then returns err from + * the function that uses the macro. NOTE(review): strm is expanded without + * parentheses, so it looks safe only for a plain identifier argument -- + * confirm against callers. + */ +#define ERR_RETURN(strm,err) \ + return (strm->msg = (char*)ERR_MSG(err), (err)) +/* To be used only when the state is known to be valid */ + + /* common constants */ + +#ifndef DEF_WBITS +# define DEF_WBITS MAX_WBITS +#endif +/* default windowBits for decompression.
MAX_WBITS is for compression only */ + +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif +/* default memLevel */ + +#define STORED_BLOCK 0 +#define STATIC_TREES 1 +#define DYN_TREES 2 +/* The three kinds of block type */ + +#define MIN_MATCH 3 +#define MAX_MATCH 258 +/* The minimum and maximum match lengths */ + +#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ + + /* target dependencies */ + +/* Each platform test below may define OS_CODE (and, where needed, F_OPEN or + * fdopen). OS_CODE and F_OPEN left undefined here are filled in by the + * "Common defaults" section further down. + */ +#ifdef MSDOS +# define OS_CODE 0x00 +# if defined(__TURBOC__) || defined(__BORLANDC__) +# if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) + /* Allow compilation with ANSI keywords only enabled */ + void _Cdecl farfree( void *block ); + void *_Cdecl farmalloc( unsigned long nbytes ); +# else +# include <alloc.h> +# endif +# else /* MSC or DJGPP */ +# include <malloc.h> +# endif +#endif + +#ifdef OS2 +# define OS_CODE 0x06 +#endif + +#ifdef WIN32 /* Windows 95 & Windows NT */ +# define OS_CODE 0x0b +#endif + +#if defined(VAXC) || defined(VMS) +# define OS_CODE 0x02 +# define F_OPEN(name, mode) \ + fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") +#endif + +#ifdef AMIGA +# define OS_CODE 0x01 +#endif + +#if defined(ATARI) || defined(atarist) +# define OS_CODE 0x05 +#endif + +#if defined(MACOS) || defined(TARGET_OS_MAC) +# define OS_CODE 0x07 +# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os +# include <unix.h> /* for fdopen */ +# else +# ifndef fdopen +# define fdopen(fd,mode) NULL /* No fdopen() */ +# endif +# endif +#endif + +#ifdef __50SERIES /* Prime/PRIMOS */ +# define OS_CODE 0x0F +#endif + +#ifdef TOPS20 +# define OS_CODE 0x0a +#endif + +#if defined(_BEOS_) || defined(RISCOS) +# define fdopen(fd,mode) NULL /* No fdopen() */ +#endif + +#if (defined(_MSC_VER) && (_MSC_VER > 600)) +# define fdopen(fd,type) _fdopen(fd,type) +#endif + + + /* Common defaults */ + +#ifndef OS_CODE +# define OS_CODE 0x03 /* assume Unix */ +#endif + +#ifndef F_OPEN
+# define F_OPEN(name, mode) fopen((name), (mode)) +#endif + + /* functions */ + +/* zstrerror(errnum) is strerror(errnum) when HAVE_STRERROR is defined and an + * empty string literal otherwise. + */ +#ifdef HAVE_STRERROR + extern char *strerror OF((int)); +# define zstrerror(errnum) strerror(errnum) +#else +# define zstrerror(errnum) "" +#endif + +#if defined(pyr) +# define NO_MEMCPY +#endif +#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__) + /* Use our own functions for small and medium model with MSC <= 5.0. + * You may have to use the same strategy for Borland C (untested). + * The __SC__ check is for Symantec. + */ +# define NO_MEMCPY +#endif +#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY) +# define HAVE_MEMCPY +#endif +/* zmemcpy/zmemcmp/zmemzero resolve to the C library routines (the _f* forms + * under SMALL_MEDIUM) when HAVE_MEMCPY is defined; otherwise they are only + * declared here as external functions. + */ +#ifdef HAVE_MEMCPY +# ifdef SMALL_MEDIUM /* MSDOS small or medium model */ +# define zmemcpy _fmemcpy +# define zmemcmp _fmemcmp +# define zmemzero(dest, len) _fmemset(dest, 0, len) +# else +# define zmemcpy memcpy +# define zmemcmp memcmp +# define zmemzero(dest, len) memset(dest, 0, len) +# endif +#else + extern void zmemcpy OF((Bytef* dest, const Bytef* source, uInt len)); + extern int zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len)); + extern void zmemzero OF((Bytef* dest, uInt len)); +#endif + +/* Diagnostic functions: with DEBUG defined, Assert calls z_error(msg) when + * cond is false and the Trace* macros call fprintf gated on z_verbose (and on + * c for Tracec/Tracecv); without DEBUG they all expand to nothing. + * NOTE(review): the DEBUG forms are bare { } blocks, so "Assert(...);" ahead + * of an else may not parse as intended -- confirm usage. + */ +#ifdef DEBUG +# include <stdio.h> + extern int z_verbose; + extern void z_error OF((char *m)); +# define Assert(cond,msg) {if(!(cond)) z_error(msg);} +# define Trace(x) {if (z_verbose>=0) fprintf x ;} +# define Tracev(x) {if (z_verbose>0) fprintf x ;} +# define Tracevv(x) {if (z_verbose>1) fprintf x ;} +# define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;} +# define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + + +/* check_func: pointer to a function that takes a uLong check value, a buffer + * and its length, and returns a uLong. + */ +typedef uLong (ZEXPORT *check_func) OF((uLong check, const Bytef *buf, + uInt len)); +voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size)); +void zcfree OF((voidpf opaque, voidpf ptr)); + +/* ZALLOC and ZFREE invoke the stream's zalloc/zfree callbacks, passing + * strm->opaque as the first argument; TRY_FREE calls ZFREE only when p is + * non-zero. + */ +#define ZALLOC(strm,
items, size) \ + (*((strm)->zalloc))((strm)->opaque, (items), (size)) +#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) +#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} + +#endif /* _Z_UTIL_H */ |