#!/usr/bin/perl -w # # This tool is supposed to convert the ccat Parallel MT/LXX # to a valid OSIS file. # # @author Martin Gruner # @copyright GPL # use strict; binmode(STDOUT, ":utf8"); #see "man perluniintro" my $prefix = "./"; my $MorphologicalSegmentStart = "" ; my $MorphologicalSegmentEnd = "" ; my $MorphologicalDivisionMarker = $MorphologicalSegmentEnd . $MorphologicalSegmentStart; my $hebrewLetters="A-Z\(\)\+\#\$\*\&/r"; #used in a character class of a regexp later my %hebrew2utf8 = ( ")" =>chr(0x05D0), #HEBREW LETTER ALEF "A" =>chr(0x05D0), #HEBREW LETTER ALEF # TODO: check, this is from an occurrence of ABRHM "B" =>chr(0x05D1), #HEBREW LETTER BET "G" =>chr(0x05D2), #HEBREW LETTER GIMEL "D" =>chr(0x05D3), #HEBREW LETTER DALET "H" =>chr(0x05D4), #HEBREW LETTER HE "W" =>chr(0x05D5), #HEBREW LETTER VAV "Z" =>chr(0x05D6), #HEBREW LETTER ZAYIN "z" =>chr(0x05D6), #HEBREW LETTER ZAYIN # TODO: this occured in a retranslation "X" =>chr(0x05D7), #HEBREW LETTER HET "+" =>chr(0x05D8), #HEBREW LETTER TET "Y" =>chr(0x05D9), #HEBREW LETTER YOD "I" =>chr(0x05D9), #HEBREW LETTER YOD # TODO: this occured in a retranslation (KI) "K" =>chr(0x05DB), #HEBREW LETTER KAF "L" =>chr(0x05DC), #HEBREW LETTER LAMED "M" =>chr(0x05DE), #HEBREW LETTER MEM "N" =>chr(0x05E0), #HEBREW LETTER NUN "S" =>chr(0x05E1), #HEBREW LETTER SAMEKH "(" =>chr(0x05E2), #HEBREW LETTER AYIN "P" =>chr(0x05E4), #HEBREW LETTER PE "C" =>chr(0x05E6), #HEBREW LETTER TSADI "Q" =>chr(0x05E7), #HEBREW LETTER QOF / KOF "R" =>chr(0x05E8), #HEBREW LETTER RESH "r" =>chr(0x05E8), #HEBREW LETTER RESH # TODO: this occured in a retranslation "#" =>chr(0x05E9).chr(0x05C1), #HEBREW LETTER SHIN == SIN/SHIN without dot "\$" =>chr(0x05E9).chr(0x05C1), #HEBREW LETTER SHIN + SHIN DOT == SHIN "&" =>chr(0x05E9).chr(0x05C2), #HEBREW LETTER SHIN + SIN DOT == SIN "T" =>chr(0x05EA), #HEBREW LETTER TAV "-" =>chr(0x05BE), #MAQQEF # Special stuff "*" =>"*Ketiv.", # TODO:FIX "**" =>"**Qere.", # "/" => $MorphologicalDivisionMarker, "," => 
",", #separate words in colB "?" => "Uncertain." #HACK ); my %final_hebrew2utf8 = ( "K" =>chr(0x05DA), #HEBREW LETTER FINAL KAF # TODO: HANDLE FINAL LETTERS "M"=>chr(0x05DD), #HEBREW LETTER FINAL MEM "N" =>chr(0x05DF), #HEBREW LETTER FINAL NUN "P" =>chr(0x05E3), #HEBREW LETTER FINAL PE "C" =>chr(0x05E5), #HEBREW LETTER FINAL TSADI ); my $greekLetters = "A-Z#3\(\)+=|\\/"; my %greek2utf8 = ( "\\"=>chr(0x0300), #COMBINING GRAVE ACCENT "/" =>chr(0x0301), #COMBINING ACUTE ACCENT "+" =>chr(0x0308), #COMBINING DIAERESIS "=" =>chr(0x0342), #COMBINING GREEK PERISPOMENI / CIRCUMFLEX ")" =>chr(0x0313), #COMBINING COMMA ABOVE / SMOOTH BREATHING "(" =>chr(0x0314), #COMBINING REVERSED COMMA ABOVE / ROUGH BREATHING "|" =>chr(0x0345), #COMBINING GREEK YPOGEGRAMMENI / IOTA SUBSCRIPT "'" => "'", "?" => "Uncertain.", #HACK "^" => "^", "-" => "-", #occurs in the text ":" => ":", #occurs in the text!? "!" => "!", #occurs in the text!? "|" => "|", #occurs in the text!? "A" =>chr(0x03B1), #GREEK SMALL LETTER ALPHA "B" =>chr(0x03B2), #GREEK SM LETT BETA / SM LETTER BETA BEGINNING OF WORD "G" =>chr(0x03B3), #GREEK SMALL LETTER GAMMA "D" =>chr(0x03B4), #GREEK SMALL LETTER DELTA "E" =>chr(0x03B5), #GREEK SMALL LETTER EPSILON "e" =>chr(0x03B5), #GREEK SMALL LETTER EPSILON # occurs in Neh "V" =>chr(0x03DD), #GREEK SMALL LETTER DIGAMMA "Z" =>chr(0x03B6), #GREEK SMALL LETTER ZETA "H" =>chr(0x03B7), #GREEK SMALL LETTER ETA "Q" =>chr(0x03B8), #GREEK SMALL LETTER THETA "I" =>chr(0x03B9), #GREEK SMALL LETTER IOTA "K" =>chr(0x03BA), #GREEK SMALL LETTER KAPPA "L" =>chr(0x03BB), #GREEK SMALL LETTER LAMDA "M" =>chr(0x03BC), #GREEK SMALL LETTER MU "N" =>chr(0x03BD), #GREEK SMALL LETTER NU "n" =>chr(0x03BD), #GREEK SMALL LETTER NU # occurs in Neh "C" =>chr(0x03BE), #GREEK SMALL LETTER XI "O" =>chr(0x03BF), #GREEK SMALL LETTER OMICRON "P" =>chr(0x03C0), #GREEK SMALL LETTER PI "R" =>chr(0x03C1), #GREEK SMALL LETTER RHO "S" =>chr(0x03C3), #GREEK SMALL LETTER SIGMA "J" =>chr(0x03C2), #GREEK SM LETT FINAL 
SIGMA / SM LETT SIGMA END OF WORD "T" =>chr(0x03C4), #GREEK SMALL LETTER TAU "U" =>chr(0x03C5), #GREEK SMALL LETTER UPSILON "F" =>chr(0x03C6), #GREEK SMALL LETTER PHI "X" =>chr(0x03C7), #GREEK SMALL LETTER CHI "Y" =>chr(0x03C8), #GREEK SMALL LETTER PSI "W" =>chr(0x03C9), #GREEK SMALL LETTER OMEGA ); sub createNote(){ my $noteText = shift; return("$noteText "); } sub openNote(){ my $noteText = shift; return("$noteText "); } sub closeNote(){ my $noteText = shift; return("$noteText "); } my %notes = ( "{#}" => "Asterized passage (in Job).", "{g}" => "Reference to difference between the text of Rahlfs and that of the relevant Göttingen edition.", "..a" => "Word included in one of the Aramaic sections.", ",,a" => "Word included in one of the Aramaic sections.", #my addition, ok "*" => "Ketib.", "**" => "Qere.", "*z" => "Qere wela ketib, ketib wela qere.", #"[ ]" => "Reference of number of verse in LXX, different from MT.", # TODO: MAKE USE OF IT #"[[ ]]" => "Reference number of verse in MT, different from the LXX.", "{x}" => "UNKNOWN", # TODO: FIX "--- {x}" => "Apparent minus created by lack of equivalence between long stretches of text in the LXX and MT.", "--+ {x}" => "Apparent plus created by lack of equivalence between long stretches of text in the LXX and MT.", "{...}" => "Equivalent reflected elsewhere in the text, disregarded by indexing program.", "~" => "Difference in sequence between MT and LXX, denoted after the first Hebrew word and before the second one, as well as between two Greek words.", "~~~" => "Equivalent of the Hebrew or Greek word(s) occurring elsewhere in the verse or context (transposition).", "{..~}" => "Stylistic or grammatical transposition.", "{..}" => "Stylistic or grammatical transposition.", # TODO: occurs in the text, unknown meaning "{...}" => "Stylistic or grammatical transposition.", # TODO: occurs in the text, unknown meaning "{....}" => "Stylistic or grammatical transposition.", # TODO: occurs in the text, unknown meaning "---" => 
"In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX).", "---?" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX)?", "--" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX).", # TODO: my addition, check, probably wrong "{---%}" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX).", # TODO: my addition, check?? "--?" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX)?", # TODO: my addition, check, probably wrong "--+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", "---+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", # TODO: my addition "-.-" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", # TODO: my addition, check? "-+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", # TODO: my addition "'" => "Long minus or plus (at least four lines).", # TODO: my addition, check "''" => "Long minus or plus (at least four lines).", "{d}" => "Reference to doublet (occurring between the two elements of the doublet).", "={d};" => "Reference to doublet (occurring between the two elements of the doublet).", # TODO: occurs, check? "{d?}" => "Reference to doublet (occurring between the two elements of the doublet)?", "{d}?" => "Reference to doublet (occurring between the two elements of the doublet)?", "{?d}" => "Reference to doublet (occurring between the two elements of the doublet)?", "{..d}" => "Distributive rendering, occurring once in the translation but referring to more than one Hebrew word.", "{..r}" => "Notation in Hebrew column of elements repeated in the translation.", "?" => "Questionable notation, equivalent, etc.", "??" 
=> "Questionable notation, equivalent, etc.", # TODO: my addition "{p}" => "Greek preverb representing Hebrew preposition.", "{p}+" => "Greek preverb representing Hebrew preposition.", # TODO: my addition ??? "{pm}" => "Greek preverb representing Hebrew preposition.", # TODO: my addition, check "{..p}" => "Preposition added in the LXX in accordance with the rules of the Greek language or translational habits.", "{...p}" => "Preposition added in the LXX in accordance with the rules of the Greek language or translational habits.", #my addition "{..^{p}" => "Preposition added in the LXX in accordance with the rules of the Greek language or translational habits.", #my addition "{+}" => "Unknown.", # TODO: occurs in the text, unknown meaning "{+?}" => "Unknown?", # TODO: occurs in the text, unknown meaning "{!}" => "Infinitive absolute.", "{!}+" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}n" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}na" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}na+" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}ad" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}aj" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}nad" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}nd" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}ndd" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}nd+" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}p" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}p+" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}pc" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}pd" => "Infinitive absolute.", # TODO: occurs in the text, but 
unknown meaning "{!}-" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}v" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{s}" => "Hebrew M/, MN (comparative, superlative) reflected by Greek comparative or superlative.", # TODO: UTF-8 "{t}" => "Transliterated Hebrew word.", "{dt}" => "Transliterated Hebrew word.", # TODO: occurs. DOUBLET? "{t.}" => "Transliterated Hebrew word.", # TODO: unknown "" => "Transliterated Hebrew word.", # TODO: unknown "{t?}" => "Transliterated Hebrew word?", "{c}" => "Unknown.", # TODO: unknown "{c}?" => "Unknown.", # TODO: unknown "{z}" => "Unknown.", # TODO: unknown "" => "Unknown.", # TODO: unknown "#\"" => "Unknown.", # TODO: unknown ".vs" => "Unknown.", # TODO: unknown, can hardly be a change of cons. Ezek 40:49 "#" => "Long line continuing in next one, placed both at the end of the line running over and at the beginning of the following line in the opposite column.", "{v}" => "The reading of the main text of the LXX seems to reflect a secondary text, while the \"original\" reading is reflected in a variant.", # Notes regarding ColB of the Hebrew #"=" => "Introducing col. b of the Hebrew (a selection of retroverted readings, presumably found in the parent text of the LXX).", "={d}" => "Reference to doublet (occurring between the two elements of the doublet).", # TODO: my addition, check "={d}\@" => "Reference to doublet (occurring between the two elements of the doublet). Etymological exegesis.", # TODO: my addition, check "={d?}" => "Reference to doublet (occurring between the two elements of the doublet)?", # TODO: my addition, check "=\%" => "Introducing categories of translation technique recorded in col. b.", "=\%vap" => "Change from active to passive form in verbs.", "=\%vpa" => "Change from passive to active form in verbs.", "=\%vpa?" 
=> "Change from passive to active form in verbs?", "\%vpa" => "Change from passive to active form in verbs.", "=vpa" => "Change from passive to active form in verbs.", # TODO: my addition, check "=\%p" => "Difference in preposition or particle.", "=p\%" => "Difference in preposition or particle.", "=\%p?" => "Difference in preposition or particle?", "=\%?p" => "Difference in preposition or particle?", "=p" => "Difference in preposition or particle.", # TODO: my addition, check, uncertain? "={d}\%p" => "Difference in preposition or particle.", # TODO: my addition, check, uncertain? DOUBLET? "=\%pa" => "Difference in preposition or particle.", # TODO: my addition, check "=\%p=" => "Difference in preposition or particle.", # TODO: my addition, check "=\%p+" => "Addition of preposition or particle.", "\%p+" => "Addition of preposition or particle.", #my addition "=\%p+?" => "Addition of preposition or particle?", "=\%p-" => "Omission of preposition or particle.", "=\%p-?" => "Omission of preposition or particle?", "=\%?p-" => "Omission of preposition or particle?", "=p\%-" => "Omission of preposition or particle.", # TODO: my addition, check, uncertain? "=p-" => "Omission of preposition or particle.", # TODO: my addition, check, uncertain? "=;" => "Retroversion in col. b based on equivalence occurring in immediate or remote context.", #"G" => "Hebrew variant, but at this stage no plausible retroversion is suggested.", "=+" => "Difference in numbers between MT and the LXX.", "=\@" => "Etymological exegesis.", "=?\@" => "Etymological exegesis?", #my addition "=\@?" => "Etymological exegesis?", #my addition "=\@...a" => "Etymological exegesis according to Aramaic.", "=\@a" => "Etymological exegesis according to Aramaic.", # TODO: my addition, check "=:" => "Introducing reconstructed proper noun.", "=:?" 
=> "Introducing reconstructed proper noun?", ":" => "Introducing reconstructed proper noun.", #my addition "=v" => "Difference in vocalization (reading).", "={d}v" => "Difference in vocalization (reading).", # TODO: check, occurs in text, DOUBLET? "=v?" => "Difference in vocalization (reading)?", "=vs" => "Difference in vocalization (reading).", # TODO: check, occurs in text "=>" => "Difference in vocalization (reading).", # TODO: check, occurs in text "=r" => "Incomplete retroversion.", "=a" => "Aramaic?", # TODO: UNKNOWN "{*}" => "Agreement of LXX with ketib.", "{**}" => "Agreement of LXX with qere.", "{**" => "Agreement of LXX with qere.", #my addidion "{**?}" => "Agreement of LXX with qere?", #my addition "." => "Interchange of consonants between MT and the presumed Hebrew parent text of the LXX.", ".a" => "Interchange of consonants between MT and the presumed Hebrew parent text of the LXX.", # TODO: occurs, unknown ".m" => "Metathesis of consonants between MT and the presumed Hebrew parent text of the LXX.", ".z" => "Possible abbreviation.", ".s" => "One word of MT separated into two or more words in the parent text of the LXX.", ".j" => "Two words of MT joined into one word in the parent text of the LXX.", ".w" => "Different word-division reflected in the parent text of the LXX.", "(!)" => "(!)", #my addition " "" => "", #TODO: FIX, occurs in text "" => "", #TODO: FIX, occurs in text "^" => "^", #Notsure what these are "?^" => "?^", #Notsure what these are "^^^" => "^^^", "___" => "___", ); sub translateHebrewNote(){ my $origNote = shift; # print("TranslateHebrewNote $origNote\n"); (not $origNote) and die("Hebrew note empty."); ($origNote eq "=") and return; #= only marks colB, no real note ($notes{ $origNote }) and return( &createNote( $notes{$origNote} ) ); ($origNote =~ m/^=?\.([a-z()&\$+-])([a-z()&\$+-]+)$/) and return( &createNote("Interchange of consonants (" . &translateHebrewLetters( uc( $1 ) ) . "/" . &translateHebrewLetters( uc( $2 ) ) . 
") between MT and the presumed Hebrew parent text of the LXX.") ); ($origNote =~ m/^\[([^\]]+)\]/) and return( &createNote( "Number of verse in LXX ($1) is different from MT." ) ); #special case: no note, but a crossref (no book ID) # TODO: for now OSIS refs are not parsed FIX THIS, see above ($origNote =~ m/^=?\[\[|\]\]$/) and return $origNote; ($origNote =~ m/^=?{\.\.d(.+)}/) and return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..d}" }) ); ($origNote =~ m/^=?{\.\.\.r(.+)}/) and return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..r}" }) ); ($origNote =~ m/^=?{\.\.r(.+)}/) and return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..r}" }) ); ($origNote =~ m/^=?{\.\.\.(.+)}/) and return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{...}" }) ); ($origNote =~ m/^=?{\.\.(.+)}/) and return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..}" }) ); #Special cases: the note includes more than one word, "cat" the results together ($origNote =~ m/^=?{\.\.\.([^}]+)$/) and return( &openNote( $notes{"{...}"} . "(".&translateHebrewWordorNote($1) ." " ) ); ($origNote =~ m/^=?{\.\.r([^}]+)$/) and return( &openNote( $notes{"{..r}"} . "(".&translateHebrewWordorNote($1) ." " ) ); ($origNote =~ m/^=?{\.\.([^}]+)$/) and return( &openNote( $notes{"{..}"} . "(".&translateHebrewWordorNote($1) ." " ) ); ($origNote =~ m/^([^{]+)}$/) and return( &closeNote(&translateHebrewWordorNote( $1 ).")" ) ); ($origNote =~ m/^=?@([?$hebrewLetters]+)/) and return( &createNote( $notes{ "=\@" } ) . &translateHebrewWordorNote( $1 ) ); ($origNote =~ m/^=?\^([?$hebrewLetters]+)/) and return( "^" . &translateHebrewWordorNote( $1 ) ); # TODO: check, what is ^? 
# # Special handling for the = colB Notes # if (($origNote =~ m/^=/) and (not $notes{ $origNote } )) { #only split if the note does not exist, to avoid parsing problems # print("note reads $origNote\n"); if ($origNote =~ m/^=(<[0-9.a-z]+>)$/){ return( &translateHebrewWordorNote( $1 ) ); } elsif ($origNote =~m/^=(.+)$/ and $notes{ $1 }){ return( &translateHebrewWordorNote( $1 ) ); } elsif ($origNote =~ m/^=([?$hebrewLetters]+)/){ return( &translateHebrewWordorNote( $1 ) ); } } #special case: no note, but a crossref (no book ID) # TODO: for now OSIS refs are not parsed ($origNote =~ m/^<|>$/) and # return(""); return $origNote; ($origNote =~ m/^.+[.].+$/) and #Occurs e.g.: "?$/) and return $origNote; #special case: no note, but a crossref (no book ID) # TODO: for now OSIS refs are not parsed ($origNote =~ m/^{=\d+}$/) and return $origNote; ($origNote =~ m/^([^{]+)}$/) and return( &closeNote(&translateGreekWordorNote( $1 ).")" ) ); ($origNote =~ m/^([^{]+)}\?$/) and return( &closeNote(&translateGreekWordorNote( $1 )."?)" ) ); ($origNote =~ m/^(.+),(.+)$/) and # 2 Notes / Words, split up, but only at the end return( &translateGreekWordorNote( $1 ). "," .&translateGreekWordorNote( $2 ) ); ($origNote eq "}") and return $origNote; #special case in EZEK 47:20, superfluous bracket ($origNote =~ m/^{([^}]+}?)$/) and # TODO: occurs, seems unreasonable return( "(" .&translateGreekWordorNote($1) ." " ); HANDLE_NOTE_FALLBACK: for my $i ( 1..(length($origNote)-1) ){ #last try, split up into chunks my $currentLength = length( $origNote ) - 1 - $i; #start with the longest and become shorter, to find the complicated notes {!}p before the simple {!} if ( $notes{ substr($origNote,0,$currentLength) } ){ return( &translateGreekNote(substr($origNote,0,$currentLength)) . 
# (Tail of the Greek note translator -- presumably translateGreekNote -- whose
#  beginning sits on the preceding source line. The expression below finishes
#  the fallback `return( ... . ` started there; it is reproduced unchanged.)
                &translateGreekWordorNote(substr($origNote,$currentLength, length($origNote) - $currentLength) ) );
        }
    }
    die("Note $origNote not found.\n");
}

# NOTE(review): calls to the script's own subs keep the `&name(...)` form on
# purpose. Subs defined outside this section still carry an empty `()`
# prototype, and the `&` form is what lets them be called with arguments.

#
# translateGreekLetters - map every character of a transliterated Greek string
# through %greek2utf8 and return the concatenated result.
#   $greek : the transliterated string (must be non-empty)
# Dies if the string is empty or contains a character missing from %greek2utf8.
#
sub translateGreekLetters {
    my $greek = shift;
    (not $greek) and die "Greek string empty...\n";

    my $result;
    foreach my $letter (split(//, $greek)) {
        my $utf8 = $greek2utf8{$letter};
        defined($utf8) or die("Could not find Greek letters in $greek\n");
        $result .= $utf8;
    }
    return $result;
}

#
# translateGreekWordorNote - translate one whitespace-separated token of the
# Greek column. Tokens that are notes are handed to translateGreekNote,
# compound tokens are split and translated piecewise, and everything else is
# treated as a plain word and passed to translateGreekLetters.
#   $greek : the token (must be non-empty)
# Returns the translated string; dies on an empty token.
#
sub translateGreekWordorNote {
    my $greek = shift;
    if (not $greek) { die("Greek string empty.") }
#   printf("TranslateGreek of $greek\n");

    # A token is a note when it is a known key (exact match first), starts
    # with a character outside $greekLetters, or ends in "}" / "}?".
    return &translateGreekNote($greek) if $notes{$greek};
    return &translateGreekNote($greek) if $greek =~ m/^[^$greekLetters]/;
    return &translateGreekNote($greek) if $greek =~ m/[}]\??$/;
    return &translateGreekNote("#")    if $greek eq "#";

    # Word with a trailing [..] note. The original had a second, narrower
    # pattern (/^(.+)(\[\d+\])$/) after this one; it could never be reached
    # because this pattern matches everything that one does, so it is gone.
    # Captures are copied first so the recursive calls cannot disturb them.
    if ($greek =~ m/(.+)(\[.+\])$/) {
        my ($word, $bracketNote) = ($1, $2);
        return &translateGreekWordorNote($word) . &translateGreekNote($bracketNote);
    }

    # TODO: occurs e.g. {..bla1 bla2...bla3
    # TODO: format output
    if ($greek =~ m/^([^.]+)\.\.\.([^.]+)$/) {
        my ($before, $after) = ($1, $2);
        return( &translateGreekWordorNote($before) . "..." . &translateGreekWordorNote($after) );
    }

    return &translateGreekLetters($greek);
}

# Set to 1 by parseLine() as soon as a line contributes a word to Hebrew
# column B; reset by grabVerseContent() before each verse is parsed.
my $colBContent;

#
# parseLine - turn one line of the parallel file into one output row.
# The line is first patched for known defects of the source text, then split
# at the tab into Hebrew column A, Hebrew column B (starts with "=") and the
# Greek column; every word of every column is translated.
#   $origLine : the raw line
# Returns the row as a string, or nothing for a line that is deliberately
# ignored. Dies if the line cannot be split or a column is missing.
# Side effect: sets $colBContent when column B has content.
#
sub parseLine {
    my $origLine = shift;
#   printf("parsing %s\n", $origLine);

    # Literal braces in the patterns below are escaped; newer perls warn about
    # or reject some unescaped "{" in a pattern. The matched text is unchanged.
    $origLine =~ s/ --=/ --+ =/;       # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
    $origLine =~ s/ -\%vap/ =\%vap/;   # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
    $origLine =~ s/ ;=/ =;/;           # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
    $origLine =~ s/ \+;/ =;/;          # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
    $origLine =~ s/[\ ]{10,}/\t/;      # TODO: UGLY HACK, sometimes the tab in the wrong place but large spaces in the right
    # NOTE(review): the hack that followed here arrived damaged. Only
    # `$origLine =~ s/\.h-/;` survives; the rest of its pattern and its
    # replacement are missing, so it cannot compile. It is disabled until it
    # is restored from the upstream script.
    # $origLine =~ s/\.h-/; # TODO: UGLY HACK, sometimes the tab in the wrong place but large spaces
    $origLine =~ s/\}\{/} {/;          # TODO: UGLY HACK, notes hung together
    $origLine =~ s/=a\$\/DY/=A\$\/DY/; # TODO: UGLY HACK, Hebrew letter wrong
    $origLine =~ s/\{\.\.\^EPIQEI\\S\.\.\^E\)FI\/LHSA\}/{..^EPIQEI\\S E)FI\/LHSA}/; # TODO: UGLY HACK, strange note
    $origLine =~ s/E\t\)KPE\/SH\|/\tE)KPE\/SH|/;             # occurs, tab misplaced
    $origLine =~ s/^\(..r\(L\/YK\}/{..r(L\/YK}/;             # in EZEK
    $origLine =~ s/^DANW \{t\}$/DANW\t{t}/;                  # in DAN
    $origLine =~ s/AI\)W=NOS\[110\.10/AI)W=NOS [110.10/;     # in PS
    $origLine =~ s/W\/YD\(Y\{\*\*\}/W\/YD(Y {**}/;           # in PS
    $origLine =~ s/\{\.1\.dU\(PE\\R\}/{..dU(PE\\R}/;         # in PS

    $origLine =~ m/^W\(\/SPER/ and return; #ignore, probably an error

    # Whole lines that are replaced outright (exact match => corrected line).
    # NOTE(review): whitespace inside these literals may have been altered
    # along with the rest of the file's layout -- verify against the data.
    my %wholeLineFix = (
        # TODO: hack, Tab missing
        "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY KAI\\ TO\\N AMORRAI=ON "
            => "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY\tKAI\\ TO\\N AMORRAI=ON",
        # in JoshB: Tab misplaced. The replacement used to read "KAI\ OI(",
        # which silently dropped the backslash; it is now escaped like the
        # other entries.
        "W/H/KHNYM =W/H/)BNYM .m .kb # KAI\\ OI( LI/QOI "
            => "W/H/KHNYM =W/H/)BNYM .m .kb\tKAI\\ OI( LI/QOI",
        # In EZEK: TODO: error, greek in first col
        "{...?AU)TOU=} MDBR =v\tLALOU=NTOS"
            => "MDBR =v\tLALOU=NTOS",
        # TODO: hack, Tab missing
        "W/YC+YRW =;W/YC+YDW .rd <9.12 E)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO "
            => "W/YC+YRW =;W/YC+YDW .rd <9.12\tE)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO",
        # TODO: hack, TAB misplaced
        "W/YBW) {...EI)S}\tKAI\\ EI)SH=LQEN"
            => "W/YBW)\t{...EI)S} KAI\\ EI)SH=LQEN",
        # TODO: hack, TAB misplaced
        "W/L) {..^OU)}\tDE\\"
            => "W/L)\t{..^OU)} DE\\",
    );
    if (exists $wholeLineFix{$origLine}) {
        $origLine = $wholeLineFix{$origLine};
    }
#   print("parsing %s\n", $origLine);

    # 3 columns = Hebrew ColA, Hebrew ColB, Greek
    ($origLine =~ m/^([^=\t]+)?([=][^\t]*)?\t(.+)$/) or die("No match in parseLine().\n");
    my ($colA, $colB, $greekCol) = ($1, $2, $3);
    ($colA or $colB) or die("Hebrew not found.\n");
    $greekCol or die("Greek not found.\n");
#   printf("1: $1 2: $2 3: $3\n");

    # Declared unconditionally: the old `$1 and my @words = ...` form makes
    # the declaration itself conditional, which perl documents as undefined
    # behaviour.
    my @hebrewWordsColA = $colA ? split(/\s+/, $colA) : ();
    my @hebrewWordsColB = $colB ? split(/\s+/, $colB) : ();
    my @greekWords      = split(/\s+/, $greekCol);

    # NOTE(review): the "\n " / "\n" separators look like what is left of
    # row/cell markup that was stripped from this file -- confirm upstream.
    my $result;
    $result .= "\n ";
    foreach my $wordA (@hebrewWordsColA) {
        ($wordA) and $result .= &translateHebrewWordorNote( $wordA ) . " ";
    }
    $result .= "\n ";
    foreach my $wordB (@hebrewWordsColB) {
        if ($wordB) {
            $result .= &translateHebrewWordorNote( $wordB ) . " ";
            $colBContent = 1;
        }
    }
    $result .= "\n ";
    foreach my $wordG (@greekWords) {
        ($wordG) and $result .= &translateGreekWordorNote( $wordG ). " ";
    }
    $result .= "\n";
#   printf("Result: %s\n", $result);
    return $result;
}

# Table headers for a verse with / without content in Hebrew column B.
# NOTE(review): both literals appear to have lost their markup.
my $headerWithColB = "\n MT(LXX)LXX\n";
my $headerWithoutColB = "\n MTLXX\n";

#
# grabVerseContent - if the Verse can be found, returns its Content, otherwise nothing
#   $bookname, $chapter, $verse : the verse to look for
#   remaining arguments         : the lines of the book file
# A verse starts at the line "<book> <chapter>:<verse>" ("<book> <verse>" for
# Obad, which has no chapters and is only searched for chapter 1) and runs to
# the next blank line. Returns the matching table header followed by one
# parsed row per line, or nothing when the verse does not exist.
# Side effect: resets $colBContent, which parseLine() may set again.
#
sub grabVerseContent {
    my $bookname = shift;
    my $chapter  = shift;
    my $verse    = shift;
    my $lines    = \@_;   # read the caller's lines in place instead of copying them on every lookup
#   printf("Parsing $bookname $chapter:$verse...\n");

    $colBContent = undef;

    # The header is quoted and must not be followed by another digit. The
    # unanchored original let a lookup for verse 2 match "...:20" whenever
    # verse 2 itself was missing.
    my $verseHeader;
    if ($bookname eq "Obad") { #special handling, no chapter:verse structure
        return if $chapter != 1;   #only for the first chapter
        $verseHeader = qr/^\Q$bookname\E \Q$verse\E(?!\d)/;
    }
    else {
        $verseHeader = qr/^\Q$bookname\E \Q$chapter\E:\Q$verse\E(?!\d)/;
    }

    foreach my $index (0 .. $#{$lines}) {
        next unless $lines->[$index] =~ $verseHeader;

        # Collect rows up to the next blank line, stopping at the end of the
        # buffer rather than reading past it.
        my @rows;
        my $current = $index + 1;
        while ($current <= $#{$lines} and $lines->[$current] !~ m/^\n|^\s*$/) {
            push(@rows, &parseLine( $lines->[$current] ));
            $current++;
        }
        return( ($colBContent ? $headerWithColB : $headerWithoutColB), @rows );
    }
    return; #Nothing found, don't return a value.
}

#
# processBook - convert one book file.
#   $filename         : file name, relative to $prefix
#   $bookname_infile  : book id as used in the verse headers of the file
#   $thml_id, $osis_id, $short_book_title : not referenced in the code as it
#       stands. NOTE(review): presumably interpolated into markup that has
#       been stripped from the string literals below.
# Walks chapters and verses upwards from 1; a chapter ends at its first
# missing verse, the book at the first chapter without any verse. Returns the
# output fragments as a list; dies if chapter 1 has no content.
# NOTE(review): the empty and newline-only string literals look like the
# remains of stripped markup; they are reproduced exactly as found.
#
sub processBook {
    #  File   File id   ThML id   OSIS id   Short Book Title
    my $filename = shift;
    my $bookname_infile = shift;
    my $thml_id = shift;
    my $osis_id = shift;
    my $short_book_title = shift;

    my @BUF = &loadFile( $filename );
    my @result;
    push(@result, "
");

    CHAPTER: foreach my $chapter (1..1000) {
        my $chapter_header_written;
        print("Processing $bookname_infile chapter $chapter.\n");
        my $verse_found;

        VERSE: foreach my $verse (1..1000) {
            my @verseContent = &grabVerseContent($bookname_infile, $chapter, $verse, @BUF);
            if (@verseContent) {
                if ($bookname_infile eq "Obad") {
                    if (not $chapter_header_written) {
                        $chapter_header_written = 1; #no chapters in Obadiah
                    }
                    push(@result, "\n"); #chapter will be ignored for >1 by grabVerseContent
                }
                else {
                    if (not $chapter_header_written) {
                        push(@result, "");
                        $chapter_header_written = 1;
                    }
                    push(@result, "\n
");
                }
                push(@result, @verseContent);
                push(@result, "
\n
");
                $verse_found = 1;
            }
            else { #verse nonexistent, goto next chapter
                if ($chapter_header_written and ($bookname_infile ne "Obad") ) {
                    push(@result, "");
                }
                last VERSE;
            }
        }

        if (not $verse_found) { #chapter empty, stop here
            if ($chapter == 1) { die("Error: no content in $bookname_infile"); }
            last CHAPTER;
        }
    }

    push(@result, "
"); #book
    print("done.\n");
    return(@result);
}

#
# processBookVariant - convert a book that exists in two textual variants
# (two files), emitting both variants verse by verse.
#   $filenameA, $bookname_infile_A, $variantNameA : file, in-file book id and
#       label of the first variant
#   $filenameB, $bookname_infile_B, $variantNameB : the same for the second
#   $thml_id, $osis_id, $short_book_title : not referenced in the code as it
#       stands (see processBook)
# A verse is emitted when at least one variant has it; the variant labels are
# only written when both have it. Returns the output fragments as a list;
# dies if chapter 1 has no content in either file.
# NOTE(review): string literals reproduced exactly as found (markup stripped).
#
sub processBookVariant {
    #  FileA  File_id_A  VariantNameA  FileB  File_id_B  VariantNameB  ThML id  OSIS id  Short Book Title
    my $filenameA = shift;
    my $bookname_infile_A = shift;
    my $variantNameA = shift;
    my $filenameB = shift;
    my $bookname_infile_B = shift;
    my $variantNameB = shift;
    my $thml_id = shift;
    my $osis_id = shift;
    my $short_book_title = shift;

    my @BUFA = loadFile( $filenameA );
    my @BUFB = loadFile( $filenameB );
    my @result;
    push(@result, "
");

    CHAPTER: foreach my $chapter (1..1000) {
        print("Processing $bookname_infile_A and $bookname_infile_B chapter $chapter.\n");
        my $chapter_header_written;
        my $verse_found;

        VERSE: foreach my $verse (1..1000) {
            # Remembers whether either variant had column B content.
            # NOTE(review): never read afterwards in the code as it stands.
            my $colBcontentTMP;
            my @verseContentA = &grabVerseContent($bookname_infile_A, $chapter, $verse, @BUFA);
            ($colBContent) and $colBcontentTMP = 1;
            my @verseContentB = &grabVerseContent($bookname_infile_B, $chapter, $verse, @BUFB);
            ($colBContent) and $colBcontentTMP = 1;

            if (@verseContentA or @verseContentB) {
                if (not $chapter_header_written) {
                    push(@result, "");
                    $chapter_header_written = 1;
                }
                push(@result, "");
                $verse_found = 1;
            }
            else { #verse nonexistent, goto next chapter
                if ($chapter_header_written) {
                    push(@result, "");
                }
                last VERSE;
            }

            if (@verseContentA) {
                if (@verseContentB) { push(@result, "" . $variantNameA ."\n") };
                push(@result, "
", @verseContentA, "
");
                if (@verseContentB) { push(@result, "") };
            }
            if (@verseContentB) {
                if (@verseContentA) { push(@result, $variantNameB . "\n") };
                push(@result, "", @verseContentB, "
");
                if (@verseContentA) { push(@result, "
") };
            }
            if ($verse_found) { push(@result, "") }
        }

        if (not $verse_found) { #chapter empty, stop here
            if ($chapter == 1) { die("Error: no content in $bookname_infile_A and $bookname_infile_B"); }
            last CHAPTER;
        }
    }

    push(@result, "
"); #book
    print("done.\n");
    return(@result);
}

#
# loadFile - load a book file into a list of lines and make small corrections:
# known fragments that were wrapped onto a line of their own are glued back
# onto the preceding line, and "#" continuation pairs are merged.
#   $filename : file name, relative to $prefix
# Returns the corrected lines (without line endings); dies if the file cannot
# be opened.
#
sub loadFile {
    my $filename = shift;

    # Lexical handle and three-argument open. The readline operand had gone
    # missing in this copy of the script (`my @buffer = ;`).
    open(my $fh, '<', "$prefix/$filename") or die("Could not open file $prefix/$filename");
    my @buffer = <$fh>;
    chomp(@buffer);
    close($fh);

    # Lines starting like this belong to the previous line, joined by a space ...
    my @appendWithSpace = (
        qr/^DANIHL/,
        qr/^DEUTERONO\/MION/,
        qr/^AU\)TOU=/,
        qr/^E\(\/C/,
        qr/^MOU/,
    );
    # ... and these are joined without a space.
    my @appendDirectly = (
        qr/^NUMA/,
        qr/^NEHL$/,          # ANANEL
        qr/^ESTHKE\/NAI$/,
        qr/^ESTHKW\\S$/,
        qr/^ISA/,            # a few lines in ISAIAH have this in different styles
        qr/^LAMYAN/,         # in LAM
        qr/^EZEKIHL/,
        qr/^\)$/,            # in PS
        qr/^PS[Y\s]/,        # in PS; breaks at PS or PSY
    );

    my @result;
    foreach my $index (0 .. $#buffer) {
        my $line = $buffer[$index];
        # Neighbours are only looked at when they exist. The original indexed
        # $index-1 and $index+1 unguarded, so the first line was compared with
        # the LAST line of the file and the last line with undef.
        my $previous = ($index > 0)        ? $buffer[$index - 1] : undef;
        my $next     = ($index < $#buffer) ? $buffer[$index + 1] : undef;

        # Appending needs an earlier line to append to; a matching line at the
        # very top of the file is kept as it is instead of aborting the run.
        if (@result and grep { $line =~ $_ } @appendWithSpace) {
            $result[-1] .= " " . $line;
        }
        elsif (@result and grep { $line =~ $_ } @appendDirectly) {
            $result[-1] .= $line;
        }
        elsif (defined($next) and $next =~ m/^#/ and $line =~ m/^(.*)#$/) {
            # in Daniel, # is used as a "continue line on next line" marker
            push(@result, $1);
        }
        elsif (@result and defined($previous) and $previous =~ m/^(.*)#$/ and $line =~ m/^#\t(.*)/) {
            # second half of such a pair: drop the marker and the tab
            $result[-1] .= $1;
        }
        else {
            push(@result, $line);
        }
    }
    return @result;
}

#
# Main: build the document and write it to mt-lxx-par.osis.xml.
# NOTE(review): the prologue and epilogue literals have lost their markup;
# they are reproduced exactly as found.
#
my @result;
push(@result,"\n");
push(@result,"\n");
push(@result,"\n");
push(@result,"
\ \ The Parallel Aligned Hebrew-Aramaic and Greek texts of Jewish Scripture\ MT-LXX-Parallel\ Bible.Tanach\ \
\n");

#                              File              File id   ThML id  OSIS id  Short Book Title
#push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") );
#push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") );
#push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") );
#push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") );
#push(@result, &processBook("05.Deut.par", "Deut", "Deut", "Deut", "Deuteronomy") );
#push(@result, &processBookVariant("07.JoshA.par", "JoshA", "Codex Alexandrinus:", "06.JoshB.par", "JoshB", "Codex Vaticanus:", "Josh", "Josh", "Joshua") );
# push(@result, &processBookVariant("09.JudgesA.par", "JudgA", "Codex Alexandrinus:", "08.JudgesB.par", "JudgB", "Codex Vaticanus:", "Judg", "Judg", "Judges") );
# push(@result, &processBook("10.Ruth.par", "Ruth", "Ruth", "Ruth", "Ruth") );
# push(@result, &processBook("11.1Sam.par", "1Sam/K", "iSam", "1Sam", "1 Samuel") );
# push(@result, &processBook("12.2Sam.par", "2Sam/K", "iiSam", "2Sam", "2 Samuel") );
# push(@result, &processBook("13.1Kings.par", "1/3Kgs", "iKgs", "1Kgs", "1 Kings") );
# push(@result, &processBook("14.2Kings.par", "2/4Kgs", "iiKgs", "2Kgs", "2 Kings") );
# push(@result, &processBook("15.1Chron.par", "1Chr", "iChr", "1Chr", "1 Chronicles") );
# push(@result, &processBook("16.2Chron.par", "2Chr", "iiChr", "2Chr", "2 Chronicles") );
# push(@result, &processBook("18.Ezra.par", "Ezr", "Ezra", "Ezra", "Ezra") );
# push(@result, &processBook("19.Neh.par", "Neh", "Neh", "Neh", "Nehemiah") );
# push(@result, &processBook("18.Esther.par", "Esth", "Esth", "Esth", "Esther") );
# push(@result, &processBook("26.Job.par", "Job", "Job", "Job", "Job") );
#
#push(@result, &processBook("20.Psalms.par", "Ps", "Ps", "Ps", "Psalms"));
#
# push(@result, &processBook("23.Prov.par", "Prov", "Prov", "Prov", "Proverbs") );
# push(@result, &processBook("24.Qoh.par", "Qoh", "Eccl", "Eccl", "Ecclesiastes") );
# push(@result, &processBook("25.Cant.par", "Song", "Song", "Song", "Song of Solomon") );
# push(@result, &processBook("40.Isaiah.par", "Isa", "Isa", "Isa", "Isaiah") );
# push(@result, &processBook("41.Jer.par", "Jer", "Jer", "Jer", "Jeremiah") );
# push(@result, &processBook("43.Lam.par", "Lam", "Lam", "Lam", "Lamentations") );
# push(@result, &processBook("44.Ezekiel.par", "Ezek", "Ezek", "Ezek", "Ezekiel") );
# push(@result, &processBookVariant("45.DanielOG.par", "Dan", "Old Greek:", "46.DanielTh.par", "DanTh", "Theodotion:", "Dan", "Dan", "Daniel"));
# push(@result, &processBook("28.Hosea.par", "Hos", "Hos", "Hos", "Hosea") );
# push(@result, &processBook("31.Joel.par", "Joel", "Joel", "Joel", "Joel") );
# push(@result, &processBook("30.Amos.par", "Amos", "Amos", "Amos", "Amos") );
# push(@result, &processBook("33.Obadiah.par", "Obad", "Obad", "Obad", "Obadiah") );
push(@result, &processBook("32.Jonah.par", "Jonah", "Jonah", "Jonah", "Jonah") );
# push(@result, &processBook("29.Micah.par", "Mic", "Mic", "Mic", "Micah") );
# push(@result, &processBook("34.Nahum.par", "Nah", "Nah", "Nah", "Nahum") );
# push(@result, &processBook("35.Hab.par", "Hab", "Hab", "Hab", "Habakkuk") );
# push(@result, &processBook("36.Zeph.par", "Zeph", "Zeph", "Zeph", "Zephaniah") );
# push(@result, &processBook("37.Haggai.par", "Hag", "Hag", "Hag", "Haggai") );
# push(@result, &processBook("38.Zech.par", "Zech", "Zech", "Zech", "Zechariah") );
# push(@result, &processBook("39.Malachi.par", "Mal", "Mal", "Mal", "Malachi") );

push(@result, "
\n
");

# The result holds Hebrew and Greek code points, so the handle gets an
# explicit UTF-8 layer (the script already does the same for STDOUT).
# Opening and closing are checked so a failed write is not silent.
my $outputFile = "mt-lxx-par.osis.xml";
open(my $output, '>:encoding(UTF-8)', $outputFile)
    or die("Could not open $outputFile for writing: $!\n");
print {$output} join("\n", @result);
close($output) or die("Could not finish writing $outputFile: $!\n");