summary | refs | log | tree | commit | diff | stats
path: root/modules
diff options
context:
space:
mode:
author: Martin Gruner <mg.pub@gmx.net> 2005-11-02 21:21:15 +0000
committer: Martin Gruner <mg.pub@gmx.net> 2005-11-02 21:21:15 +0000
commit: c033620be0616c507eda2be35ec6c6d2dfcb633b (patch)
tree: 6ae49c98e71dc09e882b30b4c1f27e743c84b430 /modules
parent: 88d0e07a30f5455d9d0cc3e3922772c772ea0a50 (diff)
download: sword-tools-c033620be0616c507eda2be35ec6c6d2dfcb633b.tar.gz
I'm getting tired of it. Parses Gen-2Sam now.
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@49 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules')
-rw-r--r-- modules/mt-lxx-parallel/convert.pl 103
1 files changed, 71 insertions, 32 deletions
diff --git a/modules/mt-lxx-parallel/convert.pl b/modules/mt-lxx-parallel/convert.pl
index e2302ad..007b828 100644
--- a/modules/mt-lxx-parallel/convert.pl
+++ b/modules/mt-lxx-parallel/convert.pl
@@ -18,7 +18,7 @@ my $MorphologicalSegmentStart = "<seg type=\"morph\">" ;
my $MorphologicalSegmentEnd = "</seg>" ;
my $MorphologicalDivisionMarker = $MorphologicalSegmentEnd . $MorphologicalSegmentStart;
-my $hebrewLetters="A-Z\(\)\+\#\$\*\&/"; #used in a character class of a regexp later
+my $hebrewLetters="A-Z\(\)\+\#\$\*\&/r"; #used in a character class of a regexp later
my %hebrew2utf8 = (
")" =>chr(0x05D0), #HEBREW LETTER ALEF
@@ -30,6 +30,7 @@ my %hebrew2utf8 = (
"H" =>chr(0x05D4), #HEBREW LETTER HE
"W" =>chr(0x05D5), #HEBREW LETTER VAV
"Z" =>chr(0x05D6), #HEBREW LETTER ZAYIN
+"z" =>chr(0x05D6), #HEBREW LETTER ZAYIN # TODO: this occurred in a retranslation
"X" =>chr(0x05D7), #HEBREW LETTER HET
"+" =>chr(0x05D8), #HEBREW LETTER TET
"Y" =>chr(0x05D9), #HEBREW LETTER YOD
@@ -49,6 +50,7 @@ my %hebrew2utf8 = (
"C" =>chr(0x05E6), #HEBREW LETTER TSADI
"Q" =>chr(0x05E7), #HEBREW LETTER QOF / KOF
"R" =>chr(0x05E8), #HEBREW LETTER RESH
+"r" =>chr(0x05E8), #HEBREW LETTER RESH # TODO: this occurred in a retranslation
"#" =>chr(0x05E9).chr(0x05C1), #HEBREW LETTER SHIN == SIN/SHIN without dot
"\$" =>chr(0x05E9).chr(0x05C1), #HEBREW LETTER SHIN + SHIN DOT == SHIN
"&" =>chr(0x05E9).chr(0x05C2), #HEBREW LETTER SHIN + SIN DOT == SIN
@@ -167,6 +169,7 @@ my %notes = (
"--?" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX)?", # TODO: my addition, check, probably wrong
"--+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).",
"---+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", # TODO: my addition
+"-+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", # TODO: my addition
"'" => "Long minus or plus (at least four lines).", # TODO: my addition, check
"''" => "Long minus or plus (at least four lines).",
"{d}" => "Reference to doublet (occurring between the two elements of the doublet).",
@@ -176,6 +179,7 @@ my %notes = (
"?" => "Questionable notation, equivalent, etc.",
"??" => "Questionable notation, equivalent, etc.", # TODO: my addition
"{p}" => "Greek preverb representing Hebrew preposition.",
+"{p}+" => "Greek preverb representing Hebrew preposition.", # TODO: my addition ???
"{pm}" => "Greek preverb representing Hebrew preposition.", # TODO: my addition, check
"{..p}" => "Preposition added in the LXX in accordance with the rules of the Greek language or translational habits.",
@@ -219,14 +223,19 @@ my %notes = (
"=\%" => "Introducing categories of translation technique recorded in col. b.",
"=\%vap" => "Change from active to passive form in verbs.",
"=\%vpa" => "Change from passive to active form in verbs.",
+"\%vpa" => "Change from passive to active form in verbs.", #my addition
+"=vpa" => "Change from passive to active form in verbs.", # TODO: my addition, check
"=\%p" => "Difference in preposition or particle.",
"=\%p?" => "Difference in preposition or particle?",
"=p" => "Difference in preposition or particle.", # TODO: my addition, check, uncertain?
+"={d}\%p" => "Difference in preposition or particle.", # TODO: my addition, check, uncertain? DOUBLET?
"=\%pa" => "Difference in preposition or particle.", # TODO: my addition, check
"=\%p=" => "Difference in preposition or particle.", # TODO: my addition, check
"=\%p+" => "Addition of preposition or particle.",
+"\%p+" => "Addition of preposition or particle.", #my addition
"=\%p+?" => "Addition of preposition or particle?",
"=\%p-" => "Omission of preposition or particle.",
+"=p\%-" => "Omission of preposition or particle.", # TODO: my addition, check, uncertain?
"=p-" => "Omission of preposition or particle.", # TODO: my addition, check, uncertain?
"=;" => "Retroversion in col. b based on equivalence occurring in immediate or remote context.",
#"G" => "Hebrew variant, but at this stage no plausible retroversion is suggested.",
@@ -235,16 +244,20 @@ my %notes = (
"=?\@" => "Etymological exegesis?", #my addition
"=\@?" => "Etymological exegesis?", #my addition
"=\@...a" => "Etymological exegesis according to Aramaic.",
+"=\@a" => "Etymological exegesis according to Aramaic.", # TODO: my addition, check
"=:" => "Introducing reconstructed proper noun.",
+"=:?" => "Introducing reconstructed proper noun?",
":" => "Introducing reconstructed proper noun.", #my addition
"=v" => "Difference in vocalization (reading).",
-"=v?" => "Difference in vocalization (reading)?",
+"={d}v" => "Difference in vocalization (reading).", # TODO: check, occurs in text, DOUBLET?
+"=v?" => "Difference in vocalization (reading)?",
"=vs" => "Difference in vocalization (reading).", # TODO: check, occurs in text
"=>" => "Difference in vocalization (reading).", # TODO: check, occurs in text
"=r" => "Incomplete retroversion.",
"{*}" => "Agreement of LXX with ketib.",
"{**}" => "Agreement of LXX with qere.",
+"{**?}" => "Agreement of LXX with qere?", #my addition
"." => "Interchange of consonants between MT and the presumed Hebrew parent text of the LXX.",
@@ -255,6 +268,9 @@ my %notes = (
".j" => "Two words of MT joined into one word in the parent text of the LXX.",
".w" => "Different word-division reflected in the parent text of the LXX.",
+"(!)" => "(!)", #my addition
+
+
"<sp" => "<sp", #TODO: FIX, occurs in text
"<sp>" => "<sp>", #TODO: FIX, occurs in text
"<sp^>" => "<sp^>", #TODO: FIX, occurs in text
@@ -296,7 +312,6 @@ sub translateHebrewNote(){
($origNote =~ m/^\[(.+)\]/) and
return( &createNote( "Number of verse in LXX ($1) is different from MT." ) );
-
($origNote =~ m/^=?{\.\.d(.+)}/) and
return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..d}" }) );
@@ -317,6 +332,8 @@ sub translateHebrewNote(){
return( &openNote( $notes{"{...}"} . "(".&translateHebrewWordorNote($1) ." " ) );
($origNote =~ m/^=?{\.\.r([^}]+)$/) and
return( &openNote( $notes{"{..r}"} . "(".&translateHebrewWordorNote($1) ." " ) );
+ ($origNote =~ m/^=?{\.\.([^}]+)$/) and
+ return( &openNote( $notes{"{..}"} . "(".&translateHebrewWordorNote($1) ." " ) );
($origNote =~ m/^([^{]+)}$/) and
return( &closeNote(&translateHebrewWordorNote( $1 ).")" ) );
@@ -352,23 +369,12 @@ sub translateHebrewNote(){
# return("<reference osisRef=\"$1.$2\"/>");
return $origNote;
-# #Special cases: osisREf with bookID, split because of space char, so put them together again
-# ($origNote =~ m/^<\^?(\w+)$/) and
-# # return( "<reference osisRef=\"$1." ); # TODO: check if <reference/> exists
-# return $origNote;
-# ($origNote =~ m/^(\d+)[.:](\d+)>?/) and
-# # return( "$1.$2\"/> " );
-# return $origNote;
-
-
-
- #special case: no note, but a crossref (with book ID)
-# ($origNote =~ m/^<\^?(\w+)\s?(\d+)[.:](\d+)>?/) and
-# return("<reference osisRef=\"$1.$2.$3\">&lt;$1&gt;</reference> ");
-
($origNote =~ m/^[?].*/) and
return( &createNote( $notes{"?"} ) . &translateHebrewWordorNote( $1 ) );
+ ($origNote =~ m/^(.+),(.+)$/) and # 2 Notes / Words, split up, but only at the end
+ return( &translateHebrewWordorNote( $1 ). "," .&translateHebrewWordorNote( $2 ) );
+
HANDLE_NOTE_FALLBACK: for my $i ( 1..(length($origNote)-1) ){ #last try, split up into chunks
my $currentLength = length( $origNote ) - 1 - $i; #start with the longest and become shorter, to find the complicated notes {!}p before the simple {!}
if ( $notes{ substr($origNote,0,$currentLength) } ){
@@ -397,19 +403,31 @@ sub translateGreekNote(){
($notes{ $origNote }) and return( &createNote( $notes{$origNote} ) );
- ($origNote =~ m/^\[(.+)\]/) and
+ ($origNote =~ m/^\[(.+)\]?/) and
return( &createNote( "Number of verse in LXX ($1) is different from MT." ) );
($origNote =~ m/^\[\[(.+)\]\]/) and
return( &createNote( "Number of verse in MT ($1) is different from the LXX." ) );
+ #special case: no note, but a crossref (no book ID) # TODO: for now OSIS refs are not parsed FIX THIS, see above
+ ($origNote =~ m/^\[\[|\]\]$/) and
+ return $origNote;
+
+ #special case: no note, but a crossref (no book ID) # TODO: for now OSIS refs are not parsed FIX THIS, see above
+ ($origNote =~ m/^\[.+$/) and
+ return $origNote;
+
+ ($origNote =~ m/^=?{\.\.\.(.+)\.\.\.(.+)}/) and # special case: {...word1...word2}
+ return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{...}" })
+ . &createNote("(".&translateGreekWordorNote( $2 ).") ". $notes{ "{...}" }));
+
($origNote =~ m/^=?{\.\.p(.+)}/) and
return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..p}" }) );
($origNote =~ m/^=?{\.\.d(.+)}/) and
return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..d}" }) );
- ($origNote =~ m/^=?{\.\.\.d(.+)}/) and # TODO: my addition, check
+ ($origNote =~ m/^=?{\.\.\.d(.+)}/) and # TODO: my addition, check
return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..d}" }) );
($origNote =~ m/^=?{\.\.(.+)}/) and
@@ -431,10 +449,18 @@ sub translateGreekNote(){
return( &openNote($notes{"{...}"} . "(" .&translateGreekWordorNote($1) ." " ) );
($origNote =~ m/^=?{c([^}]+)$/) and
return( &openNote($notes{"{c}"} . "(" .&translateGreekWordorNote($1) ." " ) ); # TODO: occurs {cXXX}, not documented
+ ($origNote =~ m/^{([^}]+)$/) and # TODO: occurs, seems unreasonable
+ return( "(" .&translateGreekWordorNote($1) ." " );
($origNote =~ m/^([^{]+)}$/) and
return( &closeNote(&translateGreekWordorNote( $1 ).")" ) );
+ #special case: no note, but a crossref (no book ID) # TODO: for now OSIS refs are not parsed
+ ($origNote =~ m/^<.+>?$/) and
+ return $origNote;
+ #special case: no note, but a crossref (no book ID) # TODO: for now OSIS refs are not parsed
+ ($origNote =~ m/^{=\d+}$/) and
+ return $origNote;
HANDLE_NOTE_FALLBACK: for my $i ( 1..(length($origNote)-1) ){ #last try, split up into chunks
my $currentLength = length( $origNote ) - 1 - $i; #start with the longest and become shorter, to find the complicated notes {!}p before the simple {!}
@@ -454,6 +480,7 @@ sub translateHebrewWordorNote(){ #will return unicode hebrew with morph separati
( $hebrew =~ m/^[^$hebrewLetters]/ ) and return &translateHebrewNote( $hebrew );
( $hebrew =~ m/[}]$/ ) and return &translateHebrewNote( $hebrew );
+ ( $notes{ $hebrew } ) and return &translateHebrewNote( $hebrew );
my $length = length($hebrew);
my $index = 0;
@@ -474,7 +501,8 @@ sub translateGreekWordorNote(){
( $greek =~ m/^[^$greekLetters]/ ) and return &translateGreekNote( $greek );
( $greek =~ m/[}]$/ ) and return &translateGreekNote( $greek );
- ( $greek eq "#" ) and return &translateGreekNote( $greek );
+ ( $greek eq "#" ) and return &translateGreekNote( "#" );
+ ( $greek =~ m/(.+)(\[.+\])$/ ) and return &translateGreekWordorNote( $1 ) .&translateGreekNote( $2 );
( $greek =~ m/^(.+)(\[\d+\])$/ ) and return ( &translateGreekWordorNote( $1 ) . &translateGreekNote ( $2 ) );
@@ -504,10 +532,21 @@ sub parseLine(){
printf("parsing %s\n", $origLine);
- $origLine =~ s/--=/--+ =/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
- $origLine =~ s/-\%vap/=\%vap/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
-
- ($origLine =~ m/^([^=]+)?([=].+)?\t(.+)$/) or die("No match in parseLine().\n");
+ $origLine =~ s/ --=/ --+ =/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
+ $origLine =~ s/ -\%vap/ =\%vap/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
+ $origLine =~ s/ ;=/ =;/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
+ $origLine =~ s/ \+;/ =;/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
+ $origLine =~ s/[\ ]{10,}/\t/; # TODO: UGLY HACK, sometimes the tab in the wrong place but large spaces in the right
+ ($origLine eq "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY KAI\\ TO\\N AMORRAI=ON ") and
+ $origLine = "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY\tKAI\\ TO\\N AMORRAI=ON"; # TODO: hack, Tab missing
+ ($origLine eq "W/H/KHNYM =W/H/)BNYM .m .kb # KAI\\ OI( LI/QOI ") and
+ $origLine = "W/H/KHNYM =W/H/)BNYM .m .kb\tKAI\\ OI( LI/QOI"; # TODO: hack, Tab missing
+ ($origLine eq "W/YC+YRW =;W/YC+YDW .rd <9.12 E)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO ") and
+ $origLine = "W/YC+YRW =;W/YC+YDW .rd <9.12\tE)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO"; # TODO: hack, Tab missing
+
+# printf("parsing %s\n", $origLine);
+
+ ($origLine =~ m/^([^=\t]+)?([=][^\t]*)?\t(.+)$/) or die("No match in parseLine().\n");
($1 or $2) or die("Hebrew not found.\n");
$3 or die("Greek not found.\n");
$1 and my @hebrewWordsColA = split(/\s+/, $1);
@@ -729,17 +768,17 @@ my @result;
# File File id ThML id OSIS id Short Book Title
-#push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") );
-#push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") );
-#push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") );
-#push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") );
-#push(@result, &processBook("05.Deut.par", "Deut", "Deut", "Deut", "Deuteronomy") );
+push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") );
+push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") );
+push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") );
+push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") );
+push(@result, &processBook("05.Deut.par", "Deut", "Deut", "Deut", "Deuteronomy") );
push(@result, &processBookVariant("07.JoshA.par", "JoshA", "Codex Alexandrinus:", "06.JoshB.par", "JoshB", "Codex Vaticanus:", "Josh", "Josh", "Joshua") );
push(@result, &processBookVariant("09.JudgesA.par", "JudgA", "Codex Alexandrinus:", "08.JudgesB.par", "JudgB", "Codex Vaticanus:", "Judg", "Judg", "Judges") );
#
-# push(@result, &processBook("10.Ruth.par", "Ruth", "Ruth", "Ruth", "Ruth") );
-# push(@result, &processBook("11.1Sam.par", "1Sam/K", "iSam", "1Sam", "1 Samuel") );
-# push(@result, &processBook("12.2Sam.par", "2Sam/K", "iiSam", "2Sam", "2 Samuel") );
+ push(@result, &processBook("10.Ruth.par", "Ruth", "Ruth", "Ruth", "Ruth") );
+ push(@result, &processBook("11.1Sam.par", "1Sam/K", "iSam", "1Sam", "1 Samuel") );
+ push(@result, &processBook("12.2Sam.par", "2Sam/K", "iiSam", "2Sam", "2 Samuel") );
# push(@result, &processBook("13.1Kings.par", "1/3Kgs", "iKgs", "1Kgs", "1 Kings") );
# push(@result, &processBook("14.2Kings.par", "2/4Kgs", "iiKgs", "2Kgs", "2 Kings") );
# push(@result, &processBook("15.1Chron.par", "1Chr", "iChr", "1Chr", "1 Chronicles") );