summaryrefslogtreecommitdiffstats
path: root/modules/mt-lxx-parallel/convert.pl
diff options
context:
space:
mode:
authorMartin Gruner <mg.pub@gmx.net>2005-11-09 21:18:35 +0000
committerMartin Gruner <mg.pub@gmx.net>2005-11-09 21:18:35 +0000
commit0c303ed20fe6ffce49fb0de42522bdfe4c2b5528 (patch)
tree4b1a21cd58a59d4c75545b40c90d44aab15f9c6a /modules/mt-lxx-parallel/convert.pl
parentc033620be0616c507eda2be35ec6c6d2dfcb633b (diff)
downloadsword-tools-0c303ed20fe6ffce49fb0de42522bdfe4c2b5528.tar.gz
Gen-Job parses.
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@50 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/mt-lxx-parallel/convert.pl')
-rw-r--r--modules/mt-lxx-parallel/convert.pl117
1 files changed, 97 insertions, 20 deletions
diff --git a/modules/mt-lxx-parallel/convert.pl b/modules/mt-lxx-parallel/convert.pl
index 007b828..4187c4d 100644
--- a/modules/mt-lxx-parallel/convert.pl
+++ b/modules/mt-lxx-parallel/convert.pl
@@ -88,6 +88,11 @@ my %greek2utf8 = (
"^" => "^",
+"-" => "-", #occurs in the text
+":" => ":", #occurs in the text!?
+"!" => "!", #occurs in the text!?
+"|" => "|", #occurs in the text!?
+
# "*A" =>chr(0x0391), #GREEK CAPITAL LETTER ALPHA
# "*B" =>chr(0x0392), #GREEK CAPITAL LETTER BETA
# "*G" =>chr(0x0393), #GREEK CAPITAL LETTER GAMMA
@@ -120,6 +125,7 @@ my %greek2utf8 = (
"G" =>chr(0x03B3), #GREEK SMALL LETTER GAMMA
"D" =>chr(0x03B4), #GREEK SMALL LETTER DELTA
"E" =>chr(0x03B5), #GREEK SMALL LETTER EPSILON
+"e" =>chr(0x03B5), #GREEK SMALL LETTER EPSILON # occurs in Neh
"V" =>chr(0x03DD), #GREEK SMALL LETTER DIGAMMA
"Z" =>chr(0x03B6), #GREEK SMALL LETTER ZETA
"H" =>chr(0x03B7), #GREEK SMALL LETTER ETA
@@ -129,6 +135,7 @@ my %greek2utf8 = (
"L" =>chr(0x03BB), #GREEK SMALL LETTER LAMDA
"M" =>chr(0x03BC), #GREEK SMALL LETTER MU
"N" =>chr(0x03BD), #GREEK SMALL LETTER NU
+"n" =>chr(0x03BD), #GREEK SMALL LETTER NU # occurs in Neh
"C" =>chr(0x03BE), #GREEK SMALL LETTER XI
"O" =>chr(0x03BF), #GREEK SMALL LETTER OMICRON
"P" =>chr(0x03C0), #GREEK SMALL LETTER PI
@@ -148,6 +155,7 @@ my %notes = (
"{#}" => "Asterized passage (in Job).",
"{g}" => "Reference to difference between the text of Rahlfs and that of the relevant Göttingen edition.",
"..a" => "Word included in one of the Aramaic sections.",
+",,a" => "Word included in one of the Aramaic sections.", #my addition, ok
"*" => "Ketib.",
"**" => "Qere.",
"*z" => "Qere wela ketib, ketib wela qere.",
@@ -166,14 +174,18 @@ my %notes = (
"---" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX).",
"---?" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX)?",
"--" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX).", # TODO: my addition, check, probably wrong
+"{---%}" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX).", # TODO: my addition, check??
"--?" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX)?", # TODO: my addition, check, probably wrong
"--+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).",
"---+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", # TODO: my addition
+"-.-" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", # TODO: my addition, check?
"-+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", # TODO: my addition
"'" => "Long minus or plus (at least four lines).", # TODO: my addition, check
"''" => "Long minus or plus (at least four lines).",
"{d}" => "Reference to doublet (occurring between the two elements of the doublet).",
"{d?}" => "Reference to doublet (occurring between the two elements of the doublet)?",
+"{d}?" => "Reference to doublet (occurring between the two elements of the doublet)?",
+"{?d}" => "Reference to doublet (occurring between the two elements of the doublet)?",
"{..d}" => "Distributive rendering, occurring once in the translation but referring to more than one Hebrew word.",
"{..r}" => "Notation in Hebrew column of elements repeated in the translation.",
"?" => "Questionable notation, equivalent, etc.",
@@ -206,10 +218,12 @@ my %notes = (
"{s}" => "Hebrew M/, MN (comparative, superlative) reflected by Greek comparative or superlative.", # TODO: UTF-8
"{t}" => "Transliterated Hebrew word.",
+"{dt}" => "Transliterated Hebrew word.", # TODO: occurs. DOUBLET?
"{t.}" => "Transliterated Hebrew word.", # TODO: unknown
"<t?>" => "Transliterated Hebrew word.", # TODO: unknown
"{t?}" => "Transliterated Hebrew word?",
"{c}" => "Unknown.", # TODO: unknown
+"{c}?" => "Unknown.", # TODO: unknown
"#" => "Long line continuing in next one, placed both at the end of the line running over and at the beginning of the following line in the opposite column.",
@@ -257,6 +271,7 @@ my %notes = (
"{*}" => "Agreement of LXX with ketib.",
"{**}" => "Agreement of LXX with qere.",
+"{**" => "Agreement of LXX with qere.", #my addition
"{**?}" => "Agreement of LXX with qere?", #my addition
"." => "Interchange of consonants between MT and the presumed Hebrew parent text of the LXX.",
@@ -298,7 +313,7 @@ sub closeNote(){
sub translateHebrewNote(){
my $origNote = shift;
- print("TranslateHebrewNote $origNote\n");
+# print("TranslateHebrewNote $origNote\n");
($origNote eq "=") and return; #= only marks colB, no real note
@@ -312,6 +327,10 @@ sub translateHebrewNote(){
($origNote =~ m/^\[(.+)\]/) and
return( &createNote( "Number of verse in LXX ($1) is different from MT." ) );
+ #special case: no note, but a crossref (no book ID) # TODO: for now OSIS refs are not parsed FIX THIS, see above
+ ($origNote =~ m/^=?\[\[|\]\]$/) and
+ return $origNote;
+
($origNote =~ m/^=?{\.\.d(.+)}/) and
return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..d}" }) );
@@ -399,7 +418,7 @@ sub translateHebrewLetters(){ #will return unicode hebrew without morph separati
sub translateGreekNote(){
my $origNote = shift;
- print("TranslateGreekNote $origNote\n");
+# print("TranslateGreekNote $origNote\n");
($notes{ $origNote }) and return( &createNote( $notes{$origNote} ) );
@@ -447,13 +466,10 @@ sub translateGreekNote(){
return( &openNote($notes{"{...}"} . "(" .&translateGreekWordorNote($1) ." " ) );
($origNote =~ m/^=?{\.\.([^}]+)$/) and
return( &openNote($notes{"{...}"} . "(" .&translateGreekWordorNote($1) ." " ) );
+ ($origNote =~ m/^=?{\.\.(\^[^}]+)$/) and
+ return( &openNote($notes{"{..}"} . "(" .&translateGreekWordorNote($1) ." " ) );
($origNote =~ m/^=?{c([^}]+)$/) and
return( &openNote($notes{"{c}"} . "(" .&translateGreekWordorNote($1) ." " ) ); # TODO: occurs {cXXX}, not documented
- ($origNote =~ m/^{([^}]+)$/) and # TODO: occurs, seems unreasonable
- return( "(" .&translateGreekWordorNote($1) ." " );
-
- ($origNote =~ m/^([^{]+)}$/) and
- return( &closeNote(&translateGreekWordorNote( $1 ).")" ) );
#special case: no note, but a crossref (no book ID) # TODO: for now OSIS refs are not parsed
($origNote =~ m/^<.+>?$/) and
@@ -462,6 +478,14 @@ sub translateGreekNote(){
($origNote =~ m/^{=\d+}$/) and
return $origNote;
+ ($origNote =~ m/^([^{]+)}$/) and
+ return( &closeNote(&translateGreekWordorNote( $1 ).")" ) );
+ ($origNote =~ m/^([^{]+)}\?$/) and
+ return( &closeNote(&translateGreekWordorNote( $1 )."?)" ) );
+
+ ($origNote =~ m/^{([^}]+}?)$/) and # TODO: occurs, seems unreasonable
+ return( "(" .&translateGreekWordorNote($1) ." " );
+
HANDLE_NOTE_FALLBACK: for my $i ( 1..(length($origNote)-1) ){ #last try, split up into chunks
my $currentLength = length( $origNote ) - 1 - $i; #start with the longest and become shorter, to find the complicated notes {!}p before the simple {!}
if ( $notes{ substr($origNote,0,$currentLength) } ){
@@ -499,8 +523,10 @@ sub translateGreekWordorNote(){
my $greek = shift;
+# printf("TranslateGreek of $greek\n");
+
( $greek =~ m/^[^$greekLetters]/ ) and return &translateGreekNote( $greek );
- ( $greek =~ m/[}]$/ ) and return &translateGreekNote( $greek );
+ ( $greek =~ m/[}]\??$/ ) and return &translateGreekNote( $greek );
( $greek eq "#" ) and return &translateGreekNote( "#" );
( $greek =~ m/(.+)(\[.+\])$/ ) and return &translateGreekWordorNote( $1 ) .&translateGreekNote( $2 );
@@ -537,12 +563,21 @@ sub parseLine(){
$origLine =~ s/ ;=/ =;/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
$origLine =~ s/ \+;/ =;/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
$origLine =~ s/[\ ]{10,}/\t/; # TODO: UGLY HACK, sometimes the tab in the wrong place but large spaces in the right
+ $origLine =~ s/\.h-<ge10\.4/.h- <ge10.4>/; # TODO: UGLY HACK, note and crossref hung together and the closing > is missing
+ $origLine =~ s/}{/} {/; # TODO: UGLY HACK, notes hung together
+ $origLine =~ s/=a\$\/DY/=A\$\/DY/;# TODO: UGLY HACK, Hebrew letter wrong
+ $origLine =~ s/{\.\.\^EPIQEI\\S\.\.\^E\)FI\/LHSA}/{..^EPIQEI\\S E)FI\/LHSA}/;# TODO: UGLY HACK, strange note
+
($origLine eq "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY KAI\\ TO\\N AMORRAI=ON ") and
$origLine = "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY\tKAI\\ TO\\N AMORRAI=ON"; # TODO: hack, Tab missing
($origLine eq "W/H/KHNYM =W/H/)BNYM .m .kb # KAI\\ OI( LI/QOI ") and
$origLine = "W/H/KHNYM =W/H/)BNYM .m .kb\tKAI\\ OI( LI/QOI"; # TODO: hack, Tab missing
($origLine eq "W/YC+YRW =;W/YC+YDW .rd <9.12 E)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO ") and
$origLine = "W/YC+YRW =;W/YC+YDW .rd <9.12\tE)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO"; # TODO: hack, Tab missing
+ ($origLine eq "W/YBW) {...EI)S}\tKAI\\ EI)SH=LQEN") and
+ $origLine = "W/YBW)\t{...EI)S} KAI\\ EI)SH=LQEN"; # TODO: hack, TAB misplaced
+ ($origLine eq "W/L) {..^OU)}\tDE\\") and
+ $origLine = "W/L)\t{..^OU)} DE\\"; # TODO: hack, TAB misplaced
# printf("parsing %s\n", $origLine);
@@ -758,6 +793,48 @@ sub loadFile(){ #$fileName loads the file into the buffer and makes small corr
$index++;
}
}
+ elsif ($filename eq "15.1Chron.par"){ #fix for 1Chron
+ foreach my $currentItem (@buffer){
+ if ($buffer[$index] =~ m/^AU\)TOU=/){
+ $result[$#result-1] .= " AUT)TOU=";
+ }
+ elsif ($buffer[$index] =~ m/^E\(\/C/){
+ $result[$#result-1] .= " E(/C";
+ }
+ elsif ($buffer[$index] =~ m/^MOU/){
+ $result[$#result-1] .= " MOU";
+ }
+ else{
+ push(@result, $buffer[$index] );
+ }
+ $index++;
+ }
+ }
+ elsif ($filename eq "19.Neh.par"){ #fix for Nehemiah, one time where NEHL has to go on the preceding line, no space
+ foreach my $currentItem (@buffer){
+ if ($buffer[$index] =~ m/^NEHL$/){
+ $result[$#result-1] .= "NEHL"; # no space, ANANEL
+ }
+ else{
+ push(@result, $buffer[$index] );
+ }
+ $index++;
+ }
+ }
+ elsif ($filename eq "18.Esther.par"){ #fix for Esther, where ESTHKE/NAI or ESTHKW\S has to go on the preceding line, no space
+ foreach my $currentItem (@buffer){
+ if ($buffer[$index] =~ m/^ESTHKE\/NAI$/){
+ $result[$#result-1] .= "ESTHKE/NAI"; # no space
+ }
+ elsif ($buffer[$index] =~ m/^ESTHKW\\S$/){
+ $result[$#result-1] .= "ESTHKW\S"; # no space
+ }
+ else{
+ push(@result, $buffer[$index] );
+ }
+ $index++;
+ }
+ }
else{
@result = @buffer;
}
@@ -768,17 +845,17 @@ my @result;
# File File id ThML id OSIS id Short Book Title
-push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") );
-push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") );
-push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") );
-push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") );
-push(@result, &processBook("05.Deut.par", "Deut", "Deut", "Deut", "Deuteronomy") );
-push(@result, &processBookVariant("07.JoshA.par", "JoshA", "Codex Alexandrinus:", "06.JoshB.par", "JoshB", "Codex Vaticanus:", "Josh", "Josh", "Joshua") );
-push(@result, &processBookVariant("09.JudgesA.par", "JudgA", "Codex Alexandrinus:", "08.JudgesB.par", "JudgB", "Codex Vaticanus:", "Judg", "Judg", "Judges") );
-#
- push(@result, &processBook("10.Ruth.par", "Ruth", "Ruth", "Ruth", "Ruth") );
- push(@result, &processBook("11.1Sam.par", "1Sam/K", "iSam", "1Sam", "1 Samuel") );
- push(@result, &processBook("12.2Sam.par", "2Sam/K", "iiSam", "2Sam", "2 Samuel") );
+#push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") );
+#push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") );
+#push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") );
+#push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") );
+#push(@result, &processBook("05.Deut.par", "Deut", "Deut", "Deut", "Deuteronomy") );
+#push(@result, &processBookVariant("07.JoshA.par", "JoshA", "Codex Alexandrinus:", "06.JoshB.par", "JoshB", "Codex Vaticanus:", "Josh", "Josh", "Joshua") );
+#push(@result, &processBookVariant("09.JudgesA.par", "JudgA", "Codex Alexandrinus:", "08.JudgesB.par", "JudgB", "Codex Vaticanus:", "Judg", "Judg", "Judges") );
+
+# push(@result, &processBook("10.Ruth.par", "Ruth", "Ruth", "Ruth", "Ruth") );
+# push(@result, &processBook("11.1Sam.par", "1Sam/K", "iSam", "1Sam", "1 Samuel") );
+# push(@result, &processBook("12.2Sam.par", "2Sam/K", "iiSam", "2Sam", "2 Samuel") );
# push(@result, &processBook("13.1Kings.par", "1/3Kgs", "iKgs", "1Kgs", "1 Kings") );
# push(@result, &processBook("14.2Kings.par", "2/4Kgs", "iiKgs", "2Kgs", "2 Kings") );
# push(@result, &processBook("15.1Chron.par", "1Chr", "iChr", "1Chr", "1 Chronicles") );
@@ -786,7 +863,7 @@ push(@result, &processBookVariant("09.JudgesA.par", "JudgA", "Codex Alexandrinus
# push(@result, &processBook("18.Ezra.par", "Ezr", "Ezra", "Ezra", "Ezra") );
# push(@result, &processBook("19.Neh.par", "Neh", "Neh", "Neh", "Nehemiah") );
# push(@result, &processBook("18.Esther.par", "Esth", "Esth", "Esth", "Esther") );
-# push(@result, &processBook("26.Job.par", "Job", "Job", "Job", "Job") );
+ push(@result, &processBook("26.Job.par", "Job", "Job", "Job", "Job") );
#
# #This might need special handling
# #push(@result, &processBook("Psalms.par", "Ps", "Ps", "Ps", "Psalms",