diff options
author | Martin Gruner <mg.pub@gmx.net> | 2005-11-09 21:18:35 +0000 |
---|---|---|
committer | Martin Gruner <mg.pub@gmx.net> | 2005-11-09 21:18:35 +0000 |
commit | 0c303ed20fe6ffce49fb0de42522bdfe4c2b5528 (patch) | |
tree | 4b1a21cd58a59d4c75545b40c90d44aab15f9c6a /modules/mt-lxx-parallel/convert.pl | |
parent | c033620be0616c507eda2be35ec6c6d2dfcb633b (diff) | |
download | sword-tools-0c303ed20fe6ffce49fb0de42522bdfe4c2b5528.tar.gz |
Gen-Job parses.
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@50 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/mt-lxx-parallel/convert.pl')
-rw-r--r-- | modules/mt-lxx-parallel/convert.pl | 117 |
1 files changed, 97 insertions, 20 deletions
diff --git a/modules/mt-lxx-parallel/convert.pl b/modules/mt-lxx-parallel/convert.pl index 007b828..4187c4d 100644 --- a/modules/mt-lxx-parallel/convert.pl +++ b/modules/mt-lxx-parallel/convert.pl @@ -88,6 +88,11 @@ my %greek2utf8 = ( "^" => "^", +"-" => "-", #occurs in the text +":" => ":", #occurs in the text!? +"!" => "!", #occurs in the text!? +"|" => "|", #occurs in the text!? + # "*A" =>chr(0x0391), #GREEK CAPITAL LETTER ALPHA # "*B" =>chr(0x0392), #GREEK CAPITAL LETTER BETA # "*G" =>chr(0x0393), #GREEK CAPITAL LETTER GAMMA @@ -120,6 +125,7 @@ my %greek2utf8 = ( "G" =>chr(0x03B3), #GREEK SMALL LETTER GAMMA "D" =>chr(0x03B4), #GREEK SMALL LETTER DELTA "E" =>chr(0x03B5), #GREEK SMALL LETTER EPSILON +"e" =>chr(0x03B5), #GREEK SMALL LETTER EPSILON # occurs in Neh "V" =>chr(0x03DD), #GREEK SMALL LETTER DIGAMMA "Z" =>chr(0x03B6), #GREEK SMALL LETTER ZETA "H" =>chr(0x03B7), #GREEK SMALL LETTER ETA @@ -129,6 +135,7 @@ my %greek2utf8 = ( "L" =>chr(0x03BB), #GREEK SMALL LETTER LAMDA "M" =>chr(0x03BC), #GREEK SMALL LETTER MU "N" =>chr(0x03BD), #GREEK SMALL LETTER NU +"n" =>chr(0x03BD), #GREEK SMALL LETTER NU # occurs in Neh "C" =>chr(0x03BE), #GREEK SMALL LETTER XI "O" =>chr(0x03BF), #GREEK SMALL LETTER OMICRON "P" =>chr(0x03C0), #GREEK SMALL LETTER PI @@ -148,6 +155,7 @@ my %notes = ( "{#}" => "Asterized passage (in Job).", "{g}" => "Reference to difference between the text of Rahlfs and that of the relevant Göttingen edition.", "..a" => "Word included in one of the Aramaic sections.", +",,a" => "Word included in one of the Aramaic sections.", #my addition, ok "*" => "Ketib.", "**" => "Qere.", "*z" => "Qere wela ketib, ketib wela qere.", @@ -166,14 +174,18 @@ my %notes = ( "---" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX).", "---?" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX)?", "--" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX).", # TODO: my addition, check, probably wrong +"{---%}" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX).", # TODO: my addition, check?? "--?" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX)?", # TODO: my addition, check, probably wrong "--+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", "---+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", # TODO: my addition +"-.-" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", # TODO: my addition, check? "-+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", # TODO: my addition "'" => "Long minus or plus (at least four lines).", # TODO: my addition, check "''" => "Long minus or plus (at least four lines).", "{d}" => "Reference to doublet (occurring between the two elements of the doublet).", "{d?}" => "Reference to doublet (occurring between the two elements of the doublet)?", +"{d}?" => "Reference to doublet (occurring between the two elements of the doublet)?", +"{?d}" => "Reference to doublet (occurring between the two elements of the doublet)?", "{..d}" => "Distributive rendering, occurring once in the translation but referring to more than one Hebrew word.", "{..r}" => "Notation in Hebrew column of elements repeated in the translation.", "?" => "Questionable notation, equivalent, etc.", @@ -206,10 +218,12 @@ my %notes = ( "{s}" => "Hebrew M/, MN (comparative, superlative) reflected by Greek comparative or superlative.", # TODO: UTF-8 "{t}" => "Transliterated Hebrew word.", +"{dt}" => "Transliterated Hebrew word.", # TODO: occurs. DOUBLET? "{t.}" => "Transliterated Hebrew word.", # TODO: unknown "<t?>" => "Transliterated Hebrew word.", # TODO: unknown "{t?}" => "Transliterated Hebrew word?", "{c}" => "Unknown.", # TODO: unknown +"{c}?" => "Unknown.", # TODO: unknown "#" => "Long line continuing in next one, placed both at the end of the line running over and at the beginning of the following line in the opposite column.", @@ -257,6 +271,7 @@ my %notes = ( "{*}" => "Agreement of LXX with ketib.", "{**}" => "Agreement of LXX with qere.", +"{**" => "Agreement of LXX with qere.", #my addidion "{**?}" => "Agreement of LXX with qere?", #my addition "." => "Interchange of consonants between MT and the presumed Hebrew parent text of the LXX.", @@ -298,7 +313,7 @@ sub closeNote(){ sub translateHebrewNote(){ my $origNote = shift; - print("TranslateHebrewNote $origNote\n"); +# print("TranslateHebrewNote $origNote\n"); ($origNote eq "=") and return; #= only marks colB, no real note @@ -312,6 +327,10 @@ sub translateHebrewNote(){ ($origNote =~ m/^\[(.+)\]/) and return( &createNote( "Number of verse in LXX ($1) is different from MT." ) ); + #special case: no note, but a crossref (no book ID) # TODO: for now OSIS refs are not parsed FIX THIS, see above + ($origNote =~ m/^=?\[\[|\]\]$/) and + return $origNote; + ($origNote =~ m/^=?{\.\.d(.+)}/) and return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..d}" }) ); @@ -399,7 +418,7 @@ sub translateHebrewLetters(){ #will return unicode hebrew without morph separati sub translateGreekNote(){ my $origNote = shift; - print("TranslateGreekNote $origNote\n"); +# print("TranslateGreekNote $origNote\n"); ($notes{ $origNote }) and return( &createNote( $notes{$origNote} ) ); @@ -447,13 +466,10 @@ sub translateGreekNote(){ return( &openNote($notes{"{...}"} . "(" .&translateGreekWordorNote($1) ." " ) ); ($origNote =~ m/^=?{\.\.([^}]+)$/) and return( &openNote($notes{"{...}"} . "(" .&translateGreekWordorNote($1) ." " ) ); + ($origNote =~ m/^=?{\.\.(\^[^}]+)$/) and + return( &openNote($notes{"{..}"} . "(" .&translateGreekWordorNote($1) ." " ) ); ($origNote =~ m/^=?{c([^}]+)$/) and return( &openNote($notes{"{c}"} . "(" .&translateGreekWordorNote($1) ." " ) ); # TODO: occurs {cXXX}, not documented - ($origNote =~ m/^{([^}]+)$/) and # TODO: occurs, seems unreasonable - return( "(" .&translateGreekWordorNote($1) ." " ); - - ($origNote =~ m/^([^{]+)}$/) and - return( &closeNote(&translateGreekWordorNote( $1 ).")" ) ); #special case: no note, but a crossref (no book ID) # TODO: for now OSIS refs are not parsed ($origNote =~ m/^<.+>?$/) and @@ -462,6 +478,14 @@ sub translateGreekNote(){ ($origNote =~ m/^{=\d+}$/) and return $origNote; + ($origNote =~ m/^([^{]+)}$/) and + return( &closeNote(&translateGreekWordorNote( $1 ).")" ) ); + ($origNote =~ m/^([^{]+)}\?$/) and + return( &closeNote(&translateGreekWordorNote( $1 )."?)" ) ); + + ($origNote =~ m/^{([^}]+}?)$/) and # TODO: occurs, seems unreasonable + return( "(" .&translateGreekWordorNote($1) ." " ); + HANDLE_NOTE_FALLBACK: for my $i ( 1..(length($origNote)-1) ){ #last try, split up into chunks my $currentLength = length( $origNote ) - 1 - $i; #start with the longest and become shorter, to find the complicated notes {!}p before the simple {!} if ( $notes{ substr($origNote,0,$currentLength) } ){ @@ -499,8 +523,10 @@ sub translateGreekWordorNote(){ my $greek = shift; +# printf("TranslateGreek of $greek\n"); + ( $greek =~ m/^[^$greekLetters]/ ) and return &translateGreekNote( $greek ); - ( $greek =~ m/[}]$/ ) and return &translateGreekNote( $greek ); + ( $greek =~ m/[}]\??$/ ) and return &translateGreekNote( $greek ); ( $greek eq "#" ) and return &translateGreekNote( "#" ); ( $greek =~ m/(.+)(\[.+\])$/ ) and return &translateGreekWordorNote( $1 ) .&translateGreekNote( $2 ); @@ -537,12 +563,21 @@ sub parseLine(){ $origLine =~ s/ ;=/ =;/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable $origLine =~ s/ \+;/ =;/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable $origLine =~ s/[\ ]{10,}/\t/; # TODO: UGLY HACK, sometimes the tab in the wrong place but large spaces in the right + $origLine =~ s/\.h-<ge10\.4/.h- <ge10.4>/; # TODO: UGLY HACK, sometimes the tab in the wrong place but large spaces + $origLine =~ s/}{/} {/; # TODO: UGLY HACK, notes hung together + $origLine =~ s/=a\$\/DY/=A\$\/DY/;# TODO: UGLY HACK, Hebrew letter wrong + $origLine =~ s/{\.\.\^EPIQEI\\S\.\.\^E\)FI\/LHSA}/{..^EPIQEI\\S E)FI\/LHSA}/;# TODO: UGLY HACK, strange note + ($origLine eq "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY KAI\\ TO\\N AMORRAI=ON ") and $origLine = "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY\tKAI\\ TO\\N AMORRAI=ON"; # TODO: hack, Tab missing ($origLine eq "W/H/KHNYM =W/H/)BNYM .m .kb # KAI\\ OI( LI/QOI ") and $origLine = "W/H/KHNYM =W/H/)BNYM .m .kb\tKAI\\ OI( LI/QOI"; # TODO: hack, Tab missing ($origLine eq "W/YC+YRW =;W/YC+YDW .rd <9.12 E)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO ") and $origLine = "W/YC+YRW =;W/YC+YDW .rd <9.12\tE)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO"; # TODO: hack, Tab missing + ($origLine eq "W/YBW) {...EI)S}\tKAI\\ EI)SH=LQEN") and + $origLine = "W/YBW)\t{...EI)S} KAI\\ EI)SH=LQEN"; # TODO: hack, TAB misplaced + ($origLine eq "W/L) {..^OU)}\tDE\\") and + $origLine = "W/L)\t{..^OU)} DE\\"; # TODO: hack, TAB misplaced # printf("parsing %s\n", $origLine); @@ -758,6 +793,48 @@ sub loadFile(){ #$fileName loads the file into the buffer and makes small corr $index++; } } + elsif ($filename eq "15.1Chron.par"){ #fix for 1Chron + foreach my $currentItem (@buffer){ + if ($buffer[$index] =~ m/^AU\)TOU=/){ + $result[$#result-1] .= " AUT)TOU="; + } + elsif ($buffer[$index] =~ m/^E\(\/C/){ + $result[$#result-1] .= " E(/C"; + } + elsif ($buffer[$index] =~ m/^MOU/){ + $result[$#result-1] .= " MOU"; + } + else{ + push(@result, $buffer[$index] ); + } + $index++; + } + } + elsif ($filename eq "19.Neh.par"){ #fix for Numeri, one time where NUMA has to go on the preceding line, no space + foreach my $currentItem (@buffer){ + if ($buffer[$index] =~ m/^NEHL$/){ + $result[$#result-1] .= "NEHL"; # no space, ANANEL + } + else{ + push(@result, $buffer[$index] ); + } + $index++; + } + } + elsif ($filename eq "18.Esther.par"){ #fix for Numeri, one time where NUMA has to go on the preceding line, no space + foreach my $currentItem (@buffer){ + if ($buffer[$index] =~ m/^ESTHKE\/NAI$/){ + $result[$#result-1] .= "ESTHKE/NAI"; # no space + } + elsif ($buffer[$index] =~ m/^ESTHKW\\S$/){ + $result[$#result-1] .= "ESTHKW\S"; # no space + } + else{ + push(@result, $buffer[$index] ); + } + $index++; + } + } else{ @result = @buffer; } @@ -768,17 +845,17 @@ my @result; # File File id ThML id OSIS id Short Book Title -push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") ); -push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") ); -push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") ); -push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") ); -push(@result, &processBook("05.Deut.par", "Deut", "Deut", "Deut", "Deuteronomy") ); -push(@result, &processBookVariant("07.JoshA.par", "JoshA", "Codex Alexandrinus:", "06.JoshB.par", "JoshB", "Codex Vaticanus:", "Josh", "Josh", "Joshua") ); -push(@result, &processBookVariant("09.JudgesA.par", "JudgA", "Codex Alexandrinus:", "08.JudgesB.par", "JudgB", "Codex Vaticanus:", "Judg", "Judg", "Judges") ); -# - push(@result, &processBook("10.Ruth.par", "Ruth", "Ruth", "Ruth", "Ruth") ); - push(@result, &processBook("11.1Sam.par", "1Sam/K", "iSam", "1Sam", "1 Samuel") ); - push(@result, &processBook("12.2Sam.par", "2Sam/K", "iiSam", "2Sam", "2 Samuel") ); +#push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") ); +#push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") ); +#push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") ); +#push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") ); +#push(@result, &processBook("05.Deut.par", "Deut", "Deut", "Deut", "Deuteronomy") ); +#push(@result, &processBookVariant("07.JoshA.par", "JoshA", "Codex Alexandrinus:", "06.JoshB.par", "JoshB", "Codex Vaticanus:", "Josh", "Josh", "Joshua") ); +#push(@result, &processBookVariant("09.JudgesA.par", "JudgA", "Codex Alexandrinus:", "08.JudgesB.par", "JudgB", "Codex Vaticanus:", "Judg", "Judg", "Judges") ); + +# push(@result, &processBook("10.Ruth.par", "Ruth", "Ruth", "Ruth", "Ruth") ); +# push(@result, &processBook("11.1Sam.par", "1Sam/K", "iSam", "1Sam", "1 Samuel") ); +# push(@result, &processBook("12.2Sam.par", "2Sam/K", "iiSam", "2Sam", "2 Samuel") ); # push(@result, &processBook("13.1Kings.par", "1/3Kgs", "iKgs", "1Kgs", "1 Kings") ); # push(@result, &processBook("14.2Kings.par", "2/4Kgs", "iiKgs", "2Kgs", "2 Kings") ); # push(@result, &processBook("15.1Chron.par", "1Chr", "iChr", "1Chr", "1 Chronicles") ); @@ -786,7 +863,7 @@ push(@result, &processBookVariant("09.JudgesA.par", "JudgA", "Codex Alexandrinus # push(@result, &processBook("18.Ezra.par", "Ezr", "Ezra", "Ezra", "Ezra") ); # push(@result, &processBook("19.Neh.par", "Neh", "Neh", "Neh", "Nehemiah") ); # push(@result, &processBook("18.Esther.par", "Esth", "Esth", "Esth", "Esther") ); -# push(@result, &processBook("26.Job.par", "Job", "Job", "Job", "Job") ); + push(@result, &processBook("26.Job.par", "Job", "Job", "Job", "Job") ); # # #This might need special handling # #push(@result, &processBook("Psalms.par", "Ps", "Ps", "Ps", "Psalms", |