diff options
author | Martin Gruner <mg.pub@gmx.net> | 2005-11-02 21:21:15 +0000 |
---|---|---|
committer | Martin Gruner <mg.pub@gmx.net> | 2005-11-02 21:21:15 +0000 |
commit | c033620be0616c507eda2be35ec6c6d2dfcb633b (patch) | |
tree | 6ae49c98e71dc09e882b30b4c1f27e743c84b430 /modules | |
parent | 88d0e07a30f5455d9d0cc3e3922772c772ea0a50 (diff) | |
download | sword-tools-c033620be0616c507eda2be35ec6c6d2dfcb633b.tar.gz |
I'm getting tired of it. Parses Gen-2Sam now.
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@49 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules')
-rw-r--r-- | modules/mt-lxx-parallel/convert.pl | 103 |
1 files changed, 71 insertions, 32 deletions
diff --git a/modules/mt-lxx-parallel/convert.pl b/modules/mt-lxx-parallel/convert.pl index e2302ad..007b828 100644 --- a/modules/mt-lxx-parallel/convert.pl +++ b/modules/mt-lxx-parallel/convert.pl @@ -18,7 +18,7 @@ my $MorphologicalSegmentStart = "<seg type=\"morph\">" ; my $MorphologicalSegmentEnd = "</seg>" ; my $MorphologicalDivisionMarker = $MorphologicalSegmentEnd . $MorphologicalSegmentStart; -my $hebrewLetters="A-Z\(\)\+\#\$\*\&/"; #used in a character class of a regexp later +my $hebrewLetters="A-Z\(\)\+\#\$\*\&/r"; #used in a character class of a regexp later my %hebrew2utf8 = ( ")" =>chr(0x05D0), #HEBREW LETTER ALEF @@ -30,6 +30,7 @@ my %hebrew2utf8 = ( "H" =>chr(0x05D4), #HEBREW LETTER HE "W" =>chr(0x05D5), #HEBREW LETTER VAV "Z" =>chr(0x05D6), #HEBREW LETTER ZAYIN +"z" =>chr(0x05D6), #HEBREW LETTER ZAYIN # TODO: this occured in a retranslation "X" =>chr(0x05D7), #HEBREW LETTER HET "+" =>chr(0x05D8), #HEBREW LETTER TET "Y" =>chr(0x05D9), #HEBREW LETTER YOD @@ -49,6 +50,7 @@ my %hebrew2utf8 = ( "C" =>chr(0x05E6), #HEBREW LETTER TSADI "Q" =>chr(0x05E7), #HEBREW LETTER QOF / KOF "R" =>chr(0x05E8), #HEBREW LETTER RESH +"r" =>chr(0x05E8), #HEBREW LETTER RESH # TODO: this occured in a retranslation "#" =>chr(0x05E9).chr(0x05C1), #HEBREW LETTER SHIN == SIN/SHIN without dot "\$" =>chr(0x05E9).chr(0x05C1), #HEBREW LETTER SHIN + SHIN DOT == SHIN "&" =>chr(0x05E9).chr(0x05C2), #HEBREW LETTER SHIN + SIN DOT == SIN @@ -167,6 +169,7 @@ my %notes = ( "--?" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX)?", # TODO: my addition, check, probably wrong "--+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", "---+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", # TODO: my addition +"-+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", # TODO: my addition "'" => "Long minus or plus (at least four lines).", # TODO: my addition, check "''" => "Long minus or plus (at least four lines).", "{d}" => "Reference to doublet (occurring between the two elements of the doublet).", @@ -176,6 +179,7 @@ my %notes = ( "?" => "Questionable notation, equivalent, etc.", "??" => "Questionable notation, equivalent, etc.", # TODO: my addition "{p}" => "Greek preverb representing Hebrew preposition.", +"{p}+" => "Greek preverb representing Hebrew preposition.", # TODO: my addition ??? "{pm}" => "Greek preverb representing Hebrew preposition.", # TODO: my addition, check "{..p}" => "Preposition added in the LXX in accordance with the rules of the Greek language or translational habits.", @@ -219,14 +223,19 @@ my %notes = ( "=\%" => "Introducing categories of translation technique recorded in col. b.", "=\%vap" => "Change from active to passive form in verbs.", "=\%vpa" => "Change from passive to active form in verbs.", +"\%vpa" => "Change from passive to active form in verbs.", #my addition +"=vpa" => "Change from passive to active form in verbs.", # TODO: my addition, check "=\%p" => "Difference in preposition or particle.", "=\%p?" => "Difference in preposition or particle?", "=p" => "Difference in preposition or particle.", # TODO: my addition, check, uncertain? +"={d}\%p" => "Difference in preposition or particle.", # TODO: my addition, check, uncertain? DOUBLET? "=\%pa" => "Difference in preposition or particle.", # TODO: my addition, check "=\%p=" => "Difference in preposition or particle.", # TODO: my addition, check "=\%p+" => "Addition of preposition or particle.", +"\%p+" => "Addition of preposition or particle.", #my addition "=\%p+?" => "Addition of preposition or particle?", "=\%p-" => "Omission of preposition or particle.", +"=p\%-" => "Omission of preposition or particle.", # TODO: my addition, check, uncertain? "=p-" => "Omission of preposition or particle.", # TODO: my addition, check, uncertain? "=;" => "Retroversion in col. b based on equivalence occurring in immediate or remote context.", #"G" => "Hebrew variant, but at this stage no plausible retroversion is suggested.", @@ -235,16 +244,20 @@ my %notes = ( "=?\@" => "Etymological exegesis?", #my addition "=\@?" => "Etymological exegesis?", #my addition "=\@...a" => "Etymological exegesis according to Aramaic.", +"=\@a" => "Etymological exegesis according to Aramaic.", # TODO: my addition, check "=:" => "Introducing reconstructed proper noun.", +"=:?" => "Introducing reconstructed proper noun?", ":" => "Introducing reconstructed proper noun.", #my addition "=v" => "Difference in vocalization (reading).", -"=v?" => "Difference in vocalization (reading)?", +"={d}v" => "Difference in vocalization (reading).", # TODO: check, occurs in text, DOUBLET? +"=v?" => "Difference in vocalization (reading)?", "=vs" => "Difference in vocalization (reading).", # TODO: check, occurs in text "=>" => "Difference in vocalization (reading).", # TODO: check, occurs in text "=r" => "Incomplete retroversion.", "{*}" => "Agreement of LXX with ketib.", "{**}" => "Agreement of LXX with qere.", +"{**?}" => "Agreement of LXX with qere?", #my addition "." => "Interchange of consonants between MT and the presumed Hebrew parent text of the LXX.", @@ -255,6 +268,9 @@ my %notes = ( ".j" => "Two words of MT joined into one word in the parent text of the LXX.", ".w" => "Different word-division reflected in the parent text of the LXX.", +"(!)" => "(!)", #my addition + + "<sp" => "<sp", #TODO: FIX, occurs in text "<sp>" => "<sp>", #TODO: FIX, occurs in text "<sp^>" => "<sp^>", #TODO: FIX, occurs in text @@ -296,7 +312,6 @@ sub translateHebrewNote(){ ($origNote =~ m/^\[(.+)\]/) and return( &createNote( "Number of verse in LXX ($1) is different from MT." ) ); - ($origNote =~ m/^=?{\.\.d(.+)}/) and return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..d}" }) ); @@ -317,6 +332,8 @@ sub translateHebrewNote(){ return( &openNote( $notes{"{...}"} . "(".&translateHebrewWordorNote($1) ." " ) ); ($origNote =~ m/^=?{\.\.r([^}]+)$/) and return( &openNote( $notes{"{..r}"} . "(".&translateHebrewWordorNote($1) ." " ) ); + ($origNote =~ m/^=?{\.\.([^}]+)$/) and + return( &openNote( $notes{"{..}"} . "(".&translateHebrewWordorNote($1) ." " ) ); ($origNote =~ m/^([^{]+)}$/) and return( &closeNote(&translateHebrewWordorNote( $1 ).")" ) ); @@ -352,23 +369,12 @@ sub translateHebrewNote(){ # return("<reference osisRef=\"$1.$2\"/>"); return $origNote; -# #Special cases: osisREf with bookID, split because of space char, so put them together again -# ($origNote =~ m/^<\^?(\w+)$/) and -# # return( "<reference osisRef=\"$1." ); # TODO: check if <reference/> exists -# return $origNote; -# ($origNote =~ m/^(\d+)[.:](\d+)>?/) and -# # return( "$1.$2\"/> " ); -# return $origNote; - - - - #special case: no note, but a crossref (with book ID) -# ($origNote =~ m/^<\^?(\w+)\s?(\d+)[.:](\d+)>?/) and -# return("<reference osisRef=\"$1.$2.$3\"><$1></reference> "); - ($origNote =~ m/^[?].*/) and return( &createNote( $notes{"?"} ) . &translateHebrewWordorNote( $1 ) ); + ($origNote =~ m/^(.+),(.+)$/) and # 2 Notes / Words, split up, but only at the end + return( &translateHebrewWordorNote( $1 ). "," .&translateHebrewWordorNote( $2 ) ); + HANDLE_NOTE_FALLBACK: for my $i ( 1..(length($origNote)-1) ){ #last try, split up into chunks my $currentLength = length( $origNote ) - 1 - $i; #start with the longest and become shorter, to find the complicated notes {!}p before the simple {!} if ( $notes{ substr($origNote,0,$currentLength) } ){ @@ -397,19 +403,31 @@ sub translateGreekNote(){ ($notes{ $origNote }) and return( &createNote( $notes{$origNote} ) ); - ($origNote =~ m/^\[(.+)\]/) and + ($origNote =~ m/^\[(.+)\]?/) and return( &createNote( "Number of verse in LXX ($1) is different from MT." ) ); ($origNote =~ m/^\[\[(.+)\]\]/) and return( &createNote( "Number of verse in MT ($1) is different from the LXX." ) ); + #special case: no note, but a crossref (no book ID) # TODO: for now OSIS refs are not parsed FIX THIS, see above + ($origNote =~ m/^\[\[|\]\]$/) and + return $origNote; + + #special case: no note, but a crossref (no book ID) # TODO: for now OSIS refs are not parsed FIX THIS, see above + ($origNote =~ m/^\[.+$/) and + return $origNote; + + ($origNote =~ m/^=?{\.\.\.(.+)\.\.\.(.+)}/) and # special case: {...word1...word2} + return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{...}" }) + . &createNote("(".&translateGreekWordorNote( $2 ).") ". $notes{ "{...}" })); + ($origNote =~ m/^=?{\.\.p(.+)}/) and return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..p}" }) ); ($origNote =~ m/^=?{\.\.d(.+)}/) and return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..d}" }) ); - ($origNote =~ m/^=?{\.\.\.d(.+)}/) and # TODO: my addition, check + ($origNote =~ m/^=?{\.\.\.d(.+)}/) and # TODO: my addition, check return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..d}" }) ); ($origNote =~ m/^=?{\.\.(.+)}/) and @@ -431,10 +449,18 @@ sub translateGreekNote(){ return( &openNote($notes{"{...}"} . "(" .&translateGreekWordorNote($1) ." " ) ); ($origNote =~ m/^=?{c([^}]+)$/) and return( &openNote($notes{"{c}"} . "(" .&translateGreekWordorNote($1) ." " ) ); # TODO: occurs {cXXX}, not documented + ($origNote =~ m/^{([^}]+)$/) and # TODO: occurs, seems unreasonable + return( "(" .&translateGreekWordorNote($1) ." " ); ($origNote =~ m/^([^{]+)}$/) and return( &closeNote(&translateGreekWordorNote( $1 ).")" ) ); + #special case: no note, but a crossref (no book ID) # TODO: for now OSIS refs are not parsed + ($origNote =~ m/^<.+>?$/) and + return $origNote; + #special case: no note, but a crossref (no book ID) # TODO: for now OSIS refs are not parsed + ($origNote =~ m/^{=\d+}$/) and + return $origNote; HANDLE_NOTE_FALLBACK: for my $i ( 1..(length($origNote)-1) ){ #last try, split up into chunks my $currentLength = length( $origNote ) - 1 - $i; #start with the longest and become shorter, to find the complicated notes {!}p before the simple {!} @@ -454,6 +480,7 @@ sub translateHebrewWordorNote(){ #will return unicode hebrew with morph separati ( $hebrew =~ m/^[^$hebrewLetters]/ ) and return &translateHebrewNote( $hebrew ); ( $hebrew =~ m/[}]$/ ) and return &translateHebrewNote( $hebrew ); + ( $notes{ $hebrew } ) and return &translateHebrewNote( $hebrew ); my $length = length($hebrew); my $index = 0; @@ -474,7 +501,8 @@ sub translateGreekWordorNote(){ ( $greek =~ m/^[^$greekLetters]/ ) and return &translateGreekNote( $greek ); ( $greek =~ m/[}]$/ ) and return &translateGreekNote( $greek ); - ( $greek eq "#" ) and return &translateGreekNote( $greek ); + ( $greek eq "#" ) and return &translateGreekNote( "#" ); + ( $greek =~ m/(.+)(\[.+\])$/ ) and return &translateGreekWordorNote( $1 ) .&translateGreekNote( $2 ); ( $greek =~ m/^(.+)(\[\d+\])$/ ) and return ( &translateGreekWordorNote( $1 ) . &translateGreekNote ( $2 ) ); @@ -504,10 +532,21 @@ sub parseLine(){ printf("parsing %s\n", $origLine); - $origLine =~ s/--=/--+ =/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable - $origLine =~ s/-\%vap/=\%vap/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable - - ($origLine =~ m/^([^=]+)?([=].+)?\t(.+)$/) or die("No match in parseLine().\n"); + $origLine =~ s/ --=/ --+ =/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable + $origLine =~ s/ -\%vap/ =\%vap/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable + $origLine =~ s/ ;=/ =;/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable + $origLine =~ s/ \+;/ =;/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable + $origLine =~ s/[\ ]{10,}/\t/; # TODO: UGLY HACK, sometimes the tab in the wrong place but large spaces in the right + ($origLine eq "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY KAI\\ TO\\N AMORRAI=ON ") and + $origLine = "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY\tKAI\\ TO\\N AMORRAI=ON"; # TODO: hack, Tab missing + ($origLine eq "W/H/KHNYM =W/H/)BNYM .m .kb # KAI\\ OI( LI/QOI ") and + $origLine = "W/H/KHNYM =W/H/)BNYM .m .kb\tKAI\\ OI( LI/QOI"; # TODO: hack, Tab missing + ($origLine eq "W/YC+YRW =;W/YC+YDW .rd <9.12 E)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO ") and + $origLine = "W/YC+YRW =;W/YC+YDW .rd <9.12\tE)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO"; # TODO: hack, Tab missing + +# printf("parsing %s\n", $origLine); + + ($origLine =~ m/^([^=\t]+)?([=][^\t]*)?\t(.+)$/) or die("No match in parseLine().\n"); ($1 or $2) or die("Hebrew not found.\n"); $3 or die("Greek not found.\n"); $1 and my @hebrewWordsColA = split(/\s+/, $1); @@ -729,17 +768,17 @@ my @result; # File File id ThML id OSIS id Short Book Title -#push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") ); -#push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") ); -#push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") ); -#push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") ); -#push(@result, &processBook("05.Deut.par", "Deut", "Deut", "Deut", "Deuteronomy") ); +push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") ); +push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") ); +push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") ); +push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") ); +push(@result, &processBook("05.Deut.par", "Deut", "Deut", "Deut", "Deuteronomy") ); push(@result, &processBookVariant("07.JoshA.par", "JoshA", "Codex Alexandrinus:", "06.JoshB.par", "JoshB", "Codex Vaticanus:", "Josh", "Josh", "Joshua") ); push(@result, &processBookVariant("09.JudgesA.par", "JudgA", "Codex Alexandrinus:", "08.JudgesB.par", "JudgB", "Codex Vaticanus:", "Judg", "Judg", "Judges") ); # -# push(@result, &processBook("10.Ruth.par", "Ruth", "Ruth", "Ruth", "Ruth") ); -# push(@result, &processBook("11.1Sam.par", "1Sam/K", "iSam", "1Sam", "1 Samuel") ); -# push(@result, &processBook("12.2Sam.par", "2Sam/K", "iiSam", "2Sam", "2 Samuel") ); + push(@result, &processBook("10.Ruth.par", "Ruth", "Ruth", "Ruth", "Ruth") ); + push(@result, &processBook("11.1Sam.par", "1Sam/K", "iSam", "1Sam", "1 Samuel") ); + push(@result, &processBook("12.2Sam.par", "2Sam/K", "iiSam", "2Sam", "2 Samuel") ); # push(@result, &processBook("13.1Kings.par", "1/3Kgs", "iKgs", "1Kgs", "1 Kings") ); # push(@result, &processBook("14.2Kings.par", "2/4Kgs", "iiKgs", "2Kgs", "2 Kings") ); # push(@result, &processBook("15.1Chron.par", "1Chr", "iChr", "1Chr", "1 Chronicles") ); |