diff options
Diffstat (limited to 'modules/mt-lxx-parallel/convert.pl')
-rw-r--r-- | modules/mt-lxx-parallel/convert.pl | 184 |
1 files changed, 128 insertions, 56 deletions
diff --git a/modules/mt-lxx-parallel/convert.pl b/modules/mt-lxx-parallel/convert.pl index 018439e..f38c6bd 100644 --- a/modules/mt-lxx-parallel/convert.pl +++ b/modules/mt-lxx-parallel/convert.pl @@ -21,6 +21,9 @@ my $MorphologicalDivisionMarker = $MorphologicalSegmentEnd . $MorphologicalSegme my $hebrewLetters="A-Z\(\)\+\#\$\*\&/"; #used in a character class of a regexp later my %hebrew2utf8 = ( ")" =>chr(0x05D0), #HEBREW LETTER ALEF + +"A" =>chr(0x05D0), #HEBREW LETTER ALEF # TODO: check, this is from an occurrence of ABRHM + "B" =>chr(0x05D1), #HEBREW LETTER BET "G" =>chr(0x05D2), #HEBREW LETTER GIMEL "D" =>chr(0x05D3), #HEBREW LETTER DALET @@ -58,9 +61,11 @@ my %hebrew2utf8 = ( "," => ",", #separate words in colB -"{" => "{", # TODO: CHECK IF NECCESSARY -"}" => "}", -"." => ".", +"?" => "<note type=\"textual\">Uncertain.</note>" #HACK + +#"{" => "{", # TODO: CHECK IF NECCESSARY +# "}" => "}", +#"." => ".", ); @@ -147,87 +152,77 @@ my %notes = ( "*z" => "Qere wela ketib, ketib wela qere.", "[ ]" => "Reference of number of verse in LXX, different from MT.", # TODO: MAKE USE OF IT "[[ ]]" => "Reference number of verse in MT, different from the LXX.", +"{x}" => "UNKNOWN", # TODO: FIX "--- {x}" => "Apparent minus created by lack of equivalence between long stretches of text in the LXX and MT.", "--+ {x}" => "Apparent plus created by lack of equivalence between long stretches of text in the LXX and MT.", "{...}" => "Equivalent reflected elsewhere in the text, disregarded by indexing program.", "~" => "Difference in sequence between MT and LXX, denoted after the first Hebrew word and before the second one, as well as between two Greek words.", "~~~" => "Equivalent of the Hebrew or Greek word(s) occurring elsewhere in the verse or context (transposition).", "{..~}" => "Stylistic or grammatical transposition.", +"{..}" => "Stylistic or grammatical transposition.", # TODO: occurs in the text, unknown meaning "---" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX).", +"--" => "In the Greek column: Hebrew counterpart lacking in the LXX (minus in the LXX).", # TODO: my addition, check, probably wrong "--+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", "---+" => "In col a. of the Hebrew: element \"added\" in the Greek (plus in the LXX).", # TODO: my addition +"'" => "Long minus or plus (at least four lines).", # TODO: my addition, check "''" => "Long minus or plus (at least four lines).", "{d}" => "Reference to doublet (occurring between the two elements of the doublet).", "{d?}" => "Reference to doublet (occurring between the two elements of the doublet)?", "{..d}" => "Distributive rendering, occurring once in the translation but referring to more than one Hebrew word.", "{..r}" => "Notation in Hebrew column of elements repeated in the translation.", "?" => "Questionable notation, equivalent, etc.", +"??" => "Questionable notation, equivalent, etc.", # TODO: my addition "{p}" => "Greek preverb representing Hebrew preposition.", "{..p}" => "Preposition added in the LXX in accordance with the rules of the Greek language or translational habits.", + "{!}" => "Infinitive absolute.", "{!}na" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning +"{!}ad" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning +"{!}aj" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning +"{!}nad" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}nd" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning +"{!}nd+" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}p" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning +"{!}p+" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}pd" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning "{!}-" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning +"{!}v" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning + "{s}" => "Hebrew M/, MN (comparative, superlative) reflected by Greek comparative or superlative.", # TODO: UTF-8 "{t}" => "Transliterated Hebrew word.", "\#" => "Long line continuing in next one, placed both at the end of the line running over and at the beginning of the following line in the opposite column.", "{v}" => "The reading of the main text of the LXX seems to reflect a secondary text, while the \"original\" reading is reflected in a variant.", - +# Notes regarding ColB of the Hebrew "=" => "Introducing col. b of the Hebrew (a selection of retroverted readings, presumably found in the parent text of the LXX).", -"=?" => "? Introducing col. b of the Hebrew (a selection of retroverted readings, presumably found in the parent text of the LXX).", # TODO: my addition, check "={d}" => "Reference to doublet (occurring between the two elements of the doublet).", # TODO: my addition, check "={d?}" => "Reference to doublet (occurring between the two elements of the doublet)?", # TODO: my addition, check "=\%" => "Introducing categories of translation technique recorded in col. b.", "=\%vap" => "Change from active to passive form in verbs.", "=\%vpa" => "Change from passive to active form in verbs.", "=\%p" => "Difference in preposition or particle.", +"=\%pa" => "Difference in preposition or particle.", # TODO: my addition, check "=\%p+" => "Addition of preposition or particle.", +"=\%p+?" => "Addition of preposition or particle?", "=\%p-" => "Omission of preposition or particle.", "=;" => "Retroversion in col. b based on equivalence occurring in immediate or remote context.", -"G" => "Hebrew variant, but at this stage no plausible retroversion is suggested.", +#"G" => "Hebrew variant, but at this stage no plausible retroversion is suggested.", "=+" => "Difference in numbers between MT and the LXX.", "=\@" => "Etymological exegesis.", +"=?\@" => "Etymological exegesis?", #my addition +"=\@?" => "Etymological exegesis?", #my addition "=\@...a" => "Etymological exegesis according to Aramaic.", "=:" => "Introducing reconstructed proper noun.", "=v" => "Difference in vocalization (reading).", +"=vs" => "Difference in vocalization (reading).", # TODO: check, occurs in text "=r" => "Incomplete retroversion.", "{*}" => "Agreement of LXX with ketib.", "{**}" => "Agreement of LXX with qere.", - "." => "Interchange of consonants between MT and the presumed Hebrew parent text of the LXX.", -".)(" => "Interchange of consonants (א/ע) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".(q" => "Interchange of consonants (ע/ק) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".q(" => "Interchange of consonants (ק/ע) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".)x" => "Interchange of consonants (א/ח) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".x)" => "Interchange of consonants (ח/א) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".(x" => "Interchange of consonants (ע/ח) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".x(" => "Interchange of consonants (ח/ע) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".+d" => "Interchange of consonants (ט/ד) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".br" => "Interchange of consonants (ב/ר) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".rb" => "Interchange of consonants (ר/ב) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".rd" => "Interchange of consonants (ר/ד) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".dr" => "Interchange of consonants (ד/ר) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".rg" => "Interchange of consonants (ר/ג) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".rh" => "Interchange of consonants (ר/ה) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".rl" => "Interchange of consonants (ר/ל) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".\mb" => "Interchange of consonants (ק/מ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".mn" => "Interchange of consonants (מ/נ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".nm" => "Interchange of consonants (נ/מ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".nr" => "Interchange of consonants (נ/ר) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".ny" => "Interchange of consonants (נ/י) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".yn" => "Interchange of consonants (י/נ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".rn" => "Interchange of consonants (ר/נ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".\$c" => "Interchange of consonants (שׁ/צ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".\qb" => "Interchange of consonants (ק/ב) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".\wy" => "Interchange of consonants (ו/י) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition -".\yw" => "Interchange of consonants (י/ו) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition - +".a" => "Interchange of consonants between MT and the presumed Hebrew parent text of the LXX.", # TODO: occurs, unknown ".m" => "Metathesis of consonants between MT and the presumed Hebrew parent text of the LXX.", ".z" => "Possible abbreviation.", ".s" => "One word of MT separated into two or more words in the parent text of the LXX.", @@ -235,6 +230,8 @@ my %notes = ( ".w" => "Different word-division reflected in the parent text of the LXX.", "<sp" => "<sp", #TODO: FIX, occurs in text +"<sp>" => "<sp>", #TODO: FIX, occurs in text +"<sp^>" => "<sp^>", #TODO: FIX, occurs in text "^" => "^", #Notsure what these are "^^^" => "^^^", @@ -257,41 +254,111 @@ sub closeNote(){ sub translateHebrewNote(){ my $origNote = shift; - #print("TranslateHebrewNote $origNote\n"); +# print("TranslateHebrewNote $origNote\n"); - ($origNote =~ m/{\.\.d(.+)}/) and + ($origNote =~ m/^=?\.([a-z()\$+-])([a-z()\$+-])$/) and + return( &createNote("Interchange of consonants (" . + &translateHebrewLetter( uc( $1 ) ) . "/" . &translateHebrewLetter( uc( $2 ) ) . + ") between MT and the presumed Hebrew parent text of the LXX.") ); + + + ($origNote =~ m/^{\.\.d(.+)}/) and return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..d}" }) ); - ($origNote =~ m/{\.\.r(.+)}/) and + ($origNote =~ m/^{\.\.r(.+)}/) and return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..r}" }) ); - ($origNote =~ m/{\.\.\.(.+)}/) and + ($origNote =~ m/^{\.\.\.(.+)}/) and return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{...}" }) ); - #Special cases: the note includes more than one hebrew word + ($origNote =~ m/^{\.\.(.+)}/) and + return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..}" }) ); + + #Special cases: the note includes more than one hebrew word, "cat" the results together ($origNote =~ m/^{\.\.\.([^}]+)$/) and return( &openNote("(".&translateHebrewWordorNote($1).") ".$notes{"{...}"} ) ); ($origNote =~ m/^([^{]+)}$/) and return( &closeNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{...}" }) ); - ($origNote =~ m/^=[^?$hebrewLetters]*([?$hebrewLetters]+)$/) and - return( &createNote( $notes{"?"} ) . &translateHebrewWordorNote( $1 ) ); + ($origNote =~ m/^@([?$hebrewLetters]+)/) and + return( &createNote( $notes{ "=\@" } ) . &translateHebrewWordorNote( $1 ) ); + + ($origNote =~ m/^\^([?$hebrewLetters]+)/) and + return( "^" . &translateHebrewWordorNote( $1 ) ); # TODO: check, what is ^? + + # + # Special handling for the = colB Notes + # + if (($origNote =~ m/^=/) and (not $notes{ $origNote } )) { #only split if the note does not exist, to avoid parsing problems + print("note reads $origNote\n"); + if ($origNote =~ m/^=(<[0-9.a-z]+>)$/){ + return( &translateHebrewWordorNote( $1 ) ); + } + elsif ($origNote =~m/^=(.+)$/ and $notes{ $1 }){ + return( &translateHebrewWordorNote( $1 ) ); + } + elsif ($origNote =~ m/^=([?$hebrewLetters]+)/){ + return( &translateHebrewWordorNote( $1 ) ); + } + elsif ($origNote =~ m/^=([^?$hebrewLetters]+)([?$hebrewLetters]+)/){ #Note + Hebrew text, split up + if ($notes{ $1 }){ + return( &translateHebrewNote( $1 ) . &translateHebrewWordorNote( $2 ) ); + } + elsif( $notes{ "=$1" }){ + return( &translateHebrewNote( "=$1" ) . &translateHebrewWordorNote( $2 ) ); + } + else { die("Could not parse note.\n"); } + } + else { die("Could not parse note.\n"); } + } + + #special case: no note, but a crossref (no book ID) # TODO: for now OSIS refs are not parsed + ($origNote =~ m/^<|>$/) and +# return("<reference osisRef=\"$1.$2\"/>"); + return $origNote; + +# #Special cases: osisREf with bookID, split because of space char, so put them together again +# ($origNote =~ m/^<\^?(\w+)$/) and +# # return( "<reference osisRef=\"$1." ); # TODO: check if <reference/> exists +# return $origNote; +# ($origNote =~ m/^(\d+)[.:](\d+)>?/) and +# # return( "$1.$2\"/> " ); +# return $origNote; + + - #special case: no note, but a crossref - ($origNote =~ m/<(.+)>/) and - return("<reference osisRef=\"$1\"><$1></reference> "); + #special case: no note, but a crossref (with book ID) +# ($origNote =~ m/^<\^?(\w+)\s?(\d+)[.:](\d+)>?/) and +# return("<reference osisRef=\"$1.$2.$3\"><$1></reference> "); ($origNote =~ m/^[?].*/) and return( &createNote( $notes{"?"} ) . &translateHebrewWordorNote( $1 ) ); - ($notes{ $origNote }) or die("Note $origNote not found.\n"); - return( &createNote( $notes{$origNote} ) ); + ($notes{ $origNote }) and return( &createNote( $notes{$origNote} ) ); + + for my $i ( 1 .. (length($origNote)-1) ){ #last try, split up into chunks + if ( $notes{ substr($origNote,0,$i) } ){ + return( &translateHebrewNote(substr($origNote,0,$i)) . &translateHebrewWordorNote(substr($origNote,$i, length($origNote) - $i) ) ); + } + } + + die("Note $origNote not found.\n"); +} + +sub translateHebrewLetter(){ #will return unicode hebrew without morph separation + my $hebrew = shift; + + my $result; + $result = $hebrew2utf8{ $hebrew } || die("Could not find Hebrew letter $hebrew\n"); + + return $result; } + sub translateHebrewWordorNote(){ #will return unicode hebrew with morph separation my $hebrew = shift; -# print("TranslateHebrew of: $hebrew"); +# print("TranslateHebrew of: $hebrew\n"); ( $hebrew =~ m/^[^$hebrewLetters]/ ) and return &translateHebrewNote( $hebrew ); ( $hebrew =~ m/[}]$/ ) and return &translateHebrewNote( $hebrew ); @@ -311,6 +378,9 @@ sub translateHebrewWordorNote(){ #will return unicode hebrew with morph separati } sub translateGreekWordorNote(){ + + return; # TODO: remove + my $greek = shift; foreach my $key (keys %notes){ @@ -342,9 +412,9 @@ sub parseLine(){ my $origLine = shift; my $result; -# printf("parsing %s\n", $origLine); + printf("parsing %s\n", $origLine); - $origLine =~ s/--=/--+/; # TODO: UGLY HACK, this appears in the text but not the notes + $origLine =~ s/--=/--+ =/; # TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable ($origLine =~ m/^([^=]+)?([=].+)?\t(.+)$/) or die("No match in parseLine().\n"); ($1 or $2) or die("Hebrew not found.\n"); @@ -364,14 +434,14 @@ sub parseLine(){ $result .= "</cell>\n <cell>"; foreach my $wordB (@hebrewWordsColB){ - if ( substr($wordB, 0, 1) eq "=" ){ - $wordB =~ m/(=[^$hebrewLetters]*)([$hebrewLetters].*)?/ or die("No match in ColB.\n"); - $1 and $result .= &translateHebrewWordorNote( $1 ); #This isolates the notes introducing colB (=*) - $result .= &translateHebrewWordorNote( $2 ); - } - else { +# if ( $wordB =~ m/^=/ ){ +# $wordB =~ m/(=[^$hebrewLetters()]*)([$hebrewLetters].*)?/ or die("No match in ColB.\n"); #added ( and ) in the first expression, because they can occur in notes also +# $1 and $result .= &translateHebrewWordorNote( $1 ); #This isolates the notes introducing colB (=*) +# $result .= &translateHebrewWordorNote( $2 ); +# } +# else { $result .= &translateHebrewWordorNote( $wordB ); - } +# } } $result .= "</cell>\n <cell>"; @@ -552,6 +622,8 @@ my @result; # File File id ThML id OSIS id Short Book Title push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") ); +die "Finished Genesis\n"; + push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") ); push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") ); push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") ); |