diff options
Diffstat (limited to 'modules/mt-lxx-parallel/convert.pl')
-rw-r--r-- | modules/mt-lxx-parallel/convert.pl | 117 |
1 files changed, 56 insertions, 61 deletions
diff --git a/modules/mt-lxx-parallel/convert.pl b/modules/mt-lxx-parallel/convert.pl index 7dd1e06..7d0de12 100644 --- a/modules/mt-lxx-parallel/convert.pl +++ b/modules/mt-lxx-parallel/convert.pl @@ -35,18 +35,13 @@ my %hebrew2utf8 = ( "+" =>chr(0x05D8), #HEBREW LETTER TET "Y" =>chr(0x05D9), #HEBREW LETTER YOD "I" =>chr(0x05D9), #HEBREW LETTER YOD # TODO: this occured in a retranslation (KI) -#"K" =>chr(0x05DA), #HEBREW LETTER FINAL KAF # TODO: HANDLE FINAL LETTERS "K" =>chr(0x05DB), #HEBREW LETTER KAF "L" =>chr(0x05DC), #HEBREW LETTER LAMED -#chr(0x6D)=>chr(0x05DD), #HEBREW LETTER FINAL MEM "M" =>chr(0x05DE), #HEBREW LETTER MEM -#"N" =>chr(0x05DF), #HEBREW LETTER FINAL NUN "N" =>chr(0x05E0), #HEBREW LETTER NUN "S" =>chr(0x05E1), #HEBREW LETTER SAMEKH "(" =>chr(0x05E2), #HEBREW LETTER AYIN -#"P" =>chr(0x05E3), #HEBREW LETTER FINAL PE "P" =>chr(0x05E4), #HEBREW LETTER PE -#"C" =>chr(0x05E5), #HEBREW LETTER FINAL TSADI "C" =>chr(0x05E6), #HEBREW LETTER TSADI "Q" =>chr(0x05E7), #HEBREW LETTER QOF / KOF "R" =>chr(0x05E8), #HEBREW LETTER RESH @@ -58,20 +53,23 @@ my %hebrew2utf8 = ( "-" =>chr(0x05BE), #MAQQEF # Special stuff -"*" =>"(ketiv)", # TODO:FIX -"**" =>"(qere)", # +"*" =>"*<note type=\"textual\">Ketiv.</note>", # TODO:FIX +"**" =>"**<note type=\"textual\">Qere.</note>", # "/" => $MorphologicalDivisionMarker, "," => ",", #separate words in colB - "?" => "<note type=\"textual\">Uncertain.</note>" #HACK +); -#"{" => "{", # TODO: CHECK IF NECCESSARY -# "}" => "}", -#"." => ".", - +my %final_hebrew2utf8 = ( +"K" =>chr(0x05DA), #HEBREW LETTER FINAL KAF # TODO: HANDLE FINAL LETTERS +"M"=>chr(0x05DD), #HEBREW LETTER FINAL MEM +"N" =>chr(0x05DF), #HEBREW LETTER FINAL NUN +"P" =>chr(0x05E3), #HEBREW LETTER FINAL PE +"C" =>chr(0x05E5), #HEBREW LETTER FINAL TSADI ); + my $greekLetters = "A-Z#3\(\)+=|\\/"; my %greek2utf8 = ( @@ -112,7 +110,6 @@ my %greek2utf8 = ( "C" =>chr(0x03BE), #GREEK SMALL LETTER XI "O" =>chr(0x03BF), #GREEK SMALL LETTER OMICRON "P" =>chr(0x03C0), #GREEK SMALL LETTER PI -"#3"=>chr(0x03DF), #GREEK SMALL LETTER KOPPA "R" =>chr(0x03C1), #GREEK SMALL LETTER RHO "S" =>chr(0x03C3), #GREEK SMALL LETTER SIGMA "J" =>chr(0x03C2), #GREEK SM LETT FINAL SIGMA / SM LETT SIGMA END OF WORD @@ -124,6 +121,11 @@ my %greek2utf8 = ( "W" =>chr(0x03C9), #GREEK SMALL LETTER OMEGA ); +sub createNote(){ my $noteText = shift; return("<note type=\"textual\">$noteText</note> "); } +sub openNote(){ my $noteText = shift; return("<note type=\"textual\">$noteText "); } +sub closeNote(){ my $noteText = shift; return("$noteText</note> "); } + + my %notes = ( "{#}" => "Asterized passage (in Job).", "{g}" => "Reference to difference between the text of Rahlfs and that of the relevant Göttingen edition.", @@ -286,10 +288,6 @@ my %notes = ( ); -sub createNote(){ my $noteText = shift; return("<note type=\"textual\">$noteText</note> "); } -sub openNote(){ my $noteText = shift; return("<note type=\"textual\">$noteText "); } -sub closeNote(){ my $noteText = shift; return("$noteText</note> "); } - sub translateHebrewNote(){ my $origNote = shift; @@ -391,15 +389,45 @@ sub translateHebrewNote(){ sub translateHebrewLetters(){ #will return unicode hebrew without morph separation my $hebrew = shift; + my $lastIndex = length( $hebrew )-1; my $result; + my $letter; (not $hebrew) and die "Hebrew string empty...\n"; - foreach my $i (0..length( $hebrew )-1 ){ - $result .= $hebrew2utf8{ substr($hebrew,$i,1) } || die("Could not find Hebrew letter $hebrew\n"); + foreach my $i (0..$lastIndex ){ + $letter = substr($hebrew,$i,1); + if ($i == $lastIndex && $letter =~ m/(K|M|N|P|C)/){ + $result .= $final_hebrew2utf8{ $letter } || die("Could not find FINAL Hebrew letter $hebrew\n"); +# printf("successfully inserted $final_hebrew2utf8{ $letter }\n"); + } + else{ + $result .= $hebrew2utf8{ $letter } || die("Could not find Hebrew letter $hebrew\n"); + } } return $result; } +sub translateHebrewWordorNote(){ #will return unicode hebrew with morph separation + my $hebrew = shift; + + if (not $hebrew) { die("Hebrew string empty.") }; + +# print("TranslateHebrew of: $hebrew\n"); + + $hebrew =~ s/^mn$/.mn/; #Ezek 24:17, error? + + ( $notes{ $hebrew } ) and return &translateHebrewNote( $hebrew ); # exact match first + + ( $hebrew =~ m/^[^$hebrewLetters]/ ) and return &translateHebrewNote( $hebrew ); + ( $hebrew =~ m/^([$hebrewLetters]+)(\[.+\])/ ) and + return &translateHebrewWordorNote( $1 ) . &translateHebrewNote( $2 ); + ( $hebrew =~ m/[}]$/ ) and return &translateHebrewNote( $hebrew ); + + #NOW WE ASSUME IT IS A HEBREW WORD + return $MorphologicalSegmentStart . &translateHebrewLetters( $hebrew ) . $MorphologicalSegmentEnd; +} + + sub translateGreekNote(){ my $origNote = shift; @@ -513,33 +541,17 @@ sub translateGreekNote(){ die("Note $origNote not found.\n"); } -sub translateHebrewWordorNote(){ #will return unicode hebrew with morph separation - my $hebrew = shift; - - if (not $hebrew) { die("Hebrew string empty.") }; - -# print("TranslateHebrew of: $hebrew\n"); - - $hebrew =~ s/^mn$/.mn/; #Ezek 24:17, error? - - ( $notes{ $hebrew } ) and return &translateHebrewNote( $hebrew ); # exact match first - - ( $hebrew =~ m/^[^$hebrewLetters]/ ) and return &translateHebrewNote( $hebrew ); - ( $hebrew =~ m/^([$hebrewLetters]+)(\[.+\])/ ) and - return &translateHebrewWordorNote( $1 ) . &translateHebrewNote( $2 ); - ( $hebrew =~ m/[}]$/ ) and return &translateHebrewNote( $hebrew ); +sub translateGreekLetters(){ + my $greek = shift; + my $lastIndex = length( $greek )-1; + my $result; + (not $greek) and die "Greek string empty...\n"; - my $length = length($hebrew); - my $index = 0; - my $result = $MorphologicalSegmentStart; - CHAR_LOOP: while ( $index < $length ) { - my $hsubstr = substr( $hebrew, $index, 1); - $hebrew2utf8{ $hsubstr } || die("could not find Hebrew: $hsubstr of word $hebrew at index $index length $length\n"); - $result .= $hebrew2utf8{ $hsubstr }; - ++$index; + foreach my $i (0..$lastIndex ){ + $result .= $greek2utf8{ substr($greek,$i,1) } || die("Could not find Greek letters in $greek\n"); } - $result .= $MorphologicalSegmentEnd; return $result; + } sub translateGreekWordorNote(){ @@ -560,24 +572,7 @@ sub translateGreekWordorNote(){ ( $greek =~ m/^([^.]+)\.\.\.([^.]+)$/) and # TODO: occurs e.g. {..bla1 bla2...bla3 #TODO: format output return( &translateGreekWordorNote($1) . "..." . &translateGreekWordorNote($2) ); - - my $length = length($greek); - my $index = 0; - my $result; - CHAR_LOOP: while ( $index < $length ) { - my $gsubstr = substr( $greek, $index, 1); - if ($greek2utf8{ $gsubstr } ){ - $result .= $greek2utf8{ $gsubstr }; - } - elsif ( $greek2utf8{ substr( $greek, $index, 2) } ){ #for the #3 letter - $result .= $greek2utf8{ substr( $greek, $index, 2) }; - } - else{ - die("Could not find greek: $gsubstr.\n") - } - ++$index; - } - return $result; + return &translateGreekLetters( $greek ); } my $colBContent; |