summary refs log tree commit diff stats
path: root/modules/mt-lxx-parallel/convert.pl
diff options
context:
space:
mode:
Diffstat (limited to 'modules/mt-lxx-parallel/convert.pl')
-rw-r--r-- modules/mt-lxx-parallel/convert.pl 117
1 files changed, 56 insertions, 61 deletions
diff --git a/modules/mt-lxx-parallel/convert.pl b/modules/mt-lxx-parallel/convert.pl
index 7dd1e06..7d0de12 100644
--- a/modules/mt-lxx-parallel/convert.pl
+++ b/modules/mt-lxx-parallel/convert.pl
@@ -35,18 +35,13 @@ my %hebrew2utf8 = (
"+" =>chr(0x05D8), #HEBREW LETTER TET
"Y" =>chr(0x05D9), #HEBREW LETTER YOD
"I" =>chr(0x05D9), #HEBREW LETTER YOD # TODO: this occured in a retranslation (KI)
-#"K" =>chr(0x05DA), #HEBREW LETTER FINAL KAF # TODO: HANDLE FINAL LETTERS
"K" =>chr(0x05DB), #HEBREW LETTER KAF
"L" =>chr(0x05DC), #HEBREW LETTER LAMED
-#chr(0x6D)=>chr(0x05DD), #HEBREW LETTER FINAL MEM
"M" =>chr(0x05DE), #HEBREW LETTER MEM
-#"N" =>chr(0x05DF), #HEBREW LETTER FINAL NUN
"N" =>chr(0x05E0), #HEBREW LETTER NUN
"S" =>chr(0x05E1), #HEBREW LETTER SAMEKH
"(" =>chr(0x05E2), #HEBREW LETTER AYIN
-#"P" =>chr(0x05E3), #HEBREW LETTER FINAL PE
"P" =>chr(0x05E4), #HEBREW LETTER PE
-#"C" =>chr(0x05E5), #HEBREW LETTER FINAL TSADI
"C" =>chr(0x05E6), #HEBREW LETTER TSADI
"Q" =>chr(0x05E7), #HEBREW LETTER QOF / KOF
"R" =>chr(0x05E8), #HEBREW LETTER RESH
@@ -58,20 +53,23 @@ my %hebrew2utf8 = (
"-" =>chr(0x05BE), #MAQQEF
# Special stuff
-"*" =>"(ketiv)", # TODO:FIX
-"**" =>"(qere)", #
+"*" =>"*<note type=\"textual\">Ketiv.</note>", # TODO:FIX
+"**" =>"**<note type=\"textual\">Qere.</note>", #
"/" => $MorphologicalDivisionMarker,
"," => ",", #separate words in colB
-
"?" => "<note type=\"textual\">Uncertain.</note>" #HACK
+);
-#"{" => "{", # TODO: CHECK IF NECCESSARY
-# "}" => "}",
-#"." => ".",
-
+my %final_hebrew2utf8 = (
+"K" =>chr(0x05DA), #HEBREW LETTER FINAL KAF # TODO: HANDLE FINAL LETTERS
+"M"=>chr(0x05DD), #HEBREW LETTER FINAL MEM
+"N" =>chr(0x05DF), #HEBREW LETTER FINAL NUN
+"P" =>chr(0x05E3), #HEBREW LETTER FINAL PE
+"C" =>chr(0x05E5), #HEBREW LETTER FINAL TSADI
);
+
my $greekLetters = "A-Z#3\(\)+=|\\/";
my %greek2utf8 = (
@@ -112,7 +110,6 @@ my %greek2utf8 = (
"C" =>chr(0x03BE), #GREEK SMALL LETTER XI
"O" =>chr(0x03BF), #GREEK SMALL LETTER OMICRON
"P" =>chr(0x03C0), #GREEK SMALL LETTER PI
-"#3"=>chr(0x03DF), #GREEK SMALL LETTER KOPPA
"R" =>chr(0x03C1), #GREEK SMALL LETTER RHO
"S" =>chr(0x03C3), #GREEK SMALL LETTER SIGMA
"J" =>chr(0x03C2), #GREEK SM LETT FINAL SIGMA / SM LETT SIGMA END OF WORD
@@ -124,6 +121,11 @@ my %greek2utf8 = (
"W" =>chr(0x03C9), #GREEK SMALL LETTER OMEGA
);
+sub createNote(){ my $noteText = shift; return("<note type=\"textual\">$noteText</note> "); }
+sub openNote(){ my $noteText = shift; return("<note type=\"textual\">$noteText "); }
+sub closeNote(){ my $noteText = shift; return("$noteText</note> "); }
+
+
my %notes = (
"{#}" => "Asterized passage (in Job).",
"{g}" => "Reference to difference between the text of Rahlfs and that of the relevant Göttingen edition.",
@@ -286,10 +288,6 @@ my %notes = (
);
-sub createNote(){ my $noteText = shift; return("<note type=\"textual\">$noteText</note> "); }
-sub openNote(){ my $noteText = shift; return("<note type=\"textual\">$noteText "); }
-sub closeNote(){ my $noteText = shift; return("$noteText</note> "); }
-
sub translateHebrewNote(){
my $origNote = shift;
@@ -391,15 +389,45 @@ sub translateHebrewNote(){
sub translateHebrewLetters(){ #will return unicode hebrew without morph separation
my $hebrew = shift;
+ my $lastIndex = length( $hebrew )-1;
my $result;
+ my $letter;
(not $hebrew) and die "Hebrew string empty...\n";
- foreach my $i (0..length( $hebrew )-1 ){
- $result .= $hebrew2utf8{ substr($hebrew,$i,1) } || die("Could not find Hebrew letter $hebrew\n");
+ foreach my $i (0..$lastIndex ){
+ $letter = substr($hebrew,$i,1);
+ if ($i == $lastIndex && $letter =~ m/(K|M|N|P|C)/){
+ $result .= $final_hebrew2utf8{ $letter } || die("Could not find FINAL Hebrew letter $hebrew\n");
+# printf("successfully inserted $final_hebrew2utf8{ $letter }\n");
+ }
+ else{
+ $result .= $hebrew2utf8{ $letter } || die("Could not find Hebrew letter $hebrew\n");
+ }
}
return $result;
}
+sub translateHebrewWordorNote(){ #will return unicode hebrew with morph separation
+ my $hebrew = shift;
+
+ if (not $hebrew) { die("Hebrew string empty.") };
+
+# print("TranslateHebrew of: $hebrew\n");
+
+ $hebrew =~ s/^mn$/.mn/; #Ezek 24:17, error?
+
+ ( $notes{ $hebrew } ) and return &translateHebrewNote( $hebrew ); # exact match first
+
+ ( $hebrew =~ m/^[^$hebrewLetters]/ ) and return &translateHebrewNote( $hebrew );
+ ( $hebrew =~ m/^([$hebrewLetters]+)(\[.+\])/ ) and
+ return &translateHebrewWordorNote( $1 ) . &translateHebrewNote( $2 );
+ ( $hebrew =~ m/[}]$/ ) and return &translateHebrewNote( $hebrew );
+
+ #NOW WE ASSUME IT IS A HEBREW WORD
+ return $MorphologicalSegmentStart . &translateHebrewLetters( $hebrew ) . $MorphologicalSegmentEnd;
+}
+
+
sub translateGreekNote(){
my $origNote = shift;
@@ -513,33 +541,17 @@ sub translateGreekNote(){
die("Note $origNote not found.\n");
}
-sub translateHebrewWordorNote(){ #will return unicode hebrew with morph separation
- my $hebrew = shift;
-
- if (not $hebrew) { die("Hebrew string empty.") };
-
-# print("TranslateHebrew of: $hebrew\n");
-
- $hebrew =~ s/^mn$/.mn/; #Ezek 24:17, error?
-
- ( $notes{ $hebrew } ) and return &translateHebrewNote( $hebrew ); # exact match first
-
- ( $hebrew =~ m/^[^$hebrewLetters]/ ) and return &translateHebrewNote( $hebrew );
- ( $hebrew =~ m/^([$hebrewLetters]+)(\[.+\])/ ) and
- return &translateHebrewWordorNote( $1 ) . &translateHebrewNote( $2 );
- ( $hebrew =~ m/[}]$/ ) and return &translateHebrewNote( $hebrew );
+sub translateGreekLetters(){
+ my $greek = shift;
+ my $lastIndex = length( $greek )-1;
+ my $result;
+ (not $greek) and die "Greek string empty...\n";
- my $length = length($hebrew);
- my $index = 0;
- my $result = $MorphologicalSegmentStart;
- CHAR_LOOP: while ( $index < $length ) {
- my $hsubstr = substr( $hebrew, $index, 1);
- $hebrew2utf8{ $hsubstr } || die("could not find Hebrew: $hsubstr of word $hebrew at index $index length $length\n");
- $result .= $hebrew2utf8{ $hsubstr };
- ++$index;
+ foreach my $i (0..$lastIndex ){
+ $result .= $greek2utf8{ substr($greek,$i,1) } || die("Could not find Greek letters in $greek\n");
}
- $result .= $MorphologicalSegmentEnd;
return $result;
+
}
sub translateGreekWordorNote(){
@@ -560,24 +572,7 @@ sub translateGreekWordorNote(){
( $greek =~ m/^([^.]+)\.\.\.([^.]+)$/) and # TODO: occurs e.g. {..bla1 bla2...bla3 #TODO: format output
return( &translateGreekWordorNote($1) . "..." . &translateGreekWordorNote($2) );
-
- my $length = length($greek);
- my $index = 0;
- my $result;
- CHAR_LOOP: while ( $index < $length ) {
- my $gsubstr = substr( $greek, $index, 1);
- if ($greek2utf8{ $gsubstr } ){
- $result .= $greek2utf8{ $gsubstr };
- }
- elsif ( $greek2utf8{ substr( $greek, $index, 2) } ){ #for the #3 letter
- $result .= $greek2utf8{ substr( $greek, $index, 2) };
- }
- else{
- die("Could not find greek: $gsubstr.\n")
- }
- ++$index;
- }
- return $result;
+ return &translateGreekLetters( $greek );
}
my $colBContent;