1 files changed, 128 insertions, 56 deletions
diff --git a/modules/mt-lxx-parallel/convert.pl b/modules/mt-lxx-parallel/convert.pl
index 018439e..f38c6bd 100644
--- a/modules/mt-lxx-parallel/convert.pl
+++ b/modules/mt-lxx-parallel/convert.pl
@@ -21,6 +21,9 @@ my $MorphologicalDivisionMarker = $MorphologicalSegmentEnd . $MorphologicalSegme
 my $hebrewLetters="A-Z\(\)\+\#\$\*\&/"; #used in a character class of a regexp later
 my %hebrew2utf8 = (
 ")" =>chr(0x05D0), #HEBREW LETTER ALEF
+
+"A" =>chr(0x05D0), #HEBREW LETTER ALEF     # TODO: check, this is from an  occurrence of ABRHM
+
 "B" =>chr(0x05D1), #HEBREW LETTER BET
 "G" =>chr(0x05D2), #HEBREW LETTER GIMEL
 "D" =>chr(0x05D3), #HEBREW LETTER DALET
@@ -58,9 +61,11 @@ my %hebrew2utf8 = (
 
 "," => ",", #separate words in colB
 
-"{" => "{",	# TODO: CHECK IF NECCESSARY
-"}" => "}",
-"." => ".",
+"?" => "<note type=\"textual\">Uncertain.</note>" #HACK
+
+#"{" => "{",	# TODO: CHECK IF NECESSARY
+#	"}" => "}",
+#"." => ".",
 
 );
 
@@ -147,87 +152,77 @@ my %notes = (
 "*z" => "Qere wela ketib, ketib wela qere.",
 "[ ]" => "Reference of number of verse in LXX, different from MT.",  # TODO: MAKE USE OF IT
 "[[ ]]" => "Reference number of verse in MT, different from the LXX.",
+"{x}" => "UNKNOWN",		# TODO: FIX
 "--- {x}" => "Apparent minus created by lack of equivalence between long stretches of text in the LXX and MT.",
 "--+ {x}" => "Apparent plus created by lack of equivalence between long stretches of text in the LXX and MT.",
 "{...}" => "Equivalent reflected elsewhere in the text, disregarded by indexing program.",
 "~" => "Difference in sequence between MT and LXX, denoted after the first Hebrew word and before the second one, as well as between two Greek words.",
 "~~~" => "Equivalent of the Hebrew or Greek word(s) occurring elsewhere in the verse or context (transposition).",
 "{..~}" => "Stylistic or grammatical transposition.",
+"{..}" => "Stylistic or grammatical transposition.", # TODO: occurs in the text, unknown meaning
 "---" => "In the Greek column:  Hebrew counterpart lacking in the LXX (minus in the LXX).",
+"--" => "In the Greek column:  Hebrew counterpart lacking in the LXX (minus in the LXX).",  # TODO: my addition, check, probably wrong
 "--+" => "In col a. of the Hebrew:  element \"added\" in the Greek (plus in the LXX).",
 "---+" => "In col a. of the Hebrew:  element \"added\" in the Greek (plus in the LXX).", # TODO: my addition
+"'" => "Long minus or plus (at least four lines).", # TODO: my addition, check
 "''" => "Long minus or plus (at least four lines).",
 "{d}" => "Reference to doublet (occurring between the two elements of the doublet).",
 "{d?}" => "Reference to doublet (occurring between the two elements of the doublet)?",
 "{..d}" => "Distributive rendering, occurring once in the translation but referring to more than one Hebrew word.",
 "{..r}" => "Notation in Hebrew column of elements repeated in the translation.",
 "?" => "Questionable notation, equivalent, etc.",
+"??" => "Questionable notation, equivalent, etc.", # TODO: my addition
 "{p}" => "Greek preverb representing Hebrew preposition.",
 "{..p}" => "Preposition added in the LXX in accordance with the rules of the Greek language or translational habits.",
+
 "{!}" => "Infinitive absolute.",
 "{!}na" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
+"{!}ad" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
+"{!}aj" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
+"{!}nad" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
 "{!}nd" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
+"{!}nd+" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
 "{!}p" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
+"{!}p+" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
 "{!}pd" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
 "{!}-" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
+"{!}v" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
+
 "{s}" => "Hebrew M/, MN (comparative, superlative) reflected by Greek comparative or superlative.", # TODO: UTF-8
 "{t}" => "Transliterated Hebrew word.",
 "\#" => "Long line continuing in next one, placed both at the end of the line running over and at the beginning of the following line in the opposite column.",
 "{v}" => "The reading of the main text of the LXX seems to reflect a secondary text, while the \"original\" reading is reflected in a variant.",
 
-
+# Notes regarding ColB of the Hebrew
 "=" => "Introducing col. b of the Hebrew (a selection of retroverted readings, presumably found in the parent text of the LXX).",
-"=?" => "? Introducing col. b of the Hebrew (a selection of retroverted readings, presumably found in the parent text of the LXX).", # TODO: my addition, check
 "={d}" => "Reference to doublet (occurring between the two elements of the doublet).", # TODO: my addition, check
 "={d?}" => "Reference to doublet (occurring between the two elements of the doublet)?", # TODO: my addition, check
 "=\%" => "Introducing categories of translation technique recorded in col. b.",
 "=\%vap" => "Change from active to passive form in verbs.",
 "=\%vpa" => "Change from passive to active form in verbs.",
 "=\%p" => "Difference in preposition or particle.",
+"=\%pa" => "Difference in preposition or particle.", # TODO: my addition, check
 "=\%p+" => "Addition of preposition or particle.",
+"=\%p+?" => "Addition of preposition or particle?",
 "=\%p-" => "Omission of preposition or particle.",
 "=;" => "Retroversion in col. b based on equivalence occurring in immediate or remote context.",
-"G" => "Hebrew variant, but at this stage no plausible retroversion is suggested.",
+#"G" => "Hebrew variant, but at this stage no plausible retroversion is suggested.",
 "=+" => "Difference in numbers between MT and the LXX.",
 "=\@" => "Etymological exegesis.",
+"=?\@" => "Etymological exegesis?", #my addition
+"=\@?" => "Etymological exegesis?", #my addition
 "=\@...a" => "Etymological exegesis according to Aramaic.",
 "=:" => "Introducing reconstructed proper noun.",
 "=v" => "Difference in vocalization (reading).",
+"=vs" => "Difference in vocalization (reading).", # TODO: check, occurs in text
 "=r" => "Incomplete retroversion.",
 
 "{*}" => "Agreement of LXX with ketib.",
 "{**}" => "Agreement of LXX with qere.",
 
-
 "." => "Interchange of consonants between MT and the presumed Hebrew parent text of the LXX.",
-".)(" => "Interchange of consonants (א/ע) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".(q" => "Interchange of consonants (ע/ק) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".q(" => "Interchange of consonants (ק/ע) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".)x" => "Interchange of consonants (א/ח) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".x)" => "Interchange of consonants (ח/א) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".(x" => "Interchange of consonants (ע/ח) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".x(" => "Interchange of consonants (ח/ע) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".+d" => "Interchange of consonants (ט/ד) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".br" => "Interchange of consonants (ב/ר) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".rb" => "Interchange of consonants (ר/ב) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".rd" => "Interchange of consonants (ר/ד) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".dr" => "Interchange of consonants (ד/ר) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".rg" => "Interchange of consonants (ר/ג) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".rh" => "Interchange of consonants (ר/ה) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".rl" => "Interchange of consonants (ר/ל) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".\mb" => "Interchange of consonants (ק/מ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".mn" => "Interchange of consonants (מ/נ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".nm" => "Interchange of consonants (נ/מ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".nr" => "Interchange of consonants (נ/ר) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".ny" => "Interchange of consonants (נ/י) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".yn" => "Interchange of consonants (י/נ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".rn" => "Interchange of consonants (ר/נ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".\$c" => "Interchange of consonants (שׁ/צ) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".\qb" => "Interchange of consonants (ק/ב) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".\wy" => "Interchange of consonants (ו/י) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-".\yw" => "Interchange of consonants (י/ו) between MT and the presumed Hebrew parent text of the LXX.", # TODO: my addition
-
 
+".a" => "Interchange of consonants between MT and the presumed Hebrew parent text of the LXX.",  # TODO: occurs, unknown
 ".m" => "Metathesis of consonants between MT and the presumed Hebrew parent text of the LXX.",
 ".z" => "Possible abbreviation.",
 ".s" => "One word of MT separated into two or more words in the parent text of the LXX.",
@@ -235,6 +230,8 @@ my %notes = (
 ".w" => "Different word-division reflected in the parent text of the LXX.",
 
 "<sp" => "<sp",  #TODO: FIX, occurs in text
+"<sp>" => "<sp>",  #TODO: FIX, occurs in text
+"<sp^>" => "<sp^>",  #TODO: FIX, occurs in text
 
 "^" => "^",			#Notsure what these are
 "^^^" => "^^^", 
@@ -257,41 +254,111 @@ sub closeNote(){
 
 sub translateHebrewNote(){
 	my $origNote = shift;
-	#print("TranslateHebrewNote $origNote\n");
+#	print("TranslateHebrewNote $origNote\n");
 
-	($origNote =~ m/{\.\.d(.+)}/) and 
+	($origNote =~ m/^=?\.([a-z()\$+-])([a-z()\$+-])$/) and 
+		return( &createNote("Interchange of consonants (" . 
+			&translateHebrewLetter( uc( $1 ) ) . "/" . &translateHebrewLetter( uc( $2 ) ) .
+			") between MT and the presumed Hebrew parent text of the LXX.") );
+
+
+	($origNote =~ m/^{\.\.d(.+)}/) and 
 		return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..d}" }) );
 
-	($origNote =~ m/{\.\.r(.+)}/) and 
+	($origNote =~ m/^{\.\.r(.+)}/) and 
 		return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..r}" }) );
 
-	($origNote =~ m/{\.\.\.(.+)}/) and 
+	($origNote =~ m/^{\.\.\.(.+)}/) and 
 		return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{...}" }) );
 
-	#Special cases: the note includes more than one hebrew word
+	($origNote =~ m/^{\.\.(.+)}/) and 
+		return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..}" }) );
+
+	#Special cases: the note includes more than one Hebrew word; concatenate ("cat") the results together
 	($origNote =~ m/^{\.\.\.([^}]+)$/) and 
 		return( &openNote("(".&translateHebrewWordorNote($1).") ".$notes{"{...}"} ) );
 	($origNote =~ m/^([^{]+)}$/) and 
 		return( &closeNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{...}" }) );
 
-	($origNote =~ m/^=[^?$hebrewLetters]*([?$hebrewLetters]+)$/) and 
-		return( &createNote( $notes{"?"} ) . &translateHebrewWordorNote( $1 ) );
+	($origNote =~ m/^@([?$hebrewLetters]+)/) and
+		return( &createNote( $notes{ "=\@" } ) . &translateHebrewWordorNote( $1 ) );
+
+	($origNote =~ m/^\^([?$hebrewLetters]+)/) and
+		return( "^" . &translateHebrewWordorNote( $1 ) );   # TODO: check, what is ^?
+
+	#
+	# Special handling for the = colB Notes
+	#
+	if (($origNote =~ m/^=/) and (not $notes{ $origNote } )) { #only split if the note does not exist, to avoid parsing problems
+		print("note reads $origNote\n");
+		if ($origNote =~ m/^=(<[0-9.a-z]+>)$/){
+			return( &translateHebrewWordorNote( $1 ) );
+		}
+		elsif ($origNote =~m/^=(.+)$/ and $notes{ $1 }){
+			return( &translateHebrewWordorNote( $1 ) );
+		}
+		elsif ($origNote =~ m/^=([?$hebrewLetters]+)/){
+			return( &translateHebrewWordorNote( $1 ) );
+		}
+		elsif ($origNote =~ m/^=([^?$hebrewLetters]+)([?$hebrewLetters]+)/){ #Note + Hebrew text, split up
+			if ($notes{ $1 }){
+				return( &translateHebrewNote( $1 ) . &translateHebrewWordorNote( $2 ) );
+			}
+			elsif( $notes{ "=$1" }){
+				return( &translateHebrewNote( "=$1" ) . &translateHebrewWordorNote( $2 ) );
+			}
+			else { die("Could not parse note.\n"); }
+		}
+		else { die("Could not parse note.\n"); }
+	}
+
+	#special case: no note, but a crossref (no book ID)     # TODO: for now OSIS refs are not parsed
+ 	($origNote =~ m/^<|>$/) and
+#		return("<reference osisRef=\"$1.$2\"/>");
+		return $origNote;
+
+# 	#Special cases: osisRef with book ID that was split at the space character, so put the parts together again
+# 	($origNote =~ m/^<\^?(\w+)$/) and 
+# #		return( "<reference osisRef=\"$1." );  # TODO: check if <reference/> exists
+# 		return $origNote;
+# 	($origNote =~ m/^(\d+)[.:](\d+)>?/) and 
+# #		return( "$1.$2\"/> " );
+# 		return $origNote;
+
+
 
-	#special case: no note, but a crossref
- 	($origNote =~ m/<(.+)>/) and 
-		return("<reference osisRef=\"$1\">&lt;$1&gt;</reference> ");
+	#special case: no note, but a crossref (with book ID)
+# 	($origNote =~ m/^<\^?(\w+)\s?(\d+)[.:](\d+)>?/) and
+#		return("<reference osisRef=\"$1.$2.$3\">&lt;$1&gt;</reference> ");
 
 	($origNote =~ m/^[?].*/) and 
 		return( &createNote( $notes{"?"} ) . &translateHebrewWordorNote( $1 ) );
 
-	($notes{ $origNote }) or die("Note $origNote not found.\n");
-	return( &createNote( $notes{$origNote} ) );
+	($notes{ $origNote }) and return( &createNote( $notes{$origNote} ) );
+
+	for my $i ( 1 .. (length($origNote)-1) ){ #last try, split up into chunks
+		if ( $notes{ substr($origNote,0,$i) } ){
+			return( &translateHebrewNote(substr($origNote,0,$i)) . &translateHebrewWordorNote(substr($origNote,$i, length($origNote) - $i) ) );
+		}
+	}
+	
+	die("Note $origNote not found.\n");
+}
+
+sub translateHebrewLetter(){ #will return unicode hebrew without morph separation
+	my $hebrew = shift;
+
+	my $result;
+	$result = $hebrew2utf8{ $hebrew } || die("Could not find Hebrew letter $hebrew\n");
+
+	return $result;
 }
 
+
 sub translateHebrewWordorNote(){ #will return unicode hebrew with morph separation
 	my $hebrew = shift;
 
-#	print("TranslateHebrew of: $hebrew");
+# 	print("TranslateHebrew of: $hebrew\n");
 
 	( $hebrew =~ m/^[^$hebrewLetters]/ ) and return &translateHebrewNote( $hebrew );
 	( $hebrew =~ m/[}]$/ ) and return &translateHebrewNote( $hebrew );
@@ -311,6 +378,9 @@ sub translateHebrewWordorNote(){ #will return unicode hebrew with morph separati
 }
 
 sub translateGreekWordorNote(){
+
+	return;	# TODO: remove
+
 	my $greek = shift;
 
 	foreach my $key (keys %notes){
@@ -342,9 +412,9 @@ sub parseLine(){
 	my $origLine = shift;
 	my $result;
 
-#	printf("parsing %s\n", $origLine);
+	printf("parsing %s\n", $origLine);
 
-	$origLine =~ s/--=/--+/;	# TODO: UGLY HACK, this appears in the text but not the notes
+	$origLine =~ s/--=/--+ =/;	# TODO: UGLY HACK, "--=" appears in the text but not in the notes; reading it as "--+" followed by a separate "=" seems the most reasonable interpretation
 
 	($origLine =~ m/^([^=]+)?([=].+)?\t(.+)$/) or die("No match in parseLine().\n");
 	($1 or $2) or die("Hebrew not found.\n");
@@ -364,14 +434,14 @@ sub parseLine(){
 	$result .= "</cell>\n  <cell>";
 
 	foreach my $wordB (@hebrewWordsColB){
-		if ( substr($wordB, 0, 1) eq "=" ){
-			$wordB =~ m/(=[^$hebrewLetters]*)([$hebrewLetters].*)?/ or die("No match in ColB.\n");
-			$1 and $result .= &translateHebrewWordorNote( $1 ); #This isolates the notes introducing colB (=*)
-			$result .= &translateHebrewWordorNote( $2 );
-		}
-		else {
+#		if ( $wordB =~ m/^=/ ){
+#			$wordB =~ m/(=[^$hebrewLetters()]*)([$hebrewLetters].*)?/ or die("No match in ColB.\n"); #added ( and ) in the first expression, because they can occur in notes also
+#			$1 and $result .= &translateHebrewWordorNote( $1 ); #This isolates the notes introducing colB (=*)
+#			$result .= &translateHebrewWordorNote( $2 );
+#		}
+#		else {
 			$result .= &translateHebrewWordorNote( $wordB );
-		}
+#		}
 	}
 	$result .= "</cell>\n  <cell>";
 
@@ -552,6 +622,8 @@ my @result;
 
 	# File				File id			ThML id		OSIS id		Short Book Title
 push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") );
+die "Finished Genesis\n"; 
+
 push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") );
 push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") );
 push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") );