Gen-Job parses.

git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@50 07627401-56e2-0310-80f4-f8cd0041bdcd
author: Martin Gruner <mg.pub@gmx.net> 2005-11-09 21:18:35 +0000
committer: Martin Gruner <mg.pub@gmx.net> 2005-11-09 21:18:35 +0000
commit: 0c303ed20fe6ffce49fb0de42522bdfe4c2b5528 (patch)
tree: 4b1a21cd58a59d4c75545b40c90d44aab15f9c6a /modules/mt-lxx-parallel/convert.pl
parent: c033620be0616c507eda2be35ec6c6d2dfcb633b (diff)
download: sword-tools-0c303ed20fe6ffce49fb0de42522bdfe4c2b5528.tar.gz
1 files changed, 97 insertions, 20 deletions
diff --git a/modules/mt-lxx-parallel/convert.pl b/modules/mt-lxx-parallel/convert.pl
index 007b828..4187c4d 100644
--- a/modules/mt-lxx-parallel/convert.pl
+++ b/modules/mt-lxx-parallel/convert.pl
@@ -88,6 +88,11 @@ my %greek2utf8 = (
 
 "^" => "^",
 
+"-" => "-", #occurs in the text
+":" => ":", #occurs in the text!?
+"!" => "!", #occurs in the text!?
+"|" => "|", #occurs in the text!?
+
 # "*A" =>chr(0x0391), #GREEK CAPITAL LETTER ALPHA
 # "*B" =>chr(0x0392), #GREEK CAPITAL LETTER BETA
 # "*G" =>chr(0x0393), #GREEK CAPITAL LETTER GAMMA
@@ -120,6 +125,7 @@ my %greek2utf8 = (
 "G" =>chr(0x03B3), #GREEK SMALL LETTER GAMMA
 "D" =>chr(0x03B4), #GREEK SMALL LETTER DELTA
 "E" =>chr(0x03B5), #GREEK SMALL LETTER EPSILON
+"e" =>chr(0x03B5), #GREEK SMALL LETTER EPSILON # occurs in Neh
 "V" =>chr(0x03DD), #GREEK SMALL LETTER DIGAMMA
 "Z" =>chr(0x03B6), #GREEK SMALL LETTER ZETA
 "H" =>chr(0x03B7), #GREEK SMALL LETTER ETA
@@ -129,6 +135,7 @@ my %greek2utf8 = (
 "L" =>chr(0x03BB), #GREEK SMALL LETTER LAMDA
 "M" =>chr(0x03BC), #GREEK SMALL LETTER MU
 "N" =>chr(0x03BD), #GREEK SMALL LETTER NU
+"n" =>chr(0x03BD), #GREEK SMALL LETTER NU # occurs in Neh
 "C" =>chr(0x03BE), #GREEK SMALL LETTER XI
 "O" =>chr(0x03BF), #GREEK SMALL LETTER OMICRON
 "P" =>chr(0x03C0), #GREEK SMALL LETTER PI
@@ -148,6 +155,7 @@ my %notes = (
 "{#}" => "Asterized passage (in Job).",
 "{g}" => "Reference to difference between the text of Rahlfs and that of the relevant Göttingen edition.",
 "..a" => "Word included in one of the Aramaic sections.",
+",,a" => "Word included in one of the Aramaic sections.", #my addition, ok
 "*" => "Ketib.",
 "**" => "Qere.",
 "*z" => "Qere wela ketib, ketib wela qere.",
@@ -166,14 +174,18 @@ my %notes = (
 "---" => "In the Greek column:  Hebrew counterpart lacking in the LXX (minus in the LXX).",
 "---?" => "In the Greek column:  Hebrew counterpart lacking in the LXX (minus in the LXX)?",
 "--" => "In the Greek column:  Hebrew counterpart lacking in the LXX (minus in the LXX).",  # TODO: my addition, check, probably wrong
+"{---%}" => "In the Greek column:  Hebrew counterpart lacking in the LXX (minus in the LXX).",  # TODO: my addition, check??
 "--?" => "In the Greek column:  Hebrew counterpart lacking in the LXX (minus in the LXX)?",  # TODO: my addition, check, probably wrong
 "--+" => "In col a. of the Hebrew:  element \"added\" in the Greek (plus in the LXX).",
 "---+" => "In col a. of the Hebrew:  element \"added\" in the Greek (plus in the LXX).", # TODO: my addition
+"-.-" => "In col a. of the Hebrew:  element \"added\" in the Greek (plus in the LXX).", # TODO: my addition, check?
 "-+" => "In col a. of the Hebrew:  element \"added\" in the Greek (plus in the LXX).", # TODO: my addition
 "'" => "Long minus or plus (at least four lines).", # TODO: my addition, check
 "''" => "Long minus or plus (at least four lines).",
 "{d}" => "Reference to doublet (occurring between the two elements of the doublet).",
 "{d?}" => "Reference to doublet (occurring between the two elements of the doublet)?",
+"{d}?" => "Reference to doublet (occurring between the two elements of the doublet)?",
+"{?d}" => "Reference to doublet (occurring between the two elements of the doublet)?",
 "{..d}" => "Distributive rendering, occurring once in the translation but referring to more than one Hebrew word.",
 "{..r}" => "Notation in Hebrew column of elements repeated in the translation.",
 "?" => "Questionable notation, equivalent, etc.",
@@ -206,10 +218,12 @@ my %notes = (
 
 "{s}" => "Hebrew M/, MN (comparative, superlative) reflected by Greek comparative or superlative.", # TODO: UTF-8
 "{t}" => "Transliterated Hebrew word.",
+"{dt}" => "Transliterated Hebrew word.", # TODO: occurs. DOUBLET?
 "{t.}" => "Transliterated Hebrew word.", # TODO: unknown
 "<t?>" => "Transliterated Hebrew word.", # TODO: unknown
 "{t?}" => "Transliterated Hebrew word?",
 "{c}" => "Unknown.", # TODO: unknown
+"{c}?" => "Unknown.", # TODO: unknown
 
 
 "#" => "Long line continuing in next one, placed both at the end of the line running over and at the beginning of the following line in the opposite column.",
@@ -257,6 +271,7 @@ my %notes = (
 
 "{*}" => "Agreement of LXX with ketib.",
 "{**}" => "Agreement of LXX with qere.",
+"{**" => "Agreement of LXX with qere.", #my addition
 "{**?}" => "Agreement of LXX with qere?", #my addition
 
 "." => "Interchange of consonants between MT and the presumed Hebrew parent text of the LXX.",
@@ -298,7 +313,7 @@ sub closeNote(){
 sub translateHebrewNote(){
 	my $origNote = shift;
 
- 	print("TranslateHebrewNote $origNote\n");
+#	print("TranslateHebrewNote $origNote\n");
 
 	($origNote eq "=") and return;	#= only marks colB, no real note
 
@@ -312,6 +327,10 @@ sub translateHebrewNote(){
 	($origNote =~ m/^\[(.+)\]/) and 
 		return( &createNote( "Number of verse in LXX ($1) is different from MT." ) );
 
+	#special case: no note, but a crossref (no book ID)     # TODO: for now OSIS refs are not parsed FIX THIS, see above
+ 	($origNote =~ m/^=?\[\[|\]\]$/) and
+		return $origNote;
+
 	($origNote =~ m/^=?{\.\.d(.+)}/) and 
 		return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..d}" }) );
 
@@ -399,7 +418,7 @@ sub translateHebrewLetters(){ #will return unicode hebrew without morph separati
 sub translateGreekNote(){
 	my $origNote = shift;
 
- 	print("TranslateGreekNote $origNote\n");
+#	print("TranslateGreekNote $origNote\n");
 
 	($notes{ $origNote }) and return( &createNote( $notes{$origNote} ) );
 
@@ -447,13 +466,10 @@ sub translateGreekNote(){
 		return( &openNote($notes{"{...}"} . "(" .&translateGreekWordorNote($1) ." " ) );
 	($origNote =~ m/^=?{\.\.([^}]+)$/) and 
 		return( &openNote($notes{"{...}"} . "(" .&translateGreekWordorNote($1) ." " ) );
+	($origNote =~ m/^=?{\.\.(\^[^}]+)$/) and 
+		return( &openNote($notes{"{..}"} . "(" .&translateGreekWordorNote($1) ." " ) );
 	($origNote =~ m/^=?{c([^}]+)$/) and 
 		return( &openNote($notes{"{c}"} . "(" .&translateGreekWordorNote($1) ." " ) );	# TODO: occurs {cXXX}, not documented
-	($origNote =~ m/^{([^}]+)$/) and # TODO: occurs, seems unreasonable
-		return( "(" .&translateGreekWordorNote($1) ." " );
-
-	($origNote =~ m/^([^{]+)}$/) and 
-		return( &closeNote(&translateGreekWordorNote( $1 ).")" ) );
 
 	#special case: no note, but a crossref (no book ID)     # TODO: for now OSIS refs are not parsed
  	($origNote =~ m/^<.+>?$/) and
@@ -462,6 +478,14 @@ sub translateGreekNote(){
  	($origNote =~ m/^{=\d+}$/) and
 		return $origNote;
 
+	($origNote =~ m/^([^{]+)}$/) and 
+		return( &closeNote(&translateGreekWordorNote( $1 ).")" ) );
+	($origNote =~ m/^([^{]+)}\?$/) and 
+		return( &closeNote(&translateGreekWordorNote( $1 )."?)" ) );
+
+	($origNote =~ m/^{([^}]+}?)$/) and # TODO: occurs, seems unreasonable
+		return( "(" .&translateGreekWordorNote($1) ." " );
+
 	HANDLE_NOTE_FALLBACK: for my $i ( 1..(length($origNote)-1) ){ #last try, split up into chunks
 		my $currentLength = length( $origNote ) - 1 - $i;    #start with the longest and become shorter, to find the complicated notes {!}p before the simple {!}
 		if ( $notes{ substr($origNote,0,$currentLength) } ){
@@ -499,8 +523,10 @@ sub translateGreekWordorNote(){
 
 	my $greek = shift;
 
+#	printf("TranslateGreek of $greek\n");
+
 	( $greek =~ m/^[^$greekLetters]/ ) and return &translateGreekNote( $greek );
-	( $greek =~ m/[}]$/ ) and return &translateGreekNote( $greek );
+	( $greek =~ m/[}]\??$/ ) and return &translateGreekNote( $greek );
 	( $greek eq "#" ) and return &translateGreekNote( "#" );
 	( $greek =~ m/(.+)(\[.+\])$/ ) and return &translateGreekWordorNote( $1 ) .&translateGreekNote( $2 );
 
@@ -537,12 +563,21 @@ sub parseLine(){
 	$origLine =~ s/ ;=/ =;/;	# TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
 	$origLine =~ s/ \+;/ =;/;	# TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
 	$origLine =~ s/[\ ]{10,}/\t/;	# TODO: UGLY HACK, sometimes the tab in the wrong place but large spaces in the right
+	$origLine =~ s/\.h-<ge10\.4/.h- <ge10.4>/;	# TODO: UGLY HACK, the <ge10.4 reference hangs onto the preceding .h- and lacks its closing >
+	$origLine =~ s/}{/} {/;	# TODO: UGLY HACK, notes hung together
+	$origLine =~ s/=a\$\/DY/=A\$\/DY/;# TODO: UGLY HACK, Hebrew letter wrong
+	$origLine =~ s/{\.\.\^EPIQEI\\S\.\.\^E\)FI\/LHSA}/{..^EPIQEI\\S E)FI\/LHSA}/;# TODO: UGLY HACK, strange note
+
 	($origLine eq "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY KAI\\ TO\\N AMORRAI=ON ") and 
 		$origLine = "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY\tKAI\\ TO\\N AMORRAI=ON"; # TODO: hack, Tab missing
 	($origLine eq "W/H/KHNYM =W/H/)BNYM .m .kb # KAI\\ OI( LI/QOI ") and 
 		$origLine = "W/H/KHNYM =W/H/)BNYM .m .kb\tKAI\\ OI( LI/QOI"; # TODO: hack, Tab missing
 	($origLine eq "W/YC+YRW =;W/YC+YDW .rd <9.12 E)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO ") and 
 		$origLine = "W/YC+YRW =;W/YC+YDW .rd <9.12\tE)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO"; # TODO: hack, Tab missing
+	($origLine eq "W/YBW) {...EI)S}\tKAI\\ EI)SH=LQEN") and 
+		$origLine = "W/YBW)\t{...EI)S} KAI\\ EI)SH=LQEN"; # TODO: hack, TAB misplaced
+	($origLine eq "W/L) {..^OU)}\tDE\\") and 
+		$origLine = "W/L)\t{..^OU)} DE\\"; # TODO: hack, TAB misplaced
 
 # 	printf("parsing %s\n", $origLine);
 
@@ -758,6 +793,48 @@ sub loadFile(){ #$fileName			loads the file into the buffer and makes small corr
 			$index++;
 		}
 	}
+	elsif ($filename eq "15.1Chron.par"){	#fix for 1Chron
+		foreach my $currentItem (@buffer){
+			if ($buffer[$index] =~ m/^AU\)TOU=/){
+				$result[$#result-1] .= " AUT)TOU=";
+			}
+			elsif ($buffer[$index] =~ m/^E\(\/C/){
+				$result[$#result-1] .= " E(/C";
+			}
+			elsif ($buffer[$index] =~ m/^MOU/){
+				$result[$#result-1] .= " MOU";
+			}
+			else{
+				push(@result, $buffer[$index] );
+			}
+			$index++;
+		}
+	}
+	elsif ($filename eq "19.Neh.par"){	#fix for Nehemiah, one time where NEHL has to go on an earlier line, no space
+		foreach my $currentItem (@buffer){
+			if ($buffer[$index] =~ m/^NEHL$/){
+				$result[$#result-1] .= "NEHL"; # no space, ANANEL
+			}
+			else{
+				push(@result, $buffer[$index] );
+			}
+			$index++;
+		}
+	}
+	elsif ($filename eq "18.Esther.par"){	#fix for Esther, where ESTHKE/NAI and ESTHKW\S have to go on an earlier line, no space
+		foreach my $currentItem (@buffer){
+			if ($buffer[$index] =~ m/^ESTHKE\/NAI$/){
+				$result[$#result-1] .= "ESTHKE/NAI"; # no space
+			}
+			elsif ($buffer[$index] =~ m/^ESTHKW\\S$/){
+				$result[$#result-1] .= "ESTHKW\S"; # no space
+			}
+			else{
+				push(@result, $buffer[$index] );
+			}
+			$index++;
+		}
+	}
 	else{ 
 		@result = @buffer;
 	}
@@ -768,17 +845,17 @@ my @result;
 
 
 	# File				File id			ThML id		OSIS id		Short Book Title
-push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") );
-push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") );
-push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") );
-push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") );
-push(@result, &processBook("05.Deut.par", "Deut", "Deut", "Deut", "Deuteronomy") );
-push(@result, &processBookVariant("07.JoshA.par", "JoshA", "Codex Alexandrinus:", "06.JoshB.par", "JoshB", "Codex Vaticanus:", "Josh", "Josh", "Joshua") );
-push(@result, &processBookVariant("09.JudgesA.par", "JudgA", "Codex Alexandrinus:", "08.JudgesB.par", "JudgB", "Codex Vaticanus:", "Judg", "Judg", "Judges") );
-#  
- push(@result, &processBook("10.Ruth.par", "Ruth", "Ruth", "Ruth", "Ruth") );
- push(@result, &processBook("11.1Sam.par", "1Sam/K", "iSam", "1Sam", "1 Samuel") );
- push(@result, &processBook("12.2Sam.par", "2Sam/K", "iiSam", "2Sam", "2 Samuel") );
+#push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") );
+#push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") );
+#push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") );
+#push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") );
+#push(@result, &processBook("05.Deut.par", "Deut", "Deut", "Deut", "Deuteronomy") );
+#push(@result, &processBookVariant("07.JoshA.par", "JoshA", "Codex Alexandrinus:", "06.JoshB.par", "JoshB", "Codex Vaticanus:", "Josh", "Josh", "Joshua") );
+#push(@result, &processBookVariant("09.JudgesA.par", "JudgA", "Codex Alexandrinus:", "08.JudgesB.par", "JudgB", "Codex Vaticanus:", "Judg", "Judg", "Judges") );
+  
+# push(@result, &processBook("10.Ruth.par", "Ruth", "Ruth", "Ruth", "Ruth") );
+# push(@result, &processBook("11.1Sam.par", "1Sam/K", "iSam", "1Sam", "1 Samuel") );
+# push(@result, &processBook("12.2Sam.par", "2Sam/K", "iiSam", "2Sam", "2 Samuel") );
 # push(@result, &processBook("13.1Kings.par", "1/3Kgs", "iKgs", "1Kgs", "1 Kings") );
 # push(@result, &processBook("14.2Kings.par", "2/4Kgs", "iiKgs", "2Kgs", "2 Kings") );
 # push(@result, &processBook("15.1Chron.par", "1Chr", "iChr", "1Chr", "1 Chronicles") );
@@ -786,7 +863,7 @@ push(@result, &processBookVariant("09.JudgesA.par", "JudgA", "Codex Alexandrinus
 # push(@result, &processBook("18.Ezra.par", "Ezr", "Ezra", "Ezra", "Ezra") );
 # push(@result, &processBook("19.Neh.par", "Neh", "Neh", "Neh", "Nehemiah") );
 # push(@result, &processBook("18.Esther.par", "Esth", "Esth", "Esth", "Esther") );
-# push(@result, &processBook("26.Job.par", "Job", "Job", "Job", "Job") );
+ push(@result, &processBook("26.Job.par", "Job", "Job", "Job", "Job") );
 #  
 #  #This might need special handling
 #  #push(@result, &processBook("Psalms.par", "Ps", "Ps", "Ps", "Psalms",
author	Martin Gruner <mg.pub@gmx.net>	2005-11-09 21:18:35 +0000
committer	Martin Gruner <mg.pub@gmx.net>	2005-11-09 21:18:35 +0000
commit	0c303ed20fe6ffce49fb0de42522bdfe4c2b5528 (patch)
tree	4b1a21cd58a59d4c75545b40c90d44aab15f9c6a /modules/mt-lxx-parallel/convert.pl
parent	c033620be0616c507eda2be35ec6c6d2dfcb633b (diff)
download	sword-tools-0c303ed20fe6ffce49fb0de42522bdfe4c2b5528.tar.gz