I'm getting tired of it. Parses Gen-2Sam now.

git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@49 07627401-56e2-0310-80f4-f8cd0041bdcd
author: Martin Gruner <mg.pub@gmx.net> 2005-11-02 21:21:15 +0000
committer: Martin Gruner <mg.pub@gmx.net> 2005-11-02 21:21:15 +0000
commit: c033620be0616c507eda2be35ec6c6d2dfcb633b (patch)
tree: 6ae49c98e71dc09e882b30b4c1f27e743c84b430 /modules
parent: 88d0e07a30f5455d9d0cc3e3922772c772ea0a50 (diff)
download: sword-tools-c033620be0616c507eda2be35ec6c6d2dfcb633b.tar.gz
1 file changed, 71 insertions, 32 deletions
diff --git a/modules/mt-lxx-parallel/convert.pl b/modules/mt-lxx-parallel/convert.pl
index e2302ad..007b828 100644
--- a/modules/mt-lxx-parallel/convert.pl
+++ b/modules/mt-lxx-parallel/convert.pl
@@ -18,7 +18,7 @@ my $MorphologicalSegmentStart   = "<seg type=\"morph\">" ;
 my $MorphologicalSegmentEnd     = "</seg>" ;
 my $MorphologicalDivisionMarker = $MorphologicalSegmentEnd . $MorphologicalSegmentStart;
 
-my $hebrewLetters="A-Z\(\)\+\#\$\*\&/"; #used in a character class of a regexp later
+my $hebrewLetters="A-Z\(\)\+\#\$\*\&/r"; #used in a character class of a regexp later
 my %hebrew2utf8 = (
 ")" =>chr(0x05D0), #HEBREW LETTER ALEF
 
@@ -30,6 +30,7 @@ my %hebrew2utf8 = (
 "H" =>chr(0x05D4), #HEBREW LETTER HE
 "W" =>chr(0x05D5), #HEBREW LETTER VAV
 "Z" =>chr(0x05D6), #HEBREW LETTER ZAYIN
+"z" =>chr(0x05D6), #HEBREW LETTER ZAYIN  # TODO: this occurred in a retranslation
 "X" =>chr(0x05D7), #HEBREW LETTER HET
 "+" =>chr(0x05D8), #HEBREW LETTER TET
 "Y" =>chr(0x05D9), #HEBREW LETTER YOD
@@ -49,6 +50,7 @@ my %hebrew2utf8 = (
 "C" =>chr(0x05E6), #HEBREW LETTER TSADI
 "Q" =>chr(0x05E7), #HEBREW LETTER QOF / KOF
 "R" =>chr(0x05E8), #HEBREW LETTER RESH
+"r" =>chr(0x05E8), #HEBREW LETTER RESH  # TODO: this occurred in a retranslation
 "#" =>chr(0x05E9).chr(0x05C1), #HEBREW LETTER SHIN == SIN/SHIN without dot
 "\$" =>chr(0x05E9).chr(0x05C1), #HEBREW LETTER SHIN + SHIN DOT == SHIN
 "&" =>chr(0x05E9).chr(0x05C2), #HEBREW LETTER SHIN + SIN DOT == SIN
@@ -167,6 +169,7 @@ my %notes = (
 "--?" => "In the Greek column:  Hebrew counterpart lacking in the LXX (minus in the LXX)?",  # TODO: my addition, check, probably wrong
 "--+" => "In col a. of the Hebrew:  element \"added\" in the Greek (plus in the LXX).",
 "---+" => "In col a. of the Hebrew:  element \"added\" in the Greek (plus in the LXX).", # TODO: my addition
+"-+" => "In col a. of the Hebrew:  element \"added\" in the Greek (plus in the LXX).", # TODO: my addition
 "'" => "Long minus or plus (at least four lines).", # TODO: my addition, check
 "''" => "Long minus or plus (at least four lines).",
 "{d}" => "Reference to doublet (occurring between the two elements of the doublet).",
@@ -176,6 +179,7 @@ my %notes = (
 "?" => "Questionable notation, equivalent, etc.",
 "??" => "Questionable notation, equivalent, etc.", # TODO: my addition
 "{p}" => "Greek preverb representing Hebrew preposition.",
+"{p}+" => "Greek preverb representing Hebrew preposition.",  # TODO: my addition ???
 "{pm}" => "Greek preverb representing Hebrew preposition.",	# TODO: my addition, check
 "{..p}" => "Preposition added in the LXX in accordance with the rules of the Greek language or translational habits.",
 
@@ -219,14 +223,19 @@ my %notes = (
 "=\%" => "Introducing categories of translation technique recorded in col. b.",
 "=\%vap" => "Change from active to passive form in verbs.",
 "=\%vpa" => "Change from passive to active form in verbs.",
+"\%vpa" => "Change from passive to active form in verbs.",  #my addition
+"=vpa" => "Change from passive to active form in verbs.",  # TODO: my addition, check
 "=\%p" => "Difference in preposition or particle.",
 "=\%p?" => "Difference in preposition or particle?",
 "=p" => "Difference in preposition or particle.", # TODO: my addition, check, uncertain?
+"={d}\%p" => "Difference in preposition or particle.", # TODO: my addition, check, uncertain? DOUBLET?
 "=\%pa" => "Difference in preposition or particle.", # TODO: my addition, check
 "=\%p=" => "Difference in preposition or particle.", # TODO: my addition, check
 "=\%p+" => "Addition of preposition or particle.",
+"\%p+" => "Addition of preposition or particle.",  #my addition
 "=\%p+?" => "Addition of preposition or particle?",
 "=\%p-" => "Omission of preposition or particle.",
+"=p\%-" => "Omission of preposition or particle.", # TODO: my addition, check, uncertain?
 "=p-" => "Omission of preposition or particle.", # TODO: my addition, check, uncertain?
 "=;" => "Retroversion in col. b based on equivalence occurring in immediate or remote context.",
 #"G" => "Hebrew variant, but at this stage no plausible retroversion is suggested.",
@@ -235,16 +244,20 @@ my %notes = (
 "=?\@" => "Etymological exegesis?", #my addition
 "=\@?" => "Etymological exegesis?", #my addition
 "=\@...a" => "Etymological exegesis according to Aramaic.",
+"=\@a" => "Etymological exegesis according to Aramaic.", # TODO: my addition, check
 "=:" => "Introducing reconstructed proper noun.",
+"=:?" => "Introducing reconstructed proper noun?",
 ":" => "Introducing reconstructed proper noun.",	#my addition
 "=v" => "Difference in vocalization (reading).",
-"=v?" => "Difference in vocalization (reading)?",
+"={d}v" => "Difference in vocalization (reading).",   # TODO: check, occurs in text, DOUBLET?
+"=v?" => "Difference in vocalization (reading)?", 
 "=vs" => "Difference in vocalization (reading).", # TODO: check, occurs in text
 "=>" => "Difference in vocalization (reading).", # TODO: check, occurs in text
 "=r" => "Incomplete retroversion.",
 
 "{*}" => "Agreement of LXX with ketib.",
 "{**}" => "Agreement of LXX with qere.",
+"{**?}" => "Agreement of LXX with qere?", #my addition
 
 "." => "Interchange of consonants between MT and the presumed Hebrew parent text of the LXX.",
 
@@ -255,6 +268,9 @@ my %notes = (
 ".j" => "Two words of MT joined into one word in the parent text of the LXX.",
 ".w" => "Different word-division reflected in the parent text of the LXX.",
 
+"(!)" => "(!)", #my addition
+
+
 "<sp" => "<sp",  #TODO: FIX, occurs in text
 "<sp>" => "<sp>",  #TODO: FIX, occurs in text
 "<sp^>" => "<sp^>",  #TODO: FIX, occurs in text
@@ -296,7 +312,6 @@ sub translateHebrewNote(){
 	($origNote =~ m/^\[(.+)\]/) and 
 		return( &createNote( "Number of verse in LXX ($1) is different from MT." ) );
 
-
 	($origNote =~ m/^=?{\.\.d(.+)}/) and 
 		return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..d}" }) );
 
@@ -317,6 +332,8 @@ sub translateHebrewNote(){
 		return( &openNote( $notes{"{...}"} . "(".&translateHebrewWordorNote($1) ." " ) );
 	($origNote =~ m/^=?{\.\.r([^}]+)$/) and 
 		return( &openNote( $notes{"{..r}"} . "(".&translateHebrewWordorNote($1) ." " ) );
+	($origNote =~ m/^=?{\.\.([^}]+)$/) and 
+		return( &openNote( $notes{"{..}"} . "(".&translateHebrewWordorNote($1) ." " ) );
 
 	($origNote =~ m/^([^{]+)}$/) and 
 		return( &closeNote(&translateHebrewWordorNote( $1 ).")" ) );
@@ -352,23 +369,12 @@ sub translateHebrewNote(){
 #		return("<reference osisRef=\"$1.$2\"/>");
 		return $origNote;
 
-# 	#Special cases: osisREf with bookID, split because of space char, so put them together again
-# 	($origNote =~ m/^<\^?(\w+)$/) and 
-# #		return( "<reference osisRef=\"$1." );  # TODO: check if <reference/> exists
-# 		return $origNote;
-# 	($origNote =~ m/^(\d+)[.:](\d+)>?/) and 
-# #		return( "$1.$2\"/> " );
-# 		return $origNote;
-
-
-
-	#special case: no note, but a crossref (with book ID)
-# 	($origNote =~ m/^<\^?(\w+)\s?(\d+)[.:](\d+)>?/) and
-#		return("<reference osisRef=\"$1.$2.$3\">&lt;$1&gt;</reference> ");
-
 	($origNote =~ m/^[?].*/) and 
 		return( &createNote( $notes{"?"} ) . &translateHebrewWordorNote( $1 ) );
 
+	($origNote =~ m/^(.+),(.+)$/) and 	# 2 Notes / Words, split up, but only at the end
+		return( &translateHebrewWordorNote( $1 ). "," .&translateHebrewWordorNote( $2 ) );
+
 	HANDLE_NOTE_FALLBACK: for my $i ( 1..(length($origNote)-1) ){ #last try, split up into chunks
 		my $currentLength = length( $origNote ) - 1 - $i;    #start with the longest and become shorter, to find the complicated notes {!}p before the simple {!}
 		if ( $notes{ substr($origNote,0,$currentLength) } ){
@@ -397,19 +403,31 @@ sub translateGreekNote(){
 
 	($notes{ $origNote }) and return( &createNote( $notes{$origNote} ) );
 
-	($origNote =~ m/^\[(.+)\]/) and 
+	($origNote =~ m/^\[(.+)\]?/) and 
 		return( &createNote( "Number of verse in LXX ($1) is different from MT." ) );
 
 	($origNote =~ m/^\[\[(.+)\]\]/) and 
 		return( &createNote( "Number of verse in MT ($1) is different from the LXX." ) );
 
+	#special case: no note, but a crossref (no book ID)     # TODO: for now OSIS refs are not parsed FIX THIS, see above
+ 	($origNote =~ m/^\[\[|\]\]$/) and
+		return $origNote;
+
+	#special case: no note, but a crossref (no book ID)     # TODO: for now OSIS refs are not parsed FIX THIS, see above
+ 	($origNote =~ m/^\[.+$/) and
+		return $origNote;
+
+	($origNote =~ m/^=?{\.\.\.(.+)\.\.\.(.+)}/) and  # special case: {...word1...word2}
+		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{...}" })
+			. &createNote("(".&translateGreekWordorNote( $2 ).") ". $notes{ "{...}" })); 
+
 	($origNote =~ m/^=?{\.\.p(.+)}/) and 
 		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..p}" }) );
 
 	($origNote =~ m/^=?{\.\.d(.+)}/) and 
 		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..d}" }) );
 
-	($origNote =~ m/^=?{\.\.\.d(.+)}/) and 																										# TODO: my addition, check
+	($origNote =~ m/^=?{\.\.\.d(.+)}/) and # TODO: my addition, check
 		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..d}" }) );
 
 	($origNote =~ m/^=?{\.\.(.+)}/) and 
@@ -431,10 +449,18 @@ sub translateGreekNote(){
 		return( &openNote($notes{"{...}"} . "(" .&translateGreekWordorNote($1) ." " ) );
 	($origNote =~ m/^=?{c([^}]+)$/) and 
 		return( &openNote($notes{"{c}"} . "(" .&translateGreekWordorNote($1) ." " ) );	# TODO: occurs {cXXX}, not documented
+	($origNote =~ m/^{([^}]+)$/) and # TODO: occurs, seems unreasonable
+		return( "(" .&translateGreekWordorNote($1) ." " );
 
 	($origNote =~ m/^([^{]+)}$/) and 
 		return( &closeNote(&translateGreekWordorNote( $1 ).")" ) );
 
+	#special case: no note, but a crossref (no book ID)     # TODO: for now OSIS refs are not parsed
+ 	($origNote =~ m/^<.+>?$/) and
+		return $origNote;
+	#special case: no note, but a crossref (no book ID)     # TODO: for now OSIS refs are not parsed
+ 	($origNote =~ m/^{=\d+}$/) and
+		return $origNote;
 
 	HANDLE_NOTE_FALLBACK: for my $i ( 1..(length($origNote)-1) ){ #last try, split up into chunks
 		my $currentLength = length( $origNote ) - 1 - $i;    #start with the longest and become shorter, to find the complicated notes {!}p before the simple {!}
@@ -454,6 +480,7 @@ sub translateHebrewWordorNote(){ #will return unicode hebrew with morph separati
 
 	( $hebrew =~ m/^[^$hebrewLetters]/ ) and return &translateHebrewNote( $hebrew );
 	( $hebrew =~ m/[}]$/ ) and return &translateHebrewNote( $hebrew );
+	( $notes{ $hebrew } ) and return &translateHebrewNote( $hebrew );
 
 	my $length = length($hebrew);
 	my $index = 0;
@@ -474,7 +501,8 @@ sub translateGreekWordorNote(){
 
 	( $greek =~ m/^[^$greekLetters]/ ) and return &translateGreekNote( $greek );
 	( $greek =~ m/[}]$/ ) and return &translateGreekNote( $greek );
-	( $greek eq "#" ) and return &translateGreekNote( $greek );
+	( $greek eq "#" ) and return &translateGreekNote( "#" );
+	( $greek =~ m/(.+)(\[.+\])$/ ) and return &translateGreekWordorNote( $1 ) .&translateGreekNote( $2 );
 
 	( $greek =~ m/^(.+)(\[\d+\])$/ ) and return ( &translateGreekWordorNote( $1 ) . &translateGreekNote ( $2 ) );
 
@@ -504,10 +532,21 @@ sub parseLine(){
 
  	printf("parsing %s\n", $origLine);
 
-	$origLine =~ s/--=/--+ =/;	# TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
-	$origLine =~ s/-\%vap/=\%vap/;	# TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
-
-	($origLine =~ m/^([^=]+)?([=].+)?\t(.+)$/) or die("No match in parseLine().\n");
+	$origLine =~ s/ --=/ --+ =/;	# TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
+	$origLine =~ s/ -\%vap/ =\%vap/;	# TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
+	$origLine =~ s/ ;=/ =;/;	# TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
+	$origLine =~ s/ \+;/ =;/;	# TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
+	$origLine =~ s/[\ ]{10,}/\t/;	# TODO: UGLY HACK, sometimes the tab in the wrong place but large spaces in the right
+	($origLine eq "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY KAI\\ TO\\N AMORRAI=ON ") and 
+		$origLine = "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY\tKAI\\ TO\\N AMORRAI=ON"; # TODO: hack, Tab missing
+	($origLine eq "W/H/KHNYM =W/H/)BNYM .m .kb # KAI\\ OI( LI/QOI ") and 
+		$origLine = "W/H/KHNYM =W/H/)BNYM .m .kb\tKAI\\ OI( LI/QOI"; # TODO: hack, Tab missing
+	($origLine eq "W/YC+YRW =;W/YC+YDW .rd <9.12 E)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO ") and 
+		$origLine = "W/YC+YRW =;W/YC+YDW .rd <9.12\tE)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO"; # TODO: hack, Tab missing
+
+# 	printf("parsing %s\n", $origLine);
+
+	($origLine =~ m/^([^=\t]+)?([=][^\t]*)?\t(.+)$/) or die("No match in parseLine().\n");
 	($1 or $2) or die("Hebrew not found.\n");
 	$3 or die("Greek not found.\n");
 	$1 and my @hebrewWordsColA = split(/\s+/, $1);
@@ -729,17 +768,17 @@ my @result;
 
 
 	# File				File id			ThML id		OSIS id		Short Book Title
-#push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") );
-#push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") );
-#push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") );
-#push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") );
-#push(@result, &processBook("05.Deut.par", "Deut", "Deut", "Deut", "Deuteronomy") );
+push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") );
+push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") );
+push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") );
+push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") );
+push(@result, &processBook("05.Deut.par", "Deut", "Deut", "Deut", "Deuteronomy") );
 push(@result, &processBookVariant("07.JoshA.par", "JoshA", "Codex Alexandrinus:", "06.JoshB.par", "JoshB", "Codex Vaticanus:", "Josh", "Josh", "Joshua") );
 push(@result, &processBookVariant("09.JudgesA.par", "JudgA", "Codex Alexandrinus:", "08.JudgesB.par", "JudgB", "Codex Vaticanus:", "Judg", "Judg", "Judges") );
 #  
-# push(@result, &processBook("10.Ruth.par", "Ruth", "Ruth", "Ruth", "Ruth") );
-# push(@result, &processBook("11.1Sam.par", "1Sam/K", "iSam", "1Sam", "1 Samuel") );
-# push(@result, &processBook("12.2Sam.par", "2Sam/K", "iiSam", "2Sam", "2 Samuel") );
+ push(@result, &processBook("10.Ruth.par", "Ruth", "Ruth", "Ruth", "Ruth") );
+ push(@result, &processBook("11.1Sam.par", "1Sam/K", "iSam", "1Sam", "1 Samuel") );
+ push(@result, &processBook("12.2Sam.par", "2Sam/K", "iiSam", "2Sam", "2 Samuel") );
 # push(@result, &processBook("13.1Kings.par", "1/3Kgs", "iKgs", "1Kgs", "1 Kings") );
 # push(@result, &processBook("14.2Kings.par", "2/4Kgs", "iiKgs", "2Kgs", "2 Kings") );
 # push(@result, &processBook("15.1Chron.par", "1Chr", "iChr", "1Chr", "1 Chronicles") );
author	Martin Gruner <mg.pub@gmx.net>	2005-11-02 21:21:15 +0000
committer	Martin Gruner <mg.pub@gmx.net>	2005-11-02 21:21:15 +0000
commit	c033620be0616c507eda2be35ec6c6d2dfcb633b (patch)
tree	6ae49c98e71dc09e882b30b4c1f27e743c84b430 /modules
parent	88d0e07a30f5455d9d0cc3e3922772c772ea0a50 (diff)
download	sword-tools-c033620be0616c507eda2be35ec6c6d2dfcb633b.tar.gz