Gen-Isa parses

git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@51 07627401-56e2-0310-80f4-f8cd0041bdcd
author: Martin Gruner <mg.pub@gmx.net> 2005-11-16 21:20:49 +0000
committer: Martin Gruner <mg.pub@gmx.net> 2005-11-16 21:20:49 +0000
commit: d7b2907b2906b1fa1da2687ff87de291380469ae (patch)
tree: c516142a84f2b98164fbb3822335ac8a7979f000 /modules
parent: 0c303ed20fe6ffce49fb0de42522bdfe4c2b5528 (diff)
download: sword-tools-d7b2907b2906b1fa1da2687ff87de291380469ae.tar.gz
1 files changed, 75 insertions, 79 deletions
diff --git a/modules/mt-lxx-parallel/convert.pl b/modules/mt-lxx-parallel/convert.pl
index 4187c4d..5cca6f9 100644
--- a/modules/mt-lxx-parallel/convert.pl
+++ b/modules/mt-lxx-parallel/convert.pl
@@ -183,6 +183,7 @@ my %notes = (
 "'" => "Long minus or plus (at least four lines).", # TODO: my addition, check
 "''" => "Long minus or plus (at least four lines).",
 "{d}" => "Reference to doublet (occurring between the two elements of the doublet).",
+"={d};" => "Reference to doublet (occurring between the two elements of the doublet).", # TODO: occurs, check?
 "{d?}" => "Reference to doublet (occurring between the two elements of the doublet)?",
 "{d}?" => "Reference to doublet (occurring between the two elements of the doublet)?",
 "{?d}" => "Reference to doublet (occurring between the two elements of the doublet)?",
@@ -194,6 +195,7 @@ my %notes = (
 "{p}+" => "Greek preverb representing Hebrew preposition.",  # TODO: my addition ???
 "{pm}" => "Greek preverb representing Hebrew preposition.",	# TODO: my addition, check
 "{..p}" => "Preposition added in the LXX in accordance with the rules of the Greek language or translational habits.",
+"{..^{p}" => "Preposition added in the LXX in accordance with the rules of the Greek language or translational habits.", #my addition
 
 "{+}" => "Unknown.", # TODO: occurs in the text, unknown meaning
 "{+?}" => "Unknown?", # TODO: occurs in the text, unknown meaning
@@ -224,6 +226,7 @@ my %notes = (
 "{t?}" => "Transliterated Hebrew word?",
 "{c}" => "Unknown.", # TODO: unknown
 "{c}?" => "Unknown.", # TODO: unknown
+"<qla>" => "Unknown.", # TODO: unknown
 
 
 "#" => "Long line continuing in next one, placed both at the end of the line running over and at the beginning of the following line in the opposite column.",
@@ -237,7 +240,8 @@ my %notes = (
 "=\%" => "Introducing categories of translation technique recorded in col. b.",
 "=\%vap" => "Change from active to passive form in verbs.",
 "=\%vpa" => "Change from passive to active form in verbs.",
-"\%vpa" => "Change from passive to active form in verbs.",  #my addition
+"=\%vpa?" => "Change from passive to active form in verbs?",
+"\%vpa" => "Change from passive to active form in verbs.",
 "=vpa" => "Change from passive to active form in verbs.",  # TODO: my addition, check
 "=\%p" => "Difference in preposition or particle.",
 "=\%p?" => "Difference in preposition or particle?",
@@ -249,6 +253,7 @@ my %notes = (
 "\%p+" => "Addition of preposition or particle.",  #my addition
 "=\%p+?" => "Addition of preposition or particle?",
 "=\%p-" => "Omission of preposition or particle.",
+"=\%p-?" => "Omission of preposition or particle?",
 "=p\%-" => "Omission of preposition or particle.", # TODO: my addition, check, uncertain?
 "=p-" => "Omission of preposition or particle.", # TODO: my addition, check, uncertain?
 "=;" => "Retroversion in col. b based on equivalence occurring in immediate or remote context.",
@@ -269,6 +274,9 @@ my %notes = (
 "=>" => "Difference in vocalization (reading).", # TODO: check, occurs in text
 "=r" => "Incomplete retroversion.",
 
+"=a" => "Aramaic?",	# TODO: UNKNOWN
+
+
 "{*}" => "Agreement of LXX with ketib.",
 "{**}" => "Agreement of LXX with qere.",
 "{**" => "Agreement of LXX with qere.", #my addidion
@@ -293,6 +301,7 @@ my %notes = (
 "^" => "^",			#Notsure what these are
 "?^" => "?^",			#Notsure what these are
 "^^^" => "^^^", 
+"___" => "___",
 
 );
 
@@ -313,7 +322,7 @@ sub closeNote(){
 sub translateHebrewNote(){
 	my $origNote = shift;
 
-#	print("TranslateHebrewNote $origNote\n");
+	print("TranslateHebrewNote $origNote\n");
 
 	($origNote eq "=") and return;	#= only marks colB, no real note
 
@@ -361,6 +370,9 @@ sub translateHebrewNote(){
 	($origNote =~ m/^=?@([?$hebrewLetters]+)/) and
 		return( &createNote( $notes{ "=\@" } ) . &translateHebrewWordorNote( $1 ) );
 
+	($origNote =~ m/^={d}@([?$hebrewLetters]+)a/) and #special case ;)
+		return( &createNote( $notes{"{d}"} ." ". $notes{ "=\@a" } ) . &translateHebrewWordorNote( $1 ) );
+
 	($origNote =~ m/^=?\^([?$hebrewLetters]+)/) and
 		return( "^" . &translateHebrewWordorNote( $1 ) );   # TODO: check, what is ^?
 
@@ -388,6 +400,9 @@ sub translateHebrewNote(){
 #		return("<reference osisRef=\"$1.$2\"/>");
 		return $origNote;
 
+ 	($origNote =~ m/^.+[.].+$/) and #Occurs e.g.: "<gen1.1 ex1.2 lev3.3"
+		return $origNote;
+
 	($origNote =~ m/^[?].*/) and 
 		return( &createNote( $notes{"?"} ) . &translateHebrewWordorNote( $1 ) );
 
@@ -418,7 +433,7 @@ sub translateHebrewLetters(){ #will return unicode hebrew without morph separati
 sub translateGreekNote(){
 	my $origNote = shift;
 
-#	print("TranslateGreekNote $origNote\n");
+	print("TranslateGreekNote $origNote\n");
 
 	($notes{ $origNote }) and return( &createNote( $notes{$origNote} ) );
 
@@ -443,6 +458,15 @@ sub translateGreekNote(){
 	($origNote =~ m/^=?{\.\.p(.+)}/) and 
 		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..p}" }) );
 
+	($origNote =~ m/^=?{\.\.\.r(.+)}/) and 
+		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..r}" }) );
+
+	($origNote =~ m/^=?{\.\.r(.+)}/) and 
+		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..r}" }) );
+
+	($origNote =~ m/^=?{\.\.c(.+)}/) and 
+		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..c}" }) );
+
 	($origNote =~ m/^=?{\.\.d(.+)}/) and 
 		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..d}" }) );
 
@@ -462,6 +486,8 @@ sub translateGreekNote(){
 		return( &openNote($notes{"{..r}"} . "(" .&translateGreekWordorNote($1) ." " ) );
 	($origNote =~ m/^=?{\.\.p([^}]+)$/) and 
 		return( &openNote($notes{"{..p}"} . "(" .&translateGreekWordorNote($1) ." " ) );
+	($origNote =~ m/^=?{\.\.\.d([^}]+)$/) and 
+		return( &openNote($notes{"{..d}"} . "(" .&translateGreekWordorNote($1) ." " ) );
 	($origNote =~ m/^=?{\.\.\.([^}]+)$/) and 
 		return( &openNote($notes{"{...}"} . "(" .&translateGreekWordorNote($1) ." " ) );
 	($origNote =~ m/^=?{\.\.([^}]+)$/) and 
@@ -486,6 +512,9 @@ sub translateGreekNote(){
 	($origNote =~ m/^{([^}]+}?)$/) and # TODO: occurs, seems unreasonable
 		return( "(" .&translateGreekWordorNote($1) ." " );
 
+	($origNote =~ m/^(.+),(.+)$/) and 	# 2 Notes / Words, split up, but only at the end
+		return( &translateGreekWordorNote( $1 ). "," .&translateGreekWordorNote( $2 ) );
+
 	HANDLE_NOTE_FALLBACK: for my $i ( 1..(length($origNote)-1) ){ #last try, split up into chunks
 		my $currentLength = length( $origNote ) - 1 - $i;    #start with the longest and become shorter, to find the complicated notes {!}p before the simple {!}
 		if ( $notes{ substr($origNote,0,$currentLength) } ){
@@ -503,6 +532,10 @@ sub translateHebrewWordorNote(){ #will return unicode hebrew with morph separati
 # 	print("TranslateHebrew of: $hebrew\n");
 
 	( $hebrew =~ m/^[^$hebrewLetters]/ ) and return &translateHebrewNote( $hebrew );
+
+	( $hebrew =~ m/^([$hebrewLetters]+)(\[.+\])/ ) and 
+		return &translateHebrewWordorNote( $1 ) . &translateHebrewNote( $2 );
+
 	( $hebrew =~ m/[}]$/ ) and return &translateHebrewNote( $hebrew );
 	( $notes{ $hebrew } ) and return &translateHebrewNote( $hebrew );
 
@@ -523,7 +556,7 @@ sub translateGreekWordorNote(){
 
 	my $greek = shift;
 
-#	printf("TranslateGreek of $greek\n");
+	printf("TranslateGreek of $greek\n");
 
 	( $greek =~ m/^[^$greekLetters]/ ) and return &translateGreekNote( $greek );
 	( $greek =~ m/[}]\??$/ ) and return &translateGreekNote( $greek );
@@ -531,6 +564,8 @@ sub translateGreekWordorNote(){
 	( $greek =~ m/(.+)(\[.+\])$/ ) and return &translateGreekWordorNote( $1 ) .&translateGreekNote( $2 );
 
 	( $greek =~ m/^(.+)(\[\d+\])$/ ) and return ( &translateGreekWordorNote( $1 ) . &translateGreekNote ( $2 ) );
+	( $greek =~ m/^([^.]+)\.\.\.([^.]+)$/) and # TODO: occurs e.g.  {..bla1 bla2...bla3 #TODO: format output
+		return( &translateGreekWordorNote($1) . "..." . &translateGreekWordorNote($2) );
 
 
 	my $length = length($greek);
@@ -567,6 +602,9 @@ sub parseLine(){
 	$origLine =~ s/}{/} {/;	# TODO: UGLY HACK, notes hung together
 	$origLine =~ s/=a\$\/DY/=A\$\/DY/;# TODO: UGLY HACK, Hebrew letter wrong
 	$origLine =~ s/{\.\.\^EPIQEI\\S\.\.\^E\)FI\/LHSA}/{..^EPIQEI\\S E)FI\/LHSA}/;# TODO: UGLY HACK, strange note
+	$origLine =~ s/E\t\)KPE\/SH\|/\tE)KPE\/SH|/; #occurs, tab misplaced
+
+	$origLine =~ m/^W\(\/SPER/ and return;	#ignore, probably an error
 
 	($origLine eq "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY KAI\\ TO\\N AMORRAI=ON ") and 
 		$origLine = "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY\tKAI\\ TO\\N AMORRAI=ON"; # TODO: hack, Tab missing
@@ -752,7 +790,7 @@ sub processBookVariant(){
 
 }
 
-sub loadFile(){ #$fileName			loads the file into the buffer and makes small corrections
+sub loadFile(){ #$fileName	loads the file into the buffer and makes small corrections
 	my $filename = shift;
 
 	open( FILE, "$prefix/$filename") or die("Could not open file $prefix/$filename");
@@ -760,83 +798,41 @@ sub loadFile(){ #$fileName			loads the file into the buffer and makes small corr
 
 	my @result;
 	my $index = 0;
-	if (($filename eq "45.DanielOG.par") or($filename eq "46.DanielTh.par")){ #several places where DANIHL has to to on the preceding line
-		foreach my $currentItem (@buffer){
-			if ($buffer[$index] =~ m/^DANIHL/){
-				$result[$#result-1] .= " DANIHL";
-			}
-			else{
-				push(@result, $buffer[$index] );
-			}
-			$index++;
+	foreach my $currentItem (@buffer){
+		if ($buffer[$index] =~ m/^DANIHL/){
+			$result[$#result-1] .= " ".$buffer[$index];
 		}
-	}
-	elsif ($filename eq "04.Num.par"){	#fix for Numeri, one time where NUMA has to go on the preceding line, no space
-		foreach my $currentItem (@buffer){
-			if ($buffer[$index] =~ m/^NUMA/){
-				$result[$#result-1] .= "NUMA";
-			}
-			else{
-				push(@result, $buffer[$index] );
-			}
-			$index++;
+		elsif ($buffer[$index] =~ m/^NUMA/){
+			$result[$#result-1] .= $buffer[$index];
 		}
-	}
-	elsif ($filename eq "05.Deut.par"){	#fix for Numeri, one time where NUMA has to go on the preceding line, no space
-		foreach my $currentItem (@buffer){
-			if ($buffer[$index] =~ m/^DEUTERONO\/MION/){
-				$result[$#result-1] .= " DEUTERONO/MION";
-			}
-			else{
-				push(@result, $buffer[$index] );
-			}
-			$index++;
+		elsif ($buffer[$index] =~ m/^DEUTERONO\/MION/){
+			$result[$#result-1] .= " ".$buffer[$index];
 		}
-	}
-	elsif ($filename eq "15.1Chron.par"){	#fix for 1Chron
-		foreach my $currentItem (@buffer){
-			if ($buffer[$index] =~ m/^AU\)TOU=/){
-				$result[$#result-1] .= " AUT)TOU=";
-			}
-			elsif ($buffer[$index] =~ m/^E\(\/C/){
-				$result[$#result-1] .= " E(/C";
-			}
-			elsif ($buffer[$index] =~ m/^MOU/){
-				$result[$#result-1] .= " MOU";
-			}
-			else{
-				push(@result, $buffer[$index] );
-			}
-			$index++;
+		elsif ($buffer[$index] =~ m/^AU\)TOU=/){
+			$result[$#result-1] .= " ".$buffer[$index];
 		}
-	}
-	elsif ($filename eq "19.Neh.par"){	#fix for Numeri, one time where NUMA has to go on the preceding line, no space
-		foreach my $currentItem (@buffer){
-			if ($buffer[$index] =~ m/^NEHL$/){
-				$result[$#result-1] .= "NEHL"; # no space, ANANEL
-			}
-			else{
-				push(@result, $buffer[$index] );
-			}
-			$index++;
+		elsif ($buffer[$index] =~ m/^E\(\/C/){
+			$result[$#result-1] .= " ". $buffer[$index];
 		}
-	}
-	elsif ($filename eq "18.Esther.par"){	#fix for Numeri, one time where NUMA has to go on the preceding line, no space
-		foreach my $currentItem (@buffer){
-			if ($buffer[$index] =~ m/^ESTHKE\/NAI$/){
-				$result[$#result-1] .= "ESTHKE/NAI"; # no space
-			}
-			elsif ($buffer[$index] =~ m/^ESTHKW\\S$/){
-				$result[$#result-1] .= "ESTHKW\S"; # no space
-			}
-			else{
-				push(@result, $buffer[$index] );
-			}
-			$index++;
+		elsif ($buffer[$index] =~ m/^MOU/){
+			$result[$#result-1] .= " " . $buffer[$index];
 		}
-	}
-	else{ 
-		@result = @buffer;
+		elsif ($buffer[$index] =~ m/^NEHL$/){
+			$result[$#result-1] .= $buffer[$index]; # no space, ANANEL
+		}
+		elsif ($buffer[$index] =~ m/^ESTHKE\/NAI$/){
+			$result[$#result-1] .= $buffer[$index]; # no space
+		}
+		elsif ($buffer[$index] =~ m/^ESTHKW\\S$/){
+			$result[$#result-1] .= $buffer[$index]; # no space
+		}
+		elsif ($buffer[$index] =~ m/^ISA/){	# a few lines in ISAIAH have this in different styles
+			$result[$#result-1] .= $buffer[$index]; # no space
+		}
+		else{
+			push(@result, $buffer[$index] );
+		}
+		$index++;
 	}
 	return @result;
 }
@@ -863,15 +859,15 @@ my @result;
 # push(@result, &processBook("18.Ezra.par", "Ezr", "Ezra", "Ezra", "Ezra") );
 # push(@result, &processBook("19.Neh.par", "Neh", "Neh", "Neh", "Nehemiah") );
 # push(@result, &processBook("18.Esther.par", "Esth", "Esth", "Esth", "Esther") );
- push(@result, &processBook("26.Job.par", "Job", "Job", "Job", "Job") );
+# push(@result, &processBook("26.Job.par", "Job", "Job", "Job", "Job") );
 #  
 #  #This might need special handling
 #  #push(@result, &processBook("Psalms.par", "Ps", "Ps", "Ps", "Psalms",
 #  
-# push(@result, &processBook("23.Prov.par", "Prov", "Prov", "Prov", "Proverbs") );
+#push(@result, &processBook("23.Prov.par", "Prov", "Prov", "Prov", "Proverbs") );
 # push(@result, &processBook("24.Qoh.par", "Qoh", "Eccl", "Eccl", "Ecclesiastes") );
 # push(@result, &processBook("25.Cant.par", "Song", "Song", "Song", "Song of Solomon") );
-# push(@result, &processBook("40.Isaiah.par", "Isa", "Isa", "Isa", "Isaiah") );
+ push(@result, &processBook("40.Isaiah.par", "Isa", "Isa", "Isa", "Isaiah") );
 # push(@result, &processBook("41.Jer.par", "Jer", "Jer", "Jer", "Jeremiah") );
 # push(@result, &processBook("43.Lam.par", "Lam", "Lam", "Lam", "Lamentations") );
 # push(@result, &processBook("44.Ezekiel.par", "Ezek", "Ezek", "Ezek", "Ezekiel") );
author	Martin Gruner <mg.pub@gmx.net>	2005-11-16 21:20:49 +0000
committer	Martin Gruner <mg.pub@gmx.net>	2005-11-16 21:20:49 +0000
commit	d7b2907b2906b1fa1da2687ff87de291380469ae (patch)
tree	c516142a84f2b98164fbb3822335ac8a7979f000 /modules
parent	0c303ed20fe6ffce49fb0de42522bdfe4c2b5528 (diff)
download	sword-tools-d7b2907b2906b1fa1da2687ff87de291380469ae.tar.gz