summaryrefslogblamecommitdiffstats
path: root/modules/mt-lxx-parallel/convert.pl
blob: 45cae4beddd653f0ef310a8a4b217875479f361a (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12











                                                           

                                                 
                  
 



                                                                                        
                                                                                     

                                      


                                                                                              





                                       
                                                                                


                                     
                                                                                    

                                       
                                     


                                        
                                    


                                           
                                                                               






                                                                          

                                                         


                                    
                                                       
  
 





                                                                         

  
 

                                     









                                                                    
                                                        
 
           
 




                                 




                                                                         
                                                              








                                              
                                                         


                                              










                                                                         




                                                                                             



                                                                                                           
                                                                          


                                            

                                                                                            
                                           





                                                                                                                                                       
                                                                                                

                                                                                                  
                                                                                           
                                                                                            
                                                                                                                                      
                                                                                                                            
                                                                                                                                       

                                                                                                            
                                                                                                                   
                                                                                                          
                                                                              

                                                                                     
                                                                                                              
                                                                                      

                                                                                      


                                                                                                                   
                                                                      
                                                          
                                                                                    
                                                                                          
                                                                                                                      
                                                                                                                                    
                                                                                                                                     
 


                                                                 
                                

                                                                                 
                                                                                  
                                                                                   


                                                                                   
                                                                                  
                                                                                   
                                                                                   
                                                                                 
                                                                                  
                                                                                  

                                                                                  

                                                                                 

                                                                                                                 
                                                                 


                                                        
 
                                    
                                     
                                    
                                      

                                                                                


                                                                                                                                                              

                                                                                                                                                
                                    
                                                                                                                                  
                                                                                                                 
                                                                                                                                          



                                                                                                                  

                                                            
                                                                                     
                                                   
                                                   
                                                    
                                                    
                                                                                        
                                                                                                      
                                                                               
                                                                               
                                                  
                                                               
                                                   
                                                  
                                                   
                                                   
                                                                                         
                                                                                       
                                                                                                
                                                                                    

                                                        

                                                
                                                           
                                                                                   
                                                 
                                                  
                                                                    
                                                

                                                                                             
                                                                               
                                                                              

                                   


                                       

                                        
                                                    
                                                      
 
                                                                                              
 
                                                                                                                        





                                                                                              


                            
                                           

                                               

                                                       
                                                       
                
               


  

                             
 
                                                 
 

                                                      




                                                                             
                                                                    
                                                                                                         

                                                                                          
                                            
                                                                                             
 



                                                                                                                            
                                             

                                                                                                    



                                                                                                    

                                                                                                    
                                              

                                                                                                    
                                            

                                                                                                   




                                                                                                 

                                                                                                
 
                                         

                                                                            
 
                                                      

                                                                                            
                                                       





                                                                                                                                    
                                                








                                                                   






                                                                                                        


                                                                            
                                       

                                                                                        


                                                                                                  


                                                                                                                 



                                                                                                                                                                                               





                                           
                                                                                  
                           
                                            
                   
                   
                                                         
 








                                                                                                                        
         
                       

 




















                                                                                                          


                             
                                                
 

                                                     

                                                                             
                                                

                                                                                                 


                                                                                             




                                                                                                                            
                                                                          





                                                                                                     


                                                                                                    


                                                                                                   

                                                                                                   
 








                                                                                                   


                                                                                                   
                                                                         

                                                                                                   





                                                                                                    





                                                                                                                                             


                                                                                                 






                                                                                                

                                                                                                



                                                                                                

                                                                                               

                                                                                                                                     
 





                                                                                                        
 




                                                                            


                                                                                                




                                                                                                        








                                                                                                                                                                                             




                                                       
 

                                                                                                                 
         
                       
 


                               
 

                          
                                                       
                                             
 

                                                                                         
                                                                                    
                                                                          

                                                                                                             

                                                                                                                     

                                                                                                                
 
                                                

 

                



                             
                                          
 




                                                                                                                                         



                                                                                                                                 
                                                                           

                                                             
                                                      

                                                                    
                                                              
 
                                                                          
 

                                                                                                               






                                                                                                      

                                                                                                                              



                                                                                             
 
                                         

                                                                                              











                                                      
                                                                                   



                                             



                                                                              


                                       
                                        
                                                                                 
         
                                     
 
                                        


                       


                                                                                              







                                                                                      
 
                                                         
 


                             



                                                                                                                 






                                                                                   

                                               
                         






                                                                                






                                                                                   



                                               
                 




                                             

                                                                        
                             



                                     
 
                                         


                   

                                                                 
                                              
                                           
                                                                         

                                                  


                                                                                                       


                                                                                                    
                                                                                                                                                         

                                     



                                                                                                        
                                                                                                              
                                 
                                                             
                                                                    


                                                                   


                                                                                                     



                                                                
                                                                                            


                                     

                                      
                         

                        

 


                                                                                                                                                                        
                              
                                      
                                 
                              
                                      
                                 



                                     

                                            


                   

                                                                 
                                              
                                                                                                  
                                           

                                                  
                                           
                                                                                                           
                                                               
                                                                                                           
                                                               
                                                                



                                                                                                
                                                                                             


                                                                   
                                                                                             


                                            


                                                                                                                 

                                            
                                                                                           
                                                                                     
                                                                                              
                         
                                                                      

                                                                
                                                                                                                     


                                     

                                      

                         

                        

 
                                                                                          




                                                                                         

                      

                                                
                                                                  
                 
                                                 
                                                             
                 
                                                            
                                                                 
                 
                                                     
                                                                 
                 
                                                   
                                                                  
                 
                                                
                                                                   
                 
                                                  
                                                                                
                 
                                                         
                                                                        
                 
                                                       
                                                                        
                 
                                                                                                             
                                                                        
                 
                                                                
                                                                        
                 
                                                                

                                                                        
                                                               

                                                                        
                                                                                    






                                                                                                                                                               
                 



                                                        




                       
 
 















                                                                                                                                                                                                                                              
                                                                                                                






                                                                                                                                                                 



                                                                                     






                                                                                         
                                                                         
   
                                                                          
   
                                                                                  

                                                                                         
                                                                               


                                                                                    
 
                                                                                                                                                
 



                                                                                    







                                                                                   
 
                                      
 

                                       
 
#!/usr/bin/perl -w

#
# This tool is supposed to convert the CCAT Parallel MT/LXX text
# to a valid OSIS file.
#
# @author Martin Gruner
# @copyright GPL
#

use strict;

# Everything printed to STDOUT is encoded as UTF-8 (see "man perluniintro").
binmode STDOUT, ':utf8';

# Path prefix; it is not referenced in this part of the file.
# NOTE(review): presumably the directory the input files live in -- confirm.
my $prefix = './';

# OSIS markup that opens and closes one morphological segment of a Hebrew word.
my $MorphologicalSegmentStart   = '<seg type="morph">';
my $MorphologicalSegmentEnd     = '</seg>';
# Ends the running segment and immediately starts the next one.
my $MorphologicalDivisionMarker = join('', $MorphologicalSegmentEnd, $MorphologicalSegmentStart);

# Characters a transliterated Hebrew word may consist of. The string is spliced
# into regexp character classes further down ([$hebrewLetters]), so it is
# written in character-class notation: A-Z ( ) + # $ * & / r
my $hebrewLetters="A-Z\(\)\+\#\$\*\&/r"; #used in a character class of a regexp later

# Transliteration character => replacement text for the Hebrew columns.
# translateHebrewLetters() looks the input up one character at a time, so the
# two-character key "**" cannot be reached through that lookup.
my %hebrew2utf8 = (
")" =>chr(0x05D0), #HEBREW LETTER ALEF

"A" =>chr(0x05D0), #HEBREW LETTER ALEF     # TODO: check, this is from an  occurrence of ABRHM

"B" =>chr(0x05D1), #HEBREW LETTER BET
"G" =>chr(0x05D2), #HEBREW LETTER GIMEL
"D" =>chr(0x05D3), #HEBREW LETTER DALET
"H" =>chr(0x05D4), #HEBREW LETTER HE
"W" =>chr(0x05D5), #HEBREW LETTER VAV
"Z" =>chr(0x05D6), #HEBREW LETTER ZAYIN
"z" =>chr(0x05D6), #HEBREW LETTER ZAYIN  # TODO: this occured in a retranslation
"X" =>chr(0x05D7), #HEBREW LETTER HET
"+" =>chr(0x05D8), #HEBREW LETTER TET
"Y" =>chr(0x05D9), #HEBREW LETTER YOD
"I" =>chr(0x05D9), #HEBREW LETTER YOD	# TODO: this occured in a retranslation (KI)
"K" =>chr(0x05DB), #HEBREW LETTER KAF
"L" =>chr(0x05DC), #HEBREW LETTER LAMED
"M" =>chr(0x05DE), #HEBREW LETTER MEM
"N" =>chr(0x05E0), #HEBREW LETTER NUN
"S" =>chr(0x05E1), #HEBREW LETTER SAMEKH
"(" =>chr(0x05E2), #HEBREW LETTER AYIN
"P" =>chr(0x05E4), #HEBREW LETTER PE
"C" =>chr(0x05E6), #HEBREW LETTER TSADI
"Q" =>chr(0x05E7), #HEBREW LETTER QOF / KOF
"R" =>chr(0x05E8), #HEBREW LETTER RESH
"r" =>chr(0x05E8), #HEBREW LETTER RESH  # TODO: this occured in a retranslation
# The undotted letter must not carry a dot; with U+05C1 appended it would be
# indistinguishable from "$" (SHIN) below.
"#" =>chr(0x05E9), #HEBREW LETTER SHIN == SIN/SHIN without dot
"\$" =>chr(0x05E9).chr(0x05C1), #HEBREW LETTER SHIN + SHIN DOT == SHIN
"&" =>chr(0x05E9).chr(0x05C2), #HEBREW LETTER SHIN + SIN DOT == SIN
"T" =>chr(0x05EA), #HEBREW LETTER TAV
"-" =>chr(0x05BE), #MAQQEF

# Special stuff
"*" =>"*<note type=\"textual\">Ketiv.</note>", # TODO:FIX
"**" =>"**<note type=\"textual\">Qere.</note>", #
"/" => $MorphologicalDivisionMarker,

"," => ",", #separate words in colB
"?" => "<note type=\"textual\">Uncertain.</note>" #HACK
);

# Word-final shapes of the five Hebrew letters that have one, keyed by the same
# transliteration letters that %hebrew2utf8 uses for the regular shapes.
my %final_hebrew2utf8 = (
	'K' => "\x{05DA}",	# HEBREW LETTER FINAL KAF  # TODO: HANDLE FINAL LETTERS
	'M' => "\x{05DD}",	# HEBREW LETTER FINAL MEM
	'N' => "\x{05DF}",	# HEBREW LETTER FINAL NUN
	'P' => "\x{05E3}",	# HEBREW LETTER FINAL PE
	'C' => "\x{05E5}",	# HEBREW LETTER FINAL TSADI
);


# Characters a transliterated Greek word may start with; spliced into a regexp
# character class by translateGreekWordorNote().
# NOTE(review): after interpolation the trailing \/ is an escaped slash inside
# the class, so the backslash itself is not a member -- confirm this is intended.
my $greekLetters = "A-Z#3\(\)+=|\\/";

# Transliteration character => replacement text for the Greek column.
# Every key must occur only once: a repeated key silently replaces the earlier
# value (a second, literal "|" entry used to disable the iota subscript).
my %greek2utf8 = (
"\\"=>chr(0x0300), #COMBINING GRAVE ACCENT
"/" =>chr(0x0301), #COMBINING ACUTE ACCENT
"+" =>chr(0x0308), #COMBINING DIAERESIS
"=" =>chr(0x0342), #COMBINING GREEK PERISPOMENI / CIRCUMFLEX
")" =>chr(0x0313), #COMBINING COMMA ABOVE / SMOOTH BREATHING
"(" =>chr(0x0314), #COMBINING REVERSED COMMA ABOVE / ROUGH BREATHING
"|" =>chr(0x0345), #COMBINING GREEK YPOGEGRAMMENI / IOTA SUBSCRIPT

"'" => "'",
"?" => "<note type=\"textual\">Uncertain.</note>", #HACK

"^" => "^",

"-" => "-", #occurs in the text
":" => ":", #occurs in the text!?
"!" => "!", #occurs in the text!?

"A" =>chr(0x03B1), #GREEK SMALL LETTER ALPHA
"B" =>chr(0x03B2), #GREEK SM LETT BETA / SM LETTER BETA BEGINNING OF WORD
"G" =>chr(0x03B3), #GREEK SMALL LETTER GAMMA
"D" =>chr(0x03B4), #GREEK SMALL LETTER DELTA
"E" =>chr(0x03B5), #GREEK SMALL LETTER EPSILON
"e" =>chr(0x03B5), #GREEK SMALL LETTER EPSILON # occurs in Neh
"V" =>chr(0x03DD), #GREEK SMALL LETTER DIGAMMA
"Z" =>chr(0x03B6), #GREEK SMALL LETTER ZETA
"H" =>chr(0x03B7), #GREEK SMALL LETTER ETA
"Q" =>chr(0x03B8), #GREEK SMALL LETTER THETA
"I" =>chr(0x03B9), #GREEK SMALL LETTER IOTA
"K" =>chr(0x03BA), #GREEK SMALL LETTER KAPPA
"L" =>chr(0x03BB), #GREEK SMALL LETTER LAMDA
"M" =>chr(0x03BC), #GREEK SMALL LETTER MU
"N" =>chr(0x03BD), #GREEK SMALL LETTER NU
"n" =>chr(0x03BD), #GREEK SMALL LETTER NU # occurs in Neh
"C" =>chr(0x03BE), #GREEK SMALL LETTER XI
"O" =>chr(0x03BF), #GREEK SMALL LETTER OMICRON
"P" =>chr(0x03C0), #GREEK SMALL LETTER PI
"R" =>chr(0x03C1), #GREEK SMALL LETTER RHO
"S" =>chr(0x03C3), #GREEK SMALL LETTER SIGMA
"J" =>chr(0x03C2), #GREEK SM LETT FINAL SIGMA / SM LETT SIGMA END OF WORD
"T" =>chr(0x03C4), #GREEK SMALL LETTER TAU
"U" =>chr(0x03C5), #GREEK SMALL LETTER UPSILON
"F" =>chr(0x03C6), #GREEK SMALL LETTER PHI
"X" =>chr(0x03C7), #GREEK SMALL LETTER CHI
"Y" =>chr(0x03C8), #GREEK SMALL LETTER PSI
"W" =>chr(0x03C9), #GREEK SMALL LETTER OMEGA
);

# createNote - wrap $noteText in a complete textual <note> element.
# Returns the element followed by one space. The sub has no prototype because
# it takes an argument; an empty "()" prototype would reject any plain call.
sub createNote {
	my ($noteText) = @_;
	return("<note type=\"textual\">$noteText</note> ");
}
# openNote - start a textual <note> element that a later closeNote() finishes.
# Returns the opening tag, $noteText and one space. The sub has no prototype
# because it takes an argument.
sub openNote {
	my ($noteText) = @_;
	return("<note type=\"textual\">$noteText ");
}
# closeNote - finish a <note> element that was started with openNote().
# Returns $noteText, the closing tag and one space. The sub has no prototype
# because it takes an argument.
sub closeNote {
	my ($noteText) = @_;
	return("$noteText</note> ");
}


# Annotation token => explanatory text. Both translate*Note() subs look tokens
# up here, by exact match first and by longest known prefix as a last resort.
# Every key must occur only once: a repeated key silently replaces the earlier
# value.
my %notes = (
"{#}" => "Asterized passage (in Job).",
"{g}" => "Reference to difference between the text of Rahlfs and that of the relevant Göttingen edition.",
"..a" => "Word included in one of the Aramaic sections.",
",,a" => "Word included in one of the Aramaic sections.", #my addition, ok
"*" => "Ketib.",
"**" => "Qere.",
"*z" => "Qere wela ketib, ketib wela qere.",
#"[ ]" => "Reference of number of verse in LXX, different from MT.",  # TODO: MAKE USE OF IT
#"[[ ]]" => "Reference number of verse in MT, different from the LXX.",
"{x}" => "UNKNOWN",		# TODO: FIX
"--- {x}" => "Apparent minus created by lack of equivalence between long stretches of text in the LXX and MT.",
"--+ {x}" => "Apparent plus created by lack of equivalence between long stretches of text in the LXX and MT.",
"{...}" => "Equivalent reflected elsewhere in the text, disregarded by indexing program.", # the only "{...}" entry; a second one (a guessed "transposition" text) used to override it
"~" => "Difference in sequence between MT and LXX, denoted after the first Hebrew word and before the second one, as well as between two Greek words.",
"~~~" => "Equivalent of the Hebrew or Greek word(s) occurring elsewhere in the verse or context (transposition).",
"{..~}" => "Stylistic or grammatical transposition.",
"{..}" => "Stylistic or grammatical transposition.", # TODO: occurs in the text, unknown meaning
"{....}" => "Stylistic or grammatical transposition.", # TODO: occurs in the text, unknown meaning
"---" => "In the Greek column:  Hebrew counterpart lacking in the LXX (minus in the LXX).",
"---?" => "In the Greek column:  Hebrew counterpart lacking in the LXX (minus in the LXX)?",
"--" => "In the Greek column:  Hebrew counterpart lacking in the LXX (minus in the LXX).",  # TODO: my addition, check, probably wrong
"{---%}" => "In the Greek column:  Hebrew counterpart lacking in the LXX (minus in the LXX).",  # TODO: my addition, check??
"--?" => "In the Greek column:  Hebrew counterpart lacking in the LXX (minus in the LXX)?",  # TODO: my addition, check, probably wrong
"--+" => "In col a. of the Hebrew:  element \"added\" in the Greek (plus in the LXX).",
"---+" => "In col a. of the Hebrew:  element \"added\" in the Greek (plus in the LXX).", # TODO: my addition
"-.-" => "In col a. of the Hebrew:  element \"added\" in the Greek (plus in the LXX).", # TODO: my addition, check?
"-+" => "In col a. of the Hebrew:  element \"added\" in the Greek (plus in the LXX).", # TODO: my addition
"'" => "Long minus or plus (at least four lines).", # TODO: my addition, check
"''" => "Long minus or plus (at least four lines).",
"{d}" => "Reference to doublet (occurring between the two elements of the doublet).",
"={d};" => "Reference to doublet (occurring between the two elements of the doublet).", # TODO: occurs, check?
"{d?}" => "Reference to doublet (occurring between the two elements of the doublet)?",
"{d}?" => "Reference to doublet (occurring between the two elements of the doublet)?",
"{?d}" => "Reference to doublet (occurring between the two elements of the doublet)?",
"{..d}" => "Distributive rendering, occurring once in the translation but referring to more than one Hebrew word.",
"{..r}" => "Notation in Hebrew column of elements repeated in the translation.",
"?" => "Questionable notation, equivalent, etc.",
"??" => "Questionable notation, equivalent, etc.", # TODO: my addition
"{p}" => "Greek preverb representing Hebrew preposition.",
"{p}+" => "Greek preverb representing Hebrew preposition.",  # TODO: my addition ???
"{pm}" => "Greek preverb representing Hebrew preposition.",	# TODO: my addition, check
"{..p}" => "Preposition added in the LXX in accordance with the rules of the Greek language or translational habits.",
"{...p}" => "Preposition added in the LXX in accordance with the rules of the Greek language or translational habits.", #my addition
"{..^{p}" => "Preposition added in the LXX in accordance with the rules of the Greek language or translational habits.", #my addition

"{+}" => "Unknown.", # TODO: occurs in the text, unknown meaning
"{+?}" => "Unknown?", # TODO: occurs in the text, unknown meaning

"{!}" => "Infinitive absolute.",
"{!}+" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}n" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}na" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}na+" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}ad" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}aj" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}nad" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}nd" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}ndd" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}nd+" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}p" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}p+" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}pc" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}pd" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}-" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning
"{!}v" => "Infinitive absolute.", # TODO: occurs in the text, but unknown meaning

"{s}" => "Hebrew M/, MN (comparative, superlative) reflected by Greek comparative or superlative.", # TODO: UTF-8
"{t}" => "Transliterated Hebrew word.",
"{dt}" => "Transliterated Hebrew word.", # TODO: occurs. DOUBLET?
"{t.}" => "Transliterated Hebrew word.", # TODO: unknown
"<t?>" => "Transliterated Hebrew word.", # TODO: unknown
"{t?}" => "Transliterated Hebrew word?",

"{c}" => "Unknown.", # TODO: unknown
"{c}?" => "Unknown.", # TODO: unknown
"{z}" => "Unknown.", # TODO: unknown
"<qla>" => "Unknown.", # TODO: unknown
"#\"" => "Unknown.", # TODO: unknown
".vs" => "Unknown.", # TODO: unknown, can hardly be a change of cons. Ezek 40:49


"#" => "Long line continuing in next one, placed both at the end of the line running over and at the beginning of the following line in the opposite column.",
"{v}" => "The reading of the main text of the LXX seems to reflect a secondary text, while the \"original\" reading is reflected in a variant.",

# Notes regarding ColB of the Hebrew
#"=" => "Introducing col. b of the Hebrew (a selection of retroverted readings, presumably found in the parent text of the LXX).",
"={d}" => "Reference to doublet (occurring between the two elements of the doublet).", # TODO: my addition, check
"={d}\@" => "Reference to doublet (occurring between the two elements of the doublet). Etymological exegesis.", # TODO: my addition, check
"={d?}" => "Reference to doublet (occurring between the two elements of the doublet)?", # TODO: my addition, check
"=\%" => "Introducing categories of translation technique recorded in col. b.",
"=\%vap" => "Change from active to passive form in verbs.",
"=\%vpa" => "Change from passive to active form in verbs.",
"=\%vpa?" => "Change from passive to active form in verbs?",
"\%vpa" => "Change from passive to active form in verbs.",
"=vpa" => "Change from passive to active form in verbs.",  # TODO: my addition, check
"=\%p" => "Difference in preposition or particle.",
"=p\%" => "Difference in preposition or particle.",
"=\%p?" => "Difference in preposition or particle?",
"=\%?p" => "Difference in preposition or particle?",
"=p" => "Difference in preposition or particle.", # TODO: my addition, check, uncertain?
"={d}\%p" => "Difference in preposition or particle.", # TODO: my addition, check, uncertain? DOUBLET?
"=\%pa" => "Difference in preposition or particle.", # TODO: my addition, check
"=\%p=" => "Difference in preposition or particle.", # TODO: my addition, check
"=\%p+" => "Addition of preposition or particle.",
"\%p+" => "Addition of preposition or particle.",  #my addition
"=\%p+?" => "Addition of preposition or particle?",
"=\%p-" => "Omission of preposition or particle.",
"=\%p-?" => "Omission of preposition or particle?",
"=\%?p-" => "Omission of preposition or particle?",
"=p\%-" => "Omission of preposition or particle.", # TODO: my addition, check, uncertain?
"=p-" => "Omission of preposition or particle.", # TODO: my addition, check, uncertain?
"=;" => "Retroversion in col. b based on equivalence occurring in immediate or remote context.",
#"G" => "Hebrew variant, but at this stage no plausible retroversion is suggested.",
"=+" => "Difference in numbers between MT and the LXX.",
"=\@" => "Etymological exegesis.",
"=?\@" => "Etymological exegesis?", #my addition
"=\@?" => "Etymological exegesis?", #my addition
"=\@...a" => "Etymological exegesis according to Aramaic.",
"=\@a" => "Etymological exegesis according to Aramaic.", # TODO: my addition, check
"=:" => "Introducing reconstructed proper noun.",
"=:?" => "Introducing reconstructed proper noun?",
":" => "Introducing reconstructed proper noun.",	#my addition
"=v" => "Difference in vocalization (reading).",
"={d}v" => "Difference in vocalization (reading).",   # TODO: check, occurs in text, DOUBLET?
"=v?" => "Difference in vocalization (reading)?",
"=vs" => "Difference in vocalization (reading).", # TODO: check, occurs in text
"=>" => "Difference in vocalization (reading).", # TODO: check, occurs in text
"=r" => "Incomplete retroversion.",

"=a" => "Aramaic?",	# TODO: UNKNOWN


"{*}" => "Agreement of LXX with ketib.",
"{**}" => "Agreement of LXX with qere.",
"{**" => "Agreement of LXX with qere.", #my addidion
"{**?}" => "Agreement of LXX with qere?", #my addition

"." => "Interchange of consonants between MT and the presumed Hebrew parent text of the LXX.",

".a" => "Interchange of consonants between MT and the presumed Hebrew parent text of the LXX.",  # TODO: occurs, unknown
".m" => "Metathesis of consonants between MT and the presumed Hebrew parent text of the LXX.",
".z" => "Possible abbreviation.",
".s" => "One word of MT separated into two or more words in the parent text of the LXX.",
".j" => "Two words of MT joined into one word in the parent text of the LXX.",
".w" => "Different word-division reflected in the parent text of the LXX.",

"(!)" => "(!)", #my addition


"<sp" => "<sp",  #TODO: FIX, occurs in text
"<sp>" => "<sp>",  #TODO: FIX, occurs in text
"<sp^>" => "<sp^>",  #TODO: FIX, occurs in text

"^" => "^",			#Notsure what these are
"?^" => "?^",			#Notsure what these are
"^^^" => "^^^",
"___" => "___",

);

#
# translateHebrewNote - convert one annotation token of the Hebrew columns
#
# Argument: the raw token (e.g. "{d}", ".rd", "=%p", "[...]").
# Returns:  the replacement text: <note> markup for recognised notes, the
#           unchanged token for cross references, and the empty string for a
#           bare "=" (which only introduces column B).
# Dies:     if the token is empty or cannot be interpreted.
#
# The tests run in a fixed order, specific before general; the first one that
# matches decides the result. The sub has no prototype because it takes an
# argument; all callers use the &name(...) form, which works either way.
#
sub translateHebrewNote {
	my ($origNote) = @_;

#	print("TranslateHebrewNote $origNote\n");

	# Test for "no text at all" rather than for falseness, so that the token
	# "0" is reported as an unknown note and not as an empty one.
	(defined $origNote and length $origNote) or die("Hebrew note empty.");

	# Return an empty string instead of undef: callers concatenate the result.
	($origNote eq "=") and return "";	#= only marks colB, no real note

	($notes{ $origNote }) and return( &createNote( $notes{$origNote} ) );

	($origNote =~ m/^=?\.([a-z()&\$+-])([a-z()&\$+-]+)$/) and 
		return( &createNote("Interchange of consonants (" . 
			&translateHebrewLetters( uc( $1 ) ) . "/" . &translateHebrewLetters( uc( $2 ) ) .
			") between MT and the presumed Hebrew parent text of the LXX.") );

	($origNote =~ m/^\[([^\]]+)\]/) and 
		return( &createNote( "Number of verse in LXX ($1) is different from MT." ) );

	#special case: no note, but a crossref (no book ID)     # TODO: for now OSIS refs are not parsed FIX THIS, see above
	($origNote =~ m/^=?\[\[|\]\]$/) and
		return $origNote;

	($origNote =~ m/^=?{\.\.d(.+)}/) and 
		return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..d}" }) );

	($origNote =~ m/^=?{\.\.\.r(.+)}/) and 
		return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..r}" }) );

	($origNote =~ m/^=?{\.\.r(.+)}/) and 
		return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..r}" }) );

	($origNote =~ m/^=?{\.\.\.(.+)}/) and 
		return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{...}" }) );

	($origNote =~ m/^=?{\.\.(.+)}/) and 
		return( &createNote("(".&translateHebrewWordorNote( $1 ).") ". $notes{ "{..}" }) );

	#Special cases: the note includes more than one word, "cat" the results together
	($origNote =~ m/^=?{\.\.\.([^}]+)$/) and 
		return( &openNote( $notes{"{...}"} . "(".&translateHebrewWordorNote($1) ." " ) );
	($origNote =~ m/^=?{\.\.r([^}]+)$/) and 
		return( &openNote( $notes{"{..r}"} . "(".&translateHebrewWordorNote($1) ." " ) );
	($origNote =~ m/^=?{\.\.([^}]+)$/) and 
		return( &openNote( $notes{"{..}"} . "(".&translateHebrewWordorNote($1) ." " ) );

	# Last word of a multi-word note opened by one of the three tests above.
	($origNote =~ m/^([^{]+)}$/) and 
		return( &closeNote(&translateHebrewWordorNote( $1 ).")" ) );


	($origNote =~ m/^=?@([?$hebrewLetters]+)/) and
		return( &createNote( $notes{ "=\@" } ) . &translateHebrewWordorNote( $1 ) );

	($origNote =~ m/^=?\^([?$hebrewLetters]+)/) and
		return( "^" . &translateHebrewWordorNote( $1 ) );   # TODO: check, what is ^?

	#
	# Special handling for the = colB Notes
	#
	if (($origNote =~ m/^=/) and (not $notes{ $origNote } )) { #only split if the note does not exist, to avoid parsing problems
# 		print("note reads $origNote\n");
		if ($origNote =~ m/^=(<[0-9.a-z]+>)$/){
			return( &translateHebrewWordorNote( $1 ) );
		}
		elsif ($origNote =~m/^=(.+)$/ and $notes{ $1 }){
			return( &translateHebrewWordorNote( $1 ) );
		}
		elsif ($origNote =~ m/^=([?$hebrewLetters]+)/){
			return( &translateHebrewWordorNote( $1 ) );
		}
	}

	#special case: no note, but a crossref (no book ID)     # TODO: for now OSIS refs are not parsed
	($origNote =~ m/^<|>$/) and
#		return("<reference osisRef=\"$1.$2\"/>");
		return $origNote;

	($origNote =~ m/^.+[.].+$/) and #Occurs e.g.: "<gen1.1 ex1.2 lev3.3"
		return $origNote;

	($origNote =~ m/^[?](.*)/) and 
		return( &createNote( $notes{"?"} ) . &translateHebrewWordorNote( $1 ) );

	($origNote =~ m/^(.+),(.+)$/) and 	# 2 Notes / Words, split up, but only at the end
		return( &translateHebrewWordorNote( $1 ). "," .&translateHebrewWordorNote( $2 ) );

	# The braces are escaped here because they follow a literal character; newer
	# perls warn about (or reject) an unescaped "{" in that position.
	($origNote =~ m/^=\{d\}@([?$hebrewLetters]+)a/) and #special case ;)
		return( &createNote( $notes{"{d}"} ." ". $notes{ "=\@a" } ) . &translateHebrewWordorNote( $1 ) );

	#last try, split up into chunks: a known note followed by a remainder.
	#Start with the longest proper prefix and become shorter, to find the
	#complicated notes {!}p before the simple {!}. The longest candidate leaves
	#a remainder of exactly one character.
	HANDLE_NOTE_FALLBACK: for my $headLength ( reverse( 1 .. (length($origNote) - 1) ) ){
		my $head = substr($origNote, 0, $headLength);
		if ( $notes{ $head } ){
			return( &translateHebrewNote( $head ) . &translateHebrewWordorNote( substr($origNote, $headLength) ) );
		}
	}
	
	die("Note $origNote not found.\n");
}

#
# translateHebrewLetters - map a transliterated Hebrew string to its replacement text
#
# Argument: the transliteration; each character must be a key of %hebrew2utf8.
# Returns:  the concatenated replacements. If the very last character is one of
#           K M N P C, its word-final shape from %final_hebrew2utf8 is used.
# Dies:     on an empty string or on a character without a mapping.
#
# The sub has no prototype because it takes an argument.
#
sub translateHebrewLetters { #will return unicode hebrew without morph separation
	my ($hebrew) = @_;

	# Test for "no text", not for falseness, so that "0" is reported as an
	# unknown letter instead of as an empty string.
	(defined $hebrew and length $hebrew) or die "Hebrew string empty...\n";

	my $lastIndex = length( $hebrew )-1;
	my $result = "";

	foreach my $i (0..$lastIndex ){
		my $letter = substr($hebrew,$i,1);
		if ($i == $lastIndex && $letter =~ m/^[KMNPC]$/){
			$result .= $final_hebrew2utf8{ $letter } || die("Could not find FINAL Hebrew letter '$letter' in $hebrew\n");
# 			printf("successfully inserted $final_hebrew2utf8{ $letter }\n");
		}
		else{
			$result .= $hebrew2utf8{ $letter } || die("Could not find Hebrew letter '$letter' in $hebrew\n");
		}
	}
	return $result;
}

#
# translateHebrewWordorNote - translate one whitespace-separated token of a Hebrew column
#
# Argument: the token. It is handed to translateHebrewNote() if it is a known
#           note, does not start with a Hebrew transliteration character, or
#           ends in "}". A word directly followed by "[...]" is split into the
#           word and the bracketed note.
# Returns:  the note text, or the word wrapped in morphological <seg> markup.
# Dies:     on an empty token (and wherever the subs it calls die).
#
# The sub has no prototype because it takes an argument.
#
sub translateHebrewWordorNote { #will return unicode hebrew with morph separation
	my ($hebrew) = @_;

	# Test for "no text", not for falseness, so that "0" is not called empty.
	(defined $hebrew and length $hebrew) or die("Hebrew string empty.");

# 	print("TranslateHebrew of: $hebrew\n");

	$hebrew =~ s/^mn$/.mn/;	#Ezek 24:17, error?

	( $notes{ $hebrew } ) and return &translateHebrewNote( $hebrew ); # exact match first

	( $hebrew =~ m/^[^$hebrewLetters]/ ) and return &translateHebrewNote( $hebrew );
	( $hebrew =~ m/^([$hebrewLetters]+)(\[.+\])/ ) and 
		return &translateHebrewWordorNote( $1 ) . &translateHebrewNote( $2 );
	( $hebrew =~ m/[}]$/ ) and return &translateHebrewNote( $hebrew );

	#NOW WE ASSUME IT IS A HEBREW WORD
	return $MorphologicalSegmentStart . &translateHebrewLetters( $hebrew ) . $MorphologicalSegmentEnd;
}


#
# translateGreekNote - convert one annotation token of the Greek column
#
# Argument: the raw token (e.g. "{d}", "{..pEN}", "[...]", "<...>").
# Returns:  the replacement text: <note> markup for recognised notes, an
#           opening or closing note fragment for multi-word notes, or the
#           unchanged token for cross references.
# Dies:     if the token is empty or cannot be interpreted.
#
# The tests run in a fixed order, specific before general; the first one that
# matches decides the result. The sub has no prototype because it takes an
# argument; all callers use the &name(...) form, which works either way.
#
sub translateGreekNote {
	my ($origNote) = @_;

#	print("TranslateGreekNote $origNote\n");

	# Test for "no text at all" rather than for falseness, so that the token
	# "0" is reported as an unknown note and not as an empty one.
	(defined $origNote and length $origNote) or die("Greek note empty.");

	($notes{ $origNote }) and return( &createNote( $notes{$origNote} ) );

	($origNote =~ m/^\[\[([^\]]+)\]\]/) and 
		return( &createNote( "Number of verse in MT ($1) is different from the LXX." ) );

	# NOTE(review): the closing bracket is optional here, so this test also takes
	# every token that merely starts with "[". The "^[[" alternative and the
	# "^[...$" alternative of the two cross-reference tests below can therefore
	# never match -- confirm which behaviour is wanted.
	($origNote =~ m/^\[([^\]]+)\]?/) and 
		return( &createNote( "Number of verse in LXX ($1) is different from MT." ) );

	#special case: no note, but a crossref (no book ID)     # TODO: for now OSIS refs are not parsed FIX THIS, see above
	($origNote =~ m/^\[\[|\]\]$/) and
		return $origNote;

	#special case: no note, but a crossref (no book ID)     # TODO: for now OSIS refs are not parsed FIX THIS, see above
	($origNote =~ m/^\[[^\]]+$|\d+\]$/) and     #e.g. [119. 7], [111.2
		return $origNote;

	($origNote =~ m/^=?{\.\.\.(.+)\.\.\.(.+)}/) and  # special case: {...word1...word2}
		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{...}" })
			. &createNote("(".&translateGreekWordorNote( $2 ).") ". $notes{ "{...}" })); 

	($origNote =~ m/^=?{\.\.\.p(.+)}/) and 
		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{...p}" }) );

	($origNote =~ m/^=?{\.\.p(.+)}/) and 
		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..p}" }) );

	($origNote =~ m/^=?{\.\.~(.+)}/) and 
		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..~}" }) );

	($origNote =~ m/^=?{\.\.\.r(.+)}/) and 
		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..r}" }) );

	($origNote =~ m/^=?{\.\.r(.+)}/) and 
		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..r}" }) );

	# NOTE(review): the key "{..c}" is looked up here but is not among the %notes
	# entries visible in this file, so the note text may end up without an
	# explanation -- add the entry once its meaning is known.
	($origNote =~ m/^=?{\.\.c(.+)}/) and 
		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..c}" }) );

	($origNote =~ m/^=?{\.\.d(.+)}/) and 
		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..d}" }) );

	($origNote =~ m/^=?{\.\.\.d(.+)}/) and # TODO: my addition, check
		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..d}" }) );

	($origNote =~ m/^=?{\.\.\.\.(.+)}/) and 
		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{....}" }) );

	($origNote =~ m/^=?{\.\.\.(.+)}/) and 
		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{...}" }) );

	($origNote =~ m/^=?{\.\.(.+)}/) and 
		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{..}" }) );

	($origNote =~ m/^=?{c(.+)}/) and 
		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{c}" }) );	# TODO: occurs {cXXX}, not documented

	($origNote =~ m/^=?{g(.+)}/) and 
		return( &createNote("(".&translateGreekWordorNote( $1 ).") ". $notes{ "{g}" }) );

	#Special cases: the note includes more than one word, "cat" the results together
	($origNote =~ m/^=?{\.\.d([^}]+)$/) and 
		return( &openNote( $notes{"{..d}"} . "(".&translateGreekWordorNote($1) ." " ) );
	($origNote =~ m/^=?{\.\.r([^}]+)$/) and 
		return( &openNote($notes{"{..r}"} . "(" .&translateGreekWordorNote($1) ." " ) );
	($origNote =~ m/^=?{\.\.p([^}]+)$/) and 
		return( &openNote($notes{"{..p}"} . "(" .&translateGreekWordorNote($1) ." " ) );
	($origNote =~ m/^=?{\.\.\.d([^}]+)$/) and 
		return( &openNote($notes{"{..d}"} . "(" .&translateGreekWordorNote($1) ." " ) );
	($origNote =~ m/^=?{\.\.\.([^}]+)$/) and 
		return( &openNote($notes{"{...}"} . "(" .&translateGreekWordorNote($1) ." " ) );
	# A two-dot note takes the "{..}" text, like its closed form above. This
	# test also covers tokens of the form {..^word, so no separate test for
	# them is needed.
	($origNote =~ m/^=?{\.\.([^}]+)$/) and 
		return( &openNote($notes{"{..}"} . "(" .&translateGreekWordorNote($1) ." " ) );
	($origNote =~ m/^=?{c([^}]+)$/) and 
		return( &openNote($notes{"{c}"} . "(" .&translateGreekWordorNote($1) ." " ) );	# TODO: occurs {cXXX}, not documented

	#special case: no note, but a crossref (no book ID)     # TODO: for now OSIS refs are not parsed
	($origNote =~ m/^<.+>?$/) and
		return $origNote;
	#special case: no note, but a crossref (no book ID)     # TODO: for now OSIS refs are not parsed
	($origNote =~ m/^{=\d+}$/) and
		return $origNote;

	# Last word of a multi-word note opened by one of the tests above.
	($origNote =~ m/^([^{]+)}$/) and 
		return( &closeNote(&translateGreekWordorNote( $1 ).")" ) );
	($origNote =~ m/^([^{]+)}\?$/) and 
		return( &closeNote(&translateGreekWordorNote( $1 )."?)" ) );

	($origNote =~ m/^(.+),(.+)$/) and 	# 2 Notes / Words, split up, but only at the end
		return( &translateGreekWordorNote( $1 ). "," .&translateGreekWordorNote( $2 ) );

	($origNote eq "}") and return $origNote;	#special case in EZEK 47:20, superfluous bracket

	($origNote =~ m/^{([^}]+}?)$/) and # TODO: occurs, seems unreasonable
		return( "(" .&translateGreekWordorNote($1) ." " );

	#last try, split up into chunks: a known note followed by a remainder.
	#Start with the longest proper prefix and become shorter, to find the
	#complicated notes {!}p before the simple {!}. The longest candidate leaves
	#a remainder of exactly one character.
	HANDLE_NOTE_FALLBACK: for my $headLength ( reverse( 1 .. (length($origNote) - 1) ) ){
		my $head = substr($origNote, 0, $headLength);
		if ( $notes{ $head } ){
			return( &translateGreekNote( $head ) . &translateGreekWordorNote( substr($origNote, $headLength) ) );
		}
	}
	die("Note $origNote not found.\n");
}

#
# translateGreekLetters - map a transliterated Greek string to its replacement text
#
# Argument: the transliteration; each character must be a key of %greek2utf8.
# Returns:  the concatenated replacements, in input order.
# Dies:     on an empty string or on a character without a mapping.
#
# The sub has no prototype because it takes an argument.
#
sub translateGreekLetters {
	my ($greek) = @_;

	# Test for "no text", not for falseness, so that "0" is reported as an
	# unknown letter instead of as an empty string.
	(defined $greek and length $greek) or die "Greek string empty...\n";

	my $result = "";
	foreach my $letter ( split(//, $greek) ){
		$result .= $greek2utf8{ $letter } || die("Could not find Greek letter '$letter' in $greek\n");
	}
	return $result;

}

#
# translateGreekWordorNote - translate one whitespace-separated token of the Greek column
#
# Argument: the token. It is handed to translateGreekNote() if it is a known
#           note, does not start with a Greek transliteration character, or
#           ends in "}" or "}?". A token ending in "[...]" is split into the
#           part before the bracket and the bracketed note; "a...b" is split
#           at the three dots.
# Returns:  the note text or the transliterated word.
# Dies:     on an empty token (and wherever the subs it calls die).
#
# The sub has no prototype because it takes an argument.
#
sub translateGreekWordorNote {

	my ($greek) = @_;

	# Test for "no text", not for falseness, so that "0" is not called empty.
	(defined $greek and length $greek) or die("Greek string empty.");
#	printf("TranslateGreek of $greek\n");

	( $notes {$greek} ) and return &translateGreekNote( $greek ); # exact match first

	( $greek =~ m/^[^$greekLetters]/ ) and return &translateGreekNote( $greek );
	( $greek =~ m/[}]\??$/ ) and return &translateGreekNote( $greek );
	( $greek eq "#" ) and return &translateGreekNote( "#" );

	# Covers every "...[x]" ending, including the purely numeric "[12]" form;
	# a separate test for "[digits]" after this one could never match.
	( $greek =~ m/(.+)(\[.+\])$/ ) and return &translateGreekWordorNote( $1 ) .&translateGreekNote( $2 );

	( $greek =~ m/^([^.]+)\.\.\.([^.]+)$/) and # TODO: occurs e.g.  {..bla1 bla2...bla3 #TODO: format output
		return( &translateGreekWordorNote($1) . "..." . &translateGreekWordorNote($2) );

	return &translateGreekLetters( $greek );
}

# Set to 1 by parseLine() as soon as a line contributes something to Hebrew
# column B; grabVerseContent() resets it and uses it to pick the table header.
my $colBContent;

#
# parseLine - convert one data line into an OSIS table row
#
# Argument: the raw line: Hebrew column A, optionally column B (introduced by
#           "="), a tab, then the Greek column.
# Returns:  "<row>...</row>" with three cells (Hebrew A, Hebrew B, Greek), or
#           nothing for the one line that is deliberately ignored.
# Dies:     if the line does not have that shape, or if a token cannot be
#           translated by the subs called here.
#
# Before the line is split, a list of corrections for known irregularities of
# the source text is applied. The sub has no prototype because it takes an
# argument.
#
sub parseLine {
	my ($origLine) = @_;
	my $result = "";

#	printf("parsing %s\n", $origLine);

	# Literal braces in the patterns below are escaped: newer perls warn about
	# (or reject) an unescaped "{" that follows a literal character.
	$origLine =~ s/ --=/ --+ =/;	# TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
	$origLine =~ s/ -\%vap/ =\%vap/;	# TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
	$origLine =~ s/ ;=/ =;/;	# TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
	$origLine =~ s/ \+;/ =;/;	# TODO: UGLY HACK, this appears in the text but not the notes; this seems most reasonable
	$origLine =~ s/[\ ]{10,}/\t/;	# TODO: UGLY HACK, sometimes the tab in the wrong place but large spaces in the right
	$origLine =~ s/\.h-<ge10\.4/.h- <ge10.4>/;	# TODO: UGLY HACK, sometimes the tab in the wrong place but large spaces 
	$origLine =~ s/\}\{/} {/;	# TODO: UGLY HACK, notes hung together
	$origLine =~ s/=a\$\/DY/=A\$\/DY/;# TODO: UGLY HACK, Hebrew letter wrong
	$origLine =~ s/\{\.\.\^EPIQEI\\S\.\.\^E\)FI\/LHSA\}/{..^EPIQEI\\S E)FI\/LHSA}/;# TODO: UGLY HACK, strange note
	$origLine =~ s/E\t\)KPE\/SH\|/\tE)KPE\/SH|/; #occurs, tab misplaced
	$origLine =~ s/^\(..r\(L\/YK\}/{..r(L\/YK}/; # in EZEK

	$origLine =~ s/^DANW \{t\}$/DANW\t{t}/; # in DAN
	$origLine =~ s/AI\)W=NOS\[110\.10/AI)W=NOS [110.10/; # in PS
	$origLine =~ s/W\/YD\(Y\{\*\*\}/W\/YD(Y {**}/; # in PS
	$origLine =~ s/\{\.1\.dU\(PE\\R\}/{..dU(PE\\R}/; # in PS

	$origLine =~ m/^W\(\/SPER/ and return;	#ignore, probably an error

	($origLine eq "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY KAI\\ TO\\N AMORRAI=ON ") and 
		$origLine = "W/)T H/GRG\$Y ^ =W/)T W/H/)MRY\tKAI\\ TO\\N AMORRAI=ON"; # TODO: hack, Tab missing

	# The backslash after KAI (the grave accent) has to be doubled inside the
	# double-quoted replacement, otherwise it is silently dropped.
	($origLine eq "W/H/KHNYM =W/H/)BNYM .m .kb # KAI\\ OI( LI/QOI ") and # in JoshB: Tab misplaced
		$origLine = "W/H/KHNYM =W/H/)BNYM .m .kb\tKAI\\ OI( LI/QOI"; # TODO: hack, Tab missing

	($origLine eq "{...?AU)TOU=} MDBR =v\tLALOU=NTOS") and 
		$origLine = "MDBR =v\tLALOU=NTOS"; # In EZEK: TODO: error, greek in first col

	($origLine eq "W/YC+YRW =;W/YC+YDW .rd <9.12 E)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO ") and 
		$origLine = "W/YC+YRW =;W/YC+YDW .rd <9.12\tE)PESITI/SANTO {d} KAI\\ H(TOIMA/SANTO"; # TODO: hack, Tab missing
	($origLine eq "W/YBW) {...EI)S}\tKAI\\ EI)SH=LQEN") and 
		$origLine = "W/YBW)\t{...EI)S} KAI\\ EI)SH=LQEN"; # TODO: hack, TAB misplaced
	($origLine eq "W/L) {..^OU)}\tDE\\") and 
		$origLine = "W/L)\t{..^OU)} DE\\"; # TODO: hack, TAB misplaced

# 	print("parsing %s\n", $origLine);

	# 3 columns= Hebrew ColA, Hebrew ColB, Greek
	# The captures are copied into lexicals at once, and the word lists are
	# declared unconditionally: a "my" that is only executed when a condition
	# holds has undefined behaviour in Perl.
	my ($colA, $colB, $greek) = ($origLine =~ m/^([^=\t]+)?([=][^\t]*)?\t(.+)$/)
		or die("No match in parseLine().\n");
	($colA or $colB) or die("Hebrew not found.\n");
	$greek or die("Greek not found.\n");

	my @hebrewWordsColA = $colA ? split(/\s+/, $colA) : ();
	my @hebrewWordsColB = $colB ? split(/\s+/, $colB) : ();
	my @greekWords      = split(/\s+/, $greek);

#	printf("1: $colA 2: $colB 3: $greek\n");

	$result .= "<row>\n  <cell>";
	foreach my $wordA (@hebrewWordsColA){
		($wordA) and $result .= &translateHebrewWordorNote( $wordA ) . " ";
	}
	$result .= "</cell>\n  <cell>";

	foreach my $wordB (@hebrewWordsColB){
		if ($wordB) {
			$result .= &translateHebrewWordorNote( $wordB ) . " ";
			$colBContent = 1;
		}
	}
	$result .= "</cell>\n  <cell>";

	foreach my $wordG (@greekWords){
		($wordG) and $result .= &translateGreekWordorNote( $wordG ). " ";
	}
	$result .= "</cell>\n</row>";

#  	printf("Result: %s\n", $result);
	return $result;
}

# Heading rows for a verse table. The middle heading is "(LXX)" when the verse
# has content in Hebrew column B and is left empty when it has none.
my $headerWithColB    = join("", "<row>\n", "  <cell>MT</cell><cell>(LXX)</cell><cell>LXX</cell>\n", "</row>");
my $headerWithoutColB = join("", "<row>\n", "  <cell>MT</cell><cell></cell><cell>LXX</cell>\n", "</row>");

#
# grabVerseContent - if the Verse can be found, returns its Content, otherwise nothing
#
# Arguments: book name as used in the file, chapter, verse, then the lines of
#            the file.
# Returns:   a list: the table header row, followed by one row per line between
#            the verse heading and the next blank line (or the end of the
#            lines). An empty list if no heading for the verse exists.
#
# The heading is "<book> <chapter>:<verse>"; for "Obad" it is "<book> <verse>"
# and only chapter 1 is ever looked for. The verse number must not be followed
# by another digit, so verse 2 cannot be taken for 20. The sub has no prototype
# because it takes arguments.
#
sub grabVerseContent {  #Bookname, chapter, verse, @list
	my ($bookname, $chapter, $verse, @buffer) = @_;

#	printf("Parsing $bookname $chapter:$verse...\n");

	$colBContent = undef;	# parseLine() sets it when the verse has colB content

	my $verseHeading;
	if ($bookname eq "Obad"){ #special handling, no chapter:verse structure
		($chapter == 1) or return; #only for the first chapter
		$verseHeading = qr/^$bookname ${verse}(?!\d)/;
	}
	else{
		$verseHeading = qr/^$bookname $chapter:${verse}(?!\d)/;
	}

	foreach my $headingIndex (0..$#buffer){
		($buffer[$headingIndex] =~ $verseHeading) or next;

		# Collect the rows up to the next blank line; stop at the end of the
		# buffer instead of reading past it.
		my @rows;
		my $index = $headingIndex;
		while ( ++$index <= $#buffer and not $buffer[$index] =~ m/^\n|^\s*$/ ){
			push(@rows, &parseLine( $buffer[$index] ) );
		}

		return( ($colBContent ? $headerWithColB : $headerWithoutColB), @rows );
	}

	return;	#Nothing found, don't return a value.
}

#
# processBook - convert one book file into a list of OSIS lines.
#
# Arguments: file name, book name as written in the file, ThML id, OSIS id,
#            short book title (the ThML id and the title are accepted but not
#            used here).
# Returns:   the lines of a <div type="book"> element with one <chapter> per
#            chapter and one <verse> containing a <table> per verse.
#
# Chapters and verses are probed in ascending order starting at 1; a chapter
# ends at the first verse that cannot be found and the book ends at the first
# chapter without any verse. Dies when chapter 1 has no content at all.
# Obadiah gets no <chapter> elements and its verse IDs omit the chapter.
#
sub processBook(){
	my ($filename, $bookname_infile, $thml_id, $osis_id, $short_book_title) = @_;

	my $is_obadiah = ($bookname_infile eq "Obad");
	my @lines = &loadFile( $filename );

	my @output = ("<div type=\"book\" osisID=\"$osis_id\">");

	CHAPTER: for my $chapter (1..1000){
		print("Processing $bookname_infile chapter $chapter.\n");
		my $chapter_open = 0;
		my $verses_in_chapter = 0;

		VERSE: for my $verse (1..1000){
			my @rows = &grabVerseContent($bookname_infile, $chapter, $verse, @lines);

			if (not @rows){ #verse nonexistent, goto next chapter
				push(@output, "</chapter>") if $chapter_open;
				last VERSE;
			}

			my $verse_id;
			if ($is_obadiah){ #no chapters in Obadiah, chapter>1 never yields content
				$verse_id = "$osis_id.$verse";
			}
			else{
				if (not $chapter_open){
					push(@output, "<chapter osisID=\"$osis_id.$chapter\">");
					$chapter_open = 1;
				}
				$verse_id = "$osis_id.$chapter.$verse";
			}

			push(@output, "<verse osisID=\"$verse_id\">\n<table>");
			push(@output, @rows);
			push(@output, "</table>\n</verse>");
			$verses_in_chapter++;
		}

		next CHAPTER if $verses_in_chapter;

		#chapter empty, stop here
		die("Error: no content in $bookname_infile") if $chapter == 1;
		last CHAPTER;
	}

	push(@output, "</div>"); #book
	print("done.\n");

	return(@output);
}

#
# processBookVariant - convert a book that exists in two textual variants
# (two source files) into a list of OSIS lines.
#
# Arguments: file A, book name in file A, label of variant A,
#            file B, book name in file B, label of variant B,
#            ThML id, OSIS id, short book title (the ThML id and the title are
#            accepted but not used here).
# Returns:   the lines of a <div type="book"> element. A verse present in both
#            variants becomes an outer one-row table with one labelled cell per
#            variant, each holding that variant's verse table; a verse present
#            in only one variant gets just that variant's table.
#
# Chapters and verses are probed in ascending order starting at 1; a chapter
# ends at the first verse missing from both variants and the book ends at the
# first chapter without any verse. Dies when chapter 1 has no content at all.
#
sub processBookVariant {
	my ($filenameA, $bookname_infile_A, $variantNameA,
	    $filenameB, $bookname_infile_B, $variantNameB,
	    $thml_id, $osis_id, $short_book_title) = @_;

	# called with & (as in processBook) so the call does not depend on where
	# loadFile is defined or on its prototype
	my @BUFA = &loadFile( $filenameA );
	my @BUFB = &loadFile( $filenameB );

	my @result;

	push(@result, "<div type=\"book\" osisID=\"$osis_id\">");

	CHAPTER: foreach my $chapter(1..1000){
		print("Processing $bookname_infile_A and $bookname_infile_B chapter $chapter.\n");
		my $chapter_header_written;
		my $verse_found;
		VERSE: foreach my $verse(1..1000){
			my @verseContentA = &grabVerseContent($bookname_infile_A, $chapter, $verse, @BUFA);
			my @verseContentB = &grabVerseContent($bookname_infile_B, $chapter, $verse, @BUFB);

			if (not @verseContentA and not @verseContentB){ #verse nonexistent, goto next chapter
				if ($chapter_header_written) { push(@result, "</chapter>"); }
				last VERSE;
			}

			if (not $chapter_header_written) {
				push(@result, "<chapter osisID=\"$osis_id.$chapter\">");
				$chapter_header_written = 1;
			}
			push(@result, "<verse osisID=\"$osis_id.$chapter.$verse\">");
			$verse_found = 1;

			if (@verseContentA and @verseContentB){
				# both variants: side by side in an outer one-row table
				push(@result, "<table><row><cell>" . $variantNameA ."\n");
				push(@result, "<table>", @verseContentA, "</table>");
				push(@result, "</cell><cell>");
				push(@result, $variantNameB . "\n");
				push(@result, "<table>", @verseContentB, "</table>");
				push(@result, "</cell></row></table>");
			}
			elsif (@verseContentA){
				push(@result, "<table>", @verseContentA, "</table>");
			}
			else{
				push(@result, "<table>", @verseContentB, "</table>");
			}

			push(@result, "</verse>");
		}
		if (not $verse_found){ #chapter empty, stop here
			if ($chapter == 1) { die("Error: no content in $bookname_infile_A and $bookname_infile_B"); }
			last CHAPTER;
		}
	}

	push(@result, "</div>"); #book
	print("done.\n");

	return(@result);

}

#
# loadFile - load "$prefix/<filename>" into a list of lines and make small
# corrections to the line structure.
#
# Argument: file name relative to the global $prefix directory.
# Returns:  the chomped lines of the file, where
#           - a line starting with one of the known broken-off fragments below
#             is glued back onto the previous line (with or without a space);
#           - a line ending in "#" followed by a line starting with "#" loses
#             its trailing "#", and the following "#<TAB>rest" line is appended
#             to it ("#" is used in Daniel as a "continue line on next line"
#             marker).
# Dies when the file cannot be opened.
#
sub loadFile { #$fileName	loads the file into the buffer and makes small corrections
	my $filename = shift;

	open( my $fh, '<', "$prefix/$filename" ) or die("Could not open file $prefix/$filename: $!");
	my @buffer = <$fh>;
	close( $fh );
	chomp(@buffer);

	# [ start of a broken-off line, separator used when re-attaching it ]
	my @fragmentRules = (
		[ qr/^DANIHL/,           " " ],
		[ qr/^NUMA/,             ""  ],
		[ qr/^DEUTERONO\/MION/,  " " ],
		[ qr/^AU\)TOU=/,         " " ],
		[ qr/^E\(\/C/,           " " ],
		[ qr/^MOU/,              " " ],
		[ qr/^NEHL$/,            ""  ],	# no space, ANANEL
		[ qr/^ESTHKE\/NAI$/,     ""  ],
		[ qr/^ESTHKW\\S$/,       ""  ],
		[ qr/^ISA/,              ""  ],	# a few lines in ISAIAH have this in different styles
		[ qr/^LAMYAN/,           ""  ],	# in LAM
		[ qr/^EZEKIHL/,          ""  ],
		[ qr/^\)$/,              ""  ],	# in PS
		[ qr/^PS[Y\s]/,          ""  ],	# in PS; breaks at PS or PSY
	);

	my @result;
	LINE: foreach my $index (0 .. $#buffer){
		my $line = $buffer[$index];

		foreach my $rule (@fragmentRules){
			my ($pattern, $separator) = @$rule;
			next unless $line =~ $pattern;
			if (@result){
				$result[-1] .= $separator . $line;
			}
			else{
				# nothing to attach to yet: keep the fragment as the first line
				# instead of dying on $result[-1] of an empty array
				push(@result, $line);
			}
			next LINE;
		}

		# neighbours, without running off either end of the buffer
		# (index -1 would silently wrap around to the last line)
		my $previous = $index > 0        ? $buffer[$index - 1] : "";
		my $next     = $index < $#buffer ? $buffer[$index + 1] : "";

		if ($next =~ m/^#/ and $line =~ m/^(.*)#$/){	# in Daniel, # is used as a "continue line on next line" marker
			push(@result, $1);
		}
		elsif ($previous =~ m/^(.*)#$/ and $line =~ m/^#\t(.*)/){	# second half of a continued line
			# $index > 0 here, so @result already holds the first half
			$result[-1] .= $1;
		}
		else{
			push(@result, $line);
		}
	}
	return @result;
}

# Lines of the generated OSIS document, in output order. Starts with the XML
# declaration, the <osis>/<osisText> opening tags and the work header.
my @result = (
	"<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n",

	"<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\"  xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace osisCore.2.1.xsd\">\n",

	"<osisText osisIDWork=\"MT-LXX-Parallel\" xml:lang=\"en\">\n",

	"
<header>\
  <work osisWork=\"MT-LXX-Parallel\">\
    <title>The Parallel Aligned Hebrew-Aramaic and Greek texts of Jewish Scripture</title>\
    <identifier type=\"OSIS\">MT-LXX-Parallel</identifier>\
    <refSystem>Bible.Tanach</refSystem>\
  </work>\
</header>\n",
);

# One entry per book; only the uncommented books are converted and appended.
	# File				File id			ThML id		OSIS id		Short Book Title
#push(@result, &processBook("01.Genesis.par", "Gen", "Gen", "Gen", "Genesis") );
#push(@result, &processBook("02.Exodus.par", "Exod", "Exod", "Exod", "Exodus") );
#push(@result, &processBook("03.Lev.par", "Lev", "Lev", "Lev", "Leviticus") );
#push(@result, &processBook("04.Num.par", "Num", "Num", "Num", "Numbers") );
#push(@result, &processBook("05.Deut.par", "Deut", "Deut", "Deut", "Deuteronomy") );
#push(@result, &processBookVariant("07.JoshA.par", "JoshA", "Codex Alexandrinus:", "06.JoshB.par", "JoshB", "Codex Vaticanus:", "Josh", "Josh", "Joshua") );
# push(@result, &processBookVariant("09.JudgesA.par", "JudgA", "Codex Alexandrinus:", "08.JudgesB.par", "JudgB", "Codex Vaticanus:", "Judg", "Judg", "Judges") );
  
# push(@result, &processBook("10.Ruth.par", "Ruth", "Ruth", "Ruth", "Ruth") );
# push(@result, &processBook("11.1Sam.par", "1Sam/K", "iSam", "1Sam", "1 Samuel") );
# push(@result, &processBook("12.2Sam.par", "2Sam/K", "iiSam", "2Sam", "2 Samuel") );
# push(@result, &processBook("13.1Kings.par", "1/3Kgs", "iKgs", "1Kgs", "1 Kings") );
# push(@result, &processBook("14.2Kings.par", "2/4Kgs", "iiKgs", "2Kgs", "2 Kings") );
# push(@result, &processBook("15.1Chron.par", "1Chr", "iChr", "1Chr", "1 Chronicles") );
# push(@result, &processBook("16.2Chron.par", "2Chr", "iiChr", "2Chr", "2 Chronicles") );
# push(@result, &processBook("18.Ezra.par", "Ezr", "Ezra", "Ezra", "Ezra") );
# push(@result, &processBook("19.Neh.par", "Neh", "Neh", "Neh", "Nehemiah") );
# push(@result, &processBook("18.Esther.par", "Esth", "Esth", "Esth", "Esther") );
# push(@result, &processBook("26.Job.par", "Job", "Job", "Job", "Job") );
#  
#push(@result, &processBook("20.Psalms.par", "Ps", "Ps", "Ps", "Psalms"));
#  
# push(@result, &processBook("23.Prov.par", "Prov", "Prov", "Prov", "Proverbs") );
# push(@result, &processBook("24.Qoh.par", "Qoh", "Eccl", "Eccl", "Ecclesiastes") );
# push(@result, &processBook("25.Cant.par", "Song", "Song", "Song", "Song of Solomon") );
# push(@result, &processBook("40.Isaiah.par", "Isa", "Isa", "Isa", "Isaiah") );
# push(@result, &processBook("41.Jer.par", "Jer", "Jer", "Jer", "Jeremiah") );
# push(@result, &processBook("43.Lam.par", "Lam", "Lam", "Lam", "Lamentations") );
# push(@result, &processBook("44.Ezekiel.par", "Ezek", "Ezek", "Ezek", "Ezekiel") );
 
# push(@result, &processBookVariant("45.DanielOG.par", "Dan", "Old Greek:", "46.DanielTh.par", "DanTh", "Theodotion:", "Dan", "Dan", "Daniel"));
 
# push(@result, &processBook("28.Hosea.par", "Hos", "Hos", "Hos", "Hosea") );
# push(@result, &processBook("31.Joel.par", "Joel", "Joel", "Joel", "Joel") );
# push(@result, &processBook("30.Amos.par", "Amos", "Amos", "Amos", "Amos") );
# push(@result, &processBook("33.Obadiah.par", "Obad", "Obad", "Obad", "Obadiah") );
push(@result, &processBook("32.Jonah.par", "Jonah", "Jonah", "Jonah", "Jonah") );
# push(@result, &processBook("29.Micah.par", "Mic", "Mic", "Mic", "Micah") );
# push(@result, &processBook("34.Nahum.par", "Nah", "Nah", "Nah", "Nahum") );
# push(@result, &processBook("35.Hab.par", "Hab", "Hab", "Hab", "Habakkuk") );
# push(@result, &processBook("36.Zeph.par", "Zeph", "Zeph", "Zeph", "Zephaniah") );
# push(@result, &processBook("37.Haggai.par", "Hag", "Hag", "Hag", "Haggai") );
# push(@result, &processBook("38.Zech.par", "Zech", "Zech", "Zech", "Zechariah") );
# push(@result, &processBook("39.Malachi.par", "Mal", "Mal", "Mal", "Malachi") );

push(@result, "</osisText>\n</osis>");

# Write the assembled document, one list element per line, to the output file
# in the current directory. Every step is checked so that an unwritable
# directory or a full disk aborts the run instead of leaving a missing or
# truncated file behind unnoticed.
my $outputFile = "mt-lxx-par.osis.xml";
open( my $output, '>', $outputFile ) or die("Could not open $outputFile for writing: $!");
print {$output} join("\n", @result) or die("Could not write to $outputFile: $!");
close( $output ) or die("Could not close $outputFile: $!");