summaryrefslogblamecommitdiffstats
path: root/modules/portuguese/transform.pl
blob: 230a98362bd329c4418cca325b64347f50dc543d (plain) (tree)
1
2
3
4
5
6
7
8
9

               

                 


         

                        
 












































































                   


                  
                 
                

                    

             


                                         
                                             
        
                               
 



                          
                                                                                                

                                                                                                                                                        
         
        


                                

                                                                


                              




















                                                                                                               






                          


                                                                                                   
                                                                                                    






                                                                                                         

                                                                                                 

                                                                                                   



         












                                                    
                                       







                                                                                                                                     

































                                                                                                                                                                                                                 

                                                   



                               


                                      












                                                             
                                                      
                                                         
 

                                                             












                                                                           
                    
         
        
                                      
                   


                          







        
#!/usr/bin/perl

use XML::LibXSLT;
use XML::LibXML;
use utf8;



# Input files: every *.xml entry in the current working directory.
# glob() replaces the former call to the external `ls`, so no shell or
# child process is involved and a directory whose name ends in .xml is not
# expanded into its contents.  Each name keeps a trailing newline, as the
# lines returned by `ls -1` did, because the per-file loop strips the line
# terminator from every entry before using it.
my @files = map { "$_\n" } glob('*.xml');

# Lookup table: input file name => three-character book code.  The code is
# what the per-file loop prints after the "\id" marker at the top of each
# generated .sfm file.
# NOTE(review): '45HAB.xml' maps to ZEC while '42HAB.xml' maps to HAB; this
# may simply mirror how the input file is named on disk -- confirm.
my %books = (
    '01GEN.xml'   => 'GEN',
    '02EXO.xml'   => 'EXO',
    '03LEV.xml'   => 'LEV',
    '04NUM.xml'   => 'NUM',
    '05DEU.xml'   => 'DEU',
    '06JOS.xml'   => 'JOS',
    '07JUDG.xml'  => 'JDG',
    '08RUT.xml'   => 'RUT',
    '091SAM.xml'  => '1SA',
    '102SAM.xml'  => '2SA',
    '111KGS.xml'  => '1KI',
    '122KGS.xml'  => '2KI',
    '131CHR.xml'  => '1CH',
    '142CHR.xml'  => '2CH',
    '15ESRA.xml'  => 'EZR',
    '16NEH.xml'   => 'NEH',
    '17TOB.xml'   => 'TOB',
    '18JUDIT.xml' => 'JDT',
    '19EST.xml'   => 'EST',
    '201MAK.xml'  => '1MA',
    '212MAK.xml'  => '2MA',
    '22JOB.xml'   => 'JOB',
    '23PSA.xml'   => 'PSA',
    '24PRO.xml'   => 'PRO',
    '25ECL.xml'   => 'ECC',
    '26SONG.xml'  => 'SNG',
    '27WIS.xml'   => 'WIS',
    '28SIR.xml'   => 'SIR',
    '29ISA.xml'   => 'ISA',
    '30JER.xml'   => 'JER',
    '31LAM.xml'   => 'LAM',
    '32BAR.xml'   => 'BAR',
    '33EZE.xml'   => 'EZK',
    '34DAN.xml'   => 'DAN',
    '35HOS.xml'   => 'HOS',
    '36JOEL.xml'  => 'JOL',
    '37AMOS.xml'  => 'AMO',
    '38OBA.xml'   => 'OBA',
    '39JONAS.xml' => 'JON',
    '40MIC.xml'   => 'MIC',
    '41NAH.xml'   => 'NAM',
    '42HAB.xml'   => 'HAB',
    '43ZEPH.xml'  => 'ZEP',
    '44HAG.xml'   => 'HAG',
    '45HAB.xml'   => 'ZEC',
    '46MAL.xml'   => 'MAL',
    '47MAT.xml'   => 'MAT',
    '48MRK.xml'   => 'MRK',
    '49LUK.xml'   => 'LUK',
    '50JHN.xml'   => 'JHN',
    '51ACTS.xml'  => 'ACT',
    '52ROM.xml'   => 'ROM',
    '531COR.xml'  => '1CO',
    '542COR.xml'  => '2CO',
    '55GAL.xml'   => 'GAL',
    '56EPH.xml'   => 'EPH',
    '57PHIL.xml'  => 'PHP',
    '58COL.xml'   => 'COL',
    '591THES.xml' => '1TH',
    '602THES.xml' => '2TH',
    '611TIM.xml'  => '1TI',
    '622TIM.xml'  => '2TI',
    '63TIT.xml'   => 'TIT',
    '64PHLM.xml'  => 'PHM',
    '65HEB.xml'   => 'HEB',
    '66JAM.xml'   => 'JAS',
    '671PET.xml'  => '1PE',
    '682PET.xml'  => '2PE',
    '691JHN.xml'  => '1JN',
    '702JHN.xml'  => '2JN',
    '713JHN.xml'  => '3JN',
    '72JUDE.xml'  => 'JUD',
    '73REV.xml'   => 'REV',
);


# Renames the first <font class="NAME" ...> ... </font> pair on each line to
# <NAME> ... </NAME>.  The lines are edited in place through the array
# reference; nothing is returned.
sub retag_font_elements {
    my ($lines_ref) = @_;

    for my $line ( @{$lines_ref} ) {

        # Rewrite the closing tag only when an opening tag was really
        # rewritten.  Reading $1 after a failed substitution would pick up a
        # capture left over from an earlier match and rename </font> to an
        # unrelated element.
        if ( $line =~ s/<font\ class=\"(.*?)\".*?>/<$1>/ ) {
            my $tag = $1;
            $line =~ s/<\/font/<\/$tag/;
        }

        # An empty class name leaves "</>" behind; restore the original
        # closing tag in that case.
        $line =~ s/<\/>/<\/font>/;
    }
    return;
}

# Per-file conversion.  For every input file the loop
#   1. tags <font> elements with a class derived from size/face/colour,
#   2. splits the lines into a preface (everything before the first line
#      containing "chapter") and the main text,
#   3. renames classed <font> elements to elements named after the class,
#   4. writes both intermediate documents to <file>.preface.xml and
#      <file>.text.xml,
#   5. runs them through preface.xsl and text.xsl and writes the combined
#      result, after some textual clean-up, to <file>.text.sfm.
# The order of the substitutions is significant: each one prepends a class
# attribute, and only the first class on a tag is used in step 3.
FILE: foreach my $file (@files) {

    # Entries come from a line-oriented listing; drop the line terminator.
    chomp $file;

    # The intermediate files written below also match *.xml.  Skip them so
    # that a second run does not treat its own output as input.
    next FILE if $file =~ /\.(?:text|preface)\.xml\z/;

    my @preface;
    my $text;

    # Read the input directly instead of spawning `cat` through the shell,
    # which broke on names containing spaces or shell metacharacters.
    open my $in_fh, '<', $file or die "Cannot read $file: $!";
    chomp( my @lines = <$in_fh> );
    close $in_fh or die "Cannot close $file: $!";

    # Outputs are truncated rather than appended to, so running the script
    # again does not duplicate their content.
    open my $text_fh, '>', "$file.text.xml"
        or die "Cannot write $file.text.xml: $!";
    open my $usfm_fh, '>', "$file.text.sfm"
        or die "Cannot write $file.text.sfm: $!";
    open my $preface_fh, '>', "$file.preface.xml"
        or die "Cannot write $file.preface.xml: $!";

    # <file>.preface.sfm has always been created but never written to (the
    # transformed preface goes into <file>.text.sfm); keep creating it.
    open my $preface_usfm_fh, '>', "$file.preface.sfm"
        or die "Cannot write $file.preface.sfm: $!";
    close $preface_usfm_fh or die "Cannot close $file.preface.sfm: $!";

    # Pass 1: mark chapter headings so the split below can find them.
    foreach (@lines) {
        s/(size=\"20\"\ face=\".*?\-Bold\"\ color=\"\#6D6E70\")/class=\"chapter\"\ $1/g;
        s/(size=\"20\"\ face=\".*?\-BoldItalic\"\ color=\"\#EC008C\")/class=\"chapter\"\ $1/g;    # Deuterocanonical chapters in Esther and Daniel
        s/(size=\"19\"\ face=\".*\-Bold\"\ color=\"\#6D6E70\")/class=\"chapter\" $1/g;             # Psalms
    }

    # Pass 2: everything before the first line containing "chapter" is the
    # preface.  Those lines are classified, moved to @preface and blanked in
    # @lines.  If no such line exists, all lines end up in @preface and
    # $text stays undefined.
  SPLIT: foreach (@lines) {

        if (/chapter/) {
            push @preface, "</page></pdf2xml>";
            $text = '<?xml version="1.0"?><pdf2xml><page>';
            last SPLIT;
        }

        s/(size=\"6\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"bookname2\"\ $1/g;
        s/(size=\"5\"\ face=\".*?Helvetica\-Bold\"\ color=\"\#231F20\")/class=\"Lords_Name\"\ $1/g;
        s/(size=\"5\"\ face=\".*?Helvetica\"\ color=\"\#231F20\")/class=\"Lords_Name\"\ $1/g;
        s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"Intro_title\"\ $1/g;
        s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"Intro_para\"\ $1/g;
        s/(size=\"8\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"Intro_para\"\ $1/g;
        s/(size=\"17\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ $1/g;
        s/(size=\"14\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ $1/g;

        # Strip all attributes from <text ...>.  The replacement used to end
        # in $1, which is always undefined here because the pattern has no
        # capture group; the output is unchanged.
        s/<text.*?>/<text>\ /g;

        s/(size=\"8\"\ face=\".*?\-BoldItalic\" color=\"\#231F20\")/class=\"Intro_title_2\"\ $1/g;
        s/(size=\"7\"\ face=\".*?\-BoldItalic\"\ color=\"\#231F20\")/class=\"Intro_title_ref"\ $1/g;
        s/(size=\"7\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"reference\"\ $1/g;
        s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#231F20\")/class=\"Intro_outline\" $1/g;
        s/(size=\"7\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"Verse_Range\" $1/g;
        s/(size=\"7\"\ face=\".*?\+Helvetica\"\ color=\"\#231F20\")/class=\"Intro_footer\" $1/g;
        s/(size=\"7\"\ face=\".*?\+Helvetica-Bold\"\ color=\"\#231F20\")/class=\"Image_title\" $1/g;
        s/(size=\"43\"\ face=\".*?\-Italic\"\ color=\"\#6D6E70\")/class=\"Intro_initial\"$1/g;
        s/(size=\"11\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"Intro_title\" $1/g;
        s/(size=\"6\"\ face=\".*?\-Oblique\"\ color=\"\#231F20\")/class=\"Image_ref\" $1/g;
        s/(size=\"17\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ \ $1/g;

        push @preface, $_ . "\n";
        $_ = "";
    }

    # Pass 3: classify the remaining (main text) lines and terminate every
    # line, including the blanked preface lines, with a newline.
    foreach (@lines) {
        s/(size=\"17\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ \ $1/g;
        s/(size=\"4\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"verse_no\"\ \ $1/g;
        s/(size=\"8\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"maintext\"\ \ $1/g;
        s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"maintext\"\ \ $1/g;
        s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"section_title\"\ $1/g;
        s/(size=\"7\"\ face=\".*?\-Bold\"\ color=\"\#231F20\")/class=\"refverse_no\"\ $1/g;
        s/(size=\"9\"\ face=\".*?\-Bold\"\ color=\"\#231F20\">)/class=\"refchapter_no\"\ $1/g;
        s/<font\s*size=\"7\"\s*face=\".*?ZapfDingbats\"\s*color=\"\#231F20\">\s*.<\/font>//g;
        s/(size=\"7\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"ref_text\"\ $1/g;
        s/(size=\"7\"\ face\=\".*?\+Helvetica\"\ color=\"\#231F20\")/class=\"page_footer\"\ $1/g;
        s/(size=\"6\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"Lords_Name\"\ $1/g;
        s/<text.*?>/<text>\ /g;    # see the note on the same rule in pass 2
        s/(size=\"7\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"ref_key\"\ $1/g;
        s/(size=\"6\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"bookname2\"\ $1/g;
        s/$/\n/;
    }

    # Build the main-text document.
    retag_font_elements( \@lines );
    $text .= join "", @lines;

    $text =~ s/\n\s*<text>//g;
    $text =~ s/<\/text>(\n\s*|)//g;
    # NOTE(review): this rule spells the element "intro_para" in lower case,
    # whereas the class assigned in pass 2 is "Intro_para" -- confirm.
    $text =~ s/(.)<\/intro_para>\n\s*<bookname2>\ (.*?)<\/bookname2>\n\s*<intro_para>/<bookname2>$1$2<\/bookname2>/g;
    $text =~ s/(S)<\/maintext>\n\s*<Lords_Name>\s*(ENHOR)<\/Lords_Name>\n\s*<maintext>/<Lords_Name>$1$2<\/Lords_Name>/g;
    # NOTE(review): "\s*?:" requires a literal colon before <verse_no>; a
    # non-capturing group or look-ahead may have been intended -- confirm.
    $text =~ s/(<verse_no>.*?<\/verse_no>)\n\s*(<maintext>.*?<\/maintext>)\n\s*?:(<verse_no>)/<verse>$1$2<\/verse>\n<verse_no>/g;

    # Build the preface document.
    retag_font_elements( \@preface );
    my $preface = join "", @preface;

    $preface =~ s/\s+/\ /g;
    $preface =~ s/<(|\/)text>//g;
    $preface =~ s/(.)<\/Intro_para>\s*<bookname2>(.*?)<\/bookname2>/<bookname2>$1$2<\/bookname2><\/Intro_para>/g;
    $preface =~ s/(S|D)\s*<\/Intro_para>\s*\n*\s*<Lords_Name>\s*(ENHOR|EUS)<\/Lords_Name>\s*\n*\s*<Intro_para>/<Lords_Name>$1$2<\/Lords_Name>/g;
    $preface =~ s/(S|D)\s*<Lords_Name>\s*(ENHOR|EUS)<\/Lords_Name>/<Lords_Name>$1$2<\/Lords_Name>/g;
    $preface =~ s/<\/Intro_para>\s*\n*\s*<Intro_para>//g;
    $preface =~ s/<\/Intro_title_2>\s*<Intro_para>(.*?)<\/Intro_para>/\ $1<\/Intro_title_2>/g;
    $preface =~ s/<\/Intro_outline>\s*<Intro_title_2>\s*(.*?)<\/Intro_title_2>/$1<\/Intro_outline>/g;
    $preface =~ s/\/>\s*<\/Intro_footer>\s*<Image_title>(.*?)<\/Image_title>\s*?<Intro_footer>(.*?)<\/Intro_footer>/\ alt=\"$1$2\"\/><\/Intro_footer>/g;
    # $preface =~ s/png\"\/>\s*<\/Intro_para>\s*<Image_title>(.*?)<\/Image_title>\s*?<Intro_footer>(.*?)<\/Intro_footer>\s*?<Image_ref>(.*?)<\/Image_ref>/png\"\ alt=\"$1$2\"\ ref=\"$3\"\/><\/Intro_para>/g;
    $preface =~ s/<Intro_initial>(.*?)<\/Intro_initial>\s<Intro_para>/<Intro_para>$1\ /g;
    $preface =~ s/\s+/\ /g;
    $preface =~ s/-\ //g;
    $preface =~ s/<Intro/\n<Intro/g;

    # Save both intermediate documents before parsing them, so they can be
    # inspected if a parse or transform step dies.
    print {$text_fh} $text;
    close $text_fh or die "Cannot close $file.text.xml: $!";
    print {$preface_fh} $preface;
    close $preface_fh or die "Cannot close $file.preface.xml: $!";

    # Create the XML parser and the XSLT processor.
    my $parser = XML::LibXML->new;
    my $xslt   = XML::LibXSLT->new;

    # Preface: transform with preface.xsl and write it right after \id.
    my $preface_doc    = $parser->parse_string($preface);
    my $preface_style  = $xslt->parse_stylesheet( $parser->parse_file('preface.xsl') );
    my $preface_result = $preface_style->transform($preface_doc);

    print "I am still  working on $file \n";

    print {$usfm_fh} "\\id $books{$file}";
    print {$usfm_fh} $preface_style->output_string($preface_result);

    # Main text: transform with text.xsl, then tidy the result line by line.
    my $text_doc    = $parser->parse_string($text);
    my $text_style  = $xslt->parse_stylesheet( $parser->parse_file('text.xsl') );
    my $text_result = $text_style->transform($text_doc);

    print "I am working on $file \n";

    my @usfm_lines = split "\n", $text_style->output_string($text_result);

    foreach (@usfm_lines) {

        # NOTE(review): under "use utf8" this dash is a character, while
        # output_string is documented to return bytes -- confirm that this
        # rule ever matches.
        s/^\s*–\s*$//;
        # NOTE(review): "\\nd*" matches a backslash, "n" and any number of
        # "d"; "\\nd\*" may have been intended -- confirm.
        s/\\nd\s+E\s+\\nd*\s+-\\nd\s+NHOR\s+\\nd\*/\\nd SENHOR\\nd\*/g;
        s/^\\v\ \ /\\p\n\\v\ /;
        s/-\ //g;
        s/(\\v\s+[0123456789]+)\(/$1\ (/;
    }

    print {$usfm_fh} join( "\n", @usfm_lines );
    close $usfm_fh or die "Cannot close $file.text.sfm: $!";
}