blob: 230a98362bd329c4418cca325b64347f50dc543d (
plain) (
tree)
|
|
#!/usr/bin/perl
use XML::LibXSLT;
use XML::LibXML;
use utf8;
# Candidate input files: every *.xml in the current directory, as printed by
# `ls -1` (one name per element, each still carrying its trailing newline).
my @files = qx{ls -1 *.xml};

# Input file name => three-letter book code emitted after "\id" in the
# generated .sfm file.
# NOTE(review): '45HAB.xml' maps to ZEC while '42HAB.xml' maps to HAB; this may
# be a slip for a Zechariah file name -- confirm against the files on disk.
my %books = (
    '01GEN.xml'   => 'GEN',
    '02EXO.xml'   => 'EXO',
    '03LEV.xml'   => 'LEV',
    '04NUM.xml'   => 'NUM',
    '05DEU.xml'   => 'DEU',
    '06JOS.xml'   => 'JOS',
    '07JUDG.xml'  => 'JDG',
    '08RUT.xml'   => 'RUT',
    '091SAM.xml'  => '1SA',
    '102SAM.xml'  => '2SA',
    '111KGS.xml'  => '1KI',
    '122KGS.xml'  => '2KI',
    '131CHR.xml'  => '1CH',
    '142CHR.xml'  => '2CH',
    '15ESRA.xml'  => 'EZR',
    '16NEH.xml'   => 'NEH',
    '17TOB.xml'   => 'TOB',
    '18JUDIT.xml' => 'JDT',
    '19EST.xml'   => 'EST',
    '201MAK.xml'  => '1MA',
    '212MAK.xml'  => '2MA',
    '22JOB.xml'   => 'JOB',
    '23PSA.xml'   => 'PSA',
    '24PRO.xml'   => 'PRO',
    '25ECL.xml'   => 'ECC',
    '26SONG.xml'  => 'SNG',
    '27WIS.xml'   => 'WIS',
    '28SIR.xml'   => 'SIR',
    '29ISA.xml'   => 'ISA',
    '30JER.xml'   => 'JER',
    '31LAM.xml'   => 'LAM',
    '32BAR.xml'   => 'BAR',
    '33EZE.xml'   => 'EZK',
    '34DAN.xml'   => 'DAN',
    '35HOS.xml'   => 'HOS',
    '36JOEL.xml'  => 'JOL',
    '37AMOS.xml'  => 'AMO',
    '38OBA.xml'   => 'OBA',
    '39JONAS.xml' => 'JON',
    '40MIC.xml'   => 'MIC',
    '41NAH.xml'   => 'NAM',
    '42HAB.xml'   => 'HAB',
    '43ZEPH.xml'  => 'ZEP',
    '44HAG.xml'   => 'HAG',
    '45HAB.xml'   => 'ZEC',
    '46MAL.xml'   => 'MAL',
    '47MAT.xml'   => 'MAT',
    '48MRK.xml'   => 'MRK',
    '49LUK.xml'   => 'LUK',
    '50JHN.xml'   => 'JHN',
    '51ACTS.xml'  => 'ACT',
    '52ROM.xml'   => 'ROM',
    '531COR.xml'  => '1CO',
    '542COR.xml'  => '2CO',
    '55GAL.xml'   => 'GAL',
    '56EPH.xml'   => 'EPH',
    '57PHIL.xml'  => 'PHP',
    '58COL.xml'   => 'COL',
    '591THES.xml' => '1TH',
    '602THES.xml' => '2TH',
    '611TIM.xml'  => '1TI',
    '622TIM.xml'  => '2TI',
    '63TIT.xml'   => 'TIT',
    '64PHLM.xml'  => 'PHM',
    '65HEB.xml'   => 'HEB',
    '66JAM.xml'   => 'JAS',
    '671PET.xml'  => '1PE',
    '682PET.xml'  => '2PE',
    '691JHN.xml'  => '1JN',
    '702JHN.xml'  => '2JN',
    '713JHN.xml'  => '3JN',
    '72JUDE.xml'  => 'JUD',
    '73REV.xml'   => 'REV',
);
# ---------------------------------------------------------------------------
# Per-file conversion.
#
# For every input file the loop below:
#   1. tags <font> elements with a class attribute chosen from their
#      size/face/color combination,
#   2. splits the file at the first line mentioning "chapter" into a preface
#      part and a text part,
#   3. renames the classified <font> elements to elements named after the class,
#   4. writes <file>.text.xml and <file>.preface.xml,
#   5. runs preface.xsl and text.xsl over the two parts and appends the
#      results to <file>.text.sfm.
# ---------------------------------------------------------------------------

# Compiled stylesheets keyed by XSL file name, so each one is parsed only once.
my %stylesheet_for;

# XML parser and XSLT engine, created on first use.
my ( $parser, $xslt );

# retag_first_font(\@lines)
#
# For each line, rewrites the first <font class="X" ...> to <X> and the first
# </font> on that line to </X>.  Lines without a classified <font> are left
# untouched, so a closing tag is never renamed with a value left over from an
# unrelated earlier match.  The array is modified in place.
sub retag_first_font {
    my ($lines_ref) = @_;
    foreach my $line ( @{$lines_ref} ) {
        my $tag = '';
        if ( $line =~ s/<font\ class=\"(.*?)\".*?>/<$1>/ ) {
            $tag = $1;
        }
        next if $tag eq '';
        $line =~ s/<\/font/<\/$tag/;
    }
    return;
}

# transform_with($xsl_file, $xml_string)
#
# Parses $xml_string, applies the stylesheet stored in $xsl_file and returns
# the serialized result.  Dies (via XML::LibXML / XML::LibXSLT) when the XML
# or the stylesheet cannot be parsed.
sub transform_with {
    my ( $xsl_file, $xml_string ) = @_;
    $parser ||= XML::LibXML->new;
    $xslt   ||= XML::LibXSLT->new;
    my $source = $parser->parse_string($xml_string);
    unless ( exists $stylesheet_for{$xsl_file} ) {
        my $style_doc = $parser->parse_file($xsl_file);
        $stylesheet_for{$xsl_file} = $xslt->parse_stylesheet($style_doc);
    }
    my $stylesheet = $stylesheet_for{$xsl_file};
    my $results    = $stylesheet->transform($source);
    return $stylesheet->output_string($results);
}

FILE: foreach my $file (@files) {
    # Entries come from `ls` and end in a newline; chomp removes only that.
    chomp $file;
    next FILE unless length $file;

    # The listing pattern *.xml also matches this script's own outputs
    # (<file>.text.xml, <file>.preface.xml); never treat those as input.
    next FILE if $file =~ /\.(?:text|preface)\.xml\z/;

    # NOTE(review): all outputs are opened in append mode, so running the
    # script twice accumulates content -- confirm whether that is intended.
    open my $text_out, '>>', "$file.text.xml"
        or die "Cannot open $file.text.xml: $!";
    open my $usfm_out, '>>', "$file.text.sfm"
        or die "Cannot open $file.text.sfm: $!";
    open my $preface_out, '>>', "$file.preface.xml"
        or die "Cannot open $file.preface.xml: $!";
    # Nothing is ever written to this handle; opening it only creates the file.
    open my $preface_usfm_out, '>>', "$file.preface.sfm"
        or die "Cannot open $file.preface.sfm: $!";

    # Read the input without going through a shell.
    open my $in, '<', $file or die "Cannot read $file: $!";
    my @lines = <$in>;
    close $in;
    chomp @lines;

    # Mark chapter numbers so the preface/text boundary can be found.
    foreach (@lines) {
        s/(size=\"20\"\ face=\".*?\-Bold\"\ color=\"\#6D6E70\")/class=\"chapter\"\ $1/g;
        s/(size=\"20\"\ face=\".*?\-BoldItalic\"\ color=\"\#EC008C\")/class=\"chapter\"\ $1/g; # Deuterocanonical Chapters in Esther and Daniel
        s/(size=\"19\"\ face=\".*\-Bold\"\ color=\"\#6D6E70\")/class=\"chapter\" $1/g; # Psalms
    }

    # Everything before the first line mentioning "chapter" is the preface.
    # Preface lines are classified, copied to @preface and blanked in @lines
    # so that they do not show up again in the text part.
    my $text = '';
    my @preface;
    SPLIT: foreach (@lines) {
        if (/chapter/) {
            push( @preface, "</page></pdf2xml>" );
            $text = '<?xml version="1.0"?><pdf2xml><page>';
            last SPLIT;
        }
        else {
            s/(size=\"6\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"bookname2\"\ $1/g;
            s/(size=\"5\"\ face=\".*?Helvetica\-Bold\"\ color=\"\#231F20\")/class=\"Lords_Name\"\ $1/g;
            s/(size=\"5\"\ face=\".*?Helvetica\"\ color=\"\#231F20\")/class=\"Lords_Name\"\ $1/g;
            s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"Intro_title\"\ $1/g;
            s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"Intro_para\"\ $1/g;
            s/(size=\"8\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"Intro_para\"\ $1/g;
            s/(size=\"17\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ $1/g;
            s/(size=\"14\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ $1/g;
            # Drop all attributes of <text>; a single space follows the tag.
            s/<text.*?>/<text>\ /g;
            s/(size=\"8\"\ face=\".*?\-BoldItalic\" color=\"\#231F20\")/class=\"Intro_title_2\"\ $1/g;
            s/(size=\"7\"\ face=\".*?\-BoldItalic\"\ color=\"\#231F20\")/class=\"Intro_title_ref"\ $1/g;
            s/(size=\"7\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"reference\"\ $1/g;
            s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#231F20\")/class=\"Intro_outline\" $1/g;
            s/(size=\"7\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"Verse_Range\" $1/g;
            s/(size=\"7\"\ face=\".*?\+Helvetica\"\ color=\"\#231F20\")/class=\"Intro_footer\" $1/g;
            s/(size=\"7\"\ face=\".*?\+Helvetica-Bold\"\ color=\"\#231F20\")/class=\"Image_title\" $1/g;
            s/(size=\"43\"\ face=\".*?\-Italic\"\ color=\"\#6D6E70\")/class=\"Intro_initial\"$1/g;
            s/(size=\"11\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"Intro_title\" $1/g;
            s/(size=\"6\"\ face=\".*?\-Oblique\"\ color=\"\#231F20\")/class=\"Image_ref\" $1/g;
            push( @preface, $_ . "\n" );
            $_ = "";
        }
    }

    # Classify the fonts of the remaining (text) lines and terminate each line.
    foreach (@lines) {
        s/(size=\"17\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ \ $1/g;
        s/(size=\"4\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"verse_no\"\ \ $1/g;
        s/(size=\"8\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"maintext\"\ \ $1/g;
        s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"maintext\"\ \ $1/g;
        s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"section_title\"\ $1/g;
        s/(size=\"7\"\ face=\".*?\-Bold\"\ color=\"\#231F20\")/class=\"refverse_no\"\ $1/g;
        s/(size=\"9\"\ face=\".*?\-Bold\"\ color=\"\#231F20\">)/class=\"refchapter_no\"\ $1/g;
        # Remove single-character ZapfDingbats glyphs entirely.
        s/<font\s*size=\"7\"\s*face=\".*?ZapfDingbats\"\s*color=\"\#231F20\">\s*.<\/font>//g;
        s/(size=\"7\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"ref_text\"\ $1/g;
        s/(size=\"7\"\ face\=\".*?\+Helvetica\"\ color=\"\#231F20\")/class=\"page_footer\"\ $1/g;
        s/(size=\"6\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"Lords_Name\"\ $1/g;
        # Drop all attributes of <text>; a single space follows the tag.
        s/<text.*?>/<text>\ /g;
        s/(size=\"7\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"ref_key\"\ $1/g;
        s/(size=\"6\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"bookname2\"\ $1/g;
        s/$/\n/;
    }

    # Assemble the text document.
    retag_first_font( \@lines );
    $text = $text . join( "", @lines );
    $text =~ s/\n\s*<text>//g;
    $text =~ s/<\/text>(\n\s*|)//g;
    # NOTE(review): the next pattern looks for lower-case <intro_para>, while
    # the classes assigned above are spelled "Intro_para" -- confirm intent.
    $text =~ s/(.)<\/intro_para>\n\s*<bookname2>\ (.*?)<\/bookname2>\n\s*<intro_para>/<bookname2>$1$2<\/bookname2>/g;
    $text =~ s/(S)<\/maintext>\n\s*<Lords_Name>\s*(ENHOR)<\/Lords_Name>\n\s*<maintext>/<Lords_Name>$1$2<\/Lords_Name>/g;
    # NOTE(review): "\s*?:" requires a literal ':' before <verse_no>; possibly
    # "(?:" or a lookahead was intended.  Left unchanged because text.xsl
    # consumes the current output -- confirm before altering.
    $text =~ s/(<verse_no>.*?<\/verse_no>)\n\s*(<maintext>.*?<\/maintext>)\n\s*?:(<verse_no>)/<verse>$1$2<\/verse>\n<verse_no>/g;

    # Assemble the preface document.
    retag_first_font( \@preface );
    my $preface = join( "", @preface );
    $preface =~ s/\s+/\ /g;
    $preface =~ s/<(|\/)text>//g;
    $preface =~ s/(.)<\/Intro_para>\s*<bookname2>(.*?)<\/bookname2>/<bookname2>$1$2<\/bookname2><\/Intro_para>/g;
    # Re-join a leading "S"/"D" with the following "ENHOR"/"EUS" run.
    $preface =~ s/(S|D)\s*<\/Intro_para>\s*\n*\s*<Lords_Name>\s*(ENHOR|EUS)<\/Lords_Name>\s*\n*\s*<Intro_para>/<Lords_Name>$1$2<\/Lords_Name>/g;
    $preface =~ s/(S|D)\s*<Lords_Name>\s*(ENHOR|EUS)<\/Lords_Name>/<Lords_Name>$1$2<\/Lords_Name>/g;
    # Merge adjacent paragraphs, titles and outlines.
    $preface =~ s/<\/Intro_para>\s*\n*\s*<Intro_para>//g;
    $preface =~ s/<\/Intro_title_2>\s*<Intro_para>(.*?)<\/Intro_para>/\ $1<\/Intro_title_2>/g;
    $preface =~ s/<\/Intro_outline>\s*<Intro_title_2>\s*(.*?)<\/Intro_title_2>/$1<\/Intro_outline>/g;
    # Fold an image title and the footer text after it into an alt attribute.
    $preface =~ s/\/>\s*<\/Intro_footer>\s*<Image_title>(.*?)<\/Image_title>\s*?<Intro_footer>(.*?)<\/Intro_footer>/\ alt=\"$1$2\"\/><\/Intro_footer>/g;
    # Disabled alternative that would also capture the image reference:
    # $preface =~ s/png\"\/>\s*<\/Intro_para>\s*<Image_title>(.*?)<\/Image_title>\s*?<Intro_footer>(.*?)<\/Intro_footer>\s*?<Image_ref>(.*?)<\/Image_ref>/png\"\ alt=\"$1$2\"\ ref=\"$3\"\/><\/Intro_para>/g;
    $preface =~ s/<Intro_initial>(.*?)<\/Intro_initial>\s<Intro_para>/<Intro_para>$1\ /g;
    $preface =~ s/\s+/\ /g;
    $preface =~ s/-\ //g;    # remove every "- " sequence
    $preface =~ s/<Intro/\n<Intro/g;

    # Write the intermediate XML before transforming, so it is available for
    # inspection even when a transformation dies.
    print {$text_out} $text;
    close $text_out or die "Cannot close $file.text.xml: $!";
    print {$preface_out} $preface;
    close $preface_out or die "Cannot close $file.preface.xml: $!";

    # Preface -> USFM, preceded by the \id line for this book.
    my $preface_usfm = transform_with( 'preface.xsl', $preface );
    print "I am still working on $file \n";
    print {$usfm_out} "\\id $books{$file}";
    print {$usfm_out} $preface_usfm;

    # Text -> USFM, followed by line-level clean-up.
    my $text_usfm = transform_with( 'text.xsl', $text );
    print "I am working on $file \n";
    my @usfm_lines = split( "\n", $text_usfm );
    foreach (@usfm_lines) {
        s/^\s*–\s*$//;
        # NOTE(review): "\\nd*" matches "\n" followed by zero or more "d",
        # not the literal "\nd*" marker -- TODO confirm the intended pattern.
        s/\\nd\s+E\s+\\nd*\s+-\\nd\s+NHOR\s+\\nd\*/\\nd SENHOR\\nd\*/g;
        s/^\\v\ \ /\\p\n\\v\ /;
        s/-\ //g;
        s/(\\v\s+[0123456789]+)\(/$1\ (/;
    }
    print {$usfm_out} join( "\n", @usfm_lines );
    close $usfm_out         or die "Cannot close $file.text.sfm: $!";
    close $preface_usfm_out or die "Cannot close $file.preface.sfm: $!";
}
|