#!/usr/bin/perl
#
# For every *.xml file in the current directory this script:
#   1. adds class="..." attributes in front of size/face/color attribute
#      triples so that each font style gets a semantic name;
#   2. splits the file into a preface (every line before the first line that
#      contains "chapter") and the main text (everything from that line on);
#   3. runs the preface through preface.xsl and the main text through
#      text.xsl (both read from the current directory);
#   4. appends "\id <book code>", the transformed preface and the cleaned-up
#      transformed text to "<file>.text.sfm".
#
# NOTE(review): the copy of this script that was reviewed had its line breaks
# collapsed and appears to have lost every "<...>" sequence that looked like a
# markup tag. Statements affected by that loss are kept exactly as received and
# are marked "NOTE(review)" below; they must be restored from a pristine copy
# before the output of this script can be trusted.

use strict;
use warnings;

use XML::LibXSLT;
use XML::LibXML;
use utf8;

# Maps an input file name to the book code written after "\id".
# NOTE(review): "45HAB.xml" maps to ZEC while "42HAB.xml" maps to HAB; confirm
# that the 45th input file really is named "45HAB.xml".
my %books = qw(
    01GEN.xml   GEN    02EXO.xml   EXO    03LEV.xml   LEV    04NUM.xml   NUM
    05DEU.xml   DEU    06JOS.xml   JOS    07JUDG.xml  JDG    08RUT.xml   RUT
    091SAM.xml  1SA    102SAM.xml  2SA    111KGS.xml  1KI    122KGS.xml  2KI
    131CHR.xml  1CH    142CHR.xml  2CH    15ESRA.xml  EZR    16NEH.xml   NEH
    17TOB.xml   TOB    18JUDIT.xml JDT    19EST.xml   EST    201MAK.xml  1MA
    212MAK.xml  2MA    22JOB.xml   JOB    23PSA.xml   PSA    24PRO.xml   PRO
    25ECL.xml   ECC    26SONG.xml  SNG    27WIS.xml   WIS    28SIR.xml   SIR
    29ISA.xml   ISA    30JER.xml   JER    31LAM.xml   LAM    32BAR.xml   BAR
    33EZE.xml   EZK    34DAN.xml   DAN    35HOS.xml   HOS    36JOEL.xml  JOL
    37AMOS.xml  AMO    38OBA.xml   OBA    39JONAS.xml JON    40MIC.xml   MIC
    41NAH.xml   NAM    42HAB.xml   HAB    43ZEPH.xml  ZEP    44HAG.xml   HAG
    45HAB.xml   ZEC    46MAL.xml   MAL    47MAT.xml   MAT    48MRK.xml   MRK
    49LUK.xml   LUK    50JHN.xml   JHN    51ACTS.xml  ACT    52ROM.xml   ROM
    531COR.xml  1CO    542COR.xml  2CO    55GAL.xml   GAL    56EPH.xml   EPH
    57PHIL.xml  PHP    58COL.xml   COL    591THES.xml 1TH    602THES.xml 2TH
    611TIM.xml  1TI    622TIM.xml  2TI    63TIT.xml   TIT    64PHLM.xml  PHM
    65HEB.xml   HEB    66JAM.xml   JAS    671PET.xml  1PE    682PET.xml  2PE
    691JHN.xml  1JN    702JHN.xml  2JN    713JHN.xml  3JN    72JUDE.xml  JUD
    73REV.xml   REV
);

# Parses $xml_string, applies the stylesheet stored in $xsl_path and returns
# the transformation result as a string.
sub apply_stylesheet {
    my ($parser, $xslt, $xml_string, $xsl_path) = @_;

    my $source     = $parser->parse_string($xml_string);
    my $style_doc  = $parser->parse_file($xsl_path);
    my $stylesheet = $xslt->parse_stylesheet($style_doc);
    my $results    = $stylesheet->transform($source);

    return $stylesheet->output_string($results);
}

# glob() replaces `ls -1 *.xml`, so file names never pass through a shell.
# Files this script creates itself ("*.text.xml", "*.preface.xml") also match
# *.xml and would otherwise be picked up as input on a second run.
my @files = grep { -f $_ && !/\.(?:text|preface)\.xml\z/ } glob('*.xml');

foreach my $file (@files) {
    my $text = '';
    my @preface;

    # The reviewed source opens "<file>.text.xml", "<file>.preface.xml" and
    # "<file>.preface.sfm" for append but never writes to them. They are still
    # created here so the set of files produced stays the same.
    for my $suffix ('text.xml', 'preface.xml', 'preface.sfm') {
        open my $unused, '>>', "$file.$suffix"
            or die "Cannot open $file.$suffix: $!";
        close $unused
            or die "Cannot close $file.$suffix: $!";
    }

    open my $usfm, '>>', "$file.text.sfm"
        or die "Cannot open $file.text.sfm: $!";

    open my $in, '<', $file
        or die "Cannot read $file: $!";
    chomp(my @lines = <$in>);
    close $in
        or die "Cannot close $file: $!";

    # Pass 1: mark chapter headings so the preface/text split can find them.
    foreach (@lines) {
        s/(size=\"20\"\ face=\".*?\-Bold\"\ color=\"\#6D6E70\")/class=\"chapter\"\ $1/g;
        # Deuterocanonical chapters in Esther and Daniel
        s/(size=\"20\"\ face=\".*?\-BoldItalic\"\ color=\"\#EC008C\")/class=\"chapter\"\ $1/g;
        # Psalms
        s/(size=\"19\"\ face=\".*\-Bold\"\ color=\"\#6D6E70\")/class=\"chapter\" $1/g;
    }

    # Pass 2: every line before the first "chapter" line belongs to the
    # preface. Such a line is classified, copied to @preface and blanked in
    # @lines so that it does not reach the main text.
    SPLIT:
    foreach (@lines) {
        if (/chapter/) {
            push(@preface, "");
            last SPLIT;
        }
        else {
            s/(size=\"6\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"bookname2\"\ $1/g;
            s/(size=\"5\"\ face=\".*?Helvetica\-Bold\"\ color=\"\#231F20\")/class=\"Lords_Name\"\ $1/g;
            s/(size=\"5\"\ face=\".*?Helvetica\"\ color=\"\#231F20\")/class=\"Lords_Name\"\ $1/g;
            s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"Intro_title\"\ $1/g;
            s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"Intro_para\"\ $1/g;
            s/(size=\"8\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"Intro_para\"\ $1/g;
            s/(size=\"17\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ $1/g;
            s/(size=\"14\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ $1/g;
            # NOTE(review): the pattern is empty in the source as received
            # (Perl then reuses the last successfully matched pattern); the
            # original pattern was probably lost and must be restored.
            s//\ $1/g;
            s/(size=\"8\"\ face=\".*?\-BoldItalic\" color=\"\#231F20\")/class=\"Intro_title_2\"\ $1/g;
            s/(size=\"7\"\ face=\".*?\-BoldItalic\"\ color=\"\#231F20\")/class=\"Intro_title_ref"\ $1/g;
            s/(size=\"7\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"reference\"\ $1/g;
            s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#231F20\")/class=\"Intro_outline\" $1/g;
            s/(size=\"7\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"Verse_Range\" $1/g;
            s/(size=\"7\"\ face=\".*?\+Helvetica\"\ color=\"\#231F20\")/class=\"Intro_footer\" $1/g;
            s/(size=\"7\"\ face=\".*?\+Helvetica-Bold\"\ color=\"\#231F20\")/class=\"Image_title\" $1/g;
            s/(size=\"43\"\ face=\".*?\-Italic\"\ color=\"\#6D6E70\")/class=\"Intro_initial\"$1/g;
            s/(size=\"11\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"Intro_title\" $1/g;
            s/(size=\"6\"\ face=\".*?\-Oblique\"\ color=\"\#231F20\")/class=\"Image_ref\" $1/g;
            s/(size=\"17\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ \ $1/g;
            push(@preface, $_ . "\n");
            $_ = "";
        }
    }

    # Pass 3: classify the font styles of the main text and terminate every
    # line (including the blanked preface lines) with a newline.
    foreach (@lines) {
        s/(size=\"17\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ \ $1/g;
        s/(size=\"4\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"verse_no\"\ \ $1/g;
        s/(size=\"8\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"maintext\"\ \ $1/g;
        s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"maintext\"\ \ $1/g;
        s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"section_title\"\ $1/g;
        s/(size=\"7\"\ face=\".*?\-Bold\"\ color=\"\#231F20\")/class=\"refverse_no\"\ $1/g;
        s/(size=\"9\"\ face=\".*?\-Bold\"\ color=\"\#231F20\">)/class=\"refchapter_no\"\ $1/g;
        # NOTE(review): probably incomplete (an opening tag seems to be
        # missing from the front of the pattern); kept as received.
        s/\s*.<\/font>//g;
        s/(size=\"7\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"ref_text\"\ $1/g;
        s/(size=\"7\"\ face\=\".*?\+Helvetica\"\ color=\"\#231F20\")/class=\"page_footer\"\ $1/g;
        s/(size=\"6\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"Lords_Name\"\ $1/g;
        # NOTE(review): empty pattern as received; see the note in pass 2.
        s//\ $1/g;
        s/(size=\"7\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"ref_key\"\ $1/g;
        s/(size=\"6\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"bookname2\"\ $1/g;
        s/$/\n/;
    }

    # Rename the closing </font> of each line after the value captured by the
    # first substitution; a line that captured nothing gets "</>" which is
    # turned back into "</font>".
    foreach (@lines) {
        # NOTE(review): empty pattern as received; the expression that
        # captured the new element name into $1 was lost and must be restored.
        s//<$1>/;
        my $tag = defined $1 ? $1 : '';
        s/<\/font/"<\/".$tag/e;
        s/<\/>/<\/font>/;
    }

    # NOTE(review): several of the patterns below look damaged as well
    # (opening tags missing, "\n\s*?:()" is probably the remainder of a
    # "(?:...)" group). They are kept exactly as received.
    $text .= join("", @lines);
    $text =~ s/\n\s*//g;
    $text =~ s/<\/text>(\n\s*|)//g;
    $text =~ s/(.)<\/intro_para>\n\s*\ (.*?)<\/bookname2>\n\s*/$1$2<\/bookname2>/g;
    $text =~ s/(S)<\/maintext>\n\s*\s*(ENHOR)<\/Lords_Name>\n\s*/$1$2<\/Lords_Name>/g;
    $text =~ s/(.*?<\/verse_no>)\n\s*(.*?<\/maintext>)\n\s*?:()/$1$2<\/verse>\n/g;

    # Same closing-tag renaming as above, applied to the preface lines.
    foreach (@preface) {
        # NOTE(review): empty pattern as received; see above.
        s//<$1>/;
        my $tag = defined $1 ? $1 : '';
        s/<\/font/"<\/".$tag/e;
        s/<\/>/<\/font>/;
    }

    # Flatten the preface to a single line and merge elements that the
    # source layout split apart.
    my $preface = join("", @preface);
    $preface =~ s/\s+/\ /g;
    $preface =~ s/<(|\/)text>//g;
    $preface =~ s/(.)<\/Intro_para>\s*(.*?)<\/bookname2>/$1$2<\/bookname2><\/Intro_para>/g;
    $preface =~ s/(S|D)\s*<\/Intro_para>\s*\n*\s*\s*(ENHOR|EUS)<\/Lords_Name>\s*\n*\s*/$1$2<\/Lords_Name>/g;
    # $preface =~ s/(D)<\/Intro_para>\s*\n*\s*\s*(EUS)<\/Lords_Name>\s*\n*\s*/$1$2<\/Lords_Name>/g;
    $preface =~ s/(S|D)\s*\s*(ENHOR|EUS)<\/Lords_Name>/$1$2<\/Lords_Name>/g;
    # $preface =~ s/(D)\s*\s*(EUS)<\/Lords_Name>/$1$2<\/Lords_Name>/g;
    $preface =~ s/<\/Intro_para>\s*\n*\s*//g;
    $preface =~ s/<\/Intro_title_2>\s*(.*?)<\/Intro_para>/\ $1<\/Intro_title_2>/g;
    $preface =~ s/<\/Intro_outline>\s*\s*(.*?)<\/Intro_title_2>/$1<\/Intro_outline>/g;
    $preface =~ s/\/>\s*<\/Intro_footer>\s*(.*?)<\/Image_title>\s*?(.*?)<\/Intro_footer>/\ alt=\"$1$2\"\/><\/Intro_footer>/g;
    # $preface =~ s/png\"\/>\s*<\/Intro_para>\s*(.*?)<\/Image_title>\s*?(.*?)<\/Intro_footer>\s*?(.*?)<\/Image_ref>/png\"\ alt=\"$1$2\"\ ref=\"$3\"\/><\/Intro_para>/g;
    $preface =~ s/(.*?)<\/Intro_initial>\s/$1\ /g;
    $preface =~ s/\s+/\ /g;
    $preface =~ s/-\ //g;

    # NOTE(review): the source as received is truncated at this point: a
    # further substitution on $preface begins ("$preface =~ s/") and the text
    # resumes at "parse_string($preface);". Whatever stood in between cannot
    # be recovered from the reviewed copy. The two constructors below are the
    # minimum the following calls need; confirm them against the original.
    my $parser = XML::LibXML->new();
    my $xslt   = XML::LibXSLT->new();

    my $book_id = exists $books{$file} ? $books{$file} : '';
    warn "No book code known for $file\n" if $book_id eq '';

    my $preface_usfm = apply_stylesheet($parser, $xslt, $preface, 'preface.xsl');
    print "I am still working on $file \n";
    print {$usfm} "\\id $book_id";
    print {$usfm} $preface_usfm;

    my $text_usfm = apply_stylesheet($parser, $xslt, $text, 'text.xsl');
    print "I am working on $file \n";
    # print USFM "\\id $books{$_}";

    # Final clean-up of the generated markers, line by line.
    my @usfm_lines = split("\n", $text_usfm);
    foreach (@usfm_lines) {
        # Drop lines that consist of nothing but an en dash.
        s/^\s*–\s*$//;
        # The star of the first closing "\nd*" was unescaped in the reviewed
        # source ("\\nd*"), which cannot match a literal "\nd*" followed by
        # white space; "\*?" accepts the marker with or without the star.
        s/\\nd\s+E\s+\\nd\*?\s+-\\nd\s+NHOR\s+\\nd\*/\\nd SENHOR\\nd\*/g;
        # A verse marker followed by two spaces starts a new paragraph.
        s/^\\v\ \ /\\p\n\\v\ /;
        # Re-join words that were hyphenated at a line end.
        s/-\ //g;
        # Make sure a space separates the verse number from a following "(".
        s/(\\v\s+[0123456789]+)\(/$1\ (/;
    }
    print {$usfm} join("\n", @usfm_lines);

    close $usfm
        or die "Cannot close $file.text.sfm: $!";
}