diff options
author | Peter von Kaehne <refdoc@gmx.net> | 2010-09-14 23:16:34 +0000 |
---|---|---|
committer | Peter von Kaehne <refdoc@gmx.net> | 2010-09-14 23:16:34 +0000 |
commit | d797bb25dd06f988315a81a281a5a5f1c3e10d12 (patch) | |
tree | aff95a17bf29729f54b055e1d09bdd56fe3732b8 | |
parent | b270e5b7f32e5783cd130a789ec3c8b3094186f7 (diff) | |
download | sword-tools-d797bb25dd06f988315a81a281a5a5f1c3e10d12.tar.gz |
Updated; now produces USFM for the text part
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@303 07627401-56e2-0310-80f4-f8cd0041bdcd
-rwxr-xr-x | modules/portuguese/transform.pl | 67 |
1 file changed, 51 insertions, 16 deletions
diff --git a/modules/portuguese/transform.pl b/modules/portuguese/transform.pl index a9d308b..4762adc 100755 --- a/modules/portuguese/transform.pl +++ b/modules/portuguese/transform.pl @@ -1,25 +1,48 @@ #!/usr/bin/perl +use XML::LibXSLT; +use XML::LibXML; my @files=`ls -1 *.xml`; -# my @ident=`cat books`; foreach (@files){ my @lines; my @text; my $tag; - # my %vs; - # my %kw; + chop; - open SIMPLE, ">>$_.simple.xml"; + open TEXT, ">>$_.text.xml"; + open USFM, ">>$_.text.sfm"; + open PREFACE, ">>$_.preface.xml"; + chomp(@lines=`cat $_`); - # @lines[1]="\\id @ident[$_-1] "; + foreach (@lines) { + s/(size=\"20\"\ face=\".*?\-Bold\"\ color=\"\#6D6E70\")/class=\"chapter\"\ $1/g; + } + +SPLIT: foreach (@lines) { + + if (/chapter/) { + print (PREFACE "</page></pdf2xml>"); + $text='<?xml version="1.0" encoding="utf-8" ?><pdf2xml><page>'; + last SPLIT; + } + else { + s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"intro_para\"\ $1/g; + print (PREFACE $_."\n"); + $_=""; + } + } + + + foreach (@lines) { + s/(size=\"17\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ \ $1/g; s/(size=\"4\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"verse_no\"\ \ $1/g; s/(size=\"8\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"maintext\"\ \ $1/g; @@ -33,11 +56,12 @@ foreach (@files){ # s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"Intro_title\"\ $1/g; s/<text.*?>/<text>\ $1/g; s/(size=\"7\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"ref_key\"\ $1/g; - s/(size=\"20\"\ face=\".*?\-Bold\"\ color=\"\#6D6E70\")/class=\"chapter\"\ $1/g; - s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"intro_para\"\ $1/g; s/(size=\"6\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"bookname2\"\ $1/g; s/$/\n/; - } + } + + + foreach (@lines) { @@ -51,7 +75,7 @@ foreach (@files){ } - $text = join ("",@lines); + $text = $text.join ("",@lines); $text =~ s/\n\s*<text>//g; @@ -60,14 +84,25 @@ foreach (@files){ $text =~ 
s/(S)<\/maintext>\n\s*<Lords_Name>\s*(ENHOR)<\/Lords_Name>\n\s*<maintext>/<Lords_Name>$1$2<\/Lords_Name>/g; $text =~ s/(<verse_no>.*?<\/verse_no>)\n\s*(<maintext>.*?<\/maintext>)\n\s*?:(<verse_no>)/<verse>$1$2<\/verse>\n<verse_no>/g; - # @lines = split(/\n/,$text); - # foreach (@lines) { - - + # create an instance of XSL::XSLT processor + print TEXT $text; + close text; + + my $parser = new XML::LibXML; + my $xslt = new XML::LibXSLT; + + my $source = $parser->parse_string($text); + my $style_doc = $parser->parse_file('transform.xsl'); - - print (SIMPLE $text); - close SIMPLE; + my $stylesheet = $xslt->parse_stylesheet($style_doc); + my $results = $stylesheet->transform($source); + + print USFM $stylesheet->output_string($results); + + close USFM; + + $text=""; + } |