diff options
author | Peter von Kaehne <refdoc@gmx.net> | 2010-11-06 00:12:39 +0000 |
---|---|---|
committer | Peter von Kaehne <refdoc@gmx.net> | 2010-11-06 00:12:39 +0000 |
commit | 839aaf4f0d09aa47992984f4eda12b40500ac5cb (patch) | |
tree | 2d9916a143f00ead6d65e6abaa94c330f6e34227 /modules/crossreferences | |
parent | 73e0795df1cc18cc284bdd328d1b810d063e1add (diff) | |
download | sword-tools-839aaf4f0d09aa47992984f4eda12b40500ac5cb.tar.gz |
This is a complete makeover of xreffix, now using the Sword API via the Perl bindings. I have written it in a fashion which should allow adapting it easily to different texts.
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@313 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/crossreferences')
-rw-r--r-- | modules/crossreferences/xreffix.pl | 125 |
1 files changed, 115 insertions, 10 deletions
diff --git a/modules/crossreferences/xreffix.pl b/modules/crossreferences/xreffix.pl index 84ed321..2b00722 100644 --- a/modules/crossreferences/xreffix.pl +++ b/modules/crossreferences/xreffix.pl @@ -1,4 +1,5 @@ #!/usr/bin/perl + ## Creates proper OSIS references where usfm2osis has failed. ## Licensed under the standard BSD license: @@ -40,7 +41,6 @@ use Sword; use feature "state"; - $version = "1.1"; $osisVersion = "2.1.1"; @@ -48,7 +48,7 @@ $date = '$Date: 2010-08-04 05:46:26 +0000 (Tue, 04 Aug 2009) $'; $rev = '$Rev: 231 $'; $mgr = new Sword::SWMgr(); -$module = $mgr->getModule('KJV'); +$module = $mgr->getModule('GERSCH2000'); if (scalar(@ARGV) < 1) { print "xreffix.pl -- fixes crossreferences in OSIS files where usfm2osis.pl has failed. version $version\nRevision $rev ($date)\nSyntax: xreffix.pl <input filename> [-o <output-file>] [-l <xreflocale>].\n"; @@ -59,7 +59,7 @@ if ($ARGV[1] eq "-o") { $outputFilename = "$ARGV[2]"; } else { - $outputFilename = "$ARGV[0].fixed"; + $outputFilename = "$ARGV[0].fixed.xml"; } if ($ARGV[1] eq "-l") { $locale = "$ARGV[2];" @@ -83,8 +83,13 @@ $c_book = "Gen"; $c_chapter="1"; $c_verse="1"; -foreach (@data) { +addRefs(); +readLocale(); +foreach (@data) { + + # the actual document locale takes preference. Not sure if this is a good decision + if (/xml:lang\=\"(.+?)\"/) { if ($locale ne $1) { print "This document is in the locale of ".$1."\n"; @@ -92,30 +97,130 @@ foreach (@data) { } } + + # The conversion to OSIS requires a context scope for single verse references. + # This needs to be always maintained and passed on. 
+ if (/<div\ type\=\"book\"\ osisID=\"(.+?)\">/) { $c_book=$1; print "\n"."Now working on ".$c_book."\n"; } if (/<chapter\ sID\=\".*?\.([0-9]+)\"/) { $c_chapter=$1; - print "."; + print "\n"."Now working on ".$c_book.$c_chapter."\n"; + } if (/<verse\ sID\=\".*?\.([0-9]+)\"/) { $c_verse=$1; } - my $scope= new Sword::VerseKey; - $scope->setText($c_book.$c_chapter.$c_verse); - s/<note\ type=\"crossReference\">(.*?)<\/note>/"<note n=\"".note_index()."\" osisID=\"$c_book.$c_chapter.$c_verse!crossReference.".note_index()."\" osisRef=\"$c_book.$c_chapter.$c_verse\" type=\"crossReference\">".Sword::VerseKey::convertToOSIS($1, $scope)."<\/note>"/eg; + + # Finally the isolated references are passed to the actual conversion routine + + s/<reference>(.*?)<\/reference>/createReference($1,$c_book,$c_chapter,$c_verse)/eg; + + + } + +foreach (@data) { + s/<note\ type=\"crossReference\">(.*?)<\/note>/"<note n=\"".note_index()."\" osisID=\"$c_book.$c_chapter.$c_verse!crossReference.".note_index()."\" osisRef=\"$c_book.$c_chapter.$c_verse\" type=\"crossReference\">".$1."<\/note>"/eg; } + print (OUTF @data); close OUTF; -sub note_index { +#################################################################################### + +# In the conversion routine the references need to get cleaned up and prepared for conversion + +sub createReference() { + + my $ref = @_[0]; + print "I got this here: ".$ref."\n"; + print "this is the current scope: ".@_[1].".".@_[2].".".@_[3]."\n"; + my $scope= new Sword::VerseKey; + $scope->setText(@_[1].".".@_[2].".".@_[3]); + + + # This is about changing the various separators etc for non-English vocales into English ones + # You need to be careful if you change any of the indicators. The order of changes currently done is for German. + # Look at the list given in sub readLocale. + # If your text is in English or marked up along English lines you will need to comment out a few sections. 
+ + $ref =~ s/$sep_cv/:/g; + $ref =~ s/;$ind_v\ /;\ /g; + $ref =~ s/^$ind_v//; + # $ref =~ s/$sep_l/,/g; + $ref =~ s/\./,/g; + # Sometimes xrefs have prose content apart from the actual references. + + my @refs = split(/$fill_start/,$ref); + + my $return=''; + foreach (@refs) { + + # I am sure this can be done more elegantly, but I have currently no clue + # Basically repetitive prose content in xrefs like "compare" needs to get "neutralised prior to conversion to OSIS, + # but it should not get lost, so I attach it here to the return string + + if (/^$fill_end/) { + $return = $return." ".$fill; + $_ =~ s/^$fill_end//; + } + + print "I put this here in:".$_."\n"; + $return = $return.Sword::VerseKey::convertToOSIS($_, $scope) ; + } + print "and I created that: ".$return."\n"; + + # After the cleansing and conversion in to English standard we want to recreate in the reference prose the original separators + + $return =~ s/(>.*?),(?=.*?<)/$1.$sep_l.$2/eg; + $return =~ s/(>.*?):(?=.*?<)/$1$sep_cv$2/g; + + + $return; + } + +sub note_index { my @note = qw(a a b b c c d d e e f f g g h h i i j j k k l l m m n n o o p p q q r r s s t t u u v v w w x x y y z z ); state $i=0; - $return = $note[$i % 52]; + my $return = $note[$i % 52]; ++$i; $return; } + + +##################################################################################### +# Edit the following subroutines for your particular project + +# Many locale have different indicators for book/chapter/verse separation etc. The conversion routine requires English standard separators + +sub readLocale () { + + $sep_bc = ' '; # separator between books and chapters + $sep_cv = ','; # separator between chapters and verses + $ind_v = 'V\.'; # indicator for single verse - unfortunatly this will get lost in the conversion. + $sep_l = '\.'; # separator for list of chapters or verses + + $fill_start ="vg"; # indicators for "compare" + $fill_end ="l"; # / -> reads as "vgl." 
+ $fill = "vgl\."#/ + +} + +# Your text might have references which are not yet marked up. Here is your chance to do so. +sub addRefs () { + + foreach (@data) { + + + # references included inline + s/\(z\.B\.\ (.*?)\)/\(z\.B\.\ <reference>$1<\/reference>\)/g; + + # parallel reference subtitles + s/<title\ type=\"parallel\">(.*?)<\/title>/<title\ type=\"parallel\"><reference>$1<\/reference><\/title>/g; + } + +}
\ No newline at end of file |