#!/usr/bin/perl

## confmaker.pl - provides an initial conf file for a new module by analysing given OSIS xml file.

## The programme searches for relevant tags and creates the GlobalOptionFilter entries and other relevant conf entries

## Licensed under the standard BSD license:

# Copyright (c) 2002-2009 CrossWire Bible Society
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#        notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in
#       the documentation and/or other materials provided with the
#       distribution.
#     * Neither the name of the CrossWire Bible Society nor the names of
#       its contributors may be used to endorse or promote products
#       derived from this software without specific prior written
#       permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

## For general inquiries, comments, suggestions, bug reports, etc.
## email: sword-support@crosswire.org
#########################################################################
#
# Usage:
#   confmaker.pl <osis XML file> [-o <FILE>] [-i <FILE>] [-m] [-l <lg>] [-v <v11n>]
#
# Reads an OSIS XML file, detects which OSIS features it uses (headings,
# footnotes, Strong's numbers, diacritics, ...) and prints a SWORD module
# .conf file to STDOUT, or to the file given with -o.
#
# Exit status: 0 on success, non-zero on any usage or input error.

use strict;
use warnings;

use XML::LibXML;
use I18N::LangTags::List;
use Unicode::UCD 'charinfo';
#use open ':std', ':encoding(UTF-8)';
#use open qw/:std :utf8/;
use utf8;
use POSIX qw(strftime);
use Sword;
use HTML::Strip;

# Minimum SWORD engine version required by each versification (v11n) system.
my %version = (
    KJV         => '1.5.9',
    KJVA        => '1.6.0',
    NRSV        => '1.6.0',
    NRSVA       => '1.6.0',
    MT          => '1.6.0',
    Leningrad   => '1.6.0',
    Synodal     => '1.6.1',
    Vulg        => '1.6.1',
    Luther      => '1.6.1',
    German      => '1.6.1',
    Catholic    => '1.6.2',
    Catholic2   => '1.6.2',
    LXX         => '1.7.2',
    Orthodox    => '1.7.2',
    SynodalProt => '1.7.2',
    DarbyFr     => '1.8.0',
    Segond      => '1.8.0',
    Calvin      => '1.8.0',
);

# Versification names in the order they are shown to the user. Every entry
# must be spelled exactly like its key in %version, because validation is
# done against %version.
my @av11n = (
    'KJV',      'KJVA',      'NRSV', 'NRSVA',
    'MT',       'Leningrad', 'Synodal', 'Vulg',
    'Luther',   'German',    'Catholic', 'Catholic2',
    'LXX',      'Orthodox',  'SynodalProt', 'DarbyFr',
    'Segond',   'Calvin',
);

# If the script is called without a v11n chosen it will set KJV as standard.
my $v11n = "KJV";

## GlobalOptionsFilter - lookup tables
# Element names whose mere presence in the document switches a feature on.
my @doc_features  = ('title', 'note', 'reference', 'q', 'figure', 'rdg', 'seg');
# Attributes of <w> elements whose presence switches a feature on.
my @word_features = ('lemma', 'strong', 'gloss', 'morph');
# SWORD global option names used to detect diacritics in the text.
my @char_features = ('Hebrew Vowel Points', 'Arabic Vowel Points', 'Hebrew Cantillation', 'Greek Accents');

# Feature -> GlobalOptionFilter value. Not every feature has a filter
# ('figure' only yields a Feature= line).
my %doc_filters = (
    'title'     => "OSISHeadings",
    'note'      => "OSISFootnotes",
    'reference' => "OSISScripref",
    'gloss'     => "OSISGlosses",
    'lemma'     => "OSISLemma",
    'strong'    => "OSISStrongs",
    'morph'     => "OSISMorph",
    'q'         => "OSISRedLetterWords",
    'rdg'       => 'OSISVariants',
    'enum'      => 'OSISEnum',
    'xlit'      => 'OSISXlit',
    'seg'       => 'OSISMorphSegmentation',
);

# Feature -> Feature= value.
my %doc_feature = (
    'strong' => 'StrongsNumbers',
    'figure' => 'Images',
    'p'      => 'NoParagraphs',
);

# SWORD global option name -> GlobalOptionFilter value.
my %diacritics = (
    'Hebrew Vowel Points' => "UTF8HebrewPoints",
    'Arabic Vowel Points' => 'UTF8ArabicPoints',
    'Hebrew Cantillation' => 'UTF8Cantillation',
    'Greek Accents'       => 'UTF8GreekAccents',
);

# Return the English name for a language tag, or '' when the tag is empty
# or unknown to I18N::LangTags::List (which returns undef in that case).
sub language_name {
    my ($tag) = @_;
    return '' if !defined $tag || $tag eq '';
    my $name = I18N::LangTags::List::name($tag);
    return defined $name ? $name : '';
}

## Obtain arguments
if (scalar(@ARGV) < 1) {
    print "\nconfmaker.pl -- - provides a initial conf file for a new module by analysing given OSIS xml file.\n";
    print "Syntax: confmaker.pl <osis XML file> [-o <FILE>] [-i <FILE>] [-m] [-l <lg>] [-v <v11n>]\n";
    print "- Arguments in braces < > are required. Arguments in brackets [ ] are optional.\n";
    print "- If no -o option is specified <STDOUT> is used.\n";
    print "- if the -m option is used no -i option may be used. -m expects parametres added by other means, e.g. a makefile\n";
    print "- The script can currently produce a valid conf file for OSIS bibles, but not for any other import formats.\n";
    exit(-1);
}

my ($file, @options) = @ARGV;

my $outputFileName;
my $inputFileName;
my $makefile = 0;
my $language = '';

# Options that take a value, mapped to the variable that receives it.
my %value_option = (
    '-o' => \$outputFileName,
    '-i' => \$inputFileName,
    '-l' => \$language,
    '-v' => \$v11n,
);

# Options are accepted in any order after the OSIS file name.
while (@options) {
    my $option = shift @options;
    if ($option eq '-m') {
        $makefile = 1;
    }
    elsif (exists $value_option{$option}) {
        if (!@options) {
            print STDERR "Option $option requires a value\n";
            exit 1;
        }
        ${ $value_option{$option} } = shift @options;
    }
    else {
        print STDERR "Ignoring unrecognised argument '$option'\n";
    }
}

if ($makefile && defined $inputFileName) {
    print STDERR "You can not define both an input file and use the -m option, sorry...\n";
    exit 1;
}

# Validate the versification before doing any expensive work.
if (!exists $version{$v11n}) {
    print STDERR "This versification does not exist (yet) \n";
    print STDERR "Valid versfication systems are\n\t";
    # List the known systems four to a line.
    my @names = @av11n;
    my @rows;
    while (@names) {
        push @rows, join('', map { "$_ " } splice(@names, 0, 4));
    }
    print STDERR join("\n\t", @rows), "\n";
    exit(-1);
}

# Lines of the -i file are appended verbatim to the generated conf.
my @inputFile;
if (defined $inputFileName) {
    open(my $in_fh, '<', $inputFileName)
        or die "Could not open inputfile $inputFileName for reading: $!";
    @inputFile = <$in_fh>;
    close($in_fh);
}

my $parser = XML::LibXML->new();
my $doc    = $parser->parse_file($file);

# With every diacritics option switched off, filterText() strips the
# corresponding marks; a changed text therefore proves they were present.
my $manager = Sword::SWMgr->new();
$manager->setGlobalOption($_, "Off") for @char_features;

## obtain name, type and language
my ($osis_text) = $doc->getElementsByTagName('osisText');
if (!defined $osis_text) {
    print STDERR "No osisText element found in $file\n";
    exit 1;
}
my ($doc_name, $doc_type, $doc_lang) = map {
    my $value = $osis_text->getAttribute($_);
    defined $value ? $value : '';
} ('osisIDWork', 'osisRefWork', 'xml:lang');

if ($language eq '' && $doc_lang eq '') {
    print STDERR "The language is undefined and no language was given on the commandline !\n";
    exit 1;
}

if ($language ne '' && $doc_lang ne '' && $language ne $doc_lang) {
    my $doc_lang_label = language_name($doc_lang);
    print STDERR "The language ($language) given on the commandline and the language of the document ($doc_lang_label) appear not to agree with each other !\n";
    exit 1;
}

# The document's own language wins; the commandline value is the fallback
# when the document does not declare one.
my $module_lang   = $doc_lang ne '' ? $doc_lang : $language;
my $doc_lang_name = language_name($module_lang);

# Remove the <header> element and its child nodes as its presence can cause
# confusion (its text and tags would otherwise count as module content).
for my $header ($doc->getElementsByTagName('header')) {
    $header->unbindNode;
}

# Plain text of the document body, taken AFTER the header was removed so
# that it is the same text the diacritics filters are run on below.
my $hs         = HTML::Strip->new();
my $plain_text = $hs->parse($doc->toString());
$hs->eof;
my $doc_text = Sword::SWBuf->new($plain_text);

## GlobalOptionsFilter - search for features
my %doc_has_feature;

for my $tag (@doc_features) {
    my @found = $doc->getElementsByTagName($tag);
    $doc_has_feature{$tag} = 1 if @found > 0;
}

my @words = $doc->getElementsByTagName('w');
for my $attribute (@word_features) {
    for my $word (@words) {
        if ($word->hasAttribute($attribute)) {
            $doc_has_feature{$attribute} = 1;
            last;
        }
    }
}

# Strong's numbers are usually carried inside the lemma attribute.
if ($doc_has_feature{'lemma'}) {
    for my $word (@words) {
        next if !$word->hasAttribute('lemma');
        my $lemma = $word->getAttribute('lemma');
        if (index(lc($lemma), 'strong') != -1) {
            $doc_has_feature{'strong'} = 1;
            last;
        }
    }
}

my @paragraphs = $doc->getElementsByTagName('p');
$doc_has_feature{'p'} = 1 if @paragraphs == 0;

# Assemble and print out.
# The output file is opened only now, so that a failure above does not
# truncate an existing conf file.
my $out_fh;
if (defined $outputFileName) {
    open($out_fh, '>', $outputFileName)
        or die "Could not open file $outputFileName for writing: $!";
    select($out_fh);
}

print "[" . $doc_name . "]\n";

if ($doc_type =~ m/Bible/i) {
    print "ModDrv=zText\n";
    print "DataPath=./modules/texts/ztext/" . lc($doc_name) . "/\n";
}

if ($doc_type =~ m/Commentary/i) {
    print "ModDrv=zCom\n";
    print "DataPath=./modules/comments/zcom/" . lc($doc_name) . "/\n";
}

print "CompressType=ZIP\n";
print "BlockType=BOOK\n";
print "Encoding=UTF-8\n";
print "SourceType=OSIS\n";
print "SwordVersionDate=" . strftime('%Y-%m-%d', localtime) . "\n";
print "Lang=" . $module_lang . "\n";

for my $feature (@doc_features, @word_features) {
    # Skip features that have no option filter (e.g. 'figure').
    next if !$doc_has_feature{$feature} || !exists $doc_filters{$feature};
    print "GlobalOptionFilter=" . $doc_filters{$feature} . "\n";
}

for my $filter (@char_features) {
    my $tmp = Sword::SWBuf->new($plain_text);
    $manager->filterText($filter, $tmp);
    if ($tmp->c_str() ne $doc_text->c_str()) {
        print "GlobalOptionFilter=" . $diacritics{$filter} . "\n";
    }
}

for my $feature (@doc_features, @word_features) {
    next if !$doc_has_feature{$feature} || !exists $doc_feature{$feature};
    print "Feature=" . $doc_feature{$feature} . "\n";
}

if ($doc_has_feature{'p'}) {
    print "Feature=" . $doc_feature{'p'} . "\n";
}

print "LCSH=" . $doc_type . "." . $doc_lang_name . "\n";
print "MinimumVersion=" . $version{$v11n} . "\n";
print "Versification=" . $v11n . "\n";

if (@inputFile > 0) {
    print $_ for @inputFile;
}
elsif (!$makefile) {
    # No -i file and no makefile: emit placeholder metadata.
    print "DistributionLicense=copyrighted. Do not distribute\n";
    print "Description=" . $doc_name . " Bible in " . $doc_lang_name . "\n";
    print "About=" . $doc_name . " Bible in " . $doc_lang_name . "\n";
    print "Version=1.0\n";
    print "History_1.0=First release\n";
}

if (defined $out_fh) {
    select(STDOUT);
    # Buffered write errors only surface at close.
    close($out_fh) or die "Could not finish writing $outputFileName: $!";
}