#!/usr/bin/perl
## confmaker.pl - provides an initial conf file for a new module by analysing a given OSIS XML file.
## The programme searches for relevant tags and creates the GlobalOptionFilter entries and other relevant conf entries.
## Licensed under the standard BSD license:
# Copyright (c) 2002-2009 CrossWire Bible Society <http://www.crosswire.org/>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of the CrossWire Bible Society nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written
# permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
## For general inquiries, comments, suggestions, bug reports, etc. email:
## sword-support@crosswire.org
#########################################################################
use XML::LibXML;
use I18N::LangTags::List;
use Unicode::UCD 'charinfo';
#use open ':std', ':encoding(UTF-8)';
#use open qw/:std :utf8/;
use utf8;
use Sword;
use HTML::Strip;
# Minimum SWORD engine version required by each supported versification
# (v11n) system, keyed by the v11n name. This table is what the script
# validates the requested v11n against and what MinimumVersion is read from.
my %version = (
    KJV         => '1.5.9',
    KJVA        => '1.6.0',
    NRSV        => '1.6.0',
    NRSVA       => '1.6.0',
    MT          => '1.6.0',
    Leningrad   => '1.6.0',
    Synodal     => '1.6.1',
    Vulg        => '1.6.1',
    Luther      => '1.6.1',
    German      => '1.6.1',
    Catholic    => '1.6.2',
    Catholic2   => '1.6.2',
    LXX         => '1.7.2',
    Orthodox    => '1.7.2',
    SynodalProt => '1.7.2',
    DarbyFr     => '1.8.0',
    Segond      => '1.8.0',
    Calvin      => '1.8.0',
);

# Names of the supported versification systems, in the same order as the
# %version table. Hash keys are case-sensitive, so every entry here must
# match a %version key exactly (the list previously had 'DarbyFR', which
# has no entry in %version).
my @av11n = qw(
    KJV KJVA NRSV NRSVA MT Leningrad Synodal Vulg
    Luther German Catholic Catholic2 LXX Orthodox
    SynodalProt DarbyFr Segond Calvin
);

# Versification used when none is chosen on the command line.
my $v11n = "KJV";
## Obtain arguments
# Without at least the OSIS file name there is nothing to do: print the
# usage summary and stop with a non-zero status.
if (@ARGV < 1) {
    print "\nconfmaker.pl -- - provides a initial conf file for a new module by analysing given OSIS xml file.\n";
    print "Syntax: confmaker.pl <osis XML file> [-o <conf-output-file>] [-i <conf-input-file>] [-m] [-l <language-code>] [-v <v11n>]\n";
    print "- Arguments in braces < > are required. Arguments in brackets [ ] are optional.\n";
    print "- If no -o option is specified <STDOUT> is used.\n";
    # This line used to lack its trailing newline, gluing it to the next one.
    print "- if the -m option is used no -i option may be used. -m expects parametres added by other means, e.g. a makefile\n";
    print "- The script can currently produce a valid conf file for OSIS bibles, but not for any other import formats.\n";
    exit(-1);
}
# Parse the command line.
#
# $ARGV[0] is the OSIS XML file. The remaining arguments are options:
#   -o <file>  write the conf to <file> (selected as the default output handle)
#   -i <file>  read extra conf lines from <file>, echoed at the end of the conf
#   -m         makefile mode: suppress the default trailer entries
#   -l <code>  language code to use when the document does not declare one
#   -v <v11n>  versification system (defaults to the value of $v11n)
#
# Options are accepted in any order. Previously they were only recognised in
# the fixed order -o, -i, -m, -l, -v and silently dropped otherwise.
my $file = $ARGV[0];
my ($outputFileName, $inputFileName, $makefile, $language);
my @inputFile;
my $out_fh;    # file-scoped so the selected output handle stays open until exit

my @pending = @ARGV[1 .. $#ARGV];
while (@pending) {
    my $opt = shift @pending;

    if ($opt eq '-m') {
        $makefile = 1;
        next;
    }
    if ($opt !~ /\A-[oilv]\z/) {
        # Unknown arguments were always ignored; now at least say so.
        print STDERR "Ignoring unrecognised argument '$opt'.\n";
        next;
    }

    die "Option $opt requires a value.\n" unless @pending;
    my $value = shift @pending;

    if ($opt eq '-o') {
        $outputFileName = $value;
        open($out_fh, '>', $outputFileName)
            or die "Could not open file $outputFileName for writing: $!";
        # Every plain print from here on goes to the conf file.
        select($out_fh);
    }
    elsif ($opt eq '-i') {
        $inputFileName = $value;
        open(my $in_fh, '<', $inputFileName)
            or die "Could not open inputfile $inputFileName for reading: $!";
        @inputFile = <$in_fh>;
        close($in_fh);
    }
    elsif ($opt eq '-l') {
        $language = $value;
    }
    else {    # -v
        $v11n = $value;
    }
}

# -m expects the remaining entries to come from elsewhere (e.g. a makefile),
# so it cannot be combined with an input file.
if ($makefile && defined $inputFileName) {
    print STDERR "You can not define both an input file and use the -m option, sorry...\n";
    exit 1;
}
# Parse the OSIS document.
my $parser = XML::LibXML->new();
my $doc = $parser->parse_file($file);

# Drop every <header> element before anything else looks at the text, as its
# presence can cause confusion. This replaces an XPath query ('/header/')
# that is not a valid expression, and it has to happen before $doc_text is
# captured: that snapshot is later compared with text serialised from the
# header-less document.
$_->unbindNode for $doc->getElementsByTagName('header');

# SWORD manager with all diacritic options switched off, so that running the
# matching filter over a text strips those marks.
my $manager = Sword::SWMgr->new();
$manager->setGlobalOption("Hebrew Vowel Points", "Off");
$manager->setGlobalOption("Hebrew Cantillation", "Off");
$manager->setGlobalOption("Arabic Vowel Points", "Off");
$manager->setGlobalOption("Greek Accents", "Off");

# Unfiltered, markup-free snapshot of the document text.
my $hs = HTML::Strip->new();
my $doc_text = Sword::SWBuf->new($hs->parse($doc->toString()));
$hs->eof;    # reset the stripper so later parse() calls start from a clean state

## obtain name, type and language
my ($osis_text) = $doc->getElementsByTagName('osisText');
die "No <osisText> element found in $file\n" unless defined $osis_text;

my $doc_name = $osis_text->getAttribute('osisIDWork');
my $doc_type = $osis_text->getAttribute('osisRefWork');
my $doc_lang = $osis_text->getAttribute('xml:lang');
my $doc_lang_name = I18N::LangTags::List::name($doc_lang);
# Reconcile the language given on the command line ($language) with the one
# declared by the document ($doc_lang). At least one must be present, and if
# both are present they must agree. Failures now exit with a non-zero status
# (a bare exit reported success), and the stray debug print of the empty
# values is gone.
my $have_cli_lang = (defined $language && length $language) ? 1 : 0;
my $have_doc_lang = (defined $doc_lang && length $doc_lang) ? 1 : 0;

if (!$have_cli_lang && !$have_doc_lang) {
    print STDERR "The language is undefined and no language was given on the commandline !\n";
    exit 1;
}
if ($have_cli_lang && $have_doc_lang && $language ne $doc_lang) {
    print STDERR "The language ($language) given on the commandline and the language of the document ($doc_lang_name) appear not to agree with each other !\n";
    exit 1;
}
# Only the command line supplied a language: derive the display name from it.
if ($have_cli_lang && !$have_doc_lang) {
    $doc_lang_name = I18N::LangTags::List::name($language);
}
# Reject a versification system that has no entry in %version. The list of
# valid names is taken from %version itself (the table used for validation),
# so the names shown are exactly the names that will be accepted. They are
# printed four to a line, each line starting with a tab.
if (!exists $version{$v11n}) {
    print STDERR "This versification ($v11n) does not exist (yet) \n";
    print STDERR "Valid versification systems are\n";
    my @valid = sort { lc($a) cmp lc($b) } keys %version;
    while (my @row = splice(@valid, 0, 4)) {
        print STDERR "\t", join(' ', @row), "\n";
    }
    exit(-1);
}
# Detach every <header> element (together with its children) from the
# document, as its presence can cause confusion.
$_->unbindNode for $doc->getElementsByTagName('header');
## GlobalOptionsFilter - prepare
# Element names whose presence in the document is checked.
my @doc_features = qw(title note reference q figure rdg);

# Attribute names that are looked for on <w> elements.
my @word_features = qw(lemma strong gloss morph);

# SWORD global option names for the diacritic filters that are probed.
my @char_features = (
    'Hebrew Vowel Points',
    'Arabic Vowel Points',
    'Hebrew Cantillation',
    'Greek Accents',
);

# Element/attribute name => GlobalOptionFilter value.
my %doc_filters = (
    title     => 'OSISHeadings',
    note      => 'OSISFootnotes',
    reference => 'OSISScripref',
    gloss     => 'OSISGlosses',
    lemma     => 'OSISLemma',
    strong    => 'OSISStrongs',
    morph     => 'OSISMorph',
    q         => 'OSISRedLetterWords',
    rdg       => 'OSISVariants',
    enum      => 'OSISEnum',
    xlit      => 'OSISXlit',
);

# Element/attribute name => Feature value.
my %doc_feature = (
    strong => 'StrongsNumbers',
    figure => 'Images',
    p      => 'NoParagraphs',
);

# SWORD global option name => GlobalOptionFilter value.
my %diacritics = (
    'Hebrew Vowel Points' => 'UTF8HebrewPoints',
    'Arabic Vowel Points' => 'UTF8ArabicPoints',
    'Hebrew Cantillation' => 'UTF8Cantillation',
    'Greek Accents'       => 'UTF8GreekAccents',
);

# Filled in by the detection pass: name => true when the feature was found.
my %doc_has_feature;
## GlobalOptionsFilter - search for
# Record in %doc_has_feature which features the document uses. Flags are
# stored as 1 rather than the bareword 'true', which was only truthy by
# accident (a non-empty string) and would not compile under 'use strict'.

# Element-level features: present as soon as one such element exists.
# NOTE(review): any <q> element sets the 'q' flag, not only quotations
# attributed to a particular speaker - confirm this is intended.
foreach my $tag (@doc_features) {
    my @found = $doc->getElementsByTagName($tag);
    $doc_has_feature{$tag} = 1 if @found;
}

# Word-level features: present as soon as one <w> carries the attribute.
my @elements = $doc->getElementsByTagName('w');
foreach my $attr (@word_features) {
    foreach my $word (@elements) {
        next unless $word->hasAttribute($attr);
        $doc_has_feature{$attr} = 1;
        last;
    }
}

# Strong's numbers can also be carried inside the lemma attribute, so a
# lemma value mentioning "strong" (case-insensitive) counts as well. The
# scan is skipped when the flag is already set.
if ($doc_has_feature{'lemma'} && !$doc_has_feature{'strong'}) {
    foreach my $word (@elements) {
        my $lemma = $word->getAttribute('lemma');
        next unless defined $lemma;
        if (index(lc($lemma), 'strong') != -1) {
            $doc_has_feature{'strong'} = 1;
            last;
        }
    }
}

# A document without any <p> element gets the 'p' flag.
my @paragraphs = $doc->getElementsByTagName('p');
$doc_has_feature{'p'} = 1 if @paragraphs == 0;
# Assemble and print out
# Section name, then driver and data path chosen from the document type.
print "[" . $doc_name . "]\n";
my $known_type = 0;
if ($doc_type =~ m/Bible/i) {
    print "ModDrv=zText\n";
    print "DataPath=./modules/texts/ztext/" . lc($doc_name) . "/\n";
    $known_type = 1;
}
if ($doc_type =~ m/Commentary/i) {
    print "ModDrv=zCom\n";
    print "DataPath=./modules/comments/zcom/" . lc($doc_name) . "/\n";
    $known_type = 1;
}
# Without ModDrv/DataPath the conf is incomplete; say so instead of staying silent.
if (!$known_type) {
    print STDERR "Warning: document type is neither Bible nor Commentary; no ModDrv/DataPath written.\n";
}

print "CompressType=ZIP\n";
print "BlockType=BOOK\n";
print "Encoding=UTF-8\n";
print "SourceType=OSIS\n";

# Today's local date as YYYY-MM-DD, produced in-process rather than by
# shelling out to date(1).
require POSIX;
print "SwordVersionDate=" . POSIX::strftime('%Y-%m-%d', localtime) . "\n";

# Use the document's own language code; when the document declares none,
# use the code given with -l, so that Lang= is not left empty.
my $conf_lang = (defined $doc_lang && length $doc_lang) ? $doc_lang : $language;
print "Lang=" . $conf_lang . "\n";
# One GlobalOptionFilter line per detected element or word feature, element
# features first. A feature with no %doc_filters entry (such as 'figure',
# which only maps to a Feature line) is skipped; it used to produce an empty
# "GlobalOptionFilter=" line.
foreach my $feature (@doc_features, @word_features) {
    next unless $doc_has_feature{$feature};
    next unless exists $doc_filters{$feature};
    print "GlobalOptionFilter=" . $doc_filters{$feature} . "\n";
}
# Diacritic filters: run each SWORD filter over a copy of the markup-free
# document text and emit its GlobalOptionFilter line when the filter changed
# anything, i.e. when the text contains marks of that kind.
#
# The baseline is built here from the same serialisation as the filtered
# copies, so differences in document state (e.g. a removed header) cannot
# cause false positives. The document is serialised once, not per filter.
$hs->eof;    # reset the stripper in case an earlier parse() left state behind
my $plain_text = $hs->parse($doc->toString());
$hs->eof;
my $baseline = Sword::SWBuf->new($plain_text);

foreach my $filter (@char_features) {
    my $filtered = Sword::SWBuf->new($plain_text);
    $manager->filterText($filter, $filtered);
    if ($filtered->c_str() ne $baseline->c_str()) {
        # Plain element access; the previous %diacritics{...} form is a
        # key/value slice that needs Perl 5.20 or later.
        print "GlobalOptionFilter=" . $diacritics{$filter} . "\n";
    }
}
# Feature lines: one for every detected element or word feature that has a
# %doc_feature entry (element features first), followed by the 'p' entry
# when that flag is set.
my @with_feature_entry =
    grep { $doc_has_feature{$_} && exists $doc_feature{$_} }
    (@doc_features, @word_features);

for my $name (@with_feature_entry) {
    print "Feature=" . $doc_feature{$name} . "\n";
}

print "Feature=" . $doc_feature{'p'} . "\n" if $doc_has_feature{'p'};
# LCSH is "<type>.<language name>". $doc_lang_name already holds the resolved
# language name (falling back to the -l language when the document declares
# none), so use it rather than looking up $doc_lang again.
print "LCSH=" . $doc_type . "." . $doc_lang_name . "\n";
# Minimum engine version follows from the chosen versification system.
print "MinimumVersion=" . $version{$v11n} . "\n";
print "Versification=" . $v11n . "\n";
# Trailer: echo the lines read from the -i input file when there are any;
# otherwise, unless makefile mode is on, emit a default set of entries.
if (@inputFile) {
    print for @inputFile;
}
elsif (!$makefile) {
    my $summary = $doc_name . " Bible in " . $doc_lang_name;
    print "DistributionLicense=copyrighted. Do not distribute\n";
    print "Description=" . $summary . "\n";
    print "About=" . $summary . "\n";
    print "Version=1.0\n";
    print "History_1.0=First release\n";
}