#!/usr/bin/perl
## confmaker.pl - provides an initial conf file for a new module by analysing a given OSIS XML file.
## The program searches for relevant tags and creates the GlobalOptionFilter entries and other relevant conf entries.
## Licensed under the standard BSD license:
# Copyright (c) 2002-2009 CrossWire Bible Society <http://www.crosswire.org/>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of the CrossWire Bible Society nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written
# permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
## For general inquiries, comments, suggestions, bug reports, etc. email:
## sword-support@crosswire.org
#########################################################################
use XML::LibXML;
use I18N::LangTags::List;
use Unicode::UCD 'charinfo';
#use open ':std', ':encoding(UTF-8)';
#use open qw/:std :utf8/;
use utf8;
use Sword;
use HTML::Strip;
## Obtain arguments
# Without at least the OSIS file argument there is nothing to do: print the
# usage summary and leave with a failure status.
if (scalar(@ARGV) < 1) {
    print "\nconfmaker.pl -- - provides a initial conf file for a new module by analysing given OSIS xml file.\n";
    print "Syntax: confmaker.pl <osis XML file> [-o <conf-output-file>] [-i <conf-input-file>] [-m] [-l <language-code>]\n";
    print "- Arguments in braces < > are required. Arguments in brackets [ ] are optional.\n";
    print "- If no -o option is specified <STDOUT> is used.\n";
    # This line used to lack its newline and ran into the next help line.
    print "- if the -m option is used no -i option may be used. -m expects parametres added by other means, e.g. a makefile\n";
    print "- The script can currently produce a valid conf file for OSIS bibles, but not for any other import formats.\n";
    exit (-1);
}
# The first argument is always the OSIS XML file to analyse. The optional
# switches are recognised only in the order given in the usage text
# (-o, -i, -m, -l); $nextarg is the index of the next unconsumed argument.
my $file = $ARGV[0];
my $nextarg = 1;
my ($outputFileName, $inputFileName, @inputFile, $makefile, $language);

if (defined $ARGV[$nextarg] && $ARGV[$nextarg] eq "-o") {
    # A missing value becomes "" so that open() fails cleanly; an undefined
    # file name would make three-argument open() create an anonymous
    # temporary file instead of reporting the mistake.
    $outputFileName = defined $ARGV[$nextarg+1] ? $ARGV[$nextarg+1] : "";
    $nextarg += 2;
    open (OUTF, ">", $outputFileName) or die "Could not open file $outputFileName for writing: $!";
    # From here on every plain print() goes to the conf output file.
    select(OUTF);
}

if (defined $ARGV[$nextarg] && $ARGV[$nextarg] eq "-i") {
    $inputFileName = defined $ARGV[$nextarg+1] ? $ARGV[$nextarg+1] : "";
    $nextarg += 2;
    # The input conf fragment is slurped once and copied verbatim into the
    # output later, so the handle can be closed right away.
    open (my $in_fh, "<", $inputFileName) or die "Could not open inputfile $inputFileName for reading: $!";
    @inputFile = <$in_fh>;
    close($in_fh);
}

if (defined $ARGV[$nextarg] && $ARGV[$nextarg] eq "-m") {
    if ($inputFileName) {
        # Report on STDERR: the default output handle may already be the
        # conf file selected by -o.
        print STDERR "You can not define both an input file and use the -m option, sorry...\n";
        exit 1;
    }
    $makefile = 1;
    $nextarg += 1;
}

if (defined $ARGV[$nextarg] && $ARGV[$nextarg] eq "-l") {
    # An absent or empty language code is tolerated here; it simply means
    # that no language was given on the command line.
    $language = defined $ARGV[$nextarg+1] ? $ARGV[$nextarg+1] : "";
    $nextarg += 2;
}
# Load the OSIS file into a DOM tree.
my $parser = XML::LibXML->new();
my $doc = $parser->parse_file($file);

# Sword manager with the diacritic-related global options switched off.
my $manager = Sword::SWMgr->new();
for my $option ("Hebrew Vowel Points", "Hebrew Cantillation", "Arabic Vowel Points") {
    $manager->setGlobalOption($option, "Off");
}
#$manager->setGlobalOption("Greek Accents", "Off");

# Markup-free copy of the whole document, kept as the reference text that
# filtered copies are compared against later.
my $hs = HTML::Strip->new();
my $doc_text = Sword::SWBuf->new($hs->parse($doc->toString()));
## obtain name, type and language
# All three values are attributes of the first <osisText> element.
my @elements = $doc->getElementsByTagName('osisText');
if (!@elements) {
    # Without this check the attribute lookups below would die on an
    # undefined value with a far less helpful message.
    print STDERR "No osisText element was found in the document !\n";
    exit 1;
}
my $osis_text = $elements[0];
my $doc_name = $osis_text->getAttribute('osisIDWork');
my $doc_type = $osis_text->getAttribute('osisRefWork');
my $doc_lang = $osis_text->getAttribute('xml:lang');

# Treat "attribute missing" / "option not given" the same as an empty string
# so the comparisons below are well defined.
$doc_lang = '' unless defined $doc_lang;
$language = '' unless defined $language;
my $doc_lang_name = I18N::LangTags::List::name($doc_lang);

# Each failure leaves with a non-zero status so that calling scripts and
# makefiles can notice that no usable conf file was produced.
if ($language eq '' && $doc_lang eq '') {
    print STDERR "The language is undefined and no language was given on the commandline !\n";
    exit 1;
}
if ($language ne '' && $doc_lang ne '' && $language ne $doc_lang) {
    print STDERR "The language ($language) given on the commandline and the language of the document ($doc_lang_name) appear not to agree with each other !\n";
    exit 1;
}
if ($language ne '' && $doc_lang eq '') {
    # The document carries no language of its own: adopt the command-line
    # code for both the language code and its display name, so that the
    # generated Lang= entry is not left empty.
    $doc_lang = $language;
    $doc_lang_name = I18N::LangTags::List::name($language);
}
##GlobalOptionsFilter - prepare
# Element names, <w> attribute names and diacritic option names to probe for.
my @doc_features  = qw(title note reference q figure rdg);
my @word_features = qw(lemma strong gloss morph);
my @char_features = (
    'Hebrew Vowel Points',
    'Arabic Vowel Points',
    'Hebrew Cantillation',
    'Greek Accents',
);

# Feature name => value written to a GlobalOptionFilter= line.
my %doc_filters = (
    'title'     => "OSISHeadings",
    'note'      => "OSISFootnotes",
    'reference' => "OSISScripref",
    'gloss'     => "OSISGlosses",
    'lemma'     => "OSISLemma",
    'strong'    => "OSISStrongs",
    'morph'     => "OSISMorph",
    'q'         => "OSISRedLetterWords",
    'rdg'       => 'OSISVariants',
    'enum'      => 'OSISEnum',
    'xlit'      => 'OSISXlit',
);

# Feature name => value written to a Feature= line.
my %doc_feature = (
    'strong' => 'StrongsNumbers',
    'figure' => 'Images',
    'p'      => 'NoParagraphs',
);

# Diacritic option name => value written to a GlobalOptionFilter= line.
my %diacritics = (
    'Hebrew Vowel Points' => "UTF8HebrewPoints",
    'Arabic Vowel Points' => 'UTF8ArabicPoints',
    'Hebrew Cantillation' => 'UTF8Cantillation',
    'Greek Accents'       => 'UTF8GreekAccents',
);

# Filled in by the detection pass: feature name => true when present.
my %doc_has_feature;
## GlobalOptionsFilter - search for
# Element-level features: present as soon as the document contains at least
# one element with that tag name.
foreach my $tag (@doc_features) {
    my @found = $doc->getElementsByTagName($tag);
    $doc_has_feature{$tag} = 1 if @found;
}

# Word-level features are attributes carried by <w> elements. A separate
# array name is used so the <osisText> list declared earlier is not masked.
my @words = $doc->getElementsByTagName('w');
foreach my $attribute (@word_features) {
    foreach my $word (@words) {
        if ($word->hasAttribute($attribute)) {
            $doc_has_feature{$attribute} = 1;
            last;    # one occurrence is enough
        }
    }
}

# Strong's numbers are not necessarily in an attribute of their own: a lemma
# value containing "strong" (any letter case) counts as well.
if ($doc_has_feature{'lemma'}) {
    foreach my $word (@words) {
        next unless $word->hasAttribute('lemma');
        my $lemma = $word->getAttribute('lemma');
        if (index(lc($lemma), 'strong') != -1) {
            $doc_has_feature{'strong'} = 1;
            last;
        }
    }
}

# 'p' is an inverted flag: it is set when the document has NO <p> elements.
my @paragraphs = $doc->getElementsByTagName('p');
$doc_has_feature{'p'} = 1 if @paragraphs == 0;
# Assemble and print out
# Section header plus the entries that depend only on the work type, the
# current date and the language code.
print "[".$doc_name."]\n";
my $known_type = 0;
if ($doc_type =~ m/Bible/i) {
    print "ModDrv=zText\n";
    print "DataPath=./modules/texts/ztext/".lc($doc_name)."/\n";
    $known_type = 1;
}
if ($doc_type =~ m/Commentary/i) {
    print "ModDrv=zCom\n";
    print "DataPath=./modules/comments/zcom/".lc($doc_name)."/\n";
    $known_type = 1;
}
if (!$known_type) {
    # The conf is still written, but without ModDrv/DataPath entries; say so
    # on STDERR rather than failing silently.
    print STDERR "Warning: osisRefWork is neither Bible nor Commentary, no ModDrv/DataPath entries were written.\n";
}
print "CompressType=ZIP\n";
print "BlockType=BOOK\n";
print "Encoding=UTF-8\n";
print "SourceType=OSIS\n";
# Same YYYY-MM-DD local date that `date +"%F"` produced, but without
# depending on an external date command being available.
require POSIX;
print "SwordVersionDate=".POSIX::strftime("%Y-%m-%d", localtime)."\n";
# Prefer the document's own language code; fall back to the one given with
# -l so that Lang= is not left empty for documents without xml:lang.
my $lang_code = (defined $doc_lang && length $doc_lang) ? $doc_lang : $language;
$lang_code = '' unless defined $lang_code;
print "Lang=".$lang_code."\n";
# One GlobalOptionFilter line per detected markup feature, element-level
# features first and then word-level ones. Features without a filter entry
# (e.g. 'figure') are skipped instead of printing an empty
# "GlobalOptionFilter=" line.
foreach my $feature (@doc_features, @word_features) {
    next unless $doc_has_feature{$feature} && exists $doc_filters{$feature};
    print "GlobalOptionFilter=".$doc_filters{$feature}."\n";
}

# Diacritics: run each character filter over a fresh copy of the stripped
# document text; if the filter changed anything, the text contains the
# corresponding marks. The reference text is taken once from $doc_text
# rather than re-serialising and re-stripping the document on each pass.
my $plain_text = $doc_text->c_str();
foreach my $filter (@char_features) {
    my $tmp = Sword::SWBuf->new($plain_text);
    $manager->filterText($filter, $tmp);
    if ($tmp->c_str() ne $plain_text) {
        # Plain element lookup; the former %diacritics{...} form is a
        # key/value slice and does not even compile before Perl 5.20.
        print "GlobalOptionFilter=".$diacritics{$filter}."\n";
    }
}

# Feature lines, in the same order as above, with the 'no paragraphs' flag
# last.
foreach my $feature (@doc_features, @word_features, 'p') {
    next unless $doc_has_feature{$feature} && exists $doc_feature{$feature};
    print "Feature=".$doc_feature{$feature}."\n";
}
# LCSH uses the resolved language name, which already reflects the -l
# fallback for documents without xml:lang; looking the name up from
# $doc_lang again would leave it empty in that case.
print "LCSH=".$doc_type.".".$doc_lang_name."\n";

if (@inputFile > 0) {
    # Entries supplied with -i are copied through verbatim.
    print foreach @inputFile;
}
elsif (!$makefile) {
    # Neither -i nor -m: emit placeholder entries for a human to edit.
    # NOTE(review): the wording says "Bible" even when osisRefWork names a
    # commentary - confirm whether that is intended.
    print "DistributionLicense=copyrighted. Do not distribute\n";
    print "Description=".$doc_name." Bible in ".$doc_lang_name."\n";
    print "About=".$doc_name." Bible in ".$doc_lang_name."\n";
    print "Version=1.0\n";
    print "History_1.0=First release\n";
}