summaryrefslogblamecommitdiffstats
path: root/modules/conf/confmaker.pl
blob: e2c411d06df55af655bd107356b8b17bb2d10df3 (plain) (tree)









































                                                                                                                      
                            




                                     
 


























                                                                                                      

                        
                                                                                                                   
                                                                                                                                        
                                                                                              
                                                               
                                                                                                                             








                                                                                                                          

                                             
                                                                                                      





                                            

                                                                                                        
        
 

                              
                                                                                                 









                                       



                                   

 


                                     




                                                        
                                                  


                                                              







                                                         
                                                        
 
 
 
 
                                                       




                                                                                             

                                                                                                                                                              


        

                                                          

    


                                                              
 













                               



                                                                        
                                                           
                                                                                                           


                                              
                                           
                                      

                                       


                                          


                                  
              
 
            
                                                 
                                         

                                            
                    



                                                               
                                                               

                   









                                                   
                                               











                               













                                            

                                                 

 


                        
                          
 






                                                             
                                                               

 
 

                           
 



                                        
                              


                               
                                                        



                               
                                                        

       

                                 











                                                            

                                                          
                                             



                                                          
                                             

       


                                          
 
                                                                        

                                             





                        
                   
                                                                   

                                                                   
                         
                                       
 
#!/usr/bin/perl
## confmaker.pl - provides an initial conf file for a new module by analysing a given OSIS XML file.
## The programme searches for relevant tags and creates the GlobalOptionFilter entries and other relevant conf entries.

## Licensed under the standard BSD license:

# Copyright (c) 2002-2009 CrossWire Bible Society <http://www.crosswire.org/>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#        notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in
#       the documentation and/or other materials provided with the
#       distribution.
#     * Neither the name of the CrossWire Bible Society nor the names of
#       its contributors may be used to endorse or promote products
#       derived from this software without specific prior written
#       permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

## For general inquiries, comments, suggestions, bug reports, etc. email:
## sword-support@crosswire.org

#########################################################################
use XML::LibXML;
use I18N::LangTags::List;
use Unicode::UCD 'charinfo';
#use open ':std', ':encoding(UTF-8)';
#use open qw/:std :utf8/;
use utf8;
use Sword;
use HTML::Strip;

# Minimum SWORD engine version needed for each supported versification
# (v11n) system.  The looked-up value is written to the conf file as
# MinimumVersion, and the keys double as the set of accepted -v values.
my %version = (
    KJV         => '1.5.9',
    KJVA        => '1.6.0',
    NRSV        => '1.6.0',
    NRSVA       => '1.6.0',
    MT          => '1.6.0',
    Leningrad   => '1.6.0',
    Synodal     => '1.6.1',
    Vulg        => '1.6.1',
    Luther      => '1.6.1',
    German      => '1.6.1',
    Catholic    => '1.6.2',
    Catholic2   => '1.6.2',
    LXX         => '1.7.2',
    Orthodox    => '1.7.2',
    SynodalProt => '1.7.2',
    DarbyFr     => '1.8.0',
    Segond      => '1.8.0',
    Calvin      => '1.8.0',
);

# Versification names in the order they are shown to the user when an
# unknown v11n is requested.  Each entry must be spelled exactly like its
# key in %version, otherwise the help text advertises a name that the
# validation then rejects.
my @av11n = (
    'KJV',         'KJVA',    'NRSV',     'NRSVA',
    'MT',          'Leningrad', 'Synodal', 'Vulg',
    'Luther',      'German',  'Catholic', 'Catholic2',
    'LXX',         'Orthodox', 'SynodalProt', 'DarbyFr',
    'Segond',      'Calvin',
);

my $v11n	= "KJV"; # If the script is called without a v11n chosen it will set KJV as standard. 

## Obtain arguments
# Without any argument there is nothing to analyse: print the usage summary
# to the currently selected output handle and stop with a failure status.
if (scalar(@ARGV) < 1) {
    print
        "\nconfmaker.pl -- - provides a initial conf file for a new module by analysing  given OSIS xml file.\n",
        "Syntax: confmaker.pl <osis XML file> [-o <conf-output-file>] [-i <conf-input-file>] [-m] [-l <language-code>] [-v <v11n>]\n",
        "- Arguments in braces < > are required. Arguments in brackets [ ] are optional.\n",
        "- If no -o option is specified <STDOUT> is used.\n",
        # This line used to lack its newline and ran into the next one.
        "- if the -m option is used no -i option may be used. -m expects parametres added by other means, e.g. a makefile\n",
        "- The script can currently produce a valid conf file for OSIS bibles, but not for any other import formats.\n";
    exit (-1);
}

# Command line layout: <osis XML file> followed by optional switches.
#   -o <file>  write the conf to <file> instead of STDOUT
#   -i <file>  read <file>; its lines are kept in @inputFile
#   -m         makefile mode (mutually exclusive with -i)
#   -l <code>  language code, kept in $language
#   -v <v11n>  versification name, overrides the default in $v11n
# The switches are accepted in any order; the historical fixed order
# (-o, -i, -m, -l, -v) keeps working unchanged.
my $file = $ARGV[0];

my ($outputFileName, $inputFileName, $makefile, $language);
my @inputFile;

my $nextarg = 1;
while ($nextarg < @ARGV) {
    my $option = $ARGV[$nextarg++];

    # -m is the only switch without a value.
    if ($option eq "-m") {
        $makefile = 1;
        next;
    }

    # Unknown arguments were silently skipped before; keep going, but say so.
    if ($option !~ /\A-[oilv]\z/) {
        print STDERR "Ignoring unknown argument '$option'\n";
        next;
    }

    if ($nextarg >= @ARGV) {
        print STDERR "Option $option requires a value.\n";
        exit 1;
    }
    my $value = $ARGV[$nextarg++];

    if    ($option eq "-o") { $outputFileName = $value; }
    elsif ($option eq "-i") { $inputFileName  = $value; }
    elsif ($option eq "-l") { $language       = $value; }
    else                    { $v11n           = $value; }
}

# Check the conflict before touching any file, so that a rejected call does
# not leave a truncated output file behind.
if ($makefile && defined $inputFileName) {
    print STDERR "You can not define both an input file and use the -m option, sorry...\n";
    exit 1;
}

if (defined $inputFileName) {
    open(my $inf, "<", $inputFileName)
        or die "Could not open inputfile $inputFileName for reading: $!";
    @inputFile = <$inf>;
    close($inf);
}

if (defined $outputFileName) {
    # OUTF stays a package-level handle under its original name; it becomes
    # the default handle for every following plain print.
    open(OUTF, ">", $outputFileName)
        or die "Could not open file $outputFileName for writing: $!";
    select(OUTF);
}

 

# Load the OSIS document named on the command line.
my $parser = XML::LibXML->new();
my $doc = $parser->parse_file($file);

# SWORD manager with the four diacritic options switched off.  Further down,
# filterText() is run for each of these options on a copy of the stripped
# text and the result is compared with $doc_text.
my $manager = Sword::SWMgr->new();

$manager->setGlobalOption("Hebrew Vowel Points", "Off");
$manager->setGlobalOption("Hebrew Cantillation", "Off");
$manager->setGlobalOption("Arabic Vowel Points", "Off");
$manager->setGlobalOption("Greek Accents", "Off");

# Markup-free text of the whole document, used as the comparison baseline.
my $hs = HTML::Strip->new();
my $doc_text = Sword::SWBuf->new($hs->parse($doc->toString()));
# Reset the stripper so that later parse() calls on $hs start from a clean
# state instead of continuing this document.
$hs->eof();

## obtain name, type and language

# All three values are attributes of the first <osisText> element.
my @elements = $doc->getElementsByTagName('osisText');

if (!@elements) {
   print STDERR "The document contains no <osisText> element !\n";
   exit 1;
   }

my $osis_text = $elements[0];
my $doc_name = $osis_text->getAttribute('osisIDWork');
my $doc_type = $osis_text->getAttribute('osisRefWork');
my $doc_lang = $osis_text->getAttribute('xml:lang');
my $doc_lang_name=I18N::LangTags::List::name($doc_lang);

# A language may come from the document, from the -l switch, or from both.
my $have_cli_lang = (defined($language) && length($language) > 0);
my $have_doc_lang = (defined($doc_lang) && length($doc_lang) > 0);

# Neither source names a language: nothing sensible can be written.
if (!$have_cli_lang && !$have_doc_lang) {
   print STDERR "The language is undefined and no language was given on the commandline !\n";
   exit 1;
   }

# Both sources name a language but they disagree.
if ($have_cli_lang && $have_doc_lang && ($language ne $doc_lang)) {
   print STDERR "The language ($language) given on the commandline and the language of the document ($doc_lang_name) appear not to agree with each other !\n";
   exit 1;
   }

# Only the command line names a language: derive the display name from it.
if ($have_cli_lang && !$have_doc_lang) {
   $doc_lang_name = I18N::LangTags::List::name($language);
   }

# Reject a versification name that has no entry in %version.  The names in
# @av11n are listed on STDERR, four to a row, each row starting with a tab.
unless (exists $version{$v11n}) {
    print STDERR "This versification does not exist (yet) \n";
    print STDERR "Valid versfication systems are\n\t";

    for my $idx (0 .. $#av11n) {
        # Start a new row before every fifth, ninth, ... name.
        my $row_break = ($idx > 0 && $idx % 4 == 0) ? "\n\t" : "";
        print STDERR $row_break . "$av11n[$idx] ";
    }

    print STDERR "\n";
    exit(-1);
}

##GlobalOptionsFilter - prepare

# Element names looked up by tag name in the document.
my @doc_features  = qw(title note reference q figure rdg);
# Attribute names looked up on <w> elements.
my @word_features = qw(lemma strong gloss morph);
# Option names handed to the SWORD manager's filterText().
my @char_features = (
    'Hebrew Vowel Points',
    'Arabic Vowel Points',
    'Hebrew Cantillation',
    'Greek Accents',
);

# Feature name -> value of the GlobalOptionFilter= line written for it.
my %doc_filters = (
    'title'     => "OSISHeadings",
    'note'      => "OSISFootnotes",
    'reference' => "OSISScripref",
    'gloss'     => "OSISGlosses",
    'lemma'     => "OSISLemma",
    'strong'    => "OSISStrongs",
    'morph'     => "OSISMorph",
    'q'         => "OSISRedLetterWords",
    'rdg'       => 'OSISVariants',
    'enum'      => 'OSISEnum',
    'xlit'      => 'OSISXlit',
);

# Feature name -> value of the Feature= line written for it.
my %doc_feature = (
    'strong' => 'StrongsNumbers',
    'figure' => 'Images',
    'p'      => 'NoParagraphs',
);

# Diacritic option name -> value of the GlobalOptionFilter= line.
my %diacritics = (
    'Hebrew Vowel Points' => "UTF8HebrewPoints",
    'Arabic Vowel Points' => 'UTF8ArabicPoints',
    'Hebrew Cantillation' => 'UTF8Cantillation',
    'Greek Accents'       => 'UTF8GreekAccents',
);

# Filled in by the detection pass: feature name -> true when present.
my %doc_has_feature;

## GlobalOptionsFilter - search for

# Element-level features: flagged as soon as the document holds at least one
# element with that tag name.
# NOTE(review): any <q> element flags 'q', which is later written as
# OSISRedLetterWords; it may be more accurate to require who="Jesus" on the
# element - confirm against the SWORD filter before tightening this.
foreach my $tag (@doc_features) {
   my @found = $doc->getElementsByTagName($tag);
   $doc_has_feature{$tag} = 1 if @found > 0;
   }

# Word-level features: flagged when any <w> element carries the attribute.
my @word_elements = $doc->getElementsByTagName('w');

foreach my $attr (@word_features) {
   foreach my $word (@word_elements) {
      if ($word->hasAttribute($attr)) {
         $doc_has_feature{$attr} = 1;
         last;
         }
      }
   }

# 'strong' is additionally flagged when some lemma value contains the text
# "strong" (compared case-insensitively).
if ($doc_has_feature{'lemma'}) {
   foreach my $word (@word_elements) {
      next unless $word->hasAttribute('lemma');

      my $lemma = $word->getAttribute('lemma');
      if (index(lc($lemma), 'strong') != -1) {
         $doc_has_feature{'strong'} = 1;
         last;
         }
      }
   }

# 'p' is an inverted flag: it is set when the document has NO <p> element.
my @paragraphs = $doc->getElementsByTagName('p');
if (@paragraphs == 0) { $doc_has_feature{'p'} = 1; }


   
# Assemble and print out

# Section header: the module name in brackets.
print "[".$doc_name."]\n";

# Driver and data path depend on the work type read from the document.
if ($doc_type =~ m/Bible/i) {
 print  "ModDrv=zText\n";
 print "DataPath=./modules/texts/ztext/".lc($doc_name)."/\n";
}

if ($doc_type =~ m/Commentary/i) {
 print  "ModDrv=zCom\n";
 print "DataPath=./modules/comments/zcom/".lc($doc_name)."/\n";
}

# Any other type gets neither line; report that instead of staying silent.
if ($doc_type !~ m/Bible|Commentary/i) {
 print STDERR "Unknown document type '$doc_type': no ModDrv/DataPath entries written.\n";
}

print "CompressType=ZIP\n";
print "BlockType=BOOK\n";

print  "Encoding=UTF-8\n";
print  "SourceType=OSIS\n";

# Today's local date as YYYY-MM-DD, built in Perl rather than by running an
# external `date` command.
my ($mday, $mon, $year) = (localtime)[3, 4, 5];
print  "SwordVersionDate=".sprintf("%04d-%02d-%02d", $year + 1900, $mon + 1, $mday)."\n";

# Prefer the language declared in the document; fall back to the one given
# with -l so that Lang= is not left empty when the document declares none.
my $lang_code = (defined($doc_lang) && length($doc_lang) > 0) ? $doc_lang : $language;
$lang_code = '' unless defined $lang_code;
print  "Lang=".$lang_code."\n";

# One GlobalOptionFilter line per detected markup feature, element features
# first, then word features.  Features without an entry in %doc_filters
# ('figure') are skipped instead of producing an empty "GlobalOptionFilter=".
foreach my $feature (@doc_features, @word_features) {
   next unless $doc_has_feature{$feature};
   next unless exists $doc_filters{$feature};
   print  "GlobalOptionFilter=".$doc_filters{$feature}."\n";
   }

# Diacritics: run each option's filter over a fresh copy of the stripped
# document text; if the filtered copy differs from the baseline, a line for
# the matching filter is written.
foreach my $filter (@char_features) {
   # Copy the baseline rather than re-serialising and re-stripping the
   # whole document on every pass.
   my $tmp = Sword::SWBuf->new($doc_text->c_str());

   $manager->filterText($filter, $tmp);

   if ($tmp->c_str() ne $doc_text->c_str()) {
      # Plain element access; the former %diacritics{...} was a key/value
      # slice and does not even compile before Perl 5.20.
      print "GlobalOptionFilter=".$diacritics{$filter}."\n";
   }
}


      
# Feature= lines: one for every detected feature that has an entry in
# %doc_feature, element features before word features, and finally the
# separately tracked 'p' entry.
for my $name (@doc_features, @word_features) {
   next unless $doc_has_feature{$name};
   next unless exists $doc_feature{$name};
   print "Feature=", $doc_feature{$name}, "\n";
   }

print "Feature=", $doc_feature{'p'}, "\n" if $doc_has_feature{'p'};

# $doc_lang_name already holds the display name of the document language or,
# when the document declares none, of the language given with -l; looking up
# $doc_lang again here would leave the name empty in the latter case.
my $lcsh_lang = defined($doc_lang_name) ? $doc_lang_name : '';
print  "LCSH=".$doc_type.".".$lcsh_lang."\n";
print "MinimumVersion=".$version{$v11n}."\n";
print "Versification=".$v11n."\n";

if (@inputFile > 0) {
   # Lines read from the -i file are passed through unchanged.
   print @inputFile;
   }
elsif (!$makefile) {
   # Neither -i nor -m: write placeholder entries for the remaining fields.
   print "DistributionLicense=copyrighted. Do not distribute\n";
   print "Description=".$doc_name." Bible in ".$lcsh_lang."\n";
   print "About=".$doc_name." Bible in ".$lcsh_lang."\n";
   print "Version=1.0\n";
   print "History_1.0=First release\n";
}

# When -o redirected the output, close the file explicitly so that a failed
# buffered write is reported instead of being lost at exit.
if (defined $outputFileName) {
   close(OUTF) or die "Could not finish writing $outputFileName: $!";
   }