diff options
author | Dominique Corbex <domcox@crosswire.org> | 2022-09-18 17:05:59 +0000 |
---|---|---|
committer | Dominique Corbex <domcox@crosswire.org> | 2022-09-18 17:05:59 +0000 |
commit | 4a5a79d3a294e4a8a44a461a6041df79a447b99d (patch) | |
tree | 8e0930def974c8680d8011c6665ce3bd111d5784 /modules/conf | |
parent | 9ee854a4d91196887e640230db56b0fffcfdd240 (diff) | |
download | sword-tools-4a5a79d3a294e4a8a44a461a6041df79a447b99d.tar.gz |
Replace confmaker.pl by confmaker.py
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@553 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/conf')
-rwxr-xr-x | modules/conf/confmaker.pl | 345 |
-rwxr-xr-x | modules/conf/confmaker.py | 543 |
2 files changed, 543 insertions, 345 deletions
diff --git a/modules/conf/confmaker.pl b/modules/conf/confmaker.pl deleted file mode 100755 index 612c4f7..0000000 --- a/modules/conf/confmaker.pl +++ /dev/null @@ -1,345 +0,0 @@ -#!/usr/bin/perl -## confmaker.pl - provides a initial conf file for a new module by analysing given OSIS xml file. -## The programme searches for relevant tags and creates the GlobalOptionFilter entries and other relevant conf entries - -## Licensed under the standard BSD license: - -# Copyright (c) 2002-2009 CrossWire Bible Society <http://www.crosswire.org/> -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# * Neither the name of the CrossWire Bible Society nor the names of -# its contributors may be used to endorse or promote products -# derived from this software without specific prior written -# permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -# PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -## For general inquiries, comments, suggestions, bug reports, etc. email: -## sword-support@crosswire.org - -######################################################################### -use XML::LibXML; -use I18N::LangTags::List; -use Unicode::UCD 'charinfo'; -#use open ':std', ':encoding(UTF-8)'; -#use open qw/:std :utf8/; -use utf8; -use Sword; -use HTML::Strip; - -my %version = ( KJV => '1.5.9', - KJVA => '1.6.0', - NRSV => '1.6.0', - NRSVA => '1.6.0', - MT => '1.6.0', - Leningrad => '1.6.0', - Synodal => '1.6.1', - Vulg => '1.6.1', - Luther => '1.6.1', - German => '1.6.1', - Catholic => '1.6.2', - Catholic2 => '1.6.2', - LXX => '1.7.2', - Orthodox => '1.7.2', - SynodalProt => '1.7.2', - DarbyFr => '1.8.0', - Segond => '1.8.0', - Calvin => '1.8.0' - ); - -my @av11n = ( 'KJV', 'KJVA', 'NRSV', 'NRSVA', 'MT', 'Leningrad', 'Synodal', 'Vulg', - 'Luther', 'German', 'Catholic', 'Catholic2', 'LXX', 'Orthodox', - 'SynodalProt', 'DarbyFR', 'Segond', 'Calvin' - ); - -my $v11n = "KJV"; # If the script is called without a v11n chosen it will set KJV as standard. - -## Obtain arguments -if (scalar(@ARGV) < 1) { - print "\nconfmaker.pl -- - provides a initial conf file for a new module by analysing given OSIS xml file.\n"; - print "Syntax: confmaker.pl <osis XML file> [-o <conf-output-file>] [-i <conf-input-file>] [-m] [-l <language-code>] [-v <v11n>]\n"; - print "- Arguments in braces < > are required. 
Arguments in brackets [ ] are optional.\n"; - print "- If no -o option is specified <STDOUT> is used.\n"; - print "- if the -m option is used no -i option may be used. -m expects parametres added by other means, e.g. a makefile"; - print "- The script can currently produce a valid conf file for OSIS bibles, but not for any other import formats.\n"; - exit (-1); -} - -$file = @ARGV[0]; - -$nextarg = 1; - -if (@ARGV[$nextarg] eq "-o") { - $outputFileName = "@ARGV[$nextarg+1]"; - $nextarg += 2; - open (OUTF,, ">", "$outputFileName") or die "Could not open file $outputFileName for writing."; - select(OUTF) - } - -if (@ARGV[$nextarg] eq "-i") { - $inputFileName = "@ARGV[$nextarg+1]"; - $nextarg += 2; - open (INF,, "<","$inputFileName") or die "Could not open inputfile $inputFileName for reading" ; - @inputFile = <INF>; - } - -if (@ARGV[$nextarg] eq "-m") { - if ($inputFileName) { - print STDERR "You can not define both an input file and use the -m option, sorry...\n"; - exit 1;} - $makefile=true; - $nextarg +=1; - } - -if (@ARGV[$nextarg] eq "-l") { - $language = "@ARGV[$nextarg+1]"; - $nextarg += 2; - } - -if (@ARGV[$nextarg] eq "-v") { - $v11n = "@ARGV[$nextarg+1]"; - } - - - -my $parser = XML::LibXML->new(); -my $doc = $parser->parse_file($file); - -my $manager = new Sword::SWMgr(); - -$manager->setGlobalOption("Hebrew Vowel Points", "Off"); -$manager->setGlobalOption("Hebrew Cantillation", "Off"); -$manager->setGlobalOption("Arabic Vowel Points", "Off"); -$manager->setGlobalOption("Greek Accents", "Off"); - -my $hs = HTML::Strip->new(); -my $doc_text = new Sword::SWBuf($hs->parse($doc->toString())); - -## obtain name, type and language - -my @elements = $doc->getElementsByTagName('osisText'); - -my $doc_name = @elements[0]->getAttribute('osisIDWork'); -my $doc_type = @elements[0]->getAttribute('osisRefWork'); -my $doc_lang = @elements[0]->getAttribute('xml:lang'); -my $doc_lang_name=I18N::LangTags::List::name($doc_lang); -; - - - -if ((length($language)==0) && 
(length($doc_lang)==0)) { - print STDERR $language."\n", $doc_lang."\n", $doc_lang_name."\n"; - print STDERR "The language is undefined and no language was given on the commandline !\n"; - exit; - } - -if (((length($language)>0) && (length($doc_lang)>0)) && ($language ne $doc_lang)){ - print STDERR "The language ($language) given on the commandline and the language of the document ($doc_lang_name) appear not to agree with each other !\n"; - exit; - } - -if ((length($language)>0) && (length($doc_lang)==0)) { - $doc_lang_name = I18N::LangTags::List::name($language); - } - -if (!(exists $version{$v11n})) { - print STDERR "This versification does not exist (yet) \n"; - print STDERR "Valid versfication systems are\n\t"; - - my $notmorethan4 = 1; - foreach (@av11n) { - $notmorethan4++; - if ($notmorethan4 <= 5) { - print STDERR "$_ "; - } - else { - print STDERR "\n\t$_ "; - $notmorethan4 = 2; - } - } - print STDERR "\n"; - exit(-1); - } -#remove <header> tag and child nodes as its presence can cause confusion -for my $header ($doc->getElementsByTagName('header')) { - $header->unbindNode; -} - - - -##GlobalOptionsFilter - prepare - -my @doc_features = ('title', 'note', 'reference', 'q', 'figure', 'rdg', 'seg'); -my @word_features = ('lemma', 'strong', 'gloss', 'morph',); -my @char_features = ('Hebrew Vowel Points', 'Arabic Vowel Points', 'Hebrew Cantillation', 'Greek Accents'); - -my %doc_filters = ( 'title' => "OSISHeadings", - 'note' => "OSISFootnotes", - 'reference' => "OSISScripref", - 'gloss' => "OSISGlosses", - 'lemma' => "OSISLemma", - 'strong' => "OSISStrongs", - 'morph' => "OSISMorph", - 'q' => "OSISRedLetterWords", - 'rdg' => 'OSISVariants', - 'enum' => 'OSISEnum', - 'xlit' => 'OSISXlit', - 'seg' => 'OSISMorphSegmentation' - - ); - - -my %doc_feature = ( 'strong' => 'StrongsNumbers', - 'figure' => 'Images', - 'p' => 'NoParagraphs' - - ); - -my %diacritics = ( 'Hebrew Vowel Points' => "UTF8HebrewPoints", - 'Arabic Vowel Points' => 'UTF8ArabicPoints', - 'Hebrew 
Cantillation' => 'UTF8Cantillation', - 'Greek Accents' => 'UTF8GreekAccents', - ); - - -my %doc_has_feature; - -## GlobalOptionsFilter - search for - -foreach (@doc_features) { - my @elements = $doc->getElementsByTagName($_); - if (@elements>0) { $doc_has_feature{$_}=true } ; - } - -my @elements = $doc->getElementsByTagName('w'); - -foreach my $f(@word_features) { - - foreach my $e(@elements) { - if ($e->hasAttribute($f)) { - $doc_has_feature{$f}=true; - last; - } - } - -} - -if ($doc_has_feature{'lemma'}) { - foreach my $e(@elements) { - if ($e->hasAttribute('lemma')) { - my $lemma = $e->getAttribute('lemma'); - - if (index(lc($lemma), 'strong') != -1) { - $doc_has_feature{'strong'}=true; - last; - } - } - } -} - - -my @paragraphs = $doc->getElementsByTagName('p'); -if (@paragraphs==0) {$doc_has_feature{'p'}=true}; - - - -# Assemble and print out - -print "[".$doc_name."]\n"; - -if ($doc_type =~ m/Bible/i) { - print "ModDrv=zText\n"; - print "DataPath=./modules/texts/ztext/".lc($doc_name)."/\n"; -} - -if ($doc_type =~ m/Commentary/i) { - print "ModDrv=zCom\n"; - print "DataPath=./modules/comments/zcom/".lc($doc_name)."/\n"; -} - - -print "CompressType=ZIP\n"; -print "BlockType=BOOK\n"; - -print "Encoding=UTF-8\n"; -print "SourceType=OSIS\n"; -print "SwordVersionDate=".`date +"%F"`; - -print "Lang=".$doc_lang."\n"; - - - -foreach (@doc_features) { - if ($doc_has_feature{$_}) { - print "GlobalOptionFilter=".$doc_filters{$_}."\n" - } - } -foreach (@word_features) { - if ($doc_has_feature{$_}) { - print "GlobalOptionFilter=".$doc_filters{$_}."\n" - } - } - -foreach $filter(@char_features) { - my $tmp = new Sword::SWBuf($hs->parse($doc->toString())); - - $manager->filterText($filter, $tmp); - - if ($tmp->c_str() ne $doc_text->c_str()) { - print "GlobalOptionFilter=".%diacritics{$filter}."\n"; - - } -} - - - -foreach (@doc_features) { - if ($doc_has_feature{$_} && exists $doc_feature{$_}) { - print "Feature=".$doc_feature{$_}."\n" - } - } -foreach (@word_features) { - 
if ($doc_has_feature{$_} && exists $doc_feature{$_}) { - print "Feature=".$doc_feature{$_}."\n" - } - } -if ($doc_has_feature{'p'}) { - print "Feature=".$doc_feature{'p'}."\n" - } - -print "LCSH=".$doc_type.".".I18N::LangTags::List::name($doc_lang)."\n"; -print "MinimumVersion=".$version{$v11n}."\n"; -print "Versification=".$v11n."\n"; - -if (@inputFile>0) { - foreach(@inputFile) { - print $_; - } - } -elsif (!$makefile){ - print "DistributionLicense=copyrighted. Do not distribute\n"; - print "Description=".$doc_name." Bible in ".$doc_lang_name."\n"; - print "About=".$doc_name." Bible in ".$doc_lang_name."\n"; - print "Version=1.0\n"; - print "History_1.0=First release\n"; -} diff --git a/modules/conf/confmaker.py b/modules/conf/confmaker.py new file mode 100755 index 0000000..2ed9476 --- /dev/null +++ b/modules/conf/confmaker.py @@ -0,0 +1,543 @@ +#!/usr/bin/env python3 + +# -*- coding: utf-8 -*- + +# confmaker.py - Provides a initial conf file for a new module by analyzing +# the related OSIS xml file. + +## The programme searches for relevant tags and creates the GlobalOptionFilter +# entries and other relevant conf entries. This a port to Python from the +# previous confmaker.pl Perl script we were using. It fixes detection of +# diacritics and OSISMorphSegmentation (GlobalOpionFilters) and adds support +# for genbook and modules with large entries > 64Kb. + +# Copyright (C) 2020 CrossWire Bible Society + + +# Author: kris <kristof.szabo@lutheran.hu> & domcox <domcox@crosswire.org> + +# This file is part of Sword Modules + +# Sword Modules is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# Sword Modules is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Sword Modules. If not, see <https://www.gnu.org/licenses/>. + +# Created: 2021-01-08 +# +# Revision: +# 2021-01-16 domcox <domcox@crosswire.org> +# Changed language library from iso-639 to langtags + + +# Requirements + +import time +import re +import argparse +import sys +import xml.etree.ElementTree as ET +from datetime import date +from pathlib import Path +try: + import langtags +except: + sys.stderr.write("You do not have the Python langtags library installed. Please install it (pip install langtags).\n") + sys.exit(1) +try: + import Sword +except: + sys.stderr.write("You do not have the SWORD library installed. Please install it.\n") + sys.exit(1) + + +# Variables + +Version = '1.1' + +doc = [] + +versification = { + 'KJV': '1.5.9', + 'KJVA': '1.6.0', + 'NRSV': '1.6.0', + 'NRSVA': '1.6.0', + 'MT': '1.6.0', + 'Leningrad': '1.6.0', + 'Synodal': '1.6.1', + 'Vulg': '1.6.1', + 'Luther': '1.6.1', + 'German': '1.6.1', + 'Catholic': '1.6.2', + 'Catholic2': '1.6.2', + 'LXX': '1.7.2', + 'Orthodox': '1.7.2', + 'SynodalProt': '1.7.2', + 'DarbyFr': '1.8.0', + 'Segond': '1.8.0', + 'Calvin': '1.8.0' +} + + +# Functions + +def die(msg): + ''' + Show an error message then exit on error + ''' + print('ERROR! ' + msg, file=sys.stderr) + sys.exit(1) + + +def get_parameters(): + """ + Get Parse command-line options. + Returns dict containing parameters values + """ + + # Creating parser + description = ''' + provides a conf file for a module by analysing given OSIS XML file and optionally including extra elements from a conf.in file. 
+ ''' + parser = argparse.ArgumentParser(description=description) + + # Adding arguments + parser.add_argument("-i", "--infile", help="conf.in file containing extra elements to include, (default none)") + parser.add_argument("-o", "--outfile", help="name of generated conf file, (default to screen)") + parser.add_argument("-v", "--v11n", default='KJV', help="versification schema, (default: KJV)") + parser.add_argument("-s", "--size", default='2' , help="set -s 4 for modules with large entries > 64Kb, (default -s 2)") + parser.add_argument('osis', help='OSIS XML file') + + # Parsing arguments + args = parser.parse_args() + + return (vars(args)) + + +def check_parameters(params): + ''' + Check CLI parameters for validity + ''' + + # Checking OSIS file + osisfile = params['osis'] + fileObj = Path(osisfile) + if not fileObj.is_file(): + die(f"File '{osisfile}' does not exist.") + + # Checking conf.in file in input + if params['infile']: + infile = params['infile'] + fileObj = Path(infile) + if not fileObj.is_file(): + die(f"File '{infile}' does not exist.") + + # Checking Size + size = params['size'] + if size not in ('2', '4'): + die(f"--size='{size}' Incorrect value.") + + # Chexcking versification schema + v11n = params['v11n'] + av11n = versification.keys() + if v11n not in av11n: + die(f"'{v11n}': Unknown versification schema.") + return (True) + + +def get_osistext(osisfile): + """ + Read osisText node from osis file. + Returns dict containing osisIDWork, osisRefWork, osisLang + """ + + # Search for <osisText ... 
> node + start_tag = '<osisText' + end_tag = '>' + start_tag_identified = False + node_identified = False + # osisText content in XML + captured_line = '' + # osisText attributes + osistext = dict([]) + # open Osis + with open(osisfile) as f: + # Read lines until osisText is captured + while not node_identified: + line = f.readline() + if not line: + # End of File + die('osisText not found in osis file') + # Search for osisText tag + if start_tag in line: + start_tag_identified = True + if start_tag_identified: + # capture osisText content + captured_line += line + if end_tag in line: + # osisText is fully captured + node_identified = True + # Read attributes + for attribute in {'osisIDWork', 'osisRefWork', 'xml:lang'}: + value = (re.search(rf'{attribute}="(.+?)"', captured_line, flags=re.IGNORECASE)) + if value: + osistext[attribute] = value.group(1) + else: + die(f'osisText attribute missing: {attribute}') + return osistext + + +def check_osistext(osistext): + ''' + Check osisText attributes + ''' + # Check osisIDWork + module = osistext['osisIDWork'].lower() + if len(module) < 1: + die('FATAL: osisIDWork is empty.') + + # Check osisRefWork + moduletype = osistext['osisRefWork'] + if moduletype.lower() not in ['bible','commentary','genbook']: + die(f"FATAL: Invalid attribute osisRefWork: {osiswork}") + + # Check Language + lang = osistext['xml:lang'] + language = get_language(lang) + + return True + + +def get_language(lang): + """ + Search BCP-47 Languages Database for lang + """ + found = False + + try: + tag = langtags.Tag(lang) + found = True + except: + die(f"Language '{lang}' not found in BCP 47 Languages Database") + + # Sometimes language description is multiline -> remove '\n' + return (tag.language.description.replace('\n', ' ')) + + +def is_tag(xml_file, tag): + """ + Search for 'tag' in OSIS file and returns True if 'tag' exists, False otherwise + """ + # Start searching after <header> tag to avoid confusion + end_header_tag = '</header>' + 
header_read = False + # Tag to search + start_tag = f'<{tag}' + tag_identified = False + line = True + with open(xml_file) as f: + # Read until tag is identified + while line and not tag_identified: + line = f.readline() + # Skip <header> section + if end_header_tag in line: + header_read = True + if header_read: + if start_tag in line: + tag_identified = True + return tag_identified + + +def is_attribute(xml_file, tag, attribute): + """ + Search for 'tag' + 'attribute' in OSIS file, + returns True if 'tag' + 'attribute' exists, False otherwise + """ + # Start searching after <header> tag to avoid confusion + end_header_tag = '</header>' + header_read = False + # Start and end tags defining the element that may have 'attribute' + start_tag = f'<{tag}' + end_tag = f'</{tag}>' + element = '' + start_tag_identified = False + attribute_identified = False + line = True + with open(xml_file) as f: + # Read lines until attribute is identified + while line and not attribute_identified: + line = f.readline() + # Skip <header> section + if end_header_tag in line: + header_read = True + if header_read: + # Search for tag + if start_tag in line: + start_tag_identified = True + if start_tag_identified: + # Read elemnt + element += line + if end_tag in line: + element += line + start_tag_identified = False + # Search for attribute + if attribute in line: + attribute_identified = True + element = '' + return attribute_identified + + +def is_diacritic(xml_file, lang, diacritic): + ''' + Search for 'diacritic' in OSIS File + Returns True or False + ''' + # Don't search OSIS targetting other languages than Hebrew, Greek, Arabic + if not lang in ('ar','grc','he','hbo'): + return False + elif not lang in 'ar' and diacritic == 'Arabic Vowel Points': + return False + elif not lang in 'grc' and diacritic == 'Greek Accents': + return False + elif not lang in ('he','hbo') and diacritic == 'Hebrew Cantillation': + return False + elif not lang in ('he','hbo') and diacritic == 'Hebrew Vowel 
Points': + return False + else: + # Grab the base SWORD manager + mgr = Sword.SWMgr() + mgr.setGlobalOption("Arabic Vowel Points", "Off"); + mgr.setGlobalOption("Greek Accents", "Off"); + mgr.setGlobalOption("Hebrew Cantillation", "Off"); + mgr.setGlobalOption("Hebrew Vowel Points", "Off"); + + # Parse XML + xml_text = ET.parse(xml_file) + xml_root = xml_text.getroot() + + # Remove all tags and keep bare text only, make 2 sets + strip_text = ET.tostring(xml_root, encoding='unicode', method='text') + ref_text = Sword.SWBuf(strip_text) + mod_text = Sword.SWBuf(strip_text) + + # Apply filter on 1 text + mgr.filterText(diacritic, mod_text) + + # Compare original bare text and filtered one + # return True is the filter has made changes to the text, False otherwise + return(ref_text.c_str() != mod_text.c_str()) + + +def build_doc(conf): + ''' + Generate conf file + ''' + # Module Name + module = conf['osisIDWork'] + doc.append("[" + module + "]") + + # Module Type + moduletype = conf['osisRefWork'] + # Parameters related to moduletype + # Big entries + size = conf['size'] + block = '4' if size == '4' else '' + # mod + mod = module.lower() + # ModDrv + Datapath + if moduletype.lower() in 'bible': + doc.append("ModDrv=zText" + block) + doc.append("DataPath=./modules/texts/ztext" + block + "/" + mod + "/") + if moduletype.lower() in 'commentary': + doc.append("ModDrv=zCom" + block) + doc.append("DataPath=./modules/comments/zcom" + block + "/" + mod + "/") + if moduletype.lower() in 'genbook': + doc.append("ModDrv=RawGenBook" + block) + doc.append("DataPath=./modules/genbook/rawgenbook/" + block + "/" + mod + "/" + mod) + + # Compression + if moduletype.lower() in ['bible','commentary']: + doc.append('CompressType=ZIP') + + # misc. 
+ doc.append('BlockType=BOOK') + doc.append('Encoding=UTF-8') + doc.append('SourceType=OSIS') + doc.append('OSISVersion=2.1.1') + doc.append('SwordVersionDate=' + str(date.today())) + + # Language + lang = conf['xml:lang'] + doc.append('Lang=' + lang) + + # GlobalOptionFilter + # Get Osis file name + osis = conf['osis'] + # We should have Footnotes before Headings on order to have + # working notes in titles + # Footnotes + if is_tag(osis, 'note'): + doc.append('GlobalOptionFilter=OSISFootnotes') + # Headings + if is_tag(osis, 'title'): + doc.append('GlobalOptionFilter=OSISHeadings') + # Scripref + if is_tag(osis, 'reference'): + doc.append('GlobalOptionFilter=OSISScripref') + # RedLetterWords + if is_tag(osis, 'q '): + doc.append('GlobalOptionFilter=OSISRedLetterWords') + # Variants + variants = False + if is_attribute(osis, 'seg', ' type="x-variant"'): + variants = True + if is_tag(osis, 'rdg'): + variants = True + if variants: + doc.append('GlobalOptionFilter=OSISVariants') + # MorphSegmentaton + osisMorphSegmentation = False + if is_attribute(osis, 'seg', 'type="morph"'): + osisMorphSegmentation = True + if is_attribute(osis, 'seg', 'type="x-morph"'): + osisMorphSegmentation = True + if osisMorphSegmentation: + doc.append('GlobalOptionFilter=OSISMorphSegmentation') + # Lemma + if is_attribute(osis, 'w', ' lemma='): + doc.append('GlobalOptionFilter=OSISLemma') + # Strong + strong = is_attribute(osis, 'w', 'strong:') + if strong: + doc.append('GlobalOptionFilter=OSISStrongs') + # Glosses + if is_attribute(osis, 'w', ' gloss='): + doc.append('GlobalOptionFilter=OSISGlosses') + # Morph + if is_attribute(osis, 'w', ' morph='): + doc.append('GlobalOptionFilter=OSISMorph') + # Enum + if is_attribute(osis, 'w', ' n='): + doc.append('GlobalOptionFilter=OSISEnum') + # Xlit + if is_attribute(osis, 'w', ' xlit='): + doc.append('GlobalOptionFilter=OSISXlit') + + # Diacritics + # Hebrew Vowel Points + if is_diacritic(osis, lang, 'Hebrew Vowel Points'): + 
doc.append('GlobalOptionFilter=UTF8HebrewPoints') + # Arabic Vowel Points + if is_diacritic(osis, lang, 'Arabic Vowel Points'): + doc.append('GlobalOptionFilter=UTF8ArabicPoints') + # Hebrew Cantillation + if is_diacritic(osis, lang, 'Hebrew Cantillation'): + doc.append('GlobalOptionFilter=UTF8Cantillation') + # Greek Accents + if is_diacritic(osis, lang, 'Greek Accents'): + doc.append('GlobalOptionFilter=UTF8GreekAccents ') + + # Features + # StrongsNumbers + if strong: + doc.append('Feature=StrongsNumbers') + # Images + if is_tag(osis, 'figure '): + doc.append('Feature=Images') + # NoParagraphs + if not is_tag(osis, 'p '): + doc.append('Feature=NoParagraphs') + + # LCSH + lang_name = get_language(lang) + if moduletype.lower() in ['bible','commentary']: + doc.append('LCSH=' + moduletype + '.' + lang_name) + + # Sword Minimum Version + doc.append('MinimumVersion=' + versification[conf['v11n']]) + if moduletype.lower() in ['bible','commentary']: + doc.append('Versification=' + conf['v11n']) + return True + + +def include_file(conf): + ''' + Include conf.in file if it exists + ''' + # Get conf.in file if it exists + infile = conf['infile'] + if infile: + # Read and include conf.in contents + with open(infile, 'r', encoding='utf-8', newline='\n') as f: + for line in f: + doc.append(line.rstrip()) + else: + # No conf.in file -> generate default values + module = conf['osisIDWork'] + moduletype = conf['osisRefWork'] + language = get_language(conf['xml:lang']) + doc.append('DistributionLicense=Copyrighted') + doc.append(f'Description={module}, {moduletype} in {language}') + doc.append(f'About={module}, {moduletype} in {language}') + doc.append('Version=1.0') + doc.append('History_1.0=First release') + return True + + +def print_out(conf, doc): + ''' + Print generated conf file + ''' + # Get conf file name + outfile = conf['outfile'] + if not outfile: + # Default to screen + for element in doc: + print(element) + else: + # Write config to file + with open(outfile, 'w') as 
f: + for element in doc: + print(element, file=f) + return True + + +def main(): + ''' + Main function + ''' + # Start benchmark + start_time = time.perf_counter() + + # Read CLI params + params = get_parameters() + check_parameters(params) + + # Read OSIS attributes + osis_attributes = (get_osistext(params['osis'])) + #print(osis_attributes) + check_osistext(osis_attributes) + + # Generate conf + cf = {**params, **osis_attributes} + build_doc(cf) + include_file(cf) + print_out(cf, doc) + + # Benchmark results + end_time = time.perf_counter() + total_time = round(end_time - start_time, 1) + print(f'-- Module Config generated in {total_time} s') + + return True + + +main() |