#!/usr/bin/env python3 # -*- coding: utf-8 -*- # confmaker.py - Provides a initial conf file for a new module by analyzing # the related OSIS xml file. ## The programme searches for relevant tags and creates the GlobalOptionFilter # entries and other relevant conf entries. This a port to Python from the # previous confmaker.pl Perl script we were using. It fixes detection of # diacritics and OSISMorphSegmentation (GlobalOpionFilters) and adds support # for genbook and modules with large entries > 64Kb. # Copyright (C) 2020 CrossWire Bible Society # Author: kris & domcox # This file is part of Sword Modules # Sword Modules is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # Sword Modules is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with Sword Modules. If not, see . # Created: 2021-01-08 # # Revision: # 2021-01-16 domcox # Changed language library from iso-639 to langtags # Requirements import time import re import argparse import sys import xml.etree.ElementTree as ET from datetime import date from pathlib import Path try: import langtags except: sys.stderr.write("You do not have the Python langtags library installed. Please install it (pip install langtags).\n") sys.exit(1) try: import Sword except: sys.stderr.write("You do not have the SWORD library installed. 
# Variables
Version = '1.1'

# Lines of the conf file being generated, in output order
doc = []

# Minimum SWORD version emitted as MinimumVersion for each versification
# schema accepted by --v11n
versification = {
    'KJV': '1.5.9',
    'KJVA': '1.6.0',
    'NRSV': '1.6.0',
    'NRSVA': '1.6.0',
    'MT': '1.6.0',
    'Leningrad': '1.6.0',
    'Synodal': '1.6.1',
    'Vulg': '1.6.1',
    'Luther': '1.6.1',
    'German': '1.6.1',
    'Catholic': '1.6.2',
    'Catholic2': '1.6.2',
    'LXX': '1.7.2',
    'Orthodox': '1.7.2',
    'SynodalProt': '1.7.2',
    'DarbyFr': '1.8.0',
    'Segond': '1.8.0',
    'Calvin': '1.8.0',
}


# Functions
def die(msg):
    '''
    Show an error message on stderr, then exit with status 1
    '''
    print('ERROR! ' + msg, file=sys.stderr)
    sys.exit(1)


def get_parameters(argv=None):
    """
    Parse command-line options.

    argv: optional list of arguments to parse instead of sys.argv[1:]
          (None, the default, keeps the historical behaviour).

    Returns dict containing parameters values, with keys
    'infile', 'outfile', 'v11n', 'size' and 'osis'.
    """
    # Creating parser
    description = '''
    provides a conf file for a module by analysing given OSIS XML file
    and optionally including extra elements from a conf.in file.
    '''
    parser = argparse.ArgumentParser(description=description)

    # Adding arguments
    parser.add_argument("-i", "--infile",
                        help="conf.in file containing extra elements to include, (default none)")
    parser.add_argument("-o", "--outfile",
                        help="name of generated conf file, (default to screen)")
    parser.add_argument("-v", "--v11n", default='KJV',
                        help="versification schema, (default: KJV)")
    parser.add_argument("-s", "--size", default='2',
                        help="set -s 4 for modules with large entries > 64Kb, (default -s 2)")
    parser.add_argument('osis', help='OSIS XML file')

    # Parsing arguments
    return vars(parser.parse_args(argv))


def _require_file(path):
    ''' Exit through die() unless 'path' is an existing regular file '''
    if not Path(path).is_file():
        die(f"File '{path}' does not exist.")


def check_parameters(params):
    '''
    Check CLI parameters for validity.

    params: dict as returned by get_parameters().
    Returns True when every check passes; any failure ends the
    programme through die().
    '''
    # Checking OSIS file
    _require_file(params['osis'])

    # Checking conf.in file in input (optional)
    if params['infile']:
        _require_file(params['infile'])

    # Checking Size
    size = params['size']
    if size not in ('2', '4'):
        die(f"--size='{size}' Incorrect value.")

    # Checking versification schema
    v11n = params['v11n']
    if v11n not in versification:
        die(f"'{v11n}': Unknown versification schema.")

    return True
def is_tag(xml_file, tag):
    """
    Search for 'tag' in the body of an OSIS file.

    xml_file: path of the OSIS XML file (read as UTF-8).
    tag:      element name without the leading '<'. A plain name
              ('title') only matches that exact element, so it does not
              match '<titlePage'. Any other value, e.g. a name followed
              by a blank ('q '), is searched literally after '<'.

    Returns True if 'tag' exists after the header, False otherwise
    (including when the file has no closing header tag at all).
    """
    # Start searching after the header to avoid confusion with the
    # elements describing the work itself
    end_header_tag = '</header>'

    if re.fullmatch(r'[\w:.-]+', tag):
        # Element name must end here: next char is blank, '/' or '>'
        pattern = re.compile('<' + re.escape(tag) + r'(?=[\s/>])')
    else:
        pattern = re.compile(re.escape('<' + tag))

    header_read = False
    # OSIS texts are UTF-8; do not depend on the locale encoding
    with open(xml_file, encoding='utf-8') as f:
        for line in f:
            if not header_read:
                pos = line.find(end_header_tag)
                if pos < 0:
                    # Still inside the header section
                    continue
                header_read = True
                # Ignore what precedes the end of the header on this line
                line = line[pos + len(end_header_tag):]
            if pattern.search(line):
                return True
    return False
def is_attribute(xml_file, tag, attribute):
    """
    Search for 'tag' + 'attribute' in the body of an OSIS file.

    xml_file:  path of the OSIS XML file (read as UTF-8).
    tag:       element name without the leading '<' (e.g. 'w', 'seg').
    attribute: text to look for inside the start tag of that element,
               e.g. ' lemma=' or 'type="x-morph"'.

    The attribute is searched in the start tag only (from '<tag' up to
    the next '>'), which may span several lines, so an attribute of a
    neighbouring element on the same line is not reported.

    Returns True if 'tag' + 'attribute' exists after the header,
    False otherwise.
    """
    # Start searching after the header to avoid confusion with the
    # elements describing the work itself
    end_header_tag = '</header>'

    start_tag = f'<{tag}'
    # A plain element name must end right after 'tag' (so 'w' does not
    # match '<work'); other values are matched literally
    boundary = r'(?=[\s/>])' if re.fullmatch(r'[\w:.-]+', tag) else ''
    complete_start_tag = re.compile(re.escape(start_tag) + boundary + r'[^>]*>')

    header_read = False
    # Text kept from previous lines while a start tag is still open
    pending = ''

    # OSIS texts are UTF-8; do not depend on the locale encoding
    with open(xml_file, encoding='utf-8') as f:
        for line in f:
            if not header_read:
                pos = line.find(end_header_tag)
                if pos < 0:
                    # Still inside the header section
                    continue
                header_read = True
                # Ignore what precedes the end of the header on this line
                line = line[pos + len(end_header_tag):]

            if not pending and start_tag not in line:
                continue
            pending += line

            # Search for attribute in every complete start tag
            for match in complete_start_tag.finditer(pending):
                if attribute in match.group(0):
                    return True

            # Keep only a start tag that is not closed yet on this line
            last = pending.rfind(start_tag)
            if last >= 0 and '>' not in pending[last:]:
                pending = pending[last:]
            else:
                pending = ''
    return False
def is_diacritic(xml_file, lang, diacritic):
    '''
    Search for 'diacritic' in OSIS File.

    xml_file:  path of the OSIS XML file.
    lang:      language code of the text ('ar', 'grc', 'he', 'hbo', ...).
    diacritic: name of the SWORD global option to test, e.g.
               'Greek Accents' or 'Hebrew Vowel Points'.

    Returns True if applying the SWORD filter named 'diacritic' changes
    the bare text of the file, False otherwise. Always False when the
    filter is not relevant for 'lang'; in that case the file is not read.
    '''
    # Languages each diacritic filter is relevant for
    languages_by_filter = {
        'Arabic Vowel Points': ('ar',),
        'Greek Accents': ('grc',),
        'Hebrew Cantillation': ('he', 'hbo'),
        'Hebrew Vowel Points': ('he', 'hbo'),
    }

    # Don't search OSIS targeting other languages than Hebrew, Greek, Arabic
    if lang not in ('ar', 'grc', 'he', 'hbo'):
        return False

    # Don't search for a diacritic belonging to another language
    relevant_languages = languages_by_filter.get(diacritic)
    if relevant_languages is not None and lang not in relevant_languages:
        return False

    # Grab the base SWORD manager, all diacritic options switched off
    mgr = Sword.SWMgr()
    for option in languages_by_filter:
        mgr.setGlobalOption(option, "Off")

    # Parse XML
    xml_root = ET.parse(xml_file).getroot()

    # Remove all tags and keep bare text only, make 2 sets
    strip_text = ET.tostring(xml_root, encoding='unicode', method='text')
    ref_text = Sword.SWBuf(strip_text)
    mod_text = Sword.SWBuf(strip_text)

    # Apply filter on 1 text
    mgr.filterText(diacritic, mod_text)

    # Compare original bare text and filtered one:
    # True if the filter has made changes to the text, False otherwise
    return ref_text.c_str() != mod_text.c_str()
def build_doc(conf):
    '''
    Generate the technical part of the conf file.

    conf: dict merging the CLI parameters and the osisText attributes;
          keys read here are 'osisIDWork', 'osisRefWork', 'xml:lang',
          'osis', 'size' and 'v11n'.

    The entries are appended, in order, to the module-level list 'doc'.
    The OSIS file is scanned to decide which GlobalOptionFilter and
    Feature entries apply. Returns True.
    '''
    # Module Name
    module = conf['osisIDWork']
    doc.append("[" + module + "]")

    # Module Type
    moduletype = conf['osisRefWork']
    kind = moduletype.lower()
    versified = kind in ('bible', 'commentary')

    # Big entries: '4' suffix selects the drivers for entries > 64Kb
    block = '4' if conf['size'] == '4' else ''
    # mod
    mod = module.lower()

    # ModDrv + Datapath (exact match on the module type, not a substring)
    if kind == 'bible':
        doc.append("ModDrv=zText" + block)
        doc.append("DataPath=./modules/texts/ztext" + block + "/" + mod + "/")
    elif kind == 'commentary':
        doc.append("ModDrv=zCom" + block)
        doc.append("DataPath=./modules/comments/zcom" + block + "/" + mod + "/")
    elif kind == 'genbook':
        # NOTE(review): the size suffix is deliberately not applied here;
        # SWORD is believed to have no 'RawGenBook4' driver - confirm
        # against the conf documentation.
        doc.append("ModDrv=RawGenBook")
        doc.append("DataPath=./modules/genbook/rawgenbook/" + mod + "/" + mod)

    # Compression
    if versified:
        doc.append('CompressType=ZIP')

    # misc.
    doc.append('BlockType=BOOK')
    doc.append('Encoding=UTF-8')
    doc.append('SourceType=OSIS')
    doc.append('OSISVersion=2.1.1')
    doc.append('SwordVersionDate=' + str(date.today()))

    # Language
    lang = conf['xml:lang']
    doc.append('Lang=' + lang)

    # GlobalOptionFilter
    # Get Osis file name
    osis = conf['osis']

    # We should have Footnotes before Headings in order to have
    # working notes in titles
    # Footnotes
    if is_tag(osis, 'note'):
        doc.append('GlobalOptionFilter=OSISFootnotes')
    # Headings
    if is_tag(osis, 'title'):
        doc.append('GlobalOptionFilter=OSISHeadings')
    # Scripref
    if is_tag(osis, 'reference'):
        doc.append('GlobalOptionFilter=OSISScripref')
    # RedLetterWords
    if is_tag(osis, 'q '):
        doc.append('GlobalOptionFilter=OSISRedLetterWords')
    # Variants
    if is_attribute(osis, 'seg', ' type="x-variant"') or is_tag(osis, 'rdg'):
        doc.append('GlobalOptionFilter=OSISVariants')
    # MorphSegmentation
    if (is_attribute(osis, 'seg', 'type="morph"')
            or is_attribute(osis, 'seg', 'type="x-morph"')):
        doc.append('GlobalOptionFilter=OSISMorphSegmentation')
    # Lemma
    if is_attribute(osis, 'w', ' lemma='):
        doc.append('GlobalOptionFilter=OSISLemma')
    # Strong (result reused below for the StrongsNumbers feature)
    strong = is_attribute(osis, 'w', 'strong:')
    if strong:
        doc.append('GlobalOptionFilter=OSISStrongs')
    # Glosses
    if is_attribute(osis, 'w', ' gloss='):
        doc.append('GlobalOptionFilter=OSISGlosses')
    # Morph
    if is_attribute(osis, 'w', ' morph='):
        doc.append('GlobalOptionFilter=OSISMorph')
    # Enum
    if is_attribute(osis, 'w', ' n='):
        doc.append('GlobalOptionFilter=OSISEnum')
    # Xlit
    if is_attribute(osis, 'w', ' xlit='):
        doc.append('GlobalOptionFilter=OSISXlit')

    # Diacritics
    # Hebrew Vowel Points
    if is_diacritic(osis, lang, 'Hebrew Vowel Points'):
        doc.append('GlobalOptionFilter=UTF8HebrewPoints')
    # Arabic Vowel Points
    if is_diacritic(osis, lang, 'Arabic Vowel Points'):
        doc.append('GlobalOptionFilter=UTF8ArabicPoints')
    # Hebrew Cantillation
    if is_diacritic(osis, lang, 'Hebrew Cantillation'):
        doc.append('GlobalOptionFilter=UTF8Cantillation')
    # Greek Accents (no trailing blank: it would be part of the value)
    if is_diacritic(osis, lang, 'Greek Accents'):
        doc.append('GlobalOptionFilter=UTF8GreekAccents')

    # Features
    # StrongsNumbers
    if strong:
        doc.append('Feature=StrongsNumbers')
    # Images
    if is_tag(osis, 'figure '):
        doc.append('Feature=Images')
    # NoParagraphs
    if not is_tag(osis, 'p '):
        doc.append('Feature=NoParagraphs')

    # LCSH
    lang_name = get_language(lang)
    if versified:
        doc.append('LCSH=' + moduletype + '.' + lang_name)

    # Sword Minimum Version
    doc.append('MinimumVersion=' + versification[conf['v11n']])
    if versified:
        doc.append('Versification=' + conf['v11n'])

    return True
def include_file(conf):
    '''
    Include conf.in file if it exists.

    conf: dict holding at least 'infile'; when 'infile' is empty the
          keys 'osisIDWork', 'osisRefWork' and 'xml:lang' are read too.

    Appends to the module-level list 'doc' either the lines of the
    conf.in file (trailing whitespace removed) or a set of default
    entries. Returns True.
    '''
    # Get conf.in file if it exists
    infile = conf['infile']

    if infile:
        # Read and include conf.in contents
        with open(infile, 'r', encoding='utf-8', newline='\n') as f:
            doc.extend(line.rstrip() for line in f)
    else:
        # No conf.in file -> generate default values
        module = conf['osisIDWork']
        moduletype = conf['osisRefWork']
        language = get_language(conf['xml:lang'])
        doc.append('DistributionLicense=Copyrighted')
        doc.append(f'Description={module}, {moduletype} in {language}')
        doc.append(f'About={module}, {moduletype} in {language}')
        doc.append('Version=1.0')
        doc.append('History_1.0=First release')
    return True


def print_out(conf, doc):
    '''
    Print generated conf file.

    conf: dict holding 'outfile', the name of the conf file to write;
          an empty value means the conf is printed on screen.
    doc:  list of the conf lines to output, one per line.

    Returns True.
    '''
    # Get conf file name
    outfile = conf['outfile']

    if not outfile:
        # Default to screen
        for element in doc:
            print(element)
    else:
        # Write config to file; the conf declares Encoding=UTF-8, so do
        # not depend on the locale encoding
        with open(outfile, 'w', encoding='utf-8') as f:
            for element in doc:
                print(element, file=f)
    return True


def main():
    '''
    Main function: read the CLI parameters and the OSIS attributes,
    generate the conf and output it. Returns True.
    '''
    # Start benchmark
    start_time = time.perf_counter()

    # Read CLI params
    params = get_parameters()
    check_parameters(params)

    # Read OSIS attributes
    osis_attributes = get_osistext(params['osis'])
    check_osistext(osis_attributes)

    # Generate conf
    cf = {**params, **osis_attributes}
    build_doc(cf)
    include_file(cf)
    print_out(cf, doc)

    # Benchmark results, on stderr so that a conf printed on screen
    # (stdout) can be redirected to a file without this extra line
    total_time = round(time.perf_counter() - start_time, 1)
    print(f'-- Module Config generated in {total_time} s', file=sys.stderr)
    return True


# Run only when executed as a script, not when imported
if __name__ == '__main__':
    main()