#!/usr/bin/env python3
import collections
import logging
import re
import sys
import xml.sax
from xml.sax.saxutils import XMLFilterBase, XMLGenerator

# English book abbreviations, listed in the same order as the values of
# TranslDict below.
ENGL_BOOKS = (
    'Gen', 'Exod', 'Lev', 'Num', 'Deut', 'Josh', 'Judg', 'Ruth',
    '1Sam', '2Sam', '1Kgs', '2Kgs', '1Chr', '2Chr', 'Ezra', 'Neh',
    'Esth', 'Job', 'Ps', 'Prov', 'Eccl', 'Song', 'Isa', 'Jer', 'Lam',
    'Ezek', 'Dan', 'Hos', 'Joel', 'Amos', 'Obad', 'Jonah', 'Mic', 'Nah',
    'Hab', 'Zeph', 'Hag', 'Zech', 'Mal',
    'Matt', 'Mark', 'Luke', 'John', 'Acts', 'Rom', '1Cor', '2Cor', 'Gal',
    'Eph', 'Phil', 'Col', '1Thess', '2Thess', '1Tim', '2Tim', 'Titus',
    'Phlm', 'Heb', 'Jas', '1Pet', '2Pet', '1John', '2John', '3John',
    'Jude', 'Rev',
)

# 1Mak, 2Mak, 3Mak, 4Mak, likewise with Ma
# NOTE(review): presumably abbreviations that are not handled by the table
# below -- confirm.

# Maps the Czech book abbreviation (key) to the English one (value).
# Insertion order is significant: CZ_BOOKS and EN_BOOKS are derived from it.
TranslDict = collections.OrderedDict([
    ("Gn", "Gen"),
    ("Ex", "Exod"),
    ("Lv", "Lev"),
    ("Nu", "Num"),
    ("Dt", "Deut"),
    ("Joz", "Josh"),
    ("Sd", "Judg"),
    ("Rt", "Ruth"),
    ("1S", "1Sam"),
    ("2S", "2Sam"),
    ("1Kr", "1Kgs"),
    ("2Kr", "2Kgs"),
    ("1Pa", "1Chr"),
    ("2Pa", "2Chr"),
    ("Ezd", "Ezra"),
    ("Neh", "Neh"),
    ("Est", "Esth"),
    ("Jb", "Job"),
    ("Ž", "Ps"),
    ("Př", "Prov"),
    ("Kaz", "Eccl"),
    ("Pís", "Song"),
    ("Iz", "Isa"),
    ("Jr", "Jer"),
    ("Pl", "Lam"),
    ("Ez", "Ezek"),
    ("Da", "Dan"),
    ("Oz", "Hos"),
    ("Jl", "Joel"),
    ("Am", "Amos"),
    ("Obad", "Obad"),
    ("Jon", "Jonah"),
    ("Mi", "Mic"),
    ("Na", "Nah"),
    ("Abk", "Hab"),
    ("Sof", "Zeph"),
    ("Ag", "Hag"),
    ("Za", "Zech"),
    ("Mal", "Mal"),
    ("Mt", "Matt"),
    ("Mk", "Mark"),
    ("L", "Luke"),
    ("J", "John"),  # or Jn
    ("Sk", "Acts"),
    ("Ř", "Rom"),
    ("1K", "1Cor"),
    ("2K", "2Cor"),
    ("Ga", "Gal"),
    ("Ef", "Eph"),
    ("Fp", "Phil"),
    ("Ko", "Col"),
    ("1Te", "1Thess"),
    ("2Te", "2Thess"),
    ("1Tm", "1Tim"),  # or 1Ti
    ("2Tm", "2Tim"),
    ("Tt", "Titus"),
    ("Flm", "Phlm"),
    ("He", "Heb"),
    ("Jk", "Jas"),
    ("1Pt", "1Pet"),
    ("2Pt", "2Pet"),
    ("1J", "1John"),
    ("2J", "2John"),
    ("3J", "3John"),
    ("Jud", "Jude"),
    ("Zj", "Rev"),
])

CZ_BOOKS = tuple(TranslDict.keys())
EN_BOOKS = tuple(TranslDict.values())

# Matches one reference: an optional Czech book abbreviation, optional
# whitespace, a chapter number, a comma, and a verse expression made of
# digits and the separators "—", "." and "-".  The three groups are
# (book, chapter, verse expression); book is '' when no abbreviation
# precedes the chapter.  Abbreviations are escaped so that a future entry
# containing a regex metacharacter is still matched literally.
BIBLE_REF_PARSE_RE = re.compile(
    r'(%s)?\s*([0-9]+),([0-9—.-]+)'
    % '|'.join(re.escape(abbrev) for abbrev in CZ_BOOKS))
# Separators that may appear between verse numbers inside one reference.
SPLIT_VERSE_RE = re.compile('[—.-]')


def parse_notes_test(instr, ref_re=None):
    """Extract Bible references from the text *instr*.

    Args:
        instr: Text to scan for references.
        ref_re: Optional compiled pattern used instead of the module-level
            BIBLE_REF_PARSE_RE.  It must expose exactly three groups:
            (book, chapter, verse expression).

    Returns:
        A tuple with one entry per reference found, in order of appearance.
        An entry is ``(book, chapter, verse)`` for a single verse, or
        ``(book, chapter, verse, next_verse)`` when the verse expression
        holds at least two numbers (only the first two are used).  A
        reference without its own book abbreviation inherits the book of
        the preceding reference, or ``''`` when there is none.
    """
    if ref_re is None:
        # Looked up at call time so the default follows the module constant.
        ref_re = BIBLE_REF_PARSE_RE

    logging.debug('%s\ninstr = %s', '-' * 30, instr)
    matches = ref_re.findall(instr)
    logging.debug('matches = %s', matches)

    out_list = []
    book = ''
    for match in matches:
        logging.debug('match = %s', match)
        found_book, chapter_str, verse_expr = match
        if found_book:
            book = found_book
        chapter = int(chapter_str)

        # The verse expression may begin or end with a separator (typically
        # the full stop closing a sentence, as in "Gn 1,1.").  Dropping the
        # empty fragments avoids int('') raising ValueError.
        verses = [int(part)
                  for part in SPLIT_VERSE_RE.split(verse_expr) if part]
        if not verses:
            logging.warning('no verse number in reference %r, skipped', match)
            continue

        if len(verses) == 1:
            out_list.append((book, chapter, verses[0]))
        else:
            verse, next_verse = verses[0], verses[1]
            logging.debug('verse, next_verse = %s, %s', verse, next_verse)
            out_list.append((book, chapter, verse, next_verse))

    return tuple(out_list)


class GenerateReferencesFilter(XMLFilterBase):
    """SAX filter that tracks verse position and note text.

    Element and character events are passed unchanged to *downstream*.
    While doing so the filter remembers the book, chapter and verse taken
    from the ``sID`` attribute of the last ``verse`` element, and collects
    the text found inside ``note`` elements, which is logged at debug level
    when the note ends.

    startDocument/endDocument are not overridden here, so those two events
    are not forwarded to *downstream*.
    """

    def __init__(self, upstream, downstream):
        """Create the filter.

        Args:
            upstream: The XML reader this filter wraps.
            downstream: Content handler that receives the forwarded events.
        """
        super().__init__(upstream)
        self._downstream = downstream
        self._cur_book = None
        self._cur_chapter = 0
        self._cur_verse = 0
        # Start outside of any note, so that text preceding the first
        # <note> is not collected as note content.
        self._in_note = False
        self._note_content = ""

    def startElement(self, name, attrs):  # noqa
        """Record the verse position or the start of a note, then forward."""
        if name == "verse" and 'sID' in attrs:
            # sID is split on "." into book, chapter and verse.
            ref_elements = attrs['sID'].split('.')
            self._cur_book = ref_elements[0]
            self._cur_chapter = int(ref_elements[1])
            self._cur_verse = int(ref_elements[2])
        elif name == "note":
            self._in_note = True
            self._note_content = ""
        self._downstream.startElement(name, attrs)

    def endElement(self, name):  # noqa
        """Log and reset the collected note text, then forward the event."""
        if name == 'note' and self._in_note:
            logging.debug('content:\n%s', self._note_content)
            self._in_note = False
            self._note_content = ""
        self._downstream.endElement(name)

    def characters(self, content):
        """Collect non-blank text while inside a note, then forward it."""
        if self._in_note and content.strip():
            self._note_content += content
        self._downstream.characters(content)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit('usage: %s INPUT.xml' % sys.argv[0])

    # No output stream is passed to XMLGenerator, so it uses its default.
    downstream_handler = XMLGenerator(encoding="utf-8",
                                      short_empty_elements=True)
    parser = GenerateReferencesFilter(xml.sax.make_parser(),
                                      downstream_handler)
    parser.parse(sys.argv[1])