path: root/generate_reference.py



#!/usr/bin/env python3

import collections
import logging
import re
import sys
import xml.sax

from xml.sax.saxutils import XMLFilterBase, XMLGenerator

# English abbreviations of the books of the Bible. The same values, in the
# same order, form the right-hand side of the TranslDict pairs.
ENGL_BOOKS = (
    # Old Testament
    'Gen', 'Exod', 'Lev', 'Num', 'Deut',
    'Josh', 'Judg', 'Ruth',
    '1Sam', '2Sam', '1Kgs', '2Kgs', '1Chr', '2Chr',
    'Ezra', 'Neh', 'Esth',
    'Job', 'Ps', 'Prov', 'Eccl', 'Song',
    'Isa', 'Jer', 'Lam', 'Ezek', 'Dan',
    'Hos', 'Joel', 'Amos', 'Obad', 'Jonah', 'Mic',
    'Nah', 'Hab', 'Zeph', 'Hag', 'Zech', 'Mal',
    # New Testament
    'Matt', 'Mark', 'Luke', 'John', 'Acts',
    'Rom', '1Cor', '2Cor', 'Gal', 'Eph', 'Phil', 'Col',
    '1Thess', '2Thess', '1Tim', '2Tim', 'Titus', 'Phlm',
    'Heb', 'Jas', '1Pet', '2Pet', '1John', '2John', '3John',
    'Jude', 'Rev',
)

# Not present in the mapping below: 1Mak, 2Mak, 3Mak, 4Mak (likewise when
# abbreviated with "Ma": 1Ma, 2Ma, 3Ma, 4Ma).

# Czech book abbreviation -> English book abbreviation, kept in the order
# the pairs are listed.
# NOTE(review): "Obad" is identical to its English value; confirm that this
# is really the abbreviation used in the Czech notes.
TranslDict = collections.OrderedDict((
    # Old Testament
    ("Gn", "Gen"), ("Ex", "Exod"), ("Lv", "Lev"),
    ("Nu", "Num"), ("Dt", "Deut"),
    ("Joz", "Josh"), ("Sd", "Judg"), ("Rt", "Ruth"),
    ("1S", "1Sam"), ("2S", "2Sam"),
    ("1Kr", "1Kgs"), ("2Kr", "2Kgs"),
    ("1Pa", "1Chr"), ("2Pa", "2Chr"),
    ("Ezd", "Ezra"), ("Neh", "Neh"), ("Est", "Esth"),
    ("Jb", "Job"), ("Ž", "Ps"), ("Př", "Prov"),
    ("Kaz", "Eccl"), ("Pís", "Song"),
    ("Iz", "Isa"), ("Jr", "Jer"), ("Pl", "Lam"),
    ("Ez", "Ezek"), ("Da", "Dan"),
    ("Oz", "Hos"), ("Jl", "Joel"), ("Am", "Amos"),
    ("Obad", "Obad"), ("Jon", "Jonah"), ("Mi", "Mic"),
    ("Na", "Nah"), ("Abk", "Hab"), ("Sof", "Zeph"),
    ("Ag", "Hag"), ("Za", "Zech"), ("Mal", "Mal"),
    # New Testament
    ("Mt", "Matt"), ("Mk", "Mark"), ("L", "Luke"),
    ("J", "John"),  # or Jn
    ("Sk", "Acts"), ("Ř", "Rom"),
    ("1K", "1Cor"), ("2K", "2Cor"),
    ("Ga", "Gal"), ("Ef", "Eph"), ("Fp", "Phil"), ("Ko", "Col"),
    ("1Te", "1Thess"), ("2Te", "2Thess"),
    ("1Tm", "1Tim"),  # or 1Ti
    ("2Tm", "2Tim"),
    ("Tt", "Titus"), ("Flm", "Phlm"), ("He", "Heb"), ("Jk", "Jas"),
    ("1Pt", "1Pet"), ("2Pt", "2Pet"),
    ("1J", "1John"), ("2J", "2John"), ("3J", "3John"),
    ("Jud", "Jude"), ("Zj", "Rev"),
))

# Both sides of the mapping as tuples, in mapping order.
CZ_BOOKS = tuple(TranslDict)
EN_BOOKS = tuple(TranslDict.values())

# One reference of the form "[book] chapter,verses":
#   group 1 - optional Czech book abbreviation (one of CZ_BOOKS),
#   group 2 - chapter number,
#   group 3 - run of digits and the separators '—', '.', '-'.
BIBLE_REF_PARSE_RE = re.compile(
    r'(%s)?\s*([0-9]+),([0-9—.-]+)' % ('|'.join(CZ_BOOKS),))

# Any single separator that may appear inside group 3 above.
SPLIT_VERSE_RE = re.compile('[—.-]')


def parse_notes_test(instr):
    """Extract Bible references from the text of a note.

    Every match of BIBLE_REF_PARSE_RE in *instr* produces one tuple:

    * ``(book, chapter, verse)`` when the verse part holds one number, or
    * ``(book, chapter, verse, next_verse)`` when it holds several numbers
      joined by a SPLIT_VERSE_RE separator (only the first two are kept).

    ``book`` is the abbreviation exactly as matched.  A reference without a
    book inherits the book of the preceding reference, or ``''`` when no
    book has been seen yet.  The remaining items are ints.

    Separators that only lead or trail the verse numbers (e.g. the full stop
    closing the sentence in "Mt 5,3.") are ignored; a match whose verse part
    contains no digit at all is skipped with a warning.

    Returns:
        A tuple of the tuples described above, in order of appearance.
    """
    logging.debug('%s\ninstr = %s', '-' * 30, instr)
    matches = BIBLE_REF_PARSE_RE.findall(instr)
    logging.debug('matches = %s', matches)
    out_list = []
    # Most recently named book; stays '' until a reference names one.
    book = ''

    for match in matches:
        logging.debug('match = %s', match)
        found_book, chapter_str, verse_str = match
        if found_book:
            book = found_book
        chapter = int(chapter_str)

        # The verse group may begin or end with a separator, in which case
        # split() yields empty strings that int() would reject.
        numbers = [int(part)
                   for part in SPLIT_VERSE_RE.split(verse_str) if part]
        if not numbers:
            logging.warning('no verse number in reference %s, skipped',
                            match)
            continue

        if len(numbers) == 1:
            out_list.append((book, chapter, numbers[0]))
        else:
            verse, next_verse = numbers[:2]
            logging.debug('verse, next_verse = %s, %s', verse, next_verse)
            out_list.append((book, chapter, verse, next_verse))
    return tuple(out_list)


class GenerateReferencesFilter(XMLFilterBase):
    """SAX filter that tracks the current verse and collects note text.

    Element and character events coming from *upstream* are passed on
    unchanged to *downstream*.  Along the way the filter remembers the
    book/chapter/verse of the last ``<verse sID="Book.Chapter.Verse">``
    element and gathers the text found inside each ``<note>`` element; the
    gathered text is currently only written to the debug log when the note
    ends.
    """

    def __init__(self, upstream, downstream):
        """Create the filter.

        Args:
            upstream: parent XML reader handed to XMLFilterBase.
            downstream: handler receiving the forwarded startElement,
                endElement and characters calls.
        """
        XMLFilterBase.__init__(self, upstream)
        self._downstream = downstream
        self._cur_book = None
        self._cur_chapter = 0
        self._cur_verse = 0
        # We are outside any <note> until startElement sees one; starting
        # with True would collect all text preceding the first note.
        self._in_note = False
        self._note_content = ""

    # NOTE(review): only startElement, endElement and characters are
    # forwarded to the downstream handler.  startDocument/endDocument are
    # not overridden, so they presumably never reach it (e.g. XMLGenerator
    # would not emit the XML declaration) - confirm this is intended.

    def startElement(self, name, attrs):  # noqa
        """Record the verse position or open a note, then forward."""
        if name == "verse" and 'sID' in attrs:
            # sID is expected to look like "Book.Chapter.Verse".
            ref_elements = attrs['sID'].split('.')
            self._cur_book, self._cur_chapter, self._cur_verse = \
                ref_elements[0], int(ref_elements[1]), int(ref_elements[2])
        elif name == "note":
            self._in_note = True
            self._note_content = ""
        self._downstream.startElement(name, attrs)

    def endElement(self, name):  # noqa
        """Log and discard the collected note text, then forward."""
        if name == 'note' and self._in_note:
            logging.debug('content:\n%s', self._note_content)
            self._in_note = False
            self._note_content = ""
        self._downstream.endElement(name)

    def characters(self, content):
        """Collect non-blank text while inside a note, then forward."""
        # Whitespace-only chunks are not collected, but are still forwarded.
        if self._in_note and content.strip():
            self._note_content += content
        self._downstream.characters(content)


if __name__ == "__main__":
    # Serialise the filtered events back to XML; no output stream is given,
    # so XMLGenerator writes to its default (standard output).
    xml_writer = XMLGenerator(short_empty_elements=True, encoding="utf-8")
    # Chain: SAX parser -> reference filter -> XML writer.
    reference_filter = GenerateReferencesFilter(
        xml.sax.make_parser(), xml_writer)
    # The input document is named by the first command-line argument.
    reference_filter.parse(sys.argv[1])