#!/usr/bin/env python3
import collections
import logging
import re
import sys
import xml.sax
from xml.sax.saxutils import XMLFilterBase, XMLGenerator
# English book abbreviations, Genesis through Revelation (66 entries).
ENGL_BOOKS = (
    "Gen", "Exod", "Lev", "Num", "Deut", "Josh", "Judg", "Ruth",
    "1Sam", "2Sam", "1Kgs", "2Kgs", "1Chr", "2Chr", "Ezra", "Neh",
    "Esth", "Job", "Ps", "Prov", "Eccl", "Song", "Isa", "Jer", "Lam",
    "Ezek", "Dan", "Hos", "Joel", "Amos", "Obad", "Jonah", "Mic",
    "Nah", "Hab", "Zeph", "Hag", "Zech", "Mal",
    "Matt", "Mark", "Luke", "John", "Acts", "Rom", "1Cor", "2Cor",
    "Gal", "Eph", "Phil", "Col", "1Thess", "2Thess", "1Tim", "2Tim",
    "Titus", "Phlm", "Heb", "Jas", "1Pet", "2Pet", "1John", "2John",
    "3John", "Jude", "Rev",
)
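# TODO(review): 1Mak, 2Mak, 3Mak, 4Mak (and the same books written with the
# "Ma" abbreviation) are presumably still to be added - confirm.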
# Czech book abbreviation -> English book abbreviation.  Insertion order is
# significant: the key and value sequences are consumed in this order.
TranslDict = collections.OrderedDict((
    # Old Testament
    ("Gn", "Gen"),
    ("Ex", "Exod"),
    ("Lv", "Lev"),
    ("Nu", "Num"),
    ("Dt", "Deut"),
    ("Joz", "Josh"),
    ("Sd", "Judg"),
    ("Rt", "Ruth"),
    ("1S", "1Sam"),
    ("2S", "2Sam"),
    ("1Kr", "1Kgs"),
    ("2Kr", "2Kgs"),
    ("1Pa", "1Chr"),
    ("2Pa", "2Chr"),
    ("Ezd", "Ezra"),
    ("Neh", "Neh"),
    ("Est", "Esth"),
    ("Jb", "Job"),
    ("Ž", "Ps"),
    ("Př", "Prov"),
    ("Kaz", "Eccl"),
    ("Pís", "Song"),
    ("Iz", "Isa"),
    ("Jr", "Jer"),
    ("Pl", "Lam"),
    ("Ez", "Ezek"),
    ("Da", "Dan"),
    ("Oz", "Hos"),
    ("Jl", "Joel"),
    ("Am", "Amos"),
    # NOTE(review): key is identical to the English form - confirm that
    # this is the abbreviation actually used in the Czech source text.
    ("Obad", "Obad"),
    ("Jon", "Jonah"),
    ("Mi", "Mic"),
    ("Na", "Nah"),
    ("Abk", "Hab"),
    ("Sof", "Zeph"),
    ("Ag", "Hag"),
    ("Za", "Zech"),
    ("Mal", "Mal"),
    # New Testament
    ("Mt", "Matt"),
    ("Mk", "Mark"),
    ("L", "Luke"),
    ("J", "John"),  # or Jn
    ("Sk", "Acts"),
    ("Ř", "Rom"),
    ("1K", "1Cor"),
    ("2K", "2Cor"),
    ("Ga", "Gal"),
    ("Ef", "Eph"),
    ("Fp", "Phil"),
    ("Ko", "Col"),
    ("1Te", "1Thess"),
    ("2Te", "2Thess"),
    ("1Tm", "1Tim"),  # or 1Ti
    ("2Tm", "2Tim"),
    ("Tt", "Titus"),
    ("Flm", "Phlm"),
    ("He", "Heb"),
    ("Jk", "Jas"),
    ("1Pt", "1Pet"),
    ("2Pt", "2Pet"),
    ("1J", "1John"),
    ("2J", "2John"),
    ("3J", "3John"),
    ("Jud", "Jude"),
    ("Zj", "Rev"),
))
# Keys and values of TranslDict as tuples, both in the mapping's own order.
CZ_BOOKS = tuple(TranslDict)
EN_BOOKS = tuple(TranslDict.values())

# Three groups: an optional book abbreviation (one of CZ_BOOKS), the chapter
# number, and the verse part that follows the comma.  The verse part is any
# run of digits and separators (em dash, full stop, hyphen), so it may also
# end with a separator when the reference is followed by punctuation.
BIBLE_REF_PARSE_RE = re.compile(
    r'(%s)?\s*([0-9]+),([0-9—.-]+)' % '|'.join(CZ_BOOKS)
)

# The separators allowed inside the verse part of a reference.
SPLIT_VERSE_RE = re.compile('[—.-]')
def parse_notes_test(instr):
    """Extract Bible references from a piece of text.

    Every match of BIBLE_REF_PARSE_RE in *instr* becomes one tuple.  A
    reference written without a book abbreviation inherits the book of the
    preceding reference in the same string; if there is no preceding book,
    the book is the empty string.

    Args:
        instr: text to scan for references.

    Returns:
        A tuple of references, each either ``(book, chapter, verse)`` or
        ``(book, chapter, verse, next_verse)``.  ``book`` is the
        abbreviation exactly as matched; the other items are ints.  When
        the verse part holds more than two numbers only the first two are
        used.  Matches whose verse part contains no digits are skipped.
    """
    logging.debug('%s\ninstr = %s', '-' * 30, instr)
    matches = BIBLE_REF_PARSE_RE.findall(instr)
    logging.debug('matches = %s', matches)
    out_list = []
    book = None
    for match in matches:
        logging.debug('match = %s', match)
        if match[0]:
            book = match[0]
        elif book is None:
            book = ''
        chapter = int(match[1])
        # The verse group can start or end with a separator (typically the
        # full stop closing a sentence, "Gn 1,2."), in which case split()
        # yields empty strings and int('') would raise ValueError.  Keep
        # only the pieces that actually contain a number.
        verse_parts = [part for part in SPLIT_VERSE_RE.split(match[2])
                       if part]
        if not verse_parts:
            logging.warning('no verse number in reference %s', match)
            continue
        verse = int(verse_parts[0])
        if len(verse_parts) == 1:
            out_list.append((book, chapter, verse))
            continue
        next_verse = int(verse_parts[1])
        logging.debug('verse, next_verse = %s, %s', verse, next_verse)
        out_list.append((book, chapter, verse, next_verse))
    return tuple(out_list)
class GenerateReferencesFilter(XMLFilterBase):
    """SAX filter that tracks the current verse and collects note text.

    Element and character events are passed on unchanged to the
    *downstream* handler.  Along the way the filter remembers the book,
    chapter and verse of the last ``<verse sID="...">`` element and gathers
    the text found inside ``<note>`` elements.

    NOTE: only startElement, endElement and characters are forwarded to
    *downstream*.  Other events (startDocument, endDocument, ...) are not
    overridden and therefore go to the filter's own content handler, not to
    *downstream*.
    """

    def __init__(self, upstream, downstream):
        """Create the filter.

        Args:
            upstream: the parent reader handed to XMLFilterBase.
            downstream: handler that receives the forwarded startElement,
                endElement and characters events.
        """
        super().__init__(upstream)
        self._downstream = downstream
        self._cur_book = None
        self._cur_chapter = 0
        self._cur_verse = 0
        # We are outside any note until a <note> start tag is seen;
        # starting with True would collect all text preceding the first
        # note as if it were note content.
        self._in_note = False
        self._note_content = ""

    def startElement(self, name, attrs):  # noqa
        """Record verse position / note start, then forward the event."""
        if name == "verse" and 'sID' in attrs:
            # sID is expected to look like "<book>.<chapter>.<verse>".
            ref_elements = attrs['sID'].split('.')
            self._cur_book = ref_elements[0]
            self._cur_chapter = int(ref_elements[1])
            self._cur_verse = int(ref_elements[2])
        elif name == "note":
            self._in_note = True
            self._note_content = ""
        self._downstream.startElement(name, attrs)

    def endElement(self, name):  # noqa
        """Log and reset the collected note text, then forward the event."""
        if name == 'note' and self._in_note:
            logging.debug('content:\n%s', self._note_content)
            self._in_note = False
            self._note_content = ""
        self._downstream.endElement(name)

    def characters(self, content):
        """Collect non-blank text inside a note, then forward the event."""
        # Whitespace-only chunks are not added to the note text, but they
        # are still forwarded downstream untouched.
        if self._in_note and content.strip():
            self._note_content += content
        self._downstream.characters(content)
if __name__ == "__main__":
    # Usage: script XML_FILE
    # Parses XML_FILE through GenerateReferencesFilter and re-serialises the
    # forwarded events with XMLGenerator (which writes to sys.stdout when no
    # output stream is given).
    if len(sys.argv) < 2:
        # Fail with a readable message instead of an IndexError traceback.
        sys.exit("usage: %s XML_FILE" % sys.argv[0])
    downstream_handler = XMLGenerator(encoding="utf-8",
                                      short_empty_elements=True)
    parser = GenerateReferencesFilter(xml.sax.make_parser(),
                                      downstream_handler)
    parser.parse(sys.argv[1])