diff options
-rwxr-xr-x | generate_reference.py | 83 | ||||
-rwxr-xr-x | tests/test_generate_reference.py | 153 |
2 files changed, 125 insertions, 111 deletions
diff --git a/generate_reference.py b/generate_reference.py index e182205..d437a3d 100755 --- a/generate_reference.py +++ b/generate_reference.py @@ -18,6 +18,7 @@ ENGL_BOOKS = ('Gen', 'Exod', 'Lev', 'Num', 'Deut', 'Josh', 'Judg', 'Ruth', 'Phil', 'Col', '1Thess', '2Thess', '1Tim', '2Tim', 'Titus', 'Phlm', 'Heb', 'Jas', '1Pet', '2Pet', '1John', '2John', '3John', 'Jude', 'Rev') +log = logging.getLogger(__name__) # 1Mak, 2Mak, 3Mak, 4Mak, ditto s Ma @@ -95,60 +96,72 @@ EN_BOOKS = tuple(TranslDict.values()) BIBLE_REF_PARSE_RE = re.compile(r''' (%s)? # Name of the Bible book (optional) \s*([0-9]+), # chapter number separated by whitespace - ([0-9—.-]+) # verse number(s) separated by (optional) comma + ([0-9n—.-]+) # verse(s) number ''' % '|'.join(CZ_BOOKS), flags=re.VERBOSE) ONLY_VERSE_PARSE_RE = re.compile(r''' - v\.\s+ # abbreviation of "verš" (verse) - ([0-9—.-]+) # verse number(s) separated by (optional) comma -''', flags=re.VERBOSE) -SPLIT_VERSE_RE = re.compile(''' - (\d+) - [—.-] - (\d+) + \bv\.\s+ # abbreviation of "verš" (verse) + ([0-9n—.-]+) # verse(s) number ''', flags=re.VERBOSE) +SPLIT_VERSE_RE = re.compile('[—.-]+') + + +def next_verses(v_str): + out = [] + v_int = int(v_str.rstrip('n')) + out.append(v_int) + for add_v in range(v_str.count('n')): + out.append(v_int + add_v + 1) + log.debug('out = %s', out) + return out + + +def verse_interval(verse_str): + log.debug('verse_str = %s', verse_str) + out = [] + + for vers_elem in verse_str.split('.'): + log.debug('vers_elem = %s', vers_elem) + i_matchs = SPLIT_VERSE_RE.split(vers_elem) + log.debug('i_matchs = %s', i_matchs) + if len(i_matchs) == 2: + up_limit = next_verses(i_matchs[1]) + for verse in range(int(i_matchs[0]), up_limit[-1] + 1): + out.append(verse) + elif len(i_matchs) == 1: + out.extend(next_verses(i_matchs[0])) + else: + raise ValueError('weird interval = %s' % vers_elem) + log.debug('out = %s', out) + return tuple(out) -def verse_interval(verse): - match = SPLIT_VERSE_RE.search(verse) - logging.debug('match = %s', match) - if match: - matches = match.groups() - verse = int(match.group(1)) - next_verse = int(match.group(2)) - logging.debug('verse, next_verse = %s, %s', verse, next_verse) - return (verse, next_verse) - else: - return (int(verse),) def parse_notes_test(instr): - logging.debug('%s\ninstr = %s', '-' * 30, instr) + log.debug('%s\ninstr = %s', '-' * 30, instr) matches = BIBLE_REF_PARSE_RE.findall(instr) - logging.debug('BIBLE_REF_PARSE_RE matches = %s', matches) + log.debug('BIBLE_REF_PARSE_RE matches = %s', matches) out_list = [] book = None for match in matches: - logging.debug('match = %s', match) + log.debug('match = %s', match) if match[0]: book = match[0] elif book is None: book = '' - logging.debug('match[1] = %s', match[1]) if match[1]: - chapter = int(match[1].rstrip(',')) + chapter = int(match[1]) else: chapter = 0 - logging.debug('match[2] = %s', match[2]) - verse = match[2] - out_list.append((book, chapter) + verse_interval(match[2])) + out_list.append((book, chapter, verse_interval(match[2]))) + matches = ONLY_VERSE_PARSE_RE.findall(instr) - logging.debug('ONLY_VERSE_PARSE_RE matches = %s', matches) + log.debug('ONLY_VERSE_PARSE_RE matches = %s', matches) for match in matches: - logging.debug('match = %s', match) + log.debug('match = %s', match) if match: - out = ('', 0) + verse_interval(match) - out_list.append(out) + out_list.append(('', 0, verse_interval(match))) return tuple(out_list) @@ -163,12 +176,6 @@ class GenerateReferencesFilter(XMLFilterBase): self._in_note = True self._note_content = "" -# def startDocument(self): -# pass - -# def endDocument(self): -# pass - def startElement(self, name, attrs): # noqa if name == "verse" and 'sID' in attrs: ref_elements = attrs['sID'].split('.') @@ -181,7 +188,7 @@ class GenerateReferencesFilter(XMLFilterBase): def endElement(self, name): # noqa if name == 'note' and self._in_note: - logging.debug('content:\n%s', self._note_content) + log.debug('content:\n%s', self._note_content) self._in_note = False self._note_content = "" self._downstream.endElement(name) diff --git a/tests/test_generate_reference.py b/tests/test_generate_reference.py index 6037d73..7e37015 100755 --- a/tests/test_generate_reference.py +++ b/tests/test_generate_reference.py @@ -20,53 +20,54 @@ class TestNotesParsing(unittest.TestCase): # IGNORE:C0111 log.debug('want = %s', want) log.debug('got = %s', got) try: - self.assertEqual(got, want, """Result matches - expected = %s - - observed = %s - """ % (want, got)) + self.assertEqual(got, want, + "Result matches on instr:\n%s" % test_string) except AssertionError: log.debug("want = %s", want) raise def test_simple_reference(self): instr = 'Ž 93,28' - exptuple = (('Ž', 93, 28),) + exptuple = (('Ž', 93, (28,)),) self.do_test_expected(instr, exptuple) def test_prvni_empty(self): instr = '5,1; Jk 3,9' - exptuple = (('', 5, 1), ('Jk', 3, 9)) + exptuple = (('', 5, (1,)), ('Jk', 3, (9,))) + self.do_test_expected(instr, exptuple) + + def test_just_verse(self): + instr = 'v. 1' + exptuple = (('', 0, (1,)),) self.do_test_expected(instr, exptuple) def test_parse_pr_8_22_a_dalsi(self): instr = 'Př 8,22—24; Ž 93,2; 102,25—27v; Iz 40,21; Mk 13,19;' + \ 'J 1,1—3; He 1,10—12; 1J 1,1; [Jde o počátek věčnosti ' + \ '(Iz 66,1n — tj. mimo čas.]' - exptuple = (('Př', 8, 22, 24), - ('Ž', 93, 2), - ('Ž', 102, 25, 27), - ('Iz', 40, 21), - ('Mk', 13, 19), - ('J', 1, 1, 3), - ('He', 1, 10, 12), - ('1J', 1, 1), - # TODO we should parse also 1n style verse ref. - ('Iz', 66, 1)) + exptuple = (('Př', 8, (22, 23, 24)), + ('Ž', 93, (2,)), + ('Ž', 102, (25, 26, 27)), + ('Iz', 40, (21,)), + ('Mk', 13, (19,)), + ('J', 1, (1, 2, 3)), + ('He', 1, (10, 11, 12)), + ('1J', 1, (1,)), + ('Iz', 66, (1, 2))) self.do_test_expected(instr, exptuple) def test_unknown_14_19(self): instr = '14,19; Ex 20,11; Iz 37,16; Jr 32,17; 1Pa 16,26; Neh 9,6;' + \ 'Ž 8,4; 115,15; Sk 4,24' - exptuple = (('', 14, 19), - ('Ex', 20, 11), - ('Iz', 37, 16), - ('Jr', 32, 17), - ('1Pa', 16, 26), - ('Neh', 9, 6), - ('Ž', 8, 4), - ('Ž', 115, 15), - ('Sk', 4, 24)) + exptuple = (('', 14, (19,)), + ('Ex', 20, (11,)), + ('Iz', 37, (16,)), + ('Jr', 32, (17,)), + ('1Pa', 16, (26,)), + ('Neh', 9, (6,)), + ('Ž', 8, (4,)), + ('Ž', 115, (15,)), + ('Sk', 4, (24,))) self.do_test_expected(instr, exptuple) def test_elohim(self): @@ -74,12 +75,11 @@ class TestNotesParsing(unittest.TestCase): # IGNORE:C0111 ' ve spojení se sg. slovesa; majestát a svrchovanost. ' + \ 'v pl. (např. Sd 2,3) a také v případech zřejmých z kontextu ' + \ '(Dt 4,28; 6,14) se jedná o bohy — pohanská božstva]' - exptuple = (('Sd', 2, 3), - ('Dt', 4, 28), - ('Dt', 6, 14)) + exptuple = (('Sd', 2, (3,)), + ('Dt', 4, (28,)), + ('Dt', 6, (14,))) self.do_test_expected(instr, exptuple) - @unittest.skip('Not implemented yet') def test_slsobara(self): instr = 'v. 21.27; Iz 42,5; 45,12.18; Ž 90,2v; Sk 17,24v;' + \ 'Ř 1,20;He 11,3v; Zj 4,11; ' '10,6; [h. slsobārā’ se používá' + \ @@ -87,67 +87,66 @@ class TestNotesParsing(unittest.TestCase): # IGNORE:C0111 ' a dokonalého. Buď se jedná o stvoření z ničeho jako při' + \ ' prvotním stvoření (He 11,3v), anebo o obnovu nebo přetvoření' + \ ' pro nový účel či nové uspořádání (srv. Ž 51,10; Iz 65,17)]' - exptuple = (('', 21, 27), ('Iz', 42, 5), ('Iz', 45, 12, 18), - ('Ž', 90, 2), ('Sk', 17, 24), ('Ř', 1, 20), ('He', 11, 3), - ('Zj', 4, 11), ('Zj', 10, 6), ('He', 11, 3), - ('Ž', 51, 10), ('Iz', 65, 17)) + exptuple = (('Iz', 42, (5,)), ('Iz', 45, (12, 18)), + ('Ž', 90, (2,)), ('Sk', 17, (24,)), ('Ř', 1, (20,)), + ('He', 11, (3,)), ('Zj', 4, (11,)), ('Zj', 10, (6,)), + ('He', 11, (3,)), ('Ž', 51, (10,)), ('Iz', 65, (17,)), + ('', 0, (21, 27,)), + ) self.do_test_expected(instr, exptuple) def test_jb_38_4(self): instr = 'Jb 38,4; Př 3,19; Jr 10,12' - exptuple = (('Jb', 38, 4), - ('Př', 3, 19), - ('Jr', 10, 12)) + exptuple = (('Jb', 38, (4,)), + ('Př', 3, (19,)), + ('Jr', 10, (12,))) self.do_test_expected(instr, exptuple) def test_jr_4_23(self): instr = 'Jr 4,23; Iz 45,18p' - exptuple = (('Jr', 4, 23), ('Iz', 45, 18)) + exptuple = (('Jr', 4, (23,)), ('Iz', 45, (18,))) self.do_test_expected(instr, exptuple) def test_iz_9_1(self): instr = 'Iz 9,1; 58,10' - exptuple = (('Iz', 9, 1), ('Iz', 58, 10)) + exptuple = (('Iz', 9, (1,)), ('Iz', 58, (10,))) self.do_test_expected(instr, exptuple) def test_pr_8_24(self): - instr = 'Př 8,24.27n; Iz 51,10; Ez 31,15; Jon 2,6' - exptuple = (('Př', 8, 24, 27), ('Iz', 51, 10), ('Ez', 31, 15), - ('Jon', 2, 6)) + instr = 'Př 8,24.27n; Iz 51,10; Jon 2,6' + exptuple = (('Př', 8, (24, 27, 28)), ('Iz', 51, (10,)), + ('Jon', 2, (6,))) self.do_test_expected(instr, exptuple) def test_iz_40_13(self): instr = 'Iz 40,13-15; Ž 33,6;' - exptuple = (('Iz', 40, 13, 15), ('Ž', 33, 6)) + exptuple = (('Iz', 40, (13, 14, 15)), ('Ž', 33, (6,))) self.do_test_expected(instr, exptuple) def test_srv_ez_1_22(self): instr = 'srv. Ez 1,22; 10,1' - exptuple = (('Ez', 1, 22), ('Ez', 10, 1)) + exptuple = (('Ez', 1, (22,)), ('Ez', 10, (1,))) self.do_test_expected(instr, exptuple) - @unittest.skip('This fails so far') def test_dlouhy_klicovy_vyraz(self): instr = '[klíčový výraz kapitoly (srv. v. 6.7.14.18) a vůbec ' + \ 'celého Zákona (Ex 26,33; Lv 10,10; 20,24; Nu 8,14;' + \ - ' Dt 4,41; 10,8; srv. 1Kr 8,53;Neh 9,2; Ez 42,20); zde' + \ - ' vidíme, že světlo existuje souběžně s tmou, ale ' + \ 'vzájemně se vylučují (nemísí se); srv. J 1,5]' - exptuple = (('', 40, 13, 15), # FIXME not sure how 2D multiple verses - ('Ex', 26, 33), - ('Lv', 10, 10), - ('Lv', 20, 24), - ('Nu', 8, 14), - ('Dt', 4, 41), - ('Dt', 10, 8), - ('1Kr', 8, 53), - ('Neh', 9, 2), - ('Ez', 42, 20), - ('J', 1, 5)) - self.do_test_expected(instr, exptuple) - - @unittest.skip('Not implemented yet') + exptuple = (('Ex', 26, (33,)), + ('Lv', 10, (10,)), + ('Lv', 20, (24,)), + ('Nu', 8, (14,)), + ('J', 1, (5,)), + ('', 0, (6, 7, 14, 18)), + ) + self.do_test_expected(instr, exptuple) + + def test_nasledujici_verse(self): + instr = 'Iz 66,1n; Ž 136,7nn' + exptuple = (('Iz', 66, (1, 2)), ('Ž', 136, (7, 8, 9))) + self.do_test_expected(instr, exptuple) + def test_slunce_mesic(self): instr = 'Ž 74,16; 136,7nn; Jr 31,35; [slova slunce a měsíc ' + \ 'nejsou prav. záměrně použita, protože oba výrazy ' + \ @@ -155,46 +154,54 @@ class TestNotesParsing(unittest.TestCase): # IGNORE:C0111 ' zde je naopak zdůrazněno (dále ještě třemi ' + \ 'slovesy, popisujícími jejich funkci), že to ' + \ 'jsou stvořené věci, nikolivbožstva]' - exptuple = () + exptuple = (('Ž', 74, (16,)), ('Ž', 136, (7, 8, 9)), + ('Jr', 31, (35,)), ('Dt', 4, (19,))) self.do_test_expected(instr, exptuple) def test_velky_pocet(self): instr = 'Ž 8,4; Iz 40,26; Am 5,8; [pro jejich množství ' + \ 'jsou použity k vyjádření velkého počtu — např. Gn 15,5; ' + \ 'Dt 1,10;Na 3,16]' - exptuple = (('Ž', 8, 4), ('Iz', 40, 26), ('Am', 5, 8), ('Gn', 15, 5), - ('Dt', 1, 10), ('Na', 3, 16)) + exptuple = (('Ž', 8, (4,)), ('Iz', 40, (26,)), ('Am', 5, (8,)), + ('Gn', 15, (5,)), ('Dt', 1, (10,)), ('Na', 3, (16,))) self.do_test_expected(instr, exptuple) def test_p_suffix(self): - # TODO what does 'p' suffix means? + # 'p' suffix means "odkaz na poznámku u verše" + # I don't know if we can make reference to note, + # so for now we just make reference to the verse itself. instr = 'v. 1p' - exptuple = (('', 0, 1),) + exptuple = (('', 0, (1,)),) self.do_test_expected(instr, exptuple) def test_unknown_28(self): instr = 'v. 28; 2,3; 5,2; 12,2; [v h. je slsoḇ–r–ḵ vždy spojeno' - exptuple = (('', 2, 3), ('', 5, 2), ('', 12, 2), ('', 0, 28)) + exptuple = (('', 2, (3,)), ('', 5, (2,)), ('', 12, (2,)), + ('', 0, (28,))) self.do_test_expected(instr, exptuple) - @unittest.skip('Not implented yet') def test_pritomnosti(self): instr = 'přítomnosti;(srv. 3,22!; 11,7; Jb 1,6—12; 2,1—6; ' + \ 'Iz 6,8; 1Kr 22,19—23; Jr 23,18;Za 3,1—7)]' - exptuple = () + exptuple = ( + ('', 3, (22,)), ('', 11, (7,)), + ('Jb', 1, (6, 7, 8, 9, 10, 11, 12)), + ('Jb', 2, (1, 2, 3, 4, 5, 6)), ('Iz', 6, (8,)), + ('1Kr', 22, (19, 20, 21, 22, 23)), + ('Jr', 23, (18,)), ('Za', 3, (1, 2, 3, 4, 5, 6, 7)) + ) self.do_test_expected(instr, exptuple) - @unittest.skip('Not implented yet') def test_unknown_9_6(self): instr = '9,6; 5,3; 1K 11,7; Ef 4,24; srv. Ko 3,10p; ' + \ '[tzn. aby prostvoření představoval Boha]' - exptuple = () + exptuple = (('', 9, (6,)), ('', 5, (3,)), ('1K', 11, (7,)), + ('Ef', 4, (24,)), ('Ko', 3, (10,))) self.do_test_expected(instr, exptuple) - @unittest.skip('Not implented yet') def test_pohlavi(self): instr = 'n.: mužského a ženského pohlaví. 5,2;Dt 4,16; Mt 19,4' - exptuple = () + exptuple = (('', 5, (2,)), ('Dt', 4, (16,)), ('Mt', 19, (4,))) self.do_test_expected(instr, exptuple) |