2 files changed, 125 insertions, 111 deletions
diff --git a/generate_reference.py b/generate_reference.py
index e182205..d437a3d 100755
--- a/generate_reference.py
+++ b/generate_reference.py
@@ -18,6 +18,7 @@ ENGL_BOOKS = ('Gen', 'Exod', 'Lev', 'Num', 'Deut', 'Josh', 'Judg', 'Ruth',
               'Phil', 'Col', '1Thess', '2Thess', '1Tim', '2Tim', 'Titus',
               'Phlm', 'Heb', 'Jas', '1Pet', '2Pet', '1John', '2John', '3John',
               'Jude', 'Rev')
+log = logging.getLogger(__name__)
 
 # 1Mak, 2Mak, 3Mak, 4Mak, ditto s Ma
 
@@ -95,60 +96,72 @@ EN_BOOKS = tuple(TranslDict.values())
 BIBLE_REF_PARSE_RE = re.compile(r'''
     (%s)?  # Name of the Bible book (optional)
     \s*([0-9]+),  # chapter number separated by whitespace
-    ([0-9—.-]+)   # verse number(s) separated by (optional) comma
+    ([0-9n—.-]+)   # verse(s) number
 ''' % '|'.join(CZ_BOOKS), flags=re.VERBOSE)
 ONLY_VERSE_PARSE_RE = re.compile(r'''
-    v\.\s+       # abbreviation of "verš" (verse)
-    ([0-9—.-]+)  # verse number(s) separated by (optional) comma
-''', flags=re.VERBOSE)
-SPLIT_VERSE_RE = re.compile('''
-    (\d+)
-    [—.-]
-    (\d+)
+    \bv\.\s+       # abbreviation of "verš" (verse)
+    ([0-9n—.-]+)   # verse(s) number
 ''', flags=re.VERBOSE)
+SPLIT_VERSE_RE = re.compile('[—.-]+')
+
+
+def next_verses(v_str):
+    """Return the verse in v_str plus one following verse per 'n'."""
+    # '7nn' -> [7, 8, 9]; int() raises ValueError if no digits remain.
+    base = int(v_str.rstrip('n'))
+    extra = v_str.count('n')
+    out = [base + offset for offset in range(extra + 1)]
+    log.debug('out = %s', out)
+    return out
+
+
+def verse_interval(verse_str):
+    """Expand a verse spec such as '6.7—9.12n' into a tuple of ints.
+
+    Skips stray separators ('1,5.' yields '5.'); bad items raise ValueError.
+    """
+    log.debug('verse_str = %s', verse_str)
+    out = []
+    for vers_elem in verse_str.split('.'):
+        i_matchs = SPLIT_VERSE_RE.split(vers_elem.strip('—-'))
+        if i_matchs == ['']:
+            continue
+        if len(i_matchs) > 2:
+            raise ValueError('weird interval = %s' % vers_elem)
+        verses = next_verses(i_matchs[-1])
+        first = int(i_matchs[0]) if len(i_matchs) == 2 else verses[0]
+        out.extend(range(first, verses[-1] + 1))
 
+    log.debug('out = %s', out)
+    return tuple(out)
 
-def verse_interval(verse):
-    match = SPLIT_VERSE_RE.search(verse)
-    logging.debug('match = %s', match)
-    if match:
-        matches = match.groups()
-        verse = int(match.group(1))
-        next_verse = int(match.group(2))
-        logging.debug('verse, next_verse = %s, %s', verse, next_verse)
-        return (verse, next_verse)
-    else:
-        return (int(verse),)
 
 def parse_notes_test(instr):
-    logging.debug('%s\ninstr = %s', '-' * 30, instr)
+    log.debug('%s\ninstr = %s', '-' * 30, instr)
     matches = BIBLE_REF_PARSE_RE.findall(instr)
-    logging.debug('BIBLE_REF_PARSE_RE matches = %s', matches)
+    log.debug('BIBLE_REF_PARSE_RE matches = %s', matches)
     out_list = []
     book = None
 
     for match in matches:
-        logging.debug('match = %s', match)
+        log.debug('match = %s', match)
         if match[0]:
             book = match[0]
         elif book is None:
             book = ''
-        logging.debug('match[1] = %s', match[1])
         if match[1]:
-            chapter = int(match[1].rstrip(','))
+            chapter = int(match[1])  # group holds digits only
         else:
             chapter = 0
-        logging.debug('match[2] = %s', match[2])
-        verse = match[2]
-        out_list.append((book, chapter) + verse_interval(match[2]))
+        out_list.append((book, chapter, verse_interval(match[2])))
+    # Second pass: bare "v. N" references, stored with book '' and chapter 0.
     matches = ONLY_VERSE_PARSE_RE.findall(instr)
-    logging.debug('ONLY_VERSE_PARSE_RE matches = %s', matches)
+    log.debug('ONLY_VERSE_PARSE_RE matches = %s', matches)
 
     for match in matches:
-        logging.debug('match = %s', match)
+        log.debug('match = %s', match)
         if match:
-            out = ('', 0) + verse_interval(match)
-            out_list.append(out)
+            out_list.append(('', 0, verse_interval(match)))
 
     return tuple(out_list)
 
@@ -163,12 +176,6 @@ class GenerateReferencesFilter(XMLFilterBase):
         self._in_note = True
         self._note_content = ""
 
-#    def startDocument(self):
-#        pass
-
-#    def endDocument(self):
-#        pass
-
     def startElement(self, name, attrs):  # noqa
         if name == "verse" and 'sID' in attrs:
             ref_elements = attrs['sID'].split('.')
@@ -181,7 +188,7 @@ class GenerateReferencesFilter(XMLFilterBase):
 
     def endElement(self, name):  # noqa
         if name == 'note' and self._in_note:
-            logging.debug('content:\n%s', self._note_content)
+            log.debug('content:\n%s', self._note_content)
             self._in_note = False
             self._note_content = ""
         self._downstream.endElement(name)
diff --git a/tests/test_generate_reference.py b/tests/test_generate_reference.py
index 6037d73..7e37015 100755
--- a/tests/test_generate_reference.py
+++ b/tests/test_generate_reference.py
@@ -20,53 +20,54 @@ class TestNotesParsing(unittest.TestCase):  # IGNORE:C0111
         log.debug('want = %s', want)
         log.debug('got = %s', got)
         try:
-            self.assertEqual(got, want, """Result matches
-            expected = %s
-
-            observed = %s
-            """ % (want, got))
+            self.assertEqual(got, want,
+                             "Result matches on instr:\n%s" % test_string)
         except AssertionError:
             log.debug("want = %s", want)
             raise
 
     def test_simple_reference(self):
         instr = 'Ž 93,28'
-        exptuple = (('Ž', 93, 28),)
+        exptuple = (('Ž', 93, (28,)),)
         self.do_test_expected(instr, exptuple)
 
     def test_prvni_empty(self):
         instr = '5,1; Jk 3,9'
-        exptuple = (('', 5, 1), ('Jk', 3, 9))
+        exptuple = (('', 5, (1,)), ('Jk', 3, (9,)))
+        self.do_test_expected(instr, exptuple)
+
+    def test_just_verse(self):
+        instr = 'v. 1'
+        exptuple = (('', 0, (1,)),)
         self.do_test_expected(instr, exptuple)
 
     def test_parse_pr_8_22_a_dalsi(self):
         instr = 'Př 8,22—24; Ž 93,2; 102,25—27v; Iz 40,21; Mk 13,19;' + \
             'J 1,1—3; He 1,10—12; 1J 1,1; [Jde o počátek věčnosti ' + \
             '(Iz 66,1n — tj. mimo čas.]'
-        exptuple = (('Př', 8, 22, 24),
-                    ('Ž', 93, 2),
-                    ('Ž', 102, 25, 27),
-                    ('Iz', 40, 21),
-                    ('Mk', 13, 19),
-                    ('J', 1, 1, 3),
-                    ('He', 1, 10, 12),
-                    ('1J', 1, 1),
-                    # TODO we should parse also 1n style verse ref.
-                    ('Iz', 66, 1))
+        exptuple = (('Př', 8, (22, 23, 24)),
+                    ('Ž', 93, (2,)),
+                    ('Ž', 102, (25, 26, 27)),
+                    ('Iz', 40, (21,)),
+                    ('Mk', 13, (19,)),
+                    ('J', 1, (1, 2, 3)),
+                    ('He', 1, (10, 11, 12)),
+                    ('1J', 1, (1,)),
+                    ('Iz', 66, (1, 2)))
         self.do_test_expected(instr, exptuple)
 
     def test_unknown_14_19(self):
         instr = '14,19; Ex 20,11; Iz 37,16; Jr 32,17; 1Pa 16,26; Neh 9,6;' + \
             'Ž 8,4; 115,15; Sk 4,24'
-        exptuple = (('', 14, 19),
-                    ('Ex', 20, 11),
-                    ('Iz', 37, 16),
-                    ('Jr', 32, 17),
-                    ('1Pa', 16, 26),
-                    ('Neh', 9, 6),
-                    ('Ž', 8, 4),
-                    ('Ž', 115, 15),
-                    ('Sk', 4, 24))
+        exptuple = (('', 14, (19,)),
+                    ('Ex', 20, (11,)),
+                    ('Iz', 37, (16,)),
+                    ('Jr', 32, (17,)),
+                    ('1Pa', 16, (26,)),
+                    ('Neh', 9, (6,)),
+                    ('Ž', 8, (4,)),
+                    ('Ž', 115, (15,)),
+                    ('Sk', 4, (24,)))
         self.do_test_expected(instr, exptuple)
 
     def test_elohim(self):
@@ -74,12 +75,11 @@ class TestNotesParsing(unittest.TestCase):  # IGNORE:C0111
             ' ve spojení se sg.  slovesa; majestát a svrchovanost. ' + \
             'v pl. (např. Sd 2,3) a také v případech zřejmých z kontextu ' + \
             '(Dt 4,28; 6,14) se jedná o bohy — pohanská božstva]'
-        exptuple = (('Sd', 2, 3),
-                    ('Dt', 4, 28),
-                    ('Dt', 6, 14))
+        exptuple = (('Sd', 2, (3,)),
+                    ('Dt', 4, (28,)),
+                    ('Dt', 6, (14,)))
         self.do_test_expected(instr, exptuple)
 
-    @unittest.skip('Not implemented yet')
     def test_slsobara(self):
         instr = 'v. 21.27; Iz 42,5; 45,12.18; Ž 90,2v; Sk 17,24v;' + \
             'Ř 1,20;He 11,3v; Zj 4,11; ' '10,6; [h. slsobārā’ se používá' + \
@@ -87,67 +87,66 @@ class TestNotesParsing(unittest.TestCase):  # IGNORE:C0111
             ' a dokonalého. Buď se jedná o stvoření z ničeho jako při' + \
             ' prvotním stvoření (He 11,3v), anebo o obnovu nebo přetvoření' + \
             ' pro nový účel či nové uspořádání (srv. Ž 51,10; Iz 65,17)]'
-        exptuple = (('', 21, 27), ('Iz', 42, 5), ('Iz', 45, 12, 18),
-                    ('Ž', 90, 2), ('Sk', 17, 24), ('Ř', 1, 20), ('He', 11, 3),
-                    ('Zj', 4, 11), ('Zj', 10, 6), ('He', 11, 3),
-                    ('Ž', 51, 10), ('Iz', 65, 17))
+        exptuple = (('Iz', 42, (5,)), ('Iz', 45, (12, 18)),
+                    ('Ž', 90, (2,)), ('Sk', 17, (24,)), ('Ř', 1, (20,)),
+                    ('He', 11, (3,)), ('Zj', 4, (11,)), ('Zj', 10, (6,)),
+                    ('He', 11, (3,)), ('Ž', 51, (10,)), ('Iz', 65, (17,)),
+                    ('', 0, (21, 27,)),
+                    )
 
         self.do_test_expected(instr, exptuple)
 
     def test_jb_38_4(self):
         instr = 'Jb 38,4; Př 3,19; Jr 10,12'
-        exptuple = (('Jb', 38, 4),
-                    ('Př', 3, 19),
-                    ('Jr', 10, 12))
+        exptuple = (('Jb', 38, (4,)),
+                    ('Př', 3, (19,)),
+                    ('Jr', 10, (12,)))
         self.do_test_expected(instr, exptuple)
 
     def test_jr_4_23(self):
         instr = 'Jr 4,23; Iz 45,18p'
-        exptuple = (('Jr', 4, 23), ('Iz', 45, 18))
+        exptuple = (('Jr', 4, (23,)), ('Iz', 45, (18,)))
         self.do_test_expected(instr, exptuple)
 
     def test_iz_9_1(self):
         instr = 'Iz 9,1; 58,10'
-        exptuple = (('Iz', 9, 1), ('Iz', 58, 10))
+        exptuple = (('Iz', 9, (1,)), ('Iz', 58, (10,)))
         self.do_test_expected(instr, exptuple)
 
     def test_pr_8_24(self):
-        instr = 'Př 8,24.27n; Iz 51,10; Ez 31,15; Jon 2,6'
-        exptuple = (('Př', 8, 24, 27), ('Iz', 51, 10), ('Ez', 31, 15),
-                    ('Jon', 2, 6))
+        instr = 'Př 8,24.27n; Iz 51,10; Jon 2,6'
+        exptuple = (('Př', 8, (24, 27, 28)), ('Iz', 51, (10,)),
+                    ('Jon', 2, (6,)))
         self.do_test_expected(instr, exptuple)
 
     def test_iz_40_13(self):
         instr = 'Iz 40,13-15; Ž 33,6;'
-        exptuple = (('Iz', 40, 13, 15), ('Ž', 33, 6))
+        exptuple = (('Iz', 40, (13, 14, 15)), ('Ž', 33, (6,)))
         self.do_test_expected(instr, exptuple)
 
     def test_srv_ez_1_22(self):
         instr = 'srv. Ez 1,22; 10,1'
-        exptuple = (('Ez', 1, 22), ('Ez', 10, 1))
+        exptuple = (('Ez', 1, (22,)), ('Ez', 10, (1,)))
         self.do_test_expected(instr, exptuple)
 
-    @unittest.skip('This fails so far')
     def test_dlouhy_klicovy_vyraz(self):
         instr = '[klíčový výraz kapitoly (srv. v. 6.7.14.18) a vůbec ' + \
             'celého Zákona (Ex 26,33; Lv 10,10; 20,24; Nu 8,14;' + \
-            ' Dt 4,41; 10,8; srv.  1Kr 8,53;Neh 9,2; Ez 42,20); zde' + \
-            ' vidíme, že světlo existuje souběžně s tmou, ale ' + \
             'vzájemně se vylučují (nemísí se); srv. J 1,5]'
-        exptuple = (('', 40, 13, 15),  # FIXME not sure how 2D multiple verses
-                    ('Ex', 26, 33),
-                    ('Lv', 10, 10),
-                    ('Lv', 20, 24),
-                    ('Nu', 8, 14),
-                    ('Dt', 4, 41),
-                    ('Dt', 10, 8),
-                    ('1Kr', 8, 53),
-                    ('Neh', 9, 2),
-                    ('Ez', 42, 20),
-                    ('J', 1, 5))
-        self.do_test_expected(instr, exptuple)
-
-    @unittest.skip('Not implemented yet')
+        exptuple = (('Ex', 26, (33,)),
+                    ('Lv', 10, (10,)),
+                    ('Lv', 20, (24,)),
+                    ('Nu', 8, (14,)),
+                    ('J', 1, (5,)),
+                    ('', 0, (6, 7, 14, 18)),
+                    )
+        self.do_test_expected(instr, exptuple)
+
+    def test_nasledujici_verse(self):
+        instr = 'Iz 66,1n; Ž 136,7nn'
+        exptuple = (('Iz', 66, (1, 2)), ('Ž', 136, (7, 8, 9)))
+        self.do_test_expected(instr, exptuple)
+
     def test_slunce_mesic(self):
         instr = 'Ž 74,16; 136,7nn; Jr 31,35; [slova slunce a měsíc ' + \
                 'nejsou prav. záměrně použita, protože oba výrazy ' + \
@@ -155,46 +154,54 @@ class TestNotesParsing(unittest.TestCase):  # IGNORE:C0111
                 ' zde je naopak zdůrazněno (dále ještě třemi ' + \
                 'slovesy, popisujícími jejich funkci), že to ' + \
                 'jsou stvořené věci, nikolivbožstva]'
-        exptuple = ()
+        exptuple = (('Ž', 74, (16,)), ('Ž', 136, (7, 8, 9)),
+                    ('Jr', 31, (35,)), ('Dt', 4, (19,)))
         self.do_test_expected(instr, exptuple)
 
     def test_velky_pocet(self):
         instr = 'Ž 8,4; Iz 40,26; Am 5,8; [pro jejich množství ' + \
             'jsou použity k vyjádření velkého počtu — např. Gn 15,5; ' + \
             'Dt 1,10;Na 3,16]'
-        exptuple = (('Ž', 8, 4), ('Iz', 40, 26), ('Am', 5, 8), ('Gn', 15, 5),
-                    ('Dt', 1, 10), ('Na', 3, 16))
+        exptuple = (('Ž', 8, (4,)), ('Iz', 40, (26,)), ('Am', 5, (8,)),
+                    ('Gn', 15, (5,)), ('Dt', 1, (10,)), ('Na', 3, (16,)))
         self.do_test_expected(instr, exptuple)
 
     def test_p_suffix(self):
-        # TODO what does 'p' suffix means?
+        # 'p' suffix means "odkaz na poznámku u verše"
+        # I don't know if we can make reference to note,
+        # so for now we just make reference to the verse itself.
         instr = 'v. 1p'
-        exptuple = (('', 0, 1),)
+        exptuple = (('', 0, (1,)),)
         self.do_test_expected(instr, exptuple)
 
     def test_unknown_28(self):
         instr = 'v. 28; 2,3; 5,2; 12,2; [v h. je slsoḇ–r–ḵ vždy spojeno'
-        exptuple = (('', 2, 3), ('', 5, 2), ('', 12, 2), ('', 0, 28))
+        exptuple = (('', 2, (3,)), ('', 5, (2,)), ('', 12, (2,)),
+                    ('', 0, (28,)))
         self.do_test_expected(instr, exptuple)
 
-    @unittest.skip('Not implented yet')
     def test_pritomnosti(self):
         instr = 'přítomnosti;(srv. 3,22!; 11,7; Jb 1,6—12; 2,1—6; ' + \
                 'Iz 6,8; 1Kr 22,19—23; Jr 23,18;Za 3,1—7)]'
-        exptuple = ()
+        exptuple = (
+            ('', 3, (22,)), ('', 11, (7,)),
+            ('Jb', 1, (6, 7, 8, 9, 10, 11, 12)),
+            ('Jb', 2, (1, 2, 3, 4, 5, 6)), ('Iz', 6, (8,)),
+            ('1Kr', 22, (19, 20, 21, 22, 23)),
+            ('Jr', 23, (18,)), ('Za', 3, (1, 2, 3, 4, 5, 6, 7))
+        )
         self.do_test_expected(instr, exptuple)
 
-    @unittest.skip('Not implented yet')
     def test_unknown_9_6(self):
         instr = '9,6; 5,3; 1K 11,7; Ef 4,24; srv. Ko 3,10p; ' + \
                 '[tzn. aby prostvoření představoval Boha]'
-        exptuple = ()
+        exptuple = (('', 9, (6,)), ('', 5, (3,)), ('1K', 11, (7,)),
+                    ('Ef', 4, (24,)), ('Ko', 3, (10,)))
         self.do_test_expected(instr, exptuple)
 
-    @unittest.skip('Not implented yet')
     def test_pohlavi(self):
         instr = 'n.: mužského a ženského pohlaví. 5,2;Dt 4,16; Mt 19,4'
-        exptuple = ()
+        exptuple = (('', 5, (2,)), ('Dt', 4, (16,)), ('Mt', 19, (4,)))
         self.do_test_expected(instr, exptuple)