From 388e56cf16e7c5fe7569112d229fa628d5b81497 Mon Sep 17 00:00:00 2001
From: Matěj Cepl <mcepl@cepl.eu>
Date: Sun, 14 May 2017 00:02:50 +0200
Subject: All tests for the verse parser PASS!!!

Yay!
---
 generate_reference.py | 83 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 45 insertions(+), 38 deletions(-)

(limited to 'generate_reference.py')

diff --git a/generate_reference.py b/generate_reference.py
index e182205..d437a3d 100755
--- a/generate_reference.py
+++ b/generate_reference.py
@@ -18,6 +18,7 @@ ENGL_BOOKS = ('Gen', 'Exod', 'Lev', 'Num', 'Deut', 'Josh', 'Judg', 'Ruth',
               'Phil', 'Col', '1Thess', '2Thess', '1Tim', '2Tim', 'Titus',
               'Phlm', 'Heb', 'Jas', '1Pet', '2Pet', '1John', '2John', '3John',
               'Jude', 'Rev')
+log = logging.getLogger(__name__)
 
 # 1Mak, 2Mak, 3Mak, 4Mak, ditto s Ma
 
@@ -95,60 +96,72 @@ EN_BOOKS = tuple(TranslDict.values())
 BIBLE_REF_PARSE_RE = re.compile(r'''
     (%s)?  # Name of the Bible book (optional)
     \s*([0-9]+),  # chapter number separated by whitespace
-    ([0-9—.-]+)   # verse number(s) separated by (optional) comma
+    ([0-9n—.-]+)   # verse(s) number
 ''' % '|'.join(CZ_BOOKS), flags=re.VERBOSE)
 ONLY_VERSE_PARSE_RE = re.compile(r'''
-    v\.\s+       # abbreviation of "verš" (verse)
-    ([0-9—.-]+)  # verse number(s) separated by (optional) comma
-''', flags=re.VERBOSE)
-SPLIT_VERSE_RE = re.compile('''
-    (\d+)
-    [—.-]
-    (\d+)
+    \bv\.\s+       # abbreviation of "verš" (verse)
+    ([0-9n—.-]+)   # verse(s) number
 ''', flags=re.VERBOSE)
+SPLIT_VERSE_RE = re.compile('[—.-]+')  # run of dash/dot separators
+
+
+def next_verses(v_str):  # e.g. "12nn" -> [12, 13, 14]
+    out = []
+    v_int = int(v_str.rstrip('n'))  # base verse without trailing 'n's
+    out.append(v_int)
+    for add_v in range(v_str.count('n')):  # one following verse per 'n'
+        out.append(v_int + add_v + 1)
+    log.debug('out = %s', out)
+    return out
+
+
+def verse_interval(verse_str):  # e.g. "1-3.5n" -> (1, 2, 3, 5, 6)
+    log.debug('verse_str = %s', verse_str)
+    out = []
+
+    for vers_elem in verse_str.split('.'):  # '.' separates list items
+        log.debug('vers_elem = %s', vers_elem)
+        i_matchs = SPLIT_VERSE_RE.split(vers_elem)  # range bounds
+        log.debug('i_matchs = %s', i_matchs)
+        if len(i_matchs) == 2:  # "a-b": inclusive range a..b
+            up_limit = next_verses(i_matchs[1])  # 'n's extend the end
+            for verse in range(int(i_matchs[0]), up_limit[-1] + 1):
+                out.append(verse)
+        elif len(i_matchs) == 1:  # single verse, maybe with 'n's
+            out.extend(next_verses(i_matchs[0]))
+        else:  # three or more bounds, e.g. "1-2-3"
+            raise ValueError('weird interval = %s' % vers_elem)
 
+    log.debug('out = %s', out)
+    return tuple(out)  # flat tuple of verse numbers
 
-def verse_interval(verse):
-    match = SPLIT_VERSE_RE.search(verse)
-    logging.debug('match = %s', match)
-    if match:
-        matches = match.groups()
-        verse = int(match.group(1))
-        next_verse = int(match.group(2))
-        logging.debug('verse, next_verse = %s, %s', verse, next_verse)
-        return (verse, next_verse)
-    else:
-        return (int(verse),)
 
 def parse_notes_test(instr):
-    logging.debug('%s\ninstr = %s', '-' * 30, instr)
+    log.debug('%s\ninstr = %s', '-' * 30, instr)
     matches = BIBLE_REF_PARSE_RE.findall(instr)
-    logging.debug('BIBLE_REF_PARSE_RE matches = %s', matches)
+    log.debug('BIBLE_REF_PARSE_RE matches = %s', matches)
     out_list = []
     book = None
 
     for match in matches:
-        logging.debug('match = %s', match)
+        log.debug('match = %s', match)
         if match[0]:
             book = match[0]
         elif book is None:
             book = ''
-        logging.debug('match[1] = %s', match[1])
         if match[1]:
-            chapter = int(match[1].rstrip(','))
+            chapter = int(match[1])  # chapter group excludes ','
         else:
             chapter = 0
-        logging.debug('match[2] = %s', match[2])
-        verse = match[2]
-        out_list.append((book, chapter) + verse_interval(match[2]))
+        out_list.append((book, chapter, verse_interval(match[2])))
+
     matches = ONLY_VERSE_PARSE_RE.findall(instr)
-    logging.debug('ONLY_VERSE_PARSE_RE matches = %s', matches)
+    log.debug('ONLY_VERSE_PARSE_RE matches = %s', matches)
 
     for match in matches:
-        logging.debug('match = %s', match)
+        log.debug('match = %s', match)
         if match:
-            out = ('', 0) + verse_interval(match)
-            out_list.append(out)
+            out_list.append(('', 0, verse_interval(match)))  # no book/chapter
 
     return tuple(out_list)
 
@@ -163,12 +176,6 @@ class GenerateReferencesFilter(XMLFilterBase):
         self._in_note = True
         self._note_content = ""
 
-#    def startDocument(self):
-#        pass
-
-#    def endDocument(self):
-#        pass
-
     def startElement(self, name, attrs):  # noqa
         if name == "verse" and 'sID' in attrs:
             ref_elements = attrs['sID'].split('.')
@@ -181,7 +188,7 @@ class GenerateReferencesFilter(XMLFilterBase):
 
     def endElement(self, name):  # noqa
         if name == 'note' and self._in_note:
-            logging.debug('content:\n%s', self._note_content)
+            log.debug('content:\n%s', self._note_content)
             self._in_note = False
             self._note_content = ""
         self._downstream.endElement(name)
-- 
cgit