summaryrefslogtreecommitdiffstats
path: root/generate_reference.py
diff options
context:
space:
mode:
Diffstat (limited to 'generate_reference.py')
-rwxr-xr-xgenerate_reference.py83
1 files changed, 45 insertions, 38 deletions
diff --git a/generate_reference.py b/generate_reference.py
index e182205..d437a3d 100755
--- a/generate_reference.py
+++ b/generate_reference.py
@@ -18,6 +18,7 @@ ENGL_BOOKS = ('Gen', 'Exod', 'Lev', 'Num', 'Deut', 'Josh', 'Judg', 'Ruth',
'Phil', 'Col', '1Thess', '2Thess', '1Tim', '2Tim', 'Titus',
'Phlm', 'Heb', 'Jas', '1Pet', '2Pet', '1John', '2John', '3John',
'Jude', 'Rev')
+log = logging.getLogger(__name__)
# 1Mak, 2Mak, 3Mak, 4Mak, ditto s Ma
@@ -95,60 +96,72 @@ EN_BOOKS = tuple(TranslDict.values())
BIBLE_REF_PARSE_RE = re.compile(r'''
(%s)? # Name of the Bible book (optional)
\s*([0-9]+), # chapter number separated by whitespace
- ([0-9—.-]+) # verse number(s) separated by (optional) comma
+ ([0-9n—.-]+) # verse(s) number
''' % '|'.join(CZ_BOOKS), flags=re.VERBOSE)
ONLY_VERSE_PARSE_RE = re.compile(r'''
- v\.\s+ # abbreviation of "verš" (verse)
- ([0-9—.-]+) # verse number(s) separated by (optional) comma
-''', flags=re.VERBOSE)
-SPLIT_VERSE_RE = re.compile('''
- (\d+)
- [—.-]
- (\d+)
+ \bv\.\s+ # abbreviation of "verš" (verse)
+ ([0-9n—.-]+) # verse(s) number
''', flags=re.VERBOSE)
+SPLIT_VERSE_RE = re.compile('[—.-]+')
+
+
+def next_verses(v_str):
+ out = []
+ v_int = int(v_str.rstrip('n'))
+ out.append(v_int)
+ for add_v in range(v_str.count('n')):
+ out.append(v_int + add_v + 1)
+ log.debug('out = %s', out)
+ return out
+
+
+def verse_interval(verse_str):
+ log.debug('verse_str = %s', verse_str)
+ out = []
+
+ for vers_elem in verse_str.split('.'):
+ log.debug('vers_elem = %s', vers_elem)
+ i_matchs = SPLIT_VERSE_RE.split(vers_elem)
+ log.debug('i_matchs = %s', i_matchs)
+ if len(i_matchs) == 2:
+ up_limit = next_verses(i_matchs[1])
+ for verse in range(int(i_matchs[0]), up_limit[-1] + 1):
+ out.append(verse)
+ elif len(i_matchs) == 1:
+ out.extend(next_verses(i_matchs[0]))
+ else:
+ raise ValueError('weird interval = %s' % vers_elem)
+ log.debug('out = %s', out)
+ return tuple(out)
-def verse_interval(verse):
- match = SPLIT_VERSE_RE.search(verse)
- logging.debug('match = %s', match)
- if match:
- matches = match.groups()
- verse = int(match.group(1))
- next_verse = int(match.group(2))
- logging.debug('verse, next_verse = %s, %s', verse, next_verse)
- return (verse, next_verse)
- else:
- return (int(verse),)
def parse_notes_test(instr):
- logging.debug('%s\ninstr = %s', '-' * 30, instr)
+ log.debug('%s\ninstr = %s', '-' * 30, instr)
matches = BIBLE_REF_PARSE_RE.findall(instr)
- logging.debug('BIBLE_REF_PARSE_RE matches = %s', matches)
+ log.debug('BIBLE_REF_PARSE_RE matches = %s', matches)
out_list = []
book = None
for match in matches:
- logging.debug('match = %s', match)
+ log.debug('match = %s', match)
if match[0]:
book = match[0]
elif book is None:
book = ''
- logging.debug('match[1] = %s', match[1])
if match[1]:
- chapter = int(match[1].rstrip(','))
+ chapter = int(match[1])
else:
chapter = 0
- logging.debug('match[2] = %s', match[2])
- verse = match[2]
- out_list.append((book, chapter) + verse_interval(match[2]))
+ out_list.append((book, chapter, verse_interval(match[2])))
+
matches = ONLY_VERSE_PARSE_RE.findall(instr)
- logging.debug('ONLY_VERSE_PARSE_RE matches = %s', matches)
+ log.debug('ONLY_VERSE_PARSE_RE matches = %s', matches)
for match in matches:
- logging.debug('match = %s', match)
+ log.debug('match = %s', match)
if match:
- out = ('', 0) + verse_interval(match)
- out_list.append(out)
+ out_list.append(('', 0, verse_interval(match)))
return tuple(out_list)
@@ -163,12 +176,6 @@ class GenerateReferencesFilter(XMLFilterBase):
self._in_note = True
self._note_content = ""
-# def startDocument(self):
-# pass
-
-# def endDocument(self):
-# pass
-
def startElement(self, name, attrs): # noqa
if name == "verse" and 'sID' in attrs:
ref_elements = attrs['sID'].split('.')
@@ -181,7 +188,7 @@ class GenerateReferencesFilter(XMLFilterBase):
def endElement(self, name): # noqa
if name == 'note' and self._in_note:
- logging.debug('content:\n%s', self._note_content)
+ log.debug('content:\n%s', self._note_content)
self._in_note = False
self._note_content = ""
self._downstream.endElement(name)