#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, unicode_literals
import codecs
import logging
import re
import sys
import jinja2
import lxml.etree as et
# Configure the root logger once at import time: INFO and above are
# emitted, each record prefixed with its level and the calling function.
logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s:%(funcName)s:%(message)s',
)

# Year combined with the month/day attributes of <LOSUNG> and <DAY>
# elements to build full dates (those elements carry no year themselves).
cur_year = 2015
# Maps the book abbreviations used in the "b" attribute of <S> elements
# to the abbreviations used for the international reference id.
# Multi-word values carry "%20" in place of the space.
# Entries marked "NA in source code" keep the original author's note.
book_abbrs = {
    # --- Old Testament ---
    "Gn": "Gen",
    "Ex": "Exod",
    "Lv": "Lev",  # NA in source code
    "Nu": "Num",
    "Dt": "Deut",
    "Jos": "Josh",
    "Jdc": "Judg",
    "Rth": "Ruth",
    "1Sm": "1%20Sam",
    "2Sm": "2%20Sam",
    "1Rg": "1%20Kgs",
    "2Rg": "2%20Kgs",
    "1Chr": "1%20Chr",
    "2Chr": "2%20Chr",
    "Esr": "Ezra",
    "Neh": "Neh",
    "Esth": "Esth",  # NA in source code
    "Job": "Job",
    "Ps": "Ps",
    "Prv": "Prov",
    "Eccl": "Eccl",
    "Song": "Song",  # NA in source code
    "Is": "Isa",
    "Jr": "Jer",
    "Thr": "Lam",
    "Ez": "Ezek",
    "Dn": "Dan",
    "Hos": "Hos",
    "Joel": "Joel",
    "Am": "Amos",
    "Obad": "Obad",  # NA in source code
    "Jon": "Jonah",
    "Mch": "Mic",
    "Nah": "Nah",  # NA in source code
    "Hab": "Hab",
    "Zph": "Zeph",
    "Hgg": "Hag",
    "Zch": "Zech",
    "Ml": "Mal",
    # --- New Testament ---
    "Mt": "Matt",
    "Mc": "Mark",
    "L": "Luke",
    "J": "John",
    "Act": "Acts",
    "R": "Rom",
    "1K": "1%20Cor",
    "2K": "2%20Cor",
    "G": "Gal",
    "E": "Eph",
    "Ph": "Phil",
    "Kol": "Col",
    "1Th": "1%20Thess",
    "2Th": "2%20Thess",
    "1T": "1%20Tim",
    "2T": "2%20Tim",
    "Tt": "Titus",
    "Phm": "Phlm",
    "H": "Heb",
    "Jc": "Jas",
    "1P": "1%20Pet",
    "2P": "2%20Pet",
    "1J": "1%20John",
    "2J": "2%20John",
    "3J": "3%20John",
    "Jd": "Jude",
    "Ap": "Rev",
}
def csv2dict(filename):
    """Load a whitespace-separated table into a dict keyed by 'cs_abbr'.

    Every non-blank line of the UTF-8 encoded file is stripped, split on
    runs of whitespace, and its fields are paired in order with the names
    'no', 'test', 'cs_abbr', 'cs_name', 'de_abbr' and 'de_name'. Surplus
    fields are dropped; a line with fewer fields yields a shorter record.

    :param filename: path of the file to read.
    :returns: dict mapping each 'cs_abbr' value to the record (a dict)
        built from its line; on duplicate keys the later line wins.
    :raises KeyError: when a non-blank line has fewer than three fields
        and therefore no 'cs_abbr' value.
    """
    out_dict = {}
    split_re = re.compile(r'\s+')
    field_names = ['no', 'test', 'cs_abbr', 'cs_name',
                   'de_abbr', 'de_name']
    with codecs.open(filename, 'rb', 'utf8') as csvfile:
        for row in csvfile:
            # Strip first: leading whitespace would otherwise produce an
            # empty first field and shift every column by one.
            row = row.strip()
            if not row:
                # Blank lines carry no record (and have no 'cs_abbr').
                continue
            line_dict = dict(zip(field_names, split_re.split(row)))
            out_dict[line_dict['cs_abbr']] = line_dict
    logging.debug("out_dict:\n%s", out_dict)
    return out_dict
def parse_verses(lines):
    """Build the verse text out of a list of <L> elements.

    :param lines: sequence of elements; each element's ``text`` attribute
        holds one line of the verse, or None for an empty element.
    :returns: the texts of all non-empty elements joined by ``<br>``
        followed by a newline; an empty string when there is no text.
    """
    logging.debug("lines = %s", lines)
    logging.debug("lines = len %s", len(lines))
    # Empty elements are skipped wherever they occur -- including the
    # first position, where a None text would otherwise break the
    # concatenation or leak out as the return value.
    texts = [line.text for line in lines if line.text is not None]
    for text in texts[1:]:
        logging.debug("another line = %s", text)
    out = "<br>\n".join(texts)
    logging.debug("body = %s", out)
    return out
def parse_body(elem):
    """Parse one verse element (<OT>, <NT> or <DAY>) into a dict.

    Example input:
        <OT>
            <S b="Ps" ch="91" v="9"/>
            <L>V Hospodinu je tvé útočiště.</L>
            <SL>Žalm 91,9</SL>
        </OT>

    :param elem: the element to parse, or None.
    :returns: None when ``elem`` is None or has no children; otherwise a
        dict with the keys
        'ref_id'     -- book/chapter/verse copied from the <S> attributes,
        'int_ref_id' -- the same with the book translated via book_abbrs,
        'text'       -- the <L> lines joined by parse_verses(),
        'ref'        -- text of <SL>; present only when <SL> exists.
    :raises KeyError: when <S> lacks a b/ch/v attribute or its book is
        not listed in book_abbrs (the latter is logged before re-raising).
    """
    if elem is None or len(elem) == 0:
        return None

    wword = {
        'text': ''
    }

    source_elem = elem.find('S')
    logging.debug("S elem = %s", source_elem)
    logging.debug("S elem = attrib %s", source_elem.attrib)

    # biblical reference (computer readable)
    wword['ref_id'] = {
        "book": source_elem.attrib['b'],
        "chapter": source_elem.attrib['ch'],
        "verse": source_elem.attrib['v'],
    }
    logging.debug("wword['ref_id'] = %s", wword['ref_id'])

    # biblical reference (compatible with the English Bible programs)
    try:
        wword['int_ref_id'] = wword['ref_id'].copy()
        wword['int_ref_id']['book'] = book_abbrs[wword['ref_id']['book']]
    except KeyError:
        logging.error('book = %s', wword['ref_id'])
        raise

    # text of the verse; iter() replaces the deprecated getiterator()
    wword['text'] = parse_verses(list(elem.iter("L")))

    # human readable reference; optional, so nothing may be computed
    # from it (not even for logging) before the None check
    ref = elem.find("SL")
    logging.debug("ref = %s", ref)
    if ref is not None:
        wword['ref'] = ref.text

    return wword
def parse_losung(elem):
    """
    Parse one <LOSUNG> element. Example:

    <LOSUNG d="2" m="1">
        <TL>Pondělí 2. ledna 2012</TL>
        <OT>
            <S b="Ps" ch="91" v="9"/>
            <L>V Hospodinu je tvé útočiště.</L>
            <SL>Žalm 91,9</SL>
        </OT>
        <NT>
            <S b="1P" ch="5" v="7"/>
            <L>Všechnu svou starost vložte na něj, neboť mu na vás záleží.</L>
            <SL>1.Petrova 5,7</SL>
        </NT>
        <SR><SL>Jozue 24,1-2a.13-18.25-26</SL></SR>
        <CR><SL>Marek 1,1-13</SL></CR>
    </LOSUNG>

    :param elem: the <LOSUNG> element; its "m" and "d" attributes give
        month and day, the year is taken from cur_year.
    :returns: tuple ``(date_id, out)`` where ``date_id`` is the tuple
        (cur_year, month, day) and ``out`` is a dict with the keys
        'date_id'    -- the date formatted as YYYY-MM-DD,
        'date_full'  -- text of <TL> (None when <TL> is missing),
        'watchwords' -- [OT, NT] results of parse_body() (None for a
                        missing or empty section),
        'readings'   -- [SR, CR] strings, the <SL> texts of each section
                        joined by newlines ('' for a missing section).
    :raises KeyError: when the "m" or "d" attribute is missing.
    :raises ValueError: when "m" or "d" is not an integer.
    """
    logging.debug("losung = %s", elem)
    out = {}

    date_id = (cur_year, int(elem.attrib["m"]), int(elem.attrib["d"]),)
    out['date_id'] = "%4d-%02d-%02d" % date_id
    logging.debug('date_id = %s', out['date_id'])

    title = elem.find("TL")
    out['date_full'] = title.text if title is not None else None

    # The position in each list is significant (OT before NT, SR before
    # CR), so a missing section gets a placeholder rather than being
    # dropped.
    out['watchwords'] = []
    for tst in ('OT', 'NT'):
        sect = elem.find(tst)
        out['watchwords'].append(
            parse_body(sect) if sect is not None else None)

    out['readings'] = []
    for read in ('SR', 'CR'):
        sect = elem.find(read)
        logging.debug("read %s = %s", read, sect)
        if sect is None:
            texts = []
        else:
            # an empty <SL/> has text None, which join() cannot take
            texts = [el.text for el in sect.findall("SL")
                     if el.text is not None]
        out['readings'].append("\n".join(texts))

    return date_id, out
def parse_day(elem, whole_dict):
    """
    Parse a <DAY> element and merge its data into the matching article.

    Example:

    <DAY d="1" m="1" type="holiday" name="Nový Rok ">
        <S b="Kol" ch="3" v="17"/>
        <L>Všechno, cokoli mluvíte nebo děláte,</L>
        <L>čiňte ve jménu Pána Ježíše a skrze něho děkujte Bohu Otci.</L>
        <SL>Koloským 3,17</SL>
    </DAY>
    <DAY d="17" m="2" type="sunday" name="Invocavit"
         meaning="Vzývati mne bude a vyslyším jej. Žalm 91,15" ord="7">
        <S b="1J" ch="3" v="8"/>
        <L>Proto se zjevil Syn Boží, aby zmařil činy ďáblovy.</L>
        <SL>1.Janova 3,8b</SL>
    </DAY>
    <DAY d="13" m="1" type="week" name="Alianční modlitební týden ">
    </DAY>

    :param elem: the <DAY> element; "m" and "d" give month and day, the
        year is taken from cur_year.
    :param whole_dict: dict keyed by (year, month, day) tuples; the entry
        for this day is updated in place with
        'holy_name', 'holy_text'                        for type "holiday",
        'sun_name', 'sun_text' and, when the attributes
        exist, 'sun_mean' and 'sun_ord'                 for type "sunday",
        'week_title'                                    for type "week".
    :returns: None.
    :raises KeyError: when whole_dict has no entry for the date, or a
        required attribute ("m", "d", "type", "name") is missing.
    :raises ValueError: when the "type" attribute has an unknown value.
    """
    date_id = (cur_year, int(elem.attrib["m"]), int(elem.attrib["d"]),)
    logging.debug('date_id = %s', date_id)
    out = whole_dict[date_id]

    day_type = elem.attrib['type']
    if day_type == "holiday":
        out['holy_name'] = elem.attrib['name'].strip()
        out['holy_text'] = parse_body(elem)
    elif day_type == "sunday":
        out['sun_name'] = elem.attrib['name'].strip()
        # "meaning" and "ord" are both treated as optional attributes
        if 'meaning' in elem.attrib:
            out['sun_mean'] = elem.attrib['meaning'].strip()
        if 'ord' in elem.attrib:
            out['sun_ord'] = int(elem.attrib['ord'])
        out['sun_text'] = parse_body(elem)
    elif day_type == "week":
        out['week_title'] = elem.attrib['name'].strip()
    else:
        # Exceptions do not interpolate extra arguments the way logging
        # calls do, so the message has to be formatted explicitly.
        raise ValueError("Unknown DAY type = %s" % day_type)
def parse_file(filename):
    """Render the Losungen XML file as an HTML page.

    Every <LOSUNG> element becomes one article (see parse_losung()), the
    <DAY> elements then add holiday/Sunday/week data to the article of
    the same date (see parse_day()), and the articles are handed, in
    date order, to the 'czech.html' template from the 'templates'
    directory as the variable ``articles``.

    :param filename: path of the XML file to parse.
    :returns: the rendered page as UTF-8 encoded bytes.
    """
    tree = et.parse(filename).getroot()
    article_dict = {}

    env = jinja2.Environment(loader=jinja2.FileSystemLoader('templates'))
    template = env.get_template('czech.html')

    # iter() replaces getiterator(), which lxml documents as deprecated.
    for los in tree.iter("LOSUNG"):
        key, text = parse_losung(los)
        article_dict[key] = text

    # <DAY> elements are processed only after all articles exist,
    # because parse_day() looks up the article by its date.
    for day in tree.iter("DAY"):
        parse_day(day, article_dict)

    # keys are (year, month, day) tuples, so sorting gives date order
    article_list = [article_dict[key] for key in sorted(article_dict)]

    return template.render(articles=article_list).encode('utf8')
if __name__ == "__main__":
    # parse_file() returns UTF-8 encoded bytes. print() on Python 3 would
    # emit their repr (b'...') instead of the page, so the raw bytes go
    # to the binary layer of stdout where there is one; Python 2's stdout
    # accepts byte strings directly.
    rendered = parse_file(sys.argv[1])
    binary_stdout = getattr(sys.stdout, 'buffer', sys.stdout)
    binary_stdout.write(rendered + b"\n")