#!/usr/bin/python3
import re
import codecs
import lxml.etree as et
import sys
import logging
#logging.basicConfig(level=logging.DEBUG)
import jinja2
# Year that parse_one() combines with each LOSUNG element's "m" (month) and
# "d" (day) attributes to build the ``date_id`` string; those are the only
# date attributes parse_one() reads, so the year has to come from here.
cur_year = 2012
def csv2dict(filename):
    """Load a whitespace-separated table into a dict keyed by ``cs_abbr``.

    Every line of the UTF-8 encoded file *filename* is split on runs of
    whitespace and the pieces are assigned, in order, to the fields
    ``no``, ``test``, ``cs_abbr``, ``cs_name``, ``de_abbr`` and
    ``de_name``.  Surplus pieces are dropped; a short line yields fewer
    fields.

    Lines that do not supply a non-empty ``cs_abbr`` (blank lines in
    particular) are skipped, because they cannot be keyed.

    Args:
        filename: path of the table file.

    Returns:
        dict mapping each ``cs_abbr`` value to the dict of fields parsed
        from its line.  When an abbreviation occurs more than once, the
        last line wins.
    """
    field_names = ['no', 'test', 'cs_abbr', 'cs_name',
                   'de_abbr', 'de_name']
    split_re = re.compile(r'\s+')
    out_dict = {}

    # Built-in open() decodes the text itself; codecs.open() is deprecated
    # in recent Python versions.
    with open(filename, encoding='utf-8') as csvfile:
        for row in csvfile:
            line_dict = dict(zip(field_names, split_re.split(row)))
            # A blank or truncated line has no usable key; indexing
            # line_dict['cs_abbr'] directly would raise KeyError here.
            if not line_dict.get('cs_abbr'):
                continue
            out_dict[line_dict['cs_abbr']] = line_dict

    logging.debug("out_dict:\n%s", out_dict)
    return out_dict
def parse_body(elem):
    """Parse one verse element into a dict.

    Example could be (or NT instead of OT):

        <OT>
          <S b="Ps" ch="91" v="9"/>
          <L>V Hospodinu je tvé útočiště.</L>
          <SL>Žalm 91,9</SL>
        </OT>

    Args:
        elem: element with an ``S`` child (attributes ``b``, ``ch``,
            ``v``), any number of ``L`` elements and an optional ``SL``
            child.

    Returns:
        dict with the keys

        * ``text`` -- texts of all ``L`` elements below *elem* joined
          with ``"<br>\\n"``; ``L`` elements without text are ignored,
          and the value is ``''`` when none has text;
        * ``ref_id`` -- dict with ``book``, ``chapter`` and ``verse``
          taken from the ``b``, ``ch`` and ``v`` attributes of ``S``;
        * ``ref`` -- text of the ``SL`` child; the key is present only
          when an ``SL`` child exists.

    Raises:
        AttributeError: if *elem* has no ``S`` child.
        KeyError: if the ``S`` child lacks ``b``, ``ch`` or ``v``.
    """
    wword = {
        'text': ''
    }

    source_elem = elem.find('S')
    logging.debug("elem = %s", source_elem)
    logging.debug("elem = attrib %s", source_elem.attrib)

    # biblical reference (computer readable)
    wword['ref_id'] = {
        "book": source_elem.attrib['b'],
        "chapter": source_elem.attrib['ch'],
        "verse": source_elem.attrib['v'],
    }

    # text of the verse; iter() replaces the deprecated getiterator()
    verses = list(elem.iter("L"))
    logging.debug("verses = %s", verses)
    logging.debug("verses = len %s", len(verses))
    # Dropping empty <L/> elements up front keeps the text a str even
    # when the first line is empty (None + str would raise TypeError).
    lines = [verse.text for verse in verses if verse.text is not None]
    wword['text'] = "<br>\n".join(lines)
    logging.debug("body = %s", wword['text'])

    # references (human readable); <SL> may be absent, so only touch it
    # after the None check
    ref = elem.find("SL")
    logging.debug("ref = %s", ref)
    if ref is not None:
        wword['ref'] = ref.text

    return wword
def parse_one(elem, year=None):
    """Parse one losung element into a dict.

    Example:

        <LOSUNG d="2" m="1">
          <TL>Pondělí 2. ledna 2012</TL>
          <OT>
            <S b="Ps" ch="91" v="9"/>
            <L>V Hospodinu je tvé útočiště.</L>
            <SL>Žalm 91,9</SL>
          </OT>
          <NT>
            <S b="1P" ch="5" v="7"/>
            <L>Všechnu svou starost vložte na něj, neboť mu na vás záleží.</L>
            <SL>1.Petrova 5,7</SL>
          </NT>
          <SR><SL>Jozue 24,1-2a.13-18.25-26</SL></SR>
          <CR><SL>Marek 1,1-13</SL></CR>
        </LOSUNG>

    Args:
        elem: the ``LOSUNG`` element.
        year: year used in ``date_id``; defaults to the module-level
            ``cur_year`` because the element itself is only read for
            its month (``m``) and day (``d``) attributes.

    Returns:
        dict with the keys

        * ``date_id`` -- ``"%4d-%02d-%02d"`` of year, month and day;
        * ``date_full`` -- text of the ``TL`` child;
        * ``watchwords`` -- ``parse_body()`` results for the ``OT`` and
          ``NT`` children, in that order;
        * ``readings`` -- two strings, for ``SR`` and ``CR`` in that
          order, each the ``SL`` texts of that section joined with
          newlines.  A missing section, or one without any ``SL`` text,
          yields ``''`` so the positions stay fixed.
    """
    if year is None:
        year = cur_year

    logging.debug("losung = %s", elem)
    out = {}
    out['date_id'] = "%4d-%02d-%02d" % (year, int(elem.attrib["m"]),
                                        int(elem.attrib["d"]))
    out['date_full'] = elem.find("TL").text

    out['watchwords'] = [parse_body(elem.find(tst)) for tst in ('OT', 'NT')]

    out['readings'] = []
    for read in ('SR', 'CR'):
        sect = elem.find(read)
        logging.debug("read %s = %s", read, sect)
        if sect is None:
            # Keep the SR/CR slot so consumers can still index by position.
            out['readings'].append("")
            continue
        # An empty <SL/> has text None, which str.join() cannot handle.
        out['readings'].append("\n".join(
            el.text for el in sect.findall("SL") if el.text is not None))

    return out
def parse_file(filename, template_dir='templates', template_name='czech.html'):
    """Render every ``LOSUNG`` element of an XML file through a template.

    Args:
        filename: path of the XML file to parse.
        template_dir: directory handed to ``jinja2.FileSystemLoader``;
            the default ``'templates'`` is resolved by the loader, so a
            relative value depends on the current working directory.
        template_name: name of the template to load from *template_dir*.

    Returns:
        The rendered template as a string.  The template receives the
        list of ``parse_one()`` results as the variable ``articles``.
    """
    tree = et.parse(filename).getroot()

    # The template is loaded before the elements are parsed, so a missing
    # template is reported before any parsing work is done.
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))
    template = env.get_template(template_name)

    # iter() is the supported replacement for the deprecated getiterator().
    article_list = [parse_one(los) for los in tree.iter("LOSUNG")]

    return template.render(articles=article_list)
if __name__ == "__main__":
    # The first command-line argument is the XML file to render; without
    # it, report the expected usage instead of failing with IndexError.
    if len(sys.argv) < 2:
        sys.exit("usage: %s XML_FILE" % sys.argv[0])
    print(parse_file(sys.argv[1]))