#!/usr/bin/python3
"""Convert an XML file of daily watchwords into HTML.

The input is scanned for ``LOSUNG`` elements (one per day) and ``DAY``
elements (holiday / Sunday / week annotations); the collected data is
rendered through the ``czech.html`` Jinja2 template found in ``templates``.
"""
import codecs
import logging
import re
import sys

import jinja2
import lxml.etree as et

logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
                    level=logging.INFO)

# Default year for every generated date; the elements parsed below are only
# read for their month ("m") and day ("d") attributes.
cur_year = 2014

# Book abbreviations as found in the "b" attribute of <S> elements, mapped
# to the abbreviations used for the international reference ("int_ref_id").
book_abbrs = {
    "Gn": "Gen",
    "Ex": "Exod",
    "Lv": "Lev",  # NA in source code
    "Nu": "Num",
    "Dt": "Deut",
    "Jos": "Josh",
    "Jdc": "Judg",
    "Rth": "Ruth",
    "1Sm": "1Sam",
    "2Sm": "2Sam",
    "1Rg": "1Kgs",
    "2Rg": "2Kgs",
    "1Chr": "1Chr",
    "2Chr": "2Chr",
    "Esr": "Ezra",
    "Neh": "Neh",
    "Esth": "Esth",  # NA in source code
    "Job": "Job",
    "Ps": "Ps",
    "Prv": "Prov",
    "Eccl": "Eccl",
    "Song": "Song",  # NA in source code
    "Is": "Isa",
    "Jr": "Jer",
    "Thr": "Lam",
    "Ez": "Ezek",
    "Dn": "Dan",
    "Hos": "Hos",
    "Joel": "Joel",
    "Am": "Amos",
    "Obad": "Obad",  # NA in source code
    "Jon": "Jonah",
    "Mch": "Mic",
    "Nah": "Nah",  # NA in source code
    "Hab": "Hab",
    "Zph": "Zeph",
    "Hgg": "Hag",
    "Zch": "Zech",
    "Ml": "Mal",
    "Mt": "Matt",
    "Mc": "Mark",
    "L": "Luke",
    "J": "John",
    "Act": "Acts",
    "R": "Rom",
    "1K": "1Cor",
    "2K": "2Cor",
    "G": "Gal",
    "E": "Eph",
    "Ph": "Phil",
    "Kol": "Col",
    "1Th": "1Thess",
    "2Th": "2Thess",
    "1T": "1Tim",
    "2T": "2Tim",
    "Tt": "Titus",
    "Phm": "Phlm",
    "H": "Heb",
    "Jc": "Jas",
    "1P": "1Pet",
    "2P": "2Pet",
    "1J": "1John",
    "2J": "2John",
    "3J": "3John",
    "Jd": "Jude",
    "Ap": "Rev",
}


def csv2dict(filename):
    """Load a whitespace-separated table into a dict keyed by ``cs_abbr``.

    Every non-blank line is split on runs of whitespace and the pieces are
    paired, in order, with the field names ``no``, ``test``, ``cs_abbr``,
    ``cs_name``, ``de_abbr`` and ``de_name``; surplus pieces are dropped.

    :param filename: path of the UTF-8 encoded table.
    :returns: dict mapping each row's ``cs_abbr`` value to the dict of
        that row's fields.
    :raises KeyError: if a non-blank line yields too few pieces to fill
        the ``cs_abbr`` field.
    """
    out_dict = {}
    split_re = re.compile(r'\s+')
    field_names = ['no', 'test', 'cs_abbr', 'cs_name', 'de_abbr', 'de_name']

    # codecs.open always opens the underlying file in binary mode and
    # decodes it itself, so rows arrive as str.
    with codecs.open(filename, 'rb', 'utf8') as csvfile:
        for row in csvfile:
            if not row.strip():
                # A blank line has no 'cs_abbr' column; skip it instead of
                # failing with KeyError below.
                continue
            line_dict = dict(zip(field_names, split_re.split(row)))
            out_dict[line_dict['cs_abbr']] = line_dict

    logging.debug("out_dict:\n%s", out_dict)
    return out_dict


def parse_verses(lines):
    """Join the text of a list of elements into one string.

    The first element's text always opens the result (a missing text
    counts as empty); each following element that has text is appended
    after a line-break separator.

    :param lines: sequence of objects exposing a ``text`` attribute.
    :returns: the joined text, or ``""`` for an empty sequence.
    """
    logging.debug("lines = %s", lines)
    logging.debug("lines = len %s", len(lines))
    if not lines:
        return ""

    # "or ''" keeps the join below from failing when the first element
    # has no text at all.
    parts = [lines[0].text or ""]
    for line in lines[1:]:
        logging.debug("another line = %s", line.text)
        if line.text is not None:
            parts.append(line.text)

    # NOTE(review): the separator literal was garbled in the source (a raw
    # line break inside the quotes). "<br/>" is reconstructed from the
    # original docstring ("with ... elements where necessary") and the
    # HTML template target -- confirm against the upstream file.
    out = "<br/>\n".join(parts)
    logging.debug("body = %s", out)
    return out


def parse_body(elem):
    """Parse one verse-bearing element into a watchword dict.

    The element is expected to contain an ``S`` child whose ``b``, ``ch``
    and ``v`` attributes give book, chapter and verse, ``L`` elements
    holding the verse text, and optionally an ``SL`` child holding the
    human-readable reference.

    :param elem: element to parse.
    :returns: ``None`` if ``elem`` has no children, otherwise a dict with
        keys ``text``, ``ref_id``, ``int_ref_id`` and, when an ``SL``
        child exists, ``ref``.
    :raises AttributeError: if ``elem`` has children but no ``S`` child.
    :raises KeyError: if ``S`` lacks one of its attributes or its book
        abbreviation is not in ``book_abbrs``.
    """
    if len(elem) == 0:
        return None

    source_elem = elem.find('S')
    logging.debug("S elem = %s", source_elem)
    logging.debug("S elem = attrib %s", source_elem.attrib)

    # biblical reference (computer readable)
    ref_id = {
        "book": source_elem.attrib['b'],
        "chapter": source_elem.attrib['ch'],
        "verse": source_elem.attrib['v'],
    }
    # biblical reference (compatible with the English Bible programs)
    int_ref_id = dict(ref_id, book=book_abbrs[ref_id['book']])

    wword = {
        # iter() replaces the deprecated getiterator(); both walk the
        # element and all its descendants in document order.
        'text': parse_verses(list(elem.iter("L"))),
        'ref_id': ref_id,
        'int_ref_id': int_ref_id,
    }

    # references
    ref = elem.find("SL")
    logging.debug("ref = %s", ref)
    if ref is not None:
        # len() must only be taken once we know the element exists.
        logging.debug("ref = len %s", len(ref))
        wword['ref'] = ref.text

    return wword


def parse_losung(elem, year=None):
    """Parse one ``LOSUNG`` element.

    Reads the ``m`` / ``d`` attributes for the date, the ``TL`` child for
    the full date text, the ``OT`` and ``NT`` children for the watchwords
    and the ``SL`` children of ``SR`` and ``CR`` for the readings.

    :param elem: the ``LOSUNG`` element.
    :param year: year to use for the date; defaults to ``cur_year``.
    :returns: tuple ``(date_id, data)`` where ``date_id`` is
        ``(year, month, day)`` and ``data`` is a dict with keys
        ``date_id`` (``YYYY-MM-DD`` string), ``date_full``,
        ``watchwords`` (two ``parse_body`` results) and ``readings``
        (two newline-joined strings).
    """
    logging.debug("losung = %s", elem)
    if year is None:
        year = cur_year

    date_id = (year, int(elem.attrib["m"]), int(elem.attrib["d"]))
    out = {
        'date_id': "%4d-%02d-%02d" % date_id,
        'date_full': elem.find("TL").text,
        'watchwords': [parse_body(elem.find(tst)) for tst in ('OT', 'NT')],
        'readings': [],
    }

    for read in ('SR', 'CR'):
        sect = elem.find(read)
        logging.debug("read %s = %s", read, sect)
        # An SL element without text contributes an empty line rather
        # than breaking the join.
        out['readings'].append(
            "\n".join(el.text or "" for el in sect.findall("SL")))

    return date_id, out


def parse_day(elem, whole_dict, year=None):
    """Merge one ``DAY`` element into the matching entry of ``whole_dict``.

    Depending on the element's ``type`` attribute the entry gains:

    * ``holiday`` -- ``holy_name`` and ``holy_text``;
    * ``sunday`` -- ``sun_name``, ``sun_ord``, ``sun_text`` and, when a
      ``meaning`` attribute is present, ``sun_mean``;
    * ``week`` -- ``week_title``.

    :param elem: the ``DAY`` element.
    :param whole_dict: dict keyed by ``(year, month, day)`` as produced
        from ``parse_losung``; the matching entry is modified in place.
    :param year: year to use for the lookup key; defaults to ``cur_year``.
    :raises KeyError: if ``whole_dict`` has no entry for the element's date.
    :raises ValueError: if the ``type`` attribute is none of the above.
    """
    if year is None:
        year = cur_year

    date_id = (year, int(elem.attrib["m"]), int(elem.attrib["d"]))
    out = whole_dict[date_id]
    day_type = elem.attrib['type']

    if day_type == "holiday":
        out['holy_name'] = elem.attrib['name'].strip()
        out['holy_text'] = parse_body(elem)
    elif day_type == "sunday":
        out['sun_name'] = elem.attrib['name'].strip()
        if 'meaning' in elem.attrib:
            out['sun_mean'] = elem.attrib['meaning'].strip()
        out['sun_ord'] = int(elem.attrib['ord'])
        out['sun_text'] = parse_body(elem)
    elif day_type == "week":
        out['week_title'] = elem.attrib['name'].strip()
    else:
        # Exceptions do not interpolate logging-style arguments, so the
        # message has to be formatted explicitly.
        raise ValueError("Unknown DAY type = %s" % day_type)


def parse_file(filename, year=None):
    """Parse the XML file and render all its days as HTML.

    :param filename: path of the XML input.
    :param year: year to assign to every day; defaults to ``cur_year``.
    :returns: the output of the ``czech.html`` template, which receives
        the per-day dicts sorted by date as ``articles``.
    """
    tree = et.parse(filename).getroot()
    article_dict = {}

    env = jinja2.Environment(loader=jinja2.FileSystemLoader('templates'))
    template = env.get_template('czech.html')

    for los in tree.iter("LOSUNG"):
        key, text = parse_losung(los, year)
        article_dict[key] = text

    # DAY elements annotate entries created above, so they are handled in
    # a second pass once every LOSUNG is known.
    for day in tree.iter("DAY"):
        parse_day(day, article_dict, year)

    article_list = [article_dict[key] for key in sorted(article_dict)]
    return template.render(articles=article_list)


if __name__ == "__main__":
    print(parse_file(sys.argv[1]))