path: root/generate_html_cs.py



#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, unicode_literals

import codecs
import logging
import re
import sys

import jinja2

import lxml.etree as et


# Log records are printed as "LEVEL:function name:message".  With the
# threshold at INFO the logging.debug() calls in this module stay silent;
# lower the level to logging.DEBUG to see them.
logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
                    level=logging.INFO)

# Year combined with the "m" and "d" attributes of <LOSUNG> and <DAY>
# elements to build date ids (those elements carry no year attribute of
# their own in the parsing code below).
cur_year = 2015

# Translation table for Bible book abbreviations.  The key is the value of
# the "b" attribute of an <S> element; the value is what parse_body()
# stores as the book of 'int_ref_id'.  A book missing from this table makes
# parse_body() log an error and raise KeyError.
# Multi-word values carry "%20" in place of the space -- presumably
# because they end up inside URLs; confirm against the template.
book_abbrs = {
    "Gn": "Gen", "Ex": "Exod",
    "Lv": "Lev",  # NA in source code,
    "Nu": "Num", "Dt": "Deut", "Jos": "Josh", "Jdc": "Judg",
    "Rth": "Ruth",
    "1Sm": "1%20Sam", "2Sm": "2%20Sam", "1Rg": "1%20Kgs", "2Rg": "2%20Kgs",
    "1Chr": "1%20Chr", "2Chr": "2%20Chr", "Esr": "Ezra", "Neh": "Neh",
    "Esth": "Esth",  # NA in source code,
    "Job": "Job", "Ps": "Ps", "Prv": "Prov", "Eccl": "Eccl",
    "Song": "Song",  # NA in source code,
    "Is": "Isa", "Jr": "Jer", "Thr": "Lam", "Ez": "Ezek",
    "Dn": "Dan", "Hos": "Hos", "Joel": "Joel",
    "Am": "Amos",
    "Obad": "Obad",  # NA in source code
    "Jon": "Jonah", "Mch": "Mic",
    "Nah": "Nah",  # NA in source code
    "Hab": "Hab", "Zph": "Zeph", "Hgg": "Hag",
    "Zch": "Zech", "Ml": "Mal",
    "Mt": "Matt", "Mc": "Mark", "L": "Luke", "J": "John",
    "Act": "Acts",
    "R": "Rom", "1K": "1%20Cor", "2K": "2%20Cor",
    "G": "Gal", "E": "Eph", "Ph": "Phil", "Kol": "Col",
    "1Th": "1%20Thess", "2Th": "2%20Thess", "1T": "1%20Tim", "2T": "2%20Tim",
    "Tt": "Titus",
    "Phm": "Phlm",
    "H": "Heb", "Jc": "Jas",
    "1P": "1%20Pet", "2P": "2%20Pet",
    "1J": "1%20John", "2J": "2%20John", "3J": "3%20John",
    "Jd": "Jude", "Ap": "Rev"
}


def csv2dict(filename):
    """
    Load a whitespace-separated table of book names into a dictionary.

    Every non-blank line of the UTF-8 encoded file is split on runs of
    whitespace and the fields are paired, in order, with the names
    'no', 'test', 'cs_abbr', 'cs_name', 'de_abbr' and 'de_name'
    (surplus fields are ignored).  Blank lines are skipped and leading
    whitespace on a line is ignored.

    Returns a dict which maps the 'cs_abbr' field of each line to the
    dict of all fields of that line; when the same 'cs_abbr' occurs more
    than once the last line wins.

    Raises KeyError for a non-blank line with fewer than three fields.
    """
    out_dict = {}
    split_re = re.compile(r'\s+')
    field_names = ['no', 'test', 'cs_abbr', 'cs_name',
                   'de_abbr', 'de_name']
    with codecs.open(filename, 'rb', 'utf8') as csvfile:
        for row in csvfile:
            # Leading whitespace would produce an empty first field and
            # shift every column by one, so drop it before splitting.
            row = row.lstrip()
            if not row:
                # Blank line (typically at the end of the file): it has
                # no 'cs_abbr' field and used to abort with KeyError.
                continue
            line_dict = dict(zip(field_names, split_re.split(row)))
            out_dict[line_dict['cs_abbr']] = line_dict
    logging.debug("out_dict:\n%s", out_dict)
    return out_dict


def parse_verses(lines):
    """
    Having list of <L> elements, make the text out of them (with
    <BR> elements where necessary).

    lines is a sequence of elements; only their .text attribute is
    used.  Elements without text (.text is None) are skipped, wherever
    they stand in the sequence.

    Returns the remaining texts joined by "<br>" plus a newline; the
    result is an empty string when there is no text at all.
    """
    logging.debug("lines = %s", lines)
    logging.debug("lines = len %s", len(lines))
    # Filter before joining: a text-less first element must not turn the
    # result into None nor break the concatenation of the later lines.
    texts = [line.text for line in lines if line.text is not None]
    out = "<br>\n".join(texts)
    logging.debug("body = %s", out)
    return out


def parse_body(elem):
    """Parse one verse element
    Example could be (or NT instead of OT, or DAY):
      <OT>
       <S b="Ps" ch="91" v="9"/>
       <L>V Hospodinu je tvé útočiště.</L>
       <SL>Žalm 91,9</SL>
      </OT>

    Returns None for an element without children, otherwise a dict with
      'text'       -- verse text made by parse_verses() from all <L>
                      elements found below elem,
      'ref_id'     -- dict with 'book', 'chapter' and 'verse' taken from
                      the b, ch and v attributes of the <S> child,
      'int_ref_id' -- copy of 'ref_id' with 'book' translated through
                      book_abbrs,
      'ref'        -- text of the <SL> child; the key is absent when
                      there is no <SL> child.

    Raises KeyError when <S> lacks one of its attributes or when its
    book is not in book_abbrs (the latter is logged as an error first),
    and AttributeError when elem has children but no <S> child.
    """
    if len(elem) == 0:
        return None

    wword = {
        'text': ''
    }
    source_elem = elem.find('S')
    logging.debug("S elem = %s", source_elem)
    logging.debug("S elem = attrib %s", source_elem.attrib)

    # biblical reference (computer readable)
    wword['ref_id'] = {
        "book": source_elem.attrib['b'],
        "chapter": source_elem.attrib['ch'],
        "verse": source_elem.attrib['v'],
    }
    logging.debug("wword['ref_id'] = %s", wword['ref_id'])

    # biblical reference (compatible with the English Bible programs)
    try:
        wword['int_ref_id'] = wword['ref_id'].copy()
        wword['int_ref_id']['book'] = book_abbrs[wword['ref_id']['book']]
    except KeyError:
        logging.error('book = %s', wword['ref_id'])
        raise

    # text of the verse; iter() replaces the deprecated getiterator()
    wword['text'] = parse_verses(list(elem.iter("L")))

    # references
    ref = elem.find("SL")
    logging.debug("ref = %s", ref)
    # ref is None when there is no <SL> child, so nothing may be done
    # with it (not even len() for logging) before this check.
    if ref is not None:
        wword['ref'] = ref.text
    return wword


def parse_losung(elem):
    """
    Turn one <LOSUNG> element into a (date_id, data) pair.

    Example of the input:
        <LOSUNG d="2" m="1">
         <TL>Pondělí 2. ledna 2012</TL>
         <OT>
          <S b="Ps" ch="91" v="9"/>
          <L>V Hospodinu je tvé útočiště.</L>
          <SL>Žalm 91,9</SL>
         </OT>
         <NT>
          <S b="1P" ch="5" v="7"/>
          <L>Všechnu svou starost vložte na něj, neboť mu na vás záleží.</L>
          <SL>1.Petrova 5,7</SL>
         </NT>
         <SR><SL>Jozue 24,1-2a.13-18.25-26</SL></SR>
         <CR><SL>Marek 1,1-13</SL></CR>
        </LOSUNG>

    date_id is the tuple (cur_year, month, day).  data is a dict with
    the keys 'date_id' (the same date as "YYYY-MM-DD" text), 'date_full'
    (text of <TL>), 'watchwords' (parse_body() results for <OT> and
    <NT>, in this order) and 'readings' (for <SR> and <CR>, in this
    order, the texts of their <SL> children joined by newlines).
    """
    logging.debug("losung = %s", elem)

    attrs = elem.attrib
    date_id = (cur_year, int(attrs["m"]), int(attrs["d"]))

    result = {'date_id': "%4d-%02d-%02d" % date_id}
    logging.debug('date_id = %s', result['date_id'])

    result['date_full'] = elem.find("TL").text

    # the watchwords of the day: Old Testament first, then New Testament
    result['watchwords'] = [parse_body(elem.find(tag))
                            for tag in ('OT', 'NT')]

    # the two readings of the day
    readings = []
    for tag in ('SR', 'CR'):
        section = elem.find(tag)
        logging.debug("read %s = %s", tag, section)
        references = [item.text for item in section.findall("SL")]
        readings.append("\n".join(references))
    result['readings'] = readings

    return date_id, result


def parse_day(elem, whole_dict):
    """
    Parse <DAY> element and add its data to the entry of the same date.

    Example:
       <DAY d="1" m="1" type="holiday" name="Nový Rok  ">
         <S b="Kol" ch="3" v="17"/>
         <L>Všechno, cokoli mluvíte nebo děláte,</L>
         <L>čiňte ve jménu Pána Ježíše a skrze něho děkujte Bohu Otci.</L>
         <SL>Koloským 3,17</SL>
       </DAY>

       <DAY d="17" m="2" type="sunday" name="Invocavit"
            meaning="Vzývati mne bude a vyslyším jej. Žalm 91,15" ord="7">
         <S b="1J" ch="3" v="8"/>
         <L>Proto se zjevil Syn Boží, aby zmařil činy ďáblovy.</L>
         <SL>1.Janova 3,8b</SL>
       </DAY>

       <DAY d="13" m="1" type="week" name="Alianční modlitební týden  ">
       </DAY>

    whole_dict maps (year, month, day) tuples to the per-day dicts; the
    dict for (cur_year, m, d) is modified in place and nothing is
    returned.  Depending on the type attribute these keys are set:
      holiday -- 'holy_name', 'holy_text'
      sunday  -- 'sun_name', 'sun_ord', 'sun_text' and, when the
                 meaning attribute is present, 'sun_mean'
      week    -- 'week_title'
    Names and meanings are stripped of surrounding whitespace, the texts
    are results of parse_body().

    Raises KeyError when whole_dict has no entry for the date or when a
    required attribute is missing, ValueError for an unknown type.
    """
    date_id = (cur_year, int(elem.attrib["m"]), int(elem.attrib["d"]),)
    logging.debug('date_id = %s', date_id)
    out = whole_dict[date_id]

    if elem.attrib['type'] == "holiday":
        out['holy_name'] = elem.attrib['name'].strip()
        out['holy_text'] = parse_body(elem)
    elif elem.attrib['type'] == "sunday":
        out['sun_name'] = elem.attrib['name'].strip()
        if 'meaning' in elem.attrib:
            out['sun_mean'] = elem.attrib['meaning'].strip()
        out['sun_ord'] = int(elem.attrib['ord'])
        out['sun_text'] = parse_body(elem)
    elif elem.attrib['type'] == "week":
        out['week_title'] = elem.attrib['name'].strip()
    else:
        # Exceptions do not interpolate their arguments the way logging
        # calls do, so the message has to be formatted explicitly.
        raise ValueError("Unknown DAY type = %s" % elem.attrib['type'])


def parse_file(filename):
    """
    Render the watchwords stored in an XML file as an HTML page.

    All <LOSUNG> elements of the file are parsed with parse_losung()
    first; the <DAY> elements are then merged into the entries of the
    same date by parse_day().  The entries, sorted by date, are passed
    as ``articles`` to the Jinja2 template 'czech.html', which is looked
    up in the 'templates' directory.

    Returns the rendered page encoded in UTF-8.
    """
    tree = et.parse(filename).getroot()
    article_dict = {}
    env = jinja2.Environment(loader=jinja2.FileSystemLoader('templates'))
    template = env.get_template('czech.html')

    # iter() is used instead of the deprecated getiterator()
    for los in tree.iter("LOSUNG"):
        key, text = parse_losung(los)
        article_dict[key] = text

    # DAY elements only extend entries created from LOSUNG elements, so
    # they have to be processed after all of those exist.
    for day in tree.iter("DAY"):
        parse_day(day, article_dict)

    # keys are (year, month, day) tuples, i.e. they sort chronologically
    article_list = [article_dict[key] for key in sorted(article_dict)]
    return template.render(articles=article_list).encode('utf8')


if __name__ == "__main__":
    # Script entry point: render the XML file named by the first
    # command-line argument and write the HTML page to standard output.
    if len(sys.argv) < 2:
        sys.exit("Usage: generate_html_cs.py LOSUNGEN_XML")
    html = parse_file(sys.argv[1])
    # parse_file() returns UTF-8 encoded bytes.  print() on Python 3 would
    # output their repr (b'...'), so they are written to the binary layer
    # of stdout where it exists; the stdout of Python 2 has no such
    # attribute and takes byte strings directly.
    stream = getattr(sys.stdout, 'buffer', sys.stdout)
    stream.write(html)
    stream.write(b"\n")  # the newline print() used to append