path: root/generate_html_cs.py

                  

#!/usr/bin/python3
import re
import codecs
import lxml.etree as et
import sys
import logging
#logging.basicConfig(level=logging.DEBUG)
import jinja2

# Year used by parse_one() when building each entry's "date_id"; the month
# and day come from the LOSUNG element's "m" and "d" attributes.
cur_year = 2012

def csv2dict(filename):
    """Load a whitespace-separated table from *filename* into a dict.

    The file is read as UTF-8. Every line is split on runs of whitespace
    and the resulting fields are paired, in order, with the column names
    ``no``, ``test``, ``cs_abbr``, ``cs_name``, ``de_abbr`` and ``de_name``;
    fields beyond the sixth are dropped.

    Blank lines are ignored. Non-blank lines that are too short to contain
    a ``cs_abbr`` field are skipped with a warning instead of aborting the
    whole load with a KeyError.

    Returns:
        A dict mapping each row's ``cs_abbr`` value to the dict built for
        that row. If an abbreviation occurs more than once, the last row
        wins.
    """
    field_names = ('no', 'test', 'cs_abbr', 'cs_name',
                   'de_abbr', 'de_name')
    out_dict = {}
    with open(filename, encoding='utf8') as csvfile:
        for row in csvfile:
            # str.split() with no argument ignores leading/trailing
            # whitespace, so the line terminator never becomes a field.
            fields = row.split()
            if not fields:
                continue
            line_dict = dict(zip(field_names, fields))
            if 'cs_abbr' not in line_dict:
                logging.warning("skipping malformed line: %r", row)
                continue
            out_dict[line_dict['cs_abbr']] = line_dict
    logging.debug("out_dict:\n%s", out_dict)
    return out_dict


def parse_body(elem):
    """Parse one verse element.

    Example could be (or NT instead of OT):
      <OT>
       <S b="Ps" ch="91" v="9"/>
       <L>V Hospodinu je tvé útočiště.</L>
       <SL>Žalm 91,9</SL>
      </OT>

    Returns a dict with these keys:
      text    -- the texts of all <L> elements, joined with an HTML line
                 break plus a newline; empty <L> elements are ignored and
                 the value is '' when there is no text at all
      ref_id  -- computer-readable reference built from the "b", "ch" and
                 "v" attributes of the <S> child (keys "book", "chapter",
                 "verse")
      ref     -- text of the <SL> child; the key is only present when the
                 element has an <SL> child

    Raises AttributeError when *elem* has no <S> child and KeyError when
    that child lacks one of the three attributes.
    """
    source_elem = elem.find('S')
    logging.debug("elem = %s", source_elem)
    logging.debug("elem = attrib %s", source_elem.attrib)

    wword = {
        'text': '',
        # biblical reference (computer readable)
        'ref_id': {
            "book": source_elem.attrib['b'],
            "chapter": source_elem.attrib['ch'],
            "verse": source_elem.attrib['v'],
        },
    }

    # text of the verse; iter() replaces the deprecated getiterator()
    verses = list(elem.iter("L"))
    logging.debug("verses = %s", verses)
    logging.debug("verses = len %s", len(verses))
    # Empty <L/> elements have text None; dropping them here keeps the
    # join from failing no matter where in the sequence they occur.
    wword['text'] = "<br>\n".join(
        line.text for line in verses if line.text is not None)
    logging.debug("body = %s", wword['text'])

    # references
    ref = elem.find("SL")
    logging.debug("ref = %s", ref)
    # Only touch ref after the None check: the element is optional.
    if ref is not None:
        wword['ref'] = ref.text
    return wword


def parse_one(elem):
    """
    Parse one losung. Example:
        <LOSUNG d="2" m="1">
         <TL>Pondělí 2. ledna 2012</TL>
         <OT>
          <S b="Ps" ch="91" v="9"/>
          <L>V Hospodinu je tvé útočiště.</L>
          <SL>Žalm 91,9</SL>
         </OT>
         <NT>
          <S b="1P" ch="5" v="7"/>
          <L>Všechnu svou starost vložte na něj, neboť mu na vás záleží.</L>
          <SL>1.Petrova 5,7</SL>
         </NT>
         <SR><SL>Jozue 24,1-2a.13-18.25-26</SL></SR>
         <CR><SL>Marek 1,1-13</SL></CR>
        </LOSUNG>

    Returns a dict with these keys:
      date_id     -- "YYYY-MM-DD" built from the module-level cur_year and
                     the element's "m" and "d" attributes
      date_full   -- text of the <TL> child
      watchwords  -- [parse_body(<OT>), parse_body(<NT>)]
      readings    -- two strings, for <SR> and <CR> in that order; each is
                     the newline-joined text of the section's <SL>
                     children, or '' when the section is missing or has no
                     text

    The <TL>, <OT> and <NT> children and the "m"/"d" attributes are
    required; their absence raises (AttributeError or KeyError).
    """
    logging.debug("losung = %s", elem)
    out = {}
    out['date_id'] = "%4d-%02d-%02d" % (
        cur_year, int(elem.attrib["m"]), int(elem.attrib["d"]))

    out['date_full'] = elem.find("TL").text
    out['watchwords'] = [parse_body(elem.find(tst)) for tst in ('OT', 'NT')]

    out['readings'] = []
    for read in ('SR', 'CR'):
        sect = elem.find(read)
        logging.debug("read %s = %s", read, sect)
        if sect is None:
            # Keep the SR/CR positions stable even when a reading is absent.
            out['readings'].append('')
            continue
        # An empty <SL/> has text None, which str.join() would reject.
        out['readings'].append("\n".join(
            el.text for el in sect.findall("SL") if el.text is not None))

    return out


def parse_file(filename):
    """Render every LOSUNG element of an XML file through the HTML template.

    The file is parsed with lxml, each LOSUNG element found in the tree is
    converted with parse_one(), and the resulting list is passed as
    ``articles`` to the ``czech.html`` template, which is looked up in the
    ``templates`` directory (a relative path, so it is resolved against the
    current working directory).

    Returns the rendered template as a string.
    """
    tree = et.parse(filename).getroot()
    env = jinja2.Environment(loader=jinja2.FileSystemLoader('templates'))
    template = env.get_template('czech.html')

    # iter() is the supported replacement for the deprecated getiterator().
    article_list = [parse_one(los) for los in tree.iter("LOSUNG")]

    return template.render(articles=article_list)


if __name__ == "__main__":
    # The first command-line argument is the XML file to render; fail with
    # a usage message rather than an IndexError traceback when it is absent.
    if len(sys.argv) < 2:
        sys.exit("usage: %s XML_FILE" % sys.argv[0])
    print(parse_file(sys.argv[1]))