generate_html.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103

#!/usr/bin/python
import re
import codecs
from xml.etree import ElementTree as et
import sys
import logging
#logging.basicConfig(level=logging.DEBUG)

# Year used as the leading component of every generated <article> id;
# parse_one() combines it with the month ("m") and day ("d") attributes it
# reads from each entry.
cur_year = 2012


def csv2dict(filename):
    """Load a whitespace-separated table into a dict keyed by 'cs_abbr'.

    Each non-blank line of the UTF-8 encoded file *filename* is split on
    runs of whitespace and its leading fields are mapped, in order, onto the
    names 'no', 'test', 'cs_abbr', 'cs_name', 'de_abbr' and 'de_name'.
    Fields beyond the sixth are ignored; a short line simply yields a row
    dict with fewer keys.

    Returns a dict mapping each row's 'cs_abbr' value to that row's dict.
    When two rows share an abbreviation the later one wins.

    Raises KeyError if a non-blank line is too short to supply a 'cs_abbr'
    field.
    """
    out_dict = {}
    splitRE = re.compile(r'\s+')
    field_names = ['no', 'test', 'cs_abbr', 'cs_name',
            'de_abbr', 'de_name']
    with codecs.open(filename, 'rb', 'utf8') as csvfile:
        for row in csvfile:
            # A blank line (typically a trailing one at end of file) carries
            # no record; without this guard it would raise KeyError below.
            if not row.strip():
                continue
            line_dict = dict(zip(field_names, splitRE.split(row)))
            out_dict[line_dict['cs_abbr']] = line_dict
    return out_dict


def parse_body(elem):
    """Convert one text section into a list of HTML elements.

    Every <L> element found under *elem* becomes one line of a single <p>
    element: the first line is the paragraph text and each further line
    follows a <br> separator.  If *elem* has a direct <SL> child, its text
    is emitted as a second paragraph, <p class="reference">.

    Returns a list holding the verse paragraph, followed by the reference
    paragraph when an <SL> child exists.  The verse paragraph is returned
    (empty) even when there are no <L> elements.
    """
    logging.debug("body elem = %s", elem)
    body = et.Element("p")
    # Element.iter() is the replacement for getiterator(), which was
    # removed in Python 3.9.
    verses = list(elem.iter("L"))
    logging.debug("verses = %s", verses)
    logging.debug("verses = len %s", len(verses))
    if verses:
        body.text = verses[0].text
        logging.debug("first line = %s", body.text)
        for line in verses[1:]:
            logging.debug("another line = %s", line.text)
            # Text that follows a <br> lives in that element's tail.
            et.SubElement(body, "br").tail = line.text
    out = [body]

    ref = elem.find("SL")
    logging.debug("ref = %s", ref)
    if ref is not None:
        # len() is only meaningful once we know the reference exists;
        # calling it unconditionally raised TypeError for sections
        # without an <SL> child.
        logging.debug("ref = len %s", len(ref))
        sig = et.Element("p")
        sig.attrib['class'] = "reference"
        sig.text = ref.text
        out.append(sig)
    return out


def parse_one(elem):
    """Build the HTML <article> element for a single entry.

    The article id is "YYYY-MM-DD", composed from the module-level
    cur_year and the entry's "m" and "d" attributes.  The text of the <TL>
    child becomes the <h1> inside the article's <header>; the elements
    produced by parse_body() for the <OT> and then the <NT> child are
    appended after it.

    Returns the assembled <article> element.
    """
    logging.debug("losung = %s", elem)
    month = int(elem.attrib["m"])
    day = int(elem.attrib["d"])
    article = et.Element(
        "article",
        attrib={"id": "%4d-%02d-%02d" % (cur_year, month, day)})

    header = et.SubElement(article, "header")
    et.SubElement(header, "h1").text = elem.find("TL").text

    # Old Testament section first, then New Testament.
    for section_tag in ('OT', 'NT'):
        for piece in parse_body(elem.find(section_tag)):
            article.append(piece)
    return article


def parse_file(filename):
    """Render the XML file *filename* as a complete HTML document.

    Builds an <html> tree with a fixed <head> (charset, viewport,
    "screen.css" stylesheet, title and the "hesla.js" script) and a <body>
    containing a <noscript> notice followed by one <article>, produced by
    parse_one(), for every <LOSUNG> element in the input.

    Returns the serialized document, prefixed with the HTML5 doctype, as a
    native ``str`` (text on Python 3, UTF-8 encoded bytes on Python 2).
    """
    tree = et.parse(filename).getroot()
    doc = et.Element("html")
    head = et.SubElement(doc, "head")
    et.SubElement(head, "meta", attrib={"charset": "utf-8"})
    et.SubElement(head, "meta", attrib={
        "name": "viewport",
        "content": "width=device-width, initial-scale=1.0, " + \
                " maximum-scale=2.0, user-scalable=yes"})
    et.SubElement(head, "link", attrib={
            "rel": "stylesheet",
            "type": "text/css",
            "href": "screen.css"
        })
    title = et.SubElement(head, "title")
    title.text = "Title"
    script = et.SubElement(head, "script",
            attrib={
                "type": "text/javascript",
                "src": "hesla.js",
                "async": "async",
                "defer": "defer"
            })
    # script element cannot be empty, otherwise we would get self-closed
    # element, which is invalid for <script>
    script.text = "\n"

    body = et.SubElement(doc, "body")
    noscript = et.SubElement(body, "noscript")
    noscript.text="I am sorry, this really doesn't work without JavaScript."
    # Element.iter() is the replacement for getiterator(), which was
    # removed in Python 3.9.
    for los in tree.iter("LOSUNG"):
        body.append(parse_one(los))

    markup = et.tostring(doc, encoding="utf-8")
    # On Python 3 tostring() returns bytes here, which cannot be
    # concatenated with the str doctype; decode so the result is a native
    # str on either major version (on Python 2 bytes is already str).
    if not isinstance(markup, str):
        markup = markup.decode("utf-8")
    return "<!DOCTYPE html>\n" + markup

if __name__ == "__main__":
    # Script entry point: the first command-line argument is the path of
    # the XML input; the generated HTML is written to standard output.
    source_path = sys.argv[1]
    print(parse_file(source_path))