diff options
author | Matěj Cepl <mcepl@redhat.com> | 2012-09-21 01:02:05 +0200 |
---|---|---|
committer | Matěj Cepl <mcepl@redhat.com> | 2012-09-21 01:02:09 +0200 |
commit | 2ab034b507f6d893f2553a4d9e4b2606cb8c54b9 (patch) | |
tree | 353689b14bc045f50c17435f52b59559f383b943 /generate_html.py | |
parent | beeaf134299396aac98dcace2533ea2419758aa1 (diff) | |
download | hesla-2ab034b507f6d893f2553a4d9e4b2606cb8c54b9.tar.gz |
The first working version of generate_html.py
Diffstat (limited to 'generate_html.py')
-rw-r--r-- | generate_html.py | 104 |
1 files changed, 104 insertions, 0 deletions
#!/usr/bin/python
# Reconstructed from the new-file diff of generate_html.py
# (mode 100644, index 0000000..ab968e8).
"""Turn an XML file of ``LOSUNG`` entries into one HTML page.

Each ``LOSUNG`` element becomes an ``<article>`` whose id is the date built
from ``cur_year`` and the element's ``m``/``d`` attributes.
"""
import codecs
import logging
import re
import sys
from xml.etree import ElementTree as et

# Uncomment to trace the conversion step by step.
#logging.basicConfig(level=logging.DEBUG)

# Year used for every article id; the input only carries month and day.
cur_year = 2012


def csv2dict(filename):
    """Read a whitespace-separated table into a dict keyed by ``cs_abbr``.

    Each line of the UTF-8 file *filename* is split on runs of whitespace
    and the pieces are paired, in order, with the field names ``no``,
    ``test``, ``cs_abbr``, ``cs_name``, ``de_abbr`` and ``de_name``.
    Surplus pieces are dropped. Blank lines are skipped.

    Returns a dict mapping each row's ``cs_abbr`` to the dict of that row.
    Raises ``KeyError`` for a non-blank line with fewer than three fields.
    """
    out_dict = {}
    split_re = re.compile(r'\s+')
    field_names = ['no', 'test', 'cs_abbr', 'cs_name',
                   'de_abbr', 'de_name']
    with codecs.open(filename, 'rb', 'utf8') as csvfile:
        for row in csvfile:
            if not row.strip():
                # A blank line has no cs_abbr and used to raise KeyError.
                continue
            line_dict = dict(zip(field_names, split_re.split(row)))
            out_dict[line_dict['cs_abbr']] = line_dict
    return out_dict


def parse_body(elem):
    """Build the HTML paragraphs for one section element.

    Every ``L`` element found under *elem* contributes one line to a single
    ``<p>``; consecutive lines are separated by ``<br>``. When *elem* has an
    ``SL`` child, its text follows in a second ``<p class="reference">``.

    Returns a list holding the text paragraph and, if present, the
    reference paragraph.
    """
    body = et.Element("p")
    logging.debug("body elem = %s", elem)
    # Element.iter() replaces getiterator(), removed in Python 3.9.
    verses = list(elem.iter("L"))
    logging.debug("verses = %s", verses)
    logging.debug("verses = len %s", len(verses))
    if verses:
        body.text = verses[0].text
        logging.debug("first line = %s", body.text)
        for line in verses[1:]:
            # Text after a <br> lives in the tail of that <br>.
            line_break = et.SubElement(body, "br")
            logging.debug("another line = %s", line.text)
            line_break.tail = line.text
    out = [body]
    ref = elem.find("SL")
    logging.debug("ref = %s", ref)
    if ref is not None:
        # len(ref) must stay inside the guard: len(None) raises TypeError,
        # which used to crash every section without an SL child.
        logging.debug("ref = len %s", len(ref))
        sig = et.Element("p")
        sig.attrib['class'] = "reference"
        sig.text = ref.text
        out.append(sig)
    return out


def parse_one(elem):
    """Convert one ``LOSUNG`` element into an ``<article>`` element.

    The article id is ``YYYY-MM-DD`` built from ``cur_year`` and the integer
    values of the ``m`` and ``d`` attributes of *elem*. The text of the
    ``TL`` child becomes the ``<h1>`` heading, followed by the paragraphs
    for the ``OT`` and ``NT`` children (in that order). Missing ``TL``,
    ``OT`` or ``NT`` children are tolerated and simply produce no content.

    Raises ``KeyError`` / ``ValueError`` if ``m`` or ``d`` is missing or is
    not an integer.
    """
    logging.debug("losung = %s", elem)
    out = et.Element("article", attrib={
        "id": "%4d-%02d-%02d" % (cur_year, int(elem.attrib["m"]),
                                 int(elem.attrib["d"]))
    })
    head = et.SubElement(out, "header")
    title = et.SubElement(head, "h1")
    # findtext() yields None instead of failing when TL is absent.
    title.text = elem.findtext("TL")
    for tst in ('OT', 'NT'):
        sect = elem.find(tst)
        if sect is None:
            continue
        out.extend(parse_body(sect))
    return out


def parse_file(filename):
    """Render every ``LOSUNG`` element of an XML document as an HTML page.

    *filename* is passed to ``ElementTree.parse``, so it may be a path or an
    open file object. Returns the page as UTF-8 encoded bytes.
    """
    tree = et.parse(filename).getroot()
    doc = et.Element("html")
    head = et.SubElement(doc, "head")
    et.SubElement(head, "meta", attrib={"charset": "utf-8"})
    et.SubElement(head, "meta", attrib={
        "name": "viewport",
        "content": "width=device-width, initial-scale=1.0, "
                   " maximum-scale=2.0, user-scalable=yes"})
    style = et.SubElement(head, "style")
    style.text = """
    body {
        font-size: 120%;
    }
    h1 {
        font-size: 1em;
    }
    article {
        display: none;
    }
    .reference {
        text-align: right;
    }
    """
    et.SubElement(head, "script",
                  attrib={
                      "type": "text/javascript",
                      "src": "hesla.js"
                  })
    title = et.SubElement(head, "title")
    title.text = "Title"
    body = et.SubElement(doc, "body")
    for los in tree.iter("LOSUNG"):
        body.append(parse_one(los))

    # The XML serialiser would write <script ... />, which an HTML parser
    # treats as an unclosed <script> and which hides the rest of the page.
    return et.tostring(doc, encoding="utf-8", method="html")


if __name__ == "__main__":
    # parse_file() returns bytes; write them raw so that Python 3 does not
    # print a b'...' repr. Python 2 has no sys.stdout.buffer.
    raw_stdout = getattr(sys.stdout, "buffer", sys.stdout)
    raw_stdout.write(parse_file("hes12-01.xml"))
    raw_stdout.write(b"\n")