generate_html_cs.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162

#!/usr/bin/python3
import re
import codecs
import lxml.etree as et
import sys
import logging
#logging.basicConfig(level=logging.DEBUG)
import jinja2

# Year written into every generated ``date_id``; the <LOSUNG> elements of the
# input only carry day (``d``) and month (``m``) attributes.
cur_year = 2013

# Translation table from the book abbreviations used in the ``b`` attribute of
# the input XML to the abbreviations understood by English Bible programs.
# Entries tagged "NA in source code" keep the original author's marker
# (presumably: abbreviation not found in the upstream data -- TODO confirm).
book_abbrs = {
    # --- Old Testament ---
    "Gn": "Gen",
    "Ex": "Exod",
    "Lv": "Lev",      # NA in source code
    "Nu": "Num",
    "Dt": "Deut",
    "Jos": "Josh",
    "Jdc": "Judg",
    "Ruth": "Ruth",   # NA in source code
    "1Sm": "1Sam",
    "2Sm": "2Sam",
    "1Rg": "1Kgs",
    "2Rg": "2Kgs",
    "1Chr": "1Chr",
    "2Chr": "2Chr",
    "Esr": "Ezra",
    "Neh": "Neh",
    "Esth": "Esth",   # NA in source code
    "Job": "Job",
    "Ps": "Ps",
    "Prv": "Prov",
    "Eccl": "Eccl",
    "Song": "Song",   # NA in source code
    "Is": "Isa",
    "Jr": "Jer",
    "Thr": "Lam",
    "Ez": "Ezek",
    "Dn": "Dan",
    "Hos": "Hos",
    "Joel": "Joel",
    "Am": "Amos",
    "Obad": "Obad",   # NA in source code
    "Jon": "Jonah",
    "Mch": "Mic",
    "Nah": "Nah",     # NA in source code
    "Hab": "Hab",
    "Zph": "Zeph",
    "Hgg": "Hag",
    "Zch": "Zech",
    "Ml": "Mal",
    # --- New Testament ---
    "Mt": "Matt",
    "Mc": "Mark",
    "L": "Luke",
    "J": "John",
    "Act": "Acts",
    "R": "Rom",
    "1K": "1Cor",
    "2K": "2Cor",
    "G": "Gal",
    "E": "Eph",
    "Ph": "Phil",
    "Kol": "Col",
    "1Th": "1Thess",
    "2Th": "2Thess",
    "1T": "1Tim",
    "2T": "2Tim",
    "Tt": "Titus",
    "Phm": "Phlm",
    "H": "Heb",
    "Jc": "Jas",
    "1P": "1Pet",
    "2P": "2Pet",
    "1J": "1John",
    "2J": "2John",
    "3J": "3John",
    "Jd": "Jude",
    "Ap": "Rev",
}

def csv2dict(filename):
    """Load a whitespace-separated book table keyed by its ``cs_abbr`` column.

    Every line of the UTF-8 encoded file is split on runs of whitespace and
    the leading fields are paired, in order, with the names ``no``, ``test``,
    ``cs_abbr``, ``cs_name``, ``de_abbr`` and ``de_name``. Surplus fields are
    ignored; a line with fewer than six fields yields a shorter dict.

    Blank lines and lines too short to contain a ``cs_abbr`` field are
    skipped (non-blank ones with a warning) instead of aborting the whole
    load with a ``KeyError``.

    Args:
        filename: path of the table file.

    Returns:
        dict mapping each ``cs_abbr`` value to the dict of fields parsed from
        its line. If an abbreviation occurs several times, the last line wins.
    """
    field_names = ('no', 'test', 'cs_abbr', 'cs_name', 'de_abbr', 'de_name')
    key_index = field_names.index('cs_abbr')
    out_dict = {}
    with codecs.open(filename, 'rb', 'utf8') as csvfile:
        for row in csvfile:
            # str.split() without arguments also discards leading and
            # trailing whitespace, so neither an indented line nor the
            # trailing newline can produce an empty (column-shifting) field.
            fields = row.split()
            if len(fields) <= key_index:
                if fields:
                    logging.warning("skipping malformed line: %r", row)
                continue
            line_dict = dict(zip(field_names, fields))
            out_dict[line_dict['cs_abbr']] = line_dict
    logging.debug("out_dict:\n%s", out_dict)
    return out_dict


def parse_body(elem):
    """Parse one watchword element (``<OT>`` or ``<NT>``).

    Example input (``<NT>`` has the same structure):
      <OT>
       <S b="Ps" ch="91" v="9"/>
       <L>V Hospodinu je tvé útočiště.</L>
       <SL>Žalm 91,9</SL>
      </OT>

    Args:
        elem: element with an ``<S>`` child carrying the ``b``, ``ch`` and
            ``v`` attributes, any number of ``<L>`` text lines and an
            optional ``<SL>`` child.

    Returns:
        dict with the keys
          ``text``       -- texts of all non-empty ``<L>`` lines joined with
                            ``"<br>\\n"`` (empty string when there are none),
          ``ref_id``     -- dict with ``book``, ``chapter`` and ``verse``
                            exactly as found on ``<S>``,
          ``int_ref_id`` -- copy of ``ref_id`` whose ``book`` is translated
                            through ``book_abbrs``,
          ``ref``        -- text of ``<SL>``; the key is absent when the
                            element has no ``<SL>`` child.

    Raises:
        KeyError: if ``<S>`` lacks one of its attributes or its book
            abbreviation is not listed in ``book_abbrs``.
    """
    wword = {
        'text': ''
    }
    source_elem = elem.find('S')
    logging.debug("elem = %s", source_elem)
    logging.debug("elem = attrib %s", source_elem.attrib)

    # biblical reference (computer readable)
    wword['ref_id'] = {
        "book": source_elem.attrib['b'],
        "chapter": source_elem.attrib['ch'],
        "verse": source_elem.attrib['v'],
    }

    # biblical reference (compatible with the English Bible programs)
    wword['int_ref_id'] = wword['ref_id'].copy()
    wword['int_ref_id']['book'] = book_abbrs[wword['ref_id']['book']]

    # text of the verse; iter() is the supported replacement for the
    # deprecated getiterator()
    verses = list(elem.iter("L"))
    logging.debug("verses = %s", verses)
    logging.debug("verses = len %s", len(verses))
    # Lines without text are dropped wherever they occur, so an empty first
    # <L> can no longer make the concatenation fail.
    wword['text'] = "<br>\n".join(
        line.text for line in verses if line.text is not None)
    logging.debug("body = %s", wword['text'])

    # references
    ref = elem.find("SL")
    logging.debug("ref = %s", ref)
    if ref is not None:
        # len() is only meaningful (and only safe) once we know <SL> exists
        logging.debug("ref = len %s", len(ref))
        wword['ref'] = ref.text
    return wword


def parse_one(elem, year=None):
    """Parse one ``<LOSUNG>`` element (the entry for a single day).

    Example:
        <LOSUNG d="2" m="1">
         <TL>Pondělí 2. ledna 2012</TL>
         <OT>
          <S b="Ps" ch="91" v="9"/>
          <L>V Hospodinu je tvé útočiště.</L>
          <SL>Žalm 91,9</SL>
         </OT>
         <NT>
          <S b="1P" ch="5" v="7"/>
          <L>Všechnu svou starost vložte na něj, neboť mu na vás záleží.</L>
          <SL>1.Petrova 5,7</SL>
         </NT>
         <SR><SL>Jozue 24,1-2a.13-18.25-26</SL></SR>
         <CR><SL>Marek 1,1-13</SL></CR>
        </LOSUNG>

    Args:
        elem: the ``<LOSUNG>`` element; its ``<TL>``, ``<OT>`` and ``<NT>``
            children are required.
        year: year used for ``date_id``; defaults to the module-level
            ``cur_year`` because the element itself only stores day and
            month.

    Returns:
        dict with the keys
          ``date_id``    -- ``"YYYY-MM-DD"`` built from ``year`` and the
                            ``m``/``d`` attributes,
          ``date_full``  -- text of ``<TL>``,
          ``watchwords`` -- ``[parse_body(<OT>), parse_body(<NT>)]``,
          ``readings``   -- two strings, for ``<SR>`` and ``<CR>`` in that
                            order; each holds the ``<SL>`` texts of the
                            section joined by newlines and is empty when the
                            section is missing.
    """
    if year is None:
        year = cur_year

    logging.debug("losung = %s", elem)
    out = {}
    out['date_id'] = "%4d-%02d-%02d" % (year, int(elem.attrib["m"]),
            int(elem.attrib["d"]))

    out['date_full'] = elem.find("TL").text
    out['watchwords'] = [parse_body(elem.find(tst)) for tst in ('OT', 'NT')]

    out['readings'] = []
    for read in ('SR', 'CR'):
        sect = elem.find(read)
        logging.debug("read %s = %s", read, sect)
        if sect is None:
            # Keep the slot so SR stays at index 0 and CR at index 1.
            out['readings'].append("")
            continue
        # Empty <SL/> elements have text None, which str.join() rejects.
        out['readings'].append("\n".join(
            el.text for el in sect.findall("SL") if el.text is not None))

    return out


def parse_file(filename):
    """Render every ``<LOSUNG>`` entry of an XML file through the HTML template.

    The template ``czech.html`` is loaded from the ``templates`` directory
    (resolved by jinja2's ``FileSystemLoader``) and receives the parsed
    entries as the variable ``articles``.

    Args:
        filename: path of the XML file to convert.

    Returns:
        The rendered template as a string.
    """
    tree = et.parse(filename).getroot()
    env = jinja2.Environment(loader=jinja2.FileSystemLoader('templates'))
    template = env.get_template('czech.html')

    # iter() is the supported replacement for the deprecated getiterator().
    article_list = [parse_one(los) for los in tree.iter("LOSUNG")]

    return template.render(articles=article_list)


if __name__ == "__main__":
    # Without the input file there is nothing to convert; exit with a usage
    # hint on stderr instead of an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit("usage: %s LOSUNGEN_XML_FILE" % sys.argv[0])
    # The rendered HTML goes to standard output.
    print(parse_file(sys.argv[1]))