1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
|
#!/usr/bin/python3
import re
import codecs
import lxml.etree as et
import sys
import logging
#logging.basicConfig(level=logging.DEBUG)
import jinja2
# Year written into the ``date_id`` of every parsed entry (see parse_one).
cur_year = 2013

# Translation table for book abbreviations.  The key is the abbreviation
# read from the ``b`` attribute of an <S> element, the value is the
# abbreviation stored in ``int_ref_id`` by parse_body.  The
# "NA in source code" remarks are the original author's notes, kept as-is.
book_abbrs = {
    # Old Testament
    "Gn": "Gen",
    "Ex": "Exod",
    "Lv": "Lev",  # NA in source code
    "Nu": "Num",
    "Dt": "Deut",
    "Jos": "Josh",
    "Jdc": "Judg",
    "Ruth": "Ruth",  # NA in source code
    "1Sm": "1Sam",
    "2Sm": "2Sam",
    "1Rg": "1Kgs",
    "2Rg": "2Kgs",
    "1Chr": "1Chr",
    "2Chr": "2Chr",
    "Esr": "Ezra",
    "Neh": "Neh",
    "Esth": "Esth",  # NA in source code
    "Job": "Job",
    "Ps": "Ps",
    "Prv": "Prov",
    "Eccl": "Eccl",
    "Song": "Song",  # NA in source code
    "Is": "Isa",
    "Jr": "Jer",
    "Thr": "Lam",
    "Ez": "Ezek",
    "Dn": "Dan",
    "Hos": "Hos",
    "Joel": "Joel",
    "Am": "Amos",
    "Obad": "Obad",  # NA in source code
    "Jon": "Jonah",
    "Mch": "Mic",
    "Nah": "Nah",  # NA in source code
    "Hab": "Hab",
    "Zph": "Zeph",
    "Hgg": "Hag",
    "Zch": "Zech",
    "Ml": "Mal",
    # New Testament
    "Mt": "Matt",
    "Mc": "Mark",
    "L": "Luke",
    "J": "John",
    "Act": "Acts",
    "R": "Rom",
    "1K": "1Cor",
    "2K": "2Cor",
    "G": "Gal",
    "E": "Eph",
    "Ph": "Phil",
    "Kol": "Col",
    "1Th": "1Thess",
    "2Th": "2Thess",
    "1T": "1Tim",
    "2T": "2Tim",
    "Tt": "Titus",
    "Phm": "Phlm",
    "H": "Heb",
    "Jc": "Jas",
    "1P": "1Pet",
    "2P": "2Pet",
    "1J": "1John",
    "2J": "2John",
    "3J": "3John",
    "Jd": "Jude",
    "Ap": "Rev",
}
def csv2dict(filename):
    """Read a whitespace-separated table into a dict keyed by ``cs_abbr``.

    Every non-blank line of the UTF-8 file *filename* is split on runs of
    whitespace.  The fields are paired, in order, with the names ``no``,
    ``test``, ``cs_abbr``, ``cs_name``, ``de_abbr`` and ``de_name``;
    surplus fields are dropped and missing trailing fields are simply
    absent from that row's dict.

    Blank (or whitespace-only) lines are skipped, and leading whitespace
    on a line is ignored, so a trailing empty line no longer aborts the
    whole load.

    Returns:
        dict mapping each row's ``cs_abbr`` value to the dict of that row.

    Raises:
        KeyError: if a non-blank line has fewer than three fields (there
            is then no ``cs_abbr`` to key the row on).
    """
    field_names = ['no', 'test', 'cs_abbr', 'cs_name',
                   'de_abbr', 'de_name']
    out_dict = {}
    with open(filename, encoding='utf-8') as csvfile:
        for row in csvfile:
            # str.split() without arguments splits on whitespace runs and
            # discards the empty strings at both ends of the line.
            fields = row.split()
            if not fields:
                continue
            line_dict = dict(zip(field_names, fields))
            out_dict[line_dict['cs_abbr']] = line_dict
    logging.debug("out_dict:\n%s", out_dict)
    return out_dict
def parse_body(elem):
    """Parse one verse element.

    Example could be (or NT instead of OT):

        <OT>
          <S b="Ps" ch="91" v="9"/>
          <L>V Hospodinu je tvé útočiště.</L>
          <SL>Žalm 91,9</SL>
        </OT>

    Returns a dict with these keys:

    * ``text`` -- the texts of all ``<L>`` elements joined with
      ``"<br>\\n"``; ``<L>`` elements without text are skipped and the
      result is ``''`` when there is nothing to join.
    * ``ref_id`` -- dict with ``book``, ``chapter`` and ``verse`` taken
      from the ``b``, ``ch`` and ``v`` attributes of the ``<S>`` child.
    * ``int_ref_id`` -- copy of ``ref_id`` whose ``book`` is translated
      through ``book_abbrs``.
    * ``ref`` -- text of the ``<SL>`` child; the key is present only when
      such a child exists.

    Raises:
        AttributeError: if *elem* has no ``<S>`` child.
        KeyError: if ``<S>`` lacks one of the attributes or its book
            abbreviation is not in ``book_abbrs``.
    """
    wword = {
        'text': ''
    }

    source_elem = elem.find('S')
    logging.debug("elem = %s", source_elem)
    logging.debug("elem = attrib %s", source_elem.attrib)

    # biblical reference (computer readable)
    wword['ref_id'] = {
        "book": source_elem.attrib['b'],
        "chapter": source_elem.attrib['ch'],
        "verse": source_elem.attrib['v'],
    }

    # biblical reference (compatible with the English Bible programs)
    wword['int_ref_id'] = wword['ref_id'].copy()
    wword['int_ref_id']['book'] = book_abbrs[wword['ref_id']['book']]

    # text of the verse; iter() is the supported replacement for the
    # deprecated getiterator()
    verses = list(elem.iter("L"))
    logging.debug("verses = %s", verses)
    logging.debug("verses = len %s", len(verses))
    # Skipping text-less <L> elements up front keeps an empty first line
    # from breaking the concatenation of the following ones.
    wword['text'] = "<br>\n".join(
        line.text for line in verses if line.text is not None)
    logging.debug("body = %s", wword['text'])

    # references
    ref = elem.find("SL")
    logging.debug("ref = %s", ref)
    if ref is not None:
        # len() is only meaningful once we know the element exists
        logging.debug("ref = len %s", len(ref))
        wword['ref'] = ref.text

    return wword
def parse_one(elem):
    """
    Convert a single <LOSUNG> element into a plain dict. Example input:

        <LOSUNG d="2" m="1">
          <TL>Pondělí 2. ledna 2012</TL>
          <OT>
            <S b="Ps" ch="91" v="9"/>
            <L>V Hospodinu je tvé útočiště.</L>
            <SL>Žalm 91,9</SL>
          </OT>
          <NT>
            <S b="1P" ch="5" v="7"/>
            <L>Všechnu svou starost vložte na něj, neboť mu na vás záleží.</L>
            <SL>1.Petrova 5,7</SL>
          </NT>
          <SR><SL>Jozue 24,1-2a.13-18.25-26</SL></SR>
          <CR><SL>Marek 1,1-13</SL></CR>
        </LOSUNG>

    The returned dict holds:

    * ``date_id`` -- ``cur_year`` plus the ``m`` and ``d`` attributes,
      formatted as ``YYYY-MM-DD``.
    * ``date_full`` -- text of the <TL> child.
    * ``watchwords`` -- ``parse_body`` results for the <OT> and <NT>
      children, in that order.
    * ``readings`` -- for <SR> and then <CR>, the texts of their <SL>
      children joined with newlines.
    """
    logging.debug("losung = %s", elem)

    month = int(elem.attrib["m"])
    day = int(elem.attrib["d"])
    date_id = "%4d-%02d-%02d" % (cur_year, month, day)
    date_full = elem.find("TL").text

    watchwords = [parse_body(elem.find(tag)) for tag in ('OT', 'NT')]

    readings = []
    for tag in ('SR', 'CR'):
        section = elem.find(tag)
        logging.debug("read %s = %s", tag, section)
        references = [ref.text for ref in section.findall("SL")]
        readings.append("\n".join(references))

    return {
        'date_id': date_id,
        'date_full': date_full,
        'watchwords': watchwords,
        'readings': readings,
    }
def parse_file(filename):
    """Render every <LOSUNG> element of an XML file as one HTML document.

    The file is parsed with lxml, each ``LOSUNG`` element is converted by
    ``parse_one`` and the resulting list is passed as ``articles`` to the
    ``czech.html`` template, which is looked up in the ``templates``
    directory (relative to the current working directory).

    Returns:
        The rendered template as a string.
    """
    root = et.parse(filename).getroot()

    env = jinja2.Environment(loader=jinja2.FileSystemLoader('templates'))
    template = env.get_template('czech.html')

    # iter() is the supported replacement for the deprecated getiterator()
    article_list = [parse_one(los) for los in root.iter("LOSUNG")]

    return template.render(articles=article_list)
if __name__ == "__main__":
    # The first command-line argument is the XML file to convert; without
    # it, fail with a usage hint instead of an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit("Usage: %s XML_FILE" % sys.argv[0])
    print(parse_file(sys.argv[1]))
|