From 3e1ebaaf407f61bb5fb6b3e052a2c42111c42ac0 Mon Sep 17 00:00:00 2001 From: Matěj Cepl Date: Sat, 22 Dec 2012 23:12:03 +0100 Subject: Czech version works and tests pass. --- generate_html_cs.py | 52 +++++++++++++++++++++++++++++++++++--------------- icon.xcf | Bin 3346966 -> 0 bytes test_generate_html.py | 20 ++++++++++++------- 3 files changed, 50 insertions(+), 22 deletions(-) delete mode 100644 icon.xcf diff --git a/generate_html_cs.py b/generate_html_cs.py index eb52933..2251af8 100755 --- a/generate_html_cs.py +++ b/generate_html_cs.py @@ -4,29 +4,30 @@ import codecs import lxml.etree as et import sys import logging -#logging.basicConfig(level=logging.DEBUG) +logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s', + level=logging.INFO) import jinja2 cur_year = 2013 book_abbrs = { "Gn": "Gen", "Ex": "Exod", - "Lv": "Lev", # NA in source code, + "Lv": "Lev", # NA in source code, "Nu": "Num", "Dt": "Deut", "Jos": "Josh", "Jdc": "Judg", - "Ruth": "Ruth", # NA in source code + "Ruth": "Ruth", # NA in source code "1Sm": "1Sam", "2Sm": "2Sam", "1Rg": "1Kgs", "2Rg": "2Kgs", "1Chr": "1Chr", "2Chr": "2Chr", "Esr": "Ezra", "Neh": "Neh", - "Esth": "Esth", # NA in source code, + "Esth": "Esth", # NA in source code, "Job": "Job", "Ps": "Ps", "Prv": "Prov", "Eccl": "Eccl", - "Song": "Song", # NA in source code, + "Song": "Song", # NA in source code, "Is": "Isa", "Jr": "Jer", "Thr": "Lam", "Ez": "Ezek", "Dn": "Dan", "Hos": "Hos", "Joel": "Joel", "Am": "Amos", - "Obad": "Obad", # NA in source code + "Obad": "Obad", # NA in source code "Jon": "Jonah", "Mch": "Mic", - "Nah": "Nah", # NA in source code + "Nah": "Nah", # NA in source code "Hab": "Hab", "Zph": "Zeph", "Hgg": "Hag", "Zch": "Zech", "Ml": "Mal", "Mt": "Matt", "Mc": "Mark", "L": "Luke", "J": "John", @@ -42,11 +43,12 @@ book_abbrs = { "Jd": "Jude", "Ap": "Rev" } + def csv2dict(filename): out_dict = {} splitRE = re.compile(r'\s+') field_names = ['no', 'test', 'cs_abbr', 'cs_name', - 'de_abbr', 'de_name'] + 'de_abbr', 'de_name'] with codecs.open(filename, 'rb', 'utf8') as csvfile: for row in csvfile: line_dict = dict(zip(field_names, splitRE.split(row))) @@ -105,7 +107,7 @@ def parse_body(elem): return wword -def parse_one(elem): +def parse_losung(elem): """ Parse one losung. Example: @@ -126,8 +128,8 @@ def parse_one(elem): """ logging.debug("losung = %s", elem) out = {} - out['date_id'] = "%4d-%02d-%02d" % (cur_year, int(elem.attrib["m"]), - int(elem.attrib["d"])) + date_id = (cur_year, int(elem.attrib["m"]), int(elem.attrib["d"]),) + out['date_id'] = "%4d-%02d-%02d" % (date_id) out['date_full'] = elem.find("TL").text out['watchwords'] = [] @@ -141,20 +143,40 @@ def parse_one(elem): sect = elem.find(read) logging.debug("read %s = %s", read, sect) out['readings'].append("\n".join([el.text - for el in sect.findall("SL")])) + for el in sect.findall("SL")])) + + return date_id, out - return out +# TODO: instead of having list, have rather a dictionary with keys being +# dates; then we can have two-pass parsing, once we would parse +# elements, then ones and particular weekly/monthly/anniversary +# readings to the right place. +# +# * What should be the key of the dictionary? (year,month,day) tuple? +# * First step should be just refactoring of the current code to use +# dictionary. +# +# For it is necessary to distinguish, based on 'type' attribute +# ('week', 'sunday', 'holiday', 'month') and put generated text to the +# right property of the day object (and then in the template to the +# right place) def parse_file(filename): tree = et.parse(filename).getroot() - article_list = [] + article_dict = {} env = jinja2.Environment(loader=jinja2.FileSystemLoader('templates')) template = env.get_template('czech.html') for los in tree.getiterator("LOSUNG"): - article_list.append(parse_one(los)) + key, text = parse_losung(los) + article_dict[key] = text + article_list = [] + article_keys = sorted(article_dict.keys()) + for key in article_keys: + article_list.append(article_dict[key]) + # FIXME does Jinja2 somehow sort dictionary if it is on the input? return template.render(articles=article_list) diff --git a/icon.xcf b/icon.xcf deleted file mode 100644 index 76b6456..0000000 Binary files a/icon.xcf and /dev/null differ diff --git a/test_generate_html.py b/test_generate_html.py index b077b2c..18e9316 100755 --- a/test_generate_html.py +++ b/test_generate_html.py @@ -2,13 +2,14 @@ import unittest import generate_html_cs as generate_html import subprocess +#import sys import lxml.etree as et import lxml.html TEST_FILE = "hes12kni.tab" test_dict = {'no': '1', 'de_name': '1.Mose', - 'cs_name': '1.Mojžíšova', 'de_abbr': 'Gn', - 'test': '-', 'cs_abbr': 'Gn'} + 'cs_name': '1.Mojžíšova', 'de_abbr': 'Gn', + 'test': '-', 'cs_abbr': 'Gn'} TEST_LOSUNGEN = "hes12-01.xml" @@ -18,6 +19,7 @@ class TestProcessCSV(unittest.TestCase): self.assertEqual(len(los_dict), 78) self.assertEqual(los_dict['Gn'], test_dict) + class TestMethods(unittest.TestCase): def test_translate_book_abbrs(self): """ @@ -46,8 +48,10 @@ class TestMethods(unittest.TestCase): ''' test_element = et.fromstring(test_element_str) - wword = generate_html.parse_one(test_element) - self.assertEqual(wword['watchwords'][1]['int_ref_id']['book'],'John') + wword = generate_html.parse_losung(test_element) + self.assertEqual(wword[1]['watchwords'][1]['int_ref_id']['book'], + 'John') + class TestProcessLosungen(unittest.TestCase): def setUp(self): @@ -55,7 +59,9 @@ class TestProcessLosungen(unittest.TestCase): def test_parse_losungen(self): proc = subprocess.Popen(["tidy", "-e", "-f", "/dev/null"], - stdin=subprocess.PIPE, bufsize=-1, close_fds=True) + stdin=subprocess.PIPE, + bufsize=-1, + close_fds=True) proc.communicate(self.los_elements.encode("utf-8")) self.assertEqual(proc.returncode, 0) @@ -75,8 +81,8 @@ class TestProcessLosungen(unittest.TestCase): self.assertEqual(len(style_element), 1) meta_element = root.xpath("//head/meta[@content]") self.assertEqual(meta_element[0].attrib["content"], - "width=device-width, initial-scale=1.0, " + \ - "maximum-scale=2.0, user-scalable=yes") + "width=device-width, initial-scale=1.0, " + + "maximum-scale=2.0, user-scalable=yes") if __name__ == '__main__': -- cgit