From dba0b0a5b65c57c71c50ce1681f3286869e231bc Mon Sep 17 00:00:00 2001 From: Matěj Cepl Date: Mon, 12 Jan 2015 19:17:50 +0100 Subject: Add optional parameter ignore_wrong_characters to yamlish.load. When set to True, yamlish doesn't crash on invalid UTF-8 bytes but skips them instead. Fixes #2 --- setup.py | 2 +- test/__init__.py | 19 +++++++++++-------- test/test_reader_nonUTF8.py | 10 ++++++---- yamlish.py | 21 ++++++++++++++++----- 4 files changed, 34 insertions(+), 18 deletions(-) diff --git a/setup.py b/setup.py index 5cc3c91..53b072a 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ def read(fname): setup( name='yamlish', - version="0.14", + version="0.15", description='Python implementation of YAMLish', author='Matěj Cepl', author_email='mcepl@redhat.com', diff --git a/test/__init__.py b/test/__init__.py index 8005917..1cb1ad0 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -9,6 +9,7 @@ import textwrap INPUT = 1 OUTPUT = 2 +#logging.basicConfig(level=logging.DEBUG) def _generate_test_name(source): """ @@ -18,7 +19,7 @@ def _generate_test_name(source): return "test_%s" % out -def _create_input_test(test_src, tested_function): +def _create_input_test(test_src, tested_function, options=None): """ Decorate tested function to be used as a method for TestCase.
""" @@ -31,10 +32,11 @@ def _create_input_test(test_src, tested_function): got = "" if 'error' in test_src: self.assertRaises(test_src['error'], tested_function, - test_src['in']) + test_src['in'], options) else: want = test_src['out'] - got = tested_function(test_src['in']) + got = tested_function(test_src['in'], options) + logging.debug('got = type %s', type(got)) logging.debug("test_src['out'] = %s", unicode(test_src['out'])) self.assertEqual(got, want, """Result matches expected = %s @@ -45,7 +47,7 @@ def _create_input_test(test_src, tested_function): return do_test_expected -def _create_output_test(test_src, tested_function): +def _create_output_test(test_src, tested_function, options=None): """ Decorate tested function to be used as a method for TestCase. """ @@ -58,7 +60,7 @@ def _create_output_test(test_src, tested_function): # We currently don't throw any exceptions in Writer, so this # this is always false if 'error' in test_src: - self.assertRaises(test_src['error'], yamlish.dumps, test_src['in']) + self.assertRaises(test_src['error'], yamlish.dumps, test_src['in'], options) else: logging.debug("out:\n%s", textwrap.dedent(test_src['out'])) want = yaml.load(textwrap.dedent(test_src['out'])) @@ -74,7 +76,8 @@ def _create_output_test(test_src, tested_function): return do_test_expected -def generate_testsuite(test_data, test_case_shell, test_fce, direction=INPUT): +def generate_testsuite(test_data, test_case_shell, test_fce, direction=INPUT, + options=None): """ Generate tests from the test data, class to build upon and function to use for testing. 
@@ -85,8 +88,8 @@ def generate_testsuite(test_data, test_case_shell, test_fce, direction=INPUT): continue name = _generate_test_name(in_test['name']) if direction == INPUT: - test_method = _create_input_test(in_test, test_fce) + test_method = _create_input_test(in_test, test_fce, options=options) elif direction == OUTPUT: - test_method = _create_output_test(in_test, test_fce) + test_method = _create_output_test(in_test, test_fce, options=options) test_method.__name__ = str('test_%s' % name) setattr(test_case_shell, test_method.__name__, test_method) diff --git a/test/test_reader_nonUTF8.py b/test/test_reader_nonUTF8.py index 12d38f6..6f2419a 100644 --- a/test/test_reader_nonUTF8.py +++ b/test/test_reader_nonUTF8.py @@ -6,16 +6,18 @@ import unittest test_data_list = [{ "name": 'Non UTF8 test', - "in": ['--- \xbd\xd0\xe1 \xd1\xeb\xdb\xde \xdc\xdd\xde\xd3\xde' + - '\xdd\xd0 \xe7\xd5\xdb\xdd\xd5;\n', '...', ], - "out": "Нас было много на челне;", + "in": [b"--- macro `BR\xc3\xc2\xa0fbi' not defined", + '...', ], + "out": "macro `BR\xa0fbi' not defined", }] +"" class TestReader(unittest.TestCase): # IGNORE:C0111 pass -test.generate_testsuite(test_data_list, TestReader, yamlish.load) +test.generate_testsuite(test_data_list, TestReader, yamlish.load, + options={'ignore_wrong_characters': True}) if __name__ == "__main__": unittest.main() diff --git a/yamlish.py b/yamlish.py index 28e8aea..a387d35 100644 --- a/yamlish.py +++ b/yamlish.py @@ -117,8 +117,9 @@ class NullHandler(logging.Handler): def emit(self, record): pass -log = logging.getLogger("bayeux") +log = logging.getLogger("yamlish") log.addHandler(NullHandler()) +#log.setLevel(logging.DEBUG) __docformat__ = 'reStructuredText' __version__ = "0.10" @@ -167,7 +168,7 @@ yaml.add_representer(str, str_representer_compact_multiline, yaml.add_representer(unicode, str_representer_compact_multiline, Dumper=_YamlishDumper) -def load(source): +def load(source, ignore_wrong_characters=False): """ Return object loaded from a YAML 
document in source. @@ -176,16 +177,26 @@ def load(source): many others). """ out = None - log.debug("inobj:\n%s", source) + log.debug("inobj: (%s)\n%s", type(source), source) + log.debug('before ignore_wrong_characters = %s', ignore_wrong_characters) if isinstance(source, (str, unicode)): out = yaml.load(source, Loader=_YamlishLoader) log.debug("out (string) = %s", out) elif hasattr(source, "__iter__"): inobj = "" for line in source: - inobj += line + '\n' - out = load(inobj) + try: + inobj += line + '\n' + except UnicodeDecodeError: + log.debug('in ignore_wrong_characters = %s', ignore_wrong_characters) + if ignore_wrong_characters: + inobj += line.decode('utf8', errors='ignore') + '\n' + else: + raise + log.debug('restarting load with inobj as string') + out = load(inobj, ignore_wrong_characters) log.debug("out (iter) = %s", out) + log.debug("out (iter) = type %s", type(out)) return out def dump(source, destination): -- cgit