about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- sos/cleaner/__init__.py | 2
-rw-r--r-- tests/cleaner_tests/unicode_in_file.py | 34
-rw-r--r-- tests/test_data/fake_plugins/unicode_test.py | 20
-rw-r--r-- tests/test_data/tmp/sos-test-unicode.txt | 5
4 files changed, 60 insertions, 1 deletion
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
index 7940999d..ef072a23 100644
--- a/sos/cleaner/__init__.py
+++ b/sos/cleaner/__init__.py
@@ -728,7 +728,7 @@ third party.
_skip.match(short_name) for _skip in _p.skip_patterns
)
]
- with open(filename, 'r') as fname:
+ with open(filename, 'r', errors='replace') as fname:
for line in fname:
try:
line, count = self.obfuscate_line(line, _parsers)
diff --git a/tests/cleaner_tests/unicode_in_file.py b/tests/cleaner_tests/unicode_in_file.py
new file mode 100644
index 00000000..522cedd7
--- /dev/null
+++ b/tests/cleaner_tests/unicode_in_file.py
@@ -0,0 +1,34 @@
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from sos_tests import StageTwoReportTest
+
+
+class UnicodeOpenTest(StageTwoReportTest):
+ """This test ensures that we can safely open files that have embedded
+ unicode in them, and that those files do not trigger an exception that
+ leaves them uncleaned.
+
+ :avocado: tags=stagetwo
+ """
+
+ sos_cmd = '--clean -o unicode_test,networking,host'
+ files = ['/tmp/sos-test-unicode.txt']
+ install_plugins = ['unicode_test']
+
+ def test_file_was_collected(self):
+ self.assertFileCollected('/tmp/sos-test-unicode.txt')
+
+ def test_file_was_opened(self):
+ # if this fails, then we hit an exception when opening the file
+ self.assertSosLogContains('Obfuscating tmp/sos-test-unicode.txt')
+ self.assertSosLogNotContains('.*Unable to parse.*')
+
+ def test_obfuscation_complete(self):
+ # make sure that we didn't stop processing the file after the unicode
+ self.assertFileNotHasContent('tmp/sos-test-unicode.txt', '192.168.1.1')
diff --git a/tests/test_data/fake_plugins/unicode_test.py b/tests/test_data/fake_plugins/unicode_test.py
new file mode 100644
index 00000000..541dfb5c
--- /dev/null
+++ b/tests/test_data/fake_plugins/unicode_test.py
@@ -0,0 +1,20 @@
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from sos.report.plugins import Plugin, IndependentPlugin
+
+
+class UnicodeTest(Plugin, IndependentPlugin):
+ """Fake plugin to test the handling of a file with embedded unicode
+ """
+
+ plugin_name = 'unicode_test'
+ short_desc = 'Fake plugin to test unicode file handling'
+
+ def setup(self):
+ self.add_copy_spec('/tmp/sos-test-unicode.txt')
diff --git a/tests/test_data/tmp/sos-test-unicode.txt b/tests/test_data/tmp/sos-test-unicode.txt
new file mode 100644
index 00000000..7dc00099
--- /dev/null
+++ b/tests/test_data/tmp/sos-test-unicode.txt
@@ -0,0 +1,5 @@
+This is a line with no unicode in it.
+This line has some in it æßøĄ.
+If opened normally, the last line may cause errors.
+So this file is used to test opening with errors='replace'.
+This line has the address 192.168.1.1 in it to ensure our cleaner tests are actually processing this file.