aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJake Hunsaker <jhunsake@redhat.com>2022-09-13 11:38:15 -0400
committerJake Hunsaker <jhunsake@redhat.com>2022-09-19 09:55:04 -0400
commit872ec715e7b292d8f17edceb84bf8b5ac97014a1 (patch)
tree043e90d9394d0cc809ee8dfc41dfc2411254f440
parent11317c6a8077026891c4a3c178510bcaf98b8800 (diff)
downloadsos-872ec715e7b292d8f17edceb84bf8b5ac97014a1.tar.gz
[cleaner] Replace encoding errors when opening files
A scenario was found where, if a file had encoding issues when `sos clean` went to open the file for obfuscation, we would stop processing the file but leave it in the archive, which had the potential to leave unobfuscated information in that file in the archive. Fix this by using the `errors='replace'` parameter when opening archive files. This allows us to continue parsing the file normally, while replacing the problematic characters with `?`s. Closes: #3015 Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
-rw-r--r--sos/cleaner/__init__.py2
-rw-r--r--tests/cleaner_tests/unicode_in_file.py34
-rw-r--r--tests/test_data/fake_plugins/unicode_test.py20
-rw-r--r--tests/test_data/tmp/sos-test-unicode.txt5
4 files changed, 60 insertions, 1 deletions
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
index 7940999d..ef072a23 100644
--- a/sos/cleaner/__init__.py
+++ b/sos/cleaner/__init__.py
@@ -728,7 +728,7 @@ third party.
_skip.match(short_name) for _skip in _p.skip_patterns
)
]
- with open(filename, 'r') as fname:
+ with open(filename, 'r', errors='replace') as fname:
for line in fname:
try:
line, count = self.obfuscate_line(line, _parsers)
diff --git a/tests/cleaner_tests/unicode_in_file.py b/tests/cleaner_tests/unicode_in_file.py
new file mode 100644
index 00000000..522cedd7
--- /dev/null
+++ b/tests/cleaner_tests/unicode_in_file.py
@@ -0,0 +1,34 @@
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from sos_tests import StageTwoReportTest
+
+
+class UnicodeOpenTest(StageTwoReportTest):
+ """This test ensures that we can safely open files that have embedded
+ unicode in them, and that those files do not trigger an exception that
+ leaves them uncleaned.
+
+ :avocado: tags=stagetwo
+ """
+
+ sos_cmd = '--clean -o unicode_test,networking,host'
+ files = ['/tmp/sos-test-unicode.txt']
+ install_plugins = ['unicode_test']
+
+ def test_file_was_collected(self):
+ self.assertFileCollected('/tmp/sos-test-unicode.txt')
+
+ def test_file_was_opened(self):
+ # if this fails, then we hit an exception when opening the file
+ self.assertSosLogContains('Obfuscating tmp/sos-test-unicode.txt')
+ self.assertSosLogNotContains('.*Unable to parse.*')
+
+ def test_obfuscation_complete(self):
+ # make sure that we didn't stop processing the file after the unicode
+ self.assertFileNotHasContent('tmp/sos-test-unicode.txt', '192.168.1.1')
diff --git a/tests/test_data/fake_plugins/unicode_test.py b/tests/test_data/fake_plugins/unicode_test.py
new file mode 100644
index 00000000..541dfb5c
--- /dev/null
+++ b/tests/test_data/fake_plugins/unicode_test.py
@@ -0,0 +1,20 @@
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from sos.report.plugins import Plugin, IndependentPlugin
+
+
+class UnicodeTest(Plugin, IndependentPlugin):
+ """Fake plugin to test the handling of a file with embedded unicode
+ """
+
+ plugin_name = 'unicode_test'
+ short_desc = 'Fake plugin to test unicode file handling'
+
+ def setup(self):
+ self.add_copy_spec('/tmp/sos-test-unicode.txt')
diff --git a/tests/test_data/tmp/sos-test-unicode.txt b/tests/test_data/tmp/sos-test-unicode.txt
new file mode 100644
index 00000000..7dc00099
--- /dev/null
+++ b/tests/test_data/tmp/sos-test-unicode.txt
@@ -0,0 +1,5 @@
+This is a line with no unicode in it.
+This line has some in it æßøĄ.
+If opened normally, the last line may cause errors.
+So this file is used to test opening with errors='replace'.
+This line has the address 192.168.1.1 in it to ensure our cleaner tests are actually processing this file.