From 872ec715e7b292d8f17edceb84bf8b5ac97014a1 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker
Date: Tue, 13 Sep 2022 11:38:15 -0400
Subject: [cleaner] Replace encoding errors when opening files

A scenario was found in which, if a file had encoding issues when
`sos clean` went to open the file for obfuscation, we would stop
processing the file but leave it in the archive, which had the potential
to leave unobfuscated information in that file in the archive.

Fix this by using the `errors='replace'` parameter when opening archive
files. This allows us to continue parsing the file normally, while
replacing the problematic characters with `?`s.

Closes: #3015
Signed-off-by: Jake Hunsaker
---
 tests/cleaner_tests/unicode_in_file.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 tests/cleaner_tests/unicode_in_file.py

(limited to 'tests/cleaner_tests')

diff --git a/tests/cleaner_tests/unicode_in_file.py b/tests/cleaner_tests/unicode_in_file.py
new file mode 100644
index 00000000..522cedd7
--- /dev/null
+++ b/tests/cleaner_tests/unicode_in_file.py
@@ -0,0 +1,34 @@
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from sos_tests import StageTwoReportTest
+
+
+class UnicodeOpenTest(StageTwoReportTest):
+    """This test ensures that we can safely open files that have embedded
+    unicode in them, and that those files do not trigger an exception that
+    leaves them uncleaned.
+
+    :avocado: tags=stagetwo
+    """
+
+    sos_cmd = '--clean -o unicode_test,networking,host'
+    files = ['/tmp/sos-test-unicode.txt']
+    install_plugins = ['unicode_test']
+
+    def test_file_was_collected(self):
+        self.assertFileCollected('/tmp/sos-test-unicode.txt')
+
+    def test_file_was_opened(self):
+        # if this fails, then we hit an exception when opening the file
+        self.assertSosLogContains('Obfuscating tmp/sos-test-unicode.txt')
+        self.assertSosLogNotContains('.*Unable to parse.*')
+
+    def test_obfuscation_complete(self):
+        # make sure that we didn't stop processing the file after the unicode
+        self.assertFileNotHasContent('tmp/sos-test-unicode.txt', '192.168.1.1')
-- 
cgit