diff options
-rw-r--r-- | man/en/sos-clean.1 | 12 | ||||
-rw-r--r-- | sos/cleaner/__init__.py | 21 | ||||
-rw-r--r-- | sos/cleaner/obfuscation_archive.py | 67 | ||||
-rw-r--r-- | sos/collector/__init__.py | 5 | ||||
-rw-r--r-- | sos/report/__init__.py | 6 | ||||
-rw-r--r-- | tests/report_tests/report_with_mask.py | 42 | ||||
-rw-r--r-- | tests/test_data/fake_plugins/binary_test.py | 21 | ||||
-rw-r--r-- | tests/test_data/var/log/binary_test.tar.xz | bin | 0 -> 208 bytes |
8 files changed, 167 insertions, 7 deletions
diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1 index 4856b43b..b77bc63c 100644 --- a/man/en/sos-clean.1 +++ b/man/en/sos-clean.1 @@ -9,6 +9,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports [\-\-map-file] [\-\-jobs] [\-\-no-update] + [\-\-keep-binary-files] .SH DESCRIPTION \fBsos clean\fR or \fBsos mask\fR is an sos subcommand used to obfuscate sensitive information from @@ -77,6 +78,17 @@ Default: 4 .TP .B \-\-no-update Do not write the mapping file contents to /etc/sos/cleaner/default_mapping +.TP +.B \-\-keep-binary-files +Keep unprocessable binary files in the archive, rather than removing them. + +Note that binary files cannot be obfuscated, and thus keeping them in the archive +may result in otherwise sensitive information being included in the final archive. +Users should review any archive that keeps binary files in place before sending to +a third party. + +Default: False (remove encountered binary files) + .SH SEE ALSO .BR sos (1) .BR sos-report (1) diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index 55465b85..f88ff8a0 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -47,6 +47,7 @@ class SoSCleaner(SoSComponent): 'keyword_file': None, 'map_file': '/etc/sos/cleaner/default_mapping', 'no_update': False, + 'keep_binary_files': False, 'target': '', 'usernames': [] } @@ -183,6 +184,11 @@ third party. action='store_true', help='Do not update the --map file with new ' 'mappings from this run') + clean_grp.add_argument('--keep-binary-files', default=False, + action='store_true', + dest='keep_binary_files', + help='Keep unprocessable binary files in the ' + 'archive instead of removing them') clean_grp.add_argument('--usernames', dest='usernames', default=[], action='extend', help='List of usernames to obfuscate') @@ -467,6 +473,11 @@ third party. "%s concurrently\n" % (len(self.report_paths), self.opts.jobs)) self.ui_log.info(msg) + if self.opts.keep_binary_files: + self.ui_log.warning( + "WARNING: binary files that potentially contain sensitive " + "information will NOT be removed from the final archive\n" + ) pool = ThreadPoolExecutor(self.opts.jobs) pool.map(self.obfuscate_report, self.report_paths, chunksize=1) pool.shutdown(wait=True) @@ -539,6 +550,10 @@ third party. short_name = fname.split(archive.archive_name + '/')[1] if archive.should_skip_file(short_name): continue + if (not self.opts.keep_binary_files and + archive.should_remove_file(short_name)): + archive.remove_file(short_name) + continue try: count = self.obfuscate_file(fname, short_name, archive.archive_name) @@ -574,7 +589,11 @@ third party. arc_md.add_field('files_obfuscated', len(archive.file_sub_list)) arc_md.add_field('total_substitutions', archive.total_sub_count) self.completed_reports.append(archive) - archive.report_msg("Obfuscation completed") + rmsg = '' + if archive.removed_file_count: + rmsg = " [removed %s unprocessable files]" + rmsg = rmsg % archive.removed_file_count + archive.report_msg("Obfuscation completed%s" % rmsg) except Exception as err: self.ui_log.info("Exception while processing %s: %s" diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py index c64ab13b..76841b51 100644 --- a/sos/cleaner/obfuscation_archive.py +++ b/sos/cleaner/obfuscation_archive.py @@ -28,6 +28,7 @@ class SoSObfuscationArchive(): file_sub_list = [] total_sub_count = 0 + removed_file_count = 0 def __init__(self, archive_path, tmpdir): self.archive_path = archive_path @@ -62,11 +63,7 @@ class SoSObfuscationArchive(): 'sys/firmware', 'sys/fs', 'sys/kernel/debug', - 'sys/module', - r'.*\.tar$', # TODO: support archive unpacking - # Be explicit with these tar matches to avoid matching commands - r'.*\.tar\.xz', - '.*.gz' + 'sys/module' ] @property @@ -76,6 +73,17 @@ class SoSObfuscationArchive(): except Exception: return False + def remove_file(self, fname): + """Remove a file from the archive. This is used when cleaner encounters + a binary file, which we cannot reliably obfuscate. + """ + full_fname = self.get_file_path(fname) + # don't call a blank remove() here + if full_fname: + self.log_info("Removing binary file '%s' from archive" % fname) + os.remove(full_fname) + self.removed_file_count += 1 + def extract(self): if self.is_tarfile: self.report_msg("Extracting...") @@ -227,3 +235,52 @@ class SoSObfuscationArchive(): if filename.startswith(_skip) or re.match(_skip, filename): return True return False + + def should_remove_file(self, fname): + """Determine if the file should be removed or not, due to an inability + to reliably obfuscate that file based on the filename. + + :param fname: Filename relative to the extracted archive root + :type fname: ``str`` + + :returns: ``True`` if the file cannot be reliably obfuscated + :rtype: ``bool`` + """ + obvious_removes = [ + r'.*\.gz', # TODO: support flat gz/xz extraction + r'.*\.xz', + r'.*\.bzip2', + r'.*\.tar\..*', # TODO: support archive unpacking + r'.*\.txz$', + r'.*\.tgz$', + r'.*\.bin', + r'.*\.journal', + r'.*\~$' + ] + + # if the filename matches, it is obvious we can remove them without + # doing the read test + for _arc_reg in obvious_removes: + if re.match(_arc_reg, fname): + return True + + return self.file_is_binary(fname) + + def file_is_binary(self, fname): + """Determine if the file is a binary file or not. + + + :param fname: Filename relative to the extracted archive root + :type fname: ``str`` + + :returns: ``True`` if file is binary, else ``False`` + :rtype: ``bool`` + """ + with open(self.get_file_path(fname), 'tr') as tfile: + try: + # when opened as above (tr), reading binary content will raise + # an exception + tfile.read(1) + return False + except UnicodeDecodeError: + return True diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py index 9884836c..469db60d 100644 --- a/sos/collector/__init__.py +++ b/sos/collector/__init__.py @@ -67,6 +67,7 @@ class SoSCollector(SoSComponent): 'jobs': 4, 'keywords': [], 'keyword_file': None, + 'keep_binary_files': False, 'label': '', 'list_options': False, 'log_size': 0, @@ -410,6 +411,10 @@ class SoSCollector(SoSComponent): dest='clean', default=False, action='store_true', help='Obfuscate sensistive information') + cleaner_grp.add_argument('--keep-binary-files', default=False, + action='store_true', dest='keep_binary_files', + help='Keep unprocessable binary files in the ' + 'archive instead of removing them') cleaner_grp.add_argument('--domains', dest='domains', default=[], action='extend', help='Additional domain names to obfuscate') diff --git a/sos/report/__init__.py b/sos/report/__init__.py index d4345409..2cedc76e 100644 --- a/sos/report/__init__.py +++ b/sos/report/__init__.py @@ -82,6 +82,7 @@ class SoSReport(SoSComponent): 'case_id': '', 'chroot': 'auto', 'clean': False, + 'keep_binary_files': False, 'desc': '', 'domains': [], 'dry_run': False, @@ -344,6 +345,11 @@ class SoSReport(SoSComponent): default='/etc/sos/cleaner/default_mapping', help=('Provide a previously generated mapping' ' file for obfuscation')) + cleaner_grp.add_argument('--keep-binary-files', default=False, + action='store_true', + dest='keep_binary_files', + help='Keep unprocessable binary files in the ' + 'archive instead of removing them') cleaner_grp.add_argument('--usernames', dest='usernames', default=[], action='extend', help='List of usernames to obfuscate') diff --git a/tests/report_tests/report_with_mask.py b/tests/report_tests/report_with_mask.py index a62888ae..4f94ba33 100644 --- a/tests/report_tests/report_with_mask.py +++ b/tests/report_tests/report_with_mask.py @@ -6,7 +6,7 @@ # # See the LICENSE file in the source distribution for further information. -from sos_tests import StageOneReportTest +from sos_tests import StageOneReportTest, StageTwoReportTest import re @@ -67,3 +67,43 @@ class ReportWithCleanedKeywords(StageOneReportTest): def test_keyword_obfuscated_in_file(self): self.assertFileNotHasContent('sos_commands/kernel/uname_-a', 'Linux') + + +class DefaultRemoveBinaryFilesTest(StageTwoReportTest): + """Testing that binary files are removed by default + + :avocado: tags=stagetwo + """ + + files = ['/var/log/binary_test.tar.xz'] + install_plugins = ['binary_test'] + sos_cmd = '--clean -o binary_test,kernel,host' + + def test_binary_removed(self): + self.assertFileNotCollected('var/log/binary_test.tar.xz') + + def test_binaries_removed_reported(self): + self.assertOutputContains('\[removed .* unprocessable files\]') + + +class KeepBinaryFilesTest(StageTwoReportTest): + """Testing that --keep-binary-files will function as expected + + :avocado: tags=stagetwo + """ + + files = ['/var/log/binary_test.tar.xz'] + install_plugins = ['binary_test'] + sos_cmd = '--clean --keep-binary-files -o binary_test,kernel,host' + + def test_warning_message_shown(self): + self.assertOutputContains( + 'WARNING: binary files that potentially contain sensitive information ' + 'will NOT be removed from the final archive' + ) + + def test_binary_is_in_archive(self): + self.assertFileCollected('var/log/binary_test.tar.xz') + + def test_no_binaries_reported_removed(self): + self.assertOutputNotContains('\[removed .* unprocessable files\]') diff --git a/tests/test_data/fake_plugins/binary_test.py b/tests/test_data/fake_plugins/binary_test.py new file mode 100644 index 00000000..80bc841b --- /dev/null +++ b/tests/test_data/fake_plugins/binary_test.py @@ -0,0 +1,21 @@ +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. +# +# See the LICENSE file in the source distribution for further information. + +from sos.report.plugins import Plugin, IndependentPlugin + + +class BinaryPlugin(Plugin, IndependentPlugin): + """Test plugin for testing binary removal with --clean + """ + + plugin_name = 'binary_test' + short_desc = 'test plugin for removing binaries with --clean' + + + def setup(self): + self.add_copy_spec('/var/log/binary_test.tar.xz') diff --git a/tests/test_data/var/log/binary_test.tar.xz b/tests/test_data/var/log/binary_test.tar.xz Binary files differnew file mode 100644 index 00000000..6031c869 --- /dev/null +++ b/tests/test_data/var/log/binary_test.tar.xz |