aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJake Hunsaker <jhunsake@redhat.com>2021-05-05 17:02:04 -0400
committerJake Hunsaker <jhunsake@redhat.com>2021-06-15 10:31:03 -0400
commit892bbd8114703f5a4d23aa77ba5829b7ba59446f (patch)
treebfb0aadf56224f18d0d6fc1783baaaca3326a694
parent34d997ebaea769b31e577526613e3e9ccb0893f8 (diff)
downloadsos-892bbd8114703f5a4d23aa77ba5829b7ba59446f.tar.gz
[cleaner] Remove binary files by default
Binary files generally speaking cannot be obfuscated, and as such we should remove them from archives being obfuscated by default so that sensitive data is not mistakenly included in an obfuscated archive. This commits adds a new `--keep-binary-files` option that if used will keep any encountered binary files in the final archive. The default option of `false` will ensure that encountered binary files are removed. The number of removed binary files per archive is reported when obfuscation is completed for that archive. Closes: #2478 Resolves: #2524 Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
-rw-r--r--man/en/sos-clean.112
-rw-r--r--sos/cleaner/__init__.py21
-rw-r--r--sos/cleaner/obfuscation_archive.py67
-rw-r--r--sos/collector/__init__.py5
-rw-r--r--sos/report/__init__.py6
-rw-r--r--tests/report_tests/report_with_mask.py42
-rw-r--r--tests/test_data/fake_plugins/binary_test.py21
-rw-r--r--tests/test_data/var/log/binary_test.tar.xzbin0 -> 208 bytes
8 files changed, 167 insertions, 7 deletions
diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1
index 4856b43b..b77bc63c 100644
--- a/man/en/sos-clean.1
+++ b/man/en/sos-clean.1
@@ -9,6 +9,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports
[\-\-map-file]
[\-\-jobs]
[\-\-no-update]
+ [\-\-keep-binary-files]
.SH DESCRIPTION
\fBsos clean\fR or \fBsos mask\fR is an sos subcommand used to obfuscate sensitive information from
@@ -77,6 +78,17 @@ Default: 4
.TP
.B \-\-no-update
Do not write the mapping file contents to /etc/sos/cleaner/default_mapping
+.TP
+.B \-\-keep-binary-files
+Keep unprocessable binary files in the archive, rather than removing them.
+
+Note that binary files cannot be obfuscated, and thus keeping them in the archive
+may result in otherwise sensitive information being included in the final archive.
+Users should review any archive that keeps binary files in place before sending to
+a third party.
+
+Default: False (remove encountered binary files)
+
.SH SEE ALSO
.BR sos (1)
.BR sos-report (1)
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
index 55465b85..f88ff8a0 100644
--- a/sos/cleaner/__init__.py
+++ b/sos/cleaner/__init__.py
@@ -47,6 +47,7 @@ class SoSCleaner(SoSComponent):
'keyword_file': None,
'map_file': '/etc/sos/cleaner/default_mapping',
'no_update': False,
+ 'keep_binary_files': False,
'target': '',
'usernames': []
}
@@ -183,6 +184,11 @@ third party.
action='store_true',
help='Do not update the --map file with new '
'mappings from this run')
+ clean_grp.add_argument('--keep-binary-files', default=False,
+ action='store_true',
+ dest='keep_binary_files',
+ help='Keep unprocessable binary files in the '
+ 'archive instead of removing them')
clean_grp.add_argument('--usernames', dest='usernames', default=[],
action='extend',
help='List of usernames to obfuscate')
@@ -467,6 +473,11 @@ third party.
"%s concurrently\n"
% (len(self.report_paths), self.opts.jobs))
self.ui_log.info(msg)
+ if self.opts.keep_binary_files:
+ self.ui_log.warning(
+ "WARNING: binary files that potentially contain sensitive "
+ "information will NOT be removed from the final archive\n"
+ )
pool = ThreadPoolExecutor(self.opts.jobs)
pool.map(self.obfuscate_report, self.report_paths, chunksize=1)
pool.shutdown(wait=True)
@@ -539,6 +550,10 @@ third party.
short_name = fname.split(archive.archive_name + '/')[1]
if archive.should_skip_file(short_name):
continue
+ if (not self.opts.keep_binary_files and
+ archive.should_remove_file(short_name)):
+ archive.remove_file(short_name)
+ continue
try:
count = self.obfuscate_file(fname, short_name,
archive.archive_name)
@@ -574,7 +589,11 @@ third party.
arc_md.add_field('files_obfuscated', len(archive.file_sub_list))
arc_md.add_field('total_substitutions', archive.total_sub_count)
self.completed_reports.append(archive)
- archive.report_msg("Obfuscation completed")
+ rmsg = ''
+ if archive.removed_file_count:
+ rmsg = " [removed %s unprocessable files]"
+ rmsg = rmsg % archive.removed_file_count
+ archive.report_msg("Obfuscation completed%s" % rmsg)
except Exception as err:
self.ui_log.info("Exception while processing %s: %s"
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
index c64ab13b..76841b51 100644
--- a/sos/cleaner/obfuscation_archive.py
+++ b/sos/cleaner/obfuscation_archive.py
@@ -28,6 +28,7 @@ class SoSObfuscationArchive():
file_sub_list = []
total_sub_count = 0
+ removed_file_count = 0
def __init__(self, archive_path, tmpdir):
self.archive_path = archive_path
@@ -62,11 +63,7 @@ class SoSObfuscationArchive():
'sys/firmware',
'sys/fs',
'sys/kernel/debug',
- 'sys/module',
- r'.*\.tar$', # TODO: support archive unpacking
- # Be explicit with these tar matches to avoid matching commands
- r'.*\.tar\.xz',
- '.*.gz'
+ 'sys/module'
]
@property
@@ -76,6 +73,17 @@ class SoSObfuscationArchive():
except Exception:
return False
+ def remove_file(self, fname):
+ """Remove a file from the archive. This is used when cleaner encounters
+ a binary file, which we cannot reliably obfuscate.
+ """
+ full_fname = self.get_file_path(fname)
+ # don't call a blank remove() here
+ if full_fname:
+ self.log_info("Removing binary file '%s' from archive" % fname)
+ os.remove(full_fname)
+ self.removed_file_count += 1
+
def extract(self):
if self.is_tarfile:
self.report_msg("Extracting...")
@@ -227,3 +235,52 @@ class SoSObfuscationArchive():
if filename.startswith(_skip) or re.match(_skip, filename):
return True
return False
+
+ def should_remove_file(self, fname):
+ """Determine if the file should be removed or not, due to an inability
+ to reliably obfuscate that file based on the filename.
+
+ :param fname: Filename relative to the extracted archive root
+ :type fname: ``str``
+
+ :returns: ``True`` if the file cannot be reliably obfuscated
+ :rtype: ``bool``
+ """
+ obvious_removes = [
+ r'.*\.gz', # TODO: support flat gz/xz extraction
+ r'.*\.xz',
+ r'.*\.bzip2',
+ r'.*\.tar\..*', # TODO: support archive unpacking
+ r'.*\.txz$',
+ r'.*\.tgz$',
+ r'.*\.bin',
+ r'.*\.journal',
+ r'.*\~$'
+ ]
+
+ # if the filename matches, it is obvious we can remove them without
+ # doing the read test
+ for _arc_reg in obvious_removes:
+ if re.match(_arc_reg, fname):
+ return True
+
+ return self.file_is_binary(fname)
+
+ def file_is_binary(self, fname):
+ """Determine if the file is a binary file or not.
+
+
+ :param fname: Filename relative to the extracted archive root
+ :type fname: ``str``
+
+ :returns: ``True`` if file is binary, else ``False``
+ :rtype: ``bool``
+ """
+ with open(self.get_file_path(fname), 'tr') as tfile:
+ try:
+ # when opened as above (tr), reading binary content will raise
+ # an exception
+ tfile.read(1)
+ return False
+ except UnicodeDecodeError:
+ return True
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
index 9884836c..469db60d 100644
--- a/sos/collector/__init__.py
+++ b/sos/collector/__init__.py
@@ -67,6 +67,7 @@ class SoSCollector(SoSComponent):
'jobs': 4,
'keywords': [],
'keyword_file': None,
+ 'keep_binary_files': False,
'label': '',
'list_options': False,
'log_size': 0,
@@ -410,6 +411,10 @@ class SoSCollector(SoSComponent):
dest='clean',
default=False, action='store_true',
help='Obfuscate sensistive information')
+ cleaner_grp.add_argument('--keep-binary-files', default=False,
+ action='store_true', dest='keep_binary_files',
+ help='Keep unprocessable binary files in the '
+ 'archive instead of removing them')
cleaner_grp.add_argument('--domains', dest='domains', default=[],
action='extend',
help='Additional domain names to obfuscate')
diff --git a/sos/report/__init__.py b/sos/report/__init__.py
index d4345409..2cedc76e 100644
--- a/sos/report/__init__.py
+++ b/sos/report/__init__.py
@@ -82,6 +82,7 @@ class SoSReport(SoSComponent):
'case_id': '',
'chroot': 'auto',
'clean': False,
+ 'keep_binary_files': False,
'desc': '',
'domains': [],
'dry_run': False,
@@ -344,6 +345,11 @@ class SoSReport(SoSComponent):
default='/etc/sos/cleaner/default_mapping',
help=('Provide a previously generated mapping'
' file for obfuscation'))
+ cleaner_grp.add_argument('--keep-binary-files', default=False,
+ action='store_true',
+ dest='keep_binary_files',
+ help='Keep unprocessable binary files in the '
+ 'archive instead of removing them')
cleaner_grp.add_argument('--usernames', dest='usernames', default=[],
action='extend',
help='List of usernames to obfuscate')
diff --git a/tests/report_tests/report_with_mask.py b/tests/report_tests/report_with_mask.py
index a62888ae..4f94ba33 100644
--- a/tests/report_tests/report_with_mask.py
+++ b/tests/report_tests/report_with_mask.py
@@ -6,7 +6,7 @@
#
# See the LICENSE file in the source distribution for further information.
-from sos_tests import StageOneReportTest
+from sos_tests import StageOneReportTest, StageTwoReportTest
import re
@@ -67,3 +67,43 @@ class ReportWithCleanedKeywords(StageOneReportTest):
def test_keyword_obfuscated_in_file(self):
self.assertFileNotHasContent('sos_commands/kernel/uname_-a', 'Linux')
+
+
+class DefaultRemoveBinaryFilesTest(StageTwoReportTest):
+ """Testing that binary files are removed by default
+
+ :avocado: tags=stagetwo
+ """
+
+ files = ['/var/log/binary_test.tar.xz']
+ install_plugins = ['binary_test']
+ sos_cmd = '--clean -o binary_test,kernel,host'
+
+ def test_binary_removed(self):
+ self.assertFileNotCollected('var/log/binary_test.tar.xz')
+
+ def test_binaries_removed_reported(self):
+ self.assertOutputContains('\[removed .* unprocessable files\]')
+
+
+class KeepBinaryFilesTest(StageTwoReportTest):
+ """Testing that --keep-binary-files will function as expected
+
+ :avocado: tags=stagetwo
+ """
+
+ files = ['/var/log/binary_test.tar.xz']
+ install_plugins = ['binary_test']
+ sos_cmd = '--clean --keep-binary-files -o binary_test,kernel,host'
+
+ def test_warning_message_shown(self):
+ self.assertOutputContains(
+ 'WARNING: binary files that potentially contain sensitive information '
+ 'will NOT be removed from the final archive'
+ )
+
+ def test_binary_is_in_archive(self):
+ self.assertFileCollected('var/log/binary_test.tar.xz')
+
+ def test_no_binaries_reported_removed(self):
+ self.assertOutputNotContains('\[removed .* unprocessable files\]')
diff --git a/tests/test_data/fake_plugins/binary_test.py b/tests/test_data/fake_plugins/binary_test.py
new file mode 100644
index 00000000..80bc841b
--- /dev/null
+++ b/tests/test_data/fake_plugins/binary_test.py
@@ -0,0 +1,21 @@
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from sos.report.plugins import Plugin, IndependentPlugin
+
+
+class BinaryPlugin(Plugin, IndependentPlugin):
+ """Test plugin for testing binary removal with --clean
+ """
+
+ plugin_name = 'binary_test'
+ short_desc = 'test plugin for removing binaries with --clean'
+
+
+ def setup(self):
+ self.add_copy_spec('/var/log/binary_test.tar.xz')
diff --git a/tests/test_data/var/log/binary_test.tar.xz b/tests/test_data/var/log/binary_test.tar.xz
new file mode 100644
index 00000000..6031c869
--- /dev/null
+++ b/tests/test_data/var/log/binary_test.tar.xz
Binary files differ