aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--man/en/sos-clean.112
-rw-r--r--sos/cleaner/__init__.py21
-rw-r--r--sos/cleaner/obfuscation_archive.py67
-rw-r--r--sos/collector/__init__.py5
-rw-r--r--sos/report/__init__.py6
-rw-r--r--tests/report_tests/report_with_mask.py42
-rw-r--r--tests/test_data/fake_plugins/binary_test.py21
-rw-r--r--tests/test_data/var/log/binary_test.tar.xzbin0 -> 208 bytes
8 files changed, 167 insertions, 7 deletions
diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1
index 4856b43b..b77bc63c 100644
--- a/man/en/sos-clean.1
+++ b/man/en/sos-clean.1
@@ -9,6 +9,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports
[\-\-map-file]
[\-\-jobs]
[\-\-no-update]
+ [\-\-keep-binary-files]
.SH DESCRIPTION
\fBsos clean\fR or \fBsos mask\fR is an sos subcommand used to obfuscate sensitive information from
@@ -77,6 +78,17 @@ Default: 4
.TP
.B \-\-no-update
Do not write the mapping file contents to /etc/sos/cleaner/default_mapping
+.TP
+.B \-\-keep-binary-files
+Keep unprocessable binary files in the archive, rather than removing them.
+
+Note that binary files cannot be obfuscated, and thus keeping them in the archive
+may result in otherwise sensitive information being included in the final archive.
+Users should review any archive that keeps binary files in place before sending to
+a third party.
+
+Default: False (remove encountered binary files)
+
.SH SEE ALSO
.BR sos (1)
.BR sos-report (1)
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
index 55465b85..f88ff8a0 100644
--- a/sos/cleaner/__init__.py
+++ b/sos/cleaner/__init__.py
@@ -47,6 +47,7 @@ class SoSCleaner(SoSComponent):
'keyword_file': None,
'map_file': '/etc/sos/cleaner/default_mapping',
'no_update': False,
+ 'keep_binary_files': False,
'target': '',
'usernames': []
}
@@ -183,6 +184,11 @@ third party.
action='store_true',
help='Do not update the --map file with new '
'mappings from this run')
+ clean_grp.add_argument('--keep-binary-files', default=False,
+ action='store_true',
+ dest='keep_binary_files',
+ help='Keep unprocessable binary files in the '
+ 'archive instead of removing them')
clean_grp.add_argument('--usernames', dest='usernames', default=[],
action='extend',
help='List of usernames to obfuscate')
@@ -467,6 +473,11 @@ third party.
"%s concurrently\n"
% (len(self.report_paths), self.opts.jobs))
self.ui_log.info(msg)
+ if self.opts.keep_binary_files:
+ self.ui_log.warning(
+ "WARNING: binary files that potentially contain sensitive "
+ "information will NOT be removed from the final archive\n"
+ )
pool = ThreadPoolExecutor(self.opts.jobs)
pool.map(self.obfuscate_report, self.report_paths, chunksize=1)
pool.shutdown(wait=True)
@@ -539,6 +550,10 @@ third party.
short_name = fname.split(archive.archive_name + '/')[1]
if archive.should_skip_file(short_name):
continue
+ if (not self.opts.keep_binary_files and
+ archive.should_remove_file(short_name)):
+ archive.remove_file(short_name)
+ continue
try:
count = self.obfuscate_file(fname, short_name,
archive.archive_name)
@@ -574,7 +589,11 @@ third party.
arc_md.add_field('files_obfuscated', len(archive.file_sub_list))
arc_md.add_field('total_substitutions', archive.total_sub_count)
self.completed_reports.append(archive)
- archive.report_msg("Obfuscation completed")
+ rmsg = ''
+ if archive.removed_file_count:
+ rmsg = " [removed %s unprocessable files]"
+ rmsg = rmsg % archive.removed_file_count
+ archive.report_msg("Obfuscation completed%s" % rmsg)
except Exception as err:
self.ui_log.info("Exception while processing %s: %s"
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
index c64ab13b..76841b51 100644
--- a/sos/cleaner/obfuscation_archive.py
+++ b/sos/cleaner/obfuscation_archive.py
@@ -28,6 +28,7 @@ class SoSObfuscationArchive():
file_sub_list = []
total_sub_count = 0
+ removed_file_count = 0
def __init__(self, archive_path, tmpdir):
self.archive_path = archive_path
@@ -62,11 +63,7 @@ class SoSObfuscationArchive():
'sys/firmware',
'sys/fs',
'sys/kernel/debug',
- 'sys/module',
- r'.*\.tar$', # TODO: support archive unpacking
- # Be explicit with these tar matches to avoid matching commands
- r'.*\.tar\.xz',
- '.*.gz'
+ 'sys/module'
]
@property
@@ -76,6 +73,17 @@ class SoSObfuscationArchive():
except Exception:
return False
+    def remove_file(self, fname):
+        """Delete a binary file, which cleaner cannot reliably obfuscate,
+        from the archive and count the removal.
+        """
+        target = self.get_file_path(fname)
+        if not target:
+            return  # an empty path means there is nothing to remove
+        self.log_info("Removing binary file '%s' from archive" % fname)
+        os.remove(target)
+        self.removed_file_count += 1
+
def extract(self):
if self.is_tarfile:
self.report_msg("Extracting...")
@@ -227,3 +235,52 @@ class SoSObfuscationArchive():
if filename.startswith(_skip) or re.match(_skip, filename):
return True
return False
+
+    def should_remove_file(self, fname):
+        """Determine if the file should be removed or not, due to an inability
+        to reliably obfuscate that file based on the filename.
+
+        :param fname: Filename relative to the extracted archive root
+        :type fname: ``str``
+
+        :returns: ``True`` if the file cannot be reliably obfuscated
+        :rtype: ``bool``
+        """
+        # extensions are anchored so that names which merely contain one
+        # (e.g. 'app.bindings.conf') still get the content-based check
+        obvious_removes = [
+            r'.*\.gz$',  # TODO: support flat gz/xz extraction
+            r'.*\.xz$',
+            r'.*\.bzip2$',
+            r'.*\.tar\..*',  # TODO: support archive unpacking
+            r'.*\.txz$',
+            r'.*\.tgz$',
+            r'.*\.bin$',
+            r'.*\.journal$',
+            r'.*~$'
+        ]
+
+        # a matching filename is removed without doing the read test
+        if any(re.match(_arc_reg, fname) for _arc_reg in obvious_removes):
+            return True
+
+        return self.file_is_binary(fname)
+
+    def file_is_binary(self, fname):
+        """Determine if the file is binary; non-regular files never are.
+
+        :param fname: Filename relative to the extracted archive root
+        :type fname: ``str``
+
+        :returns: ``True`` if file is binary, else ``False``
+        :rtype: ``bool``
+        """
+        full_fname = self.get_file_path(fname)
+        if not (full_fname and os.path.isfile(full_fname)):
+            return False  # e.g. a directory symlink; open() would raise
+        with open(full_fname, 'tr') as tfile:
+            try:
+                tfile.read(1)  # text-mode read raises on undecodable bytes
+                return False
+            except UnicodeDecodeError:
+                return True
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
index 9884836c..469db60d 100644
--- a/sos/collector/__init__.py
+++ b/sos/collector/__init__.py
@@ -67,6 +67,7 @@ class SoSCollector(SoSComponent):
'jobs': 4,
'keywords': [],
'keyword_file': None,
+ 'keep_binary_files': False,
'label': '',
'list_options': False,
'log_size': 0,
@@ -410,6 +411,10 @@ class SoSCollector(SoSComponent):
dest='clean',
default=False, action='store_true',
help='Obfuscate sensistive information')
+ cleaner_grp.add_argument('--keep-binary-files', default=False,
+ action='store_true', dest='keep_binary_files',
+ help='Keep unprocessable binary files in the '
+ 'archive instead of removing them')
cleaner_grp.add_argument('--domains', dest='domains', default=[],
action='extend',
help='Additional domain names to obfuscate')
diff --git a/sos/report/__init__.py b/sos/report/__init__.py
index d4345409..2cedc76e 100644
--- a/sos/report/__init__.py
+++ b/sos/report/__init__.py
@@ -82,6 +82,7 @@ class SoSReport(SoSComponent):
'case_id': '',
'chroot': 'auto',
'clean': False,
+ 'keep_binary_files': False,
'desc': '',
'domains': [],
'dry_run': False,
@@ -344,6 +345,11 @@ class SoSReport(SoSComponent):
default='/etc/sos/cleaner/default_mapping',
help=('Provide a previously generated mapping'
' file for obfuscation'))
+ cleaner_grp.add_argument('--keep-binary-files', default=False,
+ action='store_true',
+ dest='keep_binary_files',
+ help='Keep unprocessable binary files in the '
+ 'archive instead of removing them')
cleaner_grp.add_argument('--usernames', dest='usernames', default=[],
action='extend',
help='List of usernames to obfuscate')
diff --git a/tests/report_tests/report_with_mask.py b/tests/report_tests/report_with_mask.py
index a62888ae..4f94ba33 100644
--- a/tests/report_tests/report_with_mask.py
+++ b/tests/report_tests/report_with_mask.py
@@ -6,7 +6,7 @@
#
# See the LICENSE file in the source distribution for further information.
-from sos_tests import StageOneReportTest
+from sos_tests import StageOneReportTest, StageTwoReportTest
import re
@@ -67,3 +67,43 @@ class ReportWithCleanedKeywords(StageOneReportTest):
def test_keyword_obfuscated_in_file(self):
self.assertFileNotHasContent('sos_commands/kernel/uname_-a', 'Linux')
+
+
+class DefaultRemoveBinaryFilesTest(StageTwoReportTest):
+    """Testing that --clean removes binary files from the archive by default
+
+    :avocado: tags=stagetwo
+    """
+
+    files = ['/var/log/binary_test.tar.xz']
+    install_plugins = ['binary_test']
+    sos_cmd = '--clean -o binary_test,kernel,host'
+
+    def test_binary_removed(self):
+        self.assertFileNotCollected('var/log/binary_test.tar.xz')
+
+    def test_binaries_removed_reported(self):
+        self.assertOutputContains(r'\[removed .* unprocessable files\]')
+
+
+class KeepBinaryFilesTest(StageTwoReportTest):
+    """Testing that --keep-binary-files leaves binaries in place and warns
+
+    :avocado: tags=stagetwo
+    """
+
+    files = ['/var/log/binary_test.tar.xz']
+    install_plugins = ['binary_test']
+    sos_cmd = '--clean --keep-binary-files -o binary_test,kernel,host'
+
+    def test_warning_message_shown(self):
+        self.assertOutputContains(
+            'WARNING: binary files that potentially contain sensitive information '
+            'will NOT be removed from the final archive'
+        )
+
+    def test_binary_is_in_archive(self):
+        self.assertFileCollected('var/log/binary_test.tar.xz')
+
+    def test_no_binaries_reported_removed(self):
+        self.assertOutputNotContains(r'\[removed .* unprocessable files\]')
diff --git a/tests/test_data/fake_plugins/binary_test.py b/tests/test_data/fake_plugins/binary_test.py
new file mode 100644
index 00000000..80bc841b
--- /dev/null
+++ b/tests/test_data/fake_plugins/binary_test.py
@@ -0,0 +1,21 @@
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from sos.report.plugins import Plugin, IndependentPlugin
+
+
+class BinaryPlugin(Plugin, IndependentPlugin):
+    """Fake plugin that copies a binary file into the report, for testing
+    binary removal with --clean
+    """
+
+    short_desc = 'test plugin for removing binaries with --clean'
+    plugin_name = 'binary_test'
+
+    def setup(self):
+        self.add_copy_spec('/var/log/binary_test.tar.xz')
diff --git a/tests/test_data/var/log/binary_test.tar.xz b/tests/test_data/var/log/binary_test.tar.xz
new file mode 100644
index 00000000..6031c869
--- /dev/null
+++ b/tests/test_data/var/log/binary_test.tar.xz
Binary files differ