about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jake Hunsaker <jhunsake@redhat.com> 2020-05-21 13:21:19 -0400
committer Jake Hunsaker <jhunsake@redhat.com> 2020-06-17 12:11:29 -0400
commit cbe18f0dba008b1ff2dbe15259263791f82e6507 (patch)
tree d6ea8be17dec35eaf25a09a8140a21129ba3f107
parent 582eacb8d92b741959ad556ffcacd748fa3bbf14 (diff)
download sos-cbe18f0dba008b1ff2dbe15259263791f82e6507.tar.gz
[collect] Extend --clean/--mask to SoSCollector

Adds functionality to SoSCollector runs to pass collected archives
through `--clean`, much the same as the functionality has been extended
to base `report` runs.

Note that running this way will, like report, only result in a single
obfuscated archive and the private mapping file. If users desire an
unobfuscated copy as well, then `sos collect` and `sos clean` should be
run separately.

Related: #1987
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>

-rw-r--r-- sos/cleaner/__init__.py 61
-rw-r--r-- sos/cleaner/obfuscation_archive.py 57
-rw-r--r-- sos/collector/__init__.py 70
-rw-r--r-- sos/component.py 16
-rw-r--r-- sos/report/__init__.py 3
5 files changed, 154 insertions, 53 deletions
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
index ec2d6ce2..ef963534 100644
--- a/sos/cleaner/__init__.py
+++ b/sos/cleaner/__init__.py
@@ -8,6 +8,7 @@
#
# See the LICENSE file in the source distribution for further information.
+import hashlib
import json
import logging
import os
@@ -58,14 +59,19 @@ class SoSCleaner(SoSComponent):
self.policy = hook_commons['policy']
self.from_cmdline = False
self.opts.map_file = '/etc/sos/cleaner/default_mapping'
- self.opts.jobs = 4
self.opts.no_update = False
+ if not hasattr(self.opts, 'jobs'):
+ self.opts.jobs = 4
self.soslog = logging.getLogger('sos')
self.ui_log = logging.getLogger('sos_ui')
+ # create the tmp subdir here to avoid a potential race condition
+ # when obfuscating a SoSCollector run during archive extraction
+ os.makedirs(os.path.join(self.tmpdir, 'cleaner'), exist_ok=True)
self.validate_map_file()
os.umask(0o77)
self.in_place = in_place
+ self.hash_name = self.policy.get_preferred_hash_name()
self.parsers = [
SoSIPParser(self.opts.map_file),
@@ -180,9 +186,9 @@ third party.
self.arc_name = self.opts.target.split('/')[-1].split('.')[:-2][0]
try:
- archive.getmember(os.path.join(self.arc_name, 'logs'))
+ archive.getmember(os.path.join(self.arc_name, 'sos_logs'))
except Exception:
- # this is not a sos archive
+ # this is not an sos archive
self.ui_log.error("Invalid target: not an sos archive")
self._exit(1)
@@ -245,14 +251,18 @@ third party.
for _file in os.listdir(self.opts.target):
if _file == 'sos_logs':
self.report_paths.append(self.opts.target)
- if re.match('sosreport.*.tar.*', _file):
- self.report_paths.append(_file)
+ if re.match('sosreport.*.tar.*[^md5]', _file):
+ self.report_paths.append(os.path.join(self.opts.target,
+ _file))
if not self.report_paths:
self.ui_log.error("Invalid target: not an sos directory")
self._exit(1)
else:
self.inspect_target_archive()
+ # remove any lingering md5 files
+ self.report_paths = [p for p in self.report_paths if '.md5' not in p]
+
if not self.report_paths:
self.ui_log.error("No valid sos archives or directories found\n")
self._exit(1)
@@ -275,10 +285,10 @@ third party.
self.write_map_for_config(_map)
if self.in_place:
- return map_path
+ arc_paths = [a.final_archive_path for a in self.completed_reports]
+ return map_path, arc_paths
final_path = None
- self.hash_name = self.policy.get_preferred_hash_name()
if len(self.completed_reports) > 1:
# we have an archive of archives, so repack the obfuscated tarball
arc_name = self.arc_name + '-obfuscated'
@@ -288,7 +298,7 @@ third party.
arc_dest = arc.final_archive_path.split('/')[-1]
self.archive.add_file(arc.final_archive_path,
dest=arc_dest)
- checksum = self.get_new_checksum(arc)
+ checksum = self.get_new_checksum(arc.final_archive_path)
if checksum is not None:
dname = "checksums/%s.%s" % (arc_dest, self.hash_name)
self.archive.add_string(checksum, dest=dname)
@@ -304,7 +314,7 @@ third party.
else:
arc = self.completed_reports[0]
arc_path = arc.final_archive_path
- checksum = self.get_new_checksum(arc)
+ checksum = self.get_new_checksum(arc.final_archive_path)
if checksum is not None:
chksum_name = "%s.%s" % (arc_path.split('/')[-1],
self.hash_name)
@@ -371,11 +381,23 @@ third party.
self.log_error("Could not update mapping config file: %s"
% err)
- def get_new_checksum(self, archive):
- """Get a new checksum for each archive"""
- checksum = archive.generate_checksum(self.hash_name)
- if checksum:
- return checksum + '\n'
+ def get_new_checksum(self, archive_path):
+ """Calculate a new checksum for the obfuscated archive, as the previous
+ checksum will no longer be valid
+ """
+ try:
+ hash_size = 1024**2 # Hash 1MiB of content at a time.
+ archive_fp = open(archive_path, 'rb')
+ digest = hashlib.new(self.hash_name)
+ while True:
+ hashdata = archive_fp.read(hash_size)
+ if not hashdata:
+ break
+ digest.update(hashdata)
+ archive_fp.close()
+ return digest.hexdigest() + '\n'
+ except Exception as err:
+ self.log_debug("Could not generate new checksum: %s" % err)
return None
def obfuscate_report_paths(self):
@@ -386,6 +408,11 @@ third party.
be obfuscated concurrently.
"""
try:
+ if len(self.report_paths) > 1:
+ msg = ("Found %s total reports to obfuscate, processing up to "
+ "%s concurrently\n"
+ % (len(self.report_paths), self.opts.jobs))
+ self.ui_log.info(msg)
pool = ThreadPoolExecutor(self.opts.jobs)
pool.map(self.obfuscate_report, self.report_paths, chunksize=1)
pool.shutdown(wait=True)
@@ -403,8 +430,9 @@ third party.
"""
try:
if not os.access(report, os.W_OK):
- self.log_info("Insufficient permissions on %s" % report)
- self.report_msg(report, "Insufficient permissions")
+ msg = "Insufficient permissions on %s" % report
+ self.log_info(msg)
+ self.ui_log.error(msg)
return
archive = SoSObfuscationArchive(report, self.tmpdir)
@@ -448,7 +476,6 @@ third party.
except Exception as err:
self.ui_log.info("Exception while processing %s: %s"
% (report, err))
- os._exit(1)
def prep_maps_from_archive(self, archive):
"""Open specific files from an archive and try to load those values
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
index fc2db4db..a5c788ef 100644
--- a/sos/cleaner/obfuscation_archive.py
+++ b/sos/cleaner/obfuscation_archive.py
@@ -8,9 +8,10 @@
#
# See the LICENSE file in the source distribution for further information.
-import hashlib
import logging
import os
+import shutil
+import stat
import tarfile
import re
@@ -104,7 +105,22 @@ class SoSObfuscationArchive():
"""Pack the extracted archive as a tarfile to then be re-compressed
"""
self.tarpath = self.extracted_path + '-obfuscated.tar'
- self.log_debug("building tar file %s" % self.tarpath)
+ # if we're running as non-root (e.g. collector), then we can have a
+ # situation where a particular path only has 0200 permissions, thus
+ # preventing it from being added via tarfile.add().
+ # Unfortunately our only choice here is to change the permissions
+ # that were preserved during report collection
+ if os.getuid() != 0:
+ self.log_debug('Verifying read permissions of archive contents')
+ for dirname, dirs, files in os.walk(self.extracted_path):
+ for filename in files:
+ fname = os.path.join(dirname, filename)
+ if not os.access(fname, os.R_OK):
+ self.log_debug("Adding owner read permissions to %s"
+ % fname.split(self.archive_path)[-1])
+ _perms = os.stat(fname).st_mode
+ os.chmod(fname, _perms | stat.S_IRUSR)
+ self.log_debug("Building tar file %s" % self.tarpath)
tar = tarfile.open(self.tarpath, mode="w")
tar.add(self.extracted_path,
arcname=os.path.split(self.archive_name)[1])
@@ -119,29 +135,32 @@ class SoSObfuscationArchive():
res = sos_get_command_output(exec_cmd, timeout=0, stderr=True)
if res['status'] == 0:
self.final_archive_path = self.tarpath + '.' + exec_cmd[0:2]
+ self.log_debug("Compressed to %s" % self.final_archive_path)
+ try:
+ self.remove_extracted_path()
+ except Exception as err:
+ self.log_debug("Failed to remove extraction directory: %s"
+ % err)
+ self.report_msg('Failed to remove temporary extraction '
+ 'directory')
else:
err = res['output'].split(':')[-1]
self.log_debug("Exception while compressing archive: %s" % err)
raise Exception(err)
- def generate_checksum(self, hash_name):
- """Calculate a new checksum for the obfuscated archive, as the previous
- checksum will no longer be valid
+ def remove_extracted_path(self):
+ """After the tarball has been re-compressed, remove the extracted path
+ so that we don't take up that duplicate space any longer during
+ execution
"""
- try:
- hash_size = 1024**2 # Hash 1MiB of content at a time.
- archive_fp = open(self.final_archive_path, 'rb')
- digest = hashlib.new(hash_name)
- while True:
- hashdata = archive_fp.read(hash_size)
- if not hashdata:
- break
- digest.update(hashdata)
- archive_fp.close()
- return digest.hexdigest()
- except Exception as err:
- self.log_debug("Could not generate new checksum: %s" % err)
- return None
+ def force_delete_file(action, name, exc):
+ os.chmod(name, stat.S_IWUSR)
+ if os.path.isfile(name):
+ os.remove(name)
+ else:
+ shutil.rmtree(name)
+ self.log_debug("Removing %s" % self.extracted_path)
+ shutil.rmtree(self.extracted_path, onerror=force_delete_file)
def extract_self(self):
"""Extract an archive into our tmpdir so that we may inspect it or
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
index 3bbc271d..08665b94 100644
--- a/sos/collector/__init__.py
+++ b/sos/collector/__init__.py
@@ -25,6 +25,7 @@ from concurrent.futures import ThreadPoolExecutor
from getpass import getpass
from pipes import quote
from textwrap import fill
+from sos.cleaner import SoSCleaner
from sos.collector.sosnode import SosNode
from sos.collector.exceptions import ControlPersistUnsupportedException
from sos.options import ClusterOption
@@ -51,6 +52,7 @@ class SoSCollector(SoSComponent):
'allow_system_changes': False,
'become_root': False,
'case_id': False,
+ 'clean': False,
'cluster_type': None,
'cluster_options': [],
'chroot': 'auto',
@@ -277,6 +279,9 @@ class SoSCollector(SoSComponent):
dest='become_root',
help='Become root on the remote nodes')
collect_grp.add_argument('--case-id', help='Specify case number')
+ collect_grp.add_argument('--clean', '--mask', action='store_true',
+ default=False, dest='clean',
+ help='Locally obfuscate reports gathered')
collect_grp.add_argument('--cluster-type',
help='Specify a type of cluster profile')
collect_grp.add_argument('-c', '--cluster-option',
@@ -1088,16 +1093,46 @@ this utility or remote systems that it connects to.
def create_cluster_archive(self):
"""Calls for creation of tar archive then cleans up the temporary
files created by sos-collector"""
- self.log_info('Creating archive of sosreports...')
+ map_file = None
+ arc_paths = []
+ for host in self.client_list:
+ for fname in host.file_list:
+ arc_paths.append(fname)
+
+ if self.opts.clean:
+ hook_commons = {
+ 'policy': self.policy,
+ 'tmpdir': self.tmpdir,
+ 'sys_tmp': self.sys_tmp,
+ 'options': self.opts
+ }
+ try:
+ self.ui_log.info('')
+ cleaner = SoSCleaner(in_place=True,
+ hook_commons=hook_commons)
+ cleaner.set_target_path(self.tmpdir)
+ map_file, arc_paths = cleaner.execute()
+ except Exception as err:
+ self.ui_log.error("ERROR: unable to obfuscate reports: %s"
+ % err)
+
try:
- for host in self.client_list:
- for fname in host.file_list:
- dest = fname
- # place checksums in a different directory
- if fname.endswith(('.md5', )):
- dest = os.path.join('checksums', fname)
- name = os.path.join(self.tmpdir, fname)
- self.archive.add_file(name, dest=dest)
+ self.log_info('Creating archive of sosreports...')
+ for fname in arc_paths:
+ dest = fname.split('/')[-1]
+ if fname.endswith(('.md5',)):
+ dest = os.path.join('checksums', fname.split('/')[-1])
+ if self.opts.clean:
+ dest = cleaner.obfuscate_string(dest)
+ name = os.path.join(self.tmpdir, fname)
+ self.archive.add_file(name, dest=dest)
+ if map_file:
+ # regenerate the checksum for the obfuscated archive
+ checksum = cleaner.get_new_checksum(fname)
+ if checksum:
+ name = os.path.join('checksums', fname.split('/')[-1])
+ name += '.md5'
+ self.archive.add_string(checksum, name)
self.archive.add_file(self.sos_log_file,
dest=os.path.join('sos_logs', 'sos.log'))
self.archive.add_file(self.sos_ui_log_file,
@@ -1110,12 +1145,27 @@ this utility or remote systems that it connects to.
arc_name = self.archive.finalize(self.opts.compression_type)
final_name = os.path.join(self.sys_tmp, os.path.basename(arc_name))
+ if self.opts.clean:
+ final_name = cleaner.obfuscate_string(
+ final_name.replace('.tar', '-obfuscated.tar')
+ )
os.rename(arc_name, final_name)
+ if map_file:
+ # rename the map file to match the collector archive name, not
+ # the temp dir it was constructed in
+ map_name = cleaner.obfuscate_string(
+ os.path.join(self.sys_tmp,
+ "%s_private_map" % self.archive_name)
+ )
+ os.rename(map_file, map_name)
+ self.ui_log.info("A mapping of obfuscated elements is "
+ "available at\n\t%s" % map_name)
+
self.soslog.info('Archive created as %s' % final_name)
self.ui_log.info('\nThe following archive has been created. '
'Please provide it to your support team.')
- self.ui_log.info(' %s' % final_name)
+ self.ui_log.info('\t%s\n' % final_name)
except Exception as err:
msg = ("Could not finalize archive: %s\n\nData may still be "
"available uncompressed at %s" % (err, self.archive_path))
diff --git a/sos/component.py b/sos/component.py
index 9d80bc61..8b70ce34 100644
--- a/sos/component.py
+++ b/sos/component.py
@@ -180,12 +180,16 @@ class SoSComponent():
def cleanup(self):
# archive and tempfile cleanup may fail due to a fatal
# OSError exception (ENOSPC, EROFS etc.).
- if self.archive:
- self.archive.cleanup()
- if self.tempfile_util:
- self.tempfile_util.clean()
- if self.tmpdir:
- rmtree(self.tmpdir)
+ try:
+ if self.archive:
+ self.archive.cleanup()
+ if self.tempfile_util:
+ self.tempfile_util.clean()
+ if self.tmpdir:
+ rmtree(self.tmpdir)
+ except Exception as err:
+ print("Failed to finish cleanup: %s\nContents may remain in %s"
+ % (err, self.tmpdir))
def setup_archive(self, name=''):
enc_opts = {
diff --git a/sos/report/__init__.py b/sos/report/__init__.py
index c0c2eb1b..2eb26ab8 100644
--- a/sos/report/__init__.py
+++ b/sos/report/__init__.py
@@ -1115,7 +1115,8 @@ class SoSReport(SoSComponent):
}
cleaner = SoSCleaner(in_place=True, hook_commons=hook_commons)
cleaner.set_target_path(self.archive.get_archive_path())
- map_file = cleaner.execute()
+ # ignore the returned paths here
+ map_file, _paths = cleaner.execute()
except Exception as err:
print(_("ERROR: Unable to obfuscate report: %s" % err))