diff options
author | Jake Hunsaker <jhunsake@redhat.com> | 2020-05-21 13:21:19 -0400 |
---|---|---|
committer | Jake Hunsaker <jhunsake@redhat.com> | 2020-06-17 12:11:29 -0400 |
commit | cbe18f0dba008b1ff2dbe15259263791f82e6507 (patch) | |
tree | d6ea8be17dec35eaf25a09a8140a21129ba3f107 | |
parent | 582eacb8d92b741959ad556ffcacd748fa3bbf14 (diff) | |
download | sos-cbe18f0dba008b1ff2dbe15259263791f82e6507.tar.gz |
[collect] Extend --clean/--mask to SoSCollector
Adds the ability for SoSCollector runs to pass collected archives
through `--clean`, in much the same way that this functionality was
extended to base `report` runs.
Note that running this way will, like `report`, result in only a single
obfuscated archive and the private mapping file. If users desire an
unobfuscated copy as well, then `sos collect` and `sos clean` should be
run separately.
Related: #1987
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
-rw-r--r-- | sos/cleaner/__init__.py | 61 | ||||
-rw-r--r-- | sos/cleaner/obfuscation_archive.py | 57 | ||||
-rw-r--r-- | sos/collector/__init__.py | 70 | ||||
-rw-r--r-- | sos/component.py | 16 | ||||
-rw-r--r-- | sos/report/__init__.py | 3 |
5 files changed, 154 insertions, 53 deletions
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index ec2d6ce2..ef963534 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -8,6 +8,7 @@ # # See the LICENSE file in the source distribution for further information. +import hashlib import json import logging import os @@ -58,14 +59,19 @@ class SoSCleaner(SoSComponent): self.policy = hook_commons['policy'] self.from_cmdline = False self.opts.map_file = '/etc/sos/cleaner/default_mapping' - self.opts.jobs = 4 self.opts.no_update = False + if not hasattr(self.opts, 'jobs'): + self.opts.jobs = 4 self.soslog = logging.getLogger('sos') self.ui_log = logging.getLogger('sos_ui') + # create the tmp subdir here to avoid a potential race condition + # when obfuscating a SoSCollector run during archive extraction + os.makedirs(os.path.join(self.tmpdir, 'cleaner'), exist_ok=True) self.validate_map_file() os.umask(0o77) self.in_place = in_place + self.hash_name = self.policy.get_preferred_hash_name() self.parsers = [ SoSIPParser(self.opts.map_file), @@ -180,9 +186,9 @@ third party. self.arc_name = self.opts.target.split('/')[-1].split('.')[:-2][0] try: - archive.getmember(os.path.join(self.arc_name, 'logs')) + archive.getmember(os.path.join(self.arc_name, 'sos_logs')) except Exception: - # this is not a sos archive + # this is not an sos archive self.ui_log.error("Invalid target: not an sos archive") self._exit(1) @@ -245,14 +251,18 @@ third party. 
for _file in os.listdir(self.opts.target): if _file == 'sos_logs': self.report_paths.append(self.opts.target) - if re.match('sosreport.*.tar.*', _file): - self.report_paths.append(_file) + if re.match('sosreport.*.tar.*[^md5]', _file): + self.report_paths.append(os.path.join(self.opts.target, + _file)) if not self.report_paths: self.ui_log.error("Invalid target: not an sos directory") self._exit(1) else: self.inspect_target_archive() + # remove any lingering md5 files + self.report_paths = [p for p in self.report_paths if '.md5' not in p] + if not self.report_paths: self.ui_log.error("No valid sos archives or directories found\n") self._exit(1) @@ -275,10 +285,10 @@ third party. self.write_map_for_config(_map) if self.in_place: - return map_path + arc_paths = [a.final_archive_path for a in self.completed_reports] + return map_path, arc_paths final_path = None - self.hash_name = self.policy.get_preferred_hash_name() if len(self.completed_reports) > 1: # we have an archive of archives, so repack the obfuscated tarball arc_name = self.arc_name + '-obfuscated' @@ -288,7 +298,7 @@ third party. arc_dest = arc.final_archive_path.split('/')[-1] self.archive.add_file(arc.final_archive_path, dest=arc_dest) - checksum = self.get_new_checksum(arc) + checksum = self.get_new_checksum(arc.final_archive_path) if checksum is not None: dname = "checksums/%s.%s" % (arc_dest, self.hash_name) self.archive.add_string(checksum, dest=dname) @@ -304,7 +314,7 @@ third party. else: arc = self.completed_reports[0] arc_path = arc.final_archive_path - checksum = self.get_new_checksum(arc) + checksum = self.get_new_checksum(arc.final_archive_path) if checksum is not None: chksum_name = "%s.%s" % (arc_path.split('/')[-1], self.hash_name) @@ -371,11 +381,23 @@ third party. 
self.log_error("Could not update mapping config file: %s" % err) - def get_new_checksum(self, archive): - """Get a new checksum for each archive""" - checksum = archive.generate_checksum(self.hash_name) - if checksum: - return checksum + '\n' + def get_new_checksum(self, archive_path): + """Calculate a new checksum for the obfuscated archive, as the previous + checksum will no longer be valid + """ + try: + hash_size = 1024**2 # Hash 1MiB of content at a time. + archive_fp = open(archive_path, 'rb') + digest = hashlib.new(self.hash_name) + while True: + hashdata = archive_fp.read(hash_size) + if not hashdata: + break + digest.update(hashdata) + archive_fp.close() + return digest.hexdigest() + '\n' + except Exception as err: + self.log_debug("Could not generate new checksum: %s" % err) return None def obfuscate_report_paths(self): @@ -386,6 +408,11 @@ third party. be obfuscated concurrently. """ try: + if len(self.report_paths) > 1: + msg = ("Found %s total reports to obfuscate, processing up to " + "%s concurrently\n" + % (len(self.report_paths), self.opts.jobs)) + self.ui_log.info(msg) pool = ThreadPoolExecutor(self.opts.jobs) pool.map(self.obfuscate_report, self.report_paths, chunksize=1) pool.shutdown(wait=True) @@ -403,8 +430,9 @@ third party. """ try: if not os.access(report, os.W_OK): - self.log_info("Insufficient permissions on %s" % report) - self.report_msg(report, "Insufficient permissions") + msg = "Insufficient permissions on %s" % report + self.log_info(msg) + self.ui_log.error(msg) return archive = SoSObfuscationArchive(report, self.tmpdir) @@ -448,7 +476,6 @@ third party. 
except Exception as err: self.ui_log.info("Exception while processing %s: %s" % (report, err)) - os._exit(1) def prep_maps_from_archive(self, archive): """Open specific files from an archive and try to load those values diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py index fc2db4db..a5c788ef 100644 --- a/sos/cleaner/obfuscation_archive.py +++ b/sos/cleaner/obfuscation_archive.py @@ -8,9 +8,10 @@ # # See the LICENSE file in the source distribution for further information. -import hashlib import logging import os +import shutil +import stat import tarfile import re @@ -104,7 +105,22 @@ class SoSObfuscationArchive(): """Pack the extracted archive as a tarfile to then be re-compressed """ self.tarpath = self.extracted_path + '-obfuscated.tar' - self.log_debug("building tar file %s" % self.tarpath) + # if we're running as non-root (e.g. collector), then we can have a + # situation where a particular path only has 0200 permissions, thus + # preventing it from being added via tarfile.add(). + # Unfortunately our only choice here is to change the permissions + # that were preserved during report collection + if os.getuid() != 0: + self.log_debug('Verifying read permissions of archive contents') + for dirname, dirs, files in os.walk(self.extracted_path): + for filename in files: + fname = os.path.join(dirname, filename) + if not os.access(fname, os.R_OK): + self.log_debug("Adding owner read permissions to %s" + % fname.split(self.archive_path)[-1]) + _perms = os.stat(fname).st_mode + os.chmod(fname, _perms | stat.S_IRUSR) + self.log_debug("Building tar file %s" % self.tarpath) tar = tarfile.open(self.tarpath, mode="w") tar.add(self.extracted_path, arcname=os.path.split(self.archive_name)[1]) @@ -119,29 +135,32 @@ class SoSObfuscationArchive(): res = sos_get_command_output(exec_cmd, timeout=0, stderr=True) if res['status'] == 0: self.final_archive_path = self.tarpath + '.' 
+ exec_cmd[0:2] + self.log_debug("Compressed to %s" % self.final_archive_path) + try: + self.remove_extracted_path() + except Exception as err: + self.log_debug("Failed to remove extraction directory: %s" + % err) + self.report_msg('Failed to remove temporary extraction ' + 'directory') else: err = res['output'].split(':')[-1] self.log_debug("Exception while compressing archive: %s" % err) raise Exception(err) - def generate_checksum(self, hash_name): - """Calculate a new checksum for the obfuscated archive, as the previous - checksum will no longer be valid + def remove_extracted_path(self): + """After the tarball has been re-compressed, remove the extracted path + so that we don't take up that duplicate space any longer during + execution """ - try: - hash_size = 1024**2 # Hash 1MiB of content at a time. - archive_fp = open(self.final_archive_path, 'rb') - digest = hashlib.new(hash_name) - while True: - hashdata = archive_fp.read(hash_size) - if not hashdata: - break - digest.update(hashdata) - archive_fp.close() - return digest.hexdigest() - except Exception as err: - self.log_debug("Could not generate new checksum: %s" % err) - return None + def force_delete_file(action, name, exc): + os.chmod(name, stat.S_IWUSR) + if os.path.isfile(name): + os.remove(name) + else: + shutil.rmtree(name) + self.log_debug("Removing %s" % self.extracted_path) + shutil.rmtree(self.extracted_path, onerror=force_delete_file) def extract_self(self): """Extract an archive into our tmpdir so that we may inspect it or diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py index 3bbc271d..08665b94 100644 --- a/sos/collector/__init__.py +++ b/sos/collector/__init__.py @@ -25,6 +25,7 @@ from concurrent.futures import ThreadPoolExecutor from getpass import getpass from pipes import quote from textwrap import fill +from sos.cleaner import SoSCleaner from sos.collector.sosnode import SosNode from sos.collector.exceptions import ControlPersistUnsupportedException from sos.options 
import ClusterOption @@ -51,6 +52,7 @@ class SoSCollector(SoSComponent): 'allow_system_changes': False, 'become_root': False, 'case_id': False, + 'clean': False, 'cluster_type': None, 'cluster_options': [], 'chroot': 'auto', @@ -277,6 +279,9 @@ class SoSCollector(SoSComponent): dest='become_root', help='Become root on the remote nodes') collect_grp.add_argument('--case-id', help='Specify case number') + collect_grp.add_argument('--clean', '--mask', action='store_true', + default=False, dest='clean', + help='Locally obfuscate reports gathered') collect_grp.add_argument('--cluster-type', help='Specify a type of cluster profile') collect_grp.add_argument('-c', '--cluster-option', @@ -1088,16 +1093,46 @@ this utility or remote systems that it connects to. def create_cluster_archive(self): """Calls for creation of tar archive then cleans up the temporary files created by sos-collector""" - self.log_info('Creating archive of sosreports...') + map_file = None + arc_paths = [] + for host in self.client_list: + for fname in host.file_list: + arc_paths.append(fname) + + if self.opts.clean: + hook_commons = { + 'policy': self.policy, + 'tmpdir': self.tmpdir, + 'sys_tmp': self.sys_tmp, + 'options': self.opts + } + try: + self.ui_log.info('') + cleaner = SoSCleaner(in_place=True, + hook_commons=hook_commons) + cleaner.set_target_path(self.tmpdir) + map_file, arc_paths = cleaner.execute() + except Exception as err: + self.ui_log.error("ERROR: unable to obfuscate reports: %s" + % err) + try: - for host in self.client_list: - for fname in host.file_list: - dest = fname - # place checksums in a different directory - if fname.endswith(('.md5', )): - dest = os.path.join('checksums', fname) - name = os.path.join(self.tmpdir, fname) - self.archive.add_file(name, dest=dest) + self.log_info('Creating archive of sosreports...') + for fname in arc_paths: + dest = fname.split('/')[-1] + if fname.endswith(('.md5',)): + dest = os.path.join('checksums', fname.split('/')[-1]) + if 
self.opts.clean: + dest = cleaner.obfuscate_string(dest) + name = os.path.join(self.tmpdir, fname) + self.archive.add_file(name, dest=dest) + if map_file: + # regenerate the checksum for the obfuscated archive + checksum = cleaner.get_new_checksum(fname) + if checksum: + name = os.path.join('checksums', fname.split('/')[-1]) + name += '.md5' + self.archive.add_string(checksum, name) self.archive.add_file(self.sos_log_file, dest=os.path.join('sos_logs', 'sos.log')) self.archive.add_file(self.sos_ui_log_file, @@ -1110,12 +1145,27 @@ this utility or remote systems that it connects to. arc_name = self.archive.finalize(self.opts.compression_type) final_name = os.path.join(self.sys_tmp, os.path.basename(arc_name)) + if self.opts.clean: + final_name = cleaner.obfuscate_string( + final_name.replace('.tar', '-obfuscated.tar') + ) os.rename(arc_name, final_name) + if map_file: + # rename the map file to match the collector archive name, not + # the temp dir it was constructed in + map_name = cleaner.obfuscate_string( + os.path.join(self.sys_tmp, + "%s_private_map" % self.archive_name) + ) + os.rename(map_file, map_name) + self.ui_log.info("A mapping of obfuscated elements is " + "available at\n\t%s" % map_name) + self.soslog.info('Archive created as %s' % final_name) self.ui_log.info('\nThe following archive has been created. ' 'Please provide it to your support team.') - self.ui_log.info(' %s' % final_name) + self.ui_log.info('\t%s\n' % final_name) except Exception as err: msg = ("Could not finalize archive: %s\n\nData may still be " "available uncompressed at %s" % (err, self.archive_path)) diff --git a/sos/component.py b/sos/component.py index 9d80bc61..8b70ce34 100644 --- a/sos/component.py +++ b/sos/component.py @@ -180,12 +180,16 @@ class SoSComponent(): def cleanup(self): # archive and tempfile cleanup may fail due to a fatal # OSError exception (ENOSPC, EROFS etc.). 
- if self.archive: - self.archive.cleanup() - if self.tempfile_util: - self.tempfile_util.clean() - if self.tmpdir: - rmtree(self.tmpdir) + try: + if self.archive: + self.archive.cleanup() + if self.tempfile_util: + self.tempfile_util.clean() + if self.tmpdir: + rmtree(self.tmpdir) + except Exception as err: + print("Failed to finish cleanup: %s\nContents may remain in %s" + % (err, self.tmpdir)) def setup_archive(self, name=''): enc_opts = { diff --git a/sos/report/__init__.py b/sos/report/__init__.py index c0c2eb1b..2eb26ab8 100644 --- a/sos/report/__init__.py +++ b/sos/report/__init__.py @@ -1115,7 +1115,8 @@ class SoSReport(SoSComponent): } cleaner = SoSCleaner(in_place=True, hook_commons=hook_commons) cleaner.set_target_path(self.archive.get_archive_path()) - map_file = cleaner.execute() + # ignore the returned paths here + map_file, _paths = cleaner.execute() except Exception as err: print(_("ERROR: Unable to obfuscate report: %s" % err)) |