diff options
author | Jake Hunsaker <jhunsake@redhat.com> | 2020-06-02 16:40:08 -0400 |
---|---|---|
committer | Jake Hunsaker <jhunsake@redhat.com> | 2020-06-17 12:11:29 -0400 |
commit | 0779f8cdc76baca874e51fce091dbb2ef5da60ff (patch) | |
tree | 4c93b4f1d2c68469ae3c3f7d705290842745e3b6 | |
parent | 9cc01d6d99491d626230e9526f51b22a16610528 (diff) | |
download | sos-0779f8cdc76baca874e51fce091dbb2ef5da60ff.tar.gz |
[cleaner] Obfuscate filenames based on previously obfuscated items
For each file processed, `sos clean` will now check the filename and
attempt to obfuscate any _known_ items it contains, for example host names
used as part of command collections.
This must be done against known items, rather than regex parsing for
potential new items, due to the nature of filenames having a high
propensity to trigger false positives in regex patterns.
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
-rw-r--r-- | sos/archive.py | 10 | ||||
-rw-r--r-- | sos/cleaner/__init__.py | 41 | ||||
-rw-r--r-- | sos/cleaner/obfuscation_archive.py | 55 | ||||
-rw-r--r-- | sos/cleaner/parsers/__init__.py | 13 | ||||
-rw-r--r-- | sos/report/__init__.py | 4 |
5 files changed, 99 insertions, 24 deletions
diff --git a/sos/archive.py b/sos/archive.py index 128e044f..31cfa1fa 100644 --- a/sos/archive.py +++ b/sos/archive.py @@ -548,6 +548,16 @@ class FileCacheArchive(Archive): self.add_string(self.manifest.get_json(indent=4), os.path.join('sos_reports', 'manifest.json')) + def rename_archive_root(self, cleaner): + """Rename the archive to an obfuscated version using an initialized + SoSCleaner instance + """ + self._name = cleaner.obfuscate_string(self._name) + _new_root = os.path.join(self._tmp_dir, self._name) + os.rename(self._archive_root, _new_root) + self._archive_root = _new_root + self._archive_name = os.path.join(self._tmp_dir, self.name()) + def finalize(self, method): self.log_info("finalizing archive '%s' using method '%s'" % (self._archive_root, method)) diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index 4a4e5959..5c0feb7a 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -298,12 +298,16 @@ third party. self.setup_archive(name=arc_name) for arc in self.completed_reports: if arc.is_tarfile: - arc_dest = arc.final_archive_path.split('/')[-1] + arc_dest = self.obfuscate_string( + arc.final_archive_path.split('/')[-1] + ) self.archive.add_file(arc.final_archive_path, dest=arc_dest) checksum = self.get_new_checksum(arc.final_archive_path) if checksum is not None: - dname = "checksums/%s.%s" % (arc_dest, self.hash_name) + dname = self.obfuscate_string( + "checksums/%s.%s" % (arc_dest, self.hash_name) + ) self.archive.add_string(checksum, dest=dname) else: for dirname, dirs, files in os.walk(arc.archive_path): @@ -311,7 +315,9 @@ third party. if filename.startswith('sosreport'): continue fname = os.path.join(dirname, filename) - dnm = fname.split(arc.archive_name)[-1].lstrip('/') + dnm = self.obfuscate_string( + fname.split(arc.archive_name)[-1].lstrip('/') + ) self.archive.add_file(fname, dest=dnm) arc_path = self.archive.finalize(self.opts.compression_type) else: @@ -319,12 +325,15 @@ third party. 
arc_path = arc.final_archive_path checksum = self.get_new_checksum(arc.final_archive_path) if checksum is not None: - chksum_name = "%s.%s" % (arc_path.split('/')[-1], - self.hash_name) + chksum_name = self.obfuscate_string( + "%s.%s" % (arc_path.split('/')[-1], self.hash_name) + ) with open(os.path.join(self.sys_tmp, chksum_name), 'w') as cf: cf.write(checksum) - final_path = os.path.join(self.sys_tmp, arc_path.split('/')[-1]) + final_path = self.obfuscate_string( + os.path.join(self.sys_tmp, arc_path.split('/')[-1]) + ) shutil.move(arc_path, final_path) arcstat = os.stat(final_path) @@ -462,6 +471,9 @@ third party. if method: archive.report_msg("Re-compressing...") try: + archive.rename_top_dir( + self.obfuscate_string(archive.archive_name) + ) cmd = self.policy.get_cmd_for_compress_method( method, self.opts.threads @@ -503,8 +515,8 @@ third party. with open(prep_file, 'r') as host_file: hostname = host_file.readline().strip() parser.load_hostname_into_map(hostname) - else: - self.obfuscate_file(prep_file) + self.obfuscate_file(prep_file, parser.prep_map_file, + archive.archive_name) def obfuscate_file(self, filename, short_name=None, arc_name=None): """Obfuscate and individual file, line by line. @@ -542,8 +554,21 @@ third party. if subs: shutil.copy(tfile.name, filename) tfile.close() + _ob_filename = self.obfuscate_string(short_name) + if _ob_filename != short_name: + arc_path = filename.split(short_name)[0] + _ob_path = os.path.join(arc_path, _ob_filename) + os.rename(filename, _ob_path) return subs + def obfuscate_string(self, string_data): + for parser in self.parsers: + try: + string_data = parser.parse_string_for_keys(string_data) + except Exception: + pass + return string_data + def obfuscate_line(self, line, filename): """Run a line through each of the obfuscation parsers, keeping a cumulative total of substitutions done on that particular line. 
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py index 283bee50..1fe37d25 100644 --- a/sos/cleaner/obfuscation_archive.py +++ b/sos/cleaner/obfuscation_archive.py @@ -34,6 +34,7 @@ class SoSObfuscationArchive(): self.final_archive_path = self.archive_path self.tmpdir = tmpdir self.archive_name = self.archive_path.split('/')[-1].split('.tar')[0] + self.ui_name = self.archive_name self.soslog = logging.getLogger('sos') self.ui_log = logging.getLogger('sos_ui') self.skip_list = self._load_skip_list() @@ -41,7 +42,7 @@ class SoSObfuscationArchive(): def report_msg(self, msg): """Helper to easily format ui messages on a per-report basis""" - self.ui_log.info("{:<50} {}".format(self.archive_name + ' :', msg)) + self.ui_log.info("{:<50} {}".format(self.ui_name + ' :', msg)) def _fmt_log_msg(self, msg): return "[cleaner:%s] %s" % (self.archive_name, msg) @@ -89,8 +90,45 @@ class SoSObfuscationArchive(): self.extracted_path = self.extract_self() else: self.extracted_path = self.archive_path + # if we're running as non-root (e.g. collector), then we can have a + # situation where a particular path has insufficient permissions for + # us to rewrite the contents and/or add it to the ending tarfile. 
+ # Unfortunately our only choice here is to change the permissions + # that were preserved during report collection + if os.getuid() != 0: + self.log_debug('Verifying permissions of archive contents') + for dirname, dirs, files in os.walk(self.extracted_path): + try: + for _dir in dirs: + _dirname = os.path.join(dirname, _dir) + _dir_perms = os.stat(_dirname).st_mode + os.chmod(_dirname, _dir_perms | stat.S_IRWXU) + for filename in files: + fname = os.path.join(dirname, filename) + # protect against symlink race conditions + if not os.path.exists(fname) or os.path.islink(fname): + continue + if (not os.access(fname, os.R_OK) or not + os.access(fname, os.W_OK)): + self.log_debug( + "Adding owner rw permissions to %s" + % fname.split(self.archive_path)[-1] + ) + os.chmod(fname, stat.S_IRUSR | stat.S_IWUSR) + except Exception as err: + self.log_debug("Error while trying to set perms: %s" % err) self.log_debug("Extracted path is %s" % self.extracted_path) + def rename_top_dir(self, new_name): + """Rename the top-level directory to new_name, which should be an + obfuscated string that scrubs the hostname from the top-level dir + which would be named after the unobfuscated sos report + """ + _path = self.extracted_path.replace(self.archive_name, new_name) + self.archive_name = new_name + os.rename(self.extracted_path, _path) + self.extracted_path = _path + def get_compression(self): """Return the compression type used by the archive, if any. This is then used by SoSCleaner to generate a policy-derived compression @@ -106,21 +144,6 @@ class SoSObfuscationArchive(): """Pack the extracted archive as a tarfile to then be re-compressed """ self.tarpath = self.extracted_path + '-obfuscated.tar' - # if we're running as non-root (e.g. collector), then we can have a - # situation where a particular path only has 0200 permissions, thus - # preventing it from being added via tarfile.add(). 
- # Unfortunately our only choice here is to change the permissions - # that were preserved during report collection - if os.getuid() != 0: - self.log_debug('Verifying read permissions of archive contents') - for dirname, dirs, files in os.walk(self.extracted_path): - for filename in files: - fname = os.path.join(dirname, filename) - if not os.access(fname, os.R_OK): - self.log_debug("Adding owner read permissions to %s" - % fname.split(self.archive_path)[-1]) - _perms = os.stat(fname).st_mode - os.chmod(fname, _perms | stat.S_IRUSR) self.log_debug("Building tar file %s" % self.tarpath) tar = tarfile.open(self.tarpath, mode="w") tar.add(self.extracted_path, diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py index e04758f7..248e353c 100644 --- a/sos/cleaner/parsers/__init__.py +++ b/sos/cleaner/parsers/__init__.py @@ -72,6 +72,19 @@ class SoSCleanerParser(): line = line.replace(match.strip(), new_match) return line, count + def parse_string_for_keys(self, string_data): + """Parse a given string for instances of any obfuscated items, without + applying the normal regex comparisons first. This is mainly used to + obfuscate filenames that have, for example, hostnames in them. 
+ + Rather than try to regex match the string_data, just use the builtin + checks for substrings matching known obfuscated keys + """ + for key, val in self.mapping.dataset.items(): + if key in string_data: + return string_data.replace(key, val) + return string_data + def get_map_contents(self): """Return the contents of the mapping used by the parser """ diff --git a/sos/report/__init__.py b/sos/report/__init__.py index 6d182a10..17a3d0a0 100644 --- a/sos/report/__init__.py +++ b/sos/report/__init__.py @@ -1154,6 +1154,8 @@ class SoSReport(SoSComponent): print(_("Creating compressed archive...")) # compression could fail for a number of reasons try: + if self.opts.clean: + self.archive.rename_archive_root(cleaner) archive = self.archive.finalize( self.opts.compression_type) except (OSError, IOError) as e: @@ -1176,6 +1178,8 @@ class SoSReport(SoSComponent): dir_name = os.path.basename(directory) try: final_dir = os.path.join(self.sys_tmp, dir_name) + if self.opts.clean: + final_dir = cleaner.obfuscate_string(final_dir) os.rename(directory, final_dir) directory = final_dir except (OSError, IOError): |