aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jake Hunsaker <jhunsake@redhat.com> 2020-06-02 16:40:08 -0400
committer: Jake Hunsaker <jhunsake@redhat.com> 2020-06-17 12:11:29 -0400
commit: 0779f8cdc76baca874e51fce091dbb2ef5da60ff (patch)
tree: 4c93b4f1d2c68469ae3c3f7d705290842745e3b6
parent: 9cc01d6d99491d626230e9526f51b22a16610528 (diff)
download: sos-0779f8cdc76baca874e51fce091dbb2ef5da60ff.tar.gz
[cleaner] Obfuscate filenames based on previously obfuscated items
For each file processed, `sos clean` will now check the filename and attempt to obfuscate any _known_ items in it, for example host names used as part of command collections. This must be done against known items, rather than by regex parsing for potential new items, because filenames have a high propensity to trigger false positives in regex patterns. Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
-rw-r--r-- sos/archive.py 10
-rw-r--r-- sos/cleaner/__init__.py 41
-rw-r--r-- sos/cleaner/obfuscation_archive.py 55
-rw-r--r-- sos/cleaner/parsers/__init__.py 13
-rw-r--r-- sos/report/__init__.py 4
5 files changed, 99 insertions, 24 deletions
diff --git a/sos/archive.py b/sos/archive.py
index 128e044f..31cfa1fa 100644
--- a/sos/archive.py
+++ b/sos/archive.py
@@ -548,6 +548,16 @@ class FileCacheArchive(Archive):
self.add_string(self.manifest.get_json(indent=4),
os.path.join('sos_reports', 'manifest.json'))
+ def rename_archive_root(self, cleaner):
+ """Rename the archive to an obfuscated version using an initialized
+ SoSCleaner instance
+ """
+ self._name = cleaner.obfuscate_string(self._name)
+ _new_root = os.path.join(self._tmp_dir, self._name)
+ os.rename(self._archive_root, _new_root)
+ self._archive_root = _new_root
+ self._archive_name = os.path.join(self._tmp_dir, self.name())
+
def finalize(self, method):
self.log_info("finalizing archive '%s' using method '%s'"
% (self._archive_root, method))
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
index 4a4e5959..5c0feb7a 100644
--- a/sos/cleaner/__init__.py
+++ b/sos/cleaner/__init__.py
@@ -298,12 +298,16 @@ third party.
self.setup_archive(name=arc_name)
for arc in self.completed_reports:
if arc.is_tarfile:
- arc_dest = arc.final_archive_path.split('/')[-1]
+ arc_dest = self.obfuscate_string(
+ arc.final_archive_path.split('/')[-1]
+ )
self.archive.add_file(arc.final_archive_path,
dest=arc_dest)
checksum = self.get_new_checksum(arc.final_archive_path)
if checksum is not None:
- dname = "checksums/%s.%s" % (arc_dest, self.hash_name)
+ dname = self.obfuscate_string(
+ "checksums/%s.%s" % (arc_dest, self.hash_name)
+ )
self.archive.add_string(checksum, dest=dname)
else:
for dirname, dirs, files in os.walk(arc.archive_path):
@@ -311,7 +315,9 @@ third party.
if filename.startswith('sosreport'):
continue
fname = os.path.join(dirname, filename)
- dnm = fname.split(arc.archive_name)[-1].lstrip('/')
+ dnm = self.obfuscate_string(
+ fname.split(arc.archive_name)[-1].lstrip('/')
+ )
self.archive.add_file(fname, dest=dnm)
arc_path = self.archive.finalize(self.opts.compression_type)
else:
@@ -319,12 +325,15 @@ third party.
arc_path = arc.final_archive_path
checksum = self.get_new_checksum(arc.final_archive_path)
if checksum is not None:
- chksum_name = "%s.%s" % (arc_path.split('/')[-1],
- self.hash_name)
+ chksum_name = self.obfuscate_string(
+ "%s.%s" % (arc_path.split('/')[-1], self.hash_name)
+ )
with open(os.path.join(self.sys_tmp, chksum_name), 'w') as cf:
cf.write(checksum)
- final_path = os.path.join(self.sys_tmp, arc_path.split('/')[-1])
+ final_path = self.obfuscate_string(
+ os.path.join(self.sys_tmp, arc_path.split('/')[-1])
+ )
shutil.move(arc_path, final_path)
arcstat = os.stat(final_path)
@@ -462,6 +471,9 @@ third party.
if method:
archive.report_msg("Re-compressing...")
try:
+ archive.rename_top_dir(
+ self.obfuscate_string(archive.archive_name)
+ )
cmd = self.policy.get_cmd_for_compress_method(
method,
self.opts.threads
@@ -503,8 +515,8 @@ third party.
with open(prep_file, 'r') as host_file:
hostname = host_file.readline().strip()
parser.load_hostname_into_map(hostname)
- else:
- self.obfuscate_file(prep_file)
+ self.obfuscate_file(prep_file, parser.prep_map_file,
+ archive.archive_name)
def obfuscate_file(self, filename, short_name=None, arc_name=None):
"""Obfuscate and individual file, line by line.
@@ -542,8 +554,21 @@ third party.
if subs:
shutil.copy(tfile.name, filename)
tfile.close()
+ _ob_filename = self.obfuscate_string(short_name)
+ if _ob_filename != short_name:
+ arc_path = filename.split(short_name)[0]
+ _ob_path = os.path.join(arc_path, _ob_filename)
+ os.rename(filename, _ob_path)
return subs
+ def obfuscate_string(self, string_data):
+ for parser in self.parsers:
+ try:
+ string_data = parser.parse_string_for_keys(string_data)
+ except Exception:
+ pass
+ return string_data
+
def obfuscate_line(self, line, filename):
"""Run a line through each of the obfuscation parsers, keeping a
cumulative total of substitutions done on that particular line.
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
index 283bee50..1fe37d25 100644
--- a/sos/cleaner/obfuscation_archive.py
+++ b/sos/cleaner/obfuscation_archive.py
@@ -34,6 +34,7 @@ class SoSObfuscationArchive():
self.final_archive_path = self.archive_path
self.tmpdir = tmpdir
self.archive_name = self.archive_path.split('/')[-1].split('.tar')[0]
+ self.ui_name = self.archive_name
self.soslog = logging.getLogger('sos')
self.ui_log = logging.getLogger('sos_ui')
self.skip_list = self._load_skip_list()
@@ -41,7 +42,7 @@ class SoSObfuscationArchive():
def report_msg(self, msg):
"""Helper to easily format ui messages on a per-report basis"""
- self.ui_log.info("{:<50} {}".format(self.archive_name + ' :', msg))
+ self.ui_log.info("{:<50} {}".format(self.ui_name + ' :', msg))
def _fmt_log_msg(self, msg):
return "[cleaner:%s] %s" % (self.archive_name, msg)
@@ -89,8 +90,45 @@ class SoSObfuscationArchive():
self.extracted_path = self.extract_self()
else:
self.extracted_path = self.archive_path
+ # if we're running as non-root (e.g. collector), then we can have a
+ # situation where a particular path has insufficient permissions for
+ # us to rewrite the contents and/or add it to the ending tarfile.
+ # Unfortunately our only choice here is to change the permissions
+ # that were preserved during report collection
+ if os.getuid() != 0:
+ self.log_debug('Verifying permissions of archive contents')
+ for dirname, dirs, files in os.walk(self.extracted_path):
+ try:
+ for _dir in dirs:
+ _dirname = os.path.join(dirname, _dir)
+ _dir_perms = os.stat(_dirname).st_mode
+ os.chmod(_dirname, _dir_perms | stat.S_IRWXU)
+ for filename in files:
+ fname = os.path.join(dirname, filename)
+ # protect against symlink race conditions
+ if not os.path.exists(fname) or os.path.islink(fname):
+ continue
+ if (not os.access(fname, os.R_OK) or not
+ os.access(fname, os.W_OK)):
+ self.log_debug(
+ "Adding owner rw permissions to %s"
+ % fname.split(self.archive_path)[-1]
+ )
+ os.chmod(fname, stat.S_IRUSR | stat.S_IWUSR)
+ except Exception as err:
+ self.log_debug("Error while trying to set perms: %s" % err)
self.log_debug("Extracted path is %s" % self.extracted_path)
+ def rename_top_dir(self, new_name):
+ """Rename the top-level directory to new_name, which should be an
+ obfuscated string that scrubs the hostname from the top-level dir
+ which would be named after the unobfuscated sos report
+ """
+ _path = self.extracted_path.replace(self.archive_name, new_name)
+ self.archive_name = new_name
+ os.rename(self.extracted_path, _path)
+ self.extracted_path = _path
+
def get_compression(self):
"""Return the compression type used by the archive, if any. This is
then used by SoSCleaner to generate a policy-derived compression
@@ -106,21 +144,6 @@ class SoSObfuscationArchive():
"""Pack the extracted archive as a tarfile to then be re-compressed
"""
self.tarpath = self.extracted_path + '-obfuscated.tar'
- # if we're running as non-root (e.g. collector), then we can have a
- # situation where a particular path only has 0200 permissions, thus
- # preventing it from being added via tarfile.add().
- # Unfortunately our only choice here is to change the permissions
- # that were preserved during report collection
- if os.getuid() != 0:
- self.log_debug('Verifying read permissions of archive contents')
- for dirname, dirs, files in os.walk(self.extracted_path):
- for filename in files:
- fname = os.path.join(dirname, filename)
- if not os.access(fname, os.R_OK):
- self.log_debug("Adding owner read permissions to %s"
- % fname.split(self.archive_path)[-1])
- _perms = os.stat(fname).st_mode
- os.chmod(fname, _perms | stat.S_IRUSR)
self.log_debug("Building tar file %s" % self.tarpath)
tar = tarfile.open(self.tarpath, mode="w")
tar.add(self.extracted_path,
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
index e04758f7..248e353c 100644
--- a/sos/cleaner/parsers/__init__.py
+++ b/sos/cleaner/parsers/__init__.py
@@ -72,6 +72,19 @@ class SoSCleanerParser():
line = line.replace(match.strip(), new_match)
return line, count
+ def parse_string_for_keys(self, string_data):
+ """Parse a given string for instances of any obfuscated items, without
+ applying the normal regex comparisons first. This is mainly used to
+ obfuscate filenames that have, for example, hostnames in them.
+
+ Rather than try to regex match the string_data, just use the builtin
+ checks for substrings matching known obfuscated keys
+ """
+ for key, val in self.mapping.dataset.items():
+ if key in string_data:
+ return string_data.replace(key, val)
+ return string_data
+
def get_map_contents(self):
"""Return the contents of the mapping used by the parser
"""
diff --git a/sos/report/__init__.py b/sos/report/__init__.py
index 6d182a10..17a3d0a0 100644
--- a/sos/report/__init__.py
+++ b/sos/report/__init__.py
@@ -1154,6 +1154,8 @@ class SoSReport(SoSComponent):
print(_("Creating compressed archive..."))
# compression could fail for a number of reasons
try:
+ if self.opts.clean:
+ self.archive.rename_archive_root(cleaner)
archive = self.archive.finalize(
self.opts.compression_type)
except (OSError, IOError) as e:
@@ -1176,6 +1178,8 @@ class SoSReport(SoSComponent):
dir_name = os.path.basename(directory)
try:
final_dir = os.path.join(self.sys_tmp, dir_name)
+ if self.opts.clean:
+ final_dir = cleaner.obfuscate_string(final_dir)
os.rename(directory, final_dir)
directory = final_dir
except (OSError, IOError):