aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jake Hunsaker <jhunsake@redhat.com> 2021-06-16 01:15:45 -0400
committer Jake Hunsaker <jhunsake@redhat.com> 2021-06-21 12:48:07 -0400
commit 7ed138fcd2ee6ece3e7fbd9e48293b212e0b4e41 (patch)
tree fd65f14da4c10c3953fbecfe300e9979c768021b
parent b5d166ac9ff79bc3740c5e66f16d60762f9a0ac0 (diff)
download sos-7ed138fcd2ee6ece3e7fbd9e48293b212e0b4e41.tar.gz
[cleaner] Explicitly obfuscate directory names within archives
This commit adds a step to `obfuscate_report()` that explicitly walks through all directories in the archive, and obfuscates the directory names if necessary. Since this uses `obfuscate_string()` for the directory names, a `skip_keys` list has been added to maps to allow parsers/maps to specify matched keys (such as short names for the hostname parser) that should not be considered when obfuscating directory names (e.g. 'www').

Closes: #2465
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
-rw-r--r-- sos/cleaner/__init__.py 26
-rw-r--r-- sos/cleaner/mappings/__init__.py 4
-rw-r--r-- sos/cleaner/mappings/hostname_map.py 5
-rw-r--r-- sos/cleaner/obfuscation_archive.py 20
-rw-r--r-- sos/cleaner/parsers/__init__.py 2
5 files changed, 54 insertions, 3 deletions
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
index b38c8dfc..88d4d0ea 100644
--- a/sos/cleaner/__init__.py
+++ b/sos/cleaner/__init__.py
@@ -562,6 +562,11 @@ third party.
except Exception as err:
self.log_debug("Unable to parse file %s: %s"
% (short_name, err))
+ try:
+ self.obfuscate_directory_names(archive)
+ except Exception as err:
+ self.log_info("Failed to obfuscate directories: %s" % err,
+ caller=archive.archive_name)
# if the archive was already a tarball, repack it
method = archive.get_compression()
@@ -663,6 +668,27 @@ third party.
return subs
+ def obfuscate_directory_names(self, archive):
+ """For all directories that exist within the archive, obfuscate the
+ directory name if it contains sensitive strings found during execution
+ """
+ self.log_info("Obfuscating directory names in archive %s"
+ % archive.archive_name)
+ for dirpath in sorted(archive.get_directory_list(), reverse=True):
+ for _name in os.listdir(dirpath):
+ _dirname = os.path.join(dirpath, _name)
+ _arc_dir = _dirname.split(archive.extracted_path)[-1]
+ if os.path.isdir(_dirname):
+ _ob_dirname = self.obfuscate_string(_name)
+ if _ob_dirname != _name:
+ _ob_arc_dir = _arc_dir.rstrip(_name)
+ _ob_arc_dir = os.path.join(
+ archive.extracted_path,
+ _ob_arc_dir.lstrip('/'),
+ _ob_dirname
+ )
+ os.rename(_dirname, _ob_arc_dir)
+
def obfuscate_string(self, string_data):
for parser in self.parsers:
try:
diff --git a/sos/cleaner/mappings/__init__.py b/sos/cleaner/mappings/__init__.py
index dd464e5a..5cf5c8b2 100644
--- a/sos/cleaner/mappings/__init__.py
+++ b/sos/cleaner/mappings/__init__.py
@@ -20,8 +20,10 @@ class SoSMap():
corresponding SoSMap() object, to allow for easy retrieval of obfuscated
items.
"""
-
+ # used for regex skips in parser.parse_line()
ignore_matches = []
+ # used for filename obfuscations in parser.parse_string_for_keys()
+ skip_keys = []
def __init__(self):
self.dataset = {}
diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
index e0b7bf1d..c9a44d8d 100644
--- a/sos/cleaner/mappings/hostname_map.py
+++ b/sos/cleaner/mappings/hostname_map.py
@@ -35,6 +35,11 @@ class SoSHostnameMap(SoSMap):
'^com..*'
]
+ skip_keys = [
+ 'www',
+ 'api'
+ ]
+
host_count = 0
domain_count = 0
_domains = {}
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
index 88f978d9..90188358 100644
--- a/sos/cleaner/obfuscation_archive.py
+++ b/sos/cleaner/obfuscation_archive.py
@@ -202,10 +202,22 @@ class SoSObfuscationArchive():
"""Return a list of all files within the archive"""
self.file_list = []
for dirname, dirs, files in os.walk(self.extracted_path):
+ for _dir in dirs:
+ _dirpath = os.path.join(dirname, _dir)
+ # catch dir-level symlinks
+ if os.path.islink(_dirpath) and os.path.isdir(_dirpath):
+ self.file_list.append(_dirpath)
for filename in files:
self.file_list.append(os.path.join(dirname, filename))
return self.file_list
+ def get_directory_list(self):
+ """Return a list of all directories within the archive"""
+ dir_list = []
+ for dirname, dirs, files in os.walk(self.extracted_path):
+ dir_list.append(dirname)
+ return dir_list
+
def update_sub_count(self, fname, count):
"""Called when a file has finished being parsed and used to track
total substitutions made and number of files that had changes made
@@ -230,7 +242,8 @@ class SoSObfuscationArchive():
archive root
"""
- if not os.path.isfile(self.get_file_path(filename)):
+ if (not os.path.isfile(self.get_file_path(filename)) and not
+ os.path.islink(self.get_file_path(filename))):
return True
for _skip in self.skip_list:
@@ -266,7 +279,10 @@ class SoSObfuscationArchive():
if re.match(_arc_reg, fname):
return True
- return self.file_is_binary(fname)
+ if os.path.isfile(self.get_file_path(fname)):
+ return self.file_is_binary(fname)
+ # don't fail on dir-level symlinks
+ return False
def file_is_binary(self, fname):
"""Determine if the file is a binary file or not.
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
index cfa20b95..84874475 100644
--- a/sos/cleaner/parsers/__init__.py
+++ b/sos/cleaner/parsers/__init__.py
@@ -107,6 +107,8 @@ class SoSCleanerParser():
for pair in sorted(self.mapping.dataset.items(), reverse=True,
key=lambda x: len(x[0])):
key, val = pair
+ if key in self.mapping.skip_keys:
+ continue
if key in string_data:
string_data = string_data.replace(key, val)
return string_data