diff options
author | Jake Hunsaker <jhunsake@redhat.com> | 2021-06-16 01:15:45 -0400 |
---|---|---|
committer | Jake Hunsaker <jhunsake@redhat.com> | 2021-06-21 12:48:07 -0400 |
commit | 7ed138fcd2ee6ece3e7fbd9e48293b212e0b4e41 (patch) | |
tree | fd65f14da4c10c3953fbecfe300e9979c768021b | |
parent | b5d166ac9ff79bc3740c5e66f16d60762f9a0ac0 (diff) | |
download | sos-7ed138fcd2ee6ece3e7fbd9e48293b212e0b4e41.tar.gz |
[cleaner] Explicitly obfuscate directory names within archives
This commit adds a step to `obfuscate_report()` that explicitly walks
through all directories in the archive, and obfuscates the directory
names if necessary.
Since this uses `obfuscate_string()` for the directory names, a
`skip_keys` list has been added to maps to allow parsers/maps to
specify matched keys (such as short names for the hostname parser) that
should not be considered when obfuscating directory names (e.g. 'www').
Closes: #2465
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
-rw-r--r-- | sos/cleaner/__init__.py | 26 | ||||
-rw-r--r-- | sos/cleaner/mappings/__init__.py | 4 | ||||
-rw-r--r-- | sos/cleaner/mappings/hostname_map.py | 5 | ||||
-rw-r--r-- | sos/cleaner/obfuscation_archive.py | 20 | ||||
-rw-r--r-- | sos/cleaner/parsers/__init__.py | 2 |
5 files changed, 54 insertions, 3 deletions
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index b38c8dfc..88d4d0ea 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -562,6 +562,11 @@ third party. except Exception as err: self.log_debug("Unable to parse file %s: %s" % (short_name, err)) + try: + self.obfuscate_directory_names(archive) + except Exception as err: + self.log_info("Failed to obfuscate directories: %s" % err, + caller=archive.archive_name) # if the archive was already a tarball, repack it method = archive.get_compression() @@ -663,6 +668,27 @@ third party. return subs + def obfuscate_directory_names(self, archive): + """For all directories that exist within the archive, obfuscate the + directory name if it contains sensitive strings found during execution + """ + self.log_info("Obfuscating directory names in archive %s" + % archive.archive_name) + for dirpath in sorted(archive.get_directory_list(), reverse=True): + for _name in os.listdir(dirpath): + _dirname = os.path.join(dirpath, _name) + _arc_dir = _dirname.split(archive.extracted_path)[-1] + if os.path.isdir(_dirname): + _ob_dirname = self.obfuscate_string(_name) + if _ob_dirname != _name: + _ob_arc_dir = _arc_dir.rstrip(_name) + _ob_arc_dir = os.path.join( + archive.extracted_path, + _ob_arc_dir.lstrip('/'), + _ob_dirname + ) + os.rename(_dirname, _ob_arc_dir) + def obfuscate_string(self, string_data): for parser in self.parsers: try: diff --git a/sos/cleaner/mappings/__init__.py b/sos/cleaner/mappings/__init__.py index dd464e5a..5cf5c8b2 100644 --- a/sos/cleaner/mappings/__init__.py +++ b/sos/cleaner/mappings/__init__.py @@ -20,8 +20,10 @@ class SoSMap(): corresponding SoSMap() object, to allow for easy retrieval of obfuscated items. 
""" - + # used for regex skips in parser.parse_line() ignore_matches = [] + # used for filename obfuscations in parser.parse_string_for_keys() + skip_keys = [] def __init__(self): self.dataset = {} diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py index e0b7bf1d..c9a44d8d 100644 --- a/sos/cleaner/mappings/hostname_map.py +++ b/sos/cleaner/mappings/hostname_map.py @@ -35,6 +35,11 @@ class SoSHostnameMap(SoSMap): '^com..*' ] + skip_keys = [ + 'www', + 'api' + ] + host_count = 0 domain_count = 0 _domains = {} diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py index 88f978d9..90188358 100644 --- a/sos/cleaner/obfuscation_archive.py +++ b/sos/cleaner/obfuscation_archive.py @@ -202,10 +202,22 @@ class SoSObfuscationArchive(): """Return a list of all files within the archive""" self.file_list = [] for dirname, dirs, files in os.walk(self.extracted_path): + for _dir in dirs: + _dirpath = os.path.join(dirname, _dir) + # catch dir-level symlinks + if os.path.islink(_dirpath) and os.path.isdir(_dirpath): + self.file_list.append(_dirpath) for filename in files: self.file_list.append(os.path.join(dirname, filename)) return self.file_list + def get_directory_list(self): + """Return a list of all directories within the archive""" + dir_list = [] + for dirname, dirs, files in os.walk(self.extracted_path): + dir_list.append(dirname) + return dir_list + def update_sub_count(self, fname, count): """Called when a file has finished being parsed and used to track total substitutions made and number of files that had changes made @@ -230,7 +242,8 @@ class SoSObfuscationArchive(): archive root """ - if not os.path.isfile(self.get_file_path(filename)): + if (not os.path.isfile(self.get_file_path(filename)) and not + os.path.islink(self.get_file_path(filename))): return True for _skip in self.skip_list: @@ -266,7 +279,10 @@ class SoSObfuscationArchive(): if re.match(_arc_reg, fname): return True - return 
self.file_is_binary(fname) + if os.path.isfile(self.get_file_path(fname)): + return self.file_is_binary(fname) + # don't fail on dir-level symlinks + return False def file_is_binary(self, fname): """Determine if the file is a binary file or not. diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py index cfa20b95..84874475 100644 --- a/sos/cleaner/parsers/__init__.py +++ b/sos/cleaner/parsers/__init__.py @@ -107,6 +107,8 @@ class SoSCleanerParser(): for pair in sorted(self.mapping.dataset.items(), reverse=True, key=lambda x: len(x[0])): key, val = pair + if key in self.mapping.skip_keys: + continue if key in string_data: string_data = string_data.replace(key, val) return string_data |