diff options
author | Jake Hunsaker <jhunsake@redhat.com> | 2021-04-14 11:55:03 -0400 |
---|---|---|
committer | Jake Hunsaker <jhunsake@redhat.com> | 2021-04-16 10:35:29 -0400 |
commit | fc0218638f3e865c4315823e72aef2f46d012d07 (patch) | |
tree | b721ec4b2ab6d9612f38c93869475e3af643f2c6 | |
parent | 0add61f7c46d18c81d4d04e3ecdfe04fee677aca (diff) | |
download | sos-fc0218638f3e865c4315823e72aef2f46d012d07.tar.gz |
[clean] Load maps from all archives before obfuscation loop
Previously, maps were prepped from each archive only after that archive was
extracted. This reduced the amount of file IO being done, but meant that
values first discovered in later archives in a series would not be obfuscated
in the archives that had already been processed before those later archives
were extracted.
Fix this by reading the map prep files from every archive into memory and
using them to prep the maps before we enter the obfuscation loop at all.
Closes: #2490
Related: RHBZ#1930181
Resolves: #2492
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
-rw-r--r-- | sos/cleaner/__init__.py | 69 | ||||
-rw-r--r-- | sos/cleaner/parsers/username_parser.py | 13 |
2 files changed, 45 insertions, 37 deletions
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index b9eb61ef..d10cdc55 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -292,6 +292,7 @@ third party. # we have at least one valid target to obfuscate self.completed_reports = [] + self.preload_all_archives_into_maps() self.obfuscate_report_paths() if not self.completed_reports: @@ -473,6 +474,44 @@ third party. self.ui_log.info("Exiting on user cancel") os._exit(130) + def preload_all_archives_into_maps(self): + """Before doing the actual obfuscation, if we have multiple archives + to obfuscate then we need to preload each of them into the mappings + to ensure that node1 is obfuscated in node2 as well as node2 being + obfuscated in node1's archive. + """ + self.log_info("Pre-loading multiple archives into obfuscation maps") + for _arc in self.report_paths: + is_dir = os.path.isdir(_arc) + if is_dir: + _arc_name = _arc + else: + archive = tarfile.open(_arc) + _arc_name = _arc.split('/')[-1].split('.tar')[0] + # for each parser, load the map_prep_file into memory, and then + # send that for obfuscation. We don't actually obfuscate the file + # here, do that in the normal archive loop + for _parser in self.parsers: + if not _parser.prep_map_file: + continue + _arc_path = os.path.join(_arc_name, _parser.prep_map_file) + try: + if is_dir: + _pfile = open(_arc_path, 'r') + content = _pfile.read() + else: + _pfile = archive.extractfile(_arc_path) + content = _pfile.read().decode('utf-8') + _pfile.close() + if isinstance(_parser, SoSUsernameParser): + _parser.load_usernames_into_map(content) + for line in content.splitlines(): + if isinstance(_parser, SoSHostnameParser): + _parser.load_hostname_into_map(line) + self.obfuscate_line(line, _parser.prep_map_file) + except Exception as err: + self.log_debug("Could not prep %s: %s" % (_arc_path, err)) + def obfuscate_report(self, report): """Individually handle each archive or directory we've discovered by running through each file therein. 
@@ -493,7 +532,6 @@ third party. start_time = datetime.now() arc_md.add_field('start_time', start_time) archive.extract() - self.prep_maps_from_archive(archive) archive.report_msg("Beginning obfuscation...") file_list = archive.get_file_list() @@ -542,35 +580,6 @@ third party. self.ui_log.info("Exception while processing %s: %s" % (report, err)) - def prep_maps_from_archive(self, archive): - """Open specific files from an archive and try to load those values - into our mappings before iterating through the entire archive. - - Positional arguments: - - :param archive SoSObfuscationArchive: An open archive object - """ - for parser in self.parsers: - if not parser.prep_map_file: - continue - prep_file = archive.get_file_path(parser.prep_map_file) - if not prep_file: - self.log_debug("Could not prepare %s: %s does not exist" - % (parser.name, parser.prep_map_file), - caller=archive.archive_name) - continue - # this is a bit clunky, but we need to load this particular - # parser in a different way due to how hostnames are validated for - # obfuscation - if isinstance(parser, SoSHostnameParser): - with open(prep_file, 'r') as host_file: - hostname = host_file.readline().strip() - parser.load_hostname_into_map(hostname) - if isinstance(parser, SoSUsernameParser): - parser.load_usernames_into_map(prep_file) - self.obfuscate_file(prep_file, parser.prep_map_file, - archive.archive_name) - def obfuscate_file(self, filename, short_name=None, arc_name=None): """Obfuscate and individual file, line by line. 
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py index 5223c018..2bb6c7f3 100644 --- a/sos/cleaner/parsers/username_parser.py +++ b/sos/cleaner/parsers/username_parser.py @@ -39,16 +39,15 @@ class SoSUsernameParser(SoSCleanerParser): super(SoSUsernameParser, self).__init__(conf_file) self.mapping.load_names_from_options(opt_names) - def load_usernames_into_map(self, fname): + def load_usernames_into_map(self, content): """Since we don't get the list of usernames from a straight regex for this parser, we need to override the initial parser prepping here. """ - with open(fname, 'r') as lastfile: - for line in lastfile.read().splitlines()[1:]: - user = line.split()[0] - if user in self.skip_list: - continue - self.mapping.get(user) + for line in content.splitlines()[1:]: + user = line.split()[0] + if user in self.skip_list: + continue + self.mapping.get(user) def parse_line(self, line): count = 0 |