From fc0218638f3e865c4315823e72aef2f46d012d07 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Wed, 14 Apr 2021 11:55:03 -0400 Subject: [clean] Load maps from all archives before obfuscation loop Previously, maps were being prepped from each archive only after that archive was extracted. This reduced the amount of file IO being done, but it meant that values discovered in later archives in a series were not obfuscated in archives that had already been processed before those later archives were extracted. Fix this by reading the map prep files of every archive into memory and prepping the maps before we enter the obfuscation loop at all. Closes: #2490 Related: RHBZ#1930181 Resolves: #2492 Signed-off-by: Jake Hunsaker --- sos/cleaner/__init__.py | 69 +++++++++++++++++++--------------- sos/cleaner/parsers/username_parser.py | 13 +++---- 2 files changed, 45 insertions(+), 37 deletions(-) diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index b9eb61ef..d10cdc55 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -292,6 +292,7 @@ third party. # we have at least one valid target to obfuscate self.completed_reports = [] + self.preload_all_archives_into_maps() self.obfuscate_report_paths() if not self.completed_reports: @@ -473,6 +474,44 @@ third party. self.ui_log.info("Exiting on user cancel") os._exit(130) + def preload_all_archives_into_maps(self): + """Before doing the actual obfuscation, if we have multiple archives + to obfuscate then we need to preload each of them into the mappings + to ensure that node1 is obfuscated in node2 as well as node2 being + obfuscated in node1's archive. + """ + self.log_info("Pre-loading multiple archives into obfuscation maps") + for _arc in self.report_paths: + is_dir = os.path.isdir(_arc) + if is_dir: + _arc_name = _arc + else: + archive = tarfile.open(_arc) + _arc_name = _arc.split('/')[-1].split('.tar')[0] + # for each parser, load the prep_map_file into memory, and then + # send that for obfuscation. 
We don't actually obfuscate the file + # here, do that in the normal archive loop + for _parser in self.parsers: + if not _parser.prep_map_file: + continue + _arc_path = os.path.join(_arc_name, _parser.prep_map_file) + try: + if is_dir: + _pfile = open(_arc_path, 'r') + content = _pfile.read() + else: + _pfile = archive.extractfile(_arc_path) + content = _pfile.read().decode('utf-8') + _pfile.close() + if isinstance(_parser, SoSUsernameParser): + _parser.load_usernames_into_map(content) + for line in content.splitlines(): + if isinstance(_parser, SoSHostnameParser): + _parser.load_hostname_into_map(line) + self.obfuscate_line(line, _parser.prep_map_file) + except Exception as err: + self.log_debug("Could not prep %s: %s" % (_arc_path, err)) + def obfuscate_report(self, report): """Individually handle each archive or directory we've discovered by running through each file therein. @@ -493,7 +532,6 @@ third party. start_time = datetime.now() arc_md.add_field('start_time', start_time) archive.extract() - self.prep_maps_from_archive(archive) archive.report_msg("Beginning obfuscation...") file_list = archive.get_file_list() @@ -542,35 +580,6 @@ third party. self.ui_log.info("Exception while processing %s: %s" % (report, err)) - def prep_maps_from_archive(self, archive): - """Open specific files from an archive and try to load those values - into our mappings before iterating through the entire archive. 
- - Positional arguments: - - :param archive SoSObfuscationArchive: An open archive object - """ - for parser in self.parsers: - if not parser.prep_map_file: - continue - prep_file = archive.get_file_path(parser.prep_map_file) - if not prep_file: - self.log_debug("Could not prepare %s: %s does not exist" - % (parser.name, parser.prep_map_file), - caller=archive.archive_name) - continue - # this is a bit clunky, but we need to load this particular - # parser in a different way due to how hostnames are validated for - # obfuscation - if isinstance(parser, SoSHostnameParser): - with open(prep_file, 'r') as host_file: - hostname = host_file.readline().strip() - parser.load_hostname_into_map(hostname) - if isinstance(parser, SoSUsernameParser): - parser.load_usernames_into_map(prep_file) - self.obfuscate_file(prep_file, parser.prep_map_file, - archive.archive_name) - def obfuscate_file(self, filename, short_name=None, arc_name=None): """Obfuscate and individual file, line by line. diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py index 5223c018..2bb6c7f3 100644 --- a/sos/cleaner/parsers/username_parser.py +++ b/sos/cleaner/parsers/username_parser.py @@ -39,16 +39,15 @@ class SoSUsernameParser(SoSCleanerParser): super(SoSUsernameParser, self).__init__(conf_file) self.mapping.load_names_from_options(opt_names) - def load_usernames_into_map(self, fname): + def load_usernames_into_map(self, content): """Since we don't get the list of usernames from a straight regex for this parser, we need to override the initial parser prepping here. """ - with open(fname, 'r') as lastfile: - for line in lastfile.read().splitlines()[1:]: - user = line.split()[0] - if user in self.skip_list: - continue - self.mapping.get(user) + for line in content.splitlines()[1:]: + user = line.split()[0] + if user in self.skip_list: + continue + self.mapping.get(user) def parse_line(self, line): count = 0 -- cgit