[clean] Load maps from all archives before obfuscation loop

Previously, maps were prepped from each archive only after that archive had been extracted. This reduced the amount of file IO being done, but it meant that values first discovered in later archives in a series were not obfuscated in the archives that had already been processed before those later archives were extracted. Fix this by reading the map prep files into memory for every archive, so that the maps are fully prepped before we enter the obfuscation loop at all. Closes: #2490 Related: RHBZ#1930181 Resolves: #2492 Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
author: Jake Hunsaker <jhunsake@redhat.com> 2021-04-14 11:55:03 -0400
committer: Jake Hunsaker <jhunsake@redhat.com> 2021-04-16 10:35:29 -0400
commit: fc0218638f3e865c4315823e72aef2f46d012d07 (patch)
tree: b721ec4b2ab6d9612f38c93869475e3af643f2c6
parent: 0add61f7c46d18c81d4d04e3ecdfe04fee677aca (diff)
download: sos-fc0218638f3e865c4315823e72aef2f46d012d07.tar.gz
2 files changed, 45 insertions, 37 deletions
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
index b9eb61ef..d10cdc55 100644
--- a/sos/cleaner/__init__.py
+++ b/sos/cleaner/__init__.py
@@ -292,6 +292,7 @@ third party.
 
         # we have at least one valid target to obfuscate
         self.completed_reports = []
+        self.preload_all_archives_into_maps()
         self.obfuscate_report_paths()
 
         if not self.completed_reports:
@@ -473,6 +474,44 @@ third party.
             self.ui_log.info("Exiting on user cancel")
             os._exit(130)
 
+    def preload_all_archives_into_maps(self):
+        """Before doing the actual obfuscation, if we have multiple archives
+        to obfuscate then we need to preload each of them into the mappings
+        to ensure that node1 is obfuscated in node2 as well as node2 being
+        obfuscated in node1's archive.
+        """
+        self.log_info("Pre-loading multiple archives into obfuscation maps")
+        for _arc in self.report_paths:
+            is_dir = os.path.isdir(_arc)
+            if is_dir:
+                _arc_name = _arc
+            else:
+                archive = tarfile.open(_arc)
+                _arc_name = _arc.split('/')[-1].split('.tar')[0]
+            # for each parser, load the map_prep_file into memory, and then
+            # send that for obfuscation. We don't actually obfuscate the file
+            # here, do that in the normal archive loop
+            for _parser in self.parsers:
+                if not _parser.prep_map_file:
+                    continue
+                _arc_path = os.path.join(_arc_name, _parser.prep_map_file)
+                try:
+                    if is_dir:
+                        _pfile = open(_arc_path, 'r')
+                        content = _pfile.read()
+                    else:
+                        _pfile = archive.extractfile(_arc_path)
+                        content = _pfile.read().decode('utf-8')
+                    _pfile.close()
+                    if isinstance(_parser, SoSUsernameParser):
+                        _parser.load_usernames_into_map(content)
+                    for line in content.splitlines():
+                        if isinstance(_parser, SoSHostnameParser):
+                            _parser.load_hostname_into_map(line)
+                        self.obfuscate_line(line, _parser.prep_map_file)
+                except Exception as err:
+                    self.log_debug("Could not prep %s: %s" % (_arc_path, err))
+
     def obfuscate_report(self, report):
         """Individually handle each archive or directory we've discovered by
         running through each file therein.
@@ -493,7 +532,6 @@ third party.
             start_time = datetime.now()
             arc_md.add_field('start_time', start_time)
             archive.extract()
-            self.prep_maps_from_archive(archive)
             archive.report_msg("Beginning obfuscation...")
 
             file_list = archive.get_file_list()
@@ -542,35 +580,6 @@ third party.
             self.ui_log.info("Exception while processing %s: %s"
                              % (report, err))
 
-    def prep_maps_from_archive(self, archive):
-        """Open specific files from an archive and try to load those values
-        into our mappings before iterating through the entire archive.
-
-        Positional arguments:
-
-            :param archive SoSObfuscationArchive:   An open archive object
-        """
-        for parser in self.parsers:
-            if not parser.prep_map_file:
-                continue
-            prep_file = archive.get_file_path(parser.prep_map_file)
-            if not prep_file:
-                self.log_debug("Could not prepare %s: %s does not exist"
-                               % (parser.name, parser.prep_map_file),
-                               caller=archive.archive_name)
-                continue
-            # this is a bit clunky, but we need to load this particular
-            # parser in a different way due to how hostnames are validated for
-            # obfuscation
-            if isinstance(parser, SoSHostnameParser):
-                with open(prep_file, 'r') as host_file:
-                    hostname = host_file.readline().strip()
-                    parser.load_hostname_into_map(hostname)
-            if isinstance(parser, SoSUsernameParser):
-                parser.load_usernames_into_map(prep_file)
-            self.obfuscate_file(prep_file, parser.prep_map_file,
-                                archive.archive_name)
-
     def obfuscate_file(self, filename, short_name=None, arc_name=None):
         """Obfuscate and individual file, line by line.
 
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
index 5223c018..2bb6c7f3 100644
--- a/sos/cleaner/parsers/username_parser.py
+++ b/sos/cleaner/parsers/username_parser.py
@@ -39,16 +39,15 @@ class SoSUsernameParser(SoSCleanerParser):
         super(SoSUsernameParser, self).__init__(conf_file)
         self.mapping.load_names_from_options(opt_names)
 
-    def load_usernames_into_map(self, fname):
+    def load_usernames_into_map(self, content):
         """Since we don't get the list of usernames from a straight regex for
         this parser, we need to override the initial parser prepping here.
         """
-        with open(fname, 'r') as lastfile:
-            for line in lastfile.read().splitlines()[1:]:
-                user = line.split()[0]
-                if user in self.skip_list:
-                    continue
-                self.mapping.get(user)
+        for line in content.splitlines()[1:]:
+            user = line.split()[0]
+            if user in self.skip_list:
+                continue
+            self.mapping.get(user)
 
     def parse_line(self, line):
         count = 0
author	Jake Hunsaker <jhunsake@redhat.com>	2021-04-14 11:55:03 -0400
committer	Jake Hunsaker <jhunsake@redhat.com>	2021-04-16 10:35:29 -0400
commit	fc0218638f3e865c4315823e72aef2f46d012d07 (patch)
tree	b721ec4b2ab6d9612f38c93869475e3af643f2c6
parent	0add61f7c46d18c81d4d04e3ecdfe04fee677aca (diff)
download	sos-fc0218638f3e865c4315823e72aef2f46d012d07.tar.gz