aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJake Hunsaker <jhunsake@redhat.com>2022-05-18 13:39:38 -0400
committerJake Hunsaker <jhunsake@redhat.com>2022-06-08 09:00:00 -0400
commit1dc3625fabea7331570f713fd1c87ac812d72d92 (patch)
treed8a26dc8ebc432d35bb2cec1ce6db1eadf567fe7
parent7d1ee59fc659467e6860e72322e976ddc5c17db3 (diff)
downloadsos-1dc3625fabea7331570f713fd1c87ac812d72d92.tar.gz
[Plugin] Make forbidden path checks more efficient
Forbidden path checks have up until now worked by taking a given file path (potentially with globs), expanding that against all discovered files that actually exist on the system, and then comparing a potential collection path against that list. While this works, and works reasonably fast for most scenarios, it isn't very efficient and causes significant slow downs when a non-standard configuration is in play - e.g. thousands of block devices which sos would individually have to compare against tens of thousands of paths for every path the `block` plugin wants to collect. Improve this by first not expanding the forbidden path globs, but taking them as distinct patterns, translating from shell-style (to maintain historical precedent of using globs to specify paths to be skipped) to python regex patterns as needed. Second, use `re` to handle our pattern matching for comparison against the distinct patterns provided by a plugin to skip. Closes: #2938 Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
-rw-r--r--sos/report/plugins/__init__.py20
-rw-r--r--sos/report/plugins/cgroups.py6
-rw-r--r--sos/report/plugins/pulpcore.py2
-rw-r--r--sos/report/plugins/rhui.py2
4 files changed, 13 insertions, 17 deletions
diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py
index 2a42e6b0..ba1397a8 100644
--- a/sos/report/plugins/__init__.py
+++ b/sos/report/plugins/__init__.py
@@ -46,11 +46,6 @@ def _mangle_command(command, name_max):
return mangledname
-def _path_in_path_list(path, path_list):
- return any((p == path or path.startswith(os.path.abspath(p)+os.sep)
- for p in path_list))
-
-
def _node_type(st):
""" return a string indicating the type of special node represented by
the stat buffer st (block, character, fifo, socket).
@@ -1407,7 +1402,9 @@ class Plugin():
return None
def _is_forbidden_path(self, path):
- return _path_in_path_list(path, self.forbidden_paths)
+ return any(
+ re.match(forbid, path) for forbid in self.forbidden_paths
+ )
def _is_policy_forbidden_path(self, path):
return any([
@@ -1495,14 +1492,12 @@ class Plugin():
'symlink': "no"
})
- def add_forbidden_path(self, forbidden, recursive=False):
+ def add_forbidden_path(self, forbidden):
"""Specify a path, or list of paths, to not copy, even if it's part of
an ``add_copy_spec()`` call
:param forbidden: A filepath to forbid collection from
:type forbidden: ``str`` or a ``list`` of strings
-
- :param recursive: Should forbidden glob be applied recursively
"""
if isinstance(forbidden, str):
forbidden = [forbidden]
@@ -1512,8 +1507,11 @@ class Plugin():
for forbid in forbidden:
self._log_info("adding forbidden path '%s'" % forbid)
- for path in glob.glob(forbid, recursive=recursive):
- self.forbidden_paths.append(path)
+ if "*" in forbid:
+ # calling translate() here on a dir-level path will break the
+ # re.match() call during path comparison
+ forbid = fnmatch.translate(forbid)
+ self.forbidden_paths.append(forbid)
def set_option(self, optionname, value):
"""Set the named option to value. Ensure the original type of the
diff --git a/sos/report/plugins/cgroups.py b/sos/report/plugins/cgroups.py
index 6e2a6918..20d299cf 100644
--- a/sos/report/plugins/cgroups.py
+++ b/sos/report/plugins/cgroups.py
@@ -31,10 +31,8 @@ class Cgroups(Plugin, DebianPlugin, UbuntuPlugin, CosPlugin):
self.add_cmd_output("systemd-cgls")
self.add_forbidden_path(
- "/sys/fs/cgroup/memory/**/memory.kmem.slabinfo",
- recursive=True)
-
- return
+ "/sys/fs/cgroup/memory/**/memory.kmem.slabinfo"
+ )
class RedHatCgroups(Cgroups, RedHatPlugin):
diff --git a/sos/report/plugins/pulpcore.py b/sos/report/plugins/pulpcore.py
index 6c4237ca..f6bc194c 100644
--- a/sos/report/plugins/pulpcore.py
+++ b/sos/report/plugins/pulpcore.py
@@ -89,7 +89,7 @@ class PulpCore(Plugin, IndependentPlugin):
"/etc/pki/pulp/*"
])
# skip collecting certificate keys
- self.add_forbidden_path("/etc/pki/pulp/**/*.key", recursive=True)
+ self.add_forbidden_path("/etc/pki/pulp/**/*.key")
self.add_cmd_output("curl -ks https://localhost/pulp/api/v3/status/",
suggest_filename="pulp_status")
diff --git a/sos/report/plugins/rhui.py b/sos/report/plugins/rhui.py
index add02461..8063fd51 100644
--- a/sos/report/plugins/rhui.py
+++ b/sos/report/plugins/rhui.py
@@ -30,7 +30,7 @@ class Rhui(Plugin, RedHatPlugin):
"/var/log/rhui/*",
])
# skip collecting certificate keys
- self.add_forbidden_path("/etc/pki/rhui/**/*.key", recursive=True)
+ self.add_forbidden_path("/etc/pki/rhui/**/*.key")
# call rhui-manager commands with 1m timeout and
# with an env. variable ensuring that "RHUI Username:"