diff options
author | Jake Hunsaker <jhunsake@redhat.com> | 2022-05-18 13:39:38 -0400 |
---|---|---|
committer | Jake Hunsaker <jhunsake@redhat.com> | 2022-06-08 09:00:00 -0400 |
commit | 1dc3625fabea7331570f713fd1c87ac812d72d92 (patch) | |
tree | d8a26dc8ebc432d35bb2cec1ce6db1eadf567fe7 | |
parent | 7d1ee59fc659467e6860e72322e976ddc5c17db3 (diff) | |
download | sos-1dc3625fabea7331570f713fd1c87ac812d72d92.tar.gz |
[Plugin] Make forbidden path checks more efficient
Forbidden path checks have up until now worked by taking a given file
path (potentially with globs), expanding that against all discovered
files that actually exist on the system, and then comparing a potential
collection path against that list.
While this works, and works reasonably fast for most scenarios, it isn't
very efficient and causes significant slow downs when a non-standard
configuration is in play - e.g. thousands of block devices which sos
would individually have to compare against tens of thousands of paths
for every path the `block` plugin wants to collect.
Improve this by first not expanding the forbidden path globs, but taking
them as distinct patterns, translating from shell-style (to maintain
historical precedent of using globs to specify paths to be skipped) to
python regex patterns as needed. Second, use `re` to handle our pattern
matching for comparison against the distinct patterns provided by a
plugin to skip.
Closes: #2938
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
-rw-r--r-- | sos/report/plugins/__init__.py | 20 | ||||
-rw-r--r-- | sos/report/plugins/cgroups.py | 6 | ||||
-rw-r--r-- | sos/report/plugins/pulpcore.py | 2 | ||||
-rw-r--r-- | sos/report/plugins/rhui.py | 2 |
4 files changed, 13 insertions, 17 deletions
diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py index 2a42e6b0..ba1397a8 100644 --- a/sos/report/plugins/__init__.py +++ b/sos/report/plugins/__init__.py @@ -46,11 +46,6 @@ def _mangle_command(command, name_max): return mangledname -def _path_in_path_list(path, path_list): - return any((p == path or path.startswith(os.path.abspath(p)+os.sep) - for p in path_list)) - - def _node_type(st): """ return a string indicating the type of special node represented by the stat buffer st (block, character, fifo, socket). @@ -1407,7 +1402,9 @@ class Plugin(): return None def _is_forbidden_path(self, path): - return _path_in_path_list(path, self.forbidden_paths) + return any( + re.match(forbid, path) for forbid in self.forbidden_paths + ) def _is_policy_forbidden_path(self, path): return any([ @@ -1495,14 +1492,12 @@ class Plugin(): 'symlink': "no" }) - def add_forbidden_path(self, forbidden, recursive=False): + def add_forbidden_path(self, forbidden): """Specify a path, or list of paths, to not copy, even if it's part of an ``add_copy_spec()`` call :param forbidden: A filepath to forbid collection from :type forbidden: ``str`` or a ``list`` of strings - - :param recursive: Should forbidden glob be applied recursively """ if isinstance(forbidden, str): forbidden = [forbidden] @@ -1512,8 +1507,11 @@ class Plugin(): for forbid in forbidden: self._log_info("adding forbidden path '%s'" % forbid) - for path in glob.glob(forbid, recursive=recursive): - self.forbidden_paths.append(path) + if "*" in forbid: + # calling translate() here on a dir-level path will break the + # re.match() call during path comparison + forbid = fnmatch.translate(forbid) + self.forbidden_paths.append(forbid) def set_option(self, optionname, value): """Set the named option to value. Ensure the original type of the diff --git a/sos/report/plugins/cgroups.py b/sos/report/plugins/cgroups.py index 6e2a6918..20d299cf 100644 --- a/sos/report/plugins/cgroups.py +++ b/sos/report/plugins/cgroups.py @@ -31,10 +31,8 @@ class Cgroups(Plugin, DebianPlugin, UbuntuPlugin, CosPlugin): self.add_cmd_output("systemd-cgls") self.add_forbidden_path( - "/sys/fs/cgroup/memory/**/memory.kmem.slabinfo", - recursive=True) - - return + "/sys/fs/cgroup/memory/**/memory.kmem.slabinfo" + ) class RedHatCgroups(Cgroups, RedHatPlugin): diff --git a/sos/report/plugins/pulpcore.py b/sos/report/plugins/pulpcore.py index 6c4237ca..f6bc194c 100644 --- a/sos/report/plugins/pulpcore.py +++ b/sos/report/plugins/pulpcore.py @@ -89,7 +89,7 @@ class PulpCore(Plugin, IndependentPlugin): "/etc/pki/pulp/*" ]) # skip collecting certificate keys - self.add_forbidden_path("/etc/pki/pulp/**/*.key", recursive=True) + self.add_forbidden_path("/etc/pki/pulp/**/*.key") self.add_cmd_output("curl -ks https://localhost/pulp/api/v3/status/", suggest_filename="pulp_status") diff --git a/sos/report/plugins/rhui.py b/sos/report/plugins/rhui.py index add02461..8063fd51 100644 --- a/sos/report/plugins/rhui.py +++ b/sos/report/plugins/rhui.py @@ -30,7 +30,7 @@ class Rhui(Plugin, RedHatPlugin): "/var/log/rhui/*", ]) # skip collecting certificate keys - self.add_forbidden_path("/etc/pki/rhui/**/*.key", recursive=True) + self.add_forbidden_path("/etc/pki/rhui/**/*.key") # call rhui-manager commands with 1m timeout and # with an env. variable ensuring that "RHUI Username:" |