From fc6721ac83c2626982f6c796f82b1386f123c66f Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Fri, 22 Mar 2019 12:22:15 -0400 Subject: [Plugin] Terminate running commands when a plugin exceeds timeout Previously, if a plugin timeout was hit the thread it was running in would be destroyed, however if it were running a command (ultimately handled by a Popen() call), that command would still continue to run under PID 1. This could then cause sos to report I/O exceptions after the archive has been closed with an attempt to write to the temporary directory used for the archive. Now, utilities.sos_get_command_output() accepts (and is handed from Plugin) a polling method that is used to monitor if the timeout has been hit and will call terminate() to end kill the running command if that polling method returns True. For commands run outside of a plugin, previous behavior remains the same since now AsyncReader.get_contents() will block until the command completes (regardless of exit code) rather than relying on the join() when an AsyncReader was initialized as was done before. Fixes: #1590 Resolves: #1615 Signed-off-by: Jake Hunsaker Signed-off-by: Bryn M. Reeves --- sos/plugins/__init__.py | 22 +++++++++++++++++++++- sos/sosreport.py | 9 +++++++-- sos/utilities.py | 18 ++++++++++++++++-- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/sos/plugins/__init__.py b/sos/plugins/__init__.py index 767e253d..c7dde774 100644 --- a/sos/plugins/__init__.py +++ b/sos/plugins/__init__.py @@ -128,6 +128,7 @@ class Plugin(object): profiles = () sysroot = '/' plugin_timeout = 300 + _timeout_hit = False def __init__(self, commons): if not getattr(self, "option_list", False): @@ -184,6 +185,24 @@ class Plugin(object): return _timeout return self.plugin_timeout + def check_timeout(self): + ''' + Checks to see if the plugin has hit its timeout. + + This is set when the sos.collect_plugin() method hits a timeout and + terminates the thread. From there, a Popen() call can still continue to + run, and we need to manually terminate it. Thus, check_timeout() should + only be called in sos_get_command_output(). + + Since sos_get_command_output() is not plugin aware, this method is + handed to that call to use as a polling method, to avoid passing the + entire plugin object. + + Returns True if timeout has been hit, else False. + + ''' + return self._timeout_hit + @classmethod def name(cls): """Returns the plugin's name as a string. This should return a @@ -682,7 +701,8 @@ class Plugin(object): result = sos_get_command_output(prog, timeout=timeout, stderr=stderr, chroot=root, chdir=runat, env=env, binary=binary, - sizelimit=sizelimit) + sizelimit=sizelimit, + poller=self.check_timeout) if result['status'] == 124: self._log_warn("command '%s' timed out after %ds" diff --git a/sos/sosreport.py b/sos/sosreport.py index 6337a0c8..7cd27d96 100644 --- a/sos/sosreport.py +++ b/sos/sosreport.py @@ -23,7 +23,7 @@ import logging from argparse import ArgumentParser, Action from sos.plugins import import_plugin -from sos.utilities import ImporterHelper +from sos.utilities import ImporterHelper, SoSTimeoutError from shutil import rmtree import tempfile import hashlib @@ -993,11 +993,12 @@ class SoSReport(object): # it as a literal 0-second timeout timeout = self.loaded_plugins[plugin[0]-1][1].timeout or None t.result(timeout=timeout) - return True except TimeoutError: self.ui_log.error("\n Plugin %s timed out\n" % plugin[1]) self.running_plugs.remove(plugin[1]) + self.loaded_plugins[plugin[0]-1][1]._timeout_hit = True pool._threads.clear() + return True def collect_plugin(self, plugin): try: @@ -1037,6 +1038,10 @@ class SoSReport(object): status = "\n Finished running plugins" if status: self.ui_progress(status) + except SoSTimeoutError: + # we already log and handle the plugin timeout in the nested thread + # pool this is running in, so don't do anything here. + pass except (OSError, IOError) as e: if e.errno in fatal_fs_errors: self.ui_log.error("\n %s while collecting plugin data\n" diff --git a/sos/utilities.py b/sos/utilities.py index bd7004b9..1737478f 100644 --- a/sos/utilities.py +++ b/sos/utilities.py @@ -106,7 +106,7 @@ def is_executable(command): def sos_get_command_output(command, timeout=300, stderr=False, chroot=None, chdir=None, env=None, - binary=False, sizelimit=None): + binary=False, sizelimit=None, poller=None): """Execute a command and return a dictionary of status and output, optionally changing root or current working directory before executing command. @@ -153,6 +153,11 @@ def sos_get_command_output(command, timeout=300, stderr=False, preexec_fn=_child_prep_fn) reader = AsyncReader(p.stdout, sizelimit, binary) + if poller: + while reader.running: + if poller(): + p.terminate() + raise SoSTimeoutError stdout = reader.get_contents() while p.poll() is None: pass @@ -214,8 +219,8 @@ class AsyncReader(threading.Thread): sizelimit = sizelimit * 1048576 # convert to bytes slots = int(sizelimit / self.chunksize) self.deque = deque(maxlen=slots) + self.running = True self.start() - self.join() def run(self): '''Reads from the channel (pipe) that is the output pipe for a @@ -236,9 +241,14 @@ class AsyncReader(threading.Thread): except (ValueError, IOError): # pipe has closed, meaning command output is done pass + self.running = False def get_contents(self): '''Returns the contents of the deque as a string''' + # block until command completes or timesout (separate from the plugin + # hitting a timeout) + while self.running: + pass if not self.binary: return ''.join(ln.decode('utf-8', 'ignore') for ln in self.deque) else: @@ -289,4 +299,8 @@ class ImporterHelper(object): return plugins + +class SoSTimeoutError(OSError): + pass + # vim: set et ts=4 sw=4 : -- cgit