authorJake Hunsaker <jhunsake@redhat.com>2020-04-09 11:19:16 -0400
committerJake Hunsaker <jhunsake@redhat.com>2020-04-22 10:01:00 -0400
commitc3782e303b87fb1754825fa78a84b9ac52ebbcb3 (patch)
tree5ae7d3bb0070eaeb8639fd0420447206e3cca5b5
parentb1d1f30132a8719d2bdff3c1c25a0b183b094c82 (diff)
downloadsos-c3782e303b87fb1754825fa78a84b9ac52ebbcb3.tar.gz
[collector] Initial import of sos-collector into sos
This commit represents the first import of the sos-collector project into sos natively for 4.0. It is not expected to be functional as of this commit.

A minimal integration has been done at this point - namely tying SoSCollector() in as a SoSComponent() subclass and hooking up the parser functions. SoSCollector will load a policy in the same way as 'sos report', and should use the same logging methods, which will be done in a future commit in this series.

As a divergence from the standalone sos-collector project, this integration aims to hook host detection into the existing Policy model provided by sos. Additionally, we should be able to drop the Configuration dict-subclass approach by the time this series is ready for merge.

Related: #1988

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
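As a reference for how the new subcommand is wired in, here is a minimal, self-contained sketch (stand-in classes, not the actual sos code) of the component-registration pattern this commit introduces in sos/__init__.py: each subcommand maps to a (component class, aliases) tuple, and an alias such as 'collector' is resolved back to the canonical 'collect' subcommand before dispatch.

class SoSReport:              # stand-in for sos.report.SoSReport
    def execute(self):
        print('running report')

class SoSCollector:           # stand-in for sos.collector.SoSCollector
    def execute(self):
        print('running collect')

_components = {
    'report': (SoSReport, ['rep']),
    'collect': (SoSCollector, ['collector']),
}

def dispatch(subcommand):
    # resolve an alias to its canonical subcommand name, then run it
    for name, (cls, aliases) in _components.items():
        if subcommand == name or subcommand in aliases:
            return cls().execute()
    raise SystemExit('unknown subcommand: %s' % subcommand)

dispatch('collector')         # prints 'running collect'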
-rw-r--r--sos/__init__.py4
-rw-r--r--sos/collector/__init__.py877
-rw-r--r--sos/collector/clusters/__init__.py223
-rw-r--r--sos/collector/clusters/jbon.py30
-rw-r--r--sos/collector/clusters/kubernetes.py54
-rw-r--r--sos/collector/clusters/ovirt.py181
-rw-r--r--sos/collector/clusters/pacemaker.py57
-rw-r--r--sos/collector/clusters/satellite.py39
-rw-r--r--sos/collector/configuration.py238
-rw-r--r--sos/collector/exceptions.py108
-rw-r--r--sos/collector/hosts/__init__.py125
-rw-r--r--sos/collector/hosts/debian.py31
-rw-r--r--sos/collector/hosts/redhat.py83
-rw-r--r--sos/collector/sosnode.py819
14 files changed, 2868 insertions, 1 deletion
diff --git a/sos/__init__.py b/sos/__init__.py
index 5d333f7a..c0ac67a3 100644
--- a/sos/__init__.py
+++ b/sos/__init__.py
@@ -51,8 +51,10 @@ class SoS():
# of shorthand names to accept in place of the full subcommand
# if no aliases are desired, pass an empty list
import sos.report
+ import sos.collector
self._components = {
- 'report': (sos.report.SoSReport, ['rep'])
+ 'report': (sos.report.SoSReport, ['rep']),
+ 'collect': (sos.collector.SoSCollector, ['collector'])
}
# build the top-level parser
_com_string = ''
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
new file mode 100644
index 00000000..5999c894
--- /dev/null
+++ b/sos/collector/__init__.py
@@ -0,0 +1,877 @@
+# Copyright Red Hat 2020, Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+import fnmatch
+import inspect
+import json
+import logging
+import os
+import random
+import re
+import string
+import tarfile
+import tempfile
+import shutil
+import subprocess
+import sys
+
+from datetime import datetime
+from concurrent.futures import ThreadPoolExecutor
+from getpass import getpass
+from pipes import quote
+from textwrap import fill
+from sos.collector.configuration import Configuration
+from sos.collector.sosnode import SosNode
+from sos.collector.exceptions import ControlPersistUnsupportedException
+from sos.component import SoSComponent
+from sos import __version__
+
+COLLECTOR_LIB_DIR = '/var/lib/sos-collector'
+
+
+class SoSCollector(SoSComponent):
+ """Collect an sos report from multiple nodes simultaneously
+ """
+
+ arg_defaults = {
+ 'alloptions': False,
+ 'all_logs': False,
+ 'become_root': False,
+ 'batch': False,
+ 'case_id': False,
+ 'cluster_type': None,
+ 'cluster_options': [],
+ 'chroot': 'auto',
+ 'enable_plugins': [],
+ 'group': None,
+ 'save_group': '',
+ 'image': '',
+ 'ssh_key': '',
+ 'insecure_sudo': False,
+ 'plugin_options': [],
+ 'list_options': False,
+ 'label': '',
+ 'log_size': 0,
+ 'skip_plugins': [],
+ 'nodes': [],
+ 'no_pkg_check': False,
+ 'no_local': False,
+ 'master': '',
+ 'only_plugins': [],
+ 'ssh_port': 22,
+ 'password': False,
+ 'password_per_node': False,
+ 'preset': '',
+ 'sos_opt_line': '',
+ 'ssh_user': 'root',
+ 'timeout': 600,
+ 'verify': False,
+ 'compression': 'auto'
+ }
+
+ def __init__(self, parser, parsed_args, cmdline_args):
+ super(SoSCollector, self).__init__(parser, parsed_args, cmdline_args)
+ os.umask(0o77)
+ self.client_list = []
+ self.node_list = []
+ self.master = False
+ self.retrieved = 0
+ self.need_local_sudo = False
+ self.config = Configuration(parsed_args)
+ if not self.config['list_options']:
+ try:
+ self._check_for_control_persist()
+ self.log_debug('Executing %s' % ' '.join(s for s in sys.argv))
+ self.log_debug("Found cluster profiles: %s"
+ % self.clusters.keys())
+ self.log_debug("Found supported host types: %s"
+ % self.config['host_types'].keys())
+ self._parse_options()
+ self.prep()
+ except KeyboardInterrupt:
+ self._exit('Exiting on user cancel', 130)
+ except Exception:
+ raise
+
+ @classmethod
+ def add_parser_options(cls, parser):
+ parser.add_argument('-a', '--alloptions', action='store_true',
+ help='Enable all sos options')
+ parser.add_argument('--all-logs', action='store_true',
+ help='Collect logs regardless of size')
+ parser.add_argument('-b', '--become', action='store_true',
+ dest='become_root',
+ help='Become root on the remote nodes')
+ parser.add_argument('--batch', action='store_true',
+ help='Do not prompt interactively (except passwords)')
+ parser.add_argument('--case-id', help='Specify case number')
+ parser.add_argument('--cluster-type',
+ help='Specify a type of cluster profile')
+ parser.add_argument('-c', '--cluster-option', dest='cluster_options',
+ action='append',
+                        help=('Specify a cluster option used by a profile,'
+                        ' in the form cluster.option=value'
+ )
+ )
+ parser.add_argument('--chroot', default='',
+ choices=['auto', 'always', 'never'],
+ help="chroot executed commands to SYSROOT")
+ parser.add_argument('-e', '--enable-plugins', action="append",
+ help='Enable specific plugins for sosreport')
+ parser.add_argument('--group', default=None,
+ help='Use a predefined group JSON file')
+ parser.add_argument('--save-group', default='',
+ help='Save the resulting node list to a group')
+ parser.add_argument('--image',
+ help=('Specify the container image to use for '
+ 'containerized hosts. Defaults to the '
+ 'rhel7/support-tools image'))
+ parser.add_argument('-i', '--ssh-key', help='Specify an ssh key to use')
+ parser.add_argument('--insecure-sudo', action='store_true',
+ help='Use when passwordless sudo is configured')
+ parser.add_argument('-k', '--plugin-options', action="append",
+ help='Plugin option as plugname.option=value')
+ parser.add_argument('-l', '--list-options', action="store_true",
+ help='List options available for profiles')
+ parser.add_argument('--label', help='Assign a label to the archives')
+ parser.add_argument('--log-size', default=0, type=int,
+ help='Limit the size of individual logs (in MiB)')
+ parser.add_argument('-n', '--skip-plugins', action="append",
+ help='Skip these plugins')
+ parser.add_argument('--nodes', action="append",
+ help='Provide a comma delimited list of nodes, or a '
+ 'regex to match against')
+ parser.add_argument('--no-pkg-check', action='store_true',
+ help=('Do not run package checks. Use this '
+ 'with --cluster-type if there are rpm '
+ 'or apt issues on node'
+ )
+ )
+ parser.add_argument('--no-local', action='store_true',
+ help='Do not collect a sosreport from localhost')
+ parser.add_argument('--master', help='Specify a remote master node')
+ parser.add_argument('-o', '--only-plugins', action="append",
+ help='Run these plugins only')
+ parser.add_argument('-p', '--ssh-port', type=int,
+ help='Specify SSH port for all nodes')
+ parser.add_argument('--password', action='store_true', default=False,
+ help='Prompt for user password for nodes')
+ parser.add_argument('--password-per-node', action='store_true',
+ default=False,
+ help='Prompt for password separately for each node')
+ parser.add_argument('--preset', default='', required=False,
+ help='Specify a sos preset to use')
+ parser.add_argument('--sos-cmd', dest='sos_opt_line',
+ help=("Manually specify the commandline options for "
+ "sosreport on remote nodes")
+ )
+ parser.add_argument('--ssh-user',
+ help='Specify an SSH user. Default root')
+ parser.add_argument('--timeout', type=int, required=False,
+ help='Timeout for sosreport on each node. Default 300.'
+ )
+ parser.add_argument('--verify', action="store_true",
+ help="perform data verification during collection")
+ parser.add_argument('-z', '--compression-type', dest="compression",
+ choices=['auto', 'gzip', 'bzip2', 'xz'],
+ help="compression technology to use")
+
+ def _check_for_control_persist(self):
+        '''Checks to see if the local system supports SSH ControlPersist.
+
+ ControlPersist allows OpenSSH to keep a single open connection to a
+    remote host rather than building a new session each time. This is the
+    same feature that Ansible uses in place of paramiko, a dependency we
+    need to drop in sos-collector.
+
+    This check relies on feedback from the ssh binary. The command being
+    run should always generate stderr output, and depending on what that
+    output contains we can determine if ControlPersist is supported.
+
+ For our purposes, a host that does not support ControlPersist is not
+ able to run sos-collector.
+
+    Returns
+        True if ControlPersist is supported, else raises
+        ControlPersistUnsupportedException.
+ '''
+ ssh_cmd = ['ssh', '-o', 'ControlPersist']
+ cmd = subprocess.Popen(ssh_cmd, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ out, err = cmd.communicate()
+ err = err.decode('utf-8')
+ if 'Bad configuration option' in err or 'Usage:' in err:
+ raise ControlPersistUnsupportedException
+ return True
+
+ def _exit(self, msg, error=1):
+ '''Used to safely terminate if sos-collector encounters an error'''
+ self.log_error(msg)
+ try:
+ self.close_all_connections()
+ except Exception:
+ pass
+ sys.exit(error)
+
+ def _parse_options(self):
+ '''If there are cluster options set on the CLI, override the defaults
+ '''
+ if self.config['cluster_options']:
+ for opt in self.config['cluster_options']:
+ match = False
+ for clust in self.clusters:
+ for option in self.clusters[clust].options:
+ if opt.name == option.name:
+ match = True
+ break
+ if not match:
+ self._exit('Unknown cluster option provided: %s.%s'
+ % (opt.cluster, opt.name))
+
+ def _validate_option(self, default, cli):
+ '''Checks to make sure that the option given on the CLI is valid.
+ Valid in this sense means that the type of value given matches what a
+ cluster profile expects (str for str, bool for bool, etc).
+
+ For bool options, this will also convert the string equivalent to an
+ actual boolean value
+ '''
+ if not default.opt_type == bool:
+ if not default.opt_type == cli.opt_type:
+ msg = "Invalid option type for %s. Expected %s got %s"
+ self._exit(msg % (cli.name, default.opt_type, cli.opt_type))
+ return cli.value
+ else:
+ val = cli.value.lower()
+ if val not in ['true', 'on', 'false', 'off']:
+ msg = ("Invalid value for %s. Accepted values are: 'true', "
+ "'false', 'on', 'off'")
+ self._exit(msg % cli.name)
+ else:
+ if val in ['true', 'on']:
+ return True
+ else:
+ return False
+
+ def log_info(self, msg):
+ '''Log info messages to both console and log file'''
+ self.logger.info(msg)
+ self.console.info(msg)
+
+ def log_warn(self, msg):
+ '''Log warn messages to both console and log file'''
+ self.logger.warn(msg)
+ self.console.warn('WARNING: %s' % msg)
+
+ def log_error(self, msg):
+ '''Log error messages to both console and log file'''
+ self.logger.error(msg)
+ self.console.error(msg)
+
+ def log_debug(self, msg):
+ '''Log debug message to both console and log file'''
+ caller = inspect.stack()[1][3]
+ msg = '[sos_collector:%s] %s' % (caller, msg)
+ self.logger.debug(msg)
+ if self.config['verbose']:
+ self.console.debug(msg)
+
+ def create_tmp_dir(self, location='/var/tmp'):
+ '''Creates a temp directory to transfer sosreports to'''
+ tmpdir = tempfile.mkdtemp(prefix='sos-collector-', dir=location)
+ self.config['tmp_dir'] = tmpdir
+ self.config['tmp_dir_created'] = True
+
+ def list_options(self):
+ '''Display options for available clusters'''
+
+ sys.stdout.write('\nThe following clusters are supported by this '
+ 'installation\n')
+ sys.stdout.write('Use the short name with --cluster-type or cluster '
+ 'options (-c)\n\n')
+ for cluster in sorted(self.clusters):
+ sys.stdout.write(" {:<15} {:30}\n".format(
+ cluster,
+ self.clusters[cluster].cluster_name))
+
+ _opts = {}
+ for _cluster in self.clusters:
+ for opt in self.clusters[_cluster].options:
+ if opt.name not in _opts.keys():
+ _opts[opt.name] = opt
+ else:
+ for clust in opt.cluster:
+ if clust not in _opts[opt.name].cluster:
+ _opts[opt.name].cluster.append(clust)
+
+ sys.stdout.write('\nThe following cluster options are available:\n\n')
+ sys.stdout.write(' {:25} {:15} {:<10} {:10} {:<}\n'.format(
+ 'Cluster',
+ 'Option Name',
+ 'Type',
+ 'Default',
+ 'Description'
+ ))
+
+ for _opt in sorted(_opts, key=lambda x: _opts[x].cluster):
+ opt = _opts[_opt]
+ optln = ' {:25} {:15} {:<10} {:<10} {:<10}\n'.format(
+ ', '.join(c for c in sorted(opt.cluster)),
+ opt.name,
+ opt.opt_type.__name__,
+ str(opt.value),
+ opt.description)
+ sys.stdout.write(optln)
+ sys.stdout.write('\nOptions take the form of cluster.name=value'
+ '\nE.G. "ovirt.no-database=True" or '
+ '"pacemaker.offline=False"\n')
+
+ def delete_tmp_dir(self):
+ '''Removes the temp directory and all collected sosreports'''
+ shutil.rmtree(self.config['tmp_dir'])
+
+ def _get_archive_name(self):
+ '''Generates a name for the tarball archive'''
+ nstr = 'sos-collector'
+ if self.config['label']:
+ nstr += '-%s' % self.config['label']
+ if self.config['case_id']:
+ nstr += '-%s' % self.config['case_id']
+ dt = datetime.strftime(datetime.now(), '%Y-%m-%d')
+
+ try:
+ string.lowercase = string.ascii_lowercase
+ except NameError:
+ pass
+
+ rand = ''.join(random.choice(string.lowercase) for x in range(5))
+ return '%s-%s-%s' % (nstr, dt, rand)
+
+ def _get_archive_path(self):
+ '''Returns the path, including filename, of the tarball we build
+ that contains the collected sosreports
+ '''
+ self.arc_name = self._get_archive_name()
+ compr = 'gz'
+ return self.config['out_dir'] + self.arc_name + '.tar.' + compr
+
+ def _fmt_msg(self, msg):
+ width = 80
+ _fmt = ''
+ for line in msg.splitlines():
+ _fmt = _fmt + fill(line, width, replace_whitespace=False) + '\n'
+ return _fmt
+
+ def _load_group_config(self):
+ '''
+ Attempts to load the host group specified on the command line.
+ Host groups are defined via JSON files, typically saved under
+ /var/lib/sos-collector/, although users can specify a full filepath
+ on the commandline to point to one existing anywhere on the system
+
+ Host groups define a list of nodes and/or regexes and optionally the
+ master and cluster-type options.
+ '''
+ if os.path.exists(self.config['group']):
+ fname = self.config['group']
+ elif os.path.exists(
+ os.path.join(COLLECTOR_LIB_DIR, self.config['group'])
+ ):
+ fname = os.path.join(COLLECTOR_LIB_DIR, self.config['group'])
+ else:
+ raise OSError('Group not found')
+
+ self.log_debug("Loading host group %s" % fname)
+
+ with open(fname, 'r') as hf:
+ _group = json.load(hf)
+ for key in ['master', 'cluster_type']:
+ if _group[key]:
+ self.log_debug("Setting option '%s' to '%s' per host group"
+ % (key, _group[key]))
+ self.config[key] = _group[key]
+ if _group['nodes']:
+ self.log_debug("Adding %s to node list" % _group['nodes'])
+ self.config['nodes'].extend(_group['nodes'])
+
+ def write_host_group(self):
+ '''
+ Saves the results of this run of sos-collector to a host group file
+ on the system so it can be used later on.
+
+ The host group will save the options master, cluster_type, and nodes
+ as determined by sos-collector prior to execution of sosreports.
+ '''
+ cfg = {
+ 'name': self.config['save_group'],
+ 'master': self.config['master'],
+ 'cluster_type': self.config['cluster_type'],
+ 'nodes': [n for n in self.node_list]
+ }
+ if not os.path.isdir(COLLECTOR_LIB_DIR):
+ raise OSError("%s no such directory" % COLLECTOR_LIB_DIR)
+ fname = COLLECTOR_LIB_DIR + '/' + cfg['name']
+ with open(fname, 'w') as hf:
+ json.dump(cfg, hf)
+ os.chmod(fname, 0o644)
+ return fname
+
+ def prep(self):
+ '''Based on configuration, performs setup for collection'''
+ disclaimer = ("""\
+This utility is used to collect sosreports from multiple \
+nodes simultaneously. It uses OpenSSH's ControlPersist feature \
+to connect to nodes and run commands remotely. If your system \
+installation of OpenSSH is older than 5.6, please upgrade.
+
+An archive of sosreport tarballs collected from the nodes will be \
+generated in %s and may be provided to an appropriate support representative.
+
+The generated archive may contain data considered sensitive \
+and its content should be reviewed by the originating \
+organization before being passed to any third party.
+
+No configuration changes will be made to the system running \
+this utility or remote systems that it connects to.
+""")
+ self.console.info("\nsos-collector (version %s)\n" % __version__)
+ intro_msg = self._fmt_msg(disclaimer % self.config['tmp_dir'])
+ self.console.info(intro_msg)
+ prompt = "\nPress ENTER to continue, or CTRL-C to quit\n"
+ if not self.config['batch']:
+ input(prompt)
+
+ if (not self.config['password'] and not
+ self.config['password_per_node']):
+ self.log_debug('password not specified, assuming SSH keys')
+ msg = ('sos-collector ASSUMES that SSH keys are installed on all '
+ 'nodes unless the --password option is provided.\n')
+ self.console.info(self._fmt_msg(msg))
+
+ if self.config['password'] or self.config['password_per_node']:
+ self.log_debug('password specified, not using SSH keys')
+ msg = ('Provide the SSH password for user %s: '
+ % self.config['ssh_user'])
+ self.config['password'] = getpass(prompt=msg)
+
+ if self.config['need_sudo'] and not self.config['insecure_sudo']:
+ if not self.config['password']:
+ self.log_debug('non-root user specified, will request '
+ 'sudo password')
+ msg = ('A non-root user has been provided. Provide sudo '
+ 'password for %s on remote nodes: '
+ % self.config['ssh_user'])
+ self.config['sudo_pw'] = getpass(prompt=msg)
+ else:
+ if not self.config['insecure_sudo']:
+ self.config['sudo_pw'] = self.config['password']
+
+ if self.config['become_root']:
+ if not self.config['ssh_user'] == 'root':
+ self.log_debug('non-root user asking to become root remotely')
+ msg = ('User %s will attempt to become root. '
+ 'Provide root password: ' % self.config['ssh_user'])
+ self.config['root_password'] = getpass(prompt=msg)
+ self.config['need_sudo'] = False
+ else:
+ self.log_info('Option to become root but ssh user is root.'
+ ' Ignoring request to change user on node')
+ self.config['become_root'] = False
+
+ if self.config['group']:
+ try:
+ self._load_group_config()
+ except Exception as err:
+ self.log_error("Could not load specified group %s: %s"
+ % (self.config['group'], err))
+
+ if self.config['master']:
+ self.connect_to_master()
+ self.config['no_local'] = True
+ else:
+ try:
+ self.master = SosNode('localhost', self.config)
+ except Exception as err:
+ self.log_debug("Unable to determine local installation: %s" %
+ err)
+ self._exit('Unable to determine local installation. Use the '
+ '--no-local option if localhost should not be '
+ 'included.\nAborting...\n', 1)
+
+ if self.config['cluster_type']:
+ if self.config['cluster_type'] == 'none':
+ self.config['cluster'] = self.clusters['jbon']
+ else:
+ self.config['cluster'] = self.clusters[
+ self.config['cluster_type']
+ ]
+ self.config['cluster'].master = self.master
+ else:
+ self.determine_cluster()
+ if self.config['cluster'] is None and not self.config['nodes']:
+ msg = ('Cluster type could not be determined and no nodes provided'
+ '\nAborting...')
+ self._exit(msg, 1)
+ if self.config['cluster']:
+ self.config['cluster'].setup()
+ self.config['cluster'].modify_sos_cmd()
+ self.get_nodes()
+ if self.config['save_group']:
+ gname = self.config['save_group']
+ try:
+ fname = self.write_host_group()
+ self.log_info("Wrote group '%s' to %s" % (gname, fname))
+ except Exception as err:
+ self.log_error("Could not save group %s: %s" % (gname, err))
+ self.intro()
+ self.configure_sos_cmd()
+
+ def intro(self):
+ '''Prints initial messages and collects user and case if not
+ provided already.
+ '''
+ self.console.info('')
+
+ if not self.node_list and not self.master.connected:
+ self._exit('No nodes were detected, or nodes do not have sos '
+ 'installed.\nAborting...')
+
+ self.console.info('The following is a list of nodes to collect from:')
+ if self.master.connected:
+ self.console.info('\t%-*s' % (self.config['hostlen'],
+ self.config['master']))
+
+ for node in sorted(self.node_list):
+ self.console.info("\t%-*s" % (self.config['hostlen'], node))
+
+ self.console.info('')
+
+ if not self.config['case_id'] and not self.config['batch']:
+ msg = 'Please enter the case id you are collecting reports for: '
+ self.config['case_id'] = input(msg)
+
+ def configure_sos_cmd(self):
+ '''Configures the sosreport command that is run on the nodes'''
+ if self.config['sos_opt_line']:
+ filt = ['&', '|', '>', '<', ';']
+ if any(f in self.config['sos_opt_line'] for f in filt):
+ self.log_warn('Possible shell script found in provided sos '
+ 'command. Ignoring --sos-cmd option entirely.')
+ self.config['sos_opt_line'] = None
+ else:
+ self.config['sos_cmd'] = '%s %s' % (
+ self.config['sos_cmd'], quote(self.config['sos_opt_line']))
+ self.log_debug("User specified manual sosreport command. "
+ "Command set to %s" % self.config['sos_cmd'])
+ return True
+ if self.config['case_id']:
+ self.config['sos_cmd'] += ' --case-id=%s' % (
+ quote(self.config['case_id']))
+ if self.config['alloptions']:
+ self.config['sos_cmd'] += ' --alloptions'
+ if self.config['all_logs']:
+ self.config['sos_cmd'] += ' --all-logs'
+ if self.config['verify']:
+ self.config['sos_cmd'] += ' --verify'
+ if self.config['log_size']:
+ self.config['sos_cmd'] += (' --log-size=%s'
+                                    % quote(str(self.config['log_size'])))
+ if self.config['sysroot']:
+ self.config['sos_cmd'] += ' -s %s' % quote(self.config['sysroot'])
+ if self.config['chroot']:
+ self.config['sos_cmd'] += ' -c %s' % quote(self.config['chroot'])
+ if self.config['compression']:
+ self.config['sos_cmd'] += ' -z %s' % (
+ quote(self.config['compression']))
+ self.log_debug('Initial sos cmd set to %s' % self.config['sos_cmd'])
+
+ def connect_to_master(self):
+        '''If run with --master, we will run cluster checks against that
+        node instead of the localhost.
+ '''
+ try:
+ self.master = SosNode(self.config['master'], self.config)
+ except Exception as e:
+ self.log_debug('Failed to connect to master: %s' % e)
+ self._exit('Could not connect to master node. Aborting...', 1)
+
+ def determine_cluster(self):
+        '''This sets the cluster type and loads that cluster type's profile.
+
+ If no cluster type is matched and no list of nodes is provided by
+ the user, then we abort.
+
+        If a list of nodes is given, this is not run; however, a cluster
+        profile can still be used if the user sets --cluster-type manually
+ '''
+ checks = list(self.clusters.values())
+ for cluster in self.clusters.values():
+ checks.remove(cluster)
+ cluster.master = self.master
+ if cluster.check_enabled():
+ cname = cluster.__class__.__name__
+ self.log_debug("Installation matches %s, checking for layered "
+ "profiles" % cname)
+ for remaining in checks:
+ if issubclass(remaining.__class__, cluster.__class__):
+ rname = remaining.__class__.__name__
+ self.log_debug("Layered profile %s found. "
+ "Checking installation"
+ % rname)
+ remaining.master = self.master
+ if remaining.check_enabled():
+ self.log_debug("Installation matches both layered "
+ "profile %s and base profile %s, "
+ "setting cluster type to layered "
+ "profile" % (rname, cname))
+ cluster = remaining
+ break
+
+ self.config['cluster'] = cluster
+ self.config['cluster_type'] = cluster.name()
+ self.log_info(
+ 'Cluster type set to %s' % self.config['cluster_type'])
+ break
+
+ def get_nodes_from_cluster(self):
+        '''Collects the list of nodes from the determined cluster profile'''
+ if self.config['cluster_type']:
+ nodes = self.config['cluster']._get_nodes()
+ self.log_debug('Node list: %s' % nodes)
+ return nodes
+
+ def reduce_node_list(self):
+ '''Reduce duplicate entries of the localhost and/or master node
+ if applicable'''
+ if (self.config['hostname'] in self.node_list and
+ self.config['no_local']):
+ self.node_list.remove(self.config['hostname'])
+ for i in self.config['ip_addrs']:
+ if i in self.node_list:
+ self.node_list.remove(i)
+ # remove the master node from the list, since we already have
+ # an open session to it.
+ if self.config['master']:
+ for n in self.node_list:
+ if n == self.master.hostname or n == self.config['master']:
+ self.node_list.remove(n)
+ self.node_list = list(set(n for n in self.node_list if n))
+ self.log_debug('Node list reduced to %s' % self.node_list)
+
+ def compare_node_to_regex(self, node):
+        '''Compares a discovered node name to the list of node patterns
+        provided by the user. Returns True on a match, else False.'''
+ for regex in self.config['nodes']:
+ try:
+ regex = fnmatch.translate(regex)
+ if re.match(regex, node):
+ return True
+ except re.error as err:
+ msg = 'Error comparing %s to provided node regex %s: %s'
+ self.log_debug(msg % (node, regex, err))
+ return False
+
+ def get_nodes(self):
+ ''' Sets the list of nodes to collect sosreports from '''
+ if not self.config['master'] and not self.config['cluster']:
+ msg = ('Could not determine a cluster type and no list of '
+ 'nodes or master node was provided.\nAborting...'
+ )
+ self._exit(msg)
+
+ try:
+ nodes = self.get_nodes_from_cluster()
+ if self.config['nodes']:
+ for node in nodes:
+ if self.compare_node_to_regex(node):
+ self.node_list.append(node)
+ else:
+ self.node_list = nodes
+ except Exception as e:
+ self.log_debug("Error parsing node list: %s" % e)
+ self.log_debug('Setting node list to --nodes option')
+ self.node_list = self.config['nodes']
+            self.node_list = [n for n in self.node_list if not any(
+                i in n for i in ('*', '\\', '?', '(', ')', '/'))]
+
+ # force add any non-regex node strings from nodes option
+ if self.config['nodes']:
+ for node in self.config['nodes']:
+ if any(i in node for i in '*\\?()/[]'):
+ continue
+ if node not in self.node_list:
+ self.log_debug("Force adding %s to node list" % node)
+ self.node_list.append(node)
+
+ if not self.config['master']:
+ host = self.config['hostname'].split('.')[0]
+ # trust the local hostname before the node report from cluster
+            if any(host == n.split('.')[0] for n in self.node_list):
+                self.node_list = [n for n in self.node_list
+                                  if host != n.split('.')[0]]
+                self.node_list.append(self.config['hostname'])
+ self.reduce_node_list()
+ try:
+ self.config['hostlen'] = len(max(self.node_list, key=len))
+ except (TypeError, ValueError):
+ self.config['hostlen'] = len(self.config['master'])
+
+ def _connect_to_node(self, node):
+ '''Try to connect to the node, and if we can add to the client list to
+ run sosreport on
+
+ Positional arguments
+ node - a tuple specifying (address, password). If no password, set
+ to None
+ '''
+ try:
+ client = SosNode(node[0], self.config, password=node[1])
+ if client.connected:
+ self.client_list.append(client)
+ else:
+ client.close_ssh_session()
+ except Exception:
+ pass
+
+ def collect(self):
+ ''' For each node, start a collection thread and then tar all
+ collected sosreports '''
+ if self.master.connected:
+ self.client_list.append(self.master)
+
+ self.console.info("\nConnecting to nodes...")
+ filters = [self.master.address, self.master.hostname]
+ nodes = [(n, None) for n in self.node_list if n not in filters]
+
+ if self.config['password_per_node']:
+ _nodes = []
+ for node in nodes:
+ msg = ("Please enter the password for %s@%s: "
+ % (self.config['ssh_user'], node[0]))
+ node_pwd = getpass(msg)
+ _nodes.append((node[0], node_pwd))
+ nodes = _nodes
+
+ try:
+ pool = ThreadPoolExecutor(self.config['threads'])
+ pool.map(self._connect_to_node, nodes, chunksize=1)
+ pool.shutdown(wait=True)
+
+ self.report_num = len(self.client_list)
+ if self.config['no_local'] and self.master.address == 'localhost':
+ self.report_num -= 1
+
+ self.console.info("\nBeginning collection of sosreports from %s "
+ "nodes, collecting a maximum of %s "
+ "concurrently\n"
+ % (self.report_num, self.config['threads'])
+ )
+
+ pool = ThreadPoolExecutor(self.config['threads'])
+ pool.map(self._collect, self.client_list, chunksize=1)
+ pool.shutdown(wait=True)
+ except KeyboardInterrupt:
+ self.log_error('Exiting on user cancel\n')
+ os._exit(130)
+ except Exception as err:
+ self.log_error('Could not connect to nodes: %s' % err)
+ os._exit(1)
+
+ if hasattr(self.config['cluster'], 'run_extra_cmd'):
+ self.console.info('Collecting additional data from master node...')
+ files = self.config['cluster']._run_extra_cmd()
+ if files:
+ self.master.collect_extra_cmd(files)
+ msg = '\nSuccessfully captured %s of %s sosreports'
+ self.log_info(msg % (self.retrieved, self.report_num))
+ self.close_all_connections()
+ if self.retrieved > 0:
+ self.create_cluster_archive()
+ else:
+ msg = 'No sosreports were collected, nothing to archive...'
+ self._exit(msg, 1)
+
+ def _collect(self, client):
+ '''Runs sosreport on each node'''
+ try:
+ if not client.local:
+ client.sosreport()
+ else:
+ if not self.config['no_local']:
+ client.sosreport()
+ if client.retrieved:
+ self.retrieved += 1
+ except Exception as err:
+ self.log_error("Error running sosreport: %s" % err)
+
+ def close_all_connections(self):
+ '''Close all ssh sessions for nodes'''
+ for client in self.client_list:
+ self.log_debug('Closing SSH connection to %s' % client.address)
+ client.close_ssh_session()
+
+ def create_cluster_archive(self):
+ '''Calls for creation of tar archive then cleans up the temporary
+ files created by sos-collector'''
+ self.log_info('Creating archive of sosreports...')
+ self.create_sos_archive()
+ if self.archive:
+ self.logger.info('Archive created as %s' % self.archive)
+ self.cleanup()
+ self.console.info('\nThe following archive has been created. '
+ 'Please provide it to your support team.')
+ self.console.info(' %s' % self.archive)
+
+ def create_sos_archive(self):
+ '''Creates a tar archive containing all collected sosreports'''
+ try:
+ self.archive = self._get_archive_path()
+ with tarfile.open(self.archive, "w:gz") as tar:
+ for host in self.client_list:
+ for fname in host.file_list:
+ try:
+ if '.md5' in fname:
+ arc_name = (self.arc_name + '/md5/' +
+ fname.split('/')[-1])
+ else:
+ arc_name = (self.arc_name + '/' +
+ fname.split('/')[-1])
+ tar.add(
+ os.path.join(self.config['tmp_dir'], fname),
+ arcname=arc_name
+ )
+ except Exception as err:
+ self.log_error("Could not add %s to archive: %s"
+ % (arc_name, err))
+ tar.add(
+ self.logfile.name,
+ arcname=self.arc_name + '/logs/sos-collector.log'
+ )
+ tar.add(
+ self.console_log_file.name,
+ arcname=self.arc_name + '/logs/ui.log'
+ )
+ tar.close()
+ except Exception as e:
+ msg = 'Could not create archive: %s' % e
+ self._exit(msg, 2)
+
+ def cleanup(self):
+ ''' Removes the tmp dir and all sosarchives therein.
+
+ If tmp dir was supplied by user, only the sos archives within
+ that dir are removed.
+ '''
+ if self.config['tmp_dir_created']:
+ self.delete_tmp_dir()
+ else:
+ for f in os.listdir(self.config['tmp_dir']):
+                if re.search('sosreport-.*tar.*', f):
+ os.remove(os.path.join(self.config['tmp_dir'], f))
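As a standalone illustration of the node-matching behavior implemented by compare_node_to_regex() above, the sketch below (with made-up hostnames) shows how --nodes patterns are treated as shell-style globs, translated to regexes via fnmatch.translate(), and matched against each discovered node.

import fnmatch
import re

patterns = ['node[0-9]*', 'db*.example.com']   # illustrative --nodes values
discovered = ['node1.example.com', 'web1.example.com', 'db2.example.com']

def matches(node, patterns):
    for pat in patterns:
        try:
            # translate the glob into an anchored regex, then match
            if re.match(fnmatch.translate(pat), node):
                return True
        except re.error:
            continue
    return False

print([n for n in discovered if matches(n, patterns)])
# ['node1.example.com', 'db2.example.com']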
diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
new file mode 100644
index 00000000..7c3d01a8
--- /dev/null
+++ b/sos/collector/clusters/__init__.py
@@ -0,0 +1,223 @@
+# Copyright Red Hat 2020, Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+import logging
+import subprocess
+
+from sos.collector.configuration import ClusterOption
+
+
+class Cluster(object):
+
+ option_list = []
+ packages = ('',)
+ sos_plugins = []
+ sos_plugin_options = {}
+ sos_preset = ''
+ cluster_name = None
+
+ def __init__(self, config):
+        '''This is the class that cluster profiles should subclass in order
+        to add support for different clustering technologies and
+        environments to sos-collector.
+
+        A profile should at minimum define a package that indicates the node
+        is configured for the type of cluster the profile is intended to
+        serve, and additionally be able to return a list of enumerated nodes
+        via the get_nodes() method
+ '''
+
+ self.master = None
+ self.config = config
+ self.cluster_type = [self.__class__.__name__]
+ for cls in self.__class__.__bases__:
+ if cls.__name__ != 'Cluster':
+ self.cluster_type.append(cls.__name__)
+ self.node_list = None
+ self.logger = logging.getLogger('sos_collector')
+ self.console = logging.getLogger('sos_collector_console')
+ self.options = []
+ self._get_options()
+
+ @classmethod
+ def name(cls):
+ '''Returns the cluster's name as a string.
+ '''
+ if cls.cluster_name:
+ return cls.cluster_name
+ return cls.__name__.lower()
+
+ def _get_options(self):
+ '''Loads the options defined by a cluster and sets the default value'''
+ for opt in self.option_list:
+ option = ClusterOption(name=opt[0], opt_type=opt[1].__class__,
+ value=opt[1], cluster=self.cluster_type,
+ description=opt[2])
+ self.options.append(option)
+
+ def _fmt_msg(self, msg):
+ return '[%s] %s' % (self.cluster_type, msg)
+
+ def log_info(self, msg):
+ '''Used to print info messages'''
+ self.logger.info(self._fmt_msg(msg))
+ self.console.info(msg)
+
+ def log_error(self, msg):
+ '''Used to print error messages'''
+ self.logger.error(self._fmt_msg(msg))
+ self.console.error(msg)
+
+ def log_debug(self, msg):
+ '''Used to print debug messages'''
+ self.logger.debug(self._fmt_msg(msg))
+ if self.config['verbose']:
+ self.console.debug(self._fmt_msg(msg))
+
+ def log_warn(self, msg):
+ '''Used to print warning messages'''
+ self.logger.warn(self._fmt_msg(msg))
+ self.console.warn(msg)
+
+ def get_option(self, option):
+        '''This is used by clusters to check whether a cluster option was
+        supplied to sos-collector.
+ '''
+ # check CLI before defaults
+ for opt in self.config['cluster_options']:
+ if opt.name == option and opt.cluster in self.cluster_type:
+ return opt.value
+ # provide defaults otherwise
+ for opt in self.options:
+ if opt.name == option:
+ return opt.value
+ return False
+
+ def exec_master_cmd(self, cmd, need_root=False):
+ '''Used to retrieve output from a (master) node in a cluster'''
+ res = self.master.run_command(cmd, get_pty=True, need_root=need_root)
+ if res['stdout']:
+ res['stdout'] = res['stdout'].replace('Password:', '')
+ return res
+
+ def setup(self):
+        '''This MAY be used by a cluster to do prep work when there are
+        extra commands to be run even if a node list is given by the user,
+        in which case get_nodes() would not be called
+ '''
+ pass
+
+ def check_enabled(self):
+ '''This may be overridden by clusters
+
+        This is called by sos-collector on each cluster type that exists,
+        and is meant to return True when the cluster type matches a
+        criterion indicating that cluster type is in use.
+
+        Only the first cluster type to report a match is used
+ '''
+ for pkg in self.packages:
+ if self.master.is_installed(pkg):
+ return True
+ return False
+
+ def get_nodes(self):
+ '''This MUST be overridden by a cluster.
+ A cluster should use this method to return a list or string that
+ contains all the nodes that a report should be collected from
+ '''
+ pass
+
+ def _get_nodes(self):
+ try:
+ return self.format_node_list()
+ except Exception as e:
+ self.log_debug('Failed to get node list: %s' % e)
+ return []
+
+ def get_node_label(self, node):
+ '''Used by SosNode() to retrieve the appropriate label from the cluster
+ as set by set_node_label() in the cluster profile.
+ '''
+ return self.set_node_label(node)
+
+ def set_node_label(self, node):
+ '''This may be overridden by clusters.
+
+ If there is a distinction between masters and nodes, or types of nodes,
+ then this can be used to label the sosreport archive differently.
+ '''
+ return ''
+
+ def modify_sos_cmd(self):
+ '''This is used to modify the sosreport command run on the nodes.
+ By default, sosreport is run without any options, using this will
+ allow the profile to specify what plugins to run or not and what
+ options to use.
+
+ This will NOT override user supplied options.
+ '''
+ if self.sos_preset:
+ if not self.config['preset']:
+ self.config['preset'] = self.sos_preset
+ else:
+ self.log_debug('Cluster specified preset %s but user has also '
+ 'defined a preset. Using user specification.'
+ % self.sos_preset)
+ if self.sos_plugins:
+ for plug in self.sos_plugins:
+ if plug not in self.config['sos_cmd']:
+ self.config['enable_plugins'].append(plug)
+ if self.sos_plugin_options:
+ for opt in self.sos_plugin_options:
+ if not any(opt in o for o in self.config['plugin_options']):
+ option = '%s=%s' % (opt, self.sos_plugin_options[opt])
+ self.config['plugin_options'].append(option)
+
+ def format_node_list(self):
+        '''Format the returned list of nodes from a cluster into a known
+        format, namely a list that contains no duplicates
+ '''
+ try:
+ nodes = self.get_nodes()
+ except Exception as e:
+ self.log_error('\n%s failed to enumerate nodes: %s'
+ % (self.cluster_type, e))
+ raise
+ if isinstance(nodes, list):
+ node_list = [n.strip() for n in nodes if n]
+ elif isinstance(nodes, str):
+            node_list = [n.strip() for n in nodes.split(',')]
+ node_list = list(set(node_list))
+        node_list = [n for n in node_list if not n.startswith(
+            ('-', '_', '(', ')', '[', ']', '/', '\\'))]
+ return node_list
+
+ def _run_extra_cmd(self):
+ '''Ensures that any files returned by a cluster's run_extra_cmd()
+ method are properly typed as a list for iterative collection. If any
+ of the files are an additional sosreport (e.g. the ovirt db dump) then
+ the md5 sum file is automatically added to the list
+ '''
+ files = []
+ try:
+ res = self.run_extra_cmd()
+ if res:
+ if not isinstance(res, list):
+ res = [res]
+ for extra_file in res:
+ extra_file = extra_file.strip()
+ files.append(extra_file)
+ if 'sosreport' in extra_file:
+ files.append(extra_file + '.md5')
+ except AttributeError:
+ pass
+ return files
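To make the Cluster API above concrete, here is a sketch of a hypothetical third-party profile; the 'mycluster-server' package and the 'mycluster list-nodes' command are invented for illustration and are not real.

from sos.collector.clusters import Cluster


class mycluster(Cluster):

    cluster_name = 'Example Cluster Manager'
    packages = ('mycluster-server',)     # presence enables this profile
    sos_plugins = ['networking']         # always enable on matched nodes
    option_list = [
        ('role', '', 'Only collect from nodes with this role')
    ]

    def get_nodes(self):
        # enumerate nodes by running a command on the master node;
        # one hostname per line of stdout is assumed here
        res = self.exec_master_cmd('mycluster list-nodes')
        if res['status'] != 0:
            raise Exception('node enumeration failed')
        return res['stdout'].splitlines()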
diff --git a/sos/collector/clusters/jbon.py b/sos/collector/clusters/jbon.py
new file mode 100644
index 00000000..488fbd16
--- /dev/null
+++ b/sos/collector/clusters/jbon.py
@@ -0,0 +1,30 @@
+# Copyright Red Hat 2020, Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from sos.collector.clusters import Cluster
+
+
+class jbon(Cluster):
+ '''Just a Bunch of Nodes
+
+ Used when --cluster-type=none (or jbon), to avoid cluster checks, and just
+ use the provided --nodes list
+ '''
+
+ cluster_name = 'Just a Bunch Of Nodes (no cluster)'
+ packages = None
+
+ def get_nodes(self):
+ return []
+
+ def check_enabled(self):
+ # This should never be called, but as insurance explicitly never
+ # allow this to be enabled via the determine_cluster() path
+ return False
diff --git a/sos/collector/clusters/kubernetes.py b/sos/collector/clusters/kubernetes.py
new file mode 100644
index 00000000..6a867e31
--- /dev/null
+++ b/sos/collector/clusters/kubernetes.py
@@ -0,0 +1,54 @@
+# Copyright Red Hat 2020, Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from pipes import quote
+from sos.collector.clusters import Cluster
+
+
+class kubernetes(Cluster):
+
+ cluster_name = 'Community Kubernetes'
+ packages = ('kubernetes-master',)
+ sos_plugins = ['kubernetes']
+ sos_plugin_options = {'kubernetes.all': 'on'}
+
+ cmd = 'kubectl'
+
+ option_list = [
+ ('label', '', 'Filter node list to those with matching label'),
+ ('role', '', 'Filter node list to those with matching role')
+ ]
+
+ def get_nodes(self):
+ self.cmd += ' get nodes'
+ if self.get_option('label'):
+ self.cmd += ' -l %s ' % quote(self.get_option('label'))
+ res = self.exec_master_cmd(self.cmd)
+ if res['status'] == 0:
+ nodes = []
+ roles = [x for x in self.get_option('role').split(',') if x]
+ for nodeln in res['stdout'].splitlines()[1:]:
+ node = nodeln.split()
+ if not roles:
+ nodes.append(node[0])
+ else:
+ if node[2] in roles:
+ nodes.append(node[0])
+ return nodes
+ else:
+ raise Exception('Node enumeration did not return usable output')
+
+
+class openshift(kubernetes):
+
+ cluster_name = 'OpenShift Container Platform'
+ packages = ('atomic-openshift',)
+ sos_preset = 'ocp'
+ cmd = 'oc'
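The get_nodes() parsing above can be demonstrated standalone with a canned 'kubectl get nodes' listing (column three holds the role); this sketch mirrors the header-skipping and role filtering done by the profile.

output = '''\
NAME      STATUS   ROLES    AGE   VERSION
master0   Ready    master   90d   v1.18.0
worker0   Ready    worker   90d   v1.18.0
worker1   Ready    worker   90d   v1.18.0
'''

roles = ['worker']                        # from the 'role' cluster option
nodes = []
for nodeln in output.splitlines()[1:]:    # skip the column header line
    node = nodeln.split()
    if not roles or node[2] in roles:
        nodes.append(node[0])

print(nodes)                              # ['worker0', 'worker1']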
diff --git a/sos/collector/clusters/ovirt.py b/sos/collector/clusters/ovirt.py
new file mode 100644
index 00000000..5b34f480
--- /dev/null
+++ b/sos/collector/clusters/ovirt.py
@@ -0,0 +1,181 @@
+# Copyright Red Hat 2020, Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+import fnmatch
+
+from pipes import quote
+from sos.collector.clusters import Cluster
+
+ENGINE_KEY = '/etc/pki/ovirt-engine/keys/engine_id_rsa'
+
+
+class ovirt(Cluster):
+
+ cluster_name = 'Community oVirt'
+ packages = ('ovirt-engine',)
+ db_exec = '/usr/share/ovirt-engine/dbscripts/engine-psql.sh -c'
+
+ option_list = [
+ ('no-database', False, 'Do not collect a database dump'),
+ ('cluster', '', 'Only collect from hosts in this cluster'),
+ ('datacenter', '', 'Only collect from hosts in this datacenter'),
+ ('no-hypervisors', False, 'Do not collect from hypervisors'),
+ ('spm-only', False, 'Only collect from SPM host(s)')
+ ]
+
+ def _run_db_query(self, query):
+ '''
+ Wrapper for running DB queries on the master. Any scrubbing of the
+ query should be done _before_ passing the query to this method.
+ '''
+ cmd = "%s %s" % (self.db_exec, quote(query))
+ return self.exec_master_cmd(cmd, need_root=True)
+
+ def _sql_scrub(self, val):
+ '''
+ Manually sanitize SQL queries since we can't leave this up to the
+ driver since we do not have an actual DB connection
+ '''
+ if not val:
+ return '%'
+
+ invalid_chars = ['\x00', '\\', '\n', '\r', '\032', '"', '\'']
+ if any(x in invalid_chars for x in val):
+ self.log_warn("WARNING: Cluster option \'%s\' contains invalid "
+ "characters. Using '%%' instead." % val)
+ return '%'
+
+ return val
+
+ def _check_for_engine_keys(self):
+ '''
+ Checks for the presence of the VDSM ssh keys the manager uses for
+ communication with hypervisors.
+
+ This only runs if we're locally on the RHV-M, *and* if no ssh-keys are
+ called out on the command line, *and* no --password option is given.
+ '''
+ if self.master.local:
+ if not any([self.config['ssh_key'], self.config['password'],
+ self.config['password_per_node']]):
+ if self.master.file_exists(ENGINE_KEY):
+ self.config['ssh_key'] = ENGINE_KEY
+ self.log_debug("Found engine SSH key. User command line"
+ " does not specify a key or password, using"
+ " engine key.")
+
+ def setup(self):
+ self.pg_pass = False
+ if not self.get_option('no-database'):
+ self.conf = self.parse_db_conf()
+ self.format_db_cmd()
+ self._check_for_engine_keys()
+
+ def format_db_cmd(self):
+ cluster = self._sql_scrub(self.get_option('cluster'))
+ datacenter = self._sql_scrub(self.get_option('datacenter'))
+ self.dbquery = ("SELECT host_name from vds where cluster_id in "
+ "(select cluster_id FROM cluster WHERE name like '%s'"
+ " and storage_pool_id in (SELECT id FROM storage_pool "
+ "WHERE name like '%s'))" % (cluster, datacenter))
+ if self.get_option('spm-only'):
+ # spm_status is an integer with the following meanings
+ # 0 - Normal (not SPM)
+ # 1 - Contending (SPM election in progress, but is not SPM)
+ # 2 - SPM
+ self.dbquery += ' AND spm_status = 2'
+ self.log_debug('Query command for ovirt DB set to: %s' % self.dbquery)
+
+ def get_nodes(self):
+ if self.get_option('no-hypervisors'):
+ return []
+ res = self._run_db_query(self.dbquery)
+ if res['status'] == 0:
+ nodes = res['stdout'].splitlines()[2:-1]
+ return [n.split('(')[0].strip() for n in nodes]
+ else:
+ raise Exception('database query failed, return code: %s'
+ % res['status'])
+
+ def run_extra_cmd(self):
+ if not self.get_option('no-database') and self.conf:
+ return self.collect_database()
+ return False
+
+ def parse_db_conf(self):
+ conf = {}
+ engconf = '/etc/ovirt-engine/engine.conf.d/10-setup-database.conf'
+ res = self.exec_master_cmd('cat %s' % engconf, need_root=True)
+ if res['status'] == 0:
+ config = res['stdout'].splitlines()
+ for line in config:
+ try:
+ k = str(line.split('=')[0])
+ v = str(line.split('=')[1].replace('"', ''))
+ conf[k] = v
+ except IndexError:
+ pass
+ return conf
+ return False
+
+ def collect_database(self):
+ sos_opt = (
+ '-k {plugin}.dbname={db} '
+ '-k {plugin}.dbhost={dbhost} '
+ '-k {plugin}.dbport={dbport} '
+ '-k {plugin}.username={dbuser} '
+ ).format(plugin='postgresql',
+ db=self.conf['ENGINE_DB_DATABASE'],
+ dbhost=self.conf['ENGINE_DB_HOST'],
+ dbport=self.conf['ENGINE_DB_PORT'],
+ dbuser=self.conf['ENGINE_DB_USER']
+ )
+ cmd = ('PGPASSWORD={} /usr/sbin/sosreport --name=postgresql '
+ '--batch -o postgresql {}'
+ ).format(self.conf['ENGINE_DB_PASSWORD'], sos_opt)
+ db_sos = self.exec_master_cmd(cmd, need_root=True)
+ for line in db_sos['stdout'].splitlines():
+ if fnmatch.fnmatch(line, '*sosreport-*tar*'):
+ return line.strip()
+ self.log_error('Failed to gather database dump')
+ return False
+
+
+class rhv(ovirt):
+
+ cluster_name = 'Red Hat Virtualization'
+ packages = ('rhevm', 'rhvm')
+ sos_preset = 'rhv'
+
+ def set_node_label(self, node):
+ if node.address == self.master.address:
+ return 'manager'
+ if node.is_installed('ovirt-node-ng-nodectl'):
+ return 'rhvh'
+ else:
+ return 'rhelh'
+
+
+class rhhi_virt(rhv):
+
+ cluster_name = 'Red Hat Hyperconverged Infrastructure - Virtualization'
+ sos_plugins = ('gluster',)
+ sos_plugin_options = {'gluster.dump': 'on'}
+ sos_preset = 'rhv'
+
+ def check_enabled(self):
+ return (self.master.is_installed('rhvm') and self._check_for_rhhiv())
+
+ def _check_for_rhhiv(self):
+ ret = self._run_db_query('SELECT count(server_id) FROM gluster_server')
+ if ret['status'] == 0:
+ # if there are any entries in this table, RHHI-V is in use
+ return ret['stdout'].splitlines()[2].strip() != '0'
+ return False
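Since the engine DB query above is interpolated into a shell command rather than parameterized through a driver, _sql_scrub() falls back to the SQL wildcard '%' for any suspicious value. A standalone sketch of that behavior:

invalid_chars = ['\x00', '\\', '\n', '\r', '\032', '"', '\'']

def sql_scrub(val):
    # an empty or suspicious value collapses to '%' (match everything)
    if not val or any(x in invalid_chars for x in val):
        return '%'
    return val

print(sql_scrub('Default'))         # Default
print(sql_scrub("x'; DROP --"))     # %
print(sql_scrub(''))                # %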
diff --git a/sos/collector/clusters/pacemaker.py b/sos/collector/clusters/pacemaker.py
new file mode 100644
index 00000000..c64ec654
--- /dev/null
+++ b/sos/collector/clusters/pacemaker.py
@@ -0,0 +1,57 @@
+# Copyright Red Hat 2020, Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from sos.collector.clusters import Cluster
+
+
+class pacemaker(Cluster):
+
+ cluster_name = 'Pacemaker High Availability Cluster Manager'
+ sos_plugins = ['pacemaker']
+ packages = ('pacemaker',)
+ option_list = [
+ ('online', True, 'Collect nodes listed as online'),
+ ('offline', True, 'Collect nodes listed as offline')
+ ]
+
+ def get_nodes(self):
+ self.res = self.exec_master_cmd('pcs status')
+ if self.res['status'] != 0:
+ self.log_error('Cluster status could not be determined. Is the '
+ 'cluster running on this node?')
+ return []
+ if 'node names do not match' in self.res['stdout']:
+ self.log_warn('Warning: node name mismatch reported. Attempts to '
+ 'connect to some nodes may fail.\n')
+ return self.parse_pcs_output()
+
+ def parse_pcs_output(self):
+ nodes = []
+ if self.get_option('online'):
+ nodes += self.get_online_nodes()
+ if self.get_option('offline'):
+ nodes += self.get_offline_nodes()
+ return nodes
+
+    def get_online_nodes(self):
+        for line in self.res['stdout'].splitlines():
+            if line.startswith('Online:'):
+                nodes = line.split('[')[1].split(']')[0]
+                return [n for n in nodes.split(' ') if n]
+        return []
+
+ def get_offline_nodes(self):
+ offline = []
+ for line in self.res['stdout'].splitlines():
+ if line.startswith('Node') and line.endswith('(offline)'):
+ offline.append(line.split()[1].replace(':', ''))
+ if line.startswith('OFFLINE:'):
+ nodes = line.split('[')[1].split(']')[0]
+ offline.extend([n for n in nodes.split(' ') if n])
+ return offline
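The 'pcs status' parsing above can be exercised standalone against a canned output fragment containing both bracketed node lists and per-node offline markers:

output = '''\
Cluster name: mycluster
Node node3: UNCLEAN (offline)
Online: [ node1 node2 ]
OFFLINE: [ node4 ]
'''

online, offline = [], []
for line in output.splitlines():
    if line.startswith('Online:'):
        online += [n for n in line.split('[')[1].split(']')[0].split(' ') if n]
    if line.startswith('Node') and line.endswith('(offline)'):
        offline.append(line.split()[1].replace(':', ''))
    if line.startswith('OFFLINE:'):
        offline += [n for n in line.split('[')[1].split(']')[0].split(' ') if n]

print(online, offline)   # ['node1', 'node2'] ['node3', 'node4']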
diff --git a/sos/collector/clusters/satellite.py b/sos/collector/clusters/satellite.py
new file mode 100644
index 00000000..fb666a40
--- /dev/null
+++ b/sos/collector/clusters/satellite.py
@@ -0,0 +1,39 @@
+# Copyright Red Hat 2020, Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from pipes import quote
+from sos.collector.clusters import Cluster
+
+
+class satellite(Cluster):
+ """Red Hat Satellite 6"""
+
+ cluster_name = 'Red Hat Satellite 6'
+ packages = ('satellite', 'satellite-installer')
+
+ def _psql_cmd(self, query):
+ _cmd = "su postgres -c %s"
+ _dbcmd = "psql foreman -c %s"
+ return _cmd % quote(_dbcmd % quote(query))
+
+ def get_nodes(self):
+ cmd = self._psql_cmd('select name from smart_proxies')
+ res = self.exec_master_cmd(cmd, need_root=True)
+ if res['status'] == 0:
+ idx = 2
+ if 'could not change' in res['stdout']:
+ idx = 3
+ nodes = [n.strip() for n in res['stdout'].splitlines()[idx:-1]]
+ return nodes
+
+ def set_node_label(self, node):
+ if node.address == self.master.address:
+ return 'satellite'
+ return 'capsule'
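The double quoting in _psql_cmd() above exists because the query must survive two shells: the outer 'su -c' invocation and the inner psql command line. A short sketch of the layering:

from pipes import quote   # shlex.quote on modern Python

def psql_cmd(query):
    _dbcmd = "psql foreman -c %s" % quote(query)    # inner layer
    return "su postgres -c %s" % quote(_dbcmd)      # outer layer

print(psql_cmd('select name from smart_proxies'))
# su postgres -c 'psql foreman -c '"'"'select name from smart_proxies'"'"''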
diff --git a/sos/collector/configuration.py b/sos/collector/configuration.py
new file mode 100644
index 00000000..8ec63139
--- /dev/null
+++ b/sos/collector/configuration.py
@@ -0,0 +1,238 @@
+# Copyright Red Hat 2020, Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+import inspect
+import os
+import pipes
+import re
+import six
+import socket
+
+
+class Configuration(dict):
+ """ Dict subclass that is used to handle configuration information
+ needed by both SosCollector and the SosNode classes """
+
+ def __init__(self, args=None):
+ self.args = args
+ self.set_defaults()
+ self.parse_config()
+ self.parse_options()
+ self.check_user_privs()
+ self.parse_node_strings()
+ self['host_types'] = self._load_supported_hosts()
+ self['cluster_types'] = self._load_clusters()
+
+ def set_defaults(self):
+ self['sos_mod'] = {}
+ self['master'] = ''
+ self['strip_sos_path'] = ''
+ self['ssh_port'] = 22
+ self['ssh_user'] = 'root'
+ self['ssh_key'] = None
+ self['sos_cmd'] = 'sosreport --batch'
+ self['no_local'] = False
+ self['tmp_dir'] = '/var/tmp'
+ self['out_dir'] = '/var/tmp/'
+ self['nodes'] = []
+ self['debug'] = False
+ self['tmp_dir_created'] = False
+ self['cluster_type'] = None
+ self['cluster'] = None
+ self['password'] = False
+ self['label'] = None
+ self['case_id'] = None
+ self['timeout'] = 300
+ self['all_logs'] = False
+ self['alloptions'] = False
+ self['no_pkg_check'] = False
+ self['hostname'] = socket.gethostname()
+ ips = [i[4][0] for i in socket.getaddrinfo(socket.gethostname(), None)]
+ self['ip_addrs'] = list(set(ips))
+ self['cluster_options'] = []
+ self['image'] = None
+ self['skip_plugins'] = []
+ self['enable_plugins'] = []
+ self['plugin_options'] = []
+ self['only_plugins'] = []
+ self['list_options'] = False
+ self['hostlen'] = len(self['master']) or len(self['hostname'])
+ self['need_sudo'] = False
+ self['sudo_pw'] = ''
+ self['become_root'] = False
+ self['root_password'] = ''
+ self['threads'] = 4
+ self['compression'] = ''
+ self['verify'] = False
+ self['chroot'] = ''
+ self['sysroot'] = ''
+ self['sos_opt_line'] = ''
+ self['batch'] = False
+ self['verbose'] = False
+ self['preset'] = ''
+ self['insecure_sudo'] = False
+ self['log_size'] = 0
+ self['host_types'] = []
+ self['password_per_node'] = False
+ self['group'] = None
+ self['save_group'] = ''
+
+ def parse_node_strings(self):
+ '''
+ Parses the given --nodes option(s) to properly format the regex
+ list that we use. We cannot blindly split on ',' chars since it is a
+ valid regex character, so we need to scan along the given strings and
+        check at each comma if we should use the preceding string by itself
+ or not, based on if there is a valid regex at that index.
+ '''
+ if not self['nodes']:
+ return
+ nodes = []
+ if not isinstance(self['nodes'], list):
+ self['nodes'] = [self['nodes']]
+ for node in self['nodes']:
+ idxs = [i for i, m in enumerate(node) if m == ',']
+ idxs.append(len(node))
+ start = 0
+ pos = 0
+ for idx in idxs:
+ try:
+ pos = idx
+ reg = node[start:idx]
+ re.compile(re.escape(reg))
+ # make sure we aren't splitting a regex value
+ if '[' in reg and ']' not in reg:
+ continue
+ nodes.append(reg.lstrip(','))
+ start = idx
+ except re.error:
+ continue
+ if pos != len(node):
+ nodes.append(node[pos+1:])
+ self['nodes'] = nodes
+
+ def parse_config(self):
+ for k in self.args:
+ if self.args[k]:
+ self[k] = self.args[k]
+ if self['sos_opt_line']:
+ self['sos_opt_line'] = pipes.quote(self['sos_opt_line'])
+
+ def parse_cluster_options(self):
+ opts = []
+ if not isinstance(self['cluster_options'], list):
+ self['cluster_options'] = [self['cluster_options']]
+ if self['cluster_options']:
+ for option in self['cluster_options']:
+ cluster = option.split('.')[0]
+ name = option.split('.')[1].split('=')[0]
+ try:
+ # there are no instances currently where any cluster option
+ # should contain a legitimate space.
+ value = option.split('=')[1].split()[0]
+ except IndexError:
+ # conversion to boolean is handled during validation
+ value = 'True'
+
+ opts.append(
+ ClusterOption(name, value, value.__class__, cluster)
+ )
+ self['cluster_options'] = opts
+
+ def parse_options(self):
+ self.parse_cluster_options()
+ for opt in ['skip_plugins', 'enable_plugins', 'plugin_options',
+ 'only_plugins']:
+ if self[opt]:
+ opts = []
+ if isinstance(self[opt], six.string_types):
+ self[opt] = [self[opt]]
+ for option in self[opt]:
+ opts += option.split(',')
+ self[opt] = opts
+
+ def check_user_privs(self):
+ if not self['ssh_user'] == 'root':
+ self['need_sudo'] = True
+
+ def _import_modules(self, modname):
+ '''Import and return all found classes in a module'''
+ mod_short_name = modname.split('.')[2]
+ module = __import__(modname, globals(), locals(), [mod_short_name])
+ modules = inspect.getmembers(module, inspect.isclass)
+        modules = [m for m in modules if m[0] not in ('SosHost', 'Cluster')]
+ return modules
+
+ def _find_modules_in_path(self, path, modulename):
+ '''Given a path and a module name, find everything that can be imported
+ and then import it
+
+ path - the filesystem path of the package
+ modulename - the name of the module in the package
+
+ E.g. a path of 'clusters' and a modulename of 'ovirt' equates to
+ importing sos.collector.clusters.ovirt
+ '''
+ modules = []
+ if os.path.exists(path):
+ for pyfile in sorted(os.listdir(path)):
+ if not pyfile.endswith('.py'):
+ continue
+ if '__' in pyfile:
+ continue
+ fname, ext = os.path.splitext(pyfile)
+ modname = 'sos.collector.%s.%s' % (modulename, fname)
+ modules.extend(self._import_modules(modname))
+ return modules
+
+ def _load_modules(self, package, submod):
+ '''Helper to import cluster and host types'''
+ modules = []
+ for path in package.__path__:
+ if os.path.isdir(path):
+ modules.extend(self._find_modules_in_path(path, submod))
+ return modules
+
+ def _load_clusters(self):
+ '''Load an instance of each cluster so that sos-collector can later
+ determine what type of cluster is in use
+ '''
+ import sos.collector.clusters
+ package = sos.collector.clusters
+ supported_clusters = {}
+ clusters = self._load_modules(package, 'clusters')
+ for cluster in clusters:
+ supported_clusters[cluster[0]] = cluster[1](self)
+ return supported_clusters
+
+ def _load_supported_hosts(self):
+ '''Load all the supported/defined host types for sos-collector.
+ These will then be used to match against each node we run on
+ '''
+ import sos.collector.hosts
+ package = sos.collector.hosts
+ supported_hosts = {}
+ hosts = self._load_modules(package, 'hosts')
+ for host in hosts:
+ supported_hosts[host[0]] = host[1]
+ return supported_hosts
+
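A minimal sketch of the discovery pattern these helpers rely on, assuming the sos.collector package layout introduced by this commit is importable:

import inspect

modname = 'sos.collector.hosts.redhat'
module = __import__(modname, globals(), locals(), [modname.split('.')[-1]])
for name, cls in inspect.getmembers(module, inspect.isclass):
    # includes SosHost itself, which _import_modules() filters out
    print(name)  # RedHatAtomicHost, RedHatCoreOSHost, RedHatHost, SosHost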
+
+class ClusterOption():
+ '''Used to store/manipulate options for cluster profiles.'''
+
+ def __init__(self, name, value, opt_type, cluster, description=None):
+ self.name = name
+ self.value = value
+ self.opt_type = opt_type
+ self.cluster = cluster
+ self.description = description
diff --git a/sos/collector/exceptions.py b/sos/collector/exceptions.py
new file mode 100644
index 00000000..1d1e76df
--- /dev/null
+++ b/sos/collector/exceptions.py
@@ -0,0 +1,108 @@
+# Copyright Red Hat 2020, Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+
+class InvalidPasswordException(Exception):
+ '''Raised when the provided password is rejected by the remote host'''
+
+ def __init__(self):
+ message = 'Invalid password provided'
+ super(InvalidPasswordException, self).__init__(message)
+
+
+class TimeoutPasswordAuthException(Exception):
+ '''Raised when a timeout is hit waiting for an auth reply using a password
+ '''
+
+ def __init__(self):
+ message = 'Timeout hit while waiting for password validation'
+ super(TimeoutPasswordAuthException, self).__init__(message)
+
+
+class PasswordRequestException(Exception):
+ '''Raised when the remote host requests a password that was not anticipated
+ '''
+
+ def __init__(self):
+ message = 'Host requested password, but none provided'
+ super(PasswordRequestException, self).__init__(message)
+
+
+class AuthPermissionDeniedException(Exception):
+ '''Raised when authentication attempts return a permission error'''
+
+ def __init__(self):
+ message = 'Permission denied while trying to authenticate'
+ super(AuthPermissionDeniedException, self).__init__(message)
+
+
+class ConnectionException(Exception):
+ '''Raised when an attempt to connect fails'''
+
+ def __init__(self, address='', port=''):
+ message = ("Could not connect to host %s on specified port %s"
+ % (address, port))
+ super(ConnectionException, self).__init__(message)
+
+
+class CommandTimeoutException(Exception):
+ '''Raised when a timeout expires'''
+
+ def __init__(self, command=None):
+ message = 'Timeout expired'
+ if command:
+ message += " executing %s" % command
+ super(CommandTimeoutException, self).__init__(message)
+
+
+class ConnectionTimeoutException(Exception):
+ '''Raised when a timeout expires while trying to connect to the host'''
+
+ def __init__(self):
+ message = 'Timeout expired while trying to connect'
+ super(ConnectionTimeoutException, self).__init__(message)
+
+
+class ControlSocketMissingException(Exception):
+ '''Raised when the SSH control socket is missing'''
+
+ def __init__(self, path=''):
+ message = "SSH control socket %s does not exist" % path
+ super(ControlSocketMissingException, self).__init__(message)
+
+
+class ControlPersistUnsupportedException(Exception):
+ '''Raised when SSH ControlPersist is unsupported locally'''
+
+ def __init__(self):
+ message = 'ControlPersist unsupported by local SSH installation'
+ super(ControlPersistUnsupportedException, self).__init__(message)
+
+
+class UnsupportedHostException(Exception):
+ '''Raised when the host type is unsupported or undetermined'''
+
+ def __init__(self):
+ message = 'Host did not match any supported distributions'
+ super(UnsupportedHostException, self).__init__(message)
+
+
+__all__ = [
+ 'AuthPermissionDeniedException',
+ 'CommandTimeoutException',
+ 'ConnectionException',
+ 'ConnectionTimeoutException',
+ 'ControlPersistUnsupportedException',
+ 'ControlSocketMissingException',
+ 'InvalidPasswordException',
+ 'PasswordRequestException',
+ 'TimeoutPasswordAuthException',
+ 'UnsupportedHostException'
+]
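A hedged sketch of how a caller might branch on these exceptions; connect_node() is a hypothetical stand-in for SosNode creation and always fails here for demonstration:

from sos.collector.exceptions import (ConnectionException,
                                      InvalidPasswordException)

def connect_node(address):
    # stand-in: pretend the host was unreachable
    raise ConnectionException(address, 22)

try:
    connect_node('node1.example.com')
except InvalidPasswordException:
    print('bad password for node1')
except ConnectionException as err:
    print(err)  # Could not connect to host node1.example.com on specified port 22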
diff --git a/sos/collector/hosts/__init__.py b/sos/collector/hosts/__init__.py
new file mode 100644
index 00000000..c66ee44d
--- /dev/null
+++ b/sos/collector/hosts/__init__.py
@@ -0,0 +1,125 @@
+# Copyright Red Hat 2020, Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+
+class SosHost():
+ '''Base class for defining host types - usually defined by distribution
+
+ This should be subclassed for any distro/release that sos-collector can be
+ expected to run on. At minimum it needs to define a package manager and a
+ way to identify the node as a particular distribution - usually through
+ inspection of /etc/os-release or a related file.
+
+ The check_enabled() method should handle looking for the necessary string
+ inside the release_file, or any other way to uniquely identify the host
+ installation.
+
+ The release_file should be set to an identifying file like /etc/os-release
+ that can be inspected.
+
+ '''
+ distribution = ''
+ release_file = '/etc/os-release'
+ package_manager = {
+ 'name': '',
+ 'query': ''
+ }
+ release = ''
+ containerized = False
+ container_runtime = None
+ container_image = None
+ sos_path_strip = None
+ sos_pkg_name = None # package name in deb/rpm/etc
+ sos_bin_path = None # path to sosreport binary
+ sos_container_name = 'sos-collector-tmp'
+
+ def __init__(self, address):
+ self.address = address
+
+ def _check_enabled(self, rel_string):
+ self.release = rel_string.strip()
+ return self.check_enabled(rel_string)
+
+ def check_enabled(self, rel_string):
+ '''Should handle identifying the given host as being of the defined
+ distribution.
+
+ MUST return either True or False.
+ '''
+ return False
+
+ def report_facts(self):
+ '''Assemble relevant information and return as a dict'''
+ facts = {
+ 'distribution': self.distribution,
+ 'release': self.release,
+ 'containerized': self.containerized,
+ 'container_runtime': self.container_runtime,
+ 'sos_prefix': self.set_sos_prefix() % {
+ 'image': self.container_image},
+ 'cleanup_command': self.set_cleanup_cmd()
+ }
+ return facts
+
+ def pkg_query(self, pkg):
+ '''Returns the command string to query a given package.
+
+ Note that this DOES NOT run the query itself. That is left to the
+ SosNode instance that maintains the SSH connection.
+ '''
+ return self.package_manager['query'] + ' %s' % pkg
+
+ def set_sos_prefix(self):
+ '''If sosreport commands need to always be prefixed with something,
+ for example running in a specific container image, then it should be
+ defined here.
+
+ If no prefix should be set, return an empty string instead of None.
+ '''
+ return ''
+
+ def set_cleanup_cmd(self):
+ '''If a host requires additional cleanup, the command should be set and
+ returned here
+ '''
+ return ''
+
+ def create_sos_container(self):
+ '''Returns the command that will create the container that will be
+ used for running commands inside a container on hosts that require it.
+
+ This will use the container runtime defined for the host type to
+ launch a container. From there, we use the defined runtime to exec into
+ the container's namespace.
+ '''
+ return ''
+
+ def restart_sos_container(self):
+ '''Restarts the container created for sos-collector if it has stopped.
+
+ This is called immediately after create_sos_container() as the command
+ to create the container will exit and the container will stop. For
+ current container runtimes, subsequently starting the container will
+ default to opening a bash shell in the container to keep it running,
+ thus allowing us to exec into it again.
+ '''
+ return "%s start %s" % (self.container_runtime,
+ self.sos_container_name)
+
+ def format_container_command(self, cmd):
+ '''Returns the command that allows us to exec into the created
+ container for sos-collector.
+ '''
+ if self.container_runtime:
+ return '%s exec %s %s' % (self.container_runtime,
+ self.sos_container_name,
+ cmd)
+ else:
+ return cmd
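To show how the base class composes command strings, a short sketch with a hypothetical subclass (DemoHost exists only for this example):

from sos.collector.hosts import SosHost

class DemoHost(SosHost):
    container_runtime = 'podman'
    package_manager = {'name': 'rpm', 'query': 'rpm -q'}

host = DemoHost('192.0.2.10')
print(host.pkg_query('sos'))
# rpm -q sos
print(host.format_container_command('sosreport -l'))
# podman exec sos-collector-tmp sosreport -l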
diff --git a/sos/collector/hosts/debian.py b/sos/collector/hosts/debian.py
new file mode 100644
index 00000000..bfe6e24e
--- /dev/null
+++ b/sos/collector/hosts/debian.py
@@ -0,0 +1,31 @@
+# Copyright Red Hat 2020, Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from sos.collector.hosts import SosHost
+
+
+class DebianHost(SosHost):
+ '''Base class for defining Debian based systems'''
+
+ distribution = 'Debian'
+ releases = ['ubuntu', 'debian']
+ package_manager = {
+ 'name': 'dpkg',
+ 'query': "dpkg-query -W -f='${Package}-${Version}\\\n' "
+ }
+ sos_pkg_name = 'sosreport'
+ sos_bin_path = '/usr/bin/sosreport'
+
+ def check_enabled(self, rel_string):
+ for release in self.releases:
+ if release in rel_string:
+ return True
+ return False
+# vim:ts=4 et sw=4
diff --git a/sos/collector/hosts/redhat.py b/sos/collector/hosts/redhat.py
new file mode 100644
index 00000000..967b6f5d
--- /dev/null
+++ b/sos/collector/hosts/redhat.py
@@ -0,0 +1,83 @@
+# Copyright Red Hat 2020, Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from sos.collector.hosts import SosHost
+
+
+class RedHatHost(SosHost):
+ '''Base class for defining Red Hat family systems'''
+
+ distribution = 'Red Hat'
+ release_file = '/etc/redhat-release'
+ releases = ['fedora', 'red hat', 'centos']
+ package_manager = {
+ 'name': 'rpm',
+ 'query': 'rpm -q'
+ }
+ sos_pkg_name = 'sos'
+ sos_bin_path = '/usr/sbin/sosreport'
+
+ def check_enabled(self, rel_string):
+ for release in self.releases:
+ if release in rel_string.lower() and 'CoreOS' not in rel_string:
+ return True
+ return False
+
+
+class RedHatAtomicHost(RedHatHost):
+
+ containerized = True
+ container_runtime = 'docker'
+ container_image = 'registry.access.redhat.com/rhel7/support-tools'
+ sos_path_strip = '/host'
+
+ def check_enabled(self, rel_string):
+ return 'Atomic Host' in rel_string
+
+ def create_sos_container(self):
+ _cmd = ("{runtime} run -di --name {name} --privileged --ipc=host"
+ " --net=host --pid=host -e HOST=/host -e NAME={name} -e "
+ "IMAGE={image} -v /run:/run -v /var/log:/var/log -v "
+ "/etc/machine-id:/etc/machine-id -v "
+ "/etc/localtime:/etc/localtime -v /:/host {image}")
+ return _cmd.format(
+ runtime=self.container_runtime,
+ name=self.sos_container_name,
+ image=self.container_image
+ )
+
+ def set_cleanup_cmd(self):
+ return 'docker rm --force %s' % self.sos_container_name
+
+
+class RedHatCoreOSHost(RedHatHost):
+
+ containerized = True
+ container_runtime = 'podman'
+ container_image = 'registry.redhat.io/rhel8/support-tools'
+ sos_path_strip = '/host'
+
+ def check_enabled(self, rel_string):
+ return 'CoreOS' in rel_string
+
+ def create_sos_container(self):
+ _cmd = ("{runtime} run -di --name {name} --privileged --ipc=host"
+ " --net=host --pid=host -e HOST=/host -e NAME={name} -e "
+ "IMAGE={image} -v /run:/run -v /var/log:/var/log -v "
+ "/etc/machine-id:/etc/machine-id -v "
+ "/etc/localtime:/etc/localtime -v /:/host {image}")
+ return _cmd.format(
+ runtime=self.container_runtime,
+ name=self.sos_container_name,
+ image=self.container_image
+ )
+
+ def set_cleanup_cmd(self):
+ return 'podman rm --force %s' % self.sos_container_name
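A quick illustration of how the release matching distinguishes the Red Hat variants; the release strings below are examples, not an exhaustive list:

from sos.collector.hosts.redhat import RedHatHost, RedHatCoreOSHost

rhel = 'Red Hat Enterprise Linux Server release 7.8 (Maipo)'
coreos = 'Red Hat Enterprise Linux CoreOS release 4.4'

print(RedHatHost('n1').check_enabled(rhel))          # True
print(RedHatHost('n1').check_enabled(coreos))        # False: CoreOS is excluded
print(RedHatCoreOSHost('n1').check_enabled(coreos))  # True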
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
new file mode 100644
index 00000000..50e4b3e2
--- /dev/null
+++ b/sos/collector/sosnode.py
@@ -0,0 +1,819 @@
+# Copyright Red Hat 2020, Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+import fnmatch
+import inspect
+import logging
+import os
+import pexpect
+import re
+import shutil
+
+from distutils.version import LooseVersion
+from pipes import quote
+from sos.collector.exceptions import *
+
+
+class SosNode():
+
+ def __init__(self, address, config, password=None, force=False,
+ load_facts=True):
+ self.address = address.strip()
+ self.local = False
+ self.hostname = None
+ self.config = config
+ self._password = password or self.config['password']
+ self.sos_path = None
+ self.retrieved = False
+ self.hash_retrieved = False
+ self.file_list = []
+ self.sos_info = {
+ 'version': None,
+ 'enabled': [],
+ 'disabled': [],
+ 'options': [],
+ 'presets': []
+ }
+ filt = ['localhost', '127.0.0.1', self.config['hostname']]
+ self.logger = logging.getLogger('sos_collector')
+ self.console = logging.getLogger('sos_collector_console')
+ self.control_path = ("%s/.sos-collector-%s"
+ % (self.config['tmp_dir'], self.address))
+ self.ssh_cmd = self._create_ssh_command()
+ if self.address not in filt or force:
+ try:
+ self.connected = self._create_ssh_session()
+ except Exception as err:
+ self.log_error('Unable to open SSH session: %s' % err)
+ raise
+ else:
+ self.connected = True
+ self.local = True
+ if self.connected and load_facts:
+ self.host = self.determine_host()
+ if not self.host:
+ self.connected = False
+ self.close_ssh_session()
+ return None
+ if self.local:
+ if self.check_in_container():
+ self.host.containerized = False
+ self.log_debug("Host facts found to be %s" %
+ self.host.report_facts())
+ self.get_hostname()
+ if self.host.containerized:
+ self.create_sos_container()
+ self._load_sos_info()
+
+ def _create_ssh_command(self):
+ '''Build the complete ssh command for this node'''
+ cmd = "ssh -oControlPath=%s " % self.control_path
+ cmd += "%s@%s " % (self.config['ssh_user'], self.address)
+ return cmd
+
+ def _fmt_msg(self, msg):
+ return '{:<{}} : {}'.format(self._hostname, self.config['hostlen'] + 1,
+ msg)
+
+ def check_in_container(self):
+ '''
+ Tries to identify if we are currently running in a container or not.
+ '''
+ if os.path.exists('/run/.containerenv'):
+ self.log_debug('Found /run/.containerenv. Running in container.')
+ return True
+ if os.environ.get('container') is not None:
+ self.log_debug("Found env var 'container'. Running in container")
+ return True
+ return False
+
+ def create_sos_container(self):
+ '''If the host is containerized, create the container we'll be using
+ '''
+ if self.host.containerized:
+ res = self.run_command(self.host.create_sos_container())
+ if res['status'] in [0, 125]: # 125 means container exists
+ ret = self.run_command(self.host.restart_sos_container())
+ if ret['status'] == 0:
+ self.log_debug("Temporary container %s created"
+ % self.host.sos_container_name)
+ return True
+ else:
+ self.log_error("Could not start container after create: %s"
+ % ret['stdout'])
+ raise Exception
+ else:
+ self.log_error("Could not create container on host: %s"
+ % res['stdout'])
+ raise Exception
+
+ def file_exists(self, fname):
+ '''Checks for the presence of fname on the remote node'''
+ if not self.local:
+ try:
+ res = self.run_command("stat %s" % fname)
+ return res['status'] == 0
+ except Exception:
+ return False
+ else:
+ try:
+ os.stat(fname)
+ return True
+ except Exception:
+ return False
+
+ @property
+ def _hostname(self):
+ if self.hostname and 'localhost' not in self.hostname:
+ return self.hostname
+ return self.address
+
+ @property
+ def control_socket_exists(self):
+ '''Check if the SSH control socket exists
+
+ The control socket is automatically removed by the SSH daemon once
+ the time since the last connection to the node exceeds the timeout
+ set by the ControlPersist option. This can happen when we are
+ collecting from a large number of nodes and the timeout expires
+ before we start collection.
+ '''
+ return os.path.exists(self.control_path)
+
+ def _sanitize_log_msg(self, msg):
+ '''Attempts to obfuscate sensitive information in log messages such as
+ passwords'''
+ reg = r'(?P<var>(pass|key|secret|PASS|KEY|SECRET).*?=)(?P<value>.*?\s)'
+ return re.sub(reg, r'\g<var>****** ', msg)
+
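The sanitization regex can be exercised on its own; a small sketch with a made-up command line:

import re

reg = r'(?P<var>(pass|key|secret|PASS|KEY|SECRET).*?=)(?P<value>.*?\s)'
msg = 'running: sosreport -k mysql.password=hunter2 --batch'
print(re.sub(reg, r'\g<var>****** ', msg))
# running: sosreport -k mysql.password=****** --batch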
+ def log_info(self, msg):
+ '''Used to print and log info messages'''
+ caller = inspect.stack()[1][3]
+ lmsg = '[%s:%s] %s' % (self._hostname, caller, msg)
+ self.logger.info(lmsg)
+ self.console.info(self._fmt_msg(msg))
+
+ def log_error(self, msg):
+ '''Used to print and log error messages'''
+ caller = inspect.stack()[1][3]
+ lmsg = '[%s:%s] %s' % (self._hostname, caller, msg)
+ self.logger.error(lmsg)
+ self.console.error(self._fmt_msg(msg))
+
+ def log_debug(self, msg):
+ '''Used to print and log debug messages'''
+ msg = self._sanitize_log_msg(msg)
+ caller = inspect.stack()[1][3]
+ msg = '[%s:%s] %s' % (self._hostname, caller, msg)
+ self.logger.debug(msg)
+ if self.config['verbose']:
+ self.console.debug(msg)
+
+ def get_hostname(self):
+ '''Get the node's hostname'''
+ sout = self.run_command('hostname')
+ self.hostname = sout['stdout'].strip()
+ self.log_debug(
+ 'Hostname set to %s' % self.hostname)
+
+ def _format_cmd(self, cmd):
+ '''If we need to provide a sudo or root password to a command, then
+ here we prefix the command with the correct bits
+ '''
+ if self.config['become_root']:
+ return "su -c %s" % quote(cmd)
+ if self.config['need_sudo']:
+ return "sudo -S %s" % cmd
+ return cmd
+
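For reference, a standalone sketch of what the two prefixing branches produce; quote() here is pipes.quote (shlex.quote on modern Pythons):

from pipes import quote

cmd = 'sosreport --batch'
print("su -c %s" % quote(cmd))  # su -c 'sosreport --batch'
print("sudo -S %s" % cmd)       # sudo -S sosreport --batch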
+ def _fmt_output(self, output=None, rc=0):
+ '''Formats the returned output from a command into a dict'''
+ if rc == 0:
+ stdout = output
+ stderr = ''
+ else:
+ stdout = ''
+ stderr = output
+ res = {'status': rc,
+ 'stdout': stdout,
+ 'stderr': stderr}
+ return res
+
+ def _load_sos_info(self):
+ '''Queries the node for information about the installed version of sos
+ '''
+ cmd = self.host.pkg_query(self.host.sos_pkg_name)
+ res = self.run_command(cmd, use_container=True)
+ if res['status'] == 0:
+ ver = res['stdout'].splitlines()[-1].split('-')[1]
+ self.sos_info['version'] = ver
+ self.log_debug('sos version is %s' % self.sos_info['version'])
+ else:
+ self.log_error('sos is not installed on this node')
+ self.connected = False
+ return False
+ cmd = 'sosreport -l'
+ sosinfo = self.run_command(cmd, use_container=True)
+ if sosinfo['status'] == 0:
+ self._load_sos_plugins(sosinfo['stdout'])
+ if self.check_sos_version('3.6'):
+ self._load_sos_presets()
+
+ def _load_sos_presets(self):
+ cmd = 'sosreport --list-presets'
+ res = self.run_command(cmd, use_container=True)
+ if res['status'] == 0:
+ for line in res['stdout'].splitlines():
+ if line.strip().startswith('name:'):
+ pname = line.split('name:')[1].strip()
+ self.sos_info['presets'].append(pname)
+
+ def _load_sos_plugins(self, sosinfo):
+ ENABLED = 'The following plugins are currently enabled:'
+ DISABLED = 'The following plugins are currently disabled:'
+ OPTIONS = 'The following plugin options are available:'
+ PROFILES = 'Profiles:'
+
+ enablereg = ENABLED + '(.*?)' + DISABLED
+ disreg = DISABLED + '(.*?)' + OPTIONS
+ optreg = OPTIONS + '(.*?)' + PROFILES
+ proreg = PROFILES + '(.*?)' + '\n\n'
+
+ self.sos_info['enabled'] = self._regex_sos_help(enablereg, sosinfo)
+ self.sos_info['disabled'] = self._regex_sos_help(disreg, sosinfo)
+ self.sos_info['options'] = self._regex_sos_help(optreg, sosinfo)
+ self.sos_info['profiles'] = self._regex_sos_help(proreg, sosinfo, True)
+
+ def _regex_sos_help(self, regex, sosinfo, is_list=False):
+ res = []
+ for result in re.findall(regex, sosinfo, re.S):
+ for line in result.splitlines():
+ if not is_list:
+ try:
+ res.append(line.split()[0])
+ except Exception:
+ pass
+ else:
+ r = line.split(',')
+ res.extend(p.strip() for p in r if p.strip())
+ return res
+
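A minimal sketch of how these regexes carve up 'sosreport -l' output; the listing below is abbreviated and purely illustrative:

import re

sosinfo = (
    'The following plugins are currently enabled:\n'
    ' kernel  Linux kernel information\n'
    'The following plugins are currently disabled:\n'
    ' mysql  MariaDB and MySQL server\n'
)
enablereg = ('The following plugins are currently enabled:(.*?)'
             'The following plugins are currently disabled:')
for result in re.findall(enablereg, sosinfo, re.S):
    print([line.split()[0] for line in result.splitlines() if line.split()])
# ['kernel']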
+ def read_file(self, to_read):
+ '''Reads the specified file and returns the contents'''
+ try:
+ self.log_debug("Reading file %s" % to_read)
+ if not self.local:
+ res = self.run_command("cat %s" % to_read, timeout=5)
+ if res['status'] == 0:
+ return res['stdout']
+ else:
+ if 'No such file' in res['stdout']:
+ self.log_debug("File %s does not exist on node"
+ % to_read)
+ else:
+ self.log_error("Error reading %s: %s" %
+ (to_read, res['stdout'].split(':')[1:]))
+ return ''
+ else:
+ with open(to_read, 'r') as rfile:
+ return rfile.read()
+ except Exception as err:
+ self.log_error("Exception while reading %s: %s" % (to_read, err))
+ return ''
+
+ def determine_host(self):
+ '''Attempts to identify the host installation against supported
+ distributions
+ '''
+ for host_type in self.config['host_types']:
+ host = self.config['host_types'][host_type](self.address)
+ rel_string = self.read_file(host.release_file)
+ if host._check_enabled(rel_string):
+ self.log_debug("Host installation found to be %s" %
+ host.distribution)
+ return host
+ self.log_error('Unable to determine host installation. Ignoring node')
+ raise UnsupportedHostException
+
+ def check_sos_version(self, ver):
+ '''Checks to see if the sos installation on the node is AT LEAST the
+ given ver. This means that if the installed version is greater than
+ ver, this will still return True
+ '''
+ return LooseVersion(self.sos_info['version']) >= ver
+
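A hedged illustration of the LooseVersion comparison used here; '3.8-2' stands in for a distro-style version string:

from distutils.version import LooseVersion

print(LooseVersion('3.8-2') >= '3.6')  # True: strings are coerced for comparison
print(LooseVersion('3.5') >= '3.6')    # False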
+ def is_installed(self, pkg):
+ '''Checks if a given package is installed on the node'''
+ cmd = self.host.pkg_query(pkg)
+ res = self.run_command(cmd)
+ return res['status'] == 0
+
+ def run_command(self, cmd, timeout=180, get_pty=False, need_root=False,
+ force_local=False, use_container=False):
+ '''Runs a given cmd, either via the SSH session or locally
+
+ Arguments:
+ cmd - the full command to be run
+ timeout - time in seconds to wait for the command to complete
+ get_pty - If a shell is absolutely needed to run a command, set
+ this to True
+ need_root - if a command requires root privileges, setting this to
+ True tells sos-collector to format the command with
+ sudo or su - as appropriate and to input the password
+ force_local - force a command to run locally. Mainly used for scp.
+ use_container - Run this command in a container *IF* the host is
+ containerized
+ '''
+ if not self.control_socket_exists and not self.local:
+ self.log_debug('Control socket does not exist, attempting to '
+ 're-create')
+ try:
+ _sock = self._create_ssh_session()
+ if not _sock:
+ self.log_debug('Failed to re-create control socket')
+ raise ControlSocketMissingException
+ except Exception as err:
+ self.log_error('Cannot run command: control socket does not '
+ 'exist')
+ self.log_debug("Error while trying to create new SSH control "
+ "socket: %s" % err)
+ raise
+ if cmd.startswith('sosreport'):
+ cmd = cmd.replace('sosreport', self.host.sos_bin_path)
+ need_root = True
+ if need_root:
+ get_pty = True
+ cmd = self._format_cmd(cmd)
+ if use_container and self.host.containerized:
+ cmd = self.host.format_container_command(cmd)
+ self.log_debug('Running command %s' % cmd)
+ if 'atomic' in cmd:
+ get_pty = True
+ if not self.local and not force_local:
+ cmd = "%s %s" % (self.ssh_cmd, quote(cmd))
+ else:
+ if get_pty:
+ cmd = "/bin/bash -c %s" % quote(cmd)
+ res = pexpect.spawn(cmd, encoding='utf-8')
+ if need_root:
+ if self.config['need_sudo']:
+ res.sendline(self.config['sudo_pw'])
+ if self.config['become_root']:
+ res.sendline(self.config['root_password'])
+ output = res.expect([pexpect.EOF, pexpect.TIMEOUT],
+ timeout=timeout)
+ if output == 0:
+ out = res.before
+ res.close()
+ rc = res.exitstatus
+ return {'status': rc, 'stdout': out}
+ elif output == 1:
+ raise CommandTimeoutException(cmd)
+
+ def sosreport(self):
+ '''Run a sosreport on the node, then collect it'''
+ self.finalize_sos_cmd()
+ self.log_debug('Final sos command set to %s' % self.sos_cmd)
+ try:
+ path = self.execute_sos_command()
+ if path:
+ self.finalize_sos_path(path)
+ else:
+ self.log_error('Unable to determine path of sos archive')
+ if self.sos_path:
+ self.retrieved = self.retrieve_sosreport()
+ except Exception:
+ pass
+ self.cleanup()
+
+ def _create_ssh_session(self):
+ '''
+ Using ControlPersist, create the initial connection to the node.
+
+ This will generate an OpenSSH ControlPersist socket within the tmp
+ directory created or specified for sos-collector to use.
+
+ At most, we will wait 30 seconds for a connection. This involves a 15
+ second wait for the initial connection attempt, and a subsequent 15
+ second wait for a response when we supply a password.
+
+ Since we connect to nodes in parallel (using the --threads value),
+ the delay users see between 'Connecting to nodes...' and 'Beginning
+ collection of sosreports' can be as much as 30*(num_nodes/threads)
+ seconds.
+
+ Returns
+ True if the session is successfully opened; otherwise an exception
+ is raised
+ '''
+ # Don't use self.ssh_cmd here as we need to add a few additional
+ # parameters to establish the initial connection
+ self.log_debug('Opening SSH session to create control socket')
+ connected = False
+ ssh_key = ''
+ ssh_port = ''
+ if self.config['ssh_port'] != 22:
+ ssh_port = "-p%s " % self.config['ssh_port']
+ if self.config['ssh_key']:
+ ssh_key = "-i%s" % self.config['ssh_key']
+ cmd = ("ssh %s %s -oControlPersist=600 -oControlMaster=auto "
+ "-oStrictHostKeyChecking=no -oControlPath=%s %s@%s "
+ "\"echo Connected\"" % (ssh_key,
+ ssh_port,
+ self.control_path,
+ self.config['ssh_user'],
+ self.address))
+ res = pexpect.spawn(cmd, encoding='utf-8')
+
+ connect_expects = [
+ u'Connected',
+ u'password:',
+ u'.*Permission denied.*',
+ u'.* port .*: No route to host',
+ u'.*Could not resolve hostname.*',
+ pexpect.TIMEOUT
+ ]
+
+ index = res.expect(connect_expects, timeout=15)
+ if index == 0:
+ connected = True
+ elif index == 1:
+ if self._password:
+ pass_expects = [
+ u'Connected',
+ u'Permission denied, please try again.',
+ pexpect.TIMEOUT
+ ]
+ res.sendline(self._password)
+ pass_index = res.expect(pass_expects, timeout=15)
+ if pass_index == 0:
+ connected = True
+ elif pass_index == 1:
+ # Note that we do not get an exitstatus here, so matching
+ # this line means an invalid password will be reported for
+ # both invalid passwords and invalid user names
+ raise InvalidPasswordException
+ elif pass_index == 2:
+ raise TimeoutPasswordAuthException
+ else:
+ raise PasswordRequestException
+ elif index == 2:
+ raise AuthPermissionDeniedException
+ elif index == 3:
+ raise ConnectionException(self.address, self.config['ssh_port'])
+ elif index == 4:
+ raise ConnectionException(self.address)
+ elif index == 5:
+ raise ConnectionTimeoutException
+ else:
+ raise Exception("Unknown error, client returned %s" % res.before)
+ if connected:
+ self.log_debug("Successfully created control socket at %s"
+ % self.control_path)
+ return True
+ return False
+
+ def close_ssh_session(self):
+ '''Remove the control socket to effectively terminate the session'''
+ if self.local:
+ return True
+ try:
+ res = self.run_command("rm -f %s" % self.control_path,
+ force_local=True)
+ if res['status'] == 0:
+ return True
+ self.log_error("Could not remove ControlPath %s: %s"
+ % (self.control_path, res['stdout']))
+ return False
+ except Exception as e:
+ self.log_error('Error closing SSH session: %s' % e)
+ return False
+
+ def _preset_exists(self, preset):
+ '''Verifies if the given preset exists on the node'''
+ return preset in self.sos_info['presets']
+
+ def _plugin_exists(self, plugin):
+ '''Verifies if the given plugin exists on the node'''
+ return any(plugin in s for s in [self.sos_info['enabled'],
+ self.sos_info['disabled']])
+
+ def _check_enabled(self, plugin):
+ '''Checks to see if the plugin is default enabled on node'''
+ return plugin in self.sos_info['enabled']
+
+ def _check_disabled(self, plugin):
+ '''Checks to see if the plugin is default disabled on node'''
+ return plugin in self.sos_info['disabled']
+
+ def _plugin_option_exists(self, opt):
+ '''Attempts to verify that the given option is available on the node.
+ Note that we only get available options for enabled plugins, so if a
+ plugin has been force-enabled we cannot validate if the plugin option
+ is correct or not'''
+ plug = opt.split('.')[0]
+ if not self._plugin_exists(plug):
+ return False
+ if (self._check_disabled(plug) and
+ plug not in self.config['enable_plugins']):
+ return False
+ if self._check_enabled(plug):
+ return opt in self.sos_info['options']
+ # plugin exists, but is normally disabled. Assume user knows option is
+ # valid when enabling the plugin
+ return True
+
+ def _fmt_sos_opt_list(self, opts):
+ '''Returns a comma delimited list for sos plugins that are confirmed
+ to exist on the node'''
+ return ','.join(o for o in opts if self._plugin_exists(o))
+
+ def finalize_sos_cmd(self):
+ '''Use host facts and compare to the cluster type to modify the sos
+ command if needed'''
+ self.sos_cmd = self.config['sos_cmd']
+ label = self.determine_sos_label()
+ if label:
+ self.sos_cmd = ' %s %s' % (self.sos_cmd, quote(label))
+
+ if self.config['sos_opt_line']:
+ return True
+
+ if self.config['only_plugins']:
+ plugs = [o for o in self.config['only_plugins']
+ if self._plugin_exists(o)]
+ if len(plugs) != len(self.config['only_plugins']):
+ not_only = list(set(self.config['only_plugins']) - set(plugs))
+ self.log_debug('Requested plugins %s do not exist on the '
+ 'node' % not_only)
+ only = self._fmt_sos_opt_list(self.config['only_plugins'])
+ if only:
+ self.sos_cmd += ' --only-plugins=%s' % quote(only)
+ return True
+
+ if self.config['skip_plugins']:
+ # only run skip-plugins for plugins that are enabled
+ skip = [o for o in self.config['skip_plugins']
+ if self._check_enabled(o)]
+ if len(skip) != len(self.config['skip_plugins']):
+ not_skip = list(set(self.config['skip_plugins']) - set(skip))
+ self.log_debug('Requested to skip plugins %s, but those '
+ 'plugins are not enabled' % not_skip)
+ skipln = self._fmt_sos_opt_list(skip)
+ if skipln:
+ self.sos_cmd += ' --skip-plugins=%s' % quote(skipln)
+
+ if self.config['enable_plugins']:
+ # only run enable for plugins that are disabled
+ opts = [o for o in self.config['enable_plugins']
+ if o not in self.config['skip_plugins']
+ and self._check_disabled(o) and self._plugin_exists(o)]
+ if len(opts) != len(self.config['enable_plugins']):
+ not_on = list(set(self.config['enable_plugins']) - set(opts))
+ self.log_debug('Requested to enable plugins %s, but those '
+ 'plugins are already enabled or do not exist' % not_on)
+ enable = self._fmt_sos_opt_list(opts)
+ if enable:
+ self.sos_cmd += ' --enable-plugins=%s' % quote(enable)
+
+ if self.config['plugin_options']:
+ opts = [o for o in self.config['plugin_options']
+ if self._plugin_exists(o.split('.')[0])
+ and self._plugin_option_exists(o.split('=')[0])]
+ if opts:
+ self.sos_cmd += ' -k %s' % quote(','.join(opts))
+
+ if self.config['preset']:
+ if self._preset_exists(self.config['preset']):
+ self.sos_cmd += ' --preset=%s' % quote(self.config['preset'])
+ else:
+ self.log_debug('Requested to enable preset %s but preset does '
+ 'not exist on node' % self.config['preset'])
+
+ def determine_sos_label(self):
+ '''Determine what, if any, label should be added to the sosreport'''
+ label = ''
+ label += self.config['cluster'].get_node_label(self)
+
+ if self.config['label']:
+ label += ('%s' % self.config['label'] if not label
+ else '-%s' % self.config['label'])
+
+ if not label:
+ return None
+
+ self.log_debug('Label for sosreport set to %s' % label)
+ if self.check_sos_version('3.6'):
+ lcmd = '--label'
+ else:
+ lcmd = '--name'
+ label = '%s-%s' % (self.address.split('.')[0], label)
+ return '%s=%s' % (lcmd, label)
+
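A worked example of the labeling logic, assuming a cluster label of 'master', a --label value of 'case01234', and sos >= 3.6 on the node:

label = 'master'       # from get_node_label()
label += '-case01234'  # --label value appended with a '-' separator
print('--label=%s' % label)
# --label=master-case01234
# On nodes with sos < 3.6 this falls back to --name, with the short
# hostname prefixed, e.g. --name=node1-master-case01234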
+ def finalize_sos_path(self, path):
+ '''Use host facts to determine if we need to change the sos path
+ we are retrieving from'''
+ pstrip = self.host.sos_path_strip
+ if pstrip:
+ path = path.replace(pstrip, '')
+ path = path.split()[0]
+ self.log_debug('Final sos path: %s' % path)
+ self.sos_path = path
+ self.archive = path.split('/')[-1]
+
+ def determine_sos_error(self, rc, stdout):
+ if rc == -1:
+ return 'sosreport process received SIGKILL on node'
+ if rc == 1:
+ if 'sudo' in stdout:
+ return 'sudo attempt failed'
+ if rc == 127:
+ return 'sosreport terminated unexpectedly. Check disk space'
+ if len(stdout) > 0:
+ return stdout.split('\n')[0]
+ return 'sos exited with code %s' % rc
+
+ def execute_sos_command(self):
+ '''Run sosreport and capture the resulting file path'''
+ self.log_info("Generating sosreport...")
+ try:
+ path = False
+ res = self.run_command(self.sos_cmd,
+ timeout=self.config['timeout'],
+ get_pty=True, need_root=True,
+ use_container=True)
+ if res['status'] == 0:
+ for line in res['stdout'].splitlines():
+ if fnmatch.fnmatch(line, '*sosreport-*tar*'):
+ path = line.strip()
+ else:
+ err = self.determine_sos_error(res['status'], res['stdout'])
+ self.log_debug("Error running sosreport. rc = %s msg = %s"
+ % (res['status'], res['stdout'] or
+ res['stderr']))
+ raise Exception(err)
+ return path
+ except CommandTimeoutException:
+ self.log_error('Timeout exceeded')
+ raise
+ except Exception as e:
+ self.log_error('Error running sosreport: %s' % e)
+ raise
+
+ def retrieve_file(self, path):
+ '''Copies the specified file from the host to our temp dir'''
+ destdir = self.config['tmp_dir'] + '/'
+ dest = destdir + path.split('/')[-1]
+ try:
+ if not self.local:
+ if self.file_exists(path):
+ self.log_debug("Copying remote %s to local %s" %
+ (path, destdir))
+ cmd = "/usr/bin/scp -oControlPath=%s %s@%s:%s %s" % (
+ self.control_path,
+ self.config['ssh_user'],
+ self.address,
+ path,
+ destdir
+ )
+ res = self.run_command(cmd, force_local=True)
+ return res['status'] == 0
+ else:
+ self.log_debug("Attempting to copy remote file %s, but it "
+ "does not exist on filesystem" % path)
+ return False
+ else:
+ self.log_debug("Moving %s to %s" % (path, destdir))
+ shutil.copy(path, dest)
+ return True
+ except Exception as err:
+ self.log_debug("Failed to retrieve %s: %s" % (path, err))
+ return False
+
+ def remove_file(self, path):
+ '''Removes the specified file from the host. This should only be used
+ after we have already retrieved the file
+ '''
+ path = ''.join(path.split())
+ try:
+ if len(path) <= 2: # ensure we have a non '/' path
+ self.log_debug("Refusing to remove path %s: appears to be "
+ "incorrect and possibly dangerous" % path)
+ return False
+ if self.file_exists(path):
+ self.log_debug("Removing file %s" % path)
+ cmd = "rm -f %s" % path
+ res = self.run_command(cmd, need_root=True)
+ return res['status'] == 0
+ else:
+ self.log_debug("Attempting to remove remote file %s, but it "
+ "does not exist on filesystem" % path)
+ return False
+ except Exception as e:
+ self.log_debug('Failed to remove %s: %s' % (path, e))
+ return False
+
+ def retrieve_sosreport(self):
+ '''Collect the sosreport archive from the node'''
+ if self.sos_path:
+ if self.config['need_sudo'] or self.config['become_root']:
+ try:
+ self.make_archive_readable(self.sos_path)
+ except Exception:
+ self.log_error('Failed to make archive readable')
+ return False
+ try:
+ self.make_archive_readable(self.sos_path + '.md5')
+ except Exception:
+ self.log_debug('Failed to make md5 readable')
+ self.logger.info('Retrieving sosreport from %s' % self.address)
+ self.log_info('Retrieving sosreport...')
+ ret = self.retrieve_file(self.sos_path)
+ if ret:
+ self.log_info('Successfully collected sosreport')
+ self.file_list.append(self.sos_path.split('/')[-1])
+ else:
+ self.log_error('Failed to retrieve sosreport')
+ raise SystemExit
+ self.hash_retrieved = self.retrieve_file(self.sos_path + '.md5')
+ if self.hash_retrieved:
+ self.file_list.append(self.sos_path.split('/')[-1] + '.md5')
+ return True
+ else:
+ # sos sometimes fails but still returns a 0 exit code
+ err = self.stderr.read()
+ if err:
+ e = err
+ else:
+ e = [x.strip() for x in self.stdout.readlines() if x.strip()][-1]
+ self.logger.error(
+ 'Failed to run sosreport on %s: %s' % (self.address, e))
+ self.log_error('Failed to run sosreport. %s' % e)
+ return False
+
+ def remove_sos_archive(self):
+ '''Remove the sosreport archive from the node, since we have
+ collected it and it would be wasted space otherwise'''
+ if self.sos_path is None:
+ return
+ if 'sosreport' not in self.sos_path:
+ self.log_debug("Node sosreport path %s looks incorrect. Not "
+ "attempting to remove path" % self.sos_path)
+ return
+ removed = self.remove_file(self.sos_path)
+ if not removed:
+ self.log_error('Failed to remove sosreport')
+
+ def cleanup(self):
+ '''Remove the sos archive from the node once we have it locally'''
+ self.remove_sos_archive()
+ if self.hash_retrieved:
+ self.remove_file(self.sos_path + '.md5')
+ cleanup = self.host.set_cleanup_cmd()
+ if cleanup:
+ self.run_command(cleanup)
+
+ def collect_extra_cmd(self, filenames):
+ '''Collect the file created by a cluster outside of sos'''
+ for filename in filenames:
+ try:
+ if self.config['need_sudo'] or self.config['become_root']:
+ try:
+ self.make_archive_readable(filename)
+ except Exception as err:
+ self.log_error("Unable to retrieve file %s" % filename)
+ self.log_debug("Failed to make file %s readable: %s"
+ % (filename, err))
+ continue
+ ret = self.retrieve_file(filename)
+ if ret:
+ self.file_list.append(filename.split('/')[-1])
+ self.remove_file(filename)
+ else:
+ self.log_error("Unable to retrieve file %s" % filename)
+ except Exception as e:
+ msg = 'Error collecting additional data from master: %s' % e
+ self.log_error(msg)
+
+ def make_archive_readable(self, filepath):
+ '''Used to make the given archive world-readable, which is slightly
+ better than changing the ownership outright.
+
+ This is only used when we're not connecting as root.
+ '''
+ cmd = 'chmod o+r %s' % filepath
+ res = self.run_command(cmd, timeout=10, need_root=True)
+ if res['status'] == 0:
+ return True
+ else:
+ msg = "Exception while making %s readable. Return code was %s"
+ self.log_error(msg % (filepath, res['status']))
+ raise Exception