diff options
author | navid <navid@ef72aa8b-4018-0410-8976-d6e080ef94d8> | 2007-08-17 13:42:51 +0000 |
---|---|---|
committer | navid <navid@ef72aa8b-4018-0410-8976-d6e080ef94d8> | 2007-08-17 13:42:51 +0000 |
commit | 8092e9862adb06e6cec8f02e0dfe3d8a50030949 (patch) | |
tree | a764e897e8e571914346f29e8e2123647505cacc /src/lib | |
parent | b451a9da5eedbacd9216409cf8e21b19a0d848c6 (diff) | |
download | sos-8092e9862adb06e6cec8f02e0dfe3d8a50030949.tar.gz |
merged navid-dev -r r350:r364 into trunk (refs: rhel-5-start, r1.7-8)
git-svn-id: svn+ssh://svn.fedorahosted.org/svn/sos/trunk@368 ef72aa8b-4018-0410-8976-d6e080ef94d8
Diffstat (limited to 'src/lib')
-rwxr-xr-x | src/lib/sos/helpers.py | 16 | ||||
-rw-r--r-- | src/lib/sos/plugins/autofs.py | 8 | ||||
-rw-r--r-- | src/lib/sos/plugins/cluster.py | 373 | ||||
-rw-r--r-- | src/lib/sos/plugins/filesys.py | 1 | ||||
-rw-r--r-- | src/lib/sos/plugins/kernel.py | 51 | ||||
-rw-r--r-- | src/lib/sos/plugins/ldap.py | 16 | ||||
-rw-r--r-- | src/lib/sos/plugins/networking.py | 3 | ||||
-rw-r--r-- | src/lib/sos/plugins/process.py | 6 | ||||
-rw-r--r-- | src/lib/sos/plugins/squid.py | 6 | ||||
-rw-r--r-- | src/lib/sos/plugins/veritas.py | 5 | ||||
-rw-r--r-- | src/lib/sos/plugins/yum.py | 5 | ||||
-rw-r--r-- | src/lib/sos/plugintools.py | 98 | ||||
-rwxr-xr-x | src/lib/sos/policyredhat.py | 113 |
13 files changed, 394 insertions, 307 deletions
diff --git a/src/lib/sos/helpers.py b/src/lib/sos/helpers.py index bcdee6fb..bc9c51ff 100755 --- a/src/lib/sos/helpers.py +++ b/src/lib/sos/helpers.py @@ -25,7 +25,7 @@ """ helper functions used by sosreport and plugins """ -import os, popen2, fcntl, select, itertools, sys, commands +import os, popen2, fcntl, select, itertools, sys, commands, logging from time import time from tempfile import mkdtemp @@ -60,6 +60,19 @@ def makeNonBlocking(afd): def sosGetCommandOutput(command): """ Execute a command and gather stdin, stdout, and return status. """ + soslog = logging.getLogger('sos') + + # Log if binary is not runnable or does not exist + for path in os.environ["PATH"].split(":"): + cmdfile = command.strip("(").split()[0] + # handle both absolute or relative paths + if ( ( not os.path.isabs(cmdfile) and os.access(os.path.join(path,cmdfile), os.X_OK) ) or \ + ( os.path.isabs(cmdfile) and os.access(cmdfile, os.X_OK) ) ): + break + else: + soslog.log(logging.VERBOSE, "binary '%s' does not exist or is not runnable" % cmdfile) + return (127, "", 0) + stime = time() inpipe, pipe = os.popen4(command, 'r') inpipe.close() @@ -123,4 +136,3 @@ def sosRelPath(path1, path2, sep=os.path.sep, pardir=os.path.pardir): if not common: return path2 # leave path absolute if nothing at all in common return sep.join( [pardir]*len(u1) + u2 ) - diff --git a/src/lib/sos/plugins/autofs.py b/src/lib/sos/plugins/autofs.py index 85cb72a7..2cb22767 100644 --- a/src/lib/sos/plugins/autofs.py +++ b/src/lib/sos/plugins/autofs.py @@ -47,7 +47,7 @@ class autofs(sos.plugintools.PluginBase): debugout=self.doRegexFindAll(r"^daemon.*\s+(\/var.*)", "/etc/syslog.conf") for i in debugout: return i - + def setup(self): self.addCopySpec("/etc/auto*") self.addCopySpec("/etc/sysconfig/autofs") @@ -58,6 +58,10 @@ class autofs(sos.plugintools.PluginBase): self.collectExtOutput("/bin/egrep -e 'automount|pid.*nfs' /proc/mounts") self.collectExtOutput("/bin/mount | egrep -e 'automount|pid.*nfs'") 
self.collectExtOutput("/sbin/chkconfig --list autofs") - self.addCopySpec(self.getdaemondebug()) + + # if debugging to file is enabled, grab that file too + daemon_debug_file = self.getdaemondebug() + if daemon_debug_file: + self.addCopySpec(daemon_debug_file) return diff --git a/src/lib/sos/plugins/cluster.py b/src/lib/sos/plugins/cluster.py index 81b71f5a..6067f9c4 100644 --- a/src/lib/sos/plugins/cluster.py +++ b/src/lib/sos/plugins/cluster.py @@ -26,9 +26,19 @@ class cluster(sos.plugintools.PluginBase): def checkenabled(self): # enable if any related package is installed - for pkg in [ "ccs", "cman", "cman-kernel", "magma", "magma-plugins", - "rgmanager", "fence", "dlm", "dlm-kernel", "gulm", - "GFS", "GFS-kernel", "lvm2-cluster" ]: + rhelver = self.cInfo["policy"].rhelVersion() + if rhelver == 4: + pkgs_to_check = [ "ccs", "cman", "cman-kernel", "magma", "magma-plugins", + "rgmanager", "fence", "dlm", "dlm-kernel", "gulm", + "GFS", "GFS-kernel", "lvm2-cluster" ] + elif rhelver == 5: + pkgs_to_check = [ "rgmanager", "luci", "ricci", "system-config-cluster", + "gfs-utils", "gnbd", "kmod-gfs", "kmod-gnbd", "lvm2-cluster" ] + else: + # can't guess what RHEL version we are running + pkgs_to_check = [] + + for pkg in pkgs_to_check: if self.cInfo["policy"].pkgByName(pkg) != None: return True @@ -42,160 +52,160 @@ class cluster(sos.plugintools.PluginBase): return False def has_gfs(self): - fp = open("/proc/mounts","r") - for line in fp.readlines(): - mntline = line.split(" ") - if mntline[2] == "gfs": - return True - fp.close() - return False + try: + if len(self.doRegexFindAll(r'^\S+\s+\S+\s+gfs\s+.*$', "/etc/mtab")): + return True + except: + return False def diagnose(self): - try: rhelver = self.cInfo["policy"].pkgDictByName("redhat-release")[0] - except: rhelver = None - - # FIXME: we should only run tests specific for the version, now just do them all regardless - if rhelver == "4" or True: - # check that kernel module packages are installed for - # running 
kernel version - pkgs_check = [ "dlm-kernel" , "cman-kernel" ] - if self.has_gfs(): pkgs_check.append("GFS-kernel") - - for pkgname in pkgs_check: - found = 0 - if self.cInfo["policy"].isKernelSMP() and self.cInfo["policy"].pkgByName(pkgname): - found = 1 # -one- means package found (but not for same version as kernel) - pkgname = pkgname + "-smp" - - for pkg in self.cInfo["policy"].allPkgsByName(pkgname): - found = 1 - for reqline in self.cInfo["policy"].pkgRequires("%s-%s-%s" % (pkg[0],pkg[1],pkg[2]) ): - if reqline[0] == 'kernel-smp' and reqline[1] == '=': - reqline[2] = reqline[2] + "smp" - - if ( (not self.cInfo["policy"].isKernelSMP() and reqline[0] == 'kernel') or (self.cInfo["policy"].isKernelSMP() and reqline[0] == 'kernel-smp') ) and reqline[1] == '=' and reqline[2] == self.cInfo["policy"].kernelVersion(): - found = 2 - break - - if found == 0: - self.addDiagnose("required package is missing: %s" % pkgname) - elif found == 1: - self.addDiagnose("required package is not installed for current kernel: %s" % pkgname) - - # check if the minimum set of packages is installed - # for RHEL4 RHCS(ccs, cman, cman-kernel, magma, magma-plugins, (dlm, dlm-kernel) || gulm, perl-Net-Telnet, rgmanager, fence) - # RHEL4 GFS (GFS, GFS-kernel, ccs, lvm2-cluster, fence) - - for pkg in [ "ccs", "cman", "magma", "magma-plugins", "perl-Net-Telnet", "rgmanager", "fence" ]: - if self.cInfo["policy"].pkgByName(pkg) == None: - self.addDiagnose("required package is missing: %s" % pkg) - + rhelver = self.cInfo["policy"].rhelVersion() + + # check if the minimum set of packages is installed + # for RHEL4 RHCS(ccs, cman, cman-kernel, magma, magma-plugins, (dlm, dlm-kernel) || gulm, perl-Net-Telnet, rgmanager, fence) + # RHEL4 GFS (GFS, GFS-kernel, ccs, lvm2-cluster, fence) + + kernel_pkgs = [] + pkgs_check = [] + mods_check = [] + serv_check = [] + + if rhelver == 4: + kernel_pkgs = [ "dlm-kernel" , "cman-kernel" ] + if self.has_gfs(): + kernel_pkgs.append("GFS-kernel") + 
pkgs_check.extend( [ "ccs", "cman", "magma", "magma-plugins", "perl-Net-Telnet", "rgmanager", "fence" ] ) + mods_check.extend( [ "cman", "dlm" ] ) + if self.has_gfs(): + mods_check.append("gfs") + serv_check.extend( [ "cman", "ccsd", "rgmanager", "fenced" ] ) + if self.has_gfs(): + serv_check.extend( ["gfs", "clvmd"] ) + elif rhelver == 5: + if self.has_gfs(): + kernel_pkgs.append("kmod-gfs") + pkgs_check.extend ( [ "cman", "perl-Net-Telnet", "rgmanager" ] ) + mods_check.extend( [ "dlm" ] ) + if self.has_gfs(): + mods_check.extend( ["gfs", "gfs2"] ) + serv_check.extend( [ "cman", "rgmanager" ] ) + if self.has_gfs(): + serv_check.extend( ["gfs", "clvmd"] ) + + # check that kernel module packages are installed for + # running kernel version + + for pkgname in kernel_pkgs: + found = 0 + + # FIXME: make sure it works on RHEL4 + for pkg in self.cInfo["policy"].allPkgsByNameRegex( "^" + pkgname ): + found = 1 + for reqline in pkg.dsFromHeader('requirename'): + reqline = reqline[0].split() + try: + if reqline[1].startswith("kernel") and reqline[2] == "=" and reqline[3] == self.cInfo["policy"].kernelVersion(): + found = 2 + break + except IndexError: + pass + + if found == 0: + self.addDiagnose("required kernel package is missing: %s" % pkgname) + elif found == 1: + self.addDiagnose("required package is not installed for current kernel: %s" % pkgname) + + for pkg in pkgs_check: + if self.cInfo["policy"].pkgByName(pkg) == None: + self.addDiagnose("required package is missing: %s" % pkg) + + if rhelver == "4": # (dlm, dlm-kernel) || gulm if not ((self.cInfo["policy"].pkgByName("dlm") and self.cInfo["policy"].pkgByName("dlm-kernel")) or self.cInfo["policy"].pkgByName("gulm")): self.addDiagnose("required packages are missing: (dlm, dlm-kernel) || gulm") - # let's make modules are loaded - mods_check = [ "cman", "dlm" ] - if self.has_gfs(): mods_check.append("gfs") - for module in mods_check: - if len(self.fileGrep("^%s " % module, "/proc/modules")) == 0: - 
self.addDiagnose("required package is present but not loaded: %s" % module) - - # check if all the needed daemons are active at sosreport time - # check if they are started at boot time in RHEL4 RHCS (cman, ccsd, rgmanager, fenced) - # and GFS (gfs, ccsd, clvmd, fenced) - checkserv = [ "cman", "ccsd", "rgmanager", "fenced" ] - if self.has_gfs(): checkserv.extend( ["gfs", "clvmd"] ) - for service in checkserv: - status, output = commands.getstatusoutput("/sbin/service %s status" % service) - if status: - self.addDiagnose("service %s is not running" % service) - else: - # service is running, extra sanity checks - if service == "fenced": - # also make sure fenced is a registered cluster service - try: - if len(self.fileGrep("^Fence Domain:\W", "/proc/cluster/services")) == 0: - self.addDiagnose("fencing service is not registered with cman") - except: - pass - elif service == "rgmanager": - # also make sure rgmanager is a registered cluster service - try: - if len(self.fileGrep("^User:\W*usrm::manager", "/proc/cluster/services")) == 0: - self.addDiagnose("rgmanager is not registered with cman") - except: - pass - - if not self.cInfo["policy"].runlevelDefault() in self.cInfo["policy"].runlevelByService(service): - self.addDiagnose("service %s is not started in default runlevel" % service) - - # FIXME: any cman service whose state != run ? - # Fence Domain: "default" 2 2 run - - - # is cluster quorate - if not self.is_cluster_quorate(): - self.addDiagnose("cluster node is not quorate") - - # if there is no cluster.conf, diagnose() finishes here. 
- try: - os.stat("/etc/cluster/cluster.conf") - except: - self.addDiagnose("/etc/cluster/cluster.conf is missing") - return - - # setup XML xpath context - xml = libxml2.parseFile("/etc/cluster/cluster.conf") - xpathContext = xml.xpathNewContext() - - # check fencing (warn on no fencing) - if len(xpathContext.xpathEval("/cluster/clusternodes/clusternode[not(fence/method/device)]")): - if self.has_gfs(): - self.addDiagnose("one or more nodes have no fencing agent configured: fencing is required for GFS to work") - else: - self.addDiagnose("one or more nodes have no fencing agent configured: the cluster infrastructure might not work as intended") - - # check fencing (warn on manual) - if len(xpathContext.xpathEval("/cluster/clusternodes/clusternode[/cluster/fencedevices/fencedevice[@agent='fence_manual']/@name=fence/method/device/@name]")): - self.addDiagnose("one or more nodes have manual fencing agent configured (data integrity is not guaranteed)") - - # if fence_ilo or fence_drac, make sure acpid is not running - hostname = commands.getoutput("/bin/uname -n").split(".")[0] - if len(xpathContext.xpathEval('/cluster/clusternodes/clusternode[@name = "%s" and /cluster/fencedevices/fencedevice[@agent="fence_rsa" or @agent="fence_drac"]/@name=fence/method/device/@name]' % hostname )): - status, output = commands.getstatusoutput("/sbin/service acpid status") - if status == 0 or self.cInfo["policy"].runlevelDefault() in self.cInfo["policy"].runlevelByService("acpid"): - self.addDiagnose("acpid is enabled, this may cause problems with your fencing method.") - - # check for fs exported via nfs without nfsid attribute - if len(xpathContext.xpathEval("/cluster/rm/service//fs[not(@fsid)]/nfsexport")): - self.addDiagnose("one or more nfs export do not have a fsid attribute set.") - - # cluster.conf file version and the in-memory cluster configuration version matches - status, cluster_version = commands.getstatusoutput("cman_tool status | grep 'Config version'") - if not status: 
cluster_version = cluster_version[16:] - else: cluster_version = None - conf_version = xpathContext.xpathEval("/cluster/@config_version")[0].content - - if status == 0 and conf_version != cluster_version: - self.addDiagnose("cluster.conf and in-memory configuration version differ (%s != %s)" % (conf_version, cluster_version) ) - - # make sure the first part of the lock table matches the cluster name - # and that the locking protocol is sane - cluster_name = xpathContext.xpathEval("/cluster/@name")[0].content - - for fs in self.fileGrep(r'^[^#][/\w]*\W*[/\w]*\W*gfs', "/etc/fstab"): - # for each gfs entry - fs = fs.split() - - lockproto = self.get_gfs_sb_field(fs[0], "sb_lockproto") - if lockproto and lockproto != self.get_locking_proto(): - self.addDiagnose("gfs mountpoint (%s) is using the wrong locking protocol (%s)" % (fs[0], lockproto) ) - - locktable = self.get_gfs_sb_field(fs[0], "sb_locktable") - try: locktable = locktable.split(":")[0] - except: continue - if locktable != cluster_name: - self.addDiagnose("gfs mountpoint (%s) is using the wrong locking table" % fs[0]) + for module in mods_check: + if len(self.fileGrep("^%s\s+" % module, "/proc/modules")) == 0: + self.addDiagnose("required module is not loaded: %s" % module) + + # check if all the needed daemons are active at sosreport time + # check if they are started at boot time in RHEL4 RHCS (cman, ccsd, rgmanager, fenced) + # and GFS (gfs, ccsd, clvmd, fenced) + + for service in serv_check: + status, output = commands.getstatusoutput("/sbin/service %s status &> /dev/null" % service) + if status != 0: + self.addDiagnose("service %s is not running" % service) + + if not self.cInfo["policy"].runlevelDefault() in self.cInfo["policy"].runlevelByService(service): + self.addDiagnose("service %s is not started in default runlevel" % service) + + # FIXME: missing important cman services + # FIXME: any cman service whose state != run ? 
+ # Fence Domain: "default" 2 2 run - + + # is cluster quorate + if not self.is_cluster_quorate(): + self.addDiagnose("cluster node is not quorate") + + # if there is no cluster.conf, diagnose() finishes here. + try: + os.stat("/etc/cluster/cluster.conf") + except: + self.addDiagnose("/etc/cluster/cluster.conf is missing") + return + + # setup XML xpath context + xml = libxml2.parseFile("/etc/cluster/cluster.conf") + xpathContext = xml.xpathNewContext() + + # check fencing (warn on no fencing) + if len(xpathContext.xpathEval("/cluster/clusternodes/clusternode[not(fence/method/device)]")): + if self.has_gfs(): + self.addDiagnose("one or more nodes have no fencing agent configured: fencing is required for GFS to work") + else: + self.addDiagnose("one or more nodes have no fencing agent configured: the cluster infrastructure might not work as intended") + + # check fencing (warn on manual) + if len(xpathContext.xpathEval("/cluster/clusternodes/clusternode[/cluster/fencedevices/fencedevice[@agent='fence_manual']/@name=fence/method/device/@name]")): + self.addDiagnose("one or more nodes have manual fencing agent configured (data integrity is not guaranteed)") + + # if fence_ilo or fence_drac, make sure acpid is not running + hostname = commands.getoutput("/bin/uname -n").split(".")[0] + if len(xpathContext.xpathEval('/cluster/clusternodes/clusternode[@name = "%s" and /cluster/fencedevices/fencedevice[@agent="fence_rsa" or @agent="fence_drac"]/@name=fence/method/device/@name]' % hostname )): + status, output = commands.getstatusoutput("/sbin/service acpid status") + if status == 0 or self.cInfo["policy"].runlevelDefault() in self.cInfo["policy"].runlevelByService("acpid"): + self.addDiagnose("acpid is enabled, this may cause problems with your fencing method.") + + # check for fs exported via nfs without nfsid attribute + if len(xpathContext.xpathEval("/cluster/rm/service//fs[not(@fsid)]/nfsexport")): + self.addDiagnose("one or more nfs export do not have a fsid 
attribute set.") + + # cluster.conf file version and the in-memory cluster configuration version matches + status, cluster_version = commands.getstatusoutput("cman_tool status | grep 'Config version'") + if not status: cluster_version = cluster_version[16:] + else: cluster_version = None + conf_version = xpathContext.xpathEval("/cluster/@config_version")[0].content + + if status == 0 and conf_version != cluster_version: + self.addDiagnose("cluster.conf and in-memory configuration version differ (%s != %s)" % (conf_version, cluster_version) ) + + # make sure the first part of the lock table matches the cluster name + # and that the locking protocol is sane + cluster_name = xpathContext.xpathEval("/cluster/@name")[0].content + + for fs in self.fileGrep(r'^[^#][/\w]*\W*[/\w]*\W*gfs', "/etc/fstab"): + # for each gfs entry + fs = fs.split() + lockproto = self.get_gfs_sb_field(fs[0], "sb_lockproto") + if lockproto and lockproto != self.get_locking_proto(): + self.addDiagnose("gfs mountpoint (%s) is using the wrong locking protocol (%s)" % (fs[0], lockproto) ) + + locktable = self.get_gfs_sb_field(fs[0], "sb_locktable") + try: locktable = locktable.split(":")[0] + except: continue + if locktable != cluster_name: + self.addDiagnose("gfs mountpoint (%s) is using the wrong locking table" % fs[0]) def setup(self): self.collectExtOutput("/sbin/fdisk -l") @@ -204,12 +214,12 @@ class cluster(sos.plugintools.PluginBase): self.addCopySpec("/etc/cluster") self.collectExtOutput("/usr/sbin/rg_test test /etc/cluster/cluster.conf") self.addCopySpec("/proc/cluster") - self.collectExtOutput("/usr/bin/cman_tool status") - self.collectExtOutput("/usr/bin/cman_tool services") - self.collectExtOutput("/usr/bin/cman_tool -af nodes") - self.collectExtOutput("/usr/bin/ccs_tool lsnode") - self.collectExtOutput("/usr/bin/openais-cfgtool -s") - self.collectExtOutput("/usr/bin/clustat") + self.collectExtOutput("cman_tool status") + self.collectExtOutput("cman_tool services") + 
self.collectExtOutput("cman_tool -af nodes") + self.collectExtOutput("ccs_tool lsnode") + self.collectExtOutput("openais-cfgtool -s") + self.collectExtOutput("clustat") self.collectExtOutput("/sbin/ipvsadm -L") @@ -232,20 +242,24 @@ class cluster(sos.plugintools.PluginBase): self.addCopySpec("/var/log/messages") def do_lockdump(self): - try: - fp = open("/proc/cluster/services","r") - except: - return - for line in fp.readlines(): - if line[0:14] == "DLM Lock Space": - try: - lockspace = line.split('"')[1] - except: - pass - else: - commands.getstatusoutput("echo %s > /proc/cluster/dlm_locks" % lockspace) - self.collectOutputNow("cat /proc/cluster/dlm_locks", root_symlink = "dlm_locks_%s" % lockspace) - fp.close() + status, output = commands.getstatusoutput("cman_tool services") + if status: + # command somehow failed + return False + + import re + + rhelver = self.get_redhat_release() + + if rhelver == "4": + regex = r'^DLM Lock Space:\s*"([^"]*)".*$' + elif rhelver == "5Server" or rhelver == "5Client": + regex = r'^dlm\s+[^\s]+\s+([^\s]+)\s.*$' + + reg=re.compile(regex,re.MULTILINE) + for lockspace in reg.findall(output): + commands.getstatusoutput("echo %s > /proc/cluster/dlm_locks" % lockspace) + self.collectOutputNow("cat /proc/cluster/dlm_locks", root_symlink = "dlm_locks_%s" % lockspace) def get_locking_proto(self): # FIXME: what's the best way to find out ? 
@@ -253,15 +267,11 @@ class cluster(sos.plugintools.PluginBase): return "lock_gulm" def do_gfslockdump(self): - fp = open("/proc/mounts","r") - for line in fp.readlines(): - mntline = line.split(" ") - if mntline[2] == "gfs": - self.collectExtOutput("/sbin/gfs_tool lockdump %s" % mntline[1], root_symlink = "gfs_lockdump_" + self.mangleCommand(mntline[1]) ) - fp.close() - - def do_rgmgr_bt(self): - # FIXME: threads backtrace + for mntpoint in self.doRegexFindAll(r'^\S+\s+([^\s]+)\s+gfs\s+.*$', "/proc/mounts"): + self.collectExtOutput("/sbin/gfs_tool lockdump %s" % mntpoint, root_symlink = "gfs_lockdump_" + self.mangleCommand(mntpoint) ) + + def do_rgmanager_bt(self): + # FIXME: threads backtrace via SIGALRM return def postproc(self): @@ -269,8 +279,7 @@ class cluster(sos.plugintools.PluginBase): return def is_cluster_quorate(self): - # FIXME: use self.fileGrep() instead - output = commands.getoutput("/bin/cat /proc/cluster/status | grep '^Membership state: '") + output = commands.getoutput("cman_tool status | grep '^Membership state: '") try: if output[18:] == "Cluster-Member": return True diff --git a/src/lib/sos/plugins/filesys.py b/src/lib/sos/plugins/filesys.py index 3ae4da51..73bd3d88 100644 --- a/src/lib/sos/plugins/filesys.py +++ b/src/lib/sos/plugins/filesys.py @@ -28,7 +28,6 @@ class filesys(sos.plugintools.PluginBase): self.addCopySpec("/etc/mdadm.conf") self.collectExtOutput("/bin/df -al", root_symlink = "df") - self.collectExtOutput("/usr/sbin/lsof -b +M -n -l", root_symlink = "lsof") self.collectExtOutput("/bin/mount -l", root_symlink = "mount") self.collectExtOutput("/sbin/blkid") diff --git a/src/lib/sos/plugins/kernel.py b/src/lib/sos/plugins/kernel.py index 3640f3fa..a5ffd855 100644 --- a/src/lib/sos/plugins/kernel.py +++ b/src/lib/sos/plugins/kernel.py @@ -49,22 +49,24 @@ class kernel(sos.plugintools.PluginBase): def setup(self): self.collectExtOutput("/bin/uname -a", root_symlink = "uname") self.moduleFile = self.collectOutputNow("/sbin/lsmod", 
root_symlink = "lsmod") + if self.isOptionEnabled('modinfo'): - runcmd = "" - for kmod in commands.getoutput('/sbin/lsmod | /bin/cut -f1 -d" " 2>/dev/null | /bin/grep -v Module 2>/dev/null').split('\n'): - if '' != kmod.strip(): - runcmd = runcmd + " " + kmod - if len(runcmd): - self.collectExtOutput("/sbin/modinfo " + runcmd) + runcmd = "" + for kmod in commands.getoutput('/sbin/lsmod | /bin/cut -f1 -d" " 2>/dev/null | /bin/grep -v Module 2>/dev/null').split('\n'): + if '' != kmod.strip(): + runcmd = runcmd + " " + kmod + if len(runcmd): + self.collectExtOutput("/sbin/modinfo " + runcmd) + self.collectExtOutput("/sbin/sysctl -a") self.collectExtOutput("/sbin/ksyms") self.addCopySpec("/sys/module/*/parameters") self.addCopySpec("/proc/filesystems") self.addCopySpec("/proc/ksyms") self.addCopySpec("/proc/slabinfo") + # FIXME: kver should have this stuff cached somewhere kver = commands.getoutput('/bin/uname -r') - depfile = "/lib/modules/%s/modules.dep" % (kver,) - self.addCopySpec(depfile) + self.addCopySpec("/lib/modules/%s/modules.dep" % kver) self.addCopySpec("/etc/conf.modules") self.addCopySpec("/etc/modules.conf") self.addCopySpec("/etc/modprobe.conf") @@ -72,25 +74,18 @@ class kernel(sos.plugintools.PluginBase): self.addCopySpec("/proc/cmdline") self.addCopySpec("/proc/driver") self.addCopySpec("/proc/sys/kernel/tainted") - # FIXME: both RHEL4 and RHEL5 don't need sysrq to be enabled to trigger via sysrq-trigger - if self.isOptionEnabled('sysrq') and os.access("/proc/sysrq-trigger", os.W_OK) and os.access("/proc/sys/kernel/sysrq", os.R_OK): - sysrq_state = commands.getoutput("/bin/cat /proc/sys/kernel/sysrq") - commands.getoutput("/bin/echo 1 > /proc/sys/kernel/sysrq") - for key in ['m', 'p', 't']: - commands.getoutput("/bin/echo %s > /proc/sysrq-trigger" % (key,)) - commands.getoutput("/bin/echo %s > /proc/sys/kernel/sysrq" % (sysrq_state,)) - # No need to grab syslog here if we can't trigger sysrq, so keep this - # inside the if - 
self.addCopySpec("/var/log/messages") - + + if self.isOptionEnabled('sysrq') and os.access("/proc/sysrq-trigger", os.W_OK): + for key in ['m', 'p', 't']: + commands.getoutput("/bin/echo %s > /proc/sysrq-trigger" % (key,)) + self.addCopySpec("/var/log/messages") + return - def analyze(self): - infd = open("/proc/modules", "r") - modules = infd.readlines() - infd.close() + def diagnose(self): - for modname in modules: + infd = open("/proc/modules", "r") + for modname in infd.readlines(): modname=modname.split(" ")[0] modinfo_srcver = commands.getoutput("/sbin/modinfo -F srcversion %s" % modname) if not os.access("/sys/module/%s/srcversion" % modname, os.R_OK): @@ -99,13 +94,17 @@ class kernel(sos.plugintools.PluginBase): sys_srcver = infd.read().strip("\n") infd.close() if modinfo_srcver != sys_srcver: - self.addAlert("Loaded module %s differs from the one present on the file-system") + self.addDiagnose("Loaded module %s differs from the one present on the file-system") # this would be a good moment to check the module's signature # but at the moment there's no easy way to do that outside of # the kernel. i will probably need to write a C lib (derived from # the kernel sources to do this verification. 
+ infd.close() + + def analyze(self): + savedtaint = os.path.join(self.cInfo['dstroot'], "/proc/sys/kernel/tainted") infd = open(savedtaint, "r") line = infd.read() @@ -114,12 +113,10 @@ class kernel(sos.plugintools.PluginBase): if (line != "0"): self.addAlert("Kernel taint flag is <%s>\n" % line) - infd = open(self.moduleFile, "r") modules = infd.readlines() infd.close() - #print(modules) for tainter in self.taintList: p = re.compile(tainter['regex']) for line in modules: diff --git a/src/lib/sos/plugins/ldap.py b/src/lib/sos/plugins/ldap.py index 59ab53fc..47ac0612 100644 --- a/src/lib/sos/plugins/ldap.py +++ b/src/lib/sos/plugins/ldap.py @@ -40,17 +40,19 @@ class ldap(sos.plugintools.PluginBase): def diagnose(self): # Validate ldap client options ldapopts=self.get_ldap_opts() - try: - os.stat(ldapopts["TLS_CACERTDIR"]) - except: - self.addDiagnose("%s does not exist and can cause connection issues "+ - "involving TLS" % ldapopts["TLS_CACERTDIR"]) + if ldapopts.has_key("TLS_CACERTDIR"): + try: + os.stat(ldapopts["TLS_CACERTDIR"]) + except: + self.addDiagnose("%s does not exist and can cause connection issues involving TLS" % ldapopts["TLS_CACERTDIR"]) def setup(self): self.addCopySpec("/etc/ldap.conf") self.addCopySpec("/etc/openldap") - self.addCopySpec(self.get_slapd_debug()) - return + + slapd_debug_file = self.get_slapd_debug() + if slapd_debug_file: + self.addCopySpec(slapd_debug_file) def postproc(self): self.doRegexSub("/etc/ldap.conf", r"(\s*bindpw\s*)\S+", r"\1***") diff --git a/src/lib/sos/plugins/networking.py b/src/lib/sos/plugins/networking.py index 1dcb0375..aaf78234 100644 --- a/src/lib/sos/plugins/networking.py +++ b/src/lib/sos/plugins/networking.py @@ -53,13 +53,12 @@ class networking(sos.plugintools.PluginBase): self.addCopySpec("/etc/resolv.conf") ifconfigFile=self.collectOutputNow("/sbin/ifconfig -a", root_symlink = "ifconfig") self.collectExtOutput("/sbin/route -n", root_symlink = "route") - self.collectExtOutput("/sbin/ipchains -nvL") 
self.collectIPTable("filter") self.collectIPTable("nat") self.collectIPTable("mangle") self.collectExtOutput("/bin/netstat -s") self.collectExtOutput("/bin/netstat -neopa", root_symlink = "netstat") - # FIXME: we should collect "ip route table <tablename>" for all tables (from "ip rule") + self.collectExtOutput("/sbin/ip route show table all") self.collectExtOutput("/sbin/ip link") self.collectExtOutput("/sbin/ip address") self.collectExtOutput("/sbin/ifenslave -a") diff --git a/src/lib/sos/plugins/process.py b/src/lib/sos/plugins/process.py index ce4ef227..d0243b46 100644 --- a/src/lib/sos/plugins/process.py +++ b/src/lib/sos/plugins/process.py @@ -25,6 +25,7 @@ class process(sos.plugintools.PluginBase): self.collectExtOutput("/bin/ps auxwwwm") self.collectExtOutput("/bin/ps alxwww") self.collectExtOutput("/usr/bin/pstree", root_symlink = "pstree") + self.collectExtOutput("/usr/sbin/lsof -b +M -n -l", root_symlink = "lsof") return def find_mountpoint(s): @@ -50,12 +51,9 @@ class process(sos.plugintools.PluginBase): # this should never happen... 
pass else: + # still D after 0.1 * range(1,5) seconds dpids.append(int(line[1])) - # FIXME: for each hung PID, list file-systems from /proc/$PID/fd -# for pid in dpids: -# realpath - if len(dpids): self.addDiagnose("one or more processes are in state D (sosreport might hang)") diff --git a/src/lib/sos/plugins/squid.py b/src/lib/sos/plugins/squid.py index fdd3b8cf..7e0c3376 100644 --- a/src/lib/sos/plugins/squid.py +++ b/src/lib/sos/plugins/squid.py @@ -18,10 +18,8 @@ import os class squid(sos.plugintools.PluginBase): """squid related information """ - def checkenabled(self): - if self.cInfo["policy"].pkgByName("squid") != None or os.path.exists("/etc/squid/squid.conf"): - return True - return False + files = [ "/etc/squid/squid.conf" ] + packages = [ "squid" ] def setup(self): self.addCopySpec("/etc/squid/squid.conf") diff --git a/src/lib/sos/plugins/veritas.py b/src/lib/sos/plugins/veritas.py index a66b11af..a041c81b 100644 --- a/src/lib/sos/plugins/veritas.py +++ b/src/lib/sos/plugins/veritas.py @@ -70,9 +70,8 @@ class veritas(sos.plugintools.PluginBase): "VRTSvlic"] def checkenabled(self): - for i in commands.getoutput("/bin/rpm -qa | /bin/grep -i VRTS"): - pkg = i.split('-')[0] - if self.cInfo["policy"].pkgByName(pkg) != None: + for pkgname in self.package_list: + if self.cInfo["policy"].allPkgsByName(pkgname): return True return False diff --git a/src/lib/sos/plugins/yum.py b/src/lib/sos/plugins/yum.py index 0cdf0740..0f0d049e 100644 --- a/src/lib/sos/plugins/yum.py +++ b/src/lib/sos/plugins/yum.py @@ -30,10 +30,7 @@ class yum(sos.plugintools.PluginBase): # repo sanity checking # TODO: elaborate/validate actual repo files, however this directory should # be empty on RHEL 5+ systems. 
- try: rhelver = self.cInfo["policy"].pkgDictByName("redhat-release")[0] - except: rhelver = None - - if rhelver == "5" or True: + if self.cInfo["policy"].rhelVersion() == 5: if len(os.listdir("/etc/yum.repos.d/")): self.addAlert("/etc/yum.repos.d/ contains additional repository "+ "information and can cause rpm conflicts.") diff --git a/src/lib/sos/plugintools.py b/src/lib/sos/plugintools.py index eb53ca86..da323401 100644 --- a/src/lib/sos/plugintools.py +++ b/src/lib/sos/plugintools.py @@ -65,6 +65,9 @@ class PluginBase: self.time_start = None self.time_stop = None + self.packages = [] + self.files = [] + self.soslog = logging.getLogger('sos') # get the option list into a dictionary @@ -181,7 +184,7 @@ class PluginBase: dstslname = sosRelPath(self.cInfo['rptdir'], abspath) self.copiedDirs.append({'srcpath':srcpath, 'dstpath':dstslname, 'symlink':"yes", 'pointsto':os.path.abspath(srcpath+'/'+afile) }) else: - self.soslog.log(logging.VERBOSE2, "copying symlink %s" % srcpath) + self.soslog.log(logging.VERBOSE3, "copying symlink %s" % srcpath) try: dstslname, abspath = self.__copyFile(srcpath) self.copiedFiles.append({'srcpath':srcpath, 'dstpath':dstslname, 'symlink':"yes", 'pointsto':link}) @@ -206,6 +209,7 @@ class PluginBase: else: # This is not a directory or a symlink tdstpath, abspath = self.__copyFile(srcpath) + self.soslog.log(logging.VERBOSE3, "copying file %s" % srcpath) self.copiedFiles.append({'srcpath':srcpath, 'dstpath':tdstpath, 'symlink':"no"}) # save in our list return abspath @@ -259,6 +263,9 @@ class PluginBase: def addCopySpecLimit(self,fname,sizelimit = None): """Add a file specification (with limits) """ + if not ( fname and len(fname) ): + self.soslog.warning("invalid file path") + return False files = glob.glob(fname) files.sort() cursize = 0 @@ -272,52 +279,21 @@ class PluginBase: """ Add a file specification (can be file, dir,or shell glob) to be copied into the sosreport by this module """ + if not ( copyspec and len(copyspec) ): + 
self.soslog.warning("invalid file path") + return False # Glob case handling is such that a valid non-glob is a reduced glob for filespec in glob.glob(copyspec): self.copyPaths.append(filespec) - def copyFileGlob(self, srcglob): - """ Deprecated - please modify modules to use addCopySpec() - """ - sys.stderr.write("Warning: thecopyFileGlob() function has been deprecated. Please") - sys.stderr.write("use addCopySpec() instead. Calling addCopySpec() now.") - self.addCopySpec(srcglob) - - def copyFileOrDir(self, srcpath): - """ Deprecated - please modify modules to use addCopySpec() - """ - sys.stderr.write("Warning: the copyFileOrDir() function has been deprecated. Please\n") - sys.stderr.write("use addCopySpec() instead. Calling addCopySpec() now.\n") - raise ValueError - #self.addCopySpec(srcpath) - - def runExeInd(self, exe): - """ Deprecated - use callExtProg() - """ - sys.stderr.write("Warning: the runExeInd() function has been deprecated. Please use\n") - sys.stderr.write("the callExtProg() function. This should only be called\n") - sys.stderr.write("if collect() is overridden.") - pass - def callExtProg(self, prog): """ Execute a command independantly of the output gathering part of sosreport """ - # Log if binary is not runnable or does not exist - if not os.access(prog.split()[0], os.X_OK): - self.soslog.log(logging.VERBOSE, "binary '%s' does not exist or is not runnable" % prog.split()[0]) - # pylint: disable-msg = W0612 status, shout, runtime = sosGetCommandOutput(prog) return status - def runExe(self, exe): - """ Deprecated - use collectExtOutput() - """ - sys.stderr.write("Warning: the runExe() function has been deprecated. 
Please use\n") - sys.stderr.write("the collectExtOutput() function.\n") - pass - def collectExtOutput(self, exe, suggest_filename = None, root_symlink = None): """ Run a program and collect the output @@ -362,10 +338,6 @@ class PluginBase: """ Execute a command and save the output to a file for inclusion in the report """ - # First check to make sure the binary exists and is runnable. - if not os.access(exe.split()[0], os.X_OK): - self.soslog.log(logging.VERBOSE, "binary '%s' does not exist or is not runnable, trying anyways" % exe.split()[0]) - # FIXME: we should have a timeout or we may end waiting forever # pylint: disable-msg = W0612 @@ -379,7 +351,7 @@ class PluginBase: if not os.path.isdir(os.path.dirname(outfn)): os.mkdir(os.path.dirname(outfn)) - if not (status == 127 or status == 32512): + if not (status == 127 or status == 32512): # if not command_not_found outfd = open(outfn, "w") if len(shout): outfd.write(shout+"\n") outfd.close() @@ -486,23 +458,35 @@ class PluginBase: try: self.doCopyFileOrDir(path) except SystemExit: - raise SystemExit + if threaded: + return SystemExit + else: + raise SystemExit except KeyboardInterrupt: - raise KeyboardInterrupt + if threaded: + return KeyboardInterrupt + else: + raise KeyboardInterrupt except Exception, e: - self.soslog.log(logging.VERBOSE, "error copying from pathspec %s (%s), traceback follows:" % (path,e)) - self.soslog.log(logging.VERBOSE, traceback.format_exc()) + self.soslog.log(logging.VERBOSE2, "error copying from pathspec %s (%s), traceback follows:" % (path,e)) + self.soslog.log(logging.VERBOSE2, traceback.format_exc()) for (prog,suggest_filename,root_symlink) in self.collectProgs: self.soslog.debug("collecting output of '%s'" % prog) try: self.collectOutputNow(prog, suggest_filename, root_symlink) except SystemExit: - raise SystemExit + if threaded: + return SystemExit + else: + raise SystemExit except KeyboardInterrupt: - raise KeyboardInterrupt + if threaded: + return KeyboardInterrupt + else: + 
raise KeyboardInterrupt except: - self.soslog.log(logging.VERBOSE, "error collection output of '%s', traceback follows:" % prog) - self.soslog.log(logging.VERBOSE, traceback.format_exc()) + self.soslog.log(logging.VERBOSE2, "error collection output of '%s', traceback follows:" % prog) + self.soslog.log(logging.VERBOSE2, traceback.format_exc()) self.time_stop = time() @@ -520,6 +504,16 @@ class PluginBase: """ This function can be overidden to let the plugin decide whether it should run or not. """ + # some files or packages have been specified for this package + if len(self.files) or len(self.packages): + for file in self.files: + if os.path.exists(files): + return True + for pkgname in self.packages: + if self.cInfo["policy"].pkgByName(pkgname): + return True + return False + return True def defaultenabled(self): @@ -592,8 +586,11 @@ class PluginBase: html = html + "<p>Commands Executed:<br><ul>\n" # convert file name to relative path from our root for cmd in self.executedCommands: - cmdOutRelPath = sosRelPath(self.cInfo['rptdir'], self.cInfo['cmddir'] + "/" + cmd['file']) - html = html + '<li><a href="%s">%s</a></li>\n' % (cmdOutRelPath, cmd['exe']) + if cmd["file"] and len(cmd["file"]): + cmdOutRelPath = sosRelPath(self.cInfo['rptdir'], self.cInfo['cmddir'] + "/" + cmd['file']) + html = html + '<li><a href="%s">%s</a></li>\n' % (cmdOutRelPath, cmd['exe']) + else: + html = html + '<li>%s</li>\n' % (cmd['exe']) html = html + "</ul></p>\n" # Alerts @@ -609,4 +606,3 @@ class PluginBase: html = html + self.customText + "</p>\n" return html - diff --git a/src/lib/sos/policyredhat.py b/src/lib/sos/policyredhat.py index d2139afa..54890b00 100755 --- a/src/lib/sos/policyredhat.py +++ b/src/lib/sos/policyredhat.py @@ -26,8 +26,7 @@ from sos.helpers import * import random import re import md5 - -SOME_PATH = "/tmp/SomePath" +import rpm #class SosError(Exception): # def __init__(self, code, message): @@ -37,11 +36,22 @@ SOME_PATH = "/tmp/SomePath" # def __str__(self): # 
return 'Sos Error %s: %s' % (self.code, self.message) +def memoized(function): + ''' function decorator to allow caching of return values + ''' + function.cache={} + def f(*args): + try: + return function.cache[args] + except KeyError: + result = function.cache[args] = function(*args) + return result + return f class SosPolicy: "This class implements various policies for sos" def __init__(self): - #print "Policy init" + self.report_file = None return def setCommons(self, commons): @@ -55,41 +65,61 @@ class SosPolicy: #print "validating %s" % pluginpath return True + def pkgProvides(self, name): + pkg = self.pkgByName(name) + return pkg['providename'] + def pkgRequires(self, name): - # FIXME: we're relying on rpm to sort the output list + pkg = self.pkgByName(name) + return pkg['requirename'] + cmd = "/bin/rpm -q --requires %s" % (name) return [requires[:-1].split() for requires in os.popen(cmd).readlines()] def allPkgsByName(self, name): - # FIXME: we're relying on rpm to sort the output list - cmd = "/bin/rpm --qf '%%{N} %%{V} %%{R} %%{ARCH}\n' -q %s" % (name,) - pkgs = os.popen(cmd).readlines() - return [pkg[:-1].split() for pkg in pkgs if pkg.startswith(name)] + return self.allPkgs("name", name) + + def allPkgsByNameRegex(self, regex_name): + reg = re.compile(regex_name) + return [pkg for pkg in self.allPkgs() if reg.match(pkg['name'])] def pkgByName(self, name): # TODO: do a full NEVRA compare and return newest version, best arch try: # lame attempt at locating newest - pkg = self.allPkgsByName(name)[-1] - except IndexError: - pkg = None - - return pkg + return self.allPkgsByName(name)[-1] + except: + pass + return None def pkgDictByName(self, name): + # FIXME: what does this do? 
pkgName = self.pkgByName(name) if pkgName and len(pkgName) > len(name): return pkgName[len(name)+1:].split("-") else: return None + def allPkgs(self, ds = None, value = None): + if not hasattr(self, "rpm_ts"): + self.rpm_ts = rpm.TransactionSet() + if ds and value: + mi = self.rpm_ts.dbMatch(ds, value) + else: + mi = self.rpm_ts.dbMatch() + return [pkg for pkg in mi] + def runlevelByService(self, name): ret = [] try: for tabs in commands.getoutput("/sbin/chkconfig --list %s" % name).split(): - (runlevel, onoff) = tabs.split(":") - if onoff == "on": - ret.append(int(runlevel)) + try: + (runlevel, onoff) = tabs.split(":", 1) + except: + pass + else: + if onoff == "on": + ret.append(int(runlevel)) except: pass return ret @@ -105,9 +135,21 @@ class SosPolicy: def kernelVersion(self): return commands.getoutput("/bin/uname -r").strip("\n") + def rhelVersion(self): + try: + pkgname = self.pkgByName("redhat-release")["version"] + if pkgname[0] == "4": + return 4 + elif pkgname in [ "5Server", "5Client" ]: + return 5 + except: pass + return False + def isKernelSMP(self): - if self.kernelVersion()[-3:]=="smp": return True - else: return False + if commands.getoutput("/bin/uname -v").split()[1] == "SMP": + return True + else: + return False def pkgNVRA(self, pkg): fields = pkg.split("-") @@ -166,6 +208,9 @@ class SosPolicy: # FIXME: use python internal command os.system("/bin/mv %s %s" % (aliasdir, self.cInfo['dstroot'])) + # FIXME: encrypt using gnupg + # gpg --trust-model always --batch --keyring /usr/share/sos/rhsupport.pub --no-default-keyring --compress-level 0 --encrypt --recipient support@redhat.com --output filename.gpg filename.tar + # add last 6 chars from md5sum to file name fp = open(tarballName, "r") md5out = md5.new(fp.read()).hexdigest() @@ -187,5 +232,37 @@ class SosPolicy: print _("Please send this file to your support representative.") sys.stdout.write("\n") + self.report_file = tarballName + return + def uploadResults(self): + # make sure a report exists + 
if not self.report_file: + return False + + # make sure it's readable + try: + fp = open(self.report_file, "r") + except: + return False + + try: + from ftplib import FTP + upload_name = os.path.basename(self.report_file) + + ftp = FTP('dropbox.redhat.com') + ftp.login() + ftp.cwd("/incoming") + ftp.set_pasv(True) + ftp.storbinary('STOR %s' % upload_name, fp) + ftp.quit() + except: + print _("There was a problem uploading your report to Red Hat support.") + else: + print _('Your report was uploaded successfully with name:') + print " " + upload_name + print + print _("Please communicate this name to your support representative.") + + fp.close() |