about | summary | refs | log | tree | commit | diff | stats
path: root/src/lib
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib')
-rw-r--r-- src/lib/sos/plugins/cluster.py 115
-rw-r--r-- src/lib/sos/plugins/general.py 4
-rw-r--r-- src/lib/sos/plugins/kernel.py 8
-rw-r--r-- src/lib/sos/plugins/networking.py 2
-rw-r--r-- src/lib/sos/plugins/process.py 42
-rw-r--r-- src/lib/sos/plugins/rpm.py 4
-rw-r--r-- src/lib/sos/plugintools.py 31
-rwxr-xr-x src/lib/sos/policyredhat.py 22
8 files changed, 160 insertions, 68 deletions
diff --git a/src/lib/sos/plugins/cluster.py b/src/lib/sos/plugins/cluster.py
index 02fede4f..62315bc6 100644
--- a/src/lib/sos/plugins/cluster.py
+++ b/src/lib/sos/plugins/cluster.py
@@ -13,7 +13,7 @@
## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import sos.plugintools
-import commands, os
+import commands, os, re
import time, libxml2
class cluster(sos.plugintools.PluginBase):
@@ -22,7 +22,7 @@ class cluster(sos.plugintools.PluginBase):
optionList = [("gfslockdump", 'gather output of gfs lockdumps', 'slow', False),
('lockdump', 'gather dlm lockdumps', 'slow', False),
- ('taskdump', 'trigger 3 SysRq+t dumps every 5 seconds', 'slow', False)]
+ ('taskdump', 'trigger 3 sysrq+t dumps every 5 seconds (dangerous)', 'slow', False)]
def checkenabled(self):
# enable if any related package is installed
@@ -33,7 +33,7 @@ class cluster(sos.plugintools.PluginBase):
return True
# enable if any related file is present
- for fname in [ "/etc/cluster/cluster.conf" ]:
+ for fname in [ "/etc/cluster/cluster.conf", "/proc/cluster" ]:
try: os.stat(fname)
except:pass
else: return True
@@ -93,6 +93,13 @@ class cluster(sos.plugintools.PluginBase):
if not ((self.cInfo["policy"].pkgByName("dlm") and self.cInfo["policy"].pkgByName("dlm-kernel")) or self.cInfo["policy"].pkgByName("gulm")):
self.addDiagnose("required packages are missing: (dlm, dlm-kernel) || gulm")
+ # let's make sure the required kernel modules are loaded
+ mods_check = [ "cman", "dlm" ]
+ if self.has_gfs(): mods_check.append("gfs")
+ for module in mods_check:
+ if len(self.fileGrep("^%s " % module, "/proc/modules")) == 0:
+ self.addDiagnose("required package is present but not loaded: %s" % module)
+
# check if all the needed daemons are active at sosreport time
# check if they are started at boot time in RHEL4 RHCS (cman, ccsd, rgmanager, fenced)
# and GFS (gfs, ccsd, clvmd, fenced)
@@ -102,29 +109,59 @@ class cluster(sos.plugintools.PluginBase):
status, output = commands.getstatusoutput("/sbin/service %s status" % service)
if status:
self.addDiagnose("service %s is not running" % service)
+ else:
+ # service is running, extra sanity checks
+ if service == "fenced":
+ # also make sure fenced is a registered cluster service
+ try:
+ if len(self.fileGrep("^Fence Domain:\W", "/proc/cluster/services")) == 0:
+ self.addDiagnose("fencing service is not registered with cman")
+ except:
+ pass
+ elif service == "rgmanager":
+ # also make sure rgmanager is a registered cluster service
+ try:
+ if len(self.fileGrep("^User:\W*usrm::manager", "/proc/cluster/services")) == 0:
+ self.addDiagnose("rgmanager is not registered with cman")
+ except:
+ pass
if not self.cInfo["policy"].runlevelDefault() in self.cInfo["policy"].runlevelByService(service):
self.addDiagnose("service %s is not started in default runlevel" % service)
+ # FIXME: any cman service whose state != run ?
+ # Fence Domain: "default" 2 2 run -
+
# is cluster quorate
if not self.is_cluster_quorate():
self.addDiagnose("cluster node is not quorate")
# if there is no cluster.conf, diagnose() finishes here.
- try: os.stat("/etc/cluster/cluster.conf")
- except: return
+ try:
+ os.stat("/etc/cluster/cluster.conf")
+ except:
+ self.addDiagnose("/etc/cluster/cluster.conf is missing")
+ return
# setup XML xpath context
xml = libxml2.parseFile("/etc/cluster/cluster.conf")
xpathContext = xml.xpathNewContext()
- # check fencing (warn on empty or manual)
+ # check fencing (warn on no fencing)
if len(xpathContext.xpathEval("/cluster/clusternodes/clusternode[not(fence/method/device)]")):
self.addDiagnose("one or more nodes have no fencing agent configured")
+ # check fencing (warn on manual)
if len(xpathContext.xpathEval("/cluster/clusternodes/clusternode[/cluster/fencedevices/fencedevice[@agent='fence_manual']/@name=fence/method/device/@name]")):
self.addDiagnose("one or more nodes have manual fencing agent configured (data integrity is not guaranteed)")
+ # if this node uses fence_rsa or fence_drac, make sure acpid is not running
+ hostname = commands.getoutput("/bin/uname -n").split(".")[0]
+ if len(xpathContext.xpathEval('/cluster/clusternodes/clusternode[@name = "%s" and /cluster/fencedevices/fencedevice[@agent="fence_rsa" or @agent="fence_drac"]/@name=fence/method/device/@name]' % hostname )):
+ status, output = commands.getstatusoutput("/sbin/service acpid status")
+ if status == 0 or self.cInfo["policy"].runlevelDefault() in self.cInfo["policy"].runlevelByService("acpid"):
+ self.addDiagnose("acpid is enabled, this may cause problems with your fencing method.")
+
# check for fs exported via nfs without an fsid attribute
if len(xpathContext.xpathEval("/cluster/rm/service//fs[not(@fsid)]/nfsexport")):
self.addDiagnose("one or more nfs file-system doesn't have a fsid attribute set.")
@@ -142,16 +179,15 @@ class cluster(sos.plugintools.PluginBase):
# and that the locking protocol is sane
cluster_name = xpathContext.xpathEval("/cluster/@name")[0].content
- fp = open("/etc/fstab","r")
- for fs in fp.readline().split():
-# fs = fs.split()
- if not fs or fs[0] == "#" or len(fs) < 6 or fs[2]!="gfs": continue
- lockproto = get_gfs_sb_field(fs[1], "sb_lockproto")
- if lockproto != "lock_" + get_locking_proto:
+ for fs in self.fileGrep(r'^[^#][/\w]*\W*[/\w]*\W*gfs', "/etc/fstab"):
+ # for each gfs entry
+ fs = fs.split()
+
+ lockproto = self.get_gfs_sb_field(fs[0], "sb_lockproto")
+ if lockproto and lockproto != self.get_locking_proto():
self.addDiagnose("gfs mountpoint (%s) is using the wrong locking protocol (%s)" % (fs[0], lockproto) )
- locktable = get_gfs_sb_field(fs[1], "sb_locktable")
- if not locktable: continue
+ locktable = self.get_gfs_sb_field(fs[0], "sb_locktable")
try: locktable = locktable.split(":")[0]
except: continue
if locktable != cluster_name:
@@ -178,11 +214,15 @@ class cluster(sos.plugintools.PluginBase):
return
def do_taskdump(self):
+ if not os.access("/proc/sysrq-trigger", os.W_OK):
+ return
+
commands.getstatusoutput("echo t > /proc/sysrq-trigger")
- time.sleep(3)
+ time.sleep(5)
commands.getstatusoutput("echo t > /proc/sysrq-trigger")
- time.sleep(3)
+ time.sleep(5)
commands.getstatusoutput("echo t > /proc/sysrq-trigger")
+
self.addCopySpec("/var/log/messages")
def do_lockdump(self):
@@ -200,22 +240,27 @@ class cluster(sos.plugintools.PluginBase):
def get_locking_proto(self):
# FIXME: what's the best way to find out ?
- return "dlm"
- return "gulm"
+ return "lock_dlm"
+ return "lock_gulm"
def do_gfslockdump(self):
fp = open("/proc/mounts","r")
for line in fp.readlines():
mntline = line.split(" ")
if mntline[2] == "gfs":
- self.collectExtOutput("/sbin/gfs_tool lockdump %s" % mntline[1])
+ self.collectExtOutput("/sbin/gfs_tool lockdump %s" % mntline[1], root_symlink = "gfs_lockdump_" + self.mangleCommand(mntline[1]) )
fp.close()
+ def do_rgmgr_bt(self):
+ # FIXME: threads backtrace
+ return
+
def postproc(self):
self.doRegexSub("/etc/cluster/cluster.conf", r"(\s*\<fencedevice\s*.*\s*passwd\s*=\s*)\S+(\")", r"\1***")
return
def is_cluster_quorate(self):
+ # FIXME: use self.fileGrep() instead
output = commands.getoutput("/bin/cat /proc/cluster/status | grep '^Membership state: '")
try:
if output[18:] == "Cluster-Member":
@@ -226,32 +271,8 @@ class cluster(sos.plugintools.PluginBase):
pass
return None
- def get_gfs_sb_field(self, mntpoint, field):
- for line in commands.getoutput("/sbin/gfs_tool sb %s all" % fs[1]):
- tostrip = " " + field + " = "
- if line.startwith(tostrip):
- return line[len(tostrip):]
- return None
-
- def xpath_query_count(self, fname, query):
- return len(self.xpath_query(fname, query))
-
- def xpath_query(self, fname, query):
- xml = libxml2.parseFile(fname)
- xpathContext = xml.xpathNewContext()
- return xpathContext.xpathEval(query)
-
- # FIXME: use python libxml internals
- tmpout = commands.getoutput("/bin/echo xpath %s | /usr/bin/xmllint --shell /etc/cluster/cluster.conf")
- for tmpline in tmpout:
- if tmpline.startswith("Set contains "):
- tmpvalue = tmpline[14:].split(" ")[0]
- if tmpvalue == "NULL": return 0
- try: tmpvalue = int(tmpvalue)
- except: return False
- return tmpvalue
+ def get_gfs_sb_field(self, device, field):
+ for line in commands.getoutput("/sbin/gfs_tool sb %s all" % device).split("\n"):
+ if re.match('^\W*%s = ' % field, line):
+ return line.split("=")[1].strip()
return False
-
-
-
-
diff --git a/src/lib/sos/plugins/general.py b/src/lib/sos/plugins/general.py
index 06f55228..fd18136b 100644
--- a/src/lib/sos/plugins/general.py
+++ b/src/lib/sos/plugins/general.py
@@ -16,10 +16,10 @@ import sos.plugintools
import glob
class general(sos.plugintools.PluginBase):
- """very basic system information
+ """basic system information
"""
- optionList = [("syslogsize", "maximum size (in MiB) of logs to collect per syslog file", "", 15)]
+ optionList = [("syslogsize", "max size (MiB) to collect per syslog file", "", 15)]
def setup(self):
self.addCopySpec("/etc/redhat-release")
diff --git a/src/lib/sos/plugins/kernel.py b/src/lib/sos/plugins/kernel.py
index 07cc6ac7..9a752c13 100644
--- a/src/lib/sos/plugins/kernel.py
+++ b/src/lib/sos/plugins/kernel.py
@@ -18,8 +18,8 @@ import commands, os, re
class kernel(sos.plugintools.PluginBase):
"""kernel related information
"""
- optionList = [("modinfo", 'Gathers module information on all modules', 'fast', True),
- ('sysrq', 'Trigger SysRq dumps', 'fast', False)]
+ optionList = [("modinfo", 'gathers module information on all modules', 'fast', True),
+ ('sysrq', 'trigger SysRq+t dumps', 'fast', False)]
moduleFile = ""
taintList = [
{'regex':'mvfs*', 'description':'Clearcase module'},
@@ -72,9 +72,7 @@ class kernel(sos.plugintools.PluginBase):
self.addCopySpec("/proc/cmdline")
self.addCopySpec("/proc/driver")
self.addCopySpec("/proc/sys/kernel/tainted")
- # trigger some sysrq's. I'm not sure I like doing it this way, but
- # since we end up with the sysrq dumps in syslog whether we run the
- # syslog report before or after this, I suppose I can live with it.
+ # FIXME: neither RHEL4 nor RHEL5 needs sysrq to be enabled to trigger via sysrq-trigger
if self.isOptionEnabled('sysrq') and os.access("/proc/sysrq-trigger", os.W_OK) and os.access("/proc/sys/kernel/sysrq", os.R_OK):
sysrq_state = commands.getoutput("/bin/cat /proc/sys/kernel/sysrq")
commands.getoutput("/bin/echo 1 > /proc/sys/kernel/sysrq")
diff --git a/src/lib/sos/plugins/networking.py b/src/lib/sos/plugins/networking.py
index 6d11146b..b7331849 100644
--- a/src/lib/sos/plugins/networking.py
+++ b/src/lib/sos/plugins/networking.py
@@ -57,7 +57,7 @@ class networking(sos.plugintools.PluginBase):
self.addCopySpec("/etc/xinetd.d")
self.addCopySpec("/etc/host*")
self.addCopySpec("/etc/resolv.conf")
- ifconfigFile=self.collectExtOutput("/sbin/ifconfig -a", root_symlink = "ifconfig")
+ ifconfigFile=self.collectOutputNow("/sbin/ifconfig -a", root_symlink = "ifconfig")
self.collectExtOutput("/sbin/route -n", root_symlink = "route")
self.collectExtOutput("/sbin/ipchains -nvL")
self.collectIPTable("filter")
diff --git a/src/lib/sos/plugins/process.py b/src/lib/sos/plugins/process.py
index 7ab6e6f5..baa185b4 100644
--- a/src/lib/sos/plugins/process.py
+++ b/src/lib/sos/plugins/process.py
@@ -13,6 +13,9 @@
## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import sos.plugintools
+import commands
+import time
+import os
class process(sos.plugintools.PluginBase):
"""process information
@@ -20,8 +23,41 @@ class process(sos.plugintools.PluginBase):
def setup(self):
self.collectExtOutput("/bin/ps auxww", root_symlink = "ps")
self.collectExtOutput("/usr/bin/pstree", root_symlink = "pstree")
- self.collectExtOutput("/usr/bin/ipcs -a")
- self.collectExtOutput("/usr/bin/ipcs -u")
- self.collectExtOutput("/usr/bin/ipcs -l")
return
+ def find_mountpoint(s):
+ if (os.path.ismount(s) or len(s)==0): return s
+ else: return mountpoint(os.path.split(s)[0])
+
+ def diagnose(self):
+ # check that no process is in state D
+ dpids = []
+ status, output = commands.getstatusoutput("/bin/ps -A -o state,pid --no-heading")
+ if not status:
+ for line in output.split("\n"):
+ line = line.split()
+ if line[0] == "D":
+ # keep an eye on the process to see if the state changes
+ for inc in range(1,10):
+ try:
+ if len(self.fileGrep("^State: D", " /proc/%d/status" % int(line[1]))) == 0:
+ # status is not D, good. let's get out of the loop.
+ time.sleep(0.1)
+ continue
+ except IOError:
+ # this should never happen...
+ pass
+ else:
+ dpids.append(int(line[1]))
+
+ # FIXME: for each hung PID, list file-systems from /proc/$PID/fd
+# for pid in dpids:
+# realpath
+
+ if len(dpids):
+ self.addDiagnose("one or more processes are in state D")
+
+ return
+
+
+
diff --git a/src/lib/sos/plugins/rpm.py b/src/lib/sos/plugins/rpm.py
index 9c6f2659..c899d141 100644
--- a/src/lib/sos/plugins/rpm.py
+++ b/src/lib/sos/plugins/rpm.py
@@ -17,8 +17,8 @@ import sos.plugintools
class rpm(sos.plugintools.PluginBase):
"""RPM information
"""
- optionList = [("rpmq", "Queries for package information via rpm -q", "fast", True),
- ("rpmva", "Runs a verify on all packages", "slow", True)]
+ optionList = [("rpmq", "queries for package information via rpm -q", "fast", True),
+ ("rpmva", "runs a verify on all packages", "slow", True)]
def setup(self):
self.addCopySpec("/var/log/rpmpkgs")
diff --git a/src/lib/sos/plugintools.py b/src/lib/sos/plugintools.py
index 8f07fb62..f58c8151 100644
--- a/src/lib/sos/plugintools.py
+++ b/src/lib/sos/plugintools.py
@@ -313,13 +313,27 @@ class PluginBase:
"""
self.collectProgs.append( (exe,suggest_filename,root_symlink) )
+ def fileGrep(self, regexp, fname):
+ results = []
+
+ fp = open(fname, "r")
+ for line in fp.readlines():
+ if re.match(regexp, line):
+ results.append(line)
+ fp.close()
+ return results
+
+ def mangleCommand(self, exe):
+ # FIXME: this can be improved
+ mangledname = re.sub(r"^/(usr/|)(bin|sbin)/", "", exe)
+ mangledname = re.sub(r"[^\w\-\.\/]+", "_", mangledname)
+ mangledname = re.sub(r"/", ".", mangledname).strip(" ._-")[0:64]
+ return mangledname
+
def makeCommandFilename(self, exe):
""" The internal function to build up a filename based on a command """
- mangledname = re.sub(r"[^\w\-\.\/]+", "_", exe)
- mangledname = re.sub(r"/", ".", mangledname).strip(" ._-")[0:64]
-
- outfn = self.cInfo['cmddir'] + "/" + self.piName + "/" + mangledname
+ outfn = self.cInfo['cmddir'] + "/" + self.piName + "/" + self.mangleCommand(exe)
# check for collisions
if os.path.exists(outfn):
@@ -341,6 +355,8 @@ class PluginBase:
if not os.access(exe.split()[0], os.X_OK):
self.soslog.log(logging.VERBOSE, "binary '%s' does not exist or is not runnable, trying anyways" % exe.split()[0])
+ # FIXME: we should have a timeout or we may end up waiting forever
+
# pylint: disable-msg = W0612
status, shout, runtime = sosGetCommandOutput(exe)
@@ -363,16 +379,17 @@ class PluginBase:
os.symlink(outfn[len(self.cInfo['dstroot'])+1:], root_symlink.strip("/."))
os.chdir(curdir)
- outfn = outfn[len(self.cInfo['cmddir'])+1:]
+ outfn_strip = outfn[len(self.cInfo['cmddir'])+1:]
else:
self.soslog.log(logging.VERBOSE, "could not run command: %s" % exe)
outfn = None
+ outfn_strip = None
# sosStatus(status)
# save info for later
- self.executedCommands.append({'exe': exe, 'file':outfn}) # save in our list
- self.cInfo['xmlreport'].add_command(cmdline=exe,exitcode=status,f_stdout=outfn,runtime=runtime)
+ self.executedCommands.append({'exe': exe, 'file':outfn_strip}) # save in our list
+ self.cInfo['xmlreport'].add_command(cmdline=exe,exitcode=status,f_stdout=outfn_strip,runtime=runtime)
return outfn
def writeTextToCommand(self, exe, text):
diff --git a/src/lib/sos/policyredhat.py b/src/lib/sos/policyredhat.py
index 7382f740..cbf76d26 100755
--- a/src/lib/sos/policyredhat.py
+++ b/src/lib/sos/policyredhat.py
@@ -24,6 +24,7 @@ import string
from tempfile import gettempdir
from sos.helpers import *
import random
+import re
SOME_PATH = "/tmp/SomePath"
@@ -114,9 +115,11 @@ class SosPolicy:
try:
name = raw_input("Please enter your first initial and last name [%s]: " % localname)
+ name = re.sub(r"[^a-zA-Z.0-9]", "", name)
if len(name) == 0: name = localname
ticketNumber = raw_input("Please enter the case number that you are generating this report for: ")
+ ticketNumber = re.sub(r"[^0-9]", "", ticketNumber)
except KeyboardInterrupt:
print _("<interrupted>")
print _("Temporary files have been stored in %s") % self.cInfo['dstroot']
@@ -155,8 +158,25 @@ class SosPolicy:
# FIXME: use python internal command
os.system("/bin/mv %s %s" % (aliasdir, self.cInfo['dstroot']))
+ # add last 6 chars from md5sum to file name
+ status, md5out = commands.getstatusoutput('''/usr/bin/md5sum "%s"''' % tarballName)
+ if not status and len(md5out):
+ oldtarballName = tarballName
+ try:
+ md5out = md5out.strip().split()[0]
+ tarballName = os.path.join(ourtempdir, "sosreport-%s-%s.tar.bz2" % (namestr, md5out[-6:]) )
+ os.system("/bin/mv %s %s" % (oldtarballName, tarballName) )
+ except:
+ md5out = False
+ else:
+ md5out = False
+
sys.stdout.write("\n")
- print "Your sosreport has been generated and saved in %s" % tarballName
+ print "Your sosreport has been generated and saved in:\n %s" % tarballName
+ print
+ if md5out:
+ print "The md5sum is: " + md5out
+ print
print "Please send this file to your support representative."
sys.stdout.write("\n")