aboutsummaryrefslogtreecommitdiffstats
path: root/src/extras/sos-html-logs/lib/logs_abstraction.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/extras/sos-html-logs/lib/logs_abstraction.py')
-rw-r--r--src/extras/sos-html-logs/lib/logs_abstraction.py415
1 files changed, 415 insertions, 0 deletions
diff --git a/src/extras/sos-html-logs/lib/logs_abstraction.py b/src/extras/sos-html-logs/lib/logs_abstraction.py
new file mode 100644
index 00000000..5708c960
--- /dev/null
+++ b/src/extras/sos-html-logs/lib/logs_abstraction.py
@@ -0,0 +1,415 @@
+#!/usr/bin/env python
+
+import os, sys, time, re, pdb
+from threading import Thread, Lock
+from helpers import *
+from operator import itemgetter
+import traceback
+
+class cluster_class:
+
+ def __init__(self):
+ self.hosts = {}
+ self.index = {}
+ self.daemon_log_counter = []
+ self.parsers = []
+
+ def host_names(self):
+ return ksort(self.hosts)
+
+ def register_parser(self, parser_class):
+ self.parsers.append(parser_class)
+
+ def get_parser(self, parser_name):
+ for parser in self.parsers:
+ if parser.__class__.__name__ == parser_name:
+ return parser
+
+ def get_host(self, host):
+ return self.hosts[host]
+
+ def tell(self):
+ toret = {}
+ for host in self.hosts:
+ toret[host] = self.hosts[host].tell()
+ return toret
+
+ def tell_sum(self):
+ toret = 0
+ for host in self.hosts:
+ toret += self.hosts[host].tell()
+ return toret
+
+ def size(self):
+ toret = 0
+ for host in self.hosts:
+ toret += self.hosts[host].size()
+ return toret
+
+ def seek(self, positions):
+ # make sure positions in argument are valid
+ for host in self.hosts:
+ if host not in positions.keys():
+ print "cannot find", positions
+ raise "Invalid_Positions"
+
+ # seek each host to saved position
+ for host in positions:
+ self.hosts[host].seek(positions[host])
+
+ return True
+
+ def seek_beginning(self):
+ for host in self.hosts:
+ self.hosts[host].seek(0)
+
+ return True
+
+ def add_log(self, logname):
+ log = logfile_class(logname)
+ hostname = log.hostname()
+ sys.stderr.write("""adding log "%s" for host %s\n""" % (logname, hostname))
+ if not self.hosts.has_key(hostname):
+ self.hosts[hostname] = host_class()
+ self.hosts[hostname].add_log(log)
+
+ def get_position_by_date(self, goto_date):
+ try:
+ return self.index[goto_date]["position"]
+ except KeyError:
+ # can't find position in cache, calculate on the fly
+ #
+ for cmp_date in ksort(self.index):
+ if goto_date <= cmp_date:
+ return self.index[cmp_date]["position"]
+ return None
+
+ def parse(self, threaded = False):
+
+ if threaded and (not hasattr(self,"parse_t") or self.parse_t == None):
+ self.parse_t = Thread(target=self.parse, name='parse-thread', args = [True] )
+ self.parse_t.start()
+ return self.parse_t
+
+ print "parsing begins"
+
+ daemon_log_counter = {}
+
+ self.seek_beginning()
+
+ for date in self:
+
+ self.index[date.date] = { "position":date.position, "log_counter":{} }
+
+ for host in self.hosts:
+ self.index[date.date]["log_counter"][host]=0
+
+ try:
+ for log in date[host]:
+ self.index[date.date]["log_counter"][host]+=1
+
+ for parser_class in self.parsers:
+ parser_class.parse_line(date, log)
+
+ # count how many logs per daemon
+ try:
+ daemon_log_counter[log.daemon()]+=1
+ except KeyError:
+ daemon_log_counter[log.daemon()]=1
+
+ except "Eof":
+ # no more logs for this host
+ pass
+
+ self.daemon_log_counter = sorted(daemon_log_counter.items(), key=itemgetter(1), reverse=True)
+
+ print "parsing ends."
+
+ def eof(self):
+ for host in self.hosts:
+ if not self.hosts[host].eof():
+# print "All logs are not EOF yet", host
+ return False
+ print "All logs are EOF!"
+ return True
+
+ def __iter__(self):
+ return self
+
+ def next(self):
+ if self.eof():
+ raise StopIteration
+
+ return log_date_class(cluster = self)
+
+ def instance(self):
+ toret = cluster_class()
+
+ for host in self.hosts:
+ toret.hosts[host] = host_class()
+
+ for log in self.hosts[host].logs:
+ toret.hosts[host].logs.append(logfile_class(log.fname))
+
+ toret.index = self.index
+ toret.daemon_log_counter = self.daemon_log_counter
+ toret.parsers = self.parsers
+
+ return toret
+
+class log_date_class:
+ def __init__(self, cluster):
+ self.cluster = cluster
+ self.date = None
+ self.hosts = cluster.hosts.keys()
+
+ self.position = cluster.tell()
+
+ newtime = None
+
+ # 1st run, must find out what is the oldest date for each host
+ for host in self.hosts:
+ while True:
+ try:
+ newtime = time.strptime("2007 " + cluster.hosts[host].readline()[0:15], "%Y %b %d %H:%M:%S")
+ except "Eof":
+ break
+ except ValueError:
+ print "parsing error in line", cluster.hosts[host].tell()
+ else:
+ break
+
+ if newtime:
+ if not self.date or newtime < self.date:
+ self.date = newtime
+
+ if not cluster.hosts[host].eof():
+ cluster.hosts[host].backline()
+
+ # this should almost never happen, but just in case.
+ if not self.date:
+ raise "Strange_Eof"
+
+ def __str__(self):
+ return time.strftime("%b %d %H:%M:%S", self.date)
+
+ def __getitem__(self, host):
+ return log_date_host(self.cluster, self.cluster.hosts[host], self.date)
+
+ def __iter__(self):
+ return self
+
+class log_date_host:
+ def __init__(self, cluster, host, date):
+ self.cluster = cluster
+ self.host = host
+ self.date = date
+
+ self.parent_date = date
+
+ def __iter__(self):
+ return self
+
+ def next(self):
+ position = self.host.tell()
+
+ self.host.readline()
+
+ try:
+ if time.strptime("2007 " + self.host.cur_line[0:15], "%Y %b %d %H:%M:%S") <= self.date:
+ return log_line_class(self.parent_date, self.host, position, self.host.cur_line)
+ except:
+ return log_line_class(self.parent_date, self.host, position, self.host.cur_line)
+
+ self.host.backline()
+
+ raise StopIteration
+
+class log_line_class:
+ def __init__(self, date, host, position, line):
+ self.host = host
+ self.position = position
+ self.line = line
+ self.parse = Memoize(self.parse_uncached)
+
+ self.parent_date = date
+ self.parent_host = host
+
+ def parse_uncached(self):
+ try:
+ return re.findall(r"""^(... .. ..:..:..) %s ([-_0-9a-zA-Z \.\/\(\)]+)(\[[0-9]+\])?(:)? (.*)$""" % self.hostname(), self.line)[0]
+ except:
+ return [ None, None, None, None, None ]
+
+ def __str__(self):
+ return self.line
+
+ def date(self):
+ try:
+ return time.strptime("2007 " + self.line[0:15], "%Y %b %d %H:%M:%S")
+ except:
+ return False
+
+ def hostname(self):
+ return self.line[16:].split(" ", 1)[0]
+
+ def daemon(self):
+ return self.parse()[1]
+
+ def message(self):
+ return self.parse()[4]
+
+class host_class:
+
+ def __init__(self):
+ self.logs = []
+
+ self.log_idx = 0 # first log
+ self.log_ptr = 0 # first char
+
+ self.cur_line = None
+
+ def __str__(self):
+ return self.hostname()
+
+ def add_log(self, logfile):
+
+ for inc in range(0,len(self.logs)):
+ if logfile.time_end() < self.logs[inc].time_begin():
+ self.logs.insert(inc, logfile)
+ break
+ else:
+ self.logs.append(logfile)
+
+ def hostname(self):
+ return self.logs[0].hostname()
+# try: return self.logs[0].hostname()
+# except: return None
+
+ def tell(self):
+ sumsize = 0
+ if self.log_idx > 0:
+ for inc in range(0, self.log_idx):
+ sumsize += self.logs[inc].size()
+ try:
+ sumsize += self.fp().tell()
+ except TypeError:
+ pass
+ return sumsize
+
+ def size(self):
+ sumsize = 0
+ for inc in range(0, len(self.logs)):
+ sumsize += self.logs[inc].size()
+ return sumsize
+
+ def eof(self):
+ if self.tell() >= self.size():
+ return True
+ return False
+
+ def seek(self, offset, whence = 0):
+ if whence == 1: offset = self.tell() + offset
+ elif whence == 2: offset = self.size() + offset
+
+ sumsize = 0
+ for inc in range(0, len(self.logs)):
+ if offset <= sumsize + self.logs[inc].size():
+ offset -= sumsize
+ self.log_idx = inc
+ self.log_ptr = offset
+ self.logs[inc].seek(offset)
+ return True
+ sumsize += self.logs[inc].size()
+ raise "Off_Boundaries"
+
+ def seek_and_read(self, offset, whence = 0):
+ self.seek(offset, whence)
+ return self.readline()
+
+ def time(self):
+ return time.strptime("2007 " + self.cur_line[0:15], "%Y %b %d %H:%M:%S")
+
+ def fp(self):
+ return self.logs[self.log_idx]
+
+ def backline(self):
+ self.seek(-len(self.cur_line), 1)
+
+ def readline(self):
+ if self.eof():
+ raise "Eof"
+
+ while True:
+ position = self.fp().tell()
+ fromfile = self.fp().fname
+ toret = self.fp().readline()
+ if len(toret) == 0:
+ if self.log_idx < len(self.logs):
+ self.log_idx += 1
+ self.fp().seek(0)
+ continue
+ else:
+ return ""
+
+ if len(toret) > 0 or toret == "":
+ self.cur_line = toret
+ self.cur_file = fromfile
+ self.cur_pos = position
+ return toret
+ else:
+ print "invalid line", toret
+
+class logfile_class:
+
+ def __init__(self,fname):
+ self.fname = fname
+ self.fp = open(fname)
+
+ def hostname(self):
+ pos = self.fp.tell()
+ self.seek(0)
+ toret = self.fp.readline()[16:].split(" ")[0]
+ self.fp.seek(pos)
+ return toret
+
+ def time_begin(self):
+ pos = self.fp.tell()
+ self.fp.seek(0)
+ toret = time.strptime(self.fp.readline()[0:15], "%b %d %H:%M:%S")
+ self.fp.seek(pos)
+ return toret
+
+ def time_end(self):
+ pos = self.fp.tell()
+ bs = 1024
+ if self.size() < bs: bs = self.size()
+ self.fp.seek(-bs, 2)
+ buf = self.fp.read(bs)
+ bufsplit = buf.split("\n")
+ bufsplit.reverse()
+ for line in bufsplit:
+ if len(line) == 0: continue
+ try: toret = time.strptime(line[0:15], "%b %d %H:%M:%S")
+ except ValueError: print "Error in conversion"; continue
+ else: break
+ self.fp.seek(pos)
+ return toret
+
+ def size(self):
+ return os.path.getsize(self.fname)
+
+ def eof(self):
+ return self.fp.tell() > self.size()
+
+ def readline(self):
+ return self.fp.readline()
+
+ def seek(self,pos):
+# if cmdline["verbose"]:
+# print "seeking to position %d for file %s" % (pos, self.fname)
+# traceback.print_stack()
+ self.fp.seek(pos)
+
+ def tell(self):
+ return self.fp.tell()