aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sos/cleaner/mappings/hostname_map.py59
-rw-r--r--sos/cleaner/parsers/__init__.py3
-rw-r--r--sos/cleaner/parsers/hostname_parser.py30
3 files changed, 81 insertions, 11 deletions
diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
index c9a44d8d..d4b2c88e 100644
--- a/sos/cleaner/mappings/hostname_map.py
+++ b/sos/cleaner/mappings/hostname_map.py
@@ -104,7 +104,7 @@ class SoSHostnameMap(SoSMap):
host = domain.split('.')
if len(host) == 1:
# don't block on host's shortname
- return True
+ return host[0] in self.hosts.keys()
else:
domain = host[0:-1]
for known_domain in self._domains:
@@ -113,12 +113,59 @@ class SoSHostnameMap(SoSMap):
return False
def get(self, item):
- if item.startswith(('.', '_')):
- item = item.lstrip('._')
- item = item.strip()
+ prefix = ''
+ suffix = ''
+ final = None
+# The regex match may carry leading and/or trailing '.' or '_' chars,
+# since the pattern accepts '_' where a word boundary would go. Strip
+# them for the lookup and re-attach them to the obfuscated result so
+# the surrounding text or filename is not mangled. NOTE(review): the
+# early 'return item' below omits them - confirm this is intended
+ while item.startswith(('.', '_')):
+ prefix += item[0]
+ item = item[1:]
+ while item.endswith(('.', '_')):
+ suffix += item[-1]
+ item = item[0:-1]
if not self.domain_name_in_loaded_domains(item.lower()):
return item
- return super(SoSHostnameMap, self).get(item)
+ if item.endswith(('.yaml', '.yml', '.crt', '.key', '.pem')):
+ ext = '.' + item.split('.')[-1]
+ item = item.replace(ext, '')
+ suffix += ext
+ if item not in self.dataset.keys():
+ # try to account for use of '-' in names that include hostnames
+ # and don't create new mappings for each of these
+ for _existing in sorted(self.dataset.keys(), reverse=True,
+ key=lambda x: len(x)):
+ _host_substr = False
+ _test = item.split(_existing)
+ _h = _existing.split('.')
+# flag existing entries whose first label is a known short hostname so
+# an item containing that FQDN is not treated as a brand new host
+ if _h[0] and _h[0] in self.hosts.keys():
+ _host_substr = True
+ if len(_test) == 1 or not _test[0]:
+ # does not match existing obfuscation
+ continue
+ elif _test[0].endswith('.') and not _host_substr:
+ # new hostname in known domain
+ final = super(SoSHostnameMap, self).get(item)
+ break
+ elif item.split(_test[0]):
+ # string that includes existing FQDN obfuscation substring
+ # so, only obfuscate the FQDN part
+ try:
+ itm = item.split(_test[0])[1]
+ final = _test[0] + super(SoSHostnameMap, self).get(itm)
+ break
+ except Exception:
+ # fallback to still obfuscating the entire item
+ pass
+
+ if not final:
+ final = super(SoSHostnameMap, self).get(item)
+ return prefix + final + suffix
def sanitize_item(self, item):
host = item.split('.')
@@ -146,6 +193,8 @@ class SoSHostnameMap(SoSMap):
"""Obfuscate the short name of the host with an incremented counter
based on the total number of obfuscated host names
"""
+ if not hostname:
+ return hostname
if hostname not in self.hosts:
ob_host = "host%s" % self.host_count
self.hosts[hostname] = ob_host
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
index 84874475..57d2020a 100644
--- a/sos/cleaner/parsers/__init__.py
+++ b/sos/cleaner/parsers/__init__.py
@@ -87,7 +87,8 @@ class SoSCleanerParser():
for match in matches:
match = match.strip()
new_match = self.mapping.get(match)
- line = line.replace(match, new_match)
+ if new_match != match:
+ line = line.replace(match, new_match)
return line, count
def parse_string_for_keys(self, string_data):
diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
index 9982024b..3de6bb08 100644
--- a/sos/cleaner/parsers/hostname_parser.py
+++ b/sos/cleaner/parsers/hostname_parser.py
@@ -18,7 +18,7 @@ class SoSHostnameParser(SoSCleanerParser):
map_file_key = 'hostname_map'
prep_map_file = 'sos_commands/host/hostname'
regex_patterns = [
- r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}\b))'
+ r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
]
def __init__(self, conf_file=None, opt_domains=None):
@@ -66,10 +66,30 @@ class SoSHostnameParser(SoSCleanerParser):
"""Override the default parse_line() method to also check for the
shortname of the host derived from the hostname.
"""
+
+ def _check_line(ln, count, search, repl=None):
+ """Perform a second manual check for substrings that may have been
+ missed by regex matching
+ """
+ if search in self.mapping.skip_keys:
+ return ln, count
+ if search in ln:
+ count += ln.count(search)
+ ln = ln.replace(search, self.mapping.get(repl or search))
+ return ln, count
+
count = 0
line, count = super(SoSHostnameParser, self).parse_line(line)
- for short_name in self.short_names:
- if short_name in line:
- count += 1
- line = line.replace(short_name, self.mapping.get(short_name))
+ # make an additional pass checking for '_' formatted substrings that
+ # the regex patterns won't catch
+ hosts = [h for h in self.mapping.dataset.keys() if '.' in h]
+ for host in sorted(hosts, reverse=True, key=lambda x: len(x)):
+ fqdn = host
+ for c in '.-':
+ fqdn = fqdn.replace(c, '_')
+ line, count = _check_line(line, count, fqdn, host)
+
+ for short_name in sorted(self.short_names, reverse=True):
+ line, count = _check_line(line, count, short_name)
+
return line, count