aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jake Hunsaker <jhunsake@redhat.com> 2023-05-26 15:26:20 -0400
committer Jake Hunsaker <jacob.r.hunsaker@gmail.com> 2023-06-22 14:06:38 -0400
commit a7c87f9ee6df68923aea0bf8ca536f72b51aa974 (patch)
tree 56ba2c892484d507feed71a82112e9de2fb17042
parent 2b89a12ebb263d5d844bac91a9e2f75448615766 (diff)
download sos-a7c87f9ee6df68923aea0bf8ca536f72b51aa974.tar.gz
[hostname] Add new prepper
Adds a new Prepper for handling hostname determination for preparing the mapping and parser. As part of this new prepper, pass the CLI options to each prepper for use.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
-rw-r--r-- sos/cleaner/__init__.py | 9
-rw-r--r-- sos/cleaner/archives/insights.py | 1
-rw-r--r-- sos/cleaner/archives/sos.py | 4
-rw-r--r-- sos/cleaner/mappings/hostname_map.py | 6
-rw-r--r-- sos/cleaner/parsers/hostname_parser.py | 59
-rw-r--r-- sos/cleaner/preppers/__init__.py | 3
-rw-r--r-- sos/cleaner/preppers/hostname.py | 63
-rw-r--r-- tests/unittests/cleaner_tests.py | 8
8 files changed, 78 insertions, 75 deletions
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
index d440185d..8783e892 100644
--- a/sos/cleaner/__init__.py
+++ b/sos/cleaner/__init__.py
@@ -126,7 +126,7 @@ class SoSCleaner(SoSComponent):
self.cleaner_md = self.manifest.components.add_section('cleaner')
self.parsers = [
- SoSHostnameParser(self.cleaner_mapping, self.opts.domains),
+ SoSHostnameParser(self.cleaner_mapping),
SoSIPParser(self.cleaner_mapping),
SoSIPv6Parser(self.cleaner_mapping),
SoSMacParser(self.cleaner_mapping),
@@ -364,6 +364,11 @@ third party.
# we have at least one valid target to obfuscate
self.completed_reports = []
+ # TODO: as we separate mappings and parsers further, do this in a less
+ # janky manner
+ for parser in self.parsers:
+ if parser.name == 'Hostname Parser':
+ parser.mapping.set_initial_counts()
self.preload_all_archives_into_maps()
self.generate_parser_item_regexes()
self.obfuscate_report_paths()
@@ -639,7 +644,7 @@ third party.
for _prep in helper.get_modules():
preps.extend(import_module(f"sos.cleaner.preppers.{_prep}"))
for prepper in sorted(preps, key=lambda x: x.priority):
- yield prepper()
+ yield prepper(options=self.opts)
def preload_all_archives_into_maps(self):
"""Before doing the actual obfuscation, if we have multiple archives
diff --git a/sos/cleaner/archives/insights.py b/sos/cleaner/archives/insights.py
index c8cf6a97..30cddce1 100644
--- a/sos/cleaner/archives/insights.py
+++ b/sos/cleaner/archives/insights.py
@@ -23,7 +23,6 @@ class InsightsArchive(SoSObfuscationArchive):
description = 'insights-client archive'
prep_files = {
- 'hostname': 'data/insights_commands/hostname_-f',
'mac': 'data/insights_commands/ip_addr'
}
diff --git a/sos/cleaner/archives/sos.py b/sos/cleaner/archives/sos.py
index 9248a18d..3090174c 100644
--- a/sos/cleaner/archives/sos.py
+++ b/sos/cleaner/archives/sos.py
@@ -23,10 +23,6 @@ class SoSReportArchive(SoSObfuscationArchive):
type_name = 'report'
description = 'sos report archive'
prep_files = {
- 'hostname': [
- 'sos_commands/host/hostname',
- 'etc/hosts'
- ],
'mac': 'sos_commands/networking/ip_-d_address',
'username': [
'sos_commands/login/lastlog_-u_1000-60000',
diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
index c997ce33..4bdad103 100644
--- a/sos/cleaner/mappings/hostname_map.py
+++ b/sos/cleaner/mappings/hostname_map.py
@@ -41,7 +41,7 @@ class SoSHostnameMap(SoSMap):
]
strip_exts = ('.yaml', '.yml', '.crt', '.key', '.pem', '.log', '.repo',
- '.rules')
+ '.rules', '.conf', '.cfg')
host_count = 0
domain_count = 0
@@ -80,10 +80,6 @@ class SoSHostnameMap(SoSMap):
self._domains[_domain_to_inject] = _ob_domain
self.set_initial_counts()
- def load_domains_from_options(self, domains):
- for domain in domains:
- self.sanitize_domain(domain.split('.'))
-
def get_regex_result(self, item):
"""Override the base get_regex_result() to provide a regex that, if
this is an FQDN or a straight domain, will include an underscore
diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
index 07eb40f6..a7396298 100644
--- a/sos/cleaner/parsers/hostname_parser.py
+++ b/sos/cleaner/parsers/hostname_parser.py
@@ -21,14 +21,9 @@ class SoSHostnameParser(SoSCleanerParser):
r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
]
- def __init__(self, config, opt_domains=None):
+ def __init__(self, config):
self.mapping = SoSHostnameMap()
super(SoSHostnameParser, self).__init__(config)
- self.mapping.load_domains_from_map()
- self.mapping.load_domains_from_options(opt_domains)
- self.short_names = []
- self.load_short_names_from_mapping()
- self.mapping.set_initial_counts()
def parse_line(self, line):
"""This will be called for every line in every file we process, so that
@@ -47,55 +42,3 @@ class SoSHostnameParser(SoSCleanerParser):
line, _rcount = self._parse_line_with_compiled_regexes(line)
count += _rcount
return line, count
-
- def load_short_names_from_mapping(self):
- """When we load the mapping file into the hostname map, we have to do
- some dancing to get those loaded properly into the "intermediate" dicts
- that the map uses to hold hosts and domains. Similarly, we need to also
- extract shortnames known to the map here.
- """
- for hname in self.mapping.dataset.keys():
- if len(hname.split('.')) == 1:
- # we have a short name only with no domain
- if hname not in self.short_names:
- self.short_names.append(hname)
-
- def load_hostname_into_map(self, hostname_string):
- """Force add the domainname found in /sos_commands/host/hostname into
- the map. We have to do this here since the normal map prep approach
- from the parser would be ignored since the system's hostname is not
- guaranteed
- """
- if 'localhost' in hostname_string:
- return
- domains = hostname_string.split('.')
- if len(domains) > 1:
- self.short_names.append(domains[0])
- else:
- self.short_names.append(hostname_string)
- if len(domains) > 3:
- # make sure we implicitly get example.com if the system's hostname
- # is something like foo.bar.example.com
- high_domain = '.'.join(domains[-2:])
- self.mapping.add(high_domain)
- self.mapping.add(hostname_string)
-
- def load_hostname_from_etc_hosts(self, content):
- """Parse an archive's copy of /etc/hosts, which requires handling that
- is separate from the output of the `hostname` command. Just like
- load_hostname_into_map(), this has to be done explicitly and we
- cannot rely upon the more generic methods to do this reliably.
- """
- lines = content.splitlines()
- for line in lines:
- if line.startswith('#') or 'localhost' in line:
- continue
- hostln = line.split()[1:]
- for host in hostln:
- if len(host.split('.')) == 1:
- # only generate a mapping for fqdns but still record the
- # short name here for later obfuscation with parse_line()
- self.short_names.append(host)
- self.mapping.add_regex_item(host)
- else:
- self.mapping.add(host)
diff --git a/sos/cleaner/preppers/__init__.py b/sos/cleaner/preppers/__init__.py
index b1487354..790c9e15 100644
--- a/sos/cleaner/preppers/__init__.py
+++ b/sos/cleaner/preppers/__init__.py
@@ -47,7 +47,7 @@ class SoSPrepper():
name = 'Undefined'
priority = 100
- def __init__(self):
+ def __init__(self, options):
self.regex_items = {
'hostname': set(),
'ip': set(),
@@ -56,6 +56,7 @@ class SoSPrepper():
'mac': set(),
'username': set()
}
+ self.opts = options
self.soslog = logging.getLogger('sos')
self.ui_log = logging.getLogger('sos_ui')
diff --git a/sos/cleaner/preppers/hostname.py b/sos/cleaner/preppers/hostname.py
new file mode 100644
index 00000000..0812597e
--- /dev/null
+++ b/sos/cleaner/preppers/hostname.py
@@ -0,0 +1,63 @@
+# Copyright 2023 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from sos.cleaner.preppers import SoSPrepper
+
+
+class HostnamePrepper(SoSPrepper):
+    """
+    Prepper for providing domain and hostname information to the hostname
+    mapping.
+
+    The items from hostname sources are handled manually via the
+    _get_items_for_hostname method, rather than passing the file directly, as
+    the parser does not know what hostnames or domains to match on initially.
+
+    This also adds local short names to the regex_items['hostname'] set.
+    """
+
+    name = 'hostname'
+
+    def _get_items_for_hostname(self, archive):
+        """Collect hostname and domain items from the archive's hostname
+        file and etc/hosts, plus any domains passed via the options; short
+        names are also added to the regex_items['hostname'] set."""
+        items = []
+        _file = 'hostname'
+        if archive.is_sos:
+            _file = 'sos_commands/host/hostname'
+        elif archive.is_insights:
+            _file = 'data/insights_commands/hostname_-f'
+
+        # strip before testing: any trailing newline would defeat the check
+        hostname = (archive.get_file_content(_file) or '').strip()
+        if hostname and 'localhost' not in hostname:
+            domains = hostname.split('.')
+            if len(domains) > 1:
+                items.append(domains[0])
+                self.regex_items['hostname'].add(domains[0])
+            if len(domains) > 3:
+                # make sure we get example.com if the system's hostname
+                # is something like foo.bar.example.com
+                items.append('.'.join(domains[-2:]))
+            items.append(hostname)
+
+        _hosts = archive.get_file_content('etc/hosts') or ''
+        for line in _hosts.splitlines():
+            if line.startswith('#') or 'localhost' in line:
+                continue
+            for host in line.split('#', 1)[0].split()[1:]:  # drop comments
+                if '.' not in host:
+                    self.regex_items['hostname'].add(host)
+                else:
+                    items.append(host)
+        # the domains option may be unset or None; treat both as no domains
+        items.extend(getattr(self.opts, 'domains', None) or [])
+        return items
diff --git a/tests/unittests/cleaner_tests.py b/tests/unittests/cleaner_tests.py
index 6e0be6c8..8bf1b239 100644
--- a/tests/unittests/cleaner_tests.py
+++ b/tests/unittests/cleaner_tests.py
@@ -24,7 +24,7 @@ from sos.cleaner.preppers import SoSPrepper
from sos.cleaner.preppers.hostname import HostnamePrepper
from sos.cleaner.preppers.ip import IPPrepper
from sos.cleaner.archives.sos import SoSReportArchive
-
+from sos.options import SoSOptions
class CleanerMapTests(unittest.TestCase):
@@ -288,13 +288,13 @@ class PrepperTests(unittest.TestCase):
"""
def setUp(self):
- self.prepper = SoSPrepper()
+ self.prepper = SoSPrepper(SoSOptions())
self.archive = SoSReportArchive(
archive_path='tests/test_data/sosreport-cleanertest-2021-08-03-qpkxdid.tar.xz',
tmpdir='/tmp'
)
- self.host_prepper = HostnamePrepper()
- self.ipv4_prepper = IPPrepper()
+ self.host_prepper = HostnamePrepper(SoSOptions(domains=[]))
+ self.ipv4_prepper = IPPrepper(SoSOptions())
def test_parser_method_translation(self):
self.assertEqual([], self.prepper.get_parser_file_list('hostname', None))