From a7c87f9ee6df68923aea0bf8ca536f72b51aa974 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Fri, 26 May 2023 15:26:20 -0400 Subject: [hostname] Add new prepper Adds a new Prepper for handling hostname determination for preparing the mapping and parser. As part of this new prepper, pass the CLI options to each prepper for use. Signed-off-by: Jake Hunsaker --- sos/cleaner/__init__.py | 9 +++-- sos/cleaner/archives/insights.py | 1 - sos/cleaner/archives/sos.py | 4 --- sos/cleaner/mappings/hostname_map.py | 6 +--- sos/cleaner/parsers/hostname_parser.py | 59 +------------------------------ sos/cleaner/preppers/__init__.py | 3 +- sos/cleaner/preppers/hostname.py | 63 ++++++++++++++++++++++++++++++++++ tests/unittests/cleaner_tests.py | 8 ++--- 8 files changed, 78 insertions(+), 75 deletions(-) create mode 100644 sos/cleaner/preppers/hostname.py diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index d440185d..8783e892 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -126,7 +126,7 @@ class SoSCleaner(SoSComponent): self.cleaner_md = self.manifest.components.add_section('cleaner') self.parsers = [ - SoSHostnameParser(self.cleaner_mapping, self.opts.domains), + SoSHostnameParser(self.cleaner_mapping), SoSIPParser(self.cleaner_mapping), SoSIPv6Parser(self.cleaner_mapping), SoSMacParser(self.cleaner_mapping), @@ -364,6 +364,11 @@ third party. # we have at least one valid target to obfuscate self.completed_reports = [] + # TODO: as we separate mappings and parsers further, do this in a less + # janky manner + for parser in self.parsers: + if parser.name == 'Hostname Parser': + parser.mapping.set_initial_counts() self.preload_all_archives_into_maps() self.generate_parser_item_regexes() self.obfuscate_report_paths() @@ -639,7 +644,7 @@ third party. 
for _prep in helper.get_modules(): preps.extend(import_module(f"sos.cleaner.preppers.{_prep}")) for prepper in sorted(preps, key=lambda x: x.priority): - yield prepper() + yield prepper(options=self.opts) def preload_all_archives_into_maps(self): """Before doing the actual obfuscation, if we have multiple archives diff --git a/sos/cleaner/archives/insights.py b/sos/cleaner/archives/insights.py index c8cf6a97..30cddce1 100644 --- a/sos/cleaner/archives/insights.py +++ b/sos/cleaner/archives/insights.py @@ -23,7 +23,6 @@ class InsightsArchive(SoSObfuscationArchive): description = 'insights-client archive' prep_files = { - 'hostname': 'data/insights_commands/hostname_-f', 'mac': 'data/insights_commands/ip_addr' } diff --git a/sos/cleaner/archives/sos.py b/sos/cleaner/archives/sos.py index 9248a18d..3090174c 100644 --- a/sos/cleaner/archives/sos.py +++ b/sos/cleaner/archives/sos.py @@ -23,10 +23,6 @@ class SoSReportArchive(SoSObfuscationArchive): type_name = 'report' description = 'sos report archive' prep_files = { - 'hostname': [ - 'sos_commands/host/hostname', - 'etc/hosts' - ], 'mac': 'sos_commands/networking/ip_-d_address', 'username': [ 'sos_commands/login/lastlog_-u_1000-60000', diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py index c997ce33..4bdad103 100644 --- a/sos/cleaner/mappings/hostname_map.py +++ b/sos/cleaner/mappings/hostname_map.py @@ -41,7 +41,7 @@ class SoSHostnameMap(SoSMap): ] strip_exts = ('.yaml', '.yml', '.crt', '.key', '.pem', '.log', '.repo', - '.rules') + '.rules', '.conf', '.cfg') host_count = 0 domain_count = 0 @@ -80,10 +80,6 @@ class SoSHostnameMap(SoSMap): self._domains[_domain_to_inject] = _ob_domain self.set_initial_counts() - def load_domains_from_options(self, domains): - for domain in domains: - self.sanitize_domain(domain.split('.')) - def get_regex_result(self, item): """Override the base get_regex_result() to provide a regex that, if this is an FQDN or a straight domain, will include an 
underscore diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py index 07eb40f6..a7396298 100644 --- a/sos/cleaner/parsers/hostname_parser.py +++ b/sos/cleaner/parsers/hostname_parser.py @@ -21,14 +21,9 @@ class SoSHostnameParser(SoSCleanerParser): r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))' ] - def __init__(self, config, opt_domains=None): + def __init__(self, config): self.mapping = SoSHostnameMap() super(SoSHostnameParser, self).__init__(config) - self.mapping.load_domains_from_map() - self.mapping.load_domains_from_options(opt_domains) - self.short_names = [] - self.load_short_names_from_mapping() - self.mapping.set_initial_counts() def parse_line(self, line): """This will be called for every line in every file we process, so that @@ -47,55 +42,3 @@ class SoSHostnameParser(SoSCleanerParser): line, _rcount = self._parse_line_with_compiled_regexes(line) count += _rcount return line, count - - def load_short_names_from_mapping(self): - """When we load the mapping file into the hostname map, we have to do - some dancing to get those loaded properly into the "intermediate" dicts - that the map uses to hold hosts and domains. Similarly, we need to also - extract shortnames known to the map here. - """ - for hname in self.mapping.dataset.keys(): - if len(hname.split('.')) == 1: - # we have a short name only with no domain - if hname not in self.short_names: - self.short_names.append(hname) - - def load_hostname_into_map(self, hostname_string): - """Force add the domainname found in /sos_commands/host/hostname into - the map. 
We have to do this here since the normal map prep approach - from the parser would be ignored since the system's hostname is not - guaranteed - """ - if 'localhost' in hostname_string: - return - domains = hostname_string.split('.') - if len(domains) > 1: - self.short_names.append(domains[0]) - else: - self.short_names.append(hostname_string) - if len(domains) > 3: - # make sure we implicitly get example.com if the system's hostname - # is something like foo.bar.example.com - high_domain = '.'.join(domains[-2:]) - self.mapping.add(high_domain) - self.mapping.add(hostname_string) - - def load_hostname_from_etc_hosts(self, content): - """Parse an archive's copy of /etc/hosts, which requires handling that - is separate from the output of the `hostname` command. Just like - load_hostname_into_map(), this has to be done explicitly and we - cannot rely upon the more generic methods to do this reliably. - """ - lines = content.splitlines() - for line in lines: - if line.startswith('#') or 'localhost' in line: - continue - hostln = line.split()[1:] - for host in hostln: - if len(host.split('.')) == 1: - # only generate a mapping for fqdns but still record the - # short name here for later obfuscation with parse_line() - self.short_names.append(host) - self.mapping.add_regex_item(host) - else: - self.mapping.add(host) diff --git a/sos/cleaner/preppers/__init__.py b/sos/cleaner/preppers/__init__.py index b1487354..790c9e15 100644 --- a/sos/cleaner/preppers/__init__.py +++ b/sos/cleaner/preppers/__init__.py @@ -47,7 +47,7 @@ class SoSPrepper(): name = 'Undefined' priority = 100 - def __init__(self): + def __init__(self, options): self.regex_items = { 'hostname': set(), 'ip': set(), @@ -56,6 +56,7 @@ class SoSPrepper(): 'mac': set(), 'username': set() } + self.opts = options self.soslog = logging.getLogger('sos') self.ui_log = logging.getLogger('sos_ui') diff --git a/sos/cleaner/preppers/hostname.py b/sos/cleaner/preppers/hostname.py new file mode 100644 index 
00000000..0812597e --- /dev/null +++ b/sos/cleaner/preppers/hostname.py @@ -0,0 +1,63 @@ +# Copyright 2023 Red Hat, Inc. Jake Hunsaker + +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. +# +# See the LICENSE file in the source distribution for further information. + +from sos.cleaner.preppers import SoSPrepper + + +class HostnamePrepper(SoSPrepper): + """ + Prepper for providing domain and hostname information to the hostname + mapping. + + The items from hostname sources are handled manually via the + _get_items_for_hostname method, rather than passing the file directly, as + the parser does not know what hostnames or domains to match on initially. + + This also populates the regex_items 'hostname' set with local short names. + """ + + name = 'hostname' + + def _get_items_for_hostname(self, archive): + items = [] + _file = 'hostname' + if archive.is_sos: + _file = 'sos_commands/host/hostname' + elif archive.is_insights: + _file = 'data/insights_commands/hostname_-f' + + content = archive.get_file_content(_file) + if content and content != 'localhost': + domains = content.split('.') + if len(domains) > 1: + items.append(domains[0]) + self.regex_items['hostname'].add((domains[0])) + if len(domains) > 3: + # make sure we get example.com if the system's hostname + # is something like foo.bar.example.com + top_domain = '.'.join(domains[-2:]) + items.append(top_domain.strip()) + items.append(content.strip()) + + _hosts = archive.get_file_content('etc/hosts') + for line in _hosts.splitlines(): + if line.startswith('#') or 'localhost' in line: + continue + hostln = line.split()[1:] + for host in hostln: + if len(host.split('.')) == 1: + self.regex_items['hostname'].add(host) + else: + items.append(host) + + for domain in self.opts.domains: + items.append(domain) + +
return items diff --git a/tests/unittests/cleaner_tests.py b/tests/unittests/cleaner_tests.py index 6e0be6c8..8bf1b239 100644 --- a/tests/unittests/cleaner_tests.py +++ b/tests/unittests/cleaner_tests.py @@ -24,7 +24,7 @@ from sos.cleaner.preppers import SoSPrepper from sos.cleaner.preppers.hostname import HostnamePrepper from sos.cleaner.preppers.ip import IPPrepper from sos.cleaner.archives.sos import SoSReportArchive - +from sos.options import SoSOptions class CleanerMapTests(unittest.TestCase): @@ -288,13 +288,13 @@ class PrepperTests(unittest.TestCase): """ def setUp(self): - self.prepper = SoSPrepper() + self.prepper = SoSPrepper(SoSOptions()) self.archive = SoSReportArchive( archive_path='tests/test_data/sosreport-cleanertest-2021-08-03-qpkxdid.tar.xz', tmpdir='/tmp' ) - self.host_prepper = HostnamePrepper() - self.ipv4_prepper = IPPrepper() + self.host_prepper = HostnamePrepper(SoSOptions(domains=[])) + self.ipv4_prepper = IPPrepper(SoSOptions()) def test_parser_method_translation(self): self.assertEqual([], self.prepper.get_parser_file_list('hostname', None)) -- cgit