diff options
-rw-r--r-- | sos/cleaner/__init__.py | 99 | ||||
-rw-r--r-- | sos/cleaner/archives/__init__.py | 8 | ||||
-rw-r--r-- | sos/cleaner/preppers/__init__.py | 125 | ||||
-rw-r--r-- | tests/unittests/cleaner_tests.py | 57 |
4 files changed, 240 insertions, 49 deletions
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index b8e4aafd..d440185d 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -13,6 +13,7 @@ import json import logging import os import shutil +import sos.cleaner.preppers import tempfile from concurrent.futures import ThreadPoolExecutor @@ -31,7 +32,7 @@ from sos.cleaner.archives.sos import (SoSReportArchive, SoSReportDirectory, SoSCollectorDirectory) from sos.cleaner.archives.generic import DataDirArchive, TarballArchive from sos.cleaner.archives.insights import InsightsArchive -from sos.utilities import get_human_readable +from sos.utilities import get_human_readable, import_module, ImporterHelper from textwrap import fill @@ -583,6 +584,63 @@ third party. for parser in self.parsers: parser.generate_item_regexes() + def _prepare_archive_with_prepper(self, archive, prepper): + """ + For each archive we've determined we need to operate on, pass it to + each prepper so that we can extract necessary files and/or items for + direct regex replacement. Preppers define these methods per parser, + so it is possible that a single prepper will read the same file for + different parsers/mappings. This is preferable to the alternative of + building up monolithic lists of file paths, as we'd still need to + manipulate these on a per-archive basis. + + :param archive: The archive we are currently using to prepare our + mappings with + :type archive: ``SoSObfuscationArchive`` subclass + + :param prepper: The individual prepper we're using to source items + :type prepper: ``SoSPrepper`` subclass + """ + for _parser in self.parsers: + pname = _parser.name.lower().split()[0].strip() + for _file in prepper.get_parser_file_list(pname, archive): + content = archive.get_file_content(_file) + if not content: + continue + self.log_debug(f"Prepping {pname} parser with file {_file} " + f"from {archive.ui_name}") + for line in content.splitlines(): + try: + _parser.parse_line(line) + except Exception as err: + self.log_debug( + f"Failed to prep {pname} map from {_file}: {err}" + ) + map_items = prepper.get_items_for_map(pname, archive) + if map_items: + self.log_debug(f"Prepping {pname} mapping with items from " + f"{archive.ui_name}") + for item in map_items: + _parser.mapping.add(item) + + for ritem in prepper.regex_items[pname]: + _parser.mapping.add_regex_item(ritem) + + def get_preppers(self): + """ + Discover all locally available preppers so that we can prepare the + mappings with obfuscation matches in a controlled manner + + :returns: All preppers that can be leveraged locally + :rtype: A generator of `SoSPrepper` items + """ + helper = ImporterHelper(sos.cleaner.preppers) + preps = [] + for _prep in helper.get_modules(): + preps.extend(import_module(f"sos.cleaner.preppers.{_prep}")) + for prepper in sorted(preps, key=lambda x: x.priority): + yield prepper() + def preload_all_archives_into_maps(self): """Before doing the actual obfuscation, if we have multiple archives to obfuscate then we need to preload each of them into the mappings @@ -590,42 +648,9 @@ third party. obfuscated in node1's archive. """ self.log_info("Pre-loading all archives into obfuscation maps") - for _arc in self.report_paths: - for _parser in self.parsers: - try: - pfile = _arc.prep_files[_parser.name.lower().split()[0]] - if not pfile: - continue - except (IndexError, KeyError): - continue - if isinstance(pfile, str): - pfile = [pfile] - for parse_file in pfile: - self.log_debug("Attempting to load %s" % parse_file) - try: - content = _arc.get_file_content(parse_file) - if not content: - continue - if isinstance(_parser, SoSUsernameParser): - _parser.load_usernames_into_map(content) - elif isinstance(_parser, SoSHostnameParser): - if 'hostname' in parse_file: - _parser.load_hostname_into_map( - content.splitlines()[0] - ) - elif 'etc/hosts' in parse_file: - _parser.load_hostname_from_etc_hosts( - content - ) - else: - for line in content.splitlines(): - self.obfuscate_line(line) - except Exception as err: - self.log_info( - "Could not prepare %s from %s (archive: %s): %s" - % (_parser.name, parse_file, _arc.archive_name, - err) - ) + for prepper in self.get_preppers(): + for archive in self.report_paths: + self._prepare_archive_with_prepper(archive, prepper) def obfuscate_report(self, archive): """Individually handle each archive or directory we've discovered by diff --git a/sos/cleaner/archives/__init__.py b/sos/cleaner/archives/__init__.py index 6a6f46d9..a185ae34 100644 --- a/sos/cleaner/archives/__init__.py +++ b/sos/cleaner/archives/__init__.py @@ -166,8 +166,12 @@ class SoSObfuscationArchive(): ) return '' else: - with open(self.format_file_name(fname), 'r') as to_read: - return to_read.read() + try: + with open(self.format_file_name(fname), 'r') as to_read: + return to_read.read() + except Exception as err: + self.log_debug(f"Failed to get contents of {fname}: {err}") + return '' def extract(self, quiet=False): if self.is_tarfile: diff --git a/sos/cleaner/preppers/__init__.py b/sos/cleaner/preppers/__init__.py new file mode 100644 index 00000000..b1487354 --- /dev/null +++ b/sos/cleaner/preppers/__init__.py @@ -0,0 +1,125 @@ +# Copyright 2023 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com> + +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. +# +# See the LICENSE file in the source distribution for further information. + +import logging + + +class SoSPrepper(): + """ + A prepper is a way to prepare loaded mappings with selected items within + an sos report prior to beginning the full obfuscation routine. + + This was previously handled directly within archives, however this is a bit + cumbersome and doesn't allow for all the flexibility we could use in this + effort. + + Preppers are separated from parsers but will leverage them in order to feed + parser-matched strings from files highlighted by a Prepper() to the + appropriate mapping for initial obfuscation. + + Preppers may specify their own priority in order to influence the order in + which mappings are prepped. Further, Preppers have two ways to prepare + the maps - either by generating a list of filenames or via directly pulling + content out of select files without the assistance of a parser. A lower + priority value means the prepper should run sooner than those with higher + values. + + For the former approach, `Prepper._get_$parser_file_list()` should be used + and should yield filenames that exist in target archives. For the latter, + the `Prepper._get_items_for_$map()` should be used. + + Finally, a `regex_items` dict is available for storing individual regex + items for parsers that rely on them. These items will be added after all + files and other individual items are handled. This dict has keys set to + parser/mapping names, and the values should be sets of items, so preppers + should add to them like so: + + self.regex_items['hostname'].add('myhostname') + """ + + name = 'Undefined' + priority = 100 + + def __init__(self): + self.regex_items = { + 'hostname': set(), + 'ip': set(), + 'ipv6': set(), + 'keyword': set(), + 'mac': set(), + 'username': set() + } + self.soslog = logging.getLogger('sos') + self.ui_log = logging.getLogger('sos_ui') + + def _fmt_log_msg(self, msg): + return f"[prepper:{self.name}] {msg}" + + def log_debug(self, msg): + self.soslog.debug(self._fmt_log_msg(msg)) + + def log_info(self, msg): + self.soslog.info(self._fmt_log_msg(msg)) + + def log_error(self, msg): + self.soslog.error(self._fmt_log_msg(msg)) + + def get_parser_file_list(self, parser, archive): + """ + Helper that calls the appropriate Prepper method for the specified + parser. This allows Preppers to be able to provide items for multiple + types of parsers without needing to handle repetitious logic to + determine which parser we're interested within each individual call. + + The convention to use is to define `_get_$parser_file_list()` methods + within Preppers, e.g. `_get_hostname_file_list()` would be used to + provide filenames for the hostname parser. If such a method is not + defined within a Prepper for a given parser, we handle that here so + that individual Preppers do not need to. + + :param parser: The _name_ of the parser to get a file list for + :type parser: ``str`` + + :param archive: The archive we are operating on currently for the + specified parser + :type archive: ``SoSObfuscationArchive`` + + :returns: A list of filenames within the archive to prep with + :rtype: ``list`` + """ + _check = f"_get_{parser}_file_list" + if hasattr(self, _check): + return getattr(self, _check)(archive) + return [] + + def get_items_for_map(self, mapping, archive): + """ + Similar to `get_parser_file_list()`, a helper for calling the specific + method for generating items for the given `map`. This allows Preppers + to be able to provide items for multiple types of maps, without the + need to handle repetitious logic to determine which parser we're + interested in within each individual call. + + :param mapping: The _name_ of the mapping to get items for + :type mapping: ``str`` + + :param archive: The archive we are operating on currently for the + specified parser + :type archive: ``SoSObfuscationArchive`` + + :returns: A list of distinct items to obfuscate without using a parser + :rtype: ``list`` + """ + _check = f"_get_items_for_{mapping}" + if hasattr(self, _check): + return getattr(self, _check)(archive) + return [] + +# vim: set et ts=4 sw=4 : diff --git a/tests/unittests/cleaner_tests.py b/tests/unittests/cleaner_tests.py index c28239a7..6e0be6c8 100644 --- a/tests/unittests/cleaner_tests.py +++ b/tests/unittests/cleaner_tests.py @@ -20,6 +20,10 @@ from sos.cleaner.mappings.mac_map import SoSMacMap from sos.cleaner.mappings.hostname_map import SoSHostnameMap from sos.cleaner.mappings.keyword_map import SoSKeywordMap from sos.cleaner.mappings.ipv6_map import SoSIPv6Map +from sos.cleaner.preppers import SoSPrepper +from sos.cleaner.preppers.hostname import HostnamePrepper +from sos.cleaner.preppers.ip import IPPrepper +from sos.cleaner.archives.sos import SoSReportArchive class CleanerMapTests(unittest.TestCase): @@ -28,7 +32,7 @@ class CleanerMapTests(unittest.TestCase): self.mac_map = SoSMacMap() self.ip_map = SoSIPMap() self.host_map = SoSHostnameMap() - self.host_map.load_domains_from_options(['redhat.com']) + self.host_map.sanitize_item('redhat.com') self.kw_map = SoSKeywordMap() self.ipv6_map = SoSIPv6Map() @@ -152,13 +156,14 @@ class CleanerParserTests(unittest.TestCase): self.ip_parser = SoSIPParser(config={}) self.ipv6_parser = SoSIPv6Parser(config={}) self.mac_parser = SoSMacParser(config={}) - self.host_parser = SoSHostnameParser(config={}, - opt_domains=['foobar.com']) - self.kw_parser = SoSKeywordParser(config={}, keywords=['foobar']) + self.host_parser = SoSHostnameParser(config={}) + self.host_parser.mapping.add('foobar.com') + self.kw_parser = SoSKeywordParser(config={}) + self.kw_parser.mapping.add('foobar') self.kw_parser_none = SoSKeywordParser(config={}) self.kw_parser.generate_item_regexes() - self.uname_parser = SoSUsernameParser(config={}, - opt_names=['DOMAIN\myusername']) + self.uname_parser = SoSUsernameParser(config={}) + self.uname_parser.mapping.add('DOMAIN\myusername') def test_ip_parser_valid_ipv4_line(self): line = 'foobar foo 10.0.0.1/24 barfoo bar' @@ -210,22 +215,22 @@ class CleanerParserTests(unittest.TestCase): def test_hostname_load_hostname_string(self): fqdn = 'myhost.subnet.example.com' - self.host_parser.load_hostname_into_map(fqdn) + self.host_parser.mapping.add(fqdn) def test_hostname_valid_domain_line(self): - self.host_parser.load_hostname_into_map('myhost.subnet.example.com') + self.host_parser.mapping.add('myhost.subnet.example.com') line = 'testing myhost.subnet.example.com in a string' _test = self.host_parser.parse_line(line)[0] self.assertNotEqual(line, _test) def test_hostname_short_name_in_line(self): - self.host_parser.load_hostname_into_map('myhost.subnet.example.com') + self.host_parser.mapping.add('myhost.subnet.example.com') line = 'testing just myhost in a line' _test = self.host_parser.parse_line(line)[0] self.assertNotEqual(line, _test) def test_obfuscate_whole_fqdn_for_given_domainname(self): - self.host_parser.load_hostname_into_map('sostestdomain.domain') + self.host_parser.mapping.add('sostestdomain.domain') line = 'let obfuscate soshost.sostestdomain.domain' _test = self.host_parser.parse_line(line)[0] self.assertFalse('soshost' in _test) @@ -274,3 +279,35 @@ class CleanerParserTests(unittest.TestCase): line = "DOMAIN\myusername" _test = self.uname_parser.parse_line(line)[0] self.assertNotEqual(line, _test) + + +class PrepperTests(unittest.TestCase): + """ + Ensure that the translations for different parser/mapping methods are + working + """ + + def setUp(self): + self.prepper = SoSPrepper() + self.archive = SoSReportArchive( + archive_path='tests/test_data/sosreport-cleanertest-2021-08-03-qpkxdid.tar.xz', + tmpdir='/tmp' + ) + self.host_prepper = HostnamePrepper() + self.ipv4_prepper = IPPrepper() + + def test_parser_method_translation(self): + self.assertEqual([], self.prepper.get_parser_file_list('hostname', None)) + + def test_mapping_method_translation(self): + self.assertEqual([], self.prepper.get_items_for_map('foobar', None)) + + def test_hostname_prepper_map_items(self): + self.assertEqual(['cleanertest'], self.host_prepper.get_items_for_map('hostname', self.archive)) + + def test_ipv4_prepper_parser_files(self): + self.assertEqual(['sos_commands/networking/ip_-o_addr'], self.ipv4_prepper.get_parser_file_list('ip', self.archive)) + + def test_ipv4_prepper_invalid_parser_files(self): + self.assertEqual([], self.ipv4_prepper.get_parser_file_list('foobar', self.archive)) + |