about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- sos/cleaner/__init__.py 99
-rw-r--r-- sos/cleaner/archives/__init__.py 8
-rw-r--r-- sos/cleaner/preppers/__init__.py 125
-rw-r--r-- tests/unittests/cleaner_tests.py 57
4 files changed, 240 insertions, 49 deletions
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
index b8e4aafd..d440185d 100644
--- a/sos/cleaner/__init__.py
+++ b/sos/cleaner/__init__.py
@@ -13,6 +13,7 @@ import json
import logging
import os
import shutil
+import sos.cleaner.preppers
import tempfile
from concurrent.futures import ThreadPoolExecutor
@@ -31,7 +32,7 @@ from sos.cleaner.archives.sos import (SoSReportArchive, SoSReportDirectory,
SoSCollectorDirectory)
from sos.cleaner.archives.generic import DataDirArchive, TarballArchive
from sos.cleaner.archives.insights import InsightsArchive
-from sos.utilities import get_human_readable
+from sos.utilities import get_human_readable, import_module, ImporterHelper
from textwrap import fill
@@ -583,6 +584,63 @@ third party.
for parser in self.parsers:
parser.generate_item_regexes()
+    def _prepare_archive_with_prepper(self, archive, prepper):
+        """
+        Run a single prepper against a single archive so that the mapping
+        of every loaded parser is seeded ahead of the full obfuscation run.
+
+        For each parser, three things happen in order: every file the
+        prepper lists for that parser is read from the archive and passed
+        line by line to the parser; any items the prepper provides directly
+        for that mapping are added to it; and finally the prepper's regex
+        items for that parser are registered with the mapping.
+
+        Preppers define these methods per parser, so it is possible that a
+        single prepper will read the same file for different
+        parsers/mappings. This is preferable to the alternative of building
+        up monolithic lists of file paths, as we'd still need to manipulate
+        these on a per-archive basis.
+
+        :param archive: The archive we are currently using to prepare our
+                        mappings with
+        :type archive: ``SoSObfuscationArchive`` subclass
+
+        :param prepper: The individual prepper we're using to source items
+        :type prepper: ``SoSPrepper`` subclass
+        """
+        for _parser in self.parsers:
+            # Preppers are keyed on the lowercased first word of the
+            # parser's name (a name of 'Foo Parser' gives 'foo'). split()
+            # without arguments already discards surrounding whitespace.
+            pname = _parser.name.lower().split()[0]
+            for _file in prepper.get_parser_file_list(pname, archive):
+                content = archive.get_file_content(_file)
+                if not content:
+                    continue
+                self.log_debug(f"Prepping {pname} parser with file {_file} "
+                               f"from {archive.ui_name}")
+                for line in content.splitlines():
+                    # Prepping is best-effort: a line the parser cannot
+                    # handle is logged and the remaining lines still run.
+                    try:
+                        _parser.parse_line(line)
+                    except Exception as err:
+                        self.log_debug(
+                            f"Failed to prep {pname} map from {_file}: {err}"
+                        )
+            map_items = prepper.get_items_for_map(pname, archive)
+            if map_items:
+                self.log_debug(f"Prepping {pname} mapping with items from "
+                               f"{archive.ui_name}")
+                for item in map_items:
+                    _parser.mapping.add(item)
+
+            # A prepper is not guaranteed to hold an entry for every parser
+            # name, so a missing key means "nothing to add" rather than a
+            # KeyError that would abort prepping for the remaining parsers.
+            for ritem in prepper.regex_items.get(pname, ()):
+                _parser.mapping.add_regex_item(ritem)
+
+    def get_preppers(self):
+        """
+        Find every prepper available under ``sos.cleaner.preppers`` and
+        yield an instance of each, ordered so that the lowest ``priority``
+        value comes first.
+
+        :returns: All preppers that can be leveraged locally
+        :rtype: A generator of `SoSPrepper` items
+        """
+        module_names = ImporterHelper(sos.cleaner.preppers).get_modules()
+        found = [
+            prep_cls
+            for mod_name in module_names
+            for prep_cls in import_module(f"sos.cleaner.preppers.{mod_name}")
+        ]
+        # list.sort() is stable, so preppers sharing a priority keep their
+        # discovery order
+        found.sort(key=lambda prep_cls: prep_cls.priority)
+        for prep_cls in found:
+            yield prep_cls()
+
def preload_all_archives_into_maps(self):
"""Before doing the actual obfuscation, if we have multiple archives
to obfuscate then we need to preload each of them into the mappings
@@ -590,42 +648,9 @@ third party.
obfuscated in node1's archive.
"""
self.log_info("Pre-loading all archives into obfuscation maps")
- for _arc in self.report_paths:
- for _parser in self.parsers:
- try:
- pfile = _arc.prep_files[_parser.name.lower().split()[0]]
- if not pfile:
- continue
- except (IndexError, KeyError):
- continue
- if isinstance(pfile, str):
- pfile = [pfile]
- for parse_file in pfile:
- self.log_debug("Attempting to load %s" % parse_file)
- try:
- content = _arc.get_file_content(parse_file)
- if not content:
- continue
- if isinstance(_parser, SoSUsernameParser):
- _parser.load_usernames_into_map(content)
- elif isinstance(_parser, SoSHostnameParser):
- if 'hostname' in parse_file:
- _parser.load_hostname_into_map(
- content.splitlines()[0]
- )
- elif 'etc/hosts' in parse_file:
- _parser.load_hostname_from_etc_hosts(
- content
- )
- else:
- for line in content.splitlines():
- self.obfuscate_line(line)
- except Exception as err:
- self.log_info(
- "Could not prepare %s from %s (archive: %s): %s"
- % (_parser.name, parse_file, _arc.archive_name,
- err)
- )
+ for prepper in self.get_preppers():
+ for archive in self.report_paths:
+ self._prepare_archive_with_prepper(archive, prepper)
def obfuscate_report(self, archive):
"""Individually handle each archive or directory we've discovered by
diff --git a/sos/cleaner/archives/__init__.py b/sos/cleaner/archives/__init__.py
index 6a6f46d9..a185ae34 100644
--- a/sos/cleaner/archives/__init__.py
+++ b/sos/cleaner/archives/__init__.py
@@ -166,8 +166,12 @@ class SoSObfuscationArchive():
)
return ''
else:
- with open(self.format_file_name(fname), 'r') as to_read:
- return to_read.read()
+ try:
+ with open(self.format_file_name(fname), 'r') as to_read:
+ return to_read.read()
+ except Exception as err:
+ self.log_debug(f"Failed to get contents of {fname}: {err}")
+ return ''
def extract(self, quiet=False):
if self.is_tarfile:
diff --git a/sos/cleaner/preppers/__init__.py b/sos/cleaner/preppers/__init__.py
new file mode 100644
index 00000000..b1487354
--- /dev/null
+++ b/sos/cleaner/preppers/__init__.py
@@ -0,0 +1,125 @@
+# Copyright 2023 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+import logging
+
+
+class SoSPrepper():
+    """
+    Base class for preppers: helpers that seed the loaded mappings with
+    selected items from an sos report before the full obfuscation routine
+    begins.
+
+    This work used to be handled directly within archives, which was
+    cumbersome and did not allow for all the flexibility we could use.
+
+    Preppers are separate from parsers but lean on them: files highlighted
+    by a Prepper() are fed to the matching parser so that parser-matched
+    strings reach the appropriate mapping for initial obfuscation.
+
+    Ordering is controlled by `priority`; a lower value means the prepper
+    should run sooner than those with higher values.
+
+    There are two ways for a prepper to prepare a map:
+
+        * `Prepper._get_$parser_file_list()` yields filenames that exist in
+          target archives, to be read and handed to the named parser
+        * `Prepper._get_items_for_$map()` pulls content directly out of
+          select files, without the assistance of a parser
+
+    Finally, the `regex_items` dict stores individual regex items for
+    parsers that rely on them. These are added after all files and other
+    individual items are handled. Keys are parser/mapping names and values
+    are sets of items, so preppers should add to them like so:
+
+        self.regex_items['hostname'].add('myhostname')
+    """
+
+    name = 'Undefined'
+    priority = 100
+
+    def __init__(self):
+        parser_names = (
+            'hostname', 'ip', 'ipv6', 'keyword', 'mac', 'username'
+        )
+        # every parser/mapping name gets its own, initially empty, set
+        self.regex_items = {pname: set() for pname in parser_names}
+        self.soslog = logging.getLogger('sos')
+        self.ui_log = logging.getLogger('sos_ui')
+
+    def _fmt_log_msg(self, msg):
+        """Prefix ``msg`` with a tag identifying this prepper by name."""
+        return f"[prepper:{self.name}] {msg}"
+
+    def log_debug(self, msg):
+        """Send ``msg`` to the 'sos' logger at debug level."""
+        self.soslog.debug(self._fmt_log_msg(msg))
+
+    def log_info(self, msg):
+        """Send ``msg`` to the 'sos' logger at info level."""
+        self.soslog.info(self._fmt_log_msg(msg))
+
+    def log_error(self, msg):
+        """Send ``msg`` to the 'sos' logger at error level."""
+        self.soslog.error(self._fmt_log_msg(msg))
+
+    def get_parser_file_list(self, parser, archive):
+        """
+        Dispatch to this Prepper's file-list method for the named parser,
+        so individual Preppers can serve several parsers without repeating
+        the logic that works out which parser is being asked about.
+
+        By convention a Prepper defines `_get_$parser_file_list()` methods,
+        e.g. `_get_hostname_file_list()` supplies filenames for the
+        hostname parser. A Prepper that has no such method for the given
+        parser is handled here, so that it does not need to handle that
+        case itself.
+
+        :param parser:  The _name_ of the parser to get a file list for
+        :type parser:   ``str``
+
+        :param archive: The archive we are operating on currently for the
+                        specified parser
+        :type archive:  ``SoSObfuscationArchive``
+
+        :returns: A list of filenames within the archive to prep with
+        :rtype: ``list``
+        """
+        try:
+            lister = getattr(self, f"_get_{parser}_file_list")
+        except AttributeError:
+            # nothing defined for this parser
+            return []
+        return lister(archive)
+
+    def get_items_for_map(self, mapping, archive):
+        """
+        Counterpart of `get_parser_file_list()` for mappings: dispatch to
+        this Prepper's `_get_items_for_$map()` method for the named
+        mapping, so Preppers can supply items for several maps without
+        repeating the lookup logic in each call.
+
+        :param mapping: The _name_ of the mapping to get items for
+        :type mapping:  ``str``
+
+        :param archive: The archive we are operating on currently for the
+                        specified parser
+        :type archive:  ``SoSObfuscationArchive``
+
+        :returns: A list of distinct items to obfuscate without using a
+                  parser
+        :rtype: ``list``
+        """
+        try:
+            getter = getattr(self, f"_get_items_for_{mapping}")
+        except AttributeError:
+            # nothing defined for this mapping
+            return []
+        return getter(archive)
+
+# vim: set et ts=4 sw=4 :
diff --git a/tests/unittests/cleaner_tests.py b/tests/unittests/cleaner_tests.py
index c28239a7..6e0be6c8 100644
--- a/tests/unittests/cleaner_tests.py
+++ b/tests/unittests/cleaner_tests.py
@@ -20,6 +20,10 @@ from sos.cleaner.mappings.mac_map import SoSMacMap
from sos.cleaner.mappings.hostname_map import SoSHostnameMap
from sos.cleaner.mappings.keyword_map import SoSKeywordMap
from sos.cleaner.mappings.ipv6_map import SoSIPv6Map
+from sos.cleaner.preppers import SoSPrepper
+from sos.cleaner.preppers.hostname import HostnamePrepper
+from sos.cleaner.preppers.ip import IPPrepper
+from sos.cleaner.archives.sos import SoSReportArchive
class CleanerMapTests(unittest.TestCase):
@@ -28,7 +32,7 @@ class CleanerMapTests(unittest.TestCase):
self.mac_map = SoSMacMap()
self.ip_map = SoSIPMap()
self.host_map = SoSHostnameMap()
- self.host_map.load_domains_from_options(['redhat.com'])
+ self.host_map.sanitize_item('redhat.com')
self.kw_map = SoSKeywordMap()
self.ipv6_map = SoSIPv6Map()
@@ -152,13 +156,14 @@ class CleanerParserTests(unittest.TestCase):
self.ip_parser = SoSIPParser(config={})
self.ipv6_parser = SoSIPv6Parser(config={})
self.mac_parser = SoSMacParser(config={})
- self.host_parser = SoSHostnameParser(config={},
- opt_domains=['foobar.com'])
- self.kw_parser = SoSKeywordParser(config={}, keywords=['foobar'])
+ self.host_parser = SoSHostnameParser(config={})
+ self.host_parser.mapping.add('foobar.com')
+ self.kw_parser = SoSKeywordParser(config={})
+ self.kw_parser.mapping.add('foobar')
self.kw_parser_none = SoSKeywordParser(config={})
self.kw_parser.generate_item_regexes()
- self.uname_parser = SoSUsernameParser(config={},
- opt_names=['DOMAIN\myusername'])
+ self.uname_parser = SoSUsernameParser(config={})
+ self.uname_parser.mapping.add('DOMAIN\myusername')
def test_ip_parser_valid_ipv4_line(self):
line = 'foobar foo 10.0.0.1/24 barfoo bar'
@@ -210,22 +215,22 @@ class CleanerParserTests(unittest.TestCase):
def test_hostname_load_hostname_string(self):
fqdn = 'myhost.subnet.example.com'
- self.host_parser.load_hostname_into_map(fqdn)
+ self.host_parser.mapping.add(fqdn)
def test_hostname_valid_domain_line(self):
- self.host_parser.load_hostname_into_map('myhost.subnet.example.com')
+ self.host_parser.mapping.add('myhost.subnet.example.com')
line = 'testing myhost.subnet.example.com in a string'
_test = self.host_parser.parse_line(line)[0]
self.assertNotEqual(line, _test)
def test_hostname_short_name_in_line(self):
- self.host_parser.load_hostname_into_map('myhost.subnet.example.com')
+ self.host_parser.mapping.add('myhost.subnet.example.com')
line = 'testing just myhost in a line'
_test = self.host_parser.parse_line(line)[0]
self.assertNotEqual(line, _test)
def test_obfuscate_whole_fqdn_for_given_domainname(self):
- self.host_parser.load_hostname_into_map('sostestdomain.domain')
+ self.host_parser.mapping.add('sostestdomain.domain')
line = 'let obfuscate soshost.sostestdomain.domain'
_test = self.host_parser.parse_line(line)[0]
self.assertFalse('soshost' in _test)
@@ -274,3 +279,35 @@ class CleanerParserTests(unittest.TestCase):
line = "DOMAIN\myusername"
_test = self.uname_parser.parse_line(line)[0]
self.assertNotEqual(line, _test)
+
+
+class PrepperTests(unittest.TestCase):
+    """
+    Check that preppers translate parser and mapping names into calls to
+    the matching prepper methods
+    """
+
+    def setUp(self):
+        report_path = (
+            'tests/test_data/sosreport-cleanertest-2021-08-03-qpkxdid.tar.xz'
+        )
+        self.prepper = SoSPrepper()
+        self.archive = SoSReportArchive(archive_path=report_path,
+                                        tmpdir='/tmp')
+        self.host_prepper = HostnamePrepper()
+        self.ipv4_prepper = IPPrepper()
+
+    def test_parser_method_translation(self):
+        """The base prepper has no file list for the hostname parser"""
+        found = self.prepper.get_parser_file_list('hostname', None)
+        self.assertEqual([], found)
+
+    def test_mapping_method_translation(self):
+        """The base prepper has no items for an unknown mapping"""
+        found = self.prepper.get_items_for_map('foobar', None)
+        self.assertEqual([], found)
+
+    def test_hostname_prepper_map_items(self):
+        """The hostname prepper pulls the hostname out of the archive"""
+        found = self.host_prepper.get_items_for_map('hostname', self.archive)
+        self.assertEqual(['cleanertest'], found)
+
+    def test_ipv4_prepper_parser_files(self):
+        """The IP prepper lists its file for the ip parser"""
+        found = self.ipv4_prepper.get_parser_file_list('ip', self.archive)
+        self.assertEqual(['sos_commands/networking/ip_-o_addr'], found)
+
+    def test_ipv4_prepper_invalid_parser_files(self):
+        """The IP prepper lists nothing for a parser it does not know"""
+        found = self.ipv4_prepper.get_parser_file_list('foobar', self.archive)
+        self.assertEqual([], found)
+