diff options
-rw-r--r-- | man/en/sos-clean.1 | 7 | ||||
-rw-r--r-- | sos/cleaner/__init__.py | 16 | ||||
-rw-r--r-- | sos/cleaner/mappings/__init__.py | 2 | ||||
-rw-r--r-- | sos/cleaner/mappings/keyword_map.py | 31 | ||||
-rw-r--r-- | sos/cleaner/parsers/keyword_parser.py | 38 | ||||
-rw-r--r-- | sos/collector/__init__.py | 4 | ||||
-rw-r--r-- | sos/report/__init__.py | 4 | ||||
-rw-r--r-- | tests/cleaner_tests.py | 20 |
8 files changed, 117 insertions, 5 deletions
diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1 index 85bd6dfc..fab677fc 100644 --- a/man/en/sos-clean.1 +++ b/man/en/sos-clean.1 @@ -47,6 +47,13 @@ match a domain given via this option will also be obfuscated. For example, if \fB\-\-domains redhat.com\fR is specified, then 'redhat.com' will be obfuscated, as will 'www.redhat.com' and subdomains such as 'foo.redhat.com'. .TP +.B \-\-keywords KEYWORDS +Provide a comma-delimited list of keywords to scrub in addition to the default parsers. + +Keywords provided by this option will be obfuscated as "obfuscatedwordX" where X is an +integer based on the keyword's index in the parser. Note that keywords will be replaced as +both standalone words and in substring matches. +.TP .B \-\-map FILE Provide a location to a valid mapping file to use as a reference for existing obfuscation pairs. If one is found, the contents are loaded before parsing is started. This allows consistency between diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index 44e32086..a2647d70 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -25,6 +25,7 @@ from sos.component import SoSComponent from sos.cleaner.parsers.ip_parser import SoSIPParser from sos.cleaner.parsers.mac_parser import SoSMacParser from sos.cleaner.parsers.hostname_parser import SoSHostnameParser +from sos.cleaner.parsers.keyword_parser import SoSKeywordParser from sos.cleaner.obfuscation_archive import SoSObfuscationArchive from sos.utilities import get_human_readable from textwrap import fill @@ -41,6 +42,7 @@ class SoSCleaner(SoSComponent): arg_defaults = { 'domains': [], 'jobs': 4, + 'keywords': [], 'map_file': '/etc/sos/cleaner/default_mapping', 'no_update': False, 'target': '' @@ -80,7 +82,8 @@ class SoSCleaner(SoSComponent): self.parsers = [ SoSHostnameParser(self.opts.map_file, self.opts.domains), SoSIPParser(self.opts.map_file), - SoSMacParser(self.opts.map_file) + SoSMacParser(self.opts.map_file), + SoSKeywordParser(self.opts.map_file, self.opts.keywords) ] self.log_info("Cleaner initialized. From cmdline: %s" @@ -161,6 +164,9 @@ third party. help='List of domain names to obfuscate') clean_grp.add_argument('-j', '--jobs', default=4, type=int, help='Number of concurrent archives to clean') + clean_grp.add_argument('--keywords', action='extend', default=[], + dest='keywords', + help='List of keywords to obfuscate') clean_grp.add_argument('--map', dest='map_file', default='/etc/sos/cleaner/default_mapping', help=('Provide a previously generated mapping ' @@ -530,15 +536,17 @@ third party. :param archive SoSObfuscationArchive: An open archive object """ for parser in self.parsers: - # this is a bit clunky, but we need to load this particular - # parser in a different way due to how hostnames are validated for - # obfuscation + if not parser.prep_map_file: + continue prep_file = archive.get_file_path(parser.prep_map_file) if not prep_file: self.log_debug("Could not prepare %s: %s does not exist" % (parser.name, parser.prep_map_file), caller=archive.archive_name) continue + # this is a bit clunky, but we need to load this particular + # parser in a different way due to how hostnames are validated for + # obfuscation if isinstance(parser, SoSHostnameParser): with open(prep_file, 'r') as host_file: hostname = host_file.readline().strip() diff --git a/sos/cleaner/mappings/__init__.py b/sos/cleaner/mappings/__init__.py index 27fd1d50..dd464e5a 100644 --- a/sos/cleaner/mappings/__init__.py +++ b/sos/cleaner/mappings/__init__.py @@ -21,7 +21,7 @@ class SoSMap(): items. """ - ignore_list = [] + ignore_matches = [] def __init__(self): self.dataset = {} diff --git a/sos/cleaner/mappings/keyword_map.py b/sos/cleaner/mappings/keyword_map.py new file mode 100644 index 00000000..ddc324c0 --- /dev/null +++ b/sos/cleaner/mappings/keyword_map.py @@ -0,0 +1,31 @@ +# Copyright 2020 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com> + +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. +# +# See the LICENSE file in the source distribution for further information. + +from sos.cleaner.mappings import SoSMap + + +class SoSKeywordMap(SoSMap): + """Mapping store for user provided keywords + + By default, this map will perform no matching or obfuscation. It relies + entirely on the use of the --keywords option by the user. + + Any keywords provided are then obfuscated into 'obfuscatedwordX', where X + is an incrementing integer. + """ + + word_count = 0 + + def sanitize_item(self, item): + _ob_item = "obfuscatedword%s" % self.word_count + self.word_count += 1 + if _ob_item in self.dataset.values(): + return self.sanitize_item(item) + return _ob_item diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py new file mode 100644 index 00000000..169ce759 --- /dev/null +++ b/sos/cleaner/parsers/keyword_parser.py @@ -0,0 +1,38 @@ +# Copyright 2020 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com> + +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. +# +# See the LICENSE file in the source distribution for further information. + +from sos.cleaner.parsers import SoSCleanerParser +from sos.cleaner.mappings.keyword_map import SoSKeywordMap + + +class SoSKeywordParser(SoSCleanerParser): + """Handles parsing for user provided keywords + """ + + name = 'Keyword Parser' + map_file_key = 'keyword_map' + prep_map_file = '' + + def __init__(self, conf_file=None, keywords=None): + self.mapping = SoSKeywordMap() + self.user_keywords = [] + super(SoSKeywordParser, self).__init__(conf_file) + for _keyword in self.mapping.dataset.keys(): + self.user_keywords.append(_keyword) + if keywords: + self.user_keywords.extend(keywords) + + def parse_line(self, line): + count = 0 + for keyword in self.user_keywords: + if keyword in line: + line = line.replace(keyword, self.mapping.get(keyword)) + count += 1 + return line, count diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py index 57c767ac..e24f1801 100644 --- a/sos/collector/__init__.py +++ b/sos/collector/__init__.py @@ -63,6 +63,7 @@ class SoSCollector(SoSComponent): 'group': None, 'image': '', 'jobs': 4, + 'keywords': [], 'label': '', 'list_options': False, 'log_size': 0, @@ -344,6 +345,9 @@ class SoSCollector(SoSComponent): cleaner_grp.add_argument('--domains', dest='domains', default=[], action='extend', help='Additional domain names to obfuscate') + cleaner_grp.add_argument('--keywords', action='extend', default=[], + dest='keywords', + help='List of keywords to obfuscate') cleaner_grp.add_argument('--no-update', action='store_true', default=False, dest='no_update', help='Do not update the default cleaner map') diff --git a/sos/report/__init__.py b/sos/report/__init__.py index 1cc10f0b..2c62bf61 100644 --- a/sos/report/__init__.py +++ b/sos/report/__init__.py @@ -85,6 +85,7 @@ class SoSReport(SoSComponent): 'dry_run': False, 'experimental': False, 'enableplugins': [], + 'keywords': [], 'plugopts': [], 'label': '', 'list_plugins': False, @@ -291,6 +292,9 @@ class SoSReport(SoSComponent): cleaner_grp.add_argument('--domains', dest='domains', default=[], action='extend', help='Additional domain names to obfuscate') + cleaner_grp.add_argument('--keywords', action='extend', default=[], + dest='keywords', + help='List of keywords to obfuscate') cleaner_grp.add_argument('--no-update', action='store_true', default=False, dest='no_update', help='Do not update the default cleaner map') diff --git a/tests/cleaner_tests.py b/tests/cleaner_tests.py index 4e292a05..aee1147d 100644 --- a/tests/cleaner_tests.py +++ b/tests/cleaner_tests.py @@ -12,9 +12,11 @@ from ipaddress import ip_interface from sos.cleaner.parsers.ip_parser import SoSIPParser from sos.cleaner.parsers.mac_parser import SoSMacParser from sos.cleaner.parsers.hostname_parser import SoSHostnameParser +from sos.cleaner.parsers.keyword_parser import SoSKeywordParser from sos.cleaner.mappings.ip_map import SoSIPMap from sos.cleaner.mappings.mac_map import SoSMacMap from sos.cleaner.mappings.hostname_map import SoSHostnameMap +from sos.cleaner.mappings.keyword_map import SoSKeywordMap class CleanerMapTests(unittest.TestCase): @@ -23,6 +25,7 @@ class CleanerMapTests(unittest.TestCase): self.mac_map = SoSMacMap() self.ip_map = SoSIPMap() self.host_map = SoSHostnameMap(['redhat.com']) + self.kw_map = SoSKeywordMap() def test_mac_map_obfuscate_valid_v4(self): _test = self.mac_map.get('12:34:56:78:90:ab') @@ -89,12 +92,19 @@ class CleanerMapTests(unittest.TestCase): _test = self.host_map.get('example.foobar.com') self.assertEqual(_test, 'example.foobar.com') + def test_keyword_single(self): + _test = self.kw_map.get('foobar') + self.assertEqual(_test, 'obfuscatedword0') + + class CleanerParserTests(unittest.TestCase): def setUp(self): self.ip_parser = SoSIPParser() self.mac_parser = SoSMacParser() self.host_parser = SoSHostnameParser(opt_domains='foobar.com') + self.kw_parser = SoSKeywordParser(keywords=['foobar']) + self.kw_parser_none = SoSKeywordParser() def test_ip_parser_valid_ipv4_line(self): line = 'foobar foo 10.0.0.1/24 barfoo bar' @@ -135,3 +145,13 @@ class CleanerParserTests(unittest.TestCase): line = 'testing just myhost in a line' _test = self.host_parser.parse_line(line)[0] self.assertNotEqual(line, _test) + + def test_keyword_parser_valid_line(self): + line = 'this is my foobar test line' + _test = self.kw_parser.parse_line(line)[0] + self.assertNotEqual(line, _test) + + def test_keyword_parser_no_change_by_default(self): + line = 'this is my foobar test line' + _test = self.kw_parser_none.parse_line(line)[0] + self.assertEqual(line, _test) |