about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- man/en/sos-clean.1 | 7
-rw-r--r-- sos/cleaner/__init__.py | 16
-rw-r--r-- sos/cleaner/mappings/__init__.py | 2
-rw-r--r-- sos/cleaner/mappings/keyword_map.py | 31
-rw-r--r-- sos/cleaner/parsers/keyword_parser.py | 38
-rw-r--r-- sos/collector/__init__.py | 4
-rw-r--r-- sos/report/__init__.py | 4
-rw-r--r-- tests/cleaner_tests.py | 20
8 files changed, 117 insertions, 5 deletions
diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1
index 85bd6dfc..fab677fc 100644
--- a/man/en/sos-clean.1
+++ b/man/en/sos-clean.1
@@ -47,6 +47,13 @@ match a domain given via this option will also be obfuscated.
For example, if \fB\-\-domains redhat.com\fR is specified, then 'redhat.com' will
be obfuscated, as will 'www.redhat.com' and subdomains such as 'foo.redhat.com'.
.TP
+.B \-\-keywords KEYWORDS
+Provide a comma-delimited list of keywords to scrub in addition to the default parsers.
+
+Keywords provided by this option will be obfuscated as "obfuscatedwordX" where X is an
+integer based on the keyword's index in the parser. Note that keywords will be replaced as
+both standalone words and in substring matches.
+.TP
.B \-\-map FILE
Provide a location to a valid mapping file to use as a reference for existing obfuscation pairs.
If one is found, the contents are loaded before parsing is started. This allows consistency between
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
index 44e32086..a2647d70 100644
--- a/sos/cleaner/__init__.py
+++ b/sos/cleaner/__init__.py
@@ -25,6 +25,7 @@ from sos.component import SoSComponent
from sos.cleaner.parsers.ip_parser import SoSIPParser
from sos.cleaner.parsers.mac_parser import SoSMacParser
from sos.cleaner.parsers.hostname_parser import SoSHostnameParser
+from sos.cleaner.parsers.keyword_parser import SoSKeywordParser
from sos.cleaner.obfuscation_archive import SoSObfuscationArchive
from sos.utilities import get_human_readable
from textwrap import fill
@@ -41,6 +42,7 @@ class SoSCleaner(SoSComponent):
arg_defaults = {
'domains': [],
'jobs': 4,
+ 'keywords': [],
'map_file': '/etc/sos/cleaner/default_mapping',
'no_update': False,
'target': ''
@@ -80,7 +82,8 @@ class SoSCleaner(SoSComponent):
self.parsers = [
SoSHostnameParser(self.opts.map_file, self.opts.domains),
SoSIPParser(self.opts.map_file),
- SoSMacParser(self.opts.map_file)
+ SoSMacParser(self.opts.map_file),
+ SoSKeywordParser(self.opts.map_file, self.opts.keywords)
]
self.log_info("Cleaner initialized. From cmdline: %s"
@@ -161,6 +164,9 @@ third party.
help='List of domain names to obfuscate')
clean_grp.add_argument('-j', '--jobs', default=4, type=int,
help='Number of concurrent archives to clean')
+ clean_grp.add_argument('--keywords', action='extend', default=[],
+ dest='keywords',
+ help='List of keywords to obfuscate')
clean_grp.add_argument('--map', dest='map_file',
default='/etc/sos/cleaner/default_mapping',
help=('Provide a previously generated mapping '
@@ -530,15 +536,17 @@ third party.
:param archive SoSObfuscationArchive: An open archive object
"""
for parser in self.parsers:
- # this is a bit clunky, but we need to load this particular
- # parser in a different way due to how hostnames are validated for
- # obfuscation
+ if not parser.prep_map_file:
+ continue
prep_file = archive.get_file_path(parser.prep_map_file)
if not prep_file:
self.log_debug("Could not prepare %s: %s does not exist"
% (parser.name, parser.prep_map_file),
caller=archive.archive_name)
continue
+ # this is a bit clunky, but we need to load this particular
+ # parser in a different way due to how hostnames are validated for
+ # obfuscation
if isinstance(parser, SoSHostnameParser):
with open(prep_file, 'r') as host_file:
hostname = host_file.readline().strip()
diff --git a/sos/cleaner/mappings/__init__.py b/sos/cleaner/mappings/__init__.py
index 27fd1d50..dd464e5a 100644
--- a/sos/cleaner/mappings/__init__.py
+++ b/sos/cleaner/mappings/__init__.py
@@ -21,7 +21,7 @@ class SoSMap():
items.
"""
- ignore_list = []
+ ignore_matches = []
def __init__(self):
self.dataset = {}
diff --git a/sos/cleaner/mappings/keyword_map.py b/sos/cleaner/mappings/keyword_map.py
new file mode 100644
index 00000000..ddc324c0
--- /dev/null
+++ b/sos/cleaner/mappings/keyword_map.py
@@ -0,0 +1,31 @@
+# Copyright 2020 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from sos.cleaner.mappings import SoSMap
+
+
+class SoSKeywordMap(SoSMap):
+    """Mapping store for user provided keywords
+
+    By default, this map will perform no matching or obfuscation. It relies
+    entirely on the use of the --keywords option by the user.
+
+    Any keywords provided are then obfuscated into 'obfuscatedwordX', where X
+    is an incrementing integer.
+    """
+
+    # Next integer suffix to try. The first increment in sanitize_item()
+    # rebinds this name on the instance, so each map keeps its own counter.
+    word_count = 0
+
+    def sanitize_item(self, item):
+        """Return the next unused 'obfuscatedwordX' replacement
+
+        Candidates that already appear as values in the dataset are
+        skipped, so a new keyword never shares a replacement with an
+        existing entry. The search is a loop rather than recursion so that
+        a dataset already holding many obfuscated words cannot exhaust the
+        interpreter's recursion limit.
+
+        :param item str: The keyword being obfuscated; the generated value
+                         does not depend on its content
+        :returns: The obfuscated replacement for the keyword
+        :rtype: str
+        """
+        while True:
+            _ob_item = "obfuscatedword%s" % self.word_count
+            self.word_count += 1
+            if _ob_item not in self.dataset.values():
+                return _ob_item
diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py
new file mode 100644
index 00000000..169ce759
--- /dev/null
+++ b/sos/cleaner/parsers/keyword_parser.py
@@ -0,0 +1,38 @@
+# Copyright 2020 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from sos.cleaner.parsers import SoSCleanerParser
+from sos.cleaner.mappings.keyword_map import SoSKeywordMap
+
+
+class SoSKeywordParser(SoSCleanerParser):
+    """Handles parsing for user provided keywords
+
+    Keywords are matched as plain, case-sensitive substrings of each line,
+    so they are replaced both as standalone words and inside longer words.
+    """
+
+    name = 'Keyword Parser'
+    map_file_key = 'keyword_map'
+    # empty: there is no file inside an archive to prepare this parser from
+    prep_map_file = ''
+
+    def __init__(self, conf_file=None, keywords=None):
+        """Build the keyword list from the mapping and the user's keywords
+
+        :param conf_file str: Passed through to the parent initializer
+        :param keywords list: Additional keywords to obfuscate, if any
+        """
+        self.mapping = SoSKeywordMap()
+        self.user_keywords = []
+        super(SoSKeywordParser, self).__init__(conf_file)
+        # keywords the mapping already knows about after the parent
+        # initializer has run come first, followed by the ones given here
+        _candidates = list(self.mapping.dataset.keys())
+        if keywords:
+            _candidates.extend(keywords)
+        for _keyword in _candidates:
+            # an empty keyword would match every line, and a repeated one
+            # only adds redundant passes over each line
+            if _keyword and _keyword not in self.user_keywords:
+                self.user_keywords.append(_keyword)
+
+    def parse_line(self, line):
+        """Obfuscate every known keyword found in a line
+
+        :param line str: The line to scrub
+        :returns: The scrubbed line and the number of distinct keywords
+                  that were replaced in it
+        :rtype: tuple(str, int)
+        """
+        count = 0
+        # longest first, so that a keyword which is a substring of another
+        # keyword cannot split the longer one and leave part of it exposed
+        for keyword in sorted(self.user_keywords, key=len, reverse=True):
+            # guard against empty entries added to the list after init
+            if keyword and keyword in line:
+                line = line.replace(keyword, self.mapping.get(keyword))
+                count += 1
+        return line, count
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
index 57c767ac..e24f1801 100644
--- a/sos/collector/__init__.py
+++ b/sos/collector/__init__.py
@@ -63,6 +63,7 @@ class SoSCollector(SoSComponent):
'group': None,
'image': '',
'jobs': 4,
+ 'keywords': [],
'label': '',
'list_options': False,
'log_size': 0,
@@ -344,6 +345,9 @@ class SoSCollector(SoSComponent):
cleaner_grp.add_argument('--domains', dest='domains', default=[],
action='extend',
help='Additional domain names to obfuscate')
+ cleaner_grp.add_argument('--keywords', action='extend', default=[],
+ dest='keywords',
+ help='List of keywords to obfuscate')
cleaner_grp.add_argument('--no-update', action='store_true',
default=False, dest='no_update',
help='Do not update the default cleaner map')
diff --git a/sos/report/__init__.py b/sos/report/__init__.py
index 1cc10f0b..2c62bf61 100644
--- a/sos/report/__init__.py
+++ b/sos/report/__init__.py
@@ -85,6 +85,7 @@ class SoSReport(SoSComponent):
'dry_run': False,
'experimental': False,
'enableplugins': [],
+ 'keywords': [],
'plugopts': [],
'label': '',
'list_plugins': False,
@@ -291,6 +292,9 @@ class SoSReport(SoSComponent):
cleaner_grp.add_argument('--domains', dest='domains', default=[],
action='extend',
help='Additional domain names to obfuscate')
+ cleaner_grp.add_argument('--keywords', action='extend', default=[],
+ dest='keywords',
+ help='List of keywords to obfuscate')
cleaner_grp.add_argument('--no-update', action='store_true',
default=False, dest='no_update',
help='Do not update the default cleaner map')
diff --git a/tests/cleaner_tests.py b/tests/cleaner_tests.py
index 4e292a05..aee1147d 100644
--- a/tests/cleaner_tests.py
+++ b/tests/cleaner_tests.py
@@ -12,9 +12,11 @@ from ipaddress import ip_interface
from sos.cleaner.parsers.ip_parser import SoSIPParser
from sos.cleaner.parsers.mac_parser import SoSMacParser
from sos.cleaner.parsers.hostname_parser import SoSHostnameParser
+from sos.cleaner.parsers.keyword_parser import SoSKeywordParser
from sos.cleaner.mappings.ip_map import SoSIPMap
from sos.cleaner.mappings.mac_map import SoSMacMap
from sos.cleaner.mappings.hostname_map import SoSHostnameMap
+from sos.cleaner.mappings.keyword_map import SoSKeywordMap
class CleanerMapTests(unittest.TestCase):
@@ -23,6 +25,7 @@ class CleanerMapTests(unittest.TestCase):
self.mac_map = SoSMacMap()
self.ip_map = SoSIPMap()
self.host_map = SoSHostnameMap(['redhat.com'])
+ self.kw_map = SoSKeywordMap()
def test_mac_map_obfuscate_valid_v4(self):
_test = self.mac_map.get('12:34:56:78:90:ab')
@@ -89,12 +92,19 @@ class CleanerMapTests(unittest.TestCase):
_test = self.host_map.get('example.foobar.com')
self.assertEqual(_test, 'example.foobar.com')
+    def test_keyword_single(self):
+        # the first keyword handed to a fresh map gets suffix 0
+        obfuscated = self.kw_map.get('foobar')
+        self.assertEqual(obfuscated, 'obfuscatedword0')
+
+
class CleanerParserTests(unittest.TestCase):
def setUp(self):
self.ip_parser = SoSIPParser()
self.mac_parser = SoSMacParser()
self.host_parser = SoSHostnameParser(opt_domains='foobar.com')
+ self.kw_parser = SoSKeywordParser(keywords=['foobar'])
+ self.kw_parser_none = SoSKeywordParser()
def test_ip_parser_valid_ipv4_line(self):
line = 'foobar foo 10.0.0.1/24 barfoo bar'
@@ -135,3 +145,13 @@ class CleanerParserTests(unittest.TestCase):
line = 'testing just myhost in a line'
_test = self.host_parser.parse_line(line)[0]
self.assertNotEqual(line, _test)
+
+    def test_keyword_parser_valid_line(self):
+        # self.kw_parser is configured with the keyword 'foobar'
+        line = 'this is my foobar test line'
+        _test = self.kw_parser.parse_line(line)[0]
+        self.assertNotEqual(line, _test)
+        # a changed line is not enough: the keyword itself must be gone
+        self.assertNotIn('foobar', _test)
+
+    def test_keyword_parser_no_change_by_default(self):
+        # a parser built without keywords must hand the line back untouched
+        original = 'this is my foobar test line'
+        parsed, _count = self.kw_parser_none.parse_line(original)
+        self.assertEqual(original, parsed)