aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPavel Moravec <pmoravec@redhat.com>2024-02-12 08:30:14 +0100
committerJake Hunsaker <jacob.r.hunsaker@gmail.com>2024-03-11 15:43:32 -0400
commitecda520d73114aab26ac025f89b575dc518c9b02 (patch)
tree0d91bec3f4daeb8aff9577fa19aa7f97276130ae
parenta4b5554bbabb73a4dc3e6a5e18d82961da519183 (diff)
downloadsos-ecda520d73114aab26ac025f89b575dc518c9b02.tar.gz
[cleaner] Add option to skip cleaning files
A new option --skip-cleaning-files / --skip-masking-files allows cleaner to skip cleaning files where the user is certain no sensitive information is present. The option supports globs / wildcards. Relevant: #3469 Closes: #3520 Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
-rw-r--r--man/en/sos-clean.110
-rw-r--r--sos/cleaner/__init__.py33
-rw-r--r--sos/cleaner/archives/__init__.py1
-rw-r--r--sos/cleaner/parsers/__init__.py14
-rw-r--r--sos/cleaner/parsers/hostname_parser.py4
-rw-r--r--sos/cleaner/parsers/ip_parser.py6
-rw-r--r--sos/cleaner/parsers/ipv6_parser.py6
-rw-r--r--sos/cleaner/parsers/keyword_parser.py4
-rw-r--r--sos/cleaner/parsers/mac_parser.py6
-rw-r--r--sos/cleaner/parsers/username_parser.py4
-rw-r--r--sos/collector/__init__.py6
-rw-r--r--sos/report/__init__.py6
12 files changed, 67 insertions, 33 deletions
diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1
index c51f3276..fe3a1d8d 100644
--- a/man/en/sos-clean.1
+++ b/man/en/sos-clean.1
@@ -5,6 +5,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports
.B sos clean TARGET [options]
[\-\-domains]
[\-\-disable-parsers]
+ [\-\-skip-cleaning-files|\-\-skip-masking-files]
[\-\-keywords]
[\-\-keyword-file]
[\-\-map-file]
@@ -63,6 +64,15 @@ trust in the party/parties that may handle the generated report.
Valid values for this option are currently: \fBhostname\fR, \fBip\fR, \fBipv6\fR,
\fBmac\fR, \fBkeyword\fR, and \fBusername\fR.
.TP
+.B \-\-skip-cleaning-files, \-\-skip-masking-files FILES
+Provide a comma-delimited list of files inside an archive that the cleaner should skip when cleaning.
+
+Globs such as the asterisk are supported, so \fBsos_commands/host/hostname*\fR will match all three
+usual filenames in that directory (\fBhostname\fR, \fBhostnamectl_status\fR and \fBhostname_-f\fR).
+
+Use this option with caution, and only when you are certain the given files do not contain any sensitive
+information.
+.TP
.B \-\-keywords KEYWORDS
Provide a comma-delimited list of keywords to scrub in addition to the default parsers.
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
index e4ab0a15..c4fd53af 100644
--- a/sos/cleaner/__init__.py
+++ b/sos/cleaner/__init__.py
@@ -15,6 +15,7 @@ import os
import shutil
import sos.cleaner.preppers
import tempfile
+import fnmatch
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
@@ -81,6 +82,7 @@ class SoSCleaner(SoSComponent):
'archive_type': 'auto',
'domains': [],
'disable_parsers': [],
+ 'skip_clean_files': [],
'jobs': 4,
'keywords': [],
'keyword_file': None,
@@ -116,7 +118,7 @@ class SoSCleaner(SoSComponent):
# when obfuscating a SoSCollector run during archive extraction
os.makedirs(os.path.join(self.tmpdir, 'cleaner'), exist_ok=True)
- self.validate_parser_values()
+ self.review_parser_values()
self.cleaner_mapping = self.load_map_file()
os.umask(0o77)
@@ -125,13 +127,14 @@ class SoSCleaner(SoSComponent):
self.cleaner_md = self.manifest.components.add_section('cleaner')
+ skip_clean_files = self.opts.skip_clean_files
self.parsers = [
- SoSHostnameParser(self.cleaner_mapping),
- SoSIPParser(self.cleaner_mapping),
- SoSIPv6Parser(self.cleaner_mapping),
- SoSMacParser(self.cleaner_mapping),
- SoSKeywordParser(self.cleaner_mapping),
- SoSUsernameParser(self.cleaner_mapping)
+ SoSHostnameParser(self.cleaner_mapping, skip_clean_files),
+ SoSIPParser(self.cleaner_mapping, skip_clean_files),
+ SoSIPv6Parser(self.cleaner_mapping, skip_clean_files),
+ SoSMacParser(self.cleaner_mapping, skip_clean_files),
+ SoSKeywordParser(self.cleaner_mapping, skip_clean_files),
+ SoSUsernameParser(self.cleaner_mapping, skip_clean_files)
]
for _parser in self.opts.disable_parsers:
@@ -262,6 +265,11 @@ third party.
default=[], dest='disable_parsers',
help=('Disable specific parsers, so that those '
'elements are not obfuscated'))
+ clean_grp.add_argument('--skip-cleaning-files', '--skip-masking-files',
+ action='extend', default=[],
+ dest='skip_clean_files',
+ help=('List of files to skip/ignore during '
+ 'cleaning. Globs are supported.'))
clean_grp.add_argument('-j', '--jobs', default=4, type=int,
help='Number of concurrent archives to clean')
clean_grp.add_argument('--keywords', action='extend', default=[],
@@ -323,10 +331,11 @@ third party.
if self.nested_archive:
self.nested_archive.ui_name = self.nested_archive.description
- def validate_parser_values(self):
- """Check any values passed to the parsers via the commandline, e.g.
- the --domains option, to ensure that they are valid for the parser in
- question.
+ def review_parser_values(self):
+ """Check any values passed to the parsers via the commandline:
+ - For the --domains option, ensure that they are valid for the parser
+ in question.
+ - Convert --skip-cleaning-files from globs to regular expressions.
"""
for _dom in self.opts.domains:
if len(_dom.split('.')) < 2:
@@ -334,6 +343,8 @@ third party.
f"Invalid value '{_dom}' given: --domains values must be "
"actual domains"
)
+ self.opts.skip_clean_files = [fnmatch.translate(p) for p in
+ self.opts.skip_clean_files]
def execute(self):
"""SoSCleaner will begin by inspecting the TARGET option to determine
diff --git a/sos/cleaner/archives/__init__.py b/sos/cleaner/archives/__init__.py
index 404a2de9..86aa8e04 100644
--- a/sos/cleaner/archives/__init__.py
+++ b/sos/cleaner/archives/__init__.py
@@ -50,7 +50,6 @@ class SoSObfuscationArchive():
type_name = 'undetermined'
description = 'undetermined'
is_nested = False
- skip_files = []
prep_files = {}
def __init__(self, archive_path, tmpdir):
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
index 5c802119..73dbe656 100644
--- a/sos/cleaner/parsers/__init__.py
+++ b/sos/cleaner/parsers/__init__.py
@@ -42,22 +42,24 @@ class SoSCleanerParser():
name = 'Undefined Parser'
regex_patterns = []
skip_line_patterns = []
- skip_files = []
+ parser_skip_files = [] # list of skip files relevant to a parser
+ skip_clean_files = [] # list of global skip files from cmdline arguments
map_file_key = 'unset'
compile_regexes = True
- def __init__(self, config={}):
+ def __init__(self, config={}, skip_clean_files=[]):
if self.map_file_key in config:
self.mapping.conf_update(config[self.map_file_key])
+ self.skip_clean_files = skip_clean_files
self._generate_skip_regexes()
def _generate_skip_regexes(self):
- """Generate the regexes for the parser's configured `skip_files`,
- so that we don't regenerate them on every file being examined for if
- the parser should skip a given file.
+ """Generate the regexes for the parser's configured parser_skip_files
+ or global skip_clean_files once, so they are not recompiled for every
+ file that is checked to see whether the parser should skip it.
"""
self.skip_patterns = []
- for p in self.skip_files:
+ for p in self.parser_skip_files + self.skip_clean_files:
self.skip_patterns.append(re.compile(p))
def generate_item_regexes(self):
diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
index a7396298..642aa05d 100644
--- a/sos/cleaner/parsers/hostname_parser.py
+++ b/sos/cleaner/parsers/hostname_parser.py
@@ -21,9 +21,9 @@ class SoSHostnameParser(SoSCleanerParser):
r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
]
- def __init__(self, config):
+ def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSHostnameMap()
- super(SoSHostnameParser, self).__init__(config)
+ super(SoSHostnameParser, self).__init__(config, skip_clean_files)
def parse_line(self, line):
"""This will be called for every line in every file we process, so that
diff --git a/sos/cleaner/parsers/ip_parser.py b/sos/cleaner/parsers/ip_parser.py
index d5522ac2..f6d464a5 100644
--- a/sos/cleaner/parsers/ip_parser.py
+++ b/sos/cleaner/parsers/ip_parser.py
@@ -25,7 +25,7 @@ class SoSIPParser(SoSCleanerParser):
r'.*dnf\[.*\]:'
]
- skip_files = [
+ parser_skip_files = [
# skip these as version numbers will frequently look like IP addresses
# when using regex matching
'installed-debs',
@@ -44,6 +44,6 @@ class SoSIPParser(SoSCleanerParser):
map_file_key = 'ip_map'
compile_regexes = False
- def __init__(self, config):
+ def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSIPMap()
- super(SoSIPParser, self).__init__(config)
+ super(SoSIPParser, self).__init__(config, skip_clean_files)
diff --git a/sos/cleaner/parsers/ipv6_parser.py b/sos/cleaner/parsers/ipv6_parser.py
index b209c646..dfd7282a 100644
--- a/sos/cleaner/parsers/ipv6_parser.py
+++ b/sos/cleaner/parsers/ipv6_parser.py
@@ -29,15 +29,15 @@ class SoSIPv6Parser(SoSCleanerParser):
r"(([0-9a-f]{1,4}(:[0-9a-f]{0,4}){0,5}))([^.])::(([0-9a-f]{1,4}"
r"(:[0-9a-f]{1,4}){0,5})?))(/\d{1,3})?(?![:\\a-z0-9])"
]
- skip_files = [
+ parser_skip_files = [
'etc/dnsmasq.conf.*',
'.*modinfo.*',
]
compile_regexes = False
- def __init__(self, config):
+ def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSIPv6Map()
- super(SoSIPv6Parser, self).__init__(config)
+ super(SoSIPv6Parser, self).__init__(config, skip_clean_files)
def get_map_contents(self):
"""Structure the dataset contents properly so that they can be reloaded
diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py
index f611ccd2..3c6c442b 100644
--- a/sos/cleaner/parsers/keyword_parser.py
+++ b/sos/cleaner/parsers/keyword_parser.py
@@ -20,9 +20,9 @@ class SoSKeywordParser(SoSCleanerParser):
name = 'Keyword Parser'
map_file_key = 'keyword_map'
- def __init__(self, config):
+ def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSKeywordMap()
- super(SoSKeywordParser, self).__init__(config)
+ super(SoSKeywordParser, self).__init__(config, skip_clean_files)
def _parse_line(self, line):
return line, 0
diff --git a/sos/cleaner/parsers/mac_parser.py b/sos/cleaner/parsers/mac_parser.py
index 4e790018..74f95a6a 100644
--- a/sos/cleaner/parsers/mac_parser.py
+++ b/sos/cleaner/parsers/mac_parser.py
@@ -43,15 +43,15 @@ class SoSMacParser(SoSCleanerParser):
'53:4f:53',
'534f:53'
)
- skip_files = [
+ parser_skip_files = [
'sos_commands/.*/modinfo.*'
]
map_file_key = 'mac_map'
compile_regexes = False
- def __init__(self, config):
+ def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSMacMap()
- super(SoSMacParser, self).__init__(config)
+ super(SoSMacParser, self).__init__(config, skip_clean_files)
def reduce_mac_match(self, match):
"""Strips away leading and trailing non-alphanum characters from any
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
index 5909f52d..c999ff55 100644
--- a/sos/cleaner/parsers/username_parser.py
+++ b/sos/cleaner/parsers/username_parser.py
@@ -26,9 +26,9 @@ class SoSUsernameParser(SoSCleanerParser):
map_file_key = 'username_map'
regex_patterns = []
- def __init__(self, config):
+ def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSUsernameMap()
- super(SoSUsernameParser, self).__init__(config)
+ super(SoSUsernameParser, self).__init__(config, skip_clean_files)
def _parse_line(self, line):
return line, 0
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
index a03202a2..850e0696 100644
--- a/sos/collector/__init__.py
+++ b/sos/collector/__init__.py
@@ -87,6 +87,7 @@ class SoSCollector(SoSComponent):
'group': None,
'image': '',
'force_pull_image': True,
+ 'skip_clean_files': [],
'jobs': 4,
'journal_size': 0,
'keywords': [],
@@ -483,6 +484,11 @@ class SoSCollector(SoSComponent):
default=[], dest='disable_parsers',
help=('Disable specific parsers, so that '
'those elements are not obfuscated'))
+ cleaner_grp.add_argument('--skip-cleaning-files',
+ '--skip-masking-files', action='extend',
+ default=[], dest='skip_clean_files',
+ help=('List of files to skip/ignore during '
+ 'cleaning. Globs are supported.'))
cleaner_grp.add_argument('--keywords', action='extend', default=[],
dest='keywords',
help='List of keywords to obfuscate')
diff --git a/sos/report/__init__.py b/sos/report/__init__.py
index f137b8c4..bda75844 100644
--- a/sos/report/__init__.py
+++ b/sos/report/__init__.py
@@ -88,6 +88,7 @@ class SoSReport(SoSComponent):
'desc': '',
'domains': [],
'disable_parsers': [],
+ 'skip_clean_files': [],
'dry_run': False,
'estimate_only': False,
'experimental': False,
@@ -358,6 +359,11 @@ class SoSReport(SoSComponent):
default=[], dest='disable_parsers',
help=('Disable specific parsers, so that '
'those elements are not obfuscated'))
+ cleaner_grp.add_argument('--skip-cleaning-files',
+ '--skip-masking-files', action='extend',
+ default=[], dest='skip_clean_files',
+ help=('List of files to skip/ignore during '
+ 'cleaning. Globs are supported.'))
cleaner_grp.add_argument('--keywords', action='extend', default=[],
dest='keywords',
help='List of keywords to obfuscate')