diff options
author | Pavel Moravec <pmoravec@redhat.com> | 2024-02-12 08:30:14 +0100 |
---|---|---|
committer | Jake Hunsaker <jacob.r.hunsaker@gmail.com> | 2024-03-11 15:43:32 -0400 |
commit | ecda520d73114aab26ac025f89b575dc518c9b02 (patch) | |
tree | 0d91bec3f4daeb8aff9577fa19aa7f97276130ae | |
parent | a4b5554bbabb73a4dc3e6a5e18d82961da519183 (diff) | |
download | sos-ecda520d73114aab26ac025f89b575dc518c9b02.tar.gz |
[cleaner] Add option to skip cleaning files
A new option --skip-cleaning-files / --skip-masking-files allows cleaner
to skip cleaning files where the user is certain no sensitive information
is present.
The option supports globs / wildcards.
Relevant: #3469
Closes: #3520
Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
-rw-r--r-- | man/en/sos-clean.1 | 10 | ||||
-rw-r--r-- | sos/cleaner/__init__.py | 33 | ||||
-rw-r--r-- | sos/cleaner/archives/__init__.py | 1 | ||||
-rw-r--r-- | sos/cleaner/parsers/__init__.py | 14 | ||||
-rw-r--r-- | sos/cleaner/parsers/hostname_parser.py | 4 | ||||
-rw-r--r-- | sos/cleaner/parsers/ip_parser.py | 6 | ||||
-rw-r--r-- | sos/cleaner/parsers/ipv6_parser.py | 6 | ||||
-rw-r--r-- | sos/cleaner/parsers/keyword_parser.py | 4 | ||||
-rw-r--r-- | sos/cleaner/parsers/mac_parser.py | 6 | ||||
-rw-r--r-- | sos/cleaner/parsers/username_parser.py | 4 | ||||
-rw-r--r-- | sos/collector/__init__.py | 6 | ||||
-rw-r--r-- | sos/report/__init__.py | 6 |
12 files changed, 67 insertions, 33 deletions
diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1 index c51f3276..fe3a1d8d 100644 --- a/man/en/sos-clean.1 +++ b/man/en/sos-clean.1 @@ -5,6 +5,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports .B sos clean TARGET [options] [\-\-domains] [\-\-disable-parsers] + [\-\-skip-cleaning-files|\-\-skip-masking-files] [\-\-keywords] [\-\-keyword-file] [\-\-map-file] @@ -63,6 +64,15 @@ trust in the party/parties that may handle the generated report. Valid values for this option are currently: \fBhostname\fR, \fBip\fR, \fBipv6\fR, \fBmac\fR, \fBkeyword\fR, and \fBusername\fR. .TP +.B \-\-skip-cleaning-files, \-\-skip-masking-files FILES +Provide a comma-delimited list of files inside an archive that the cleaner should skip when cleaning. + +Globs such as the asterisk are supported, so \fBsos_commands/host/hostname*\fR will match all three +usual filenames in that directory (\fBhostname\fR, \fBhostnamectl_status\fR and \fBhostname_-f\fR). + +Use this option with caution, only when you are certain that the given files do not contain any sensitive +information. +.TP .B \-\-keywords KEYWORDS Provide a comma-delimited list of keywords to scrub in addition to the default parsers. 
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index e4ab0a15..c4fd53af 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -15,6 +15,7 @@ import os import shutil import sos.cleaner.preppers import tempfile +import fnmatch from concurrent.futures import ThreadPoolExecutor from datetime import datetime @@ -81,6 +82,7 @@ class SoSCleaner(SoSComponent): 'archive_type': 'auto', 'domains': [], 'disable_parsers': [], + 'skip_clean_files': [], 'jobs': 4, 'keywords': [], 'keyword_file': None, @@ -116,7 +118,7 @@ class SoSCleaner(SoSComponent): # when obfuscating a SoSCollector run during archive extraction os.makedirs(os.path.join(self.tmpdir, 'cleaner'), exist_ok=True) - self.validate_parser_values() + self.review_parser_values() self.cleaner_mapping = self.load_map_file() os.umask(0o77) @@ -125,13 +127,14 @@ class SoSCleaner(SoSComponent): self.cleaner_md = self.manifest.components.add_section('cleaner') + skip_clean_files = self.opts.skip_clean_files self.parsers = [ - SoSHostnameParser(self.cleaner_mapping), - SoSIPParser(self.cleaner_mapping), - SoSIPv6Parser(self.cleaner_mapping), - SoSMacParser(self.cleaner_mapping), - SoSKeywordParser(self.cleaner_mapping), - SoSUsernameParser(self.cleaner_mapping) + SoSHostnameParser(self.cleaner_mapping, skip_clean_files), + SoSIPParser(self.cleaner_mapping, skip_clean_files), + SoSIPv6Parser(self.cleaner_mapping, skip_clean_files), + SoSMacParser(self.cleaner_mapping, skip_clean_files), + SoSKeywordParser(self.cleaner_mapping, skip_clean_files), + SoSUsernameParser(self.cleaner_mapping, skip_clean_files) ] for _parser in self.opts.disable_parsers: @@ -262,6 +265,11 @@ third party. default=[], dest='disable_parsers', help=('Disable specific parsers, so that those ' 'elements are not obfuscated')) + clean_grp.add_argument('--skip-cleaning-files', '--skip-masking-files', + action='extend', default=[], + dest='skip_clean_files', + help=('List of files to skip/ignore during ' + 'cleaning. 
Globs are supported.')) clean_grp.add_argument('-j', '--jobs', default=4, type=int, help='Number of concurrent archives to clean') clean_grp.add_argument('--keywords', action='extend', default=[], @@ -323,10 +331,11 @@ third party. if self.nested_archive: self.nested_archive.ui_name = self.nested_archive.description - def validate_parser_values(self): - """Check any values passed to the parsers via the commandline, e.g. - the --domains option, to ensure that they are valid for the parser in - question. + def review_parser_values(self): + """Check any values passed to the parsers via the commandline: + - For the --domains option, ensure that they are valid for the parser + in question. + - Convert --skip-cleaning-files from globs to regular expressions. """ for _dom in self.opts.domains: if len(_dom.split('.')) < 2: @@ -334,6 +343,8 @@ third party. f"Invalid value '{_dom}' given: --domains values must be " "actual domains" ) + self.opts.skip_clean_files = [fnmatch.translate(p) for p in + self.opts.skip_clean_files] def execute(self): """SoSCleaner will begin by inspecting the TARGET option to determine diff --git a/sos/cleaner/archives/__init__.py b/sos/cleaner/archives/__init__.py index 404a2de9..86aa8e04 100644 --- a/sos/cleaner/archives/__init__.py +++ b/sos/cleaner/archives/__init__.py @@ -50,7 +50,6 @@ class SoSObfuscationArchive(): type_name = 'undetermined' description = 'undetermined' is_nested = False - skip_files = [] prep_files = {} def __init__(self, archive_path, tmpdir): diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py index 5c802119..73dbe656 100644 --- a/sos/cleaner/parsers/__init__.py +++ b/sos/cleaner/parsers/__init__.py @@ -42,22 +42,24 @@ class SoSCleanerParser(): name = 'Undefined Parser' regex_patterns = [] skip_line_patterns = [] - skip_files = [] + parser_skip_files = [] # list of skip files relevant to a parser + skip_clean_files = [] # list of global skip files from cmdline arguments map_file_key = 'unset' 
compile_regexes = True - def __init__(self, config={}): + def __init__(self, config={}, skip_clean_files=[]): if self.map_file_key in config: self.mapping.conf_update(config[self.map_file_key]) + self.skip_clean_files = skip_clean_files self._generate_skip_regexes() def _generate_skip_regexes(self): - """Generate the regexes for the parser's configured `skip_files`, - so that we don't regenerate them on every file being examined for if - the parser should skip a given file. + """Generate the regexes for the parser's configured parser_skip_files + or global skip_clean_files, so that we don't regenerate them on every + file being examined for if the parser should skip a given file. """ self.skip_patterns = [] - for p in self.skip_files: + for p in self.parser_skip_files + self.skip_clean_files: self.skip_patterns.append(re.compile(p)) def generate_item_regexes(self): diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py index a7396298..642aa05d 100644 --- a/sos/cleaner/parsers/hostname_parser.py +++ b/sos/cleaner/parsers/hostname_parser.py @@ -21,9 +21,9 @@ class SoSHostnameParser(SoSCleanerParser): r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))' ] - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSHostnameMap() - super(SoSHostnameParser, self).__init__(config) + super(SoSHostnameParser, self).__init__(config, skip_clean_files) def parse_line(self, line): """This will be called for every line in every file we process, so that diff --git a/sos/cleaner/parsers/ip_parser.py b/sos/cleaner/parsers/ip_parser.py index d5522ac2..f6d464a5 100644 --- a/sos/cleaner/parsers/ip_parser.py +++ b/sos/cleaner/parsers/ip_parser.py @@ -25,7 +25,7 @@ class SoSIPParser(SoSCleanerParser): r'.*dnf\[.*\]:' ] - skip_files = [ + parser_skip_files = [ # skip these as version numbers will frequently look like IP addresses # when using regex matching 'installed-debs', @@ -44,6 +44,6 @@ class 
SoSIPParser(SoSCleanerParser): map_file_key = 'ip_map' compile_regexes = False - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSIPMap() - super(SoSIPParser, self).__init__(config) + super(SoSIPParser, self).__init__(config, skip_clean_files) diff --git a/sos/cleaner/parsers/ipv6_parser.py b/sos/cleaner/parsers/ipv6_parser.py index b209c646..dfd7282a 100644 --- a/sos/cleaner/parsers/ipv6_parser.py +++ b/sos/cleaner/parsers/ipv6_parser.py @@ -29,15 +29,15 @@ class SoSIPv6Parser(SoSCleanerParser): r"(([0-9a-f]{1,4}(:[0-9a-f]{0,4}){0,5}))([^.])::(([0-9a-f]{1,4}" r"(:[0-9a-f]{1,4}){0,5})?))(/\d{1,3})?(?![:\\a-z0-9])" ] - skip_files = [ + parser_skip_files = [ 'etc/dnsmasq.conf.*', '.*modinfo.*', ] compile_regexes = False - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSIPv6Map() - super(SoSIPv6Parser, self).__init__(config) + super(SoSIPv6Parser, self).__init__(config, skip_clean_files) def get_map_contents(self): """Structure the dataset contents properly so that they can be reloaded diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py index f611ccd2..3c6c442b 100644 --- a/sos/cleaner/parsers/keyword_parser.py +++ b/sos/cleaner/parsers/keyword_parser.py @@ -20,9 +20,9 @@ class SoSKeywordParser(SoSCleanerParser): name = 'Keyword Parser' map_file_key = 'keyword_map' - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSKeywordMap() - super(SoSKeywordParser, self).__init__(config) + super(SoSKeywordParser, self).__init__(config, skip_clean_files) def _parse_line(self, line): return line, 0 diff --git a/sos/cleaner/parsers/mac_parser.py b/sos/cleaner/parsers/mac_parser.py index 4e790018..74f95a6a 100644 --- a/sos/cleaner/parsers/mac_parser.py +++ b/sos/cleaner/parsers/mac_parser.py @@ -43,15 +43,15 @@ class SoSMacParser(SoSCleanerParser): '53:4f:53', '534f:53' ) - skip_files = [ + 
parser_skip_files = [ 'sos_commands/.*/modinfo.*' ] map_file_key = 'mac_map' compile_regexes = False - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSMacMap() - super(SoSMacParser, self).__init__(config) + super(SoSMacParser, self).__init__(config, skip_clean_files) def reduce_mac_match(self, match): """Strips away leading and trailing non-alphanum characters from any diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py index 5909f52d..c999ff55 100644 --- a/sos/cleaner/parsers/username_parser.py +++ b/sos/cleaner/parsers/username_parser.py @@ -26,9 +26,9 @@ class SoSUsernameParser(SoSCleanerParser): map_file_key = 'username_map' regex_patterns = [] - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSUsernameMap() - super(SoSUsernameParser, self).__init__(config) + super(SoSUsernameParser, self).__init__(config, skip_clean_files) def _parse_line(self, line): return line, 0 diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py index a03202a2..850e0696 100644 --- a/sos/collector/__init__.py +++ b/sos/collector/__init__.py @@ -87,6 +87,7 @@ class SoSCollector(SoSComponent): 'group': None, 'image': '', 'force_pull_image': True, + 'skip_clean_files': [], 'jobs': 4, 'journal_size': 0, 'keywords': [], @@ -483,6 +484,11 @@ class SoSCollector(SoSComponent): default=[], dest='disable_parsers', help=('Disable specific parsers, so that ' 'those elements are not obfuscated')) + cleaner_grp.add_argument('--skip-cleaning-files', + '--skip-masking-files', action='extend', + default=[], dest='skip_clean_files', + help=('List of files to skip/ignore during ' + 'cleaning. 
Globs are supported.')) cleaner_grp.add_argument('--keywords', action='extend', default=[], dest='keywords', help='List of keywords to obfuscate') diff --git a/sos/report/__init__.py b/sos/report/__init__.py index f137b8c4..bda75844 100644 --- a/sos/report/__init__.py +++ b/sos/report/__init__.py @@ -88,6 +88,7 @@ class SoSReport(SoSComponent): 'desc': '', 'domains': [], 'disable_parsers': [], + 'skip_clean_files': [], 'dry_run': False, 'estimate_only': False, 'experimental': False, @@ -358,6 +359,11 @@ class SoSReport(SoSComponent): default=[], dest='disable_parsers', help=('Disable specific parsers, so that ' 'those elements are not obfuscated')) + cleaner_grp.add_argument('--skip-cleaning-files', + '--skip-masking-files', action='extend', + default=[], dest='skip_clean_files', + help=('List of files to skip/ignore during ' + 'cleaning. Globs are supported.')) cleaner_grp.add_argument('--keywords', action='extend', default=[], dest='keywords', help='List of keywords to obfuscate') |