aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--man/en/sos-clean.125
-rw-r--r--sos/cleaner/__init__.py308
-rw-r--r--sos/cleaner/archives/__init__.py (renamed from sos/cleaner/obfuscation_archive.py)80
-rw-r--r--sos/cleaner/archives/generic.py52
-rw-r--r--sos/cleaner/archives/sos.py106
-rw-r--r--sos/cleaner/parsers/__init__.py6
-rw-r--r--sos/cleaner/parsers/hostname_parser.py1
-rw-r--r--sos/cleaner/parsers/ip_parser.py1
-rw-r--r--sos/cleaner/parsers/keyword_parser.py1
-rw-r--r--sos/cleaner/parsers/mac_parser.py1
-rw-r--r--sos/cleaner/parsers/username_parser.py8
-rw-r--r--tests/cleaner_tests/existing_archive.py7
-rw-r--r--tests/cleaner_tests/full_report_run.py3
-rw-r--r--tests/cleaner_tests/report_with_mask.py3
14 files changed, 423 insertions, 179 deletions
diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1
index b77bc63c..54026713 100644
--- a/man/en/sos-clean.1
+++ b/man/en/sos-clean.1
@@ -10,6 +10,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports
[\-\-jobs]
[\-\-no-update]
[\-\-keep-binary-files]
+ [\-\-archive-type]
.SH DESCRIPTION
\fBsos clean\fR or \fBsos mask\fR is an sos subcommand used to obfuscate sensitive information from
@@ -88,6 +89,30 @@ Users should review any archive that keeps binary files in place before sending
a third party.
Default: False (remove encountered binary files)
+.TP
+.B \-\-archive-type TYPE
+Specify the type of archive that TARGET was generated as.
+When sos inspects a TARGET archive, it tries to identify what type of archive it is.
+For example, it may be a report generated by \fBsos report\fR, or a collection of those
+reports generated by \fBsos collect\fR, which require separate approaches.
+
+This option may be useful if a given TARGET archive is known to be of a specific type
+but, due to unknown reasons or malformed/missing information in the archive itself,
+is not properly identified by sos.
+
+The following are accepted values for this option:
+
+ \fBauto\fR Automatically detect the archive type
+ \fBreport\fR An archive generated by \fBsos report\fR
+ \fBcollect\fR An archive generated by \fBsos collect\fR
+
+The following may also be used; however, note that these do not attempt to pre-load
+any information from the archives into the parsers. This means that, among other limitations,
+items like host and domain names may not be obfuscated unless an obfuscated mapping already exists
+on the system from a previous execution.
+
+ \fBdata-dir\fR A plain directory on the filesystem.
+ \fBtarball\fR A generic tar archive not associated with any known tool
.SH SEE ALSO
.BR sos (1)
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
index 6aadfe79..6d2eb483 100644
--- a/sos/cleaner/__init__.py
+++ b/sos/cleaner/__init__.py
@@ -12,9 +12,7 @@ import hashlib
import json
import logging
import os
-import re
import shutil
-import tarfile
import tempfile
from concurrent.futures import ThreadPoolExecutor
@@ -27,7 +25,10 @@ from sos.cleaner.parsers.mac_parser import SoSMacParser
from sos.cleaner.parsers.hostname_parser import SoSHostnameParser
from sos.cleaner.parsers.keyword_parser import SoSKeywordParser
from sos.cleaner.parsers.username_parser import SoSUsernameParser
-from sos.cleaner.obfuscation_archive import SoSObfuscationArchive
+from sos.cleaner.archives.sos import (SoSReportArchive, SoSReportDirectory,
+ SoSCollectorArchive,
+ SoSCollectorDirectory)
+from sos.cleaner.archives.generic import DataDirArchive, TarballArchive
from sos.utilities import get_human_readable
from textwrap import fill
@@ -41,6 +42,7 @@ class SoSCleaner(SoSComponent):
desc = "Obfuscate sensitive networking information in a report"
arg_defaults = {
+ 'archive_type': 'auto',
'domains': [],
'jobs': 4,
'keywords': [],
@@ -70,6 +72,7 @@ class SoSCleaner(SoSComponent):
self.from_cmdline = False
if not hasattr(self.opts, 'jobs'):
self.opts.jobs = 4
+ self.opts.archive_type = 'auto'
self.soslog = logging.getLogger('sos')
self.ui_log = logging.getLogger('sos_ui')
# create the tmp subdir here to avoid a potential race condition
@@ -92,6 +95,17 @@ class SoSCleaner(SoSComponent):
SoSUsernameParser(self.cleaner_mapping, self.opts.usernames)
]
+ self.archive_types = [
+ SoSReportDirectory,
+ SoSReportArchive,
+ SoSCollectorDirectory,
+ SoSCollectorArchive,
+ # make sure these two are always last as they are fallbacks
+ DataDirArchive,
+ TarballArchive
+ ]
+ self.nested_archive = None
+
self.log_info("Cleaner initialized. From cmdline: %s"
% self.from_cmdline)
@@ -178,6 +192,11 @@ third party.
)
clean_grp.add_argument('target', metavar='TARGET',
help='The directory or archive to obfuscate')
+ clean_grp.add_argument('--archive-type', default='auto',
+ choices=['auto', 'report', 'collect',
+ 'data-dir', 'tarball'],
+ help=('Specify what kind of archive the target '
+ 'was generated as'))
clean_grp.add_argument('--domains', action='extend', default=[],
help='List of domain names to obfuscate')
clean_grp.add_argument('-j', '--jobs', default=4, type=int,
@@ -218,59 +237,28 @@ third party.
In the event the target path is not an archive, abort.
"""
- if not tarfile.is_tarfile(self.opts.target):
- self.ui_log.error(
- "Invalid target: must be directory or tar archive"
- )
- self._exit(1)
-
- archive = tarfile.open(self.opts.target)
- self.arc_name = self.opts.target.split('/')[-1].split('.')[:-2][0]
-
- try:
- archive.getmember(os.path.join(self.arc_name, 'sos_logs'))
- except Exception:
- # this is not an sos archive
- self.ui_log.error("Invalid target: not an sos archive")
- self._exit(1)
-
- # see if there are archives within this archive
- nested_archives = []
- for _file in archive.getmembers():
- if (re.match('sosreport-.*.tar', _file.name.split('/')[-1]) and not
- (_file.name.endswith(('.md5', '.sha256')))):
- nested_archives.append(_file.name.split('/')[-1])
-
- if nested_archives:
- self.log_info("Found nested archive(s), extracting top level")
- nested_path = self.extract_archive(archive)
- for arc_file in os.listdir(nested_path):
- if re.match('sosreport.*.tar.*', arc_file):
- if arc_file.endswith(('.md5', '.sha256')):
- continue
- self.report_paths.append(os.path.join(nested_path,
- arc_file))
- # add the toplevel extracted archive
- self.report_paths.append(nested_path)
+ _arc = None
+ if self.opts.archive_type != 'auto':
+ check_type = self.opts.archive_type.replace('-', '_')
+ for archive in self.archive_types:
+ if archive.type_name == check_type:
+ _arc = archive(self.opts.target, self.tmpdir)
else:
- self.report_paths.append(self.opts.target)
-
- archive.close()
-
- def extract_archive(self, archive):
- """Extract an archive into our tmpdir so that we may inspect it or
- iterate through its contents for obfuscation
-
- Positional arguments:
-
- :param archive: An open TarFile object for the archive
-
- """
- if not isinstance(archive, tarfile.TarFile):
- archive = tarfile.open(archive)
- path = os.path.join(self.tmpdir, 'cleaner')
- archive.extractall(path)
- return os.path.join(path, archive.name.split('/')[-1].split('.tar')[0])
+ for arc in self.archive_types:
+ if arc.check_is_type(self.opts.target):
+ _arc = arc(self.opts.target, self.tmpdir)
+ break
+ if not _arc:
+ return
+ self.report_paths.append(_arc)
+ if _arc.is_nested:
+ self.report_paths.extend(_arc.get_nested_archives())
+ # We need to preserve the top level archive until all
+ # nested archives are processed
+ self.report_paths.remove(_arc)
+ self.nested_archive = _arc
+ if self.nested_archive:
+ self.nested_archive.ui_name = self.nested_archive.description
def execute(self):
"""SoSCleaner will begin by inspecting the TARGET option to determine
@@ -283,6 +271,7 @@ third party.
be unpacked, cleaned, and repacked and the final top-level archive will
then be repacked as well.
"""
+ self.arc_name = self.opts.target.split('/')[-1].split('.tar')[0]
if self.from_cmdline:
self.print_disclaimer()
self.report_paths = []
@@ -290,23 +279,11 @@ third party.
self.ui_log.error("Invalid target: no such file or directory %s"
% self.opts.target)
self._exit(1)
- if os.path.isdir(self.opts.target):
- self.arc_name = self.opts.target.split('/')[-1]
- for _file in os.listdir(self.opts.target):
- if _file == 'sos_logs':
- self.report_paths.append(self.opts.target)
- if (_file.startswith('sosreport') and
- (_file.endswith(".tar.gz") or _file.endswith(".tar.xz"))):
- self.report_paths.append(os.path.join(self.opts.target,
- _file))
- if not self.report_paths:
- self.ui_log.error("Invalid target: not an sos directory")
- self._exit(1)
- else:
- self.inspect_target_archive()
+
+ self.inspect_target_archive()
if not self.report_paths:
- self.ui_log.error("No valid sos archives or directories found\n")
+ self.ui_log.error("No valid archives or directories found\n")
self._exit(1)
# we have at least one valid target to obfuscate
@@ -334,33 +311,7 @@ third party.
final_path = None
if len(self.completed_reports) > 1:
- # we have an archive of archives, so repack the obfuscated tarball
- arc_name = self.arc_name + '-obfuscated'
- self.setup_archive(name=arc_name)
- for arc in self.completed_reports:
- if arc.is_tarfile:
- arc_dest = self.obfuscate_string(
- arc.final_archive_path.split('/')[-1]
- )
- self.archive.add_file(arc.final_archive_path,
- dest=arc_dest)
- checksum = self.get_new_checksum(arc.final_archive_path)
- if checksum is not None:
- dname = self.obfuscate_string(
- "checksums/%s.%s" % (arc_dest, self.hash_name)
- )
- self.archive.add_string(checksum, dest=dname)
- else:
- for dirname, dirs, files in os.walk(arc.archive_path):
- for filename in files:
- if filename.startswith('sosreport'):
- continue
- fname = os.path.join(dirname, filename)
- dnm = self.obfuscate_string(
- fname.split(arc.archive_name)[-1].lstrip('/')
- )
- self.archive.add_file(fname, dest=dnm)
- arc_path = self.archive.finalize(self.opts.compression_type)
+ arc_path = self.rebuild_nested_archive()
else:
arc = self.completed_reports[0]
arc_path = arc.final_archive_path
@@ -371,8 +322,7 @@ third party.
)
with open(os.path.join(self.sys_tmp, chksum_name), 'w') as cf:
cf.write(checksum)
-
- self.write_cleaner_log()
+ self.write_cleaner_log()
final_path = self.obfuscate_string(
os.path.join(self.sys_tmp, arc_path.split('/')[-1])
@@ -393,6 +343,30 @@ third party.
self.cleanup()
+ def rebuild_nested_archive(self):
+ """Handles repacking the nested tarball, now containing only obfuscated
+ copies of the reports, log files, manifest, etc...
+ """
+ # we have an archive of archives, so repack the obfuscated tarball
+ arc_name = self.arc_name + '-obfuscated'
+ self.setup_archive(name=arc_name)
+ for archive in self.completed_reports:
+ arc_dest = archive.final_archive_path.split('/')[-1]
+ checksum = self.get_new_checksum(archive.final_archive_path)
+ if checksum is not None:
+ dname = "checksums/%s.%s" % (arc_dest, self.hash_name)
+ self.archive.add_string(checksum, dest=dname)
+ for dirn, dirs, files in os.walk(self.nested_archive.extracted_path):
+ for filename in files:
+ fname = os.path.join(dirn, filename)
+ dname = fname.split(self.nested_archive.extracted_path)[-1]
+ dname = dname.lstrip('/')
+ self.archive.add_file(fname, dest=dname)
+ # remove it now so we don't balloon our fs space needs
+ os.remove(fname)
+ self.write_cleaner_log(archive=True)
+ return self.archive.finalize(self.opts.compression_type)
+
def compile_mapping_dict(self):
"""Build a dict that contains each parser's map as a key, with the
contents as that key's value. This will then be written to disk in the
@@ -441,7 +415,7 @@ third party.
self.log_error("Could not update mapping config file: %s"
% err)
- def write_cleaner_log(self):
+ def write_cleaner_log(self, archive=False):
"""When invoked via the command line, the logging from SoSCleaner will
not be added to the archive(s) it processes, so we need to write it
separately to disk
@@ -454,6 +428,10 @@ third party.
for line in self.sos_log_file.readlines():
logfile.write(line)
+ if archive:
+ self.obfuscate_file(log_name)
+ self.archive.add_file(log_name, dest="sos_logs/cleaner.log")
+
def get_new_checksum(self, archive_path):
"""Calculate a new checksum for the obfuscated archive, as the previous
checksum will no longer be valid
@@ -481,11 +459,11 @@ third party.
be obfuscated concurrently.
"""
try:
- if len(self.report_paths) > 1:
- msg = ("Found %s total reports to obfuscate, processing up to "
- "%s concurrently\n"
- % (len(self.report_paths), self.opts.jobs))
- self.ui_log.info(msg)
+ msg = (
+ "Found %s total reports to obfuscate, processing up to %s "
+ "concurrently\n" % (len(self.report_paths), self.opts.jobs)
+ )
+ self.ui_log.info(msg)
if self.opts.keep_binary_files:
self.ui_log.warning(
"WARNING: binary files that potentially contain sensitive "
@@ -494,53 +472,67 @@ third party.
pool = ThreadPoolExecutor(self.opts.jobs)
pool.map(self.obfuscate_report, self.report_paths, chunksize=1)
pool.shutdown(wait=True)
+ # finally, obfuscate the nested archive if one exists
+ if self.nested_archive:
+ self._replace_obfuscated_archives()
+ self.obfuscate_report(self.nested_archive)
except KeyboardInterrupt:
self.ui_log.info("Exiting on user cancel")
os._exit(130)
+ def _replace_obfuscated_archives(self):
+ """When we have a nested archive, we need to rebuild the original
+ archive, which entails replacing the existing archives with their
+ obfuscated counterparts
+ """
+ for archive in self.completed_reports:
+ os.remove(archive.archive_path)
+ dest = self.nested_archive.extracted_path
+ tarball = archive.final_archive_path.split('/')[-1]
+ dest_name = os.path.join(dest, tarball)
+ shutil.move(archive.final_archive_path, dest)
+ archive.final_archive_path = dest_name
+
def preload_all_archives_into_maps(self):
"""Before doing the actual obfuscation, if we have multiple archives
to obfuscate then we need to preload each of them into the mappings
to ensure that node1 is obfuscated in node2 as well as node2 being
obfuscated in node1's archive.
"""
- self.log_info("Pre-loading multiple archives into obfuscation maps")
+ self.log_info("Pre-loading all archives into obfuscation maps")
for _arc in self.report_paths:
- is_dir = os.path.isdir(_arc)
- if is_dir:
- _arc_name = _arc
- else:
- archive = tarfile.open(_arc)
- _arc_name = _arc.split('/')[-1].split('.tar')[0]
- # for each parser, load the map_prep_file into memory, and then
- # send that for obfuscation. We don't actually obfuscate the file
- # here, do that in the normal archive loop
for _parser in self.parsers:
- if not _parser.prep_map_file:
+ try:
+ pfile = _arc.prep_files[_parser.name.lower().split()[0]]
+ if not pfile:
+ continue
+ except (IndexError, KeyError):
continue
- if isinstance(_parser.prep_map_file, str):
- _parser.prep_map_file = [_parser.prep_map_file]
- for parse_file in _parser.prep_map_file:
- _arc_path = os.path.join(_arc_name, parse_file)
+ if isinstance(pfile, str):
+ pfile = [pfile]
+ for parse_file in pfile:
+ self.log_debug("Attempting to load %s" % parse_file)
try:
- if is_dir:
- _pfile = open(_arc_path, 'r')
- content = _pfile.read()
- else:
- _pfile = archive.extractfile(_arc_path)
- content = _pfile.read().decode('utf-8')
- _pfile.close()
+ content = _arc.get_file_content(parse_file)
+ if not content:
+ continue
if isinstance(_parser, SoSUsernameParser):
_parser.load_usernames_into_map(content)
- for line in content.splitlines():
- if isinstance(_parser, SoSHostnameParser):
- _parser.load_hostname_into_map(line)
- self.obfuscate_line(line)
+ elif isinstance(_parser, SoSHostnameParser):
+ _parser.load_hostname_into_map(
+ content.splitlines()[0]
+ )
+ else:
+ for line in content.splitlines():
+ self.obfuscate_line(line)
except Exception as err:
- self.log_debug("Could not prep %s: %s"
- % (_arc_path, err))
+ self.log_info(
+ "Could not prepare %s from %s (archive: %s): %s"
+ % (_parser.name, parse_file, _arc.archive_name,
+ err)
+ )
- def obfuscate_report(self, report):
+ def obfuscate_report(self, archive):
"""Individually handle each archive or directory we've discovered by
running through each file therein.
@@ -549,17 +541,12 @@ third party.
:param report str: Filepath to the directory or archive
"""
try:
- if not os.access(report, os.W_OK):
- msg = "Insufficient permissions on %s" % report
- self.log_info(msg)
- self.ui_log.error(msg)
- return
-
- archive = SoSObfuscationArchive(report, self.tmpdir)
arc_md = self.cleaner_md.add_section(archive.archive_name)
start_time = datetime.now()
arc_md.add_field('start_time', start_time)
- archive.extract()
+ # don't double extract nested archives
+ if not archive.is_extracted:
+ archive.extract()
archive.report_msg("Beginning obfuscation...")
file_list = archive.get_file_list()
@@ -586,27 +573,28 @@ third party.
caller=archive.archive_name)
# if the archive was already a tarball, repack it
- method = archive.get_compression()
- if method:
- archive.report_msg("Re-compressing...")
- try:
- archive.rename_top_dir(
- self.obfuscate_string(archive.archive_name)
- )
- archive.compress(method)
- except Exception as err:
- self.log_debug("Archive %s failed to compress: %s"
- % (archive.archive_name, err))
- archive.report_msg("Failed to re-compress archive: %s"
- % err)
- return
+ if not archive.is_nested:
+ method = archive.get_compression()
+ if method:
+ archive.report_msg("Re-compressing...")
+ try:
+ archive.rename_top_dir(
+ self.obfuscate_string(archive.archive_name)
+ )
+ archive.compress(method)
+ except Exception as err:
+ self.log_debug("Archive %s failed to compress: %s"
+ % (archive.archive_name, err))
+ archive.report_msg("Failed to re-compress archive: %s"
+ % err)
+ return
+ self.completed_reports.append(archive)
end_time = datetime.now()
arc_md.add_field('end_time', end_time)
arc_md.add_field('run_time', end_time - start_time)
arc_md.add_field('files_obfuscated', len(archive.file_sub_list))
arc_md.add_field('total_substitutions', archive.total_sub_count)
- self.completed_reports.append(archive)
rmsg = ''
if archive.removed_file_count:
rmsg = " [removed %s unprocessable files]"
@@ -615,7 +603,7 @@ third party.
except Exception as err:
self.ui_log.info("Exception while processing %s: %s"
- % (report, err))
+ % (archive.archive_name, err))
def obfuscate_file(self, filename, short_name=None, arc_name=None):
"""Obfuscate and individual file, line by line.
@@ -635,6 +623,8 @@ third party.
# the requested file doesn't exist in the archive
return
subs = 0
+ if not short_name:
+ short_name = filename.split('/')[-1]
if not os.path.islink(filename):
# don't run the obfuscation on the link, but on the actual file
# at some other point.
@@ -745,3 +735,5 @@ third party.
for parser in self.parsers:
_sec = parse_sec.add_section(parser.name.replace(' ', '_').lower())
_sec.add_field('entries', len(parser.mapping.dataset.keys()))
+
+# vim: set et ts=4 sw=4 :
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/archives/__init__.py
index ea0b7012..795c5a78 100644
--- a/sos/cleaner/obfuscation_archive.py
+++ b/sos/cleaner/archives/__init__.py
@@ -40,6 +40,10 @@ class SoSObfuscationArchive():
file_sub_list = []
total_sub_count = 0
removed_file_count = 0
+ type_name = 'undetermined'
+ description = 'undetermined'
+ is_nested = False
+ prep_files = {}
def __init__(self, archive_path, tmpdir):
self.archive_path = archive_path
@@ -50,7 +54,43 @@ class SoSObfuscationArchive():
self.soslog = logging.getLogger('sos')
self.ui_log = logging.getLogger('sos_ui')
self.skip_list = self._load_skip_list()
- self.log_info("Loaded %s as an archive" % self.archive_path)
+ self.is_extracted = False
+ self._load_self()
+ self.archive_root = ''
+ self.log_info(
+ "Loaded %s as type %s"
+ % (self.archive_path, self.description)
+ )
+
+ @classmethod
+ def check_is_type(cls, arc_path):
+ """Check if the archive is a well-known type we directly support"""
+ return False
+
+ def _load_self(self):
+ if self.is_tarfile:
+ self.tarobj = tarfile.open(self.archive_path)
+
+ def get_nested_archives(self):
+ """Return a list of ObfuscationArchives that represent additional
+ archives found within the target archive. For example, an archive from
+ `sos collect` will return a list of ``SoSReportArchive`` objects.
+
+ This should be overridden by individual ObfuscationArchive subclasses
+ """
+ return []
+
+ def get_archive_root(self):
+ """Return the root path for the archive, which should be prepended to
+ any filenames given to methods in this class.
+ """
+ if self.is_tarfile:
+ toplevel = self.tarobj.firstmember
+ if toplevel.isdir():
+ return toplevel.name
+ else:
+ return os.sep
+ return os.path.abspath(self.archive_path)
def report_msg(self, msg):
"""Helper to easily format ui messages on a per-report basis"""
@@ -96,10 +136,42 @@ class SoSObfuscationArchive():
os.remove(full_fname)
self.removed_file_count += 1
- def extract(self):
+ def format_file_name(self, fname):
+ """Based on the type of archive we're dealing with, do whatever that
+ archive requires to a provided **relative** filepath to be able to
+ access it within the archive
+ """
+ if not self.is_extracted:
+ if not self.archive_root:
+ self.archive_root = self.get_archive_root()
+ return os.path.join(self.archive_root, fname)
+ else:
+ return os.path.join(self.extracted_path, fname)
+
+ def get_file_content(self, fname):
+ """Return the content from the specified fname. Particularly useful for
+ tarball-type archives so we can retrieve prep file contents prior to
+ extracting the entire archive
+ """
+ if self.is_extracted is False and self.is_tarfile:
+ filename = self.format_file_name(fname)
+ try:
+ return self.tarobj.extractfile(filename).read().decode('utf-8')
+ except KeyError:
+ self.log_debug(
+ "Unable to retrieve %s: no such file in archive" % fname
+ )
+ return ''
+ else:
+ with open(self.format_file_name(fname), 'r') as to_read:
+ return to_read.read()
+
+ def extract(self, quiet=False):
if self.is_tarfile:
- self.report_msg("Extracting...")
+ if not quiet:
+ self.report_msg("Extracting...")
self.extracted_path = self.extract_self()
+ self.is_extracted = True
else:
self.extracted_path = self.archive_path
# if we're running as non-root (e.g. collector), then we can have a
@@ -317,3 +389,5 @@ class SoSObfuscationArchive():
return False
except UnicodeDecodeError:
return True
+
+# vim: set et ts=4 sw=4 :
diff --git a/sos/cleaner/archives/generic.py b/sos/cleaner/archives/generic.py
new file mode 100644
index 00000000..2ce6f09b
--- /dev/null
+++ b/sos/cleaner/archives/generic.py
@@ -0,0 +1,52 @@
+# Copyright 2020 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+
+from sos.cleaner.archives import SoSObfuscationArchive
+
+import os
+import tarfile
+
+
+class DataDirArchive(SoSObfuscationArchive):
+ """A plain directory on the filesystem that is not directly associated with
+ any known or supported collection utility
+ """
+
+ type_name = 'data_dir'
+ description = 'unassociated directory'
+
+ @classmethod
+ def check_is_type(cls, arc_path):
+ return os.path.isdir(arc_path)
+
+ def set_archive_root(self):
+ return os.path.abspath(self.archive_path)
+
+
+class TarballArchive(SoSObfuscationArchive):
+ """A generic tar archive that is not associated with any known or supported
+ collection utility
+ """
+
+ type_name = 'tarball'
+ description = 'unassociated tarball'
+
+ @classmethod
+ def check_is_type(cls, arc_path):
+ try:
+ return tarfile.is_tarfile(arc_path)
+ except Exception:
+ return False
+
+ def set_archive_root(self):
+ if self.tarobj.firstmember.isdir():
+ return self.tarobj.firstmember.name
+ return ''
diff --git a/sos/cleaner/archives/sos.py b/sos/cleaner/archives/sos.py
new file mode 100644
index 00000000..4401d710
--- /dev/null
+++ b/sos/cleaner/archives/sos.py
@@ -0,0 +1,106 @@
+# Copyright 2021 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+
+from sos.cleaner.archives import SoSObfuscationArchive
+
+import os
+import tarfile
+
+
+class SoSReportArchive(SoSObfuscationArchive):
+ """This is the class representing an sos report, or in other words the
+ type of archive the SoS project natively generates
+ """
+
+ type_name = 'report'
+ description = 'sos report archive'
+ prep_files = {
+ 'hostname': 'sos_commands/host/hostname',
+ 'ip': 'sos_commands/networking/ip_-o_addr',
+ 'mac': 'sos_commands/networking/ip_-d_address',
+ 'username': [
+ 'sos_commands/login/lastlog_-u_1000-60000',
+ 'sos_commands/login/lastlog_-u_60001-65536',
+ 'sos_commands/login/lastlog_-u_65537-4294967295',
+ # AD users will be reported here, but favor the lastlog files since
+ # those will include local users who have not logged in
+ 'sos_commands/login/last'
+ ]
+ }
+
+ @classmethod
+ def check_is_type(cls, arc_path):
+ try:
+ return tarfile.is_tarfile(arc_path) and 'sosreport-' in arc_path
+ except Exception:
+ return False
+
+
+class SoSReportDirectory(SoSReportArchive):
+ """This is the archive class representing a build directory, or in other
+ words what `sos report --clean` will end up using for in-line obfuscation
+ """
+
+ type_name = 'report_dir'
+ description = 'sos report directory'
+
+ @classmethod
+ def check_is_type(cls, arc_path):
+ if os.path.isdir(arc_path):
+ return 'sos_logs' in os.listdir(arc_path)
+ return False
+
+
+class SoSCollectorArchive(SoSObfuscationArchive):
+ """Archive class representing the tarball created by ``sos collect``. It
+ will not provide prep files on its own; however, it will provide a list
+ of SoSReportArchive objects which will then be used to prep the parsers
+ """
+
+ type_name = 'collect'
+ description = 'sos collect tarball'
+ is_nested = True
+
+ @classmethod
+ def check_is_type(cls, arc_path):
+ try:
+ return (tarfile.is_tarfile(arc_path) and 'sos-collect' in arc_path)
+ except Exception:
+ return False
+
+ def get_nested_archives(self):
+ self.extract(quiet=True)
+ _path = self.extracted_path
+ archives = []
+ for fname in os.listdir(_path):
+ arc_name = os.path.join(_path, fname)
+ if 'sosreport-' in fname and tarfile.is_tarfile(arc_name):
+ archives.append(SoSReportArchive(arc_name, self.tmpdir))
+ return archives
+
+
+class SoSCollectorDirectory(SoSCollectorArchive):
+ """The archive class representing the temp directory used by ``sos
+ collect`` when ``--clean`` is used during runtime.
+ """
+
+ type_name = 'collect_dir'
+ description = 'sos collect directory'
+
+ @classmethod
+ def check_is_type(cls, arc_path):
+ if os.path.isdir(arc_path):
+ for fname in os.listdir(arc_path):
+ if 'sos-collector-' in fname:
+ return True
+ return False
+
+# vim: set et ts=4 sw=4 :
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
index af6e375e..e62fd938 100644
--- a/sos/cleaner/parsers/__init__.py
+++ b/sos/cleaner/parsers/__init__.py
@@ -37,11 +37,6 @@ class SoSCleanerParser():
:cvar map_file_key: The key in the ``map_file`` to read when loading
previous obfuscation matches
:vartype map_file_key: ``str``
-
-
- :cvar prep_map_file: File to read from an archive to pre-seed the map with
- matches. E.G. ip_addr for loading IP addresses
- :vartype prep_map_fie: ``str``
"""
name = 'Undefined Parser'
@@ -49,7 +44,6 @@ class SoSCleanerParser():
skip_line_patterns = []
skip_files = []
map_file_key = 'unset'
- prep_map_file = []
def __init__(self, config={}):
if self.map_file_key in config:
diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
index 71e13d3f..daa76a62 100644
--- a/sos/cleaner/parsers/hostname_parser.py
+++ b/sos/cleaner/parsers/hostname_parser.py
@@ -16,7 +16,6 @@ class SoSHostnameParser(SoSCleanerParser):
name = 'Hostname Parser'
map_file_key = 'hostname_map'
- prep_map_file = 'sos_commands/host/hostname'
regex_patterns = [
r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
]
diff --git a/sos/cleaner/parsers/ip_parser.py b/sos/cleaner/parsers/ip_parser.py
index 525139e8..71d38be8 100644
--- a/sos/cleaner/parsers/ip_parser.py
+++ b/sos/cleaner/parsers/ip_parser.py
@@ -41,7 +41,6 @@ class SoSIPParser(SoSCleanerParser):
]
map_file_key = 'ip_map'
- prep_map_file = 'sos_commands/networking/ip_-o_addr'
def __init__(self, config):
self.mapping = SoSIPMap()
diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py
index 68de3727..694c6073 100644
--- a/sos/cleaner/parsers/keyword_parser.py
+++ b/sos/cleaner/parsers/keyword_parser.py
@@ -20,7 +20,6 @@ class SoSKeywordParser(SoSCleanerParser):
name = 'Keyword Parser'
map_file_key = 'keyword_map'
- prep_map_file = ''
def __init__(self, config, keywords=None, keyword_file=None):
self.mapping = SoSKeywordMap()
diff --git a/sos/cleaner/parsers/mac_parser.py b/sos/cleaner/parsers/mac_parser.py
index 7ca80b8d..c74288cf 100644
--- a/sos/cleaner/parsers/mac_parser.py
+++ b/sos/cleaner/parsers/mac_parser.py
@@ -30,7 +30,6 @@ class SoSMacParser(SoSCleanerParser):
'534f:53'
)
map_file_key = 'mac_map'
- prep_map_file = 'sos_commands/networking/ip_-d_address'
def __init__(self, config):
self.mapping = SoSMacMap()
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
index b142e371..35377a31 100644
--- a/sos/cleaner/parsers/username_parser.py
+++ b/sos/cleaner/parsers/username_parser.py
@@ -25,14 +25,6 @@ class SoSUsernameParser(SoSCleanerParser):
name = 'Username Parser'
map_file_key = 'username_map'
- prep_map_file = [
- 'sos_commands/login/lastlog_-u_1000-60000',
- 'sos_commands/login/lastlog_-u_60001-65536',
- 'sos_commands/login/lastlog_-u_65537-4294967295',
- # AD users will be reported here, but favor the lastlog files since
- # those will include local users who have not logged in
- 'sos_commands/login/last'
- ]
regex_patterns = []
skip_list = [
'core',
diff --git a/tests/cleaner_tests/existing_archive.py b/tests/cleaner_tests/existing_archive.py
index 0eaf6c8d..e13d1cae 100644
--- a/tests/cleaner_tests/existing_archive.py
+++ b/tests/cleaner_tests/existing_archive.py
@@ -28,6 +28,13 @@ class ExistingArchiveCleanTest(StageTwoReportTest):
def test_obfuscation_log_created(self):
self.assertFileExists(os.path.join(self.tmpdir, '%s-obfuscation.log' % ARCHIVE))
+ def test_archive_type_correct(self):
+ with open(os.path.join(self.tmpdir, '%s-obfuscation.log' % ARCHIVE), 'r') as log:
+ for line in log:
+ if "Loaded %s" % ARCHIVE in line:
+ assert 'as type sos report archive' in line, "Incorrect archive type detected: %s" % line
+ break
+
def test_from_cmdline_logged(self):
with open(os.path.join(self.tmpdir, '%s-obfuscation.log' % ARCHIVE), 'r') as log:
for line in log:
diff --git a/tests/cleaner_tests/full_report_run.py b/tests/cleaner_tests/full_report_run.py
index 3b28e7a2..2de54946 100644
--- a/tests/cleaner_tests/full_report_run.py
+++ b/tests/cleaner_tests/full_report_run.py
@@ -35,6 +35,9 @@ class FullCleanTest(StageTwoReportTest):
def test_tarball_named_obfuscated(self):
self.assertTrue('obfuscated' in self.archive)
+ def test_archive_type_correct(self):
+ self.assertSosLogContains('Loaded .* as type sos report directory')
+
def test_hostname_not_in_any_file(self):
host = self.sysinfo['pre']['networking']['hostname']
# much faster to just use grep here
diff --git a/tests/cleaner_tests/report_with_mask.py b/tests/cleaner_tests/report_with_mask.py
index 4f94ba33..08e873d4 100644
--- a/tests/cleaner_tests/report_with_mask.py
+++ b/tests/cleaner_tests/report_with_mask.py
@@ -31,6 +31,9 @@ class ReportWithMask(StageOneReportTest):
def test_tarball_named_obfuscated(self):
self.assertTrue('obfuscated' in self.archive)
+ def test_archive_type_correct(self):
+ self.assertSosLogContains('Loaded .* as type sos report directory')
+
def test_localhost_was_obfuscated(self):
self.assertFileHasContent('/etc/hostname', 'host0')