about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryn M. Reeves <bmr@redhat.com> 2014-12-09 17:30:06 +0000
committer Bryn M. Reeves <bmr@redhat.com> 2014-12-09 17:30:06 +0000
commit 348ed379209aacfe6c1f870a3a68e06b39a51f06 (patch)
tree 59a897cd42c56fa605c2de54f55938726d39e331
parent a85f58cb566883532ba0df93ecf0dba5bc190fc3 (diff)
download sos-348ed379209aacfe6c1f870a3a68e06b39a51f06.tar.gz
[global] make all utf-8 handling use errors='ignore'
Stop playing whack-a-mole with unicode handling and ignore all invalid characters. It's not really possible to ensure that we always get strict utf-8 data from the system - e.g. dmesg on systems with broken BIOS strings will often spit out undecodable byte sequences.

Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
-rw-r--r-- sos/archive.py 4
-rw-r--r-- sos/plugins/__init__.py 5
-rw-r--r-- sos/utilities.py 7
3 files changed, 10 insertions, 6 deletions
diff --git a/sos/archive.py b/sos/archive.py
index 60636251..0e019bfd 100644
--- a/sos/archive.py
+++ b/sos/archive.py
@@ -406,9 +406,9 @@ class TarFileArchive(FileCacheArchive):
close_fds=True)
stdout, stderr = p.communicate()
if stdout:
- self.log_info(stdout.decode('utf-8'))
+ self.log_info(stdout.decode('utf-8', 'ignore'))
if stderr:
- self.log_error(stderr.decode('utf-8'))
+ self.log_error(stderr.decode('utf-8', 'ignore'))
self._suffix += suffix
return self.name()
except Exception as e:
diff --git a/sos/plugins/__init__.py b/sos/plugins/__init__.py
index b2891444..4fd85be9 100644
--- a/sos/plugins/__init__.py
+++ b/sos/plugins/__init__.py
@@ -542,7 +542,7 @@ class Plugin(object):
def add_string_as_file(self, content, filename):
"""Add a string to the archive as a file named `filename`"""
self.copy_strings.append((content, filename))
- content = "..." + (content.splitlines()[0]).decode('utf8')
+ content = "..." + (content.splitlines()[0]).decode('utf8', 'ignore')
self._log_debug("added string '%s' as '%s'" % (content, filename))
def get_cmd_output_now(self, exe, suggest_filename=None,
@@ -611,7 +611,8 @@ class Plugin(object):
def _collect_strings(self):
for string, file_name in self.copy_strings:
- content = "..." + (string.splitlines()[0]).decode('utf8')
+ content = "..."
+ content += (string.splitlines()[0]).decode('utf8', 'ignore')
self._log_info("collecting string '%s' as '%s'"
% (content, file_name))
try:
diff --git a/sos/utilities.py b/sos/utilities.py
index 7e8cd7eb..8cb4ed6d 100644
--- a/sos/utilities.py
+++ b/sos/utilities.py
@@ -140,7 +140,7 @@ def sos_get_command_output(command, timeout=300, runat=None):
# shlex.split() reacts badly to unicode on older python runtimes.
if not six.PY3:
- command = command.encode('utf-8')
+ command = command.encode('utf-8', 'ignore')
args = shlex.split(command)
try:
p = Popen(args, shell=False, stdout=PIPE, stderr=STDOUT,
@@ -159,7 +159,10 @@ def sos_get_command_output(command, timeout=300, runat=None):
if p.returncode == 126 or p.returncode == 127:
stdout = six.binary_type(b"")
- return {'status': p.returncode, 'output': stdout.decode('utf-8')}
+ return {
+ 'status': p.returncode,
+ 'output': stdout.decode('utf-8', 'ignore')
+ }
def import_module(module_fqname, superclasses=None):