From 9e0f3d1103dc0554086c44f9e86d7b266c97ca66 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Thu, 26 Jan 2023 21:40:38 +0100 Subject: wrap: be more robust with locale On (some?) MacOS systems there is no C.UTF-8 locale available. Instead there is a non-standard "UTF-8" (encoding only) replacement. Running wrap on MacOS results in an error: error: failed to set locale: Bad file descriptor Instead of expecting that C.UTF-8 will always be available, try to use the user set locale (either from the $LC_ALL or $LANG environment variables). If these variables are unset or if they are set to an invalid/non-existent locale, fall back on C.UTF-8. If C.UTF-8 is not available, make one last desperate attempt for this UTF-8 non-standard locale (MacOS only). aerc will always send UTF-8 encoded text to the filter commands. If the locale that we managed to load does not use the UTF-8 character encoding, exit with an explicit error instead of risking undefined behaviour. Reported-by: Ben Cohen Signed-off-by: Robin Jarry --- filters/wrap.c | 43 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) (limited to 'filters') diff --git a/filters/wrap.c b/filters/wrap.c index ba084c35..cfe86581 100644 --- a/filters/wrap.c +++ b/filters/wrap.c @@ -418,6 +418,40 @@ static void sanitize_line(const wchar_t *in, wchar_t *out) *out = L'\0'; } +static int set_stdio_encoding(void) +{ + const char *locale = setlocale(LC_ALL, ""); + + if (!locale) { + /* Neither LC_ALL nor LANG env vars are defined or are set to + * a non existant/installed locale. Try with a generic UTF-8 + * locale which is expected to be available on all POSIX + * systems. */ + locale = setlocale(LC_ALL, "C.UTF-8"); + if (!locale) { + /* The system is not following POSIX standards. Last + * resort: check if 'UTF-8' (encoding only) exists. 
*/ + locale = setlocale(LC_CTYPE, "UTF-8"); + } + } + if (!locale) { + perror("error: failed to set locale"); + return 1; + } + + /* aerc will always send UTF-8 text, ensure that we read that properly */ + if (!strstr(locale, "UTF-8") && !strstr(locale, "utf-8")) { + fprintf(stderr, "error: locale '%s' is not UTF-8\n", locale); + return 1; + } + + /* ensure files are configured to read/write wide characters */ + fwide(in_file, true); + fwide(stdout, true); + + return 0; +} + int main(int argc, char **argv) { /* line needs to be 8 times larger than buf since every read character @@ -440,14 +474,9 @@ int main(int argc, char **argv) is_patch = true; regfree(&re); - /* aerc will always send UTF-8 text, force locale here */ - if (!setlocale(LC_CTYPE, "C.UTF-8")) { - err = 1; - perror("error: failed to set locale"); + err = set_stdio_encoding(); + if (err) goto end; - } - fwide(in_file, true); - fwide(stdout, true); while (fgetws(buf, BUFFER_SIZE, in_file)) { if (is_patch) { -- cgit