From efa6734f7e00c3cb95d77b5b5007681f94dd570e Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Wed, 23 Jul 2014 15:00:08 +0000 Subject: Security fix: After decoding numeric (\N) and one-character (\<, \> etc.) character escape sequences, do not forget to HTML-encode the resulting ASCII character. Malicious manuals were able to smuggle XSS content by roff-escaping the HTML-special characters they need. That's a classic bug type in many web applications, actually... :-( Found myself while auditing the HTML formatter for safe output handling. --- html.c | 63 +++++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 26 deletions(-) (limited to 'html.c') diff --git a/html.c b/html.c index d4f2a057..13d1902c 100644 --- a/html.c +++ b/html.c @@ -110,6 +110,7 @@ static const char *const roffscales[SCALE_MAX] = { static void bufncat(struct html *, const char *, size_t); static void print_ctag(struct html *, enum htmltag); +static int print_escape(char); static int print_encode(struct html *, const char *, int); static void print_metaf(struct html *, enum mandoc_esc); static void print_attr(struct html *, const char *, const char *); @@ -323,6 +324,37 @@ html_strlen(const char *cp) return(sz); } +static int +print_escape(char c) +{ + + switch (c) { + case '<': + printf("&lt;"); + break; + case '>': + printf("&gt;"); + break; + case '&': + printf("&amp;"); + break; + case '"': + printf("&quot;"); + break; + case ASCII_NBRSP: + putchar('-'); + break; + case ASCII_HYPH: + putchar('-'); + /* FALLTHROUGH */ + case ASCII_BREAK: + break; + default: + return(0); + } + return(1); +} + static int print_encode(struct html *h, const char *p, int norecurse) { @@ -350,30 +382,8 @@ print_encode(struct html *h, const char *p, int norecurse) if ('\0' == *p) break; - switch (*p++) { - case '<': - printf("&lt;"); - continue; - case '>': - printf("&gt;"); - continue; - case '&': - printf("&amp;"); - continue; - case '"': - printf("&quot;"); - continue; - case ASCII_NBRSP: -
putchar('-'); + if (print_escape(*p++)) continue; - case ASCII_HYPH: - putchar('-'); - /* FALLTHROUGH */ - case ASCII_BREAK: - continue; - default: - break; - } esc = mandoc_escape(&p, &seq, &len); if (ESCAPE_ERROR == esc) @@ -408,21 +418,22 @@ print_encode(struct html *h, const char *p, int norecurse) switch (esc) { case ESCAPE_UNICODE: - /* Skip passed "u" header. */ + /* Skip past "u" header. */ c = mchars_num2uc(seq + 1, len - 1); if ('\0' != c) printf("&#x%x;", c); break; case ESCAPE_NUMBERED: c = mchars_num2char(seq, len); - if ('\0' != c) + if ( ! ('\0' == c || print_escape(c))) putchar(c); break; case ESCAPE_SPECIAL: c = mchars_spec2cp(h->symtab, seq, len); if (c > 0) printf("&#%d;", c); - else if (-1 == c && 1 == len) + else if (-1 == c && 1 == len && + !print_escape(*seq)) putchar((int)*seq); break; case ESCAPE_NOSPACE: -- cgit