diff options
author | Ingo Schwarze <schwarze@openbsd.org> | 2014-07-23 15:00:08 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@openbsd.org> | 2014-07-23 15:00:08 +0000 |
commit | efa6734f7e00c3cb95d77b5b5007681f94dd570e (patch) | |
tree | 4adcc290b9b88b17586ac9d131a9a706bedafb9b | |
parent | 3d2eb53149ca24b5957180553ec66503bbcd3e7b (diff) | |
download | mandoc-efa6734f7e00c3cb95d77b5b5007681f94dd570e.tar.gz |
Security fix:
After decoding numeric (\N) and one-character (\<, \> etc.)
character escape sequences, do not forget to HTML-encode the
resulting ASCII character. Malicious manuals were able to smuggle
XSS content by roff-escaping the HTML-special characters they need.
That's a classic bug type in many web applications, actually... :-(
Found myself while auditing the HTML formatter for safe output handling.
-rw-r--r-- | chars.c | 13 | ||||
-rw-r--r-- | html.c | 63 |
2 files changed, 49 insertions, 27 deletions
@@ -127,7 +127,18 @@ mchars_num2uc(const char *p, size_t sz) if ((i = mandoc_strntoi(p, sz, 16)) < 0) return('\0'); - /* FIXME: make sure we're not in a bogus range. */ + + /* + * Security warning: + * Never extend the range of accepted characters + * to overlap with the ASCII range, 0x00-0x7F + * without re-auditing the callers of this function. + * Some callers might relay on the fact that we never + * return ASCII characters for their escaping decisions. + * + * XXX Code is missing here to exclude bogus ranges. + */ + return(i > 0x80 && i <= 0x10FFFF ? i : '\0'); } @@ -110,6 +110,7 @@ static const char *const roffscales[SCALE_MAX] = { static void bufncat(struct html *, const char *, size_t); static void print_ctag(struct html *, enum htmltag); +static int print_escape(char); static int print_encode(struct html *, const char *, int); static void print_metaf(struct html *, enum mandoc_esc); static void print_attr(struct html *, const char *, const char *); @@ -324,6 +325,37 @@ html_strlen(const char *cp) } static int +print_escape(char c) +{ + + switch (c) { + case '<': + printf("&lt;"); + break; + case '>': + printf("&gt;"); + break; + case '&': + printf("&amp;"); + break; + case '"': + printf("&quot;"); + break; + case ASCII_NBRSP: + putchar('-'); + break; + case ASCII_HYPH: + putchar('-'); + /* FALLTHROUGH */ + case ASCII_BREAK: + break; + default: + return(0); + } + return(1); +} + +static int print_encode(struct html *h, const char *p, int norecurse) { size_t sz; @@ -350,30 +382,8 @@ print_encode(struct html *h, const char *p, int norecurse) if ('\0' == *p) break; - switch (*p++) { - case '<': - printf("&lt;"); - continue; - case '>': - printf("&gt;"); - continue; - case '&': - printf("&amp;"); - continue; - case '"': - printf("&quot;"); - continue; - case ASCII_NBRSP: - putchar('-'); + if (print_escape(*p++)) continue; - case ASCII_HYPH: - putchar('-'); - /* FALLTHROUGH */ - case ASCII_BREAK: - continue; - default: - break; - } esc = mandoc_escape(&p, &seq, &len); if (ESCAPE_ERROR == 
esc) @@ -408,21 +418,22 @@ print_encode(struct html *h, const char *p, int norecurse) switch (esc) { case ESCAPE_UNICODE: - /* Skip passed "u" header. */ + /* Skip past "u" header. */ c = mchars_num2uc(seq + 1, len - 1); if ('\0' != c) printf("&#x%x;", c); break; case ESCAPE_NUMBERED: c = mchars_num2char(seq, len); - if ('\0' != c) + if ( ! ('\0' == c || print_escape(c))) putchar(c); break; case ESCAPE_SPECIAL: c = mchars_spec2cp(h->symtab, seq, len); if (c > 0) printf("&#%d;", c); - else if (-1 == c && 1 == len) + else if (-1 == c && 1 == len && + !print_escape(*seq)) putchar((int)*seq); break; case ESCAPE_NOSPACE: |