diff options
author | Ingo Schwarze <schwarze@openbsd.org> | 2014-10-28 13:24:44 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@openbsd.org> | 2014-10-28 13:24:44 +0000 |
commit | 1af40e20be48508da80ee8a5ee06a4f34351a59c (patch) | |
tree | 49e6672afa0b366d22914b65f4b919f77d397e5f | |
parent | 06dca5ea40d18f0b1eeb965602aa2749e1c39f48 (diff) | |
download | mandoc-1af40e20be48508da80ee8a5ee06a4f34351a59c.tar.gz |
Tighten Unicode escape name parsing.
Accept only 0xXXXX, 0xYXXXX, 0x10XXXX with Y != 0.
This simplifies mchars_num2uc().
-rw-r--r-- | chars.c | 11 | ||||
-rw-r--r-- | mandoc.c | 13 |
2 files changed, 12 insertions, 12 deletions
```diff
@@ -123,14 +123,9 @@ mchars_num2uc(const char *p, size_t sz)
 {
 	int i;
 
-	if ((i = mandoc_strntoi(p, sz, 16)) < 0)
-		return(0xFFFD);
-
-	/*
-	 * XXX Code is missing here to exclude bogus ranges.
-	 */
-
-	return(i <= 0x10FFFF ? i : 0xFFFD);
+	i = mandoc_strntoi(p, sz, 16);
+	assert(i >= 0 && i <= 0x10FFFF);
+	return(i);
 }
 
 const char *
@@ -334,13 +334,18 @@ mandoc_escape(const char **end, const char **start, int *sz)
 		if (1 == *sz && 'c' == **start)
 			gly = ESCAPE_NOSPACE;
 		/*
-		 * Unicode escapes are defined in groff as \[uXXXX]
+		 * Unicode escapes are defined in groff as \[u0000]
 		 * to \[u10FFFF], where the contained value must be
 		 * a valid Unicode codepoint. Here, however, only
-		 * check the length and the validity of all digits.
+		 * check the length and range.
 		 */
-		else if (*sz > 4 && *sz < 8 && **start == 'u' &&
-		    (int)strspn(*start + 1, "0123456789ABCDEFabcdef")
+		if (**start != 'u' || *sz < 5 || *sz > 7)
+			break;
+		if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0'))
+			break;
+		if (*sz == 6 && (*start)[1] == '0')
+			break;
+		if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef")
 		    + 1 == *sz)
 			gly = ESCAPE_UNICODE;
 		break;
```