summary | refs | log | tree | commit | diff | stats
path: root/mandoc.c
diff options
context:
space:
mode:
author: Ingo Schwarze <schwarze@openbsd.org> 2014-10-28 13:24:44 +0000
committer: Ingo Schwarze <schwarze@openbsd.org> 2014-10-28 13:24:44 +0000
commit: 1af40e20be48508da80ee8a5ee06a4f34351a59c (patch)
tree: 49e6672afa0b366d22914b65f4b919f77d397e5f /mandoc.c
parent: 06dca5ea40d18f0b1eeb965602aa2749e1c39f48 (diff)
download: mandoc-1af40e20be48508da80ee8a5ee06a4f34351a59c.tar.gz
Tighten Unicode escape name parsing.
Accept only 0xXXXX, 0xYXXXX, 0x10XXXX with Y != 0. This simplifies mchars_num2uc().
Diffstat (limited to 'mandoc.c')
-rw-r--r-- mandoc.c 13
1 files changed, 9 insertions, 4 deletions
diff --git a/mandoc.c b/mandoc.c
index db7db0c4..f74e32a7 100644
--- a/mandoc.c
+++ b/mandoc.c
@@ -334,13 +334,18 @@ mandoc_escape(const char **end, const char **start, int *sz)
if (1 == *sz && 'c' == **start)
gly = ESCAPE_NOSPACE;
/*
- * Unicode escapes are defined in groff as \[uXXXX]
+ * Unicode escapes are defined in groff as \[u0000]
* to \[u10FFFF], where the contained value must be
* a valid Unicode codepoint. Here, however, only
- * check the length and the validity of all digits.
+ * check the length and range.
*/
- else if (*sz > 4 && *sz < 8 && **start == 'u' &&
- (int)strspn(*start + 1, "0123456789ABCDEFabcdef")
+ if (**start != 'u' || *sz < 5 || *sz > 7)
+ break;
+ if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0'))
+ break;
+ if (*sz == 6 && (*start)[1] == '0')
+ break;
+ if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef")
+ 1 == *sz)
gly = ESCAPE_UNICODE;
break;