summary | refs | log | tree | commit | diff | stats
path: root/mandoc.c
diff options
context:
space:
mode:
author: Ingo Schwarze <schwarze@openbsd.org> 2014-10-13 17:17:45 +0000
committer: Ingo Schwarze <schwarze@openbsd.org> 2014-10-13 17:17:45 +0000
commit: 61fcdf77510d89258927a8cfc957096701a22191 (patch)
tree: e5f9bc443c50d56c3860aee87a38353307835364 /mandoc.c
parent: 80cad0356226ad303de33e333b5fad9a23eb4983 (diff)
download: mandoc-61fcdf77510d89258927a8cfc957096701a22191.tar.gz
Stricter syntax checking of Unicode character names:
Require exactly 4, 5, or 6 hex digits after the leading 'u' and allow nothing else. This avoids mishandling escapes like \[ua] and \C'uA' as Unicode, and it also fixes underlining in eqn(7) -Thtml output, which uses \[ul]. Problem found and semantics suggested by kristaps@.
Diffstat (limited to 'mandoc.c')
-rw-r--r-- mandoc.c 23
1 files changed, 11 insertions, 12 deletions
diff --git a/mandoc.c b/mandoc.c
index f454a40e..db7db0c4 100644
--- a/mandoc.c
+++ b/mandoc.c
@@ -79,24 +79,13 @@ mandoc_escape(const char **end, const char **start, int *sz)
break;
case '[':
gly = ESCAPE_SPECIAL;
- /*
- * Unicode escapes are defined in groff as \[uXXXX] to
- * \[u10FFFF], where the contained value must be a valid
- * Unicode codepoint. Here, however, only check whether
- * it's not a zero-width escape.
- */
- if ('u' == (*start)[0] && ']' != (*start)[1])
- gly = ESCAPE_UNICODE;
term = ']';
break;
case 'C':
if ('\'' != **start)
return(ESCAPE_ERROR);
*start = ++*end;
- if ('u' == (*start)[0] && '\'' != (*start)[1])
- gly = ESCAPE_UNICODE;
- else
- gly = ESCAPE_SPECIAL;
+ gly = ESCAPE_SPECIAL;
term = '\'';
break;
@@ -344,6 +333,16 @@ mandoc_escape(const char **end, const char **start, int *sz)
case ESCAPE_SPECIAL:
if (1 == *sz && 'c' == **start)
gly = ESCAPE_NOSPACE;
+ /*
+ * Unicode escapes are defined in groff as \[uXXXX]
+ * to \[u10FFFF], where the contained value must be
+ * a valid Unicode codepoint. Here, however, only
+ * check the length and the validity of all digits.
+ */
+ else if (*sz > 4 && *sz < 8 && **start == 'u' &&
+ (int)strspn(*start + 1, "0123456789ABCDEFabcdef")
+ + 1 == *sz)
+ gly = ESCAPE_UNICODE;
break;
default:
break;