diff options
author | Kristaps Dzonsons <kristaps@bsd.lv> | 2011-05-15 15:30:33 +0000 |
---|---|---|
committer | Kristaps Dzonsons <kristaps@bsd.lv> | 2011-05-15 15:30:33 +0000 |
commit | 877bef0b9533150581c911aff80cfbb71c13de8e (patch) | |
tree | dd8226017e02a9986ea94aac1ccd95e3035dda59 | |
parent | 6ae46f238194f0cb76bd4e0b8dd456d2be0b960d (diff) | |
download | mandoc-877bef0b9533150581c911aff80cfbb71c13de8e.tar.gz |
Support groff's escape for Unicode input. See
http://mdocml.bsd.lv/archives/tech/0368.html
For the time being, we just throw it away.
-rw-r--r-- | mandoc.c | 8 | ||||
-rw-r--r-- | mandoc.h | 1 | ||||
-rw-r--r-- | mandoc_char.7 | 14 |
3 files changed, 23 insertions, 0 deletions
@@ -125,6 +125,14 @@ mandoc_escape(const char **end, const char **start, int *sz)
 		break;
 	case ('['):
 		gly = ESCAPE_SPECIAL;
+		/*
+		 * Unicode escapes are defined in groff as \[uXXXX] to
+		 * \[u10FFFF], where the contained value must be a valid
+		 * Unicode codepoint. Here, however, only check whether
+		 * it's not a zero-width escape.
+		 */
+		if ('u' == cp[i] && ']' != cp[i + 1])
+			gly = ESCAPE_UNICODE;
 		term = ']';
 		break;
 	case ('C'):
@@ -299,6 +299,7 @@ enum mandoc_esc {
 	ESCAPE_FONTROMAN, /* roman font mode */
 	ESCAPE_FONTPREV, /* previous font mode */
 	ESCAPE_NUMBERED, /* a numbered glyph */
+	ESCAPE_UNICODE, /* a unicode codepoint */
 	ESCAPE_NOSPACE /* suppress space if the last on a line */
 };
 
diff --git a/mandoc_char.7 b/mandoc_char.7
index 10a64167..f61d3a07 100644
--- a/mandoc_char.7
+++ b/mandoc_char.7
@@ -520,6 +520,20 @@ portable.
 .It \e*(Px Ta \*(Px Ta POSIX standard name
 .It \e*(Ai Ta \*(Ai Ta ANSI standard name
 .El
+.Sh UNICODE CHARACTERS
+The escape sequence
+.Pp
+.Dl \e[uXXXX]
+.Pp
+is interpreted as a Unicode codepoint.
+The codepoint must be in the range above U+0080 and less than U+10FFFF.
+For compatibility, points must be zero-padded to four characters; if
+greater than four characters, no zero padding is allowed.
+Unicode surrogates are not allowed.
+.\" .Pp
+.\" Unicode glyphs attenuate to the
+.\" .Sq \&?
+.\" character if invalid or not rendered by current output media.
 .Sh NUMBERED CHARACTERS
 For backward compatibility with existing manuals,
 .Xr mandoc 1