summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Kristaps Dzonsons <kristaps@bsd.lv> 2011-05-15 15:30:33 +0000
committer Kristaps Dzonsons <kristaps@bsd.lv> 2011-05-15 15:30:33 +0000
commit 877bef0b9533150581c911aff80cfbb71c13de8e (patch)
tree dd8226017e02a9986ea94aac1ccd95e3035dda59
parent 6ae46f238194f0cb76bd4e0b8dd456d2be0b960d (diff)
download mandoc-877bef0b9533150581c911aff80cfbb71c13de8e.tar.gz
Support groff's escape for Unicode input. See
http://mdocml.bsd.lv/archives/tech/0368.html for background. For the time being, the escape is recognized but simply thrown away.
-rw-r--r-- mandoc.c 8
-rw-r--r-- mandoc.h 1
-rw-r--r-- mandoc_char.7 14
3 files changed, 23 insertions, 0 deletions
diff --git a/mandoc.c b/mandoc.c
index 4a9be8e7..9991978d 100644
--- a/mandoc.c
+++ b/mandoc.c
@@ -125,6 +125,14 @@ mandoc_escape(const char **end, const char **start, int *sz)
break;
case ('['):
gly = ESCAPE_SPECIAL;
+ /*
+ * Unicode escapes are defined in groff as \[uXXXX] to
+ * \[u10FFFF], where the contained value must be a valid
+ * Unicode codepoint. Here, however, only check that at least
+ * one character follows the 'u' (i.e., it is not a bare \[u]).
+ */
+ if ('u' == cp[i] && ']' != cp[i + 1])
+ gly = ESCAPE_UNICODE;
term = ']';
break;
case ('C'):
diff --git a/mandoc.h b/mandoc.h
index 0b14038e..07070d88 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -299,6 +299,7 @@ enum mandoc_esc {
ESCAPE_FONTROMAN, /* roman font mode */
ESCAPE_FONTPREV, /* previous font mode */
ESCAPE_NUMBERED, /* a numbered glyph */
+ ESCAPE_UNICODE, /* a unicode codepoint */
ESCAPE_NOSPACE /* suppress space if the last on a line */
};
diff --git a/mandoc_char.7 b/mandoc_char.7
index 10a64167..f61d3a07 100644
--- a/mandoc_char.7
+++ b/mandoc_char.7
@@ -520,6 +520,20 @@ portable.
.It \e*(Px Ta \*(Px Ta POSIX standard name
.It \e*(Ai Ta \*(Ai Ta ANSI standard name
.El
+.Sh UNICODE CHARACTERS
+The escape sequence
+.Pp
+.Dl \e[uXXXX]
+.Pp
+is interpreted as a Unicode codepoint.
+The codepoint must be greater than U+0080 and less than U+10FFFF.
+For compatibility, codepoints must be zero-padded to four hexadecimal
+digits; if longer than four digits, no leading zeros are allowed.
+Unicode surrogates are not allowed.
+.\" .Pp
+.\" Unicode glyphs attenuate to the
+.\" .Sq \&?
+.\" character if invalid or not rendered by current output media.
.Sh NUMBERED CHARACTERS
For backward compatibility with existing manuals,
.Xr mandoc 1