diff options
author | Kristaps Dzonsons <kristaps@bsd.lv> | 2011-05-17 22:32:45 +0000 |
---|---|---|
committer | Kristaps Dzonsons <kristaps@bsd.lv> | 2011-05-17 22:32:45 +0000 |
commit | 71405e53b941d50754fb4041993c7ef5ad4c5654 (patch) | |
tree | 0a18494430e5b8fbe125f79052c2d8ab12d1d87c /term.c | |
parent | ae85c53a20e295bcb9eecc8a2d24955c35392839 (diff) | |
download | mandoc-71405e53b941d50754fb4041993c7ef5ad4c5654.tar.gz |
Locale support. I'm checking this in to clean up fall-out in-tree, but
it looks pretty good. Basically, the -Tlocale option propagates into
term_ascii.c, where we set locale-specific console call-backs IFF (1)
setlocale() works; (2) locale support is compiled in (see Makefile for
-DUSE_WCHAR); (3) the internal structure of wchar_t maps directly to
Unicode codepoints as defined by __STDC_ISO_10646__; and (4) the console
supports multi-byte characters.
To date, this configuration is only supported on GNU/Linux. OpenBSD doesn't
export __STDC_ISO_10646__ although I'm told by stsp@openbsd.org that it
should (it has the correct map). Apparently FreeBSD is the same way.
NetBSD? Don't know. Apple also supports this, but doesn't define the
macro. Special-casing!
Benchmark: -Tlocale incurs less than 0.2 factor overhead when run
through several thousand manuals when UTF8 output is enabled. Native
mode (whether directly -Tascii or through no locale or whatever) is
UNCHANGED: the function callbacks are the same as before.
Note. If the underlying system does NOT support __STDC_ISO_10646__, there
is a "slow" version possible with iconv or other means of flipping from
a Unicode codepoint to a wchar_t.
Diffstat (limited to 'term.c')
-rw-r--r-- | term.c | 51 |
1 files changed, 45 insertions, 6 deletions
@@ -36,6 +36,7 @@ static void adjbuf(struct termp *p, int); static void bufferc(struct termp *, char); static void encode(struct termp *, const char *, size_t); +static void encode1(struct termp *, int); void term_free(struct termp *p) @@ -403,7 +404,7 @@ term_word(struct termp *p, const char *word) { const char *seq, *cp; char c; - int sz; + int sz, uc; size_t ssz; enum mandoc_esc esc; @@ -440,7 +441,13 @@ term_word(struct termp *p, const char *word) switch (esc) { case (ESCAPE_UNICODE): - encode(p, "?", 1); + if (TERMENC_ASCII == p->enc) { + encode1(p, '?'); + break; + } + uc = mchars_num2uc(seq + 1, sz - 1); + if ('\0' != uc) + encode1(p, uc); break; case (ESCAPE_NUMBERED): if ('\0' != (c = mchars_num2char(seq, sz))) @@ -503,6 +510,33 @@ bufferc(struct termp *p, char c) p->buf[p->col++] = c; } +/* + * See encode(). + * Do this for a single (probably unicode) value. + * Does not check for non-decorated glyphs. + */ +static void +encode1(struct termp *p, int c) +{ + enum termfont f; + + if (p->col + 4 >= p->maxcols) + adjbuf(p, p->col + 4); + + f = term_fonttop(p); + + if (TERMFONT_NONE == f) { + p->buf[p->col++] = c; + return; + } else if (TERMFONT_UNDER == f) { + p->buf[p->col++] = '_'; + } else + p->buf[p->col++] = c; + + p->buf[p->col++] = 8; + p->buf[p->col++] = c; +} + static void encode(struct termp *p, const char *word, size_t sz) { @@ -584,11 +618,16 @@ term_strlen(const struct termp *p, const char *cp) case (ESCAPE_ERROR): return(sz); case (ESCAPE_UNICODE): - c = '?'; - /* FALLTHROUGH */ - case (ESCAPE_NUMBERED): + if (TERMENC_ASCII != p->enc) { + sz += (*p->width)(p, '?'); + break; + } + c = mchars_num2uc(seq + 1, ssz - 1); if ('\0' != c) - c = mchars_num2char(seq, ssz); + sz += (*p->width)(p, c); + break; + case (ESCAPE_NUMBERED): + c = mchars_num2char(seq, ssz); if ('\0' != c) sz += (*p->width)(p, c); break; |