summaryrefslogtreecommitdiffstats
path: root/term.c
diff options
context:
space:
mode:
authorKristaps Dzonsons <kristaps@bsd.lv>2011-05-17 22:32:45 +0000
committerKristaps Dzonsons <kristaps@bsd.lv>2011-05-17 22:32:45 +0000
commit71405e53b941d50754fb4041993c7ef5ad4c5654 (patch)
tree0a18494430e5b8fbe125f79052c2d8ab12d1d87c /term.c
parentae85c53a20e295bcb9eecc8a2d24955c35392839 (diff)
downloadmandoc-71405e53b941d50754fb4041993c7ef5ad4c5654.tar.gz
Locale support. I'm checking this in to clean up fall-out in-tree, but
it looks pretty good. Basically, the -Tlocale option propogates into term_ascii.c, where we set locale-specific console call-backs IFF (1) setlocale() works; (2) locale support is compiled in (see Makefile for -DUSE_WCHAR); (3) the internal structure of wchar_t maps directly to Unicode codepoints as defined by __STDC_ISO_10646__; and (4) the console supports multi-byte characters. To date, this configuration only supports GNU/Linux. OpenBSD doesn't export __STDC_ISO_10646__ although I'm told by stsp@openbsd.org that it should (it has the correct map). Apparently FreeBSD is the same way. NetBSD? Don't know. Apple also supports this, but doesn't define the macro. Special-casing! Benchmark: -Tlocale incurs less than 0.2 factor overhead when run through several thousand manuals when UTF8 output is enabled. Native mode (whether directly -Tascii or through no locale or whatever) is UNCHANGED: the function callbacks are the same as before. Note. If the underlying system does NOT support STDC_ISO_10646, there is a "slow" version possible with iconv or other means of flipping from a Unicode codepoint to a wchar_t.
Diffstat (limited to 'term.c')
-rw-r--r--term.c51
1 files changed, 45 insertions, 6 deletions
diff --git a/term.c b/term.c
index 09647a07..b90f9b7d 100644
--- a/term.c
+++ b/term.c
@@ -36,6 +36,7 @@
static void adjbuf(struct termp *p, int);
static void bufferc(struct termp *, char);
static void encode(struct termp *, const char *, size_t);
+static void encode1(struct termp *, int);
void
term_free(struct termp *p)
@@ -403,7 +404,7 @@ term_word(struct termp *p, const char *word)
{
const char *seq, *cp;
char c;
- int sz;
+ int sz, uc;
size_t ssz;
enum mandoc_esc esc;
@@ -440,7 +441,13 @@ term_word(struct termp *p, const char *word)
switch (esc) {
case (ESCAPE_UNICODE):
- encode(p, "?", 1);
+ if (TERMENC_ASCII == p->enc) {
+ encode1(p, '?');
+ break;
+ }
+ uc = mchars_num2uc(seq + 1, sz - 1);
+ if ('\0' != uc)
+ encode1(p, uc);
break;
case (ESCAPE_NUMBERED):
if ('\0' != (c = mchars_num2char(seq, sz)))
@@ -503,6 +510,33 @@ bufferc(struct termp *p, char c)
p->buf[p->col++] = c;
}
+/*
+ * See encode().
+ * Do this for a single (probably unicode) value.
+ * Does not check for non-decorated glyphs.
+ */
+static void
+encode1(struct termp *p, int c)
+{
+ enum termfont f;
+
+ if (p->col + 4 >= p->maxcols)
+ adjbuf(p, p->col + 4);
+
+ f = term_fonttop(p);
+
+ if (TERMFONT_NONE == f) {
+ p->buf[p->col++] = c;
+ return;
+ } else if (TERMFONT_UNDER == f) {
+ p->buf[p->col++] = '_';
+ } else
+ p->buf[p->col++] = c;
+
+ p->buf[p->col++] = 8;
+ p->buf[p->col++] = c;
+}
+
static void
encode(struct termp *p, const char *word, size_t sz)
{
@@ -584,11 +618,16 @@ term_strlen(const struct termp *p, const char *cp)
case (ESCAPE_ERROR):
return(sz);
case (ESCAPE_UNICODE):
- c = '?';
- /* FALLTHROUGH */
- case (ESCAPE_NUMBERED):
+ if (TERMENC_ASCII != p->enc) {
+ sz += (*p->width)(p, '?');
+ break;
+ }
+ c = mchars_num2uc(seq + 1, ssz - 1);
if ('\0' != c)
- c = mchars_num2char(seq, ssz);
+ sz += (*p->width)(p, c);
+ break;
+ case (ESCAPE_NUMBERED):
+ c = mchars_num2char(seq, ssz);
if ('\0' != c)
sz += (*p->width)(p, c);
break;