diff options
author | Kristaps Dzonsons <kristaps@bsd.lv> | 2011-05-17 22:32:45 +0000 |
---|---|---|
committer | Kristaps Dzonsons <kristaps@bsd.lv> | 2011-05-17 22:32:45 +0000 |
commit | 71405e53b941d50754fb4041993c7ef5ad4c5654 (patch) | |
tree | 0a18494430e5b8fbe125f79052c2d8ab12d1d87c /term_ascii.c | |
parent | ae85c53a20e295bcb9eecc8a2d24955c35392839 (diff) | |
download | mandoc-71405e53b941d50754fb4041993c7ef5ad4c5654.tar.gz |
Locale support. I'm checking this in to clean up fall-out in-tree, but
it looks pretty good. Basically, the -Tlocale option propagates into
term_ascii.c, where we set locale-specific console call-backs IFF (1)
setlocale() works; (2) locale support is compiled in (see Makefile for
-DUSE_WCHAR); (3) the internal structure of wchar_t maps directly to
Unicode codepoints as defined by __STDC_ISO_10646__; and (4) the console
supports multi-byte characters.
To date, this configuration only supports GNU/Linux. OpenBSD doesn't
export __STDC_ISO_10646__ although I'm told by stsp@openbsd.org that it
should (it has the correct map). Apparently FreeBSD is the same way.
NetBSD? Don't know. Apple also supports this, but doesn't define the
macro. Special-casing!
Benchmark: -Tlocale incurs an overhead factor of less than 0.2 when run
through several thousand manuals when UTF8 output is enabled. Native
mode (whether directly -Tascii or through no locale or whatever) is
UNCHANGED: the function callbacks are the same as before.
Note. If the underlying system does NOT support __STDC_ISO_10646__, there
is a "slow" version possible with iconv or other means of flipping from
a Unicode codepoint to a wchar_t.
Diffstat (limited to 'term_ascii.c')
-rw-r--r-- | term_ascii.c | 81 |
1 files changed, 69 insertions, 12 deletions
diff --git a/term_ascii.c b/term_ascii.c index 831621b3..a59b91f0 100644 --- a/term_ascii.c +++ b/term_ascii.c @@ -21,16 +21,26 @@ #include <sys/types.h> #include <assert.h> +#ifdef USE_WCHAR +# include <locale.h> +#endif #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> +#ifdef USE_WCHAR +# include <wchar.h> +#endif #include "mandoc.h" #include "out.h" #include "term.h" #include "main.h" +#if ! defined(__STDC_ISO_10646__) +# undef USE_WCHAR +#endif + static struct termp *ascii_init(enum termenc, char *); static double ascii_hspan(const struct termp *, const struct roffsu *); @@ -41,6 +51,13 @@ static void ascii_end(struct termp *); static void ascii_endline(struct termp *); static void ascii_letter(struct termp *, int); +#ifdef USE_WCHAR +static void locale_advance(struct termp *, size_t); +static void locale_endline(struct termp *); +static void locale_letter(struct termp *, int); +static size_t locale_width(const struct termp *, int); +#endif + static struct termp * ascii_init(enum termenc enc, char *outopts) { @@ -54,15 +71,28 @@ ascii_init(enum termenc enc, char *outopts) p->tabwidth = 5; p->defrmargin = 78; - p->advance = ascii_advance; p->begin = ascii_begin; p->end = ascii_end; - p->endline = ascii_endline; p->hspan = ascii_hspan; - p->letter = ascii_letter; p->type = TERMTYPE_CHAR; + + p->enc = TERMENC_ASCII; + p->advance = ascii_advance; + p->endline = ascii_endline; + p->letter = ascii_letter; p->width = ascii_width; +#if defined (USE_WCHAR) + if (TERMENC_LOCALE == enc) + if (setlocale(LC_ALL, "") && MB_CUR_MAX > 1) { + p->enc = enc; + p->advance = locale_advance; + p->endline = locale_endline; + p->letter = locale_letter; + p->width = locale_width; + } +#endif + toks[0] = "width"; toks[1] = NULL; @@ -104,7 +134,6 @@ ascii_width(const struct termp *p, int c) return(1); } - void ascii_free(void *arg) { @@ -112,17 +141,14 @@ ascii_free(void *arg) term_free((struct termp *)arg); } - /* ARGSUSED */ static void ascii_letter(struct 
termp *p, int c) { - /* LINTED */ putchar(c); } - static void ascii_begin(struct termp *p) { @@ -130,7 +156,6 @@ ascii_begin(struct termp *p) (*p->headf)(p, p->argf); } - static void ascii_end(struct termp *p) { @@ -138,7 +163,6 @@ ascii_end(struct termp *p) (*p->footf)(p, p->argf); } - /* ARGSUSED */ static void ascii_endline(struct termp *p) @@ -147,19 +171,16 @@ ascii_endline(struct termp *p) putchar('\n'); } - /* ARGSUSED */ static void ascii_advance(struct termp *p, size_t len) { size_t i; - /* Just print whitespace on the terminal. */ for (i = 0; i < len; i++) putchar(' '); } - /* ARGSUSED */ static double ascii_hspan(const struct termp *p, const struct roffsu *su) @@ -198,3 +219,39 @@ ascii_hspan(const struct termp *p, const struct roffsu *su) return(r); } +#ifdef USE_WCHAR +/* ARGSUSED */ +static size_t +locale_width(const struct termp *p, int c) +{ + int rc; + + return((rc = wcwidth(c)) < 0 ? 0 : rc); +} + +/* ARGSUSED */ +static void +locale_advance(struct termp *p, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) + putwchar(L' '); +} + +/* ARGSUSED */ +static void +locale_endline(struct termp *p) +{ + + putwchar(L'\n'); +} + +/* ARGSUSED */ +static void +locale_letter(struct termp *p, int c) +{ + + putwchar(c); +} +#endif |