Locale support. I'm checking this in to clean up fall-out in-tree, but

it looks pretty good. Basically, the -Tlocale option propagates into term_ascii.c, where we set locale-specific console call-backs IFF (1) setlocale() works; (2) locale support is compiled in (see Makefile for -DUSE_WCHAR); (3) the internal structure of wchar_t maps directly to Unicode codepoints as defined by __STDC_ISO_10646__; and (4) the console supports multi-byte characters. To date, this configuration only supports GNU/Linux. OpenBSD doesn't export __STDC_ISO_10646__ although I'm told by stsp@openbsd.org that it should (it has the correct map). Apparently FreeBSD is the same way. NetBSD? Don't know. Apple also supports this, but doesn't define the macro. Special-casing! Benchmark: -Tlocale incurs less than 0.2 factor overhead when run through several thousand manuals when UTF-8 output is enabled. Native mode (whether directly -Tascii or through no locale or whatever) is UNCHANGED: the function callbacks are the same as before. Note: if the underlying system does NOT support __STDC_ISO_10646__, there is a "slow" version possible with iconv or other means of flipping from a Unicode codepoint to a wchar_t.
author: Kristaps Dzonsons <kristaps@bsd.lv> 2011-05-17 22:32:45 +0000
committer: Kristaps Dzonsons <kristaps@bsd.lv> 2011-05-17 22:32:45 +0000
commit: 71405e53b941d50754fb4041993c7ef5ad4c5654 (patch)
tree: 0a18494430e5b8fbe125f79052c2d8ab12d1d87c /term.c
parent: ae85c53a20e295bcb9eecc8a2d24955c35392839 (diff)
download: mandoc-71405e53b941d50754fb4041993c7ef5ad4c5654.tar.gz
1 files changed, 45 insertions, 6 deletions
diff --git a/term.c b/term.c
index 09647a07..b90f9b7d 100644
--- a/term.c
+++ b/term.c
@@ -36,6 +36,7 @@
 static	void		 adjbuf(struct termp *p, int);
 static	void		 bufferc(struct termp *, char);
 static	void		 encode(struct termp *, const char *, size_t);
+static	void		 encode1(struct termp *, int);
 
 void
 term_free(struct termp *p)
@@ -403,7 +404,7 @@ term_word(struct termp *p, const char *word)
 {
 	const char	*seq, *cp;
 	char		 c;
-	int		 sz;
+	int		 sz, uc;
 	size_t		 ssz;
 	enum mandoc_esc	 esc;
 
@@ -440,7 +441,13 @@ term_word(struct termp *p, const char *word)
 
 		switch (esc) {
 		case (ESCAPE_UNICODE):
-			encode(p, "?", 1);
+			if (TERMENC_ASCII == p->enc) {
+				encode1(p, '?');
+				break;
+			}
+			uc = mchars_num2uc(seq + 1, sz - 1);
+			if ('\0' != uc)
+				encode1(p, uc);
 			break;
 		case (ESCAPE_NUMBERED):
 			if ('\0' != (c = mchars_num2char(seq, sz)))
@@ -503,6 +510,33 @@ bufferc(struct termp *p, char c)
 	p->buf[p->col++] = c;
 }
 
+/*
+ * Buffer the single (probably Unicode) value c with font decoration; see
+ * encode().  With TERMFONT_NONE only c is stored; with TERMFONT_UNDER it is
+ * '_', 8, c; otherwise c, 8, c.  Does not check for non-decorated glyphs.
+ */
+static void
+encode1(struct termp *p, int c)
+{
+	enum termfont	  f;
+
+	if (p->col + 4 >= p->maxcols)	/* at most three cells are written below */
+		adjbuf(p, p->col + 4);	/* presumably grows p->buf; adjbuf() not shown */
+
+	f = term_fonttop(p);
+
+	if (TERMFONT_NONE == f) {
+		p->buf[p->col++] = c;
+		return;
+	} else if (TERMFONT_UNDER == f) {
+		p->buf[p->col++] = '_';
+	} else
+		p->buf[p->col++] = c;
+
+	p->buf[p->col++] = 8;	/* 8 is the ASCII backspace character */
+	p->buf[p->col++] = c;
+}
+
 static void
 encode(struct termp *p, const char *word, size_t sz)
 {
@@ -584,11 +618,16 @@ term_strlen(const struct termp *p, const char *cp)
 			case (ESCAPE_ERROR):
 				return(sz);
 			case (ESCAPE_UNICODE):
-				c = '?';
-				/* FALLTHROUGH */
-			case (ESCAPE_NUMBERED):
+				if (TERMENC_ASCII != p->enc) {
+					sz += (*p->width)(p, '?');
+					break;
+				}
+				c = mchars_num2uc(seq + 1, ssz - 1);
 				if ('\0' != c)
-					c = mchars_num2char(seq, ssz);
+					sz += (*p->width)(p, c);
+				break;
+			case (ESCAPE_NUMBERED):
+				c = mchars_num2char(seq, ssz);
 				if ('\0' != c)
 					sz += (*p->width)(p, c);
 				break;
author	Kristaps Dzonsons <kristaps@bsd.lv>	2011-05-17 22:32:45 +0000
committer	Kristaps Dzonsons <kristaps@bsd.lv>	2011-05-17 22:32:45 +0000
commit	71405e53b941d50754fb4041993c7ef5ad4c5654 (patch)
tree	0a18494430e5b8fbe125f79052c2d8ab12d1d87c /term.c
parent	ae85c53a20e295bcb9eecc8a2d24955c35392839 (diff)
download	mandoc-71405e53b941d50754fb4041993c7ef5ad4c5654.tar.gz