diff options
author | Ingo Schwarze <schwarze@openbsd.org> | 2014-10-26 18:07:28 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@openbsd.org> | 2014-10-26 18:07:28 +0000 |
commit | 69c245b0cf5bf2585729fc7e5d9b2957c1a00851 (patch) | |
tree | 2121b16bd41fb701624abb6e92d9c079eb85a698 | |
parent | 769a036f3a9f484327108011e3bfbe984e435947 (diff) | |
download | mandoc-69c245b0cf5bf2585729fc7e5d9b2957c1a00851.tar.gz |
In -Tascii mode, provide approximations even for some Unicode escape
sequences above codepoint 512 by doing a reverse lookup in the
existing mandoc_char(7) character table.
Again, groff isn't smart enough to do this and silently discards such
escape sequences without printing anything.
-rw-r--r-- | chars.c | 11 | ||||
-rw-r--r-- | mandoc.h | 1 | ||||
-rw-r--r-- | mchars_alloc.3 | 11 | ||||
-rw-r--r-- | term_ascii.c | 6 |
4 files changed, 27 insertions, 2 deletions
@@ -149,6 +149,17 @@ mchars_spec2str(const struct mchars *arg,
 	return(ln->ascii);
 }
 
+const char *
+mchars_uc2str(int uc)
+{
+	int i;
+
+	for (i = 0; i < LINES_MAX; i++)
+		if (uc == lines[i].unicode)
+			return(lines[i].ascii);
+	return("<?>");
+}
+
 static const struct ln *
 find(const struct mchars *tab, const char *p, size_t sz)
 {
@@ -426,6 +426,7 @@ enum mandoc_esc mandoc_escape(const char **, const char **, int *);
 struct mchars *mchars_alloc(void);
 void mchars_free(struct mchars *);
 char mchars_num2char(const char *, size_t);
+const char *mchars_uc2str(int);
 int mchars_num2uc(const char *, size_t);
 int mchars_spec2cp(const struct mchars *,
 	const char *, size_t);
diff --git a/mchars_alloc.3 b/mchars_alloc.3
index 34c9fa3f..44442e4a 100644
--- a/mchars_alloc.3
+++ b/mchars_alloc.3
@@ -59,6 +59,8 @@
 .Fa "size_t sz"
 .Fa "size_t *rsz"
 .Fc
+.Ft "const char *"
+.Fn mchars_uc2str "int codepoint"
 .Sh DESCRIPTION
 These functions translate Unicode character numbers and
 .Xr roff 7
@@ -199,6 +201,14 @@ output module use this function to render
 and
 .Ic \eC\(aq Ns Ar name Ns Ic \(aq
 escape sequences.
+.Pp
+The function
+.Fn mchars_uc2str
+performs a reverse lookup of the Unicode
+.Fa codepoint
+and returns an ASCII string representation, or the string
+.Qq <?>
+if none is available.
 .Sh FILES
 These funtions are implemented in the file
 .Pa chars.c .
@@ -218,6 +228,7 @@ following mandoc versions:
 .It Fn mchars_num2uc Ta 1.11.3 Ta \(em Ta \(em
 .It Fn mchars_spec2cp Ta 1.11.2 Ta Fn chars_spec2cp Ta 1.10.5
 .It Fn mchars_spec2str Ta 1.11.2 Ta Fn a2ascii Ta 1.5.3
+.It Fn mchars_uc2str Ta 1.13.2 Ta \(em Ta \(em
 .El
 .Sh AUTHORS
 .An Kristaps Dzonsons Aq Mt kristaps@bsd.lv
diff --git a/term_ascii.c b/term_ascii.c
index d7283411..f5782ff2 100644
--- a/term_ascii.c
+++ b/term_ascii.c
@@ -236,9 +236,11 @@ ascii_uc2str(int uc)
 	"j", "DZ", "D", "dz", "G", "g", "HV", "W",
 	"N", "n", "A", "a", "AE", "ae", "O", "o"};
 
-	if (uc < 0 || (size_t)uc >= sizeof(tab)/sizeof(tab[0]))
+	if (uc < 0)
 		return("<?>");
-	return(tab[uc]);
+	if ((size_t)uc < sizeof(tab)/sizeof(tab[0]))
+		return(tab[uc]);
+	return(mchars_uc2str(uc));
 }
 
 static size_t