summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ingo Schwarze <schwarze@openbsd.org> 2014-10-26 18:07:28 +0000
committer Ingo Schwarze <schwarze@openbsd.org> 2014-10-26 18:07:28 +0000
commit 69c245b0cf5bf2585729fc7e5d9b2957c1a00851 (patch)
tree 2121b16bd41fb701624abb6e92d9c079eb85a698
parent 769a036f3a9f484327108011e3bfbe984e435947 (diff)
download mandoc-69c245b0cf5bf2585729fc7e5d9b2957c1a00851.tar.gz
In -Tascii mode, provide approximations even for some Unicode escape
sequences above codepoint 512 by doing a reverse lookup in the existing mandoc_char(7) character table. Again, groff isn't smart enough to do this and silently discards such escape sequences without printing anything.
-rw-r--r-- chars.c 11
-rw-r--r-- mandoc.h 1
-rw-r--r-- mchars_alloc.3 11
-rw-r--r-- term_ascii.c 6
4 files changed, 27 insertions, 2 deletions
diff --git a/chars.c b/chars.c
index e11e64c9..950f9edc 100644
--- a/chars.c
+++ b/chars.c
@@ -149,6 +149,17 @@ mchars_spec2str(const struct mchars *arg,
return(ln->ascii);
}
+const char * /* string taken from the lines[] table, or the literal "<?>" */
+mchars_uc2str(int uc) /* reverse lookup: Unicode codepoint uc -> its .ascii string */
+{
+ int i; /* index into lines[] */
+
+ for (i = 0; i < LINES_MAX; i++) /* linear scan over all LINES_MAX entries */
+ if (uc == lines[i].unicode)
+ return(lines[i].ascii); /* first entry with a matching codepoint wins */
+ return("<?>"); /* no entry in lines[] carries this codepoint */
+}
+
static const struct ln *
find(const struct mchars *tab, const char *p, size_t sz)
{
diff --git a/mandoc.h b/mandoc.h
index 14fddd36..5052a8b3 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -426,6 +426,7 @@ enum mandoc_esc mandoc_escape(const char **, const char **, int *);
struct mchars *mchars_alloc(void);
void mchars_free(struct mchars *);
char mchars_num2char(const char *, size_t);
+const char *mchars_uc2str(int);
int mchars_num2uc(const char *, size_t);
int mchars_spec2cp(const struct mchars *,
const char *, size_t);
diff --git a/mchars_alloc.3 b/mchars_alloc.3
index 34c9fa3f..44442e4a 100644
--- a/mchars_alloc.3
+++ b/mchars_alloc.3
@@ -59,6 +59,8 @@
.Fa "size_t sz"
.Fa "size_t *rsz"
.Fc
+.Ft "const char *"
+.Fn mchars_uc2str "int codepoint"
.Sh DESCRIPTION
These functions translate Unicode character numbers and
.Xr roff 7
@@ -199,6 +201,14 @@ output module use this function to render
and
.Ic \eC\(aq Ns Ar name Ns Ic \(aq
escape sequences.
+.Pp
+The function
+.Fn mchars_uc2str
+performs a reverse lookup of the Unicode
+.Fa codepoint
+and returns an ASCII string representation, or the string
+.Qq <?>
+if none is available.
.Sh FILES
These funtions are implemented in the file
.Pa chars.c .
@@ -218,6 +228,7 @@ following mandoc versions:
.It Fn mchars_num2uc Ta 1.11.3 Ta \(em Ta \(em
.It Fn mchars_spec2cp Ta 1.11.2 Ta Fn chars_spec2cp Ta 1.10.5
.It Fn mchars_spec2str Ta 1.11.2 Ta Fn a2ascii Ta 1.5.3
+.It Fn mchars_uc2str Ta 1.13.2 Ta \(em Ta \(em
.El
.Sh AUTHORS
.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv
diff --git a/term_ascii.c b/term_ascii.c
index d7283411..f5782ff2 100644
--- a/term_ascii.c
+++ b/term_ascii.c
@@ -236,9 +236,11 @@ ascii_uc2str(int uc)
"j", "DZ", "D", "dz", "G", "g", "HV", "W",
"N", "n", "A", "a", "AE", "ae", "O", "o"};
- if (uc < 0 || (size_t)uc >= sizeof(tab)/sizeof(tab[0]))
+ if (uc < 0)
return("<?>");
- return(tab[uc]);
+ if ((size_t)uc < sizeof(tab)/sizeof(tab[0]))
+ return(tab[uc]);
+ return(mchars_uc2str(uc));
}
static size_t