summary | refs | log | tree | commit | diff | stats
path: root/term.c
diff options
context:
space:
mode:
author: Ingo Schwarze <schwarze@openbsd.org> 2014-10-26 17:12:03 +0000
committer: Ingo Schwarze <schwarze@openbsd.org> 2014-10-26 17:12:03 +0000
commit: 769a036f3a9f484327108011e3bfbe984e435947 (patch)
tree: 79c751b46195aae7e4a581337e647055584884f7 /term.c
parent: 90de6f743cde657a20885806bb1ea6bce6741b71 (diff)
download: mandoc-769a036f3a9f484327108011e3bfbe984e435947.tar.gz
Improve -Tascii output for Unicode escape sequences: For the first 512
code points, provide ASCII approximations. This is already much better
than what groff does, which prints nothing for most code points.

A few minor fixes while here:
* Handle Unicode escape sequences in the ASCII range.
* In case of errors, use the REPLACEMENT CHARACTER U+FFFD for -Tutf8
  and the string "<?>" for -Tascii output.
* Handle all one-character escape sequences in mchars_spec2{cp,str}()
  and remove the workarounds on the higher level.
Diffstat (limited to 'term.c')
-rw-r--r-- term.c 92
1 file changed, 39 insertions, 53 deletions
diff --git a/term.c b/term.c
index f1e959a2..8f395a9e 100644
--- a/term.c
+++ b/term.c
@@ -444,27 +444,14 @@ term_word(struct termp *p, const char *word)
if (ESCAPE_ERROR == esc)
continue;
- if (TERMENC_ASCII != p->enc)
- switch (esc) {
- case ESCAPE_UNICODE:
- uc = mchars_num2uc(seq + 1, sz - 1);
- if ('\0' == uc)
- break;
- encode1(p, uc);
- continue;
- case ESCAPE_SPECIAL:
- uc = mchars_spec2cp(p->symtab, seq, sz);
- if (uc <= 0)
- break;
- encode1(p, uc);
- continue;
- default:
- break;
- }
-
switch (esc) {
case ESCAPE_UNICODE:
- encode1(p, '?');
+ uc = mchars_num2uc(seq + 1, sz - 1);
+ if (p->enc == TERMENC_ASCII) {
+ cp = ascii_uc2str(uc);
+ encode(p, cp, strlen(cp));
+ } else
+ encode1(p, uc);
break;
case ESCAPE_NUMBERED:
c = mchars_num2char(seq, sz);
@@ -472,11 +459,19 @@ term_word(struct termp *p, const char *word)
encode(p, &c, 1);
break;
case ESCAPE_SPECIAL:
- cp = mchars_spec2str(p->symtab, seq, sz, &ssz);
- if (NULL != cp)
- encode(p, cp, ssz);
- else if (1 == ssz)
- encode(p, seq, sz);
+ if (p->enc == TERMENC_ASCII) {
+ cp = mchars_spec2str(p->symtab,
+ seq, sz, &ssz);
+ if (cp == NULL)
+ encode(p, "<?>", 3);
+ else
+ encode(p, cp, ssz);
+ } else {
+ uc = mchars_spec2cp(p->symtab, seq, sz);
+ if (uc <= 0)
+ uc = 0xFFFD;
+ encode1(p, uc);
+ }
break;
case ESCAPE_FONTBOLD:
term_fontrepl(p, TERMFONT_BOLD);
@@ -683,31 +678,16 @@ term_strlen(const struct termp *p, const char *cp)
if (ESCAPE_ERROR == esc)
continue;
- if (TERMENC_ASCII != p->enc)
- switch (esc) {
- case ESCAPE_UNICODE:
- c = mchars_num2uc(seq + 1,
- ssz - 1);
- if ('\0' == c)
- break;
- sz += cond_width(p, c, &skip);
- continue;
- case ESCAPE_SPECIAL:
- c = mchars_spec2cp(p->symtab,
- seq, ssz);
- if (c <= 0)
- break;
- sz += cond_width(p, c, &skip);
- continue;
- default:
- break;
- }
-
rhs = NULL;
switch (esc) {
case ESCAPE_UNICODE:
- sz += cond_width(p, '?', &skip);
+ c = mchars_num2uc(seq + 1, sz - 1);
+ if (p->enc == TERMENC_ASCII) {
+ rhs = ascii_uc2str(c);
+ rsz = strlen(rhs);
+ } else
+ sz += cond_width(p, c, &skip);
break;
case ESCAPE_NUMBERED:
c = mchars_num2char(seq, ssz);
@@ -715,14 +695,20 @@ term_strlen(const struct termp *p, const char *cp)
sz += cond_width(p, c, &skip);
break;
case ESCAPE_SPECIAL:
- rhs = mchars_spec2str(p->symtab,
- seq, ssz, &rsz);
-
- if (ssz != 1 || rhs)
- break;
-
- rhs = seq;
- rsz = ssz;
+ if (p->enc == TERMENC_ASCII) {
+ rhs = mchars_spec2str(p->symtab,
+ seq, ssz, &rsz);
+ if (rhs == NULL) {
+ rhs = "<?>";
+ rsz = 3;
+ }
+ } else {
+ c = mchars_spec2cp(p->symtab,
+ seq, ssz);
+ if (c <= 0)
+ c = 0xFFFD;
+ sz += cond_width(p, c, &skip);
+ }
break;
case ESCAPE_SKIPCHAR:
skip = 1;