diff options
author | Ingo Schwarze <schwarze@openbsd.org> | 2014-10-26 17:12:03 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@openbsd.org> | 2014-10-26 17:12:03 +0000 |
commit | 769a036f3a9f484327108011e3bfbe984e435947 (patch) | |
tree | 79c751b46195aae7e4a581337e647055584884f7 /term.c | |
parent | 90de6f743cde657a20885806bb1ea6bce6741b71 (diff) | |
download | mandoc-769a036f3a9f484327108011e3bfbe984e435947.tar.gz |
Improve -Tascii output for Unicode escape sequences: For the first 512
code points, provide ASCII approximations. This is already much better
than what groff does, which prints nothing for most code points.
A few minor fixes while here:
* Handle Unicode escape sequences in the ASCII range.
* In case of errors, use the REPLACEMENT CHARACTER U+FFFD for -Tutf8
and the string "<?>" for -Tascii output.
* Handle all one-character escape sequences in mchars_spec2{cp,str}()
and remove the workarounds on the higher level.
Diffstat (limited to 'term.c')
-rw-r--r-- | term.c | 92 |
1 file changed, 39 insertions, 53 deletions
@@ -444,27 +444,14 @@ term_word(struct termp *p, const char *word) if (ESCAPE_ERROR == esc) continue; - if (TERMENC_ASCII != p->enc) - switch (esc) { - case ESCAPE_UNICODE: - uc = mchars_num2uc(seq + 1, sz - 1); - if ('\0' == uc) - break; - encode1(p, uc); - continue; - case ESCAPE_SPECIAL: - uc = mchars_spec2cp(p->symtab, seq, sz); - if (uc <= 0) - break; - encode1(p, uc); - continue; - default: - break; - } - switch (esc) { case ESCAPE_UNICODE: - encode1(p, '?'); + uc = mchars_num2uc(seq + 1, sz - 1); + if (p->enc == TERMENC_ASCII) { + cp = ascii_uc2str(uc); + encode(p, cp, strlen(cp)); + } else + encode1(p, uc); break; case ESCAPE_NUMBERED: c = mchars_num2char(seq, sz); @@ -472,11 +459,19 @@ term_word(struct termp *p, const char *word) encode(p, &c, 1); break; case ESCAPE_SPECIAL: - cp = mchars_spec2str(p->symtab, seq, sz, &ssz); - if (NULL != cp) - encode(p, cp, ssz); - else if (1 == ssz) - encode(p, seq, sz); + if (p->enc == TERMENC_ASCII) { + cp = mchars_spec2str(p->symtab, + seq, sz, &ssz); + if (cp == NULL) + encode(p, "<?>", 3); + else + encode(p, cp, ssz); + } else { + uc = mchars_spec2cp(p->symtab, seq, sz); + if (uc <= 0) + uc = 0xFFFD; + encode1(p, uc); + } break; case ESCAPE_FONTBOLD: term_fontrepl(p, TERMFONT_BOLD); @@ -683,31 +678,16 @@ term_strlen(const struct termp *p, const char *cp) if (ESCAPE_ERROR == esc) continue; - if (TERMENC_ASCII != p->enc) - switch (esc) { - case ESCAPE_UNICODE: - c = mchars_num2uc(seq + 1, - ssz - 1); - if ('\0' == c) - break; - sz += cond_width(p, c, &skip); - continue; - case ESCAPE_SPECIAL: - c = mchars_spec2cp(p->symtab, - seq, ssz); - if (c <= 0) - break; - sz += cond_width(p, c, &skip); - continue; - default: - break; - } - rhs = NULL; switch (esc) { case ESCAPE_UNICODE: - sz += cond_width(p, '?', &skip); + c = mchars_num2uc(seq + 1, sz - 1); + if (p->enc == TERMENC_ASCII) { + rhs = ascii_uc2str(c); + rsz = strlen(rhs); + } else + sz += cond_width(p, c, &skip); break; case ESCAPE_NUMBERED: c = mchars_num2char(seq, 
ssz); @@ -715,14 +695,20 @@ term_strlen(const struct termp *p, const char *cp) sz += cond_width(p, c, &skip); break; case ESCAPE_SPECIAL: - rhs = mchars_spec2str(p->symtab, - seq, ssz, &rsz); - - if (ssz != 1 || rhs) - break; - - rhs = seq; - rsz = ssz; + if (p->enc == TERMENC_ASCII) { + rhs = mchars_spec2str(p->symtab, + seq, ssz, &rsz); + if (rhs == NULL) { + rhs = "<?>"; + rsz = 3; + } + } else { + c = mchars_spec2cp(p->symtab, + seq, ssz); + if (c <= 0) + c = 0xFFFD; + sz += cond_width(p, c, &skip); + } break; case ESCAPE_SKIPCHAR: skip = 1; |