diff options
author | Ingo Schwarze <schwarze@openbsd.org> | 2014-10-27 13:31:04 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@openbsd.org> | 2014-10-27 13:31:04 +0000 |
commit | 4313688a446fb34dd7d7ba53f63060819ff17efd (patch) | |
tree | 6b41e6fa3fc2a125358e5e6cc8d4b0a8445a634b | |
parent | 19b7c45c5da55cf8afd4f6546c261a60bf1db390 (diff) | |
download | mandoc-4313688a446fb34dd7d7ba53f63060819ff17efd.tar.gz |
Fix a regression in term.c rev. 1.229 reported by bentley@:
In UTF-8 output, do not print anything if mchars_spec2cp() returns 0.
In particular, this repairs handling of zero-width spaces (\&).
While here, let mchars_spec2cp() return 0xFFFD instead of -1
if the character is not found, simplifying the using code.
In HTML output, do not print obfuscated ASCII characters and
do not test for one-char escapes, mchars_spec2cp() already does that.
-rw-r--r-- | chars.c | 2 | ||||
-rw-r--r-- | html.c | 9 | ||||
-rw-r--r-- | mandocdb.c | 2 | ||||
-rw-r--r-- | term.c | 10 |
4 files changed, 11 insertions, 12 deletions
```diff
@@ -104,7 +104,7 @@ mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz)
 	const struct ln	*ln;
 
 	ln = find(arg, p, sz);
-	return(ln != NULL ? ln->unicode : sz == 1 ? *p : -1);
+	return(ln != NULL ? ln->unicode : sz == 1 ? *p : 0xFFFD);
 }
 
 char
@@ -457,11 +457,12 @@ print_encode(struct html *h, const char *p, int norecurse)
 			break;
 		case ESCAPE_SPECIAL:
 			c = mchars_spec2cp(h->symtab, seq, len);
-			if (c > 0)
+			if (c <= 0)
+				break;
+			if (c < 0x20 || c > 0x7e)
 				printf("&#%d;", c);
-			else if (-1 == c && 1 == len &&
-			    !print_escape(*seq))
-				putchar((int)*seq);
+			else if ( ! print_escape(c))
+				putchar(c);
 			break;
 		case ESCAPE_NOSPACE:
 			if ('\0' == *p)
@@ -1903,7 +1903,7 @@ render_key(struct mchars *mc, struct str *key)
 		 */
 
 		if (write_utf8) {
-			if (0 == (u = mchars_spec2cp(mc, seq, len)))
+			if ((u = mchars_spec2cp(mc, seq, len)) <= 0)
 				continue;
 			cpp = utfbuf;
 			if (0 == (sz = utf8(u, utfbuf)))
@@ -468,9 +468,8 @@ term_word(struct termp *p, const char *word)
 				encode(p, cp, ssz);
 			} else {
 				uc = mchars_spec2cp(p->symtab, seq, sz);
-				if (uc <= 0)
-					uc = 0xFFFD;
-				encode1(p, uc);
+				if (uc > 0)
+					encode1(p, uc);
 			}
 			break;
 		case ESCAPE_FONTBOLD:
@@ -705,9 +704,8 @@ term_strlen(const struct termp *p, const char *cp)
 				} else {
 					c = mchars_spec2cp(p->symtab,
 					    seq, ssz);
-					if (c <= 0)
-						c = 0xFFFD;
-					sz += cond_width(p, c, &skip);
+					if (c > 0)
+						sz += cond_width(p, c, &skip);
 				}
 				break;
 			case ESCAPE_SKIPCHAR:
```