summary | refs | log | tree | commit | diff | stats
path: root/term.c
diff options
context:
space:
mode:
author: Ingo Schwarze <schwarze@openbsd.org> 2014-10-29 00:17:43 +0000
committer: Ingo Schwarze <schwarze@openbsd.org> 2014-10-29 00:17:43 +0000
commit: 1b2b0a521e5fb13fb845879ff53b5201b76097f4 (patch)
tree: bd180ac98d410d71e4bf7f89a307d3c1b676f0c2 /term.c
parent: cfd3120c8be71fbcacc05d872ae13324d3a3685f (diff)
download: mandoc-1b2b0a521e5fb13fb845879ff53b5201b76097f4.tar.gz
In terminal output, unify handling of Unicode and numbered character
escape sequences, just as was implemented earlier for -Thtml. Do not let control characters other than ASCII 9 (horizontal tab) propagate to the output: groff allows them, but that really doesn't look like a great idea. Let mchars_num2char() return int so that we can distinguish invalid \N syntax from \N'0'. This also reduces the danger of signed char issues popping up.
Diffstat (limited to 'term.c')
-rw-r--r-- term.c 102
1 files changed, 64 insertions, 38 deletions
diff --git a/term.c b/term.c
index 10516d14..9b7ccb27 100644
--- a/term.c
+++ b/term.c
@@ -391,7 +391,6 @@ term_word(struct termp *p, const char *word)
{
const char nbrsp[2] = { ASCII_NBRSP, 0 };
const char *seq, *cp;
- char c;
int sz, uc;
size_t ssz;
enum mandoc_esc esc;
@@ -443,16 +442,11 @@ term_word(struct termp *p, const char *word)
switch (esc) {
case ESCAPE_UNICODE:
uc = mchars_num2uc(seq + 1, sz - 1);
- if (p->enc == TERMENC_ASCII) {
- cp = ascii_uc2str(uc);
- encode(p, cp, strlen(cp));
- } else
- encode1(p, uc);
break;
case ESCAPE_NUMBERED:
- c = mchars_num2char(seq, sz);
- if ('\0' != c)
- encode(p, &c, 1);
+ uc = mchars_num2char(seq, sz);
+ if (uc < 0)
+ continue;
break;
case ESCAPE_SPECIAL:
if (p->enc == TERMENC_ASCII) {
@@ -465,35 +459,50 @@ term_word(struct termp *p, const char *word)
if (uc > 0)
encode1(p, uc);
}
- break;
+ continue;
case ESCAPE_FONTBOLD:
term_fontrepl(p, TERMFONT_BOLD);
- break;
+ continue;
case ESCAPE_FONTITALIC:
term_fontrepl(p, TERMFONT_UNDER);
- break;
+ continue;
case ESCAPE_FONTBI:
term_fontrepl(p, TERMFONT_BI);
- break;
+ continue;
case ESCAPE_FONT:
/* FALLTHROUGH */
case ESCAPE_FONTROMAN:
term_fontrepl(p, TERMFONT_NONE);
- break;
+ continue;
case ESCAPE_FONTPREV:
term_fontlast(p);
- break;
+ continue;
case ESCAPE_NOSPACE:
if (TERMP_SKIPCHAR & p->flags)
p->flags &= ~TERMP_SKIPCHAR;
else if ('\0' == *word)
p->flags |= TERMP_NOSPACE;
- break;
+ continue;
case ESCAPE_SKIPCHAR:
p->flags |= TERMP_SKIPCHAR;
- break;
+ continue;
default:
- break;
+ continue;
+ }
+
+ /*
+ * Common handling for Unicode and numbered
+ * character escape sequences.
+ */
+
+ if (p->enc == TERMENC_ASCII) {
+ cp = ascii_uc2str(uc);
+ encode(p, cp, strlen(cp));
+ } else {
+ if ((uc < 0x20 && uc != 0x09) ||
+ (uc > 0x7E && uc < 0xA0))
+ uc = 0xFFFD;
+ encode1(p, uc);
}
}
p->flags &= ~TERMP_NBRWORD;
@@ -645,7 +654,7 @@ size_t
term_strlen(const struct termp *p, const char *cp)
{
size_t sz, rsz, i;
- int ssz, skip, c;
+ int ssz, skip, uc;
const char *seq, *rhs;
enum mandoc_esc esc;
static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH,
@@ -675,44 +684,61 @@ term_strlen(const struct termp *p, const char *cp)
switch (esc) {
case ESCAPE_UNICODE:
- c = mchars_num2uc(seq + 1, sz - 1);
- if (p->enc == TERMENC_ASCII) {
- rhs = ascii_uc2str(c);
- rsz = strlen(rhs);
- } else
- sz += cond_width(p, c, &skip);
+ uc = mchars_num2uc(seq + 1, sz - 1);
break;
case ESCAPE_NUMBERED:
- c = mchars_num2char(seq, ssz);
- if ('\0' != c)
- sz += cond_width(p, c, &skip);
+ uc = mchars_num2char(seq, ssz);
+ if (uc < 0)
+ continue;
break;
case ESCAPE_SPECIAL:
- if (p->enc == TERMENC_ASCII)
+ if (p->enc == TERMENC_ASCII) {
rhs = mchars_spec2str(p->symtab,
seq, ssz, &rsz);
- else {
- c = mchars_spec2cp(p->symtab,
+ if (rhs != NULL)
+ break;
+ } else {
+ uc = mchars_spec2cp(p->symtab,
seq, ssz);
- if (c > 0)
- sz += cond_width(p, c, &skip);
+ if (uc > 0)
+ sz += cond_width(p, uc, &skip);
}
- break;
+ continue;
case ESCAPE_SKIPCHAR:
skip = 1;
- break;
+ continue;
default:
- break;
+ continue;
}
- if (NULL == rhs)
- break;
+ /*
+ * Common handling for Unicode and numbered
+ * character escape sequences.
+ */
+
+ if (rhs == NULL) {
+ if (p->enc == TERMENC_ASCII) {
+ rhs = ascii_uc2str(uc);
+ rsz = strlen(rhs);
+ } else {
+ if ((uc < 0x20 && uc != 0x09) ||
+ (uc > 0x7E && uc < 0xA0))
+ uc = 0xFFFD;
+ sz += cond_width(p, uc, &skip);
+ continue;
+ }
+ }
if (skip) {
skip = 0;
break;
}
+ /*
+ * Common handling for all escape sequences
+ * printing more than one character.
+ */
+
for (i = 0; i < rsz; i++)
sz += (*p->width)(p, *rhs++);
break;