summary | refs | log | tree | commit | diff | stats
path: root/read.c
diff options
context:
space:
mode:
author: Joerg Sonnenberger <joerg@netbsd.org>, 2012-02-05 16:46:15 +0000
committer: Joerg Sonnenberger <joerg@netbsd.org>, 2012-02-05 16:46:15 +0000
commit: 8872acfd3bb11e0bdbd221a6616c7c74b549b313 (patch)
tree: 9b13a57d132d8eb3be912f19e61694e29cbe4a0f /read.c
parent: bf31e136950ee3cb6456462d7c01917c79971922 (diff)
download: mandoc-8872acfd3bb11e0bdbd221a6616c7c74b549b313.tar.gz
Don't silently skip non-ASCII characters, but replace them with ``?''.
This is less likely to break the syntax of macros.
Diffstat (limited to 'read.c')
-rw-r--r-- read.c 11
1 file changed, 7 insertions, 4 deletions
diff --git a/read.c b/read.c
index dd456468..be788c2b 100644
--- a/read.c
+++ b/read.c
@@ -325,9 +325,9 @@ mparse_buf_r(struct mparse *curp, struct buf blk, int start)
* Warn about bogus characters. If you're using
* non-ASCII encoding, you're screwing your
* readers. Since I'd rather this not happen,
- * I'll be helpful and drop these characters so
- * we don't display gibberish. Note to manual
- * writers: use special characters.
+ * I'll be helpful and replace these characters
+ * with "?", so we don't display gibberish.
+ * Note to manual writers: use special characters.
*/
c = (unsigned char) blk.buf[i];
@@ -335,8 +335,11 @@ mparse_buf_r(struct mparse *curp, struct buf blk, int start)
if ( ! (isascii(c) &&
(isgraph(c) || isblank(c)))) {
mandoc_msg(MANDOCERR_BADCHAR, curp,
- curp->line, pos, "ignoring byte");
+ curp->line, pos, NULL);
i++;
+ if (pos >= (int)ln.sz)
+ resize_buf(&ln, 256);
+ ln.buf[pos++] = '?';
continue;
}