diff options
author | Kristaps Dzonsons <kristaps@bsd.lv> | 2010-07-20 14:56:42 +0000 |
---|---|---|
committer | Kristaps Dzonsons <kristaps@bsd.lv> | 2010-07-20 14:56:42 +0000 |
commit | 714ad8829b754f1142d7471b3dbf5e2400f8e96d (patch) | |
tree | 9965fff6f010f018954e66473e011ccd37583e5e /main.c | |
parent | f4173e3c44a199bfbd7efb30b4ee4f84855706ba (diff) | |
download | mandoc-714ad8829b754f1142d7471b3dbf5e2400f8e96d.tar.gz |
Strip input characters that are neither graphable nor blank. The manuals
specifically say that such characters are not allowed, and were they
allowed, output would be inconsistent across output media (-Tps will
puke, terminals not using your charset will puke, etc.).
With this done, simplify check_text() to check only for escapes and
tabs. Add a new warning for tabs in non-literal context, too.
Diffstat (limited to 'main.c')
-rw-r--r-- | main.c | 22 |
1 file changed, 22 insertions, 0 deletions
```diff
@@ -23,6 +23,7 @@
 #include <sys/stat.h>
 #include <assert.h>
+#include <ctype.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -110,6 +111,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
 	"list type must come first",
 	"bad standard",
 	"bad library",
+	"tab in non-literal context",
 	"bad escape sequence",
 	"unterminated quoted string",
 	"argument requires the width argument",
@@ -491,6 +493,26 @@ fdesc(struct curparse *curp)
 				++lnn;
 				break;
 			}
+
+			/*
+			 * Warn about bogus characters. If you're using
+			 * non-ASCII encoding, you're screwing your
+			 * readers. Since I'd rather this not happen,
+			 * I'll be helpful and drop these characters so
+			 * we don't display gibberish. Note to manual
+			 * writers: use special characters.
+			 */
+
+			if ( ! isgraph((u_char)blk.buf[i]) &&
+					! isblank((u_char)blk.buf[i])) {
+				if ( ! mmsg(MANDOCERR_BADCHAR, curp,
+						lnn_start, pos,
+						"ignoring byte"))
+					goto bailout;
+				i++;
+				continue;
+			}
+
 			/* Trailing backslash is like a plain character. */
 			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
 				if (pos >= (int)ln.sz)
```