summary | refs | log | tree | commit | diff | stats
path: root/main.c
diff options
context:
space:
mode:
author: Kristaps Dzonsons <kristaps@bsd.lv> 2010-07-20 14:56:42 +0000
committer: Kristaps Dzonsons <kristaps@bsd.lv> 2010-07-20 14:56:42 +0000
commit: 714ad8829b754f1142d7471b3dbf5e2400f8e96d (patch)
tree: 9965fff6f010f018954e66473e011ccd37583e5e /main.c
parent: f4173e3c44a199bfbd7efb30b4ee4f84855706ba (diff)
download: mandoc-714ad8829b754f1142d7471b3dbf5e2400f8e96d.tar.gz
Strip non-graphic characters from the input. The manuals
specifically say that such characters are not allowed, and were they allowed, output would be inconsistent across output media (-Tps will puke, terminals not using your charset will puke, etc.). With this done, simplify check_text() to check only for escapes and tabs. Add a new tab warning, too.
Diffstat (limited to 'main.c')
-rw-r--r-- main.c 22
1 files changed, 22 insertions, 0 deletions
diff --git a/main.c b/main.c
index 8a997e9a..9d5a90bd 100644
--- a/main.c
+++ b/main.c
@@ -23,6 +23,7 @@
#include <sys/stat.h>
#include <assert.h>
+#include <ctype.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
@@ -110,6 +111,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"list type must come first",
"bad standard",
"bad library",
+ "tab in non-literal context",
"bad escape sequence",
"unterminated quoted string",
"argument requires the width argument",
@@ -491,6 +493,26 @@ fdesc(struct curparse *curp)
++lnn;
break;
}
+
+ /*
+ * Warn about bogus characters. If you're using
+ * non-ASCII encoding, you're screwing your
+ * readers. Since I'd rather this not happen,
+ * I'll be helpful and drop these characters so
+ * we don't display gibberish. Note to manual
+ * writers: use special characters.
+ */
+
+ if ( ! isgraph((u_char)blk.buf[i]) &&
+ ! isblank((u_char)blk.buf[i])) {
+ if ( ! mmsg(MANDOCERR_BADCHAR, curp,
+ lnn_start, pos,
+ "ignoring byte"))
+ goto bailout;
+ i++;
+ continue;
+ }
+
/* Trailing backslash is like a plain character. */
if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
if (pos >= (int)ln.sz)