summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Kristaps Dzonsons <kristaps@bsd.lv> 2010-07-20 14:56:42 +0000
committer: Kristaps Dzonsons <kristaps@bsd.lv> 2010-07-20 14:56:42 +0000
commit: 714ad8829b754f1142d7471b3dbf5e2400f8e96d (patch)
tree: 9965fff6f010f018954e66473e011ccd37583e5e
parent: f4173e3c44a199bfbd7efb30b4ee4f84855706ba (diff)
download: mandoc-714ad8829b754f1142d7471b3dbf5e2400f8e96d.tar.gz
Strip non-graphable, non-blank characters from the input.
The manuals specifically say that such characters are not allowed, and were they allowed, output would be inconsistent across output media (-Tps will puke, terminals that do not use your charset will puke, etc.). With this done, simplify check_text() to check only for escapes and tabs. Add a new tab warning, too.
-rw-r--r-- main.c 22
-rw-r--r-- man_validate.c 44
-rw-r--r-- mandoc.h 1
-rw-r--r-- mdoc_validate.c 38
4 files changed, 67 insertions, 38 deletions
diff --git a/main.c b/main.c
index 8a997e9a..9d5a90bd 100644
--- a/main.c
+++ b/main.c
@@ -23,6 +23,7 @@
#include <sys/stat.h>
#include <assert.h>
+#include <ctype.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
@@ -110,6 +111,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"list type must come first",
"bad standard",
"bad library",
+ "tab in non-literal context",
"bad escape sequence",
"unterminated quoted string",
"argument requires the width argument",
@@ -491,6 +493,26 @@ fdesc(struct curparse *curp)
++lnn;
break;
}
+
+ /*
+ * Warn about bogus characters. If you're using
+ * non-ASCII encoding, you're screwing your
+ * readers. Since I'd rather this not happen,
+ * I'll be helpful and drop these characters so
+ * we don't display gibberish. Note to manual
+ * writers: use special characters.
+ */
+
+ if ( ! isgraph((u_char)blk.buf[i]) &&
+ ! isblank((u_char)blk.buf[i])) {
+ if ( ! mmsg(MANDOCERR_BADCHAR, curp,
+ lnn_start, pos,
+ "ignoring byte"))
+ goto bailout;
+ i++;
+ continue;
+ }
+
/* Trailing backslash is like a plain character. */
if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
if (pos >= (int)ln.sz)
diff --git a/man_validate.c b/man_validate.c
index 57f8be9f..0d96953c 100644
--- a/man_validate.c
+++ b/man_validate.c
@@ -26,6 +26,7 @@
#include <limits.h>
#include <stdarg.h>
#include <stdlib.h>
+#include <string.h>
#include "mandoc.h"
#include "libman.h"
@@ -206,32 +207,37 @@ check_text(CHKARGS)
{
char *p;
int pos, c;
-
- assert(n->string);
+ size_t sz;
for (p = n->string, pos = n->pos + 1; *p; p++, pos++) {
- if ('\\' == *p) {
- c = mandoc_special(p);
- if (c) {
- p += c - 1;
- pos += c - 1;
- continue;
- }
+ sz = strcspn(p, "\t\\");
+ p += (int)sz;
+
+ if ('\0' == *p)
+ break;
+
+ pos += (int)sz;
- c = man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE);
- if ( ! (MAN_IGN_ESCAPE & m->pflags) && ! c)
- return(c);
+ if ('\t' == *p) {
+ if (MAN_LITERAL & m->flags)
+ continue;
+ if (man_pmsg(m, n->line, pos, MANDOCERR_BADTAB))
+ continue;
+ return(0);
}
- /*
- * FIXME: we absolutely cannot let \b get through or it
- * will destroy some assumptions in terms of format.
- */
+ /* Check the special character. */
- if ('\t' == *p || isprint((u_char)*p) || ASCII_HYPH == *p)
+ c = mandoc_special(p);
+ if (c) {
+ p += c - 1;
+ pos += c - 1;
continue;
- if ( ! man_pmsg(m, n->line, pos, MANDOCERR_BADCHAR))
- return(0);
+ }
+
+ c = man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE);
+ if ( ! (MAN_IGN_ESCAPE & m->pflags) && ! c)
+ return(c);
}
return(1);
diff --git a/mandoc.h b/mandoc.h
index 8b3dd794..c7ac2a3e 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -39,6 +39,7 @@ enum mandocerr {
MANDOCERR_LISTFIRST, /* list type must come first */
MANDOCERR_BADSTANDARD, /* bad standard */
MANDOCERR_BADLIB, /* bad library */
+ MANDOCERR_BADTAB, /* tab in non-literal context */
MANDOCERR_BADESCAPE, /* bad escape sequence */
MANDOCERR_BADQUOTE, /* unterminated quoted string */
MANDOCERR_NOWIDTHARG, /* argument requires the width argument */
diff --git a/mdoc_validate.c b/mdoc_validate.c
index 6bd6c14d..cc00a1ae 100644
--- a/mdoc_validate.c
+++ b/mdoc_validate.c
@@ -453,26 +453,29 @@ check_argv(struct mdoc *m, struct mdoc_node *n, struct mdoc_argv *v)
static int
-check_text(struct mdoc *mdoc, int line, int pos, char *p)
+check_text(struct mdoc *m, int ln, int pos, char *p)
{
int c;
-
- /*
- * FIXME: we absolutely cannot let \b get through or it will
- * destroy some assumptions in terms of format.
- */
+ size_t sz;
for ( ; *p; p++, pos++) {
+ sz = strcspn(p, "\t\\");
+ p += (int)sz;
+
+ if ('\0' == *p)
+ break;
+
+ pos += (int)sz;
+
if ('\t' == *p) {
- if ( ! (MDOC_LITERAL & mdoc->flags))
- if ( ! mdoc_pmsg(mdoc, line, pos, MANDOCERR_BADCHAR))
- return(0);
- } else if ( ! isprint((u_char)*p) && ASCII_HYPH != *p)
- if ( ! mdoc_pmsg(mdoc, line, pos, MANDOCERR_BADCHAR))
- return(0);
+ if (MDOC_LITERAL & m->flags)
+ continue;
+ if (mdoc_pmsg(m, ln, pos, MANDOCERR_BADTAB))
+ continue;
+ return(0);
+ }
- if ('\\' != *p)
- continue;
+ /* Check the special character. */
c = mandoc_special(p);
if (c) {
@@ -481,8 +484,8 @@ check_text(struct mdoc *mdoc, int line, int pos, char *p)
continue;
}
- c = mdoc_pmsg(mdoc, line, pos, MANDOCERR_BADESCAPE);
- if ( ! (MDOC_IGN_ESCAPE & mdoc->pflags) && ! c)
+ c = mdoc_pmsg(m, ln, pos, MANDOCERR_BADESCAPE);
+ if ( ! (MDOC_IGN_ESCAPE & m->pflags) && ! c)
return(c);
}
@@ -490,8 +493,6 @@ check_text(struct mdoc *mdoc, int line, int pos, char *p)
}
-
-
static int
check_parent(PRE_ARGS, enum mdoct tok, enum mdoc_type t)
{
@@ -509,7 +510,6 @@ check_parent(PRE_ARGS, enum mdoct tok, enum mdoc_type t)
}
-
static int
pre_display(PRE_ARGS)
{