summary refs log tree commit diff stats
path: root/mdoc_validate.c
diff options
context:
space:
mode:
author Ingo Schwarze <schwarze@openbsd.org> 2017-06-10 16:54:16 +0000
committer Ingo Schwarze <schwarze@openbsd.org> 2017-06-10 16:54:16 +0000
commit cf7cd4433f7b7f3a614ab90aae9845b64adec040 (patch)
tree cda49769409639d7fb76c880bd579ecaea30b16f /mdoc_validate.c
parent 3784137518f9e911fa64096240df34b967ced4f3 (diff)
download mandoc-cf7cd4433f7b7f3a614ab90aae9845b64adec040.tar.gz
Reduce false positives for the "no blank before trailing delimiter" message.
This brings us down to one false positive for about every 18 pages.
Diffstat (limited to 'mdoc_validate.c')
-rw-r--r-- mdoc_validate.c 77
1 files changed, 75 insertions, 2 deletions
diff --git a/mdoc_validate.c b/mdoc_validate.c
index c641aa15..5af39eb0 100644
--- a/mdoc_validate.c
+++ b/mdoc_validate.c
@@ -412,9 +412,17 @@ static void
post_delim(POST_ARGS)
{
const struct roff_node *nch;
- const char *lc;
+ const char *lc, *cp;
+ int nw;
enum mdelim delim;
+ enum roff_tok tok;
+ /*
+ * Find candidates: at least two bytes,
+ * the last one a closing or middle delimiter.
+ */
+
+ tok = mdoc->last->tok;
nch = mdoc->last->last;
if (nch == NULL || nch->type != ROFFT_TEXT)
return;
@@ -424,9 +432,74 @@ post_delim(POST_ARGS)
delim = mdoc_isdelim(lc);
if (delim == DELIM_NONE || delim == DELIM_OPEN)
return;
+
+ /*
+ * Reduce false positives by allowing various cases.
+ */
+
+ /* Escaped delimiters. */
+ if (lc > nch->string + 1 && lc[-2] == '\\' &&
+ (lc[-1] == '&' || lc[-1] == 'e'))
+ return;
+
+ /* Specific byte sequences. */
+ switch (*lc) {
+ case ')':
+ for (cp = lc; cp >= nch->string; cp--)
+ if (*cp == '(')
+ return;
+ break;
+ case '.':
+ if (lc > nch->string + 1 && lc[-2] == '.' && lc[-1] == '.')
+ return;
+ if (lc[-1] == '.')
+ return;
+ break;
+ case ';':
+ if (tok == MDOC_Vt)
+ return;
+ break;
+ case '?':
+ if (lc[-1] == '?')
+ return;
+ break;
+ case ']':
+ for (cp = lc; cp >= nch->string; cp--)
+ if (*cp == '[')
+ return;
+ break;
+ case '|':
+ if (lc == nch->string + 1 && lc[-1] == '|')
+ return;
+ default:
+ break;
+ }
+
+ /* Exactly two non-alphanumeric bytes. */
+ if (lc == nch->string + 1 && !isalnum((unsigned char)lc[-1]))
+ return;
+
+ /* At least three alphabetic words with a sentence ending. */
+ if (strchr("!.:?", *lc) != NULL && (tok == MDOC_Em ||
+ tok == MDOC_Li || tok == MDOC_No || tok == MDOC_Po ||
+ tok == MDOC_Pq || tok == MDOC_Sy)) {
+ nw = 0;
+ for (cp = lc - 1; cp >= nch->string; cp--) {
+ if (*cp == ' ') {
+ nw++;
+ if (cp > nch->string && cp[-1] == ',')
+ cp--;
+ } else if (isalpha((unsigned int)*cp)) {
+ if (nw > 1)
+ return;
+ } else
+ break;
+ }
+ }
+
mandoc_vmsg(MANDOCERR_DELIM, mdoc->parse,
nch->line, nch->pos + (lc - nch->string),
- "%s%s %s", roff_name[mdoc->last->tok],
+ "%s%s %s", roff_name[tok],
nch == mdoc->last->child ? "" : " ...", nch->string);
}