summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- mandoc.1 | 5
-rw-r--r-- mandoc.h | 1
-rw-r--r-- mdoc_validate.c | 63
-rw-r--r-- read.c | 1
4 files changed, 69 insertions, 1 deletions
diff --git a/mandoc.1 b/mandoc.1
index 0d85d914..1d731558 100644
--- a/mandoc.1
+++ b/mandoc.1
@@ -857,6 +857,11 @@ A single manual page contains two copies of the RCS identifier for
the same operating system.
Consider deleting the later instance and moving the first one up
to the top of the page.
+.It Sy "typo in section name"
+.Pq mdoc
+Fuzzy string matching revealed that the argument of an
+.Ic \&Sh
+macro is similar, but not identical, to a standard section name.
.It Sy "useless macro"
.Pq mdoc
A
diff --git a/mandoc.h b/mandoc.h
index 5d32d422..362b8424 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -56,6 +56,7 @@ enum mandocerr {
MANDOCERR_DATE_LEGACY, /* legacy man(7) date format: Dd ... */
MANDOCERR_RCS_REP, /* duplicate RCS id: ... */
+ MANDOCERR_SEC_TYPO, /* typo in section name: Sh ... */
MANDOCERR_MACRO_USELESS, /* useless macro: macro */
MANDOCERR_BX, /* consider using OS macro: macro */
MANDOCERR_ER_ORDER, /* errnos out of order: Er ... */
diff --git a/mdoc_validate.c b/mdoc_validate.c
index b9075d06..42a51afd 100644
--- a/mdoc_validate.c
+++ b/mdoc_validate.c
@@ -60,6 +60,7 @@ static void check_toptext(struct roff_man *, int, int, const char *);
static int child_an(const struct roff_node *);
static size_t macro2len(enum roff_tok);
static void rewrite_macro2len(struct roff_man *, char **);
+static int similar(const char *, const char *);
static void post_an(POST_ARGS);
static void post_an_norm(POST_ARGS);
@@ -2148,11 +2149,54 @@ post_sh_authors(POST_ARGS)
mdoc->last->line, mdoc->last->pos, NULL);
}
+/*
+ * Return an upper bound for the string distance (allowing
+ * transpositions). Not a full Levenshtein implementation
+ * because Levenshtein is quadratic in the string length
+ * and this function is called for every standard name,
+ * so the check for each custom name would be cubic.
+ * The following crude heuristic is linear, resulting
+ * in quadratic behaviour for checking one custom name,
+ * which does not cause measurable slowdown.
+ *
+ * Both strings are walked in parallel.  Equal characters are
+ * skipped at no cost.  At a mismatch, one character of lookahead
+ * in each string decides which single edit to assume, tried in
+ * this order: replacement, transposition of two adjacent
+ * characters, insertion, deletion.  Each such edit costs 1.
+ * Once either string is exhausted, every character remaining
+ * in the other one costs 1 as well.
+ *
+ * The lookahead reads s1[1] and s2[1]; that is in bounds because
+ * the loop condition guarantees that s1[0] and s2[0] are not NUL,
+ * so the byte after each is at worst the terminating NUL.
+ *
+ * Returns the accumulated cost, in the range 0 to maxdist (3),
+ * or INT_MAX if the cost exceeds maxdist or if none of the four
+ * edit patterns fits at some mismatch.  INT_MAX therefore means
+ * "not similar", which lets a caller searching for the smallest
+ * result ignore such candidates with a plain "<" comparison.
+ */
+static int
+similar(const char *s1, const char *s2)
+{
+ const int maxdist = 3;
+ int dist = 0;
+
+ while (s1[0] != '\0' && s2[0] != '\0') {
+ if (s1[0] == s2[0]) {
+ s1++;
+ s2++;
+ continue;
+ }
+ if (++dist > maxdist) /* one more edit needed; give up if over budget */
+ return INT_MAX;
+ if (s1[1] == s2[1]) { /* replacement */
+ s1++;
+ s2++;
+ } else if (s1[0] == s2[1] && s1[1] == s2[0]) {
+ s1 += 2; /* transposition */
+ s2 += 2;
+ } else if (s1[0] == s2[1]) /* insertion */
+ s2++;
+ else if (s1[1] == s2[0]) /* deletion */
+ s1++;
+ else
+ return INT_MAX; /* no single-edit pattern fits here */
+ }
+ dist += strlen(s1) + strlen(s2); /* at most one tail is non-empty here */
+ return dist > maxdist ? INT_MAX : dist;
+}
+
static void
post_sh_head(POST_ARGS)
{
struct roff_node *nch;
const char *goodsec;
+ const char *const *testsec;
+ int dist, mindist;
enum roff_sec sec;
/*
@@ -2190,8 +2234,25 @@ post_sh_head(POST_ARGS)
/* We don't care about custom sections after this. */
- if (sec == SEC_CUSTOM)
+ if (sec == SEC_CUSTOM) {
+ if ((nch = mdoc->last->child) == NULL ||
+ nch->type != ROFFT_TEXT || nch->next != NULL)
+ return;
+ goodsec = NULL;
+ mindist = INT_MAX;
+ for (testsec = secnames + 1; *testsec != NULL; testsec++) {
+ dist = similar(nch->string, *testsec);
+ if (dist < mindist) {
+ goodsec = *testsec;
+ mindist = dist;
+ }
+ }
+ if (goodsec != NULL)
+ mandoc_vmsg(MANDOCERR_SEC_TYPO, mdoc->parse,
+ nch->line, nch->pos, "Sh %s instead of %s",
+ nch->string, goodsec);
return;
+ }
/*
* Check whether our non-custom section is being repeated or is
diff --git a/read.c b/read.c
index 83f9da47..bcbc1e9a 100644
--- a/read.c
+++ b/read.c
@@ -98,6 +98,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"legacy man(7) date format",
"duplicate RCS id",
+ "typo in section name",
"useless macro",
"consider using OS macro",
"errnos out of order",