From c78c9e8936467a043d7391bd6994eb881eb0cc70 Mon Sep 17 00:00:00 2001 From: Kristaps Dzonsons Date: Thu, 1 Sep 2011 22:25:53 +0000 Subject: Finishing touches on demandoc. It now backs over ending punctuation as well as leading punctuation. Again, this isn't the same as deroff (which uses, I think, some punctuation as delimiters), but it's easier to explain and simpler to audit. --- demandoc.1 | 14 ++++++++++++-- demandoc.c | 18 +++++++++++++++--- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/demandoc.1 b/demandoc.1 index 169616d2..1c1cd585 100644 --- a/demandoc.1 +++ b/demandoc.1 @@ -41,8 +41,10 @@ This outputs each word of text on its own line. A .Qq word , in this case, refers to whitespace-delimited terms beginning with at -least two letters after opening punctuation and not consisting of any -escape sequences. +least two letters and not consisting of any escape sequences. +Terms have their leading and trailing punctuation +.Pq double-quotes, sentence punctuation, etc. +stripped. .It Ar The input files. .El @@ -84,6 +86,14 @@ Such errors cause to exit at once, possibly in the middle of parsing or formatting a file. The output databases are corrupt and should be removed . .El +.Sh EXAMPLES +The traditional usage of +.Nm +is for spell-checking manuals on +.Bx . +This is accomplished as follows (assuming British spelling): +.Pp +.Dl $ demandoc -w file.1 | spell -b .Sh SEE ALSO .Xr mandoc 1 , .Xr man 7 diff --git a/demandoc.c b/demandoc.c index b61e1fe8..8b6533f2 100644 --- a/demandoc.c +++ b/demandoc.c @@ -132,7 +132,7 @@ static void pstring(const char *p, int col, int *colp, int list) { enum mandoc_esc esc; - const char *start; + const char *start, *end; int emit; /* @@ -161,8 +161,20 @@ again: } else if (isspace((unsigned char)*p)) break; - if (emit && p - start >= 2) { - for ( ; start != p; start++) + end = p - 1; + + while (end > start) + if ('.' == *end || ',' == *end || + '\'' == *end || '"' == *end || + ')' == *end || '!' == *end || + '?' == *end || ':' == *end || + ';' == *end) + end--; + else + break; + + if (emit && end - start >= 1) { + for ( ; start <= end; start++) if (ASCII_HYPH == *start) putchar('-'); else -- cgit