summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Schwarze <schwarze@openbsd.org>2014-09-06 22:39:36 +0000
committerIngo Schwarze <schwarze@openbsd.org>2014-09-06 22:39:36 +0000
commitc525d64ed5cfcdc59ac04ab0411b657f2e5e157a (patch)
tree2c84b84c3de80a4af01f1d2dbe041ccca7006567
parent34ba2d164c86804e434dc4199679b9611f18710a (diff)
downloadmandoc-c525d64ed5cfcdc59ac04ab0411b657f2e5e157a.tar.gz
Move main format autodetection from the parser dispatcher to the
roff parser where .Dd and .TH are already detected, anyway. This improves robustness because it correctly handles whitespace or an alternate control character before Dd. In the parser dispatcher, provide a fallback looking ahead in the input buffer instead of always assuming man(7). This corrects autodetection when Dd is preceded by other macros or macro-like handled requests like .ll. Triggered by reports from Daniel Levai about issues on Slackware Linux.
-rw-r--r--libmandoc.h1
-rw-r--r--read.c67
-rw-r--r--roff.c20
3 files changed, 60 insertions, 28 deletions
diff --git a/libmandoc.h b/libmandoc.h
index 51828a21..4b62b231 100644
--- a/libmandoc.h
+++ b/libmandoc.h
@@ -77,6 +77,7 @@ int roff_getreg(const struct roff *, const char *);
char *roff_strdup(const struct roff *, const char *);
int roff_getcontrol(const struct roff *,
const char *, int *);
+int roff_getformat(const struct roff *);
#if 0
char roff_eqndelim(const struct roff *);
void roff_openeqn(struct roff *, const char *,
diff --git a/read.c b/read.c
index 8fb220c6..70c6567f 100644
--- a/read.c
+++ b/read.c
@@ -51,21 +51,22 @@ struct buf {
};
struct mparse {
- enum mandoclevel file_status; /* status of current parse */
- enum mandoclevel wlevel; /* ignore messages below this */
- int line; /* line number in the file */
- int options; /* parser options */
struct man *pman; /* persistent man parser */
struct mdoc *pmdoc; /* persistent mdoc parser */
struct man *man; /* man parser */
struct mdoc *mdoc; /* mdoc parser */
struct roff *roff; /* roff parser (!NULL) */
char *sodest; /* filename pointed to by .so */
- int reparse_count; /* finite interp. stack */
- mandocmsg mmsg; /* warning/error message handler */
- const char *file;
- struct buf *secondary;
+ const char *file; /* filename of current input file */
+ struct buf *primary; /* buffer currently being parsed */
+ struct buf *secondary; /* preprocessed copy of input */
const char *defos; /* default operating system */
+ mandocmsg mmsg; /* warning/error message handler */
+ enum mandoclevel file_status; /* status of current parse */
+ enum mandoclevel wlevel; /* ignore messages below this */
+ int options; /* parser options */
+ int reparse_count; /* finite interp. stack */
+ int line; /* line number in the file */
};
static void resize_buf(struct buf *, size_t);
@@ -248,19 +249,10 @@ resize_buf(struct buf *buf, size_t initial)
static void
pset(const char *buf, int pos, struct mparse *curp)
{
+ char *cp, *ep;
+ int format;
int i;
- /*
- * Try to intuit which kind of manual parser should be used. If
- * passed in by command-line (-man, -mdoc), then use that
- * explicitly. If passed as -mandoc, then try to guess from the
- * line: either skip dot-lines, use -mdoc when finding `.Dt', or
- * default to -man, which is more lenient.
- *
- * Separate out pmdoc/pman from mdoc/man: the first persists
- * through all parsers, while the latter is used per-parse.
- */
-
if ('.' == buf[0] || '\'' == buf[0]) {
for (i = 1; buf[i]; i++)
if (' ' != buf[i] && '\t' != buf[i])
@@ -269,15 +261,35 @@ pset(const char *buf, int pos, struct mparse *curp)
return;
}
- if (MPARSE_MDOC & curp->options) {
- curp->mdoc = curp->pmdoc;
- return;
- } else if (MPARSE_MAN & curp->options) {
- curp->man = curp->pman;
- return;
+ /*
+ * If neither command line arguments -mdoc or -man select
+ * a parser nor the roff parser found a .Dd or .TH macro
+ * yet, look ahead in the main input buffer.
+ */
+
+ if ((format = roff_getformat(curp->roff)) == 0) {
+ cp = curp->primary->buf;
+ ep = cp + curp->primary->sz;
+ while (cp < ep) {
+ if (*cp == '.' || *cp == '\'') {
+ cp++;
+ if (cp[0] == 'D' && cp[1] == 'd') {
+ format = MPARSE_MDOC;
+ break;
+ }
+ if (cp[0] == 'T' && cp[1] == 'H') {
+ format = MPARSE_MAN;
+ break;
+ }
+ }
+ cp = memchr(cp, '\n', ep - cp);
+ if (cp == NULL)
+ break;
+ cp++;
+ }
- if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) {
+ if (format == MPARSE_MDOC) {
if (NULL == curp->pmdoc)
curp->pmdoc = mdoc_alloc(
curp->roff, curp, curp->defos,
@@ -287,6 +299,8 @@ pset(const char *buf, int pos, struct mparse *curp)
return;
}
+ /* Fall back to man(7) as a last resort. */
+
if (NULL == curp->pman)
curp->pman = man_alloc(curp->roff, curp,
MPARSE_QUICK & curp->options ? 1 : 0);
@@ -720,6 +734,7 @@ mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
/* Line number is per-file. */
svfile = curp->file;
curp->file = file;
+ curp->primary = &blk;
curp->line = 1;
recursion_depth++;
diff --git a/roff.c b/roff.c
index 54e1ea51..4d3b525b 100644
--- a/roff.c
+++ b/roff.c
@@ -122,6 +122,7 @@ struct roff {
int options; /* parse options */
int rstacksz; /* current size limit of rstack */
int rstackpos; /* position in rstack */
+ int format; /* current file in mdoc or man format */
char control; /* control character */
};
@@ -456,6 +457,7 @@ roff_reset(struct roff *r)
{
roff_free1(r);
+ r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
r->control = 0;
}
@@ -475,6 +477,7 @@ roff_alloc(struct mparse *parse, int options)
r = mandoc_calloc(1, sizeof(struct roff));
r->parse = parse;
r->options = options;
+ r->format = options & (MPARSE_MDOC | MPARSE_MAN);
r->rstackpos = -1;
roffhash_init();
@@ -1776,10 +1779,13 @@ roff_Dd(ROFF_ARGS)
{
const char *const *cp;
- if (0 == ((MPARSE_MDOC | MPARSE_QUICK) & r->options))
+ if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
for (cp = __mdoc_reserved; *cp; cp++)
roff_setstr(r, *cp, NULL, 0);
+ if (r->format == 0)
+ r->format = MPARSE_MDOC;
+
return(ROFF_CONT);
}
@@ -1788,10 +1794,13 @@ roff_TH(ROFF_ARGS)
{
const char *const *cp;
- if (0 == (MPARSE_QUICK & r->options))
+ if ((r->options & MPARSE_QUICK) == 0)
for (cp = __man_reserved; *cp; cp++)
roff_setstr(r, *cp, NULL, 0);
+ if (r->format == 0)
+ r->format = MPARSE_MAN;
+
return(ROFF_CONT);
}
@@ -2307,6 +2316,13 @@ roff_strdup(const struct roff *r, const char *p)
return(res);
}
+int
+roff_getformat(const struct roff *r)
+{
+
+ return(r->format);
+}
+
/*
* Find out whether a line is a macro line or not.
* If it is, adjust the current position and return one; if it isn't,