summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--action.c49
-rw-r--r--argv.c3
-rw-r--r--macro.c206
-rw-r--r--mdoc.c45
-rw-r--r--mdocml.c14
-rw-r--r--private.h11
-rw-r--r--validate.c30
7 files changed, 257 insertions, 101 deletions
diff --git a/action.c b/action.c
index 1d8a91c5..abe3b901 100644
--- a/action.c
+++ b/action.c
@@ -29,11 +29,13 @@ struct actions {
/* Per-macro action routines. */
-static int post_sh(struct mdoc *);
-static int post_os(struct mdoc *);
-static int post_dt(struct mdoc *);
-static int post_dd(struct mdoc *);
-static int post_nm(struct mdoc *);
+static int post_sh(struct mdoc *);
+static int post_os(struct mdoc *);
+static int post_dt(struct mdoc *);
+static int post_dd(struct mdoc *);
+static int post_nm(struct mdoc *);
+
+static int post_prologue(struct mdoc *);
/* Array of macro action routines. */
@@ -226,7 +228,8 @@ post_dt(struct mdoc *mdoc)
if (NULL == mdoc->meta.title)
mdoc->meta.title = xstrdup("untitled");
- return(1);
+
+ return(post_prologue(mdoc));
}
@@ -244,7 +247,9 @@ post_os(struct mdoc *mdoc)
mdoc->meta.os = xstrdup(buf[0] ? buf : "local");
mdoc->sec_lastn = mdoc->sec_last = SEC_BODY;
- return(1);
+ mdoc->flags |= MDOC_BODYPARSE;
+
+ return(post_prologue(mdoc));
}
@@ -285,15 +290,41 @@ post_dd(struct mdoc *mdoc)
}
if (mdoc->meta.date && NULL == n)
- return(1);
+ return(post_prologue(mdoc));
else if (n)
return(mdoc_err(mdoc, "invalid parameter syntax"));
if ((mdoc->meta.date = mdoc_atotime(date)))
- return(1);
+ return(post_prologue(mdoc));
return(mdoc_err(mdoc, "invalid parameter syntax"));
}
+static int
+post_prologue(struct mdoc *mdoc)
+{
+ struct mdoc_node *n;
+ /* Detach mdoc->last from its parent's child pointer and from its previous sibling. */
+ if (mdoc->last->parent->child == mdoc->last)
+ mdoc->last->parent->child = mdoc->last->prev;
+ if (mdoc->last->prev)
+ mdoc->last->prev->next = NULL;
+ /* Keep a handle on the detached node; it is asserted to have no following sibling. */
+ n = mdoc->last;
+ assert(NULL == mdoc->last->next);
+ /* Move the insertion point back: previous sibling if there is one, else the parent. */
+ if (mdoc->last->prev) {
+ mdoc->last = mdoc->last->prev;
+ mdoc->next = MDOC_NEXT_SIBLING;
+ } else {
+ mdoc->last = mdoc->last->parent;
+ mdoc->next = MDOC_NEXT_CHILD;
+ }
+ /* Hand the detached node to mdoc_node_freelist (presumably frees it; confirm), then always return 1. */
+ mdoc_node_freelist(n);
+ return(1);
+}
+
+
int
mdoc_action_post(struct mdoc *mdoc)
{
diff --git a/argv.c b/argv.c
index 9c37bf2c..b0a33c13 100644
--- a/argv.c
+++ b/argv.c
@@ -25,8 +25,9 @@
#include "private.h"
-
/* FIXME: .It called with -column and quoted arguments. */
+/* FIXME: if arguments are quoted, they should not be later parsed for
+ * macros. */
static int lookup(int, const char *);
static int parse(struct mdoc *, int,
diff --git a/macro.c b/macro.c
index 2185e181..9851f60c 100644
--- a/macro.c
+++ b/macro.c
@@ -25,19 +25,65 @@
#include <time.h>
#endif
-#include "private.h"
+/*
+ * This has scanning/parsing routines, each of which extracts a macro and
+ * its arguments and parameters, then knows how to progress to the next
+ * macro. Macros are parsed as follows:
+ *
+ * ELEMENT: TEXT | epsilon
+ * BLOCK: HEAD PUNCT BODY PUNCT BLOCK_TAIL PUNCT
+ * BLOCK_TAIL: TAIL | epsilon
+ * HEAD: ELEMENT | TEXT | BLOCK | epsilon
+ * BODY: ELEMENT | TEXT | BLOCK | epsilon
+ * TAIL: TEXT | epsilon
+ * PUNCT: TEXT (delimiters) | epsilon
+ *
+ * These are arranged into a parse tree, an example of which follows:
+ *
+ * ROOT
+ * BLOCK (.Sh)
+ * HEAD
+ * TEXT (`NAME')
+ * BODY
+ * ELEMENT (.Nm)
+ * TEXT (`mdocml')
+ * ELEMENT (.Nd)
+ * TEXT (`mdoc macro compiler')
+ * BLOCK (.Op)
+ * HEAD
+ * ELEMENT (.Fl)
+ * TEXT (`v')
+ * BLOCK (.Op)
+ * HEAD
+ * ELEMENT (.Fl)
+ * TEXT (`v')
+ * ELEMENT (.Fl)
+ * TEXT (`W')
+ * ELEMENT (.Ns)
+ * ELEMENT (.Ar)
+ * TEXT (`err...')
+ *
+ * These types are always per-line except for block bodies, which may
+ * span multiple lines. Macros are assigned a parsing routine, which
+ * corresponds to the type, in the mdoc_macros table.
+ *
+ * Note that types are general: there can be several parsing routines
+ * corresponding to a single type. The macro_text function, for
+ * example, parses an ELEMENT type (see the function definition for
+ * details) that may be interrupted by further macros; the
+ * macro_constant function, on the other hand, parses an ELEMENT type
+ * spanning a single line.
+ */
-/* FIXME: maxlineargs should be per LINE, no per TOKEN. */
+#include "private.h"
-static int rewind_alt(int);
-static int rewind_dohalt(int, enum mdoc_type,
- const struct mdoc_node *);
#define REWIND_REWIND (1 << 0)
#define REWIND_NOHALT (1 << 1)
#define REWIND_HALT (1 << 2)
+static int rewind_dohalt(int, enum mdoc_type,
+ const struct mdoc_node *);
+static int rewind_alt(int);
static int rewind_dobreak(int, const struct mdoc_node *);
-
-
static int rewind_elem(struct mdoc *, int);
static int rewind_impblock(struct mdoc *, int, int, int);
static int rewind_expblock(struct mdoc *, int, int, int);
@@ -158,7 +204,7 @@ rewind_dohalt(int tok, enum mdoc_type type, const struct mdoc_node *p)
case (MDOC_Qq):
/* FALLTHROUGH */
case (MDOC_Sq):
- assert(MDOC_BODY != type);
+ assert(MDOC_HEAD != type);
assert(MDOC_TAIL != type);
if (type == p->type && tok == p->tok)
return(REWIND_REWIND);
@@ -412,6 +458,10 @@ append_delims(struct mdoc *mdoc, int line, int *pos, char *buf)
}
+/*
+ * Close out an explicit scope. This optionally parses a TAIL type with
+ * a set number of TEXT children.
+ */
int
macro_scoped_close(MACRO_PROT_ARGS)
{
@@ -497,6 +547,30 @@ macro_scoped_close(MACRO_PROT_ARGS)
}
+/*
+ * A general text macro. This is a complex case because of punctuation.
+ * If a text macro is followed by words, then punctuation, the macro is
+ * "stopped" and "reopened" following the punctuation. Thus, the
+ * following arises:
+ *
+ * .Fl a ; b
+ *
+ * ELEMENT (.Fl)
+ * TEXT (`a')
+ * TEXT (`;')
+ * ELEMENT (.Fl)
+ * TEXT (`b')
+ *
+ * This must handle the following situations:
+ *
+ * .Fl Ar b ; ;
+ *
+ * ELEMENT (.Fl)
+ * ELEMENT (.Ar)
+ * TEXT (`b')
+ * TEXT (`;')
+ * TEXT (`;')
+ */
int
macro_text(MACRO_PROT_ARGS)
{
@@ -603,6 +677,30 @@ macro_text(MACRO_PROT_ARGS)
}
+/*
+ * Handle explicit-scope (having a different closure token) and implicit
+ * scope (closing out prior scopes when re-invoked) macros. These
+ * constitute the BLOCK type and usually span multiple lines. These
+ * always have HEAD and sometimes have BODY types. In the multi-line
+ * case:
+ *
+ * .Bd -ragged
+ * Text.
+ * .Fl macro
+ * Another.
+ * .Ed
+ *
+ * BLOCK (.Bd)
+ * HEAD
+ * BODY
+ * TEXT (`Text.')
+ * ELEMENT (.Fl)
+ * TEXT (`macro')
+ * TEXT (`Another.')
+ *
+ * Note that the `.It' macro, possibly the most difficult (as it has
+ * embedded scope, etc.) is handled by this routine.
+ */
int
macro_scoped(MACRO_PROT_ARGS)
{
@@ -704,6 +802,25 @@ macro_scoped(MACRO_PROT_ARGS)
}
+/*
+ * This handles a case of implicitly-scoped macro (BLOCK) limited to a
+ * single line. Instead of being closed out by a subsequent call to
+ * another macro, the scope is closed at the end of line. These don't
+ * have BODY or TAIL types. Notice that the punctuation falls outside
+ * of the HEAD type.
+ *
+ * .Qq a Fl b Ar d ; ;
+ *
+ * BLOCK (Qq)
+ * HEAD
+ * TEXT (`a')
+ * ELEMENT (.Fl)
+ * TEXT (`b')
+ * ELEMENT (.Ar)
+ * TEXT (`d')
+ * TEXT (`;')
+ * TEXT (`;')
+ */
int
macro_scoped_line(MACRO_PROT_ARGS)
{
@@ -716,6 +833,9 @@ macro_scoped_line(MACRO_PROT_ARGS)
if ( ! mdoc_head_alloc(mdoc, line, ppos, tok))
return(0);
+ mdoc->next = MDOC_NEXT_SIBLING;
+ if ( ! mdoc_body_alloc(mdoc, line, ppos, tok))
+ return(0);
mdoc->next = MDOC_NEXT_CHILD;
/* XXX - no known argument macros. */
@@ -747,16 +867,31 @@ macro_scoped_line(MACRO_PROT_ARGS)
}
if (1 == ppos) {
- if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos))
+ if ( ! rewind_subblock(MDOC_BODY, mdoc, tok, line, ppos))
return(0);
if ( ! append_delims(mdoc, line, pos, buf))
return(0);
- } else if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos))
+ } else if ( ! rewind_subblock(MDOC_BODY, mdoc, tok, line, ppos))
return(0);
return(rewind_impblock(mdoc, tok, line, ppos));
}
+/*
+ * A constant-scoped macro is like a simple-scoped macro (macro_scoped)
+ * except that it doesn't handle implicit scopes and explicit ones have
+ * a fixed number of TEXT children to the BODY.
+ *
+ * .Fl a So b Sc ;
+ *
+ * ELEMENT (.Fl)
+ * TEXT (`a')
+ * BLOCK (.So)
+ * HEAD
+ * BODY
+ * TEXT (`b')
+ * TEXT (`;')
+ */
int
macro_constant_scoped(MACRO_PROT_ARGS)
{
@@ -856,6 +991,19 @@ macro_constant_scoped(MACRO_PROT_ARGS)
}
+/*
+ * A delimited constant is very similar to the macros parsed by
+ * macro_text except that, in the event of punctuation, the macro isn't
+ * "re-opened" as it is in macro_text. Also, these macros have a fixed
+ * number of parameters.
+ *
+ * .Fl a No b
+ *
+ * ELEMENT (.Fl)
+ * TEXT (`a')
+ * ELEMENT (.No)
+ * TEXT (`b')
+ */
int
macro_constant_delimited(MACRO_PROT_ARGS)
{
@@ -958,13 +1106,18 @@ macro_constant_delimited(MACRO_PROT_ARGS)
}
+/*
+ * A constant macro is the simplest classification. It spans an entire
+ * line.
+ */
int
macro_constant(MACRO_PROT_ARGS)
{
int c, lastarg, argc, fl;
struct mdoc_arg argv[MDOC_LINEARG_MAX];
char *p;
- struct mdoc_node *n;
+
+ assert( ! (MDOC_CALLABLE & mdoc_macros[tok].flags));
fl = 0;
if (MDOC_QUOTABLE & mdoc_macros[tok].flags)
@@ -1020,31 +1173,7 @@ macro_constant(MACRO_PROT_ARGS)
mdoc->next = MDOC_NEXT_SIBLING;
}
- if ( ! rewind_elem(mdoc, tok))
- return(0);
- if ( ! (MDOC_NOKEEP & mdoc_macros[tok].flags))
- return(1);
-
- assert(mdoc->last->tok == tok);
- if (mdoc->last->parent->child == mdoc->last)
- mdoc->last->parent->child = mdoc->last->prev;
- if (mdoc->last->prev)
- mdoc->last->prev->next = NULL;
-
- n = mdoc->last;
- assert(NULL == mdoc->last->next);
-
- if (mdoc->last->prev) {
- mdoc->last = mdoc->last->prev;
- mdoc->next = MDOC_NEXT_SIBLING;
- } else {
- mdoc->last = mdoc->last->parent;
- mdoc->next = MDOC_NEXT_CHILD;
- }
-
- mdoc_node_freelist(n);
-
- return(1);
+ return(rewind_elem(mdoc, tok));
}
@@ -1057,6 +1186,11 @@ macro_obsolete(MACRO_PROT_ARGS)
}
+/*
+ * This is called at the end of parsing. It must traverse up the tree,
+ * closing out open [implicit] scopes. Obviously, open explicit scopes
+ * are errors.
+ */
int
macro_end(struct mdoc *mdoc)
{
diff --git a/mdoc.c b/mdoc.c
index ffacbedf..67350c40 100644
--- a/mdoc.c
+++ b/mdoc.c
@@ -85,9 +85,9 @@ const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
const struct mdoc_macro __mdoc_macros[MDOC_MAX] = {
{ NULL, 0 }, /* \" */
- { macro_constant, MDOC_PROLOGUE | MDOC_NOKEEP }, /* Dd */
- { macro_constant, MDOC_PROLOGUE | MDOC_NOKEEP }, /* Dt */
- { macro_constant, MDOC_PROLOGUE | MDOC_NOKEEP }, /* Os */
+ { macro_constant, MDOC_PROLOGUE }, /* Dd */
+ { macro_constant, MDOC_PROLOGUE }, /* Dt */
+ { macro_constant, MDOC_PROLOGUE }, /* Os */
{ macro_scoped, 0 }, /* Sh */
{ macro_scoped, 0 }, /* Ss */
{ macro_text, 0 }, /* Pp */
@@ -295,14 +295,15 @@ mdoc_parseln(struct mdoc *mdoc, int line, char *buf)
if (MDOC_HALT & mdoc->flags)
return(0);
+ mdoc->linetok = 0;
+
if ('.' != *buf) {
- if (SEC_PROLOGUE != mdoc->sec_lastn) {
- if ( ! mdoc_word_alloc(mdoc, line, 0, buf))
- return(0);
- mdoc->next = MDOC_NEXT_SIBLING;
- return(1);
- }
- return(mdoc_perr(mdoc, line, 0, "text disallowed"));
+ if ( ! (MDOC_BODYPARSE & mdoc->flags))
+ return(mdoc_perr(mdoc, line, 0, "text disallowed"));
+ if ( ! mdoc_word_alloc(mdoc, line, 0, buf))
+ return(0);
+ mdoc->next = MDOC_NEXT_SIBLING;
+ return(1);
}
if (buf[1] && '\\' == buf[1])
@@ -400,8 +401,8 @@ mdoc_macro(struct mdoc *mdoc, int tok,
assert(mdoc_macros[tok].fp);
if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
- SEC_PROLOGUE == mdoc->sec_lastn)
- return(mdoc_perr(mdoc, ln, ppos, "macro disallowed in document prologue"));
+ ! (MDOC_BODYPARSE & mdoc->flags))
+ return(mdoc_perr(mdoc, ln, ppos, "macro disallowed: not in document body"));
if (1 != ppos && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
return(mdoc_perr(mdoc, ln, ppos, "macro not callable"));
return((*mdoc_macros[tok].fp)(mdoc, tok, ln, ppos, pos, buf));
@@ -417,6 +418,26 @@ mdoc_node_append(struct mdoc *mdoc, struct mdoc_node *p)
assert(mdoc->first);
assert(MDOC_ROOT != p->type);
+ /* See if we exceed the suggested line-max. */
+
+ switch (p->type) {
+ case (MDOC_TEXT):
+ /* FALLTHROUGH */
+ case (MDOC_ELEM):
+ /* FALLTHROUGH */
+ case (MDOC_BLOCK):
+ mdoc->linetok++;
+ break;
+ default:
+ break;
+ }
+
+ if (mdoc->linetok > MDOC_LINEARG_SOFTMAX)
+ if ( ! mdoc_nwarn(mdoc, p, WARN_COMPAT,
+ "suggested %d tokens per line exceeded (has %d)",
+ MDOC_LINEARG_SOFTMAX, mdoc->linetok))
+ return(0);
+
if (MDOC_TEXT == mdoc->last->type)
on = "<text>";
else if (MDOC_ROOT == mdoc->last->type)
diff --git a/mdocml.c b/mdocml.c
index 0befca5a..87884080 100644
--- a/mdocml.c
+++ b/mdocml.c
@@ -34,19 +34,7 @@
#define MD_LINE_SZ (256) /* Max input line size. */
-/*
- * Put this into a mdoctrans.h, which has:
- *
- * struct mdoc_trans; (opaque)
- *
- * struct mdoc_trans *mdoc_trans_alloc(const char *filter);
- *
- * mdoc_trans_free(struct mdoc_trans *);
- *
- * int mdoc_trans_getopt(struct mdoc_trans *, char *);
- *
- * int mdoc_trans_print(struct mdoc_trans *, const struct mdoc_node *);
- */
+/* TODO: have a struct for each transformer. */
typedef int (*mdocprint)(const struct mdoc_node *);
diff --git a/private.h b/private.h
index 7dfb24b3..cf894321 100644
--- a/private.h
+++ b/private.h
@@ -30,8 +30,10 @@ struct mdoc {
void *data;
struct mdoc_cb cb;
void *htab;
+ int linetok;
int flags;
#define MDOC_HALT (1 << 0)
+#define MDOC_BODYPARSE (1 << 1)
enum mdoc_next next;
struct mdoc_node *last;
struct mdoc_node *first;
@@ -41,9 +43,13 @@ struct mdoc {
};
-/* FIXME: it's 9 (this isn't used properly). */
+/* Hard-limit of macro arguments. */
-#define MDOC_LINEARG_MAX 12
+#define MDOC_LINEARG_MAX 9
+
+/* Suggested limit of macro arguments. */
+
+#define MDOC_LINEARG_SOFTMAX 9
#define MACRO_PROT_ARGS struct mdoc *mdoc, int tok, int line, \
int ppos, int *pos, char *buf
@@ -57,7 +63,6 @@ struct mdoc_macro {
#define MDOC_QUOTABLE (1 << 3)
#define MDOC_PROLOGUE (1 << 4)
#define MDOC_TABSEP (1 << 5)
-#define MDOC_NOKEEP (1 << 6)
};
#define mdoc_nwarn(mdoc, node, type, fmt, ...) \
diff --git a/validate.c b/validate.c
index d92a0947..7a1151db 100644
--- a/validate.c
+++ b/validate.c
@@ -27,7 +27,6 @@ typedef int (*v_post)(struct mdoc *);
/* FIXME: some sections should only occur in specific msecs. */
/* FIXME: ignoring Pp. */
/* FIXME: math symbols. */
-/* FIXME: make sure prologue is complete. */
/* FIXME: valid character-escape checks. */
/* FIXME: make sure required sections are included (NAME, ...). */
@@ -72,7 +71,6 @@ static int pre_prologue(struct mdoc *, struct mdoc_node *);
static int herr_ge1(struct mdoc *);
static int herr_le1(struct mdoc *);
-static int hwarn_ge1(struct mdoc *);
static int herr_eq0(struct mdoc *);
static int eerr_eq0(struct mdoc *);
static int eerr_le1(struct mdoc *);
@@ -82,7 +80,6 @@ static int eerr_ge1(struct mdoc *);
static int ewarn_eq0(struct mdoc *);
static int ewarn_eq1(struct mdoc *);
static int bwarn_ge1(struct mdoc *);
-static int berr_eq0(struct mdoc *);
static int ewarn_ge1(struct mdoc *);
static int ebool(struct mdoc *);
static int post_sh(struct mdoc *);
@@ -119,14 +116,13 @@ static v_post posts_bd[] = { herr_eq0, bwarn_ge1, NULL };
static v_post posts_text[] = { eerr_ge1, NULL };
static v_post posts_wtext[] = { ewarn_ge1, NULL };
static v_post posts_notext[] = { eerr_eq0, NULL };
-static v_post posts_wline[] = { hwarn_ge1, berr_eq0, NULL };
+static v_post posts_wline[] = { bwarn_ge1, herr_eq0, NULL };
static v_post posts_sh[] = { herr_ge1, bwarn_ge1, post_sh, NULL };
static v_post posts_bl[] = { herr_eq0, bwarn_ge1, post_bl, NULL };
static v_post posts_it[] = { post_it, NULL };
static v_post posts_in[] = { ewarn_eq1, NULL };
static v_post posts_ss[] = { herr_ge1, NULL };
static v_post posts_pp[] = { ewarn_eq0, NULL };
-static v_post posts_d1[] = { herr_ge1, NULL };
static v_post posts_ex[] = { eerr_le1, post_ex, NULL };
static v_post posts_an[] = { post_an, NULL };
static v_post posts_at[] = { post_at, NULL };
@@ -145,8 +141,8 @@ const struct valids mdoc_valids[MDOC_MAX] = {
{ pres_sh, posts_sh }, /* Sh */
{ pres_ss, posts_ss }, /* Ss */
{ NULL, posts_pp }, /* Pp */
- { pres_d1, posts_d1 }, /* D1 */
- { pres_d1, posts_d1 }, /* Dl */
+ { pres_d1, posts_wline }, /* D1 */
+ { pres_d1, posts_wline }, /* Dl */
{ pres_bd, posts_bd }, /* Bd */
{ NULL, NULL }, /* Ed */
{ pres_bl, posts_bl }, /* Bl */
@@ -365,16 +361,6 @@ pre_check_parent(struct mdoc *mdoc, struct mdoc_node *node,
static int
-berr_eq0(struct mdoc *mdoc)
-{
-
- if (MDOC_BODY != mdoc->last->type)
- return(1);
- return(post_check_children_eq(mdoc, "body children", 0));
-}
-
-
-static int
bwarn_ge1(struct mdoc *mdoc)
{
@@ -467,16 +453,6 @@ herr_eq0(struct mdoc *mdoc)
static int
-hwarn_ge1(struct mdoc *mdoc)
-{
-
- if (MDOC_HEAD != mdoc->last->type)
- return(1);
- return(post_check_children_wgt(mdoc, "parameters", 0));
-}
-
-
-static int
herr_le1(struct mdoc *mdoc)
{
if (MDOC_HEAD != mdoc->last->type)