summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--libmdoc.h3
-rw-r--r--mdoc.371
-rw-r--r--mdoc.c18
-rw-r--r--mdoc.h8
-rw-r--r--mdoc_html.c4
-rw-r--r--mdoc_macro.c266
-rw-r--r--mdoc_term.c31
-rw-r--r--tree.c5
8 files changed, 320 insertions, 86 deletions
diff --git a/libmdoc.h b/libmdoc.h
index df34021c..579809cd 100644
--- a/libmdoc.h
+++ b/libmdoc.h
@@ -109,6 +109,9 @@ int mdoc_block_alloc(struct mdoc *, int, int,
int mdoc_head_alloc(struct mdoc *, int, int, enum mdoct);
int mdoc_tail_alloc(struct mdoc *, int, int, enum mdoct);
int mdoc_body_alloc(struct mdoc *, int, int, enum mdoct);
+int mdoc_endbody_alloc(struct mdoc *m, int line, int pos,
+ enum mdoct tok, struct mdoc_node *body,
+ enum mdoc_endbody end);
void mdoc_node_delete(struct mdoc *, struct mdoc_node *);
void mdoc_hash_init(void);
enum mdoct mdoc_hash_find(const char *);
diff --git a/mdoc.3 b/mdoc.3
index b21c34dc..4a9b2e14 100644
--- a/mdoc.3
+++ b/mdoc.3
@@ -217,10 +217,14 @@ and
fields), its position in the tree (the
.Va parent ,
.Va child ,
+.Va nchild ,
.Va next
and
.Va prev
-fields) and some type-specific data.
+fields) and some type-specific data, in particular, for nodes generated
+from macros, the generating macro in the
+.Va tok
+field.
.Pp
The tree itself is arranged according to the following normal form,
where capitalised non-terminals represent nodes.
@@ -235,11 +239,11 @@ where capitalised non-terminals represent nodes.
.It ELEMENT
\(<- TEXT*
.It HEAD
-\(<- mnode+
+\(<- mnode*
.It BODY
-\(<- mnode+
+\(<- mnode* [ENDBODY mnode*]
.It TAIL
-\(<- mnode+
+\(<- mnode*
.It TEXT
\(<- [[:printable:],0x1e]*
.El
@@ -253,6 +257,65 @@ an empty line will produce a zero-length string.
Multiple body parts are only found in invocations of
.Sq \&Bl \-column ,
where a new body introduces a new phrase.
+.Ss Badly nested blocks
+A special kind of node is available to end the formatting
+associated with a given block before the physical end of that block.
+Such an ENDBODY node has a non-null
+.Va end
+field, is of the BODY
+.Va type ,
+has the same
+.Va tok
+as the BLOCK it is ending, and has a
+.Va pending
+field pointing to that BLOCK's BODY node.
+It is an indirect child of that BODY node
+and has no children of its own.
+.Pp
+An ENDBODY node is generated when a block ends while one of its child
+blocks is still open, like in the following example:
+.Bd -literal -offset indent
+\&.Ao ao
+\&.Bo bo ac
+\&.Ac bc
+\&.Bc end
+.Ed
+.Pp
+This example results in the following block structure:
+.Bd -literal -offset indent
+BLOCK Ao
+ HEAD Ao
+ BODY Ao
+ TEXT ao
+ BLOCK Bo, pending -> Ao
+ HEAD Bo
+ BODY Bo
+ TEXT bo
+ TEXT ac
+ ENDBODY Ao, pending -> Ao
+ TEXT bc
+TEXT end
+.Ed
+.Pp
+Here, the formatting of the Ao block extends from TEXT ao to TEXT ac,
+while the formatting of the Bo block extends from TEXT bo to TEXT bc,
+rendering like this in
+.Fl T Ns Cm ascii
+mode:
+.Dl <ao [bo ac> bc] end
+Support for badly nested blocks is only provided for backward
+compatibility with some older
+.Xr mdoc 7
+implementations.
+Using them in new code is strongly discouraged:
+Some frontends, in particular
+.Fl T Ns Cm html ,
+are unable to render them in any meaningful way,
+many other
+.Xr mdoc 7
+implementations do not support them, and even for those that do,
+the behaviour is not well-defined, in particular when using multiple
+levels of badly nested blocks.
.Sh EXAMPLES
The following example reads lines from stdin and parses them, operating
on the finished parse tree with
diff --git a/mdoc.c b/mdoc.c
index 59c54b83..5fc9ed95 100644
--- a/mdoc.c
+++ b/mdoc.c
@@ -332,6 +332,8 @@ node_append(struct mdoc *mdoc, struct mdoc_node *p)
p->parent->tail = p;
break;
case (MDOC_BODY):
+ if (p->end)
+ break;
assert(MDOC_BLOCK == p->parent->type);
p->parent->body = p;
break;
@@ -436,6 +438,22 @@ mdoc_body_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
+/*
+ * Allocate an ENDBODY node: a node of type MDOC_BODY for the macro tok
+ * whose end field is set to end and whose pending field points to body.
+ * After the node has been appended, m->next is set to MDOC_NEXT_SIBLING.
+ * Returns 0 if node_append() fails, 1 otherwise.
+ */
int
+mdoc_endbody_alloc(struct mdoc *m, int line, int pos, enum mdoct tok,
+		struct mdoc_node *body, enum mdoc_endbody end)
+{
+	struct mdoc_node *endbody;
+
+	endbody = node_alloc(m, line, pos, tok, MDOC_BODY);
+	endbody->end = end;
+	endbody->pending = body;
+
+	if (node_append(m, endbody)) {
+		m->next = MDOC_NEXT_SIBLING;
+		return(1);
+	}
+	return(0);
+}
+
+
+int
mdoc_block_alloc(struct mdoc *m, int line, int pos,
enum mdoct tok, struct mdoc_arg *args)
{
diff --git a/mdoc.h b/mdoc.h
index e09256f1..06b09270 100644
--- a/mdoc.h
+++ b/mdoc.h
@@ -249,6 +249,12 @@ struct mdoc_arg {
unsigned int refcnt;
};
+enum mdoc_endbody {
+	ENDBODY_NOT = 0, /* ordinary node, not an ENDBODY */
+	ENDBODY_SPACE, /* ENDBODY; no spacing adjustment afterwards */
+	ENDBODY_NOSPACE /* ENDBODY; terminal output sets TERMP_NOSPACE afterwards */
+};
+
enum mdoc_list {
LIST__NONE = 0,
LIST_bullet,
@@ -302,6 +308,7 @@ struct mdoc_node {
#define MDOC_EOS (1 << 2) /* at sentence boundary */
#define MDOC_LINE (1 << 3) /* first macro/text on line */
#define MDOC_SYNPRETTY (1 << 4) /* SYNOPSIS-style formatting */
+#define MDOC_ENDED (1 << 5) /* rendering has been ended */
enum mdoc_type type; /* AST node type */
enum mdoc_sec sec; /* current named section */
/* FIXME: these can be union'd to shave a few bytes. */
@@ -311,6 +318,7 @@ struct mdoc_node {
struct mdoc_node *body; /* BLOCK */
struct mdoc_node *tail; /* BLOCK */
char *string; /* TEXT */
+ enum mdoc_endbody end; /* BODY */
union {
struct mdoc_bl Bl;
diff --git a/mdoc_html.c b/mdoc_html.c
index cafc664c..a9259498 100644
--- a/mdoc_html.c
+++ b/mdoc_html.c
@@ -437,7 +437,7 @@ print_mdoc_node(MDOC_ARGS)
print_text(h, n->string);
return;
default:
- if (mdocs[n->tok].pre)
+ if (mdocs[n->tok].pre && !n->end)
child = (*mdocs[n->tok].pre)(m, n, h);
break;
}
@@ -453,7 +453,7 @@ print_mdoc_node(MDOC_ARGS)
mdoc_root_post(m, n, h);
break;
default:
- if (mdocs[n->tok].post)
+ if (mdocs[n->tok].post && !n->end)
(*mdocs[n->tok].post)(m, n, h);
break;
}
diff --git a/mdoc_macro.c b/mdoc_macro.c
index 197d6d4e..9bf63fd4 100644
--- a/mdoc_macro.c
+++ b/mdoc_macro.c
@@ -50,6 +50,8 @@ static int append_delims(struct mdoc *,
int, int *, char *);
static enum mdoct lookup(enum mdoct, const char *);
static enum mdoct lookup_raw(const char *);
+static int make_pending(struct mdoc_node *, enum mdoct,
+ struct mdoc *, int, int);
static int phrase(struct mdoc *, int, int, char *);
static enum mdoct rew_alt(enum mdoct);
static int rew_dobreak(enum mdoct,
@@ -61,8 +63,6 @@ static int rew_last(struct mdoc *,
const struct mdoc_node *);
static int rew_sub(enum mdoc_type, struct mdoc *,
enum mdoct, int, int);
-static int swarn(struct mdoc *, enum mdoc_type, int,
- int, const struct mdoc_node *);
const struct mdoc_macro __mdoc_macros[MDOC_MAX] = {
{ in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Ap */
@@ -192,53 +192,6 @@ const struct mdoc_macro __mdoc_macros[MDOC_MAX] = {
const struct mdoc_macro * const mdoc_macros = __mdoc_macros;
-static int
-swarn(struct mdoc *mdoc, enum mdoc_type type,
- int line, int pos, const struct mdoc_node *p)
-{
- const char *n, *t, *tt;
- enum mandocerr ec;
-
- n = t = "<root>";
- tt = "block";
-
- switch (type) {
- case (MDOC_BODY):
- tt = "multi-line";
- break;
- case (MDOC_HEAD):
- tt = "line";
- break;
- default:
- break;
- }
-
- switch (p->type) {
- case (MDOC_BLOCK):
- n = mdoc_macronames[p->tok];
- t = "block";
- break;
- case (MDOC_BODY):
- n = mdoc_macronames[p->tok];
- t = "multi-line";
- break;
- case (MDOC_HEAD):
- n = mdoc_macronames[p->tok];
- t = "line";
- break;
- default:
- break;
- }
-
- ec = (MDOC_IGN_SCOPE & mdoc->pflags) ?
- MANDOCERR_SCOPE : MANDOCERR_SYNTSCOPE;
-
- return(mdoc_vmsg(mdoc, ec, line, pos,
- "%s scope breaks %s of %s",
- tt, t, n));
-}
-
-
/*
* This is called at the end of parsing. It must traverse up the tree,
* closing out open [implicit] scopes. Obviously, open explicit scopes
@@ -410,7 +363,11 @@ rew_dohalt(enum mdoct tok, enum mdoc_type type,
/* FALLTHROUGH */
case (MDOC_Vt):
assert(MDOC_TAIL != type);
- if (type == p->type && tok == p->tok)
+ if (tok != p->tok)
+ break;
+ if (p->end)
+ return(REWIND_HALT);
+ if (type == p->type)
return(REWIND_REWIND);
break;
case (MDOC_It):
@@ -464,7 +421,11 @@ rew_dohalt(enum mdoct tok, enum mdoc_type type,
case (MDOC_So):
/* FALLTHROUGH */
case (MDOC_Xo):
- if (type == p->type && tok == p->tok)
+ if (tok != p->tok)
+ break;
+ if (p->end)
+ return(REWIND_HALT);
+ if (type == p->type)
return(REWIND_REWIND);
break;
/* Multi-line explicit scope close. */
@@ -499,7 +460,11 @@ rew_dohalt(enum mdoct tok, enum mdoc_type type,
case (MDOC_Sc):
/* FALLTHROUGH */
case (MDOC_Xc):
- if (type == p->type && rew_alt(tok) == p->tok)
+ if (rew_alt(tok) != p->tok)
+ break;
+ if (p->end)
+ return(REWIND_HALT);
+ if (type == p->type)
return(REWIND_REWIND);
break;
default:
@@ -526,6 +491,8 @@ rew_dobreak(enum mdoct tok, const struct mdoc_node *p)
return(1);
if (MDOC_VALID & p->flags)
return(1);
+ if (MDOC_BODY == p->type && p->end)
+ return(1);
switch (tok) {
case (MDOC_It):
@@ -576,6 +543,83 @@ rew_elem(struct mdoc *mdoc, enum mdoct tok)
}
+/*
+ * We are trying to close a block identified by tok,
+ * but the child block *broken is still open.
+ * Thus, postpone closing the tok block
+ * until the rew_sub call closing *broken.
+ *
+ * The enclosing block matching tok (the "breaker") is searched for
+ * among the ancestors of *broken and stored in the pending field of
+ * the broken block (the parent of *broken if *broken is a BODY node).
+ * A breaker that was already pending on the broken block is handed on
+ * to the end of the pending chain of the new breaker.
+ * On success, a MANDOCERR_SCOPE message "<tok> breaks <broken>"
+ * is issued at line/ppos.
+ *
+ * Returns 1 if a breaker was found and linked, or 0 after reporting
+ * MANDOCERR_SYNTNOSCOPE if no ancestor of *broken matches tok.
+ *
+ * NOTE(review): the return value of mdoc_vmsg() is ignored here,
+ * while blk_part_imp() tests it for its own MANDOCERR_SCOPE message;
+ * confirm that this difference is intentional.
+ */
+static int
+make_pending(struct mdoc_node *broken, enum mdoct tok,
+ struct mdoc *m, int line, int ppos)
+{
+ struct mdoc_node *breaker;
+
+ /*
+ * Iterate backwards, searching for the block matching tok,
+ * that is, the block breaking the *broken block.
+ */
+ for (breaker = broken->parent; breaker; breaker = breaker->parent) {
+
+ /*
+ * If the *broken block had already been broken before
+ * and we encounter its breaker, make the tok block
+ * pending on the inner breaker.
+ * Graphically, "[A breaker=[B broken=[C->B B] tok=A] C]"
+ * becomes "[A broken=[B [C->B B] tok=A] C]"
+ * and finally "[A [B->A [C->B B] A] C]".
+ */
+ if (breaker == broken->pending) {
+ broken = breaker;
+ continue;
+ }
+
+ if (REWIND_REWIND != rew_dohalt(tok, MDOC_BLOCK, breaker))
+ continue;
+ if (MDOC_BODY == broken->type)
+ broken = broken->parent;
+
+ /*
+ * Found the breaker.
+ * If another, outer breaker is already pending on
+ * the *broken block, we must not clobber the link
+ * to the outer breaker, but make it pending on the
+ * new, now inner breaker.
+ * Graphically, "[A breaker=[B broken=[C->A A] tok=B] C]"
+ * becomes "[A breaker=[B->A broken=[C A] tok=B] C]"
+ * and finally "[A [B->A [C->B A] B] C]".
+ */
+ if (broken->pending) {
+ struct mdoc_node *taker;
+
+ /*
+ * If the breaker had also been broken before,
+ * it cannot take on the outer breaker itself,
+ * but must hand it on to its own breakers.
+ * Graphically, this is the following situation:
+ * "[A [B breaker=[C->B B] broken=[D->A A] tok=C] D]"
+ * "[A taker=[B->A breaker=[C->B B] [D->C A] C] D]"
+ */
+ taker = breaker;
+ while (taker->pending)
+ taker = taker->pending;
+ taker->pending = broken->pending;
+ }
+ broken->pending = breaker;
+ mdoc_vmsg(m, MANDOCERR_SCOPE, line, ppos, "%s breaks %s",
+ mdoc_macronames[tok], mdoc_macronames[broken->tok]);
+ return(1);
+ }
+
+ /*
+ * Found no matching block for tok.
+ * Are you trying to close a block that is not open?
+ * Report failure and abort the parser.
+ */
+ mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTNOSCOPE);
+ return(0);
+}
+
static int
rew_sub(enum mdoc_type t, struct mdoc *m,
enum mdoct tok, int line, int ppos)
@@ -587,7 +631,7 @@ rew_sub(enum mdoc_type t, struct mdoc *m,
for (n = m->last; n; n = n->parent) {
c = rew_dohalt(tok, t, n);
if (REWIND_HALT == c) {
- if (MDOC_BLOCK != t)
+ if (n->end || MDOC_BLOCK != t)
return(1);
if ( ! (MDOC_EXPLICIT & mdoc_macros[tok].flags))
return(1);
@@ -599,8 +643,7 @@ rew_sub(enum mdoc_type t, struct mdoc *m,
break;
else if (rew_dobreak(tok, n))
continue;
- if ( ! swarn(m, t, line, ppos, n))
- return(0);
+ return(make_pending(n, tok, m, line, ppos));
}
assert(n);
@@ -608,15 +651,14 @@ rew_sub(enum mdoc_type t, struct mdoc *m,
return(0);
/*
- * The current block extends an enclosing block beyond a line
- * break. Now that the current block ends, close the enclosing
- * block, too.
+ * The current block extends an enclosing block.
+ * Now that the current block ends, close the enclosing block, too.
*/
- if (NULL != (n = n->pending)) {
- assert(MDOC_HEAD == n->type);
+ while (NULL != (n = n->pending)) {
if ( ! rew_last(m, n))
return(0);
- if ( ! mdoc_body_alloc(m, n->line, n->pos, n->tok))
+ if (MDOC_HEAD == n->type &&
+ ! mdoc_body_alloc(m, n->line, n->pos, n->tok))
return(0);
}
@@ -672,9 +714,13 @@ append_delims(struct mdoc *m, int line, int *pos, char *buf)
static int
blk_exp_close(MACRO_PROT_ARGS)
{
+ struct mdoc_node *body; /* Our own body. */
+ struct mdoc_node *later; /* A sub-block starting later. */
+ struct mdoc_node *n; /* For searching backwards. */
+
int j, lastarg, maxargs, flushed, nl;
enum margserr ac;
- enum mdoct ntok;
+ enum mdoct atok, ntok;
char *p;
nl = MDOC_NEWLINE & m->flags;
@@ -688,6 +734,68 @@ blk_exp_close(MACRO_PROT_ARGS)
break;
}
+ /*
+ * Search backwards for beginnings of blocks,
+ * both of our own and of pending sub-blocks.
+ */
+ atok = rew_alt(tok);
+ body = later = NULL;
+ for (n = m->last; n; n = n->parent) {
+ if (MDOC_VALID & n->flags)
+ continue;
+
+ /* Remember the start of our own body. */
+ if (MDOC_BODY == n->type && atok == n->tok) {
+ if ( ! n->end)
+ body = n;
+ continue;
+ }
+
+ if (MDOC_BLOCK != n->type)
+ continue;
+ if (atok == n->tok) {
+ assert(body);
+
+ /*
+ * Found the start of our own block.
+ * When there is no pending sub block,
+ * just proceed to closing out.
+ */
+ if (NULL == later)
+ break;
+
+ /*
+ * When there is a pending sub block,
+ * postpone closing out the current block
+ * until the rew_sub() closing out the sub-block.
+ */
+ if ( ! make_pending(later, tok, m, line, ppos))
+ return(0);
+
+ /*
+ * Mark the place where the formatting - but not
+ * the scope - of the current block ends.
+ */
+ if ( ! mdoc_endbody_alloc(m, line, ppos,
+ atok, body, ENDBODY_SPACE))
+ return(0);
+ break;
+ }
+
+ /*
+ * When finding an open sub block, remember the last
+ * open explicit block, or, in case there are only
+ * implicit ones, the first open implicit block.
+ */
+ if (later &&
+ MDOC_EXPLICIT & mdoc_macros[later->tok].flags)
+ continue;
+ if (MDOC_CALLABLE & mdoc_macros[n->tok].flags) {
+ assert( ! (MDOC_ACTED & n->flags));
+ later = n;
+ }
+ }
+
if ( ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) {
/* FIXME: do this in validate */
if (buf[*pos])
@@ -702,7 +810,7 @@ blk_exp_close(MACRO_PROT_ARGS)
if ( ! rew_sub(MDOC_BODY, m, tok, line, ppos))
return(0);
- if (maxargs > 0)
+ if (NULL == later && maxargs > 0)
if ( ! mdoc_tail_alloc(m, line, ppos, rew_alt(tok)))
return(0);
@@ -1255,22 +1363,36 @@ blk_part_imp(MACRO_PROT_ARGS)
body->parent->flags |= MDOC_EOS;
}
+ /*
+ * If there is an open sub-block requiring explicit close-out,
+ * postpone closing out the current block
+ * until the rew_sub() call closing out the sub-block.
+ */
+ for (n = m->last; n && n != body && n != blk->parent; n = n->parent) {
+ if (MDOC_BLOCK == n->type &&
+ MDOC_EXPLICIT & mdoc_macros[n->tok].flags &&
+ ! (MDOC_VALID & n->flags)) {
+ assert( ! (MDOC_ACTED & n->flags));
+ if ( ! make_pending(n, tok, m, line, ppos))
+ return(0);
+ if ( ! mdoc_endbody_alloc(m, line, ppos,
+ tok, body, ENDBODY_NOSPACE))
+ return(0);
+ return(1);
+ }
+ }
+
/*
* If we can't rewind to our body, then our scope has already
* been closed by another macro (like `Oc' closing `Op'). This
* is ugly behaviour nodding its head to OpenBSD's overwhelming
* crufty use of `Op' breakage.
- *
- * FIXME - this should be ifdef'd OpenBSD?
*/
- for (n = m->last; n; n = n->parent)
- if (body == n)
- break;
-
- if (NULL == n && ! mdoc_nmsg(m, body, MANDOCERR_SCOPE))
+ if (n != body && ! mdoc_vmsg(m, MANDOCERR_SCOPE, line, ppos,
+ "%s broken", mdoc_macronames[tok]))
return(0);
- if (n && ! rew_last(m, body))
+ if (n && ! rew_sub(MDOC_BODY, m, tok, line, ppos))
return(0);
/* Standard appending of delimiters. */
@@ -1280,7 +1402,7 @@ blk_part_imp(MACRO_PROT_ARGS)
/* Rewind scope, if applicable. */
- if (n && ! rew_last(m, blk))
+ if (n && ! rew_sub(MDOC_BLOCK, m, tok, line, ppos))
return(0);
return(1);
diff --git a/mdoc_term.c b/mdoc_term.c
index cae18fa3..3ccf9de7 100644
--- a/mdoc_term.c
+++ b/mdoc_term.c
@@ -325,20 +325,37 @@ print_mdoc_node(DECL_ARGS)
memset(&npair, 0, sizeof(struct termpair));
npair.ppair = pair;
- if (MDOC_TEXT != n->type) {
- if (termacts[n->tok].pre)
- chld = (*termacts[n->tok].pre)(p, &npair, m, n);
- } else
+ if (MDOC_TEXT == n->type)
term_word(p, n->string);
+ else if (termacts[n->tok].pre && !n->end)
+ chld = (*termacts[n->tok].pre)(p, &npair, m, n);
if (chld && n->child)
print_mdoc_nodelist(p, &npair, m, n->child);
term_fontpopq(p, font);
- if (MDOC_TEXT != n->type)
- if (termacts[n->tok].post)
- (*termacts[n->tok].post)(p, &npair, m, n);
+ if (MDOC_TEXT != n->type &&
+ termacts[n->tok].post &&
+ ! (MDOC_ENDED & n->flags)) {
+ (*termacts[n->tok].post)(p, &npair, m, n);
+
+ /*
+ * Explicit end tokens not only call the post
+ * handler, but also tell the respective block
+ * that it must not call the post handler again.
+ */
+ if (n->end)
+ n->pending->flags |= MDOC_ENDED;
+
+ /*
+ * End of line terminating an implicit block
+ * while an explicit block is still open.
+ * Continue the explicit block without spacing.
+ */
+ if (ENDBODY_NOSPACE == n->end)
+ p->flags |= TERMP_NOSPACE;
+ }
if (MDOC_EOS & n->flags)
p->flags |= TERMP_SENTENCE;
diff --git a/tree.c b/tree.c
index 1156a0d7..a5071b29 100644
--- a/tree.c
+++ b/tree.c
@@ -75,7 +75,10 @@ print_mdoc(const struct mdoc_node *n, int indent)
t = "block-head";
break;
case (MDOC_BODY):
- t = "block-body";
+ if (n->end)
+ t = "body-end";
+ else
+ t = "block-body";
break;
case (MDOC_TAIL):
t = "block-tail";