summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kristaps Dzonsons <kristaps@bsd.lv> 2010-03-27 10:04:56 +0000
committer Kristaps Dzonsons <kristaps@bsd.lv> 2010-03-27 10:04:56 +0000
commit 1a23231b7d2adf726b168ad0121186f58c9afc2c (patch)
tree 2cae492c3f4cafc129472596cfbb002cb2aefbaa
parent 80d2ac69bc131eb6fd2465642f52a9255dabd36a (diff)
download mandoc-1a23231b7d2adf726b168ad0121186f58c9afc2c.tar.gz
Fixed re-adjustment of scope in exiting roff instructions (libman).
Added upper-case check for the document title (libman). Fixed premature closure of roff instruction scope (libman). Added documentation of ignored roff macros to man(3).
-rw-r--r-- Makefile 4
-rw-r--r-- index.sgml 9
-rw-r--r-- libman.h 5
-rw-r--r-- man.3 116
-rw-r--r-- man.c 10
-rw-r--r-- man_action.c 1
-rw-r--r-- man_macro.c 100
-rw-r--r-- man_validate.c 23
8 files changed, 215 insertions, 53 deletions
diff --git a/Makefile b/Makefile
index 47ad3fef..432c4f41 100644
--- a/Makefile
+++ b/Makefile
@@ -10,8 +10,8 @@ INSTALL_DATA = install -m 0444
INSTALL_LIB = install -m 0644
INSTALL_MAN = $(INSTALL_DATA)
-VERSION = 1.9.17
-VDATE = 22 March 2010
+VERSION = 1.9.18
+VDATE = 27 March 2010
VFLAGS = -DVERSION="\"$(VERSION)\"" -DHAVE_CONFIG_H
WFLAGS = -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings
diff --git a/index.sgml b/index.sgml
index ed7b15c9..87c5e1df 100644
--- a/index.sgml
+++ b/index.sgml
@@ -219,6 +219,15 @@
<COL CLASS="date">
<TBODY>
<TR>
+ <TD VALIGN="top"><SPAN CLASS="date">27-03-2010</SPAN></TD>
+ <TD VALIGN="top">
+ Version <SPAN CLASS="rev">1.9.18</SPAN>: many fixes (largely pertaining to
+ scope) and improvements (e.g., handling of apostrophe-control macros, which
+ fixes the strange <Q>BR</Q> seen in some macro output) to handling roff
+ instructions in -man documents.
+ </TD>
+ </TR>
+ <TR>
<TD VALIGN="top"><SPAN CLASS="date">25-03-2010</SPAN></TD>
<TD VALIGN="top">
Version <SPAN CLASS="rev">1.9.17</SPAN> highlights: accept <A
diff --git a/libman.h b/libman.h
index 747abfa9..ba410ae8 100644
--- a/libman.h
+++ b/libman.h
@@ -61,6 +61,8 @@ enum merr {
WOLITERAL,
WNLITERAL,
WROFFNEST,
+ WROFFSCOPE,
+ WTITLECASE,
WERRMAX
};
@@ -111,7 +113,8 @@ int man_valid_post(struct man *);
int man_valid_pre(struct man *, const struct man_node *);
int man_action_post(struct man *);
int man_action_pre(struct man *, struct man_node *);
-int man_unscope(struct man *, const struct man_node *);
+int man_unscope(struct man *,
+ const struct man_node *, enum merr);
__END_DECLS
diff --git a/man.3 b/man.3
index fd0b75a0..dc8c85ae 100644
--- a/man.3
+++ b/man.3
@@ -17,8 +17,10 @@
.Dd $Mdocdate$
.Dt MAN 3
.Os
-.\" SECTION
+.
+.
.Sh NAME
+.Nm man ,
.Nm man_alloc ,
.Nm man_parseln ,
.Nm man_endparse ,
@@ -27,7 +29,8 @@
.Nm man_free ,
.Nm man_reset
.Nd man macro compiler library
-.\" SECTION
+.
+.
.Sh SYNOPSIS
.In man.h
.Vt extern const char * const * man_macronames;
@@ -45,16 +48,17 @@
.Fn man_meta "const struct man *man"
.Ft int
.Fn man_endparse "struct man *man"
-.\" SECTION
+.
+.
.Sh DESCRIPTION
The
-.Nm man
+.Nm
library parses lines of
.Xr man 7
input (and
.Em only
man) into an abstract syntax tree (AST).
-.\" PARAGRAPH
+.
.Pp
In general, applications initiate a parsing sequence with
.Fn man_alloc ,
@@ -74,8 +78,58 @@ function may be used in order to reset the parser for another input
sequence. See the
.Sx EXAMPLES
section for a full example.
-.\" PARAGRAPH
+.
+.Pp
+Beyond the full set of macros defined in
+.Xr man 7 ,
+the
+.Nm
+library also accepts the following macros:
+.
.Pp
+.Bl -tag -width Ds -compact
+.It am
+.It ami
+.It de
+.It dei
+.It ig
+Instructional macros in the original roff language. Blocks begun by
+these macros end with
+.Sq ..
+and may begin anywhere, although they may not break the next-line
+scoping rules specified in
+.Xr man 7 .
+These blocks are discarded.
+.
+.It PD
+Has no effect. Handled as a current-scope line macro.
+.
+.It Sp
+A synonym for
+.Sq sp 0.5v
+.Pq part of the standard preamble for Perl documentation .
+Handled as a line macro.
+.
+.It UC
+Has no effect. Handled as a current-scope line macro.
+.
+.It Vb
+A synonym for
+.Sq nf
+.Pq part of the standard preamble for Perl documentation .
+Handled as a current-scope line macro.
+.
+.It Ve
+A synonym for
+.Sq fi ,
+closing
+.Sq Vb
+.Pq part of the standard preamble for Perl documentation .
+Handled as a current-scope line macro.
+.El
+.
+.
+.Sh REFERENCE
This section further defines the
.Sx Types ,
.Sx Functions
@@ -84,7 +138,8 @@ and
available to programmers. Following that, the
.Sx Abstract Syntax Tree
section documents the output tree.
-.\" SUBSECTION
+.
+.
.Ss Types
Both functions (see
.Sx Functions )
@@ -92,16 +147,16 @@ and variables (see
.Sx Variables )
may use the following types:
.Bl -ohang
-.\" LIST-ITEM
+.
.It Vt struct man
An opaque type defined in
.Pa man.c .
Its values are only used privately within the library.
-.\" LIST-ITEM
+.
.It Vt struct man_cb
A set of message callbacks defined in
.Pa man.h .
-.\" LIST-ITEM
+.
.It Vt struct man_node
A parsed node. Defined in
.Pa man.h .
@@ -109,11 +164,12 @@ See
.Sx Abstract Syntax Tree
for details.
.El
-.\" SUBSECTION
+.
+.
.Ss Functions
Function descriptions follow:
.Bl -ohang
-.\" LIST-ITEM
+.
.It Fn man_alloc
Allocates a parsing structure. The
.Fa data
@@ -126,29 +182,29 @@ arguments are defined in
.Pa man.h .
Returns NULL on failure. If non-NULL, the pointer must be freed with
.Fn man_free .
-.\" LIST-ITEM
+.
.It Fn man_reset
Reset the parser for another parse routine. After its use,
.Fn man_parseln
behaves as if invoked for the first time.
-.\" LIST-ITEM
+.
.It Fn man_free
Free all resources of a parser. The pointer is no longer valid after
invocation.
-.\" LIST-ITEM
+.
.It Fn man_parseln
Parse a nil-terminated line of input. This line should not contain the
trailing newline. Returns 0 on failure, 1 on success. The input buffer
.Fa buf
is modified by this function.
-.\" LIST-ITEM
+.
.It Fn man_endparse
Signals that the parse is complete. Note that if
.Fn man_endparse
is called subsequent to
.Fn man_node ,
the resulting tree is incomplete. Returns 0 on failure, 1 on success.
-.\" LIST-ITEM
+.
.It Fn man_node
Returns the first node of the parse. Note that if
.Fn man_parseln
@@ -163,15 +219,17 @@ or
.Fn man_endparse
return 0, the data will be incomplete.
.El
-.\" SUBSECTION
+.
+.
.Ss Variables
The following variables are also defined:
.Bl -ohang
-.\" LIST-ITEM
+.
.It Va man_macronames
An array of string-ified token names.
.El
-.\" SUBSECTION
+.
+.
.Ss Abstract Syntax Tree
The
.Nm
@@ -185,13 +243,13 @@ or after
or
.Fn man_parseln
fail, it may be incomplete.
-.\" PARAGRAPH
+.
.Pp
This AST is governed by the ontological
rules dictated in
.Xr man 7
and derives its terminology accordingly.
-.\" PARAGRAPH
+.
.Pp
The AST is composed of
.Vt struct man_node
@@ -210,13 +268,12 @@ fields), its position in the tree (the
and
.Va prev
fields) and some type-specific data.
-.\" PARAGRAPH
+.
.Pp
The tree itself is arranged according to the following normal form,
where capitalised non-terminals represent nodes.
.Pp
.Bl -tag -width "ELEMENTXX" -compact
-.\" LIST-ITEM
.It ROOT
\(<- mnode+
.It mnode
@@ -232,12 +289,13 @@ where capitalised non-terminals represent nodes.
.It TEXT
\(<- [[:alpha:]]*
.El
-.\" PARAGRAPH
+.
.Pp
The only elements capable of nesting other elements are those with
next-line scope as documented in
.Xr man 7 .
-.\" SECTION
+.
+.
.Sh EXAMPLES
The following example reads lines from stdin and parses them, operating
on the finished parse tree with
@@ -273,11 +331,13 @@ if (NULL == (node = man_node(man)))
parsed(man, node);
man_free(man);
.Ed
-.\" SECTION
+.
+.
.Sh SEE ALSO
.Xr mandoc 1 ,
.Xr man 7
-.\" SECTION
+.
+.
.Sh AUTHORS
The
.Nm
diff --git a/man.c b/man.c
index bf3754a8..1c85c0fe 100644
--- a/man.c
+++ b/man.c
@@ -51,6 +51,8 @@ const char *const __man_merrnames[WERRMAX] = {
"literal context already open", /* WOLITERAL */
"no literal context open", /* WNLITERAL */
"invalid nesting of roff declarations", /* WROFFNEST */
+ "scope in roff instructions broken", /* WROFFSCOPE */
+ "document title should be uppercase", /* WTITLECASE */
};
const char *const __man_macronames[MAN_MAX] = {
@@ -155,7 +157,7 @@ int
man_parseln(struct man *m, int ln, char *buf)
{
- return('.' == *buf ?
+ return('.' == *buf || '\'' == *buf ?
man_pmacro(m, ln, buf) :
man_ptext(m, ln, buf));
}
@@ -447,7 +449,7 @@ descope:
if (MAN_ELINE & m->flags) {
m->flags &= ~MAN_ELINE;
- if ( ! man_unscope(m, m->last->parent))
+ if ( ! man_unscope(m, m->last->parent, WERRMAX))
return(0);
}
@@ -455,7 +457,7 @@ descope:
return(1);
m->flags &= ~MAN_BLINE;
- if ( ! man_unscope(m, m->last->parent))
+ if ( ! man_unscope(m, m->last->parent, WERRMAX))
return(0);
return(man_body_alloc(m, line, 0, m->last->tok));
}
@@ -623,7 +625,7 @@ out:
assert(MAN_BLINE & m->flags);
m->flags &= ~MAN_BLINE;
- if ( ! man_unscope(m, m->last->parent))
+ if ( ! man_unscope(m, m->last->parent, WERRMAX))
return(0);
return(man_body_alloc(m, ln, 0, m->last->tok));
diff --git a/man_action.c b/man_action.c
index 6cd24666..48f4259e 100644
--- a/man_action.c
+++ b/man_action.c
@@ -124,7 +124,6 @@ post_de(struct man *m)
*/
if (MAN_BLOCK == m->last->type)
man_node_delete(m, m->last);
-
return(1);
}
diff --git a/man_macro.c b/man_macro.c
index da506e2d..1c83b6c6 100644
--- a/man_macro.c
+++ b/man_macro.c
@@ -43,6 +43,8 @@ static enum rew rew_dohalt(enum mant, enum man_type,
const struct man_node *);
static enum rew rew_block(enum mant, enum man_type,
const struct man_node *);
+static int rew_warn(struct man *,
+ struct man_node *, enum merr);
const struct man_macro __man_macros[MAN_MAX] = {
{ in_line_eoln, MAN_NSCOPED }, /* br */
@@ -91,14 +93,39 @@ const struct man_macro __man_macros[MAN_MAX] = {
const struct man_macro * const man_macros = __man_macros;
+/*
+ * Warn when "n" is an explicit non-roff macro.
+ */
+static int
+rew_warn(struct man *m, struct man_node *n, enum merr er)
+{
+
+ if (er == WERRMAX || MAN_BLOCK != n->type)
+ return(1);
+ if (MAN_VALID & n->flags)
+ return(1);
+ if ( ! (MAN_EXPLICIT & man_macros[n->tok].flags))
+ return(1);
+ if (MAN_NOCLOSE & man_macros[n->tok].flags)
+ return(1);
+ return(man_nwarn(m, n, er));
+}
+
+
+/*
+ * Rewind scope. If a code "er" != WERRMAX has been provided, it will
+ * be used if an explicit block scope is being closed out.
+ */
int
-man_unscope(struct man *m, const struct man_node *n)
+man_unscope(struct man *m, const struct man_node *n, enum merr er)
{
assert(n);
/* LINTED */
while (m->last != n) {
+ if ( ! rew_warn(m, m->last, er))
+ return(0);
if ( ! man_valid_post(m))
return(0);
if ( ! man_action_post(m))
@@ -107,6 +134,8 @@ man_unscope(struct man *m, const struct man_node *n)
assert(m->last);
}
+ if ( ! rew_warn(m, m->last, er))
+ return(0);
if ( ! man_valid_post(m))
return(0);
if ( ! man_action_post(m))
@@ -140,18 +169,47 @@ rew_dohalt(enum mant tok, enum man_type type, const struct man_node *n)
{
enum rew c;
+ /* We cannot progress beyond the root ever. */
if (MAN_ROOT == n->type)
return(REW_HALT);
+
assert(n->parent);
+
+ /* Normal nodes shouldn't go to the level of the root. */
if (MAN_ROOT == n->parent->type)
return(REW_REWIND);
+
+ /* Already-validated nodes should be closed out. */
if (MAN_VALID & n->flags)
return(REW_NOHALT);
- /* Rewind to ourselves, first. */
+ /* First: rewind to ourselves. */
if (type == n->type && tok == n->tok)
return(REW_REWIND);
+ /*
+ * If we're a roff macro, then we can close out anything that
+ * stands between us and our parent context.
+ */
+ if (MAN_NOCLOSE & man_macros[tok].flags)
+ return(REW_NOHALT);
+
+ /*
+ * Don't clobber roff macros: this is a bit complicated. If the
+ * current macro is a roff macro, halt immediately and don't
+ * rewind. If it's not, and the parent is, then close out the
+ * current scope and halt at the parent.
+ */
+ if (MAN_NOCLOSE & man_macros[n->tok].flags)
+ return(REW_HALT);
+ if (MAN_NOCLOSE & man_macros[n->parent->tok].flags)
+ return(REW_REWIND);
+
+ /*
+ * Next follow the implicit scope-smashings as defined by man.7:
+ * section, sub-section, etc.
+ */
+
switch (tok) {
case (MAN_SH):
break;
@@ -210,10 +268,15 @@ rew_scope(enum man_type type, struct man *m, enum mant tok)
break;
}
- /* Rewind until the current point. */
-
+ /*
+ * Rewind until the current point. Warn if we're a roff
+ * instruction that's mowing over explicit scopes.
+ */
assert(n);
- return(man_unscope(m, n));
+ if (MAN_NOCLOSE & man_macros[tok].flags)
+ return(man_unscope(m, n, WROFFSCOPE));
+
+ return(man_unscope(m, n, WERRMAX));
}
@@ -229,6 +292,8 @@ blk_dotted(MACRO_PROT_ARGS)
enum mant ntok;
struct man_node *nn;
+ /* Check for any of the following parents... */
+
for (nn = m->last->parent; nn; nn = nn->parent)
if (nn->tok == MAN_de || nn->tok == MAN_dei ||
nn->tok == MAN_am ||
@@ -249,6 +314,20 @@ blk_dotted(MACRO_PROT_ARGS)
if ( ! rew_scope(MAN_BLOCK, m, ntok))
return(0);
+ /*
+ * XXX: manually adjust our next-line status. roff macros are,
+ * for the moment, ignored, so we don't want to close out bodies
+ * and so on.
+ */
+
+ switch (m->last->type) {
+ case (MAN_BODY):
+ m->next = MAN_NEXT_CHILD;
+ break;
+ default:
+ break;
+ }
+
return(1);
}
@@ -485,15 +564,6 @@ man_macroend(struct man *m)
n = MAN_VALID & m->last->flags ?
m->last->parent : m->last;
- for ( ; n; n = n->parent) {
- if (MAN_BLOCK != n->type)
- continue;
- if ( ! (MAN_EXPLICIT & man_macros[n->tok].flags))
- continue;
- if ( ! man_nwarn(m, n, WEXITSCOPE))
- return(0);
- }
-
- return(man_unscope(m, m->first));
+ return(man_unscope(m, m->first, WEXITSCOPE));
}
diff --git a/man_validate.c b/man_validate.c
index 6787d911..b69f9e68 100644
--- a/man_validate.c
+++ b/man_validate.c
@@ -50,9 +50,10 @@ static int check_roff(CHKARGS);
static int check_root(CHKARGS);
static int check_sec(CHKARGS);
static int check_text(CHKARGS);
+static int check_title(CHKARGS);
static v_check posts_eq0[] = { check_eq0, NULL };
-static v_check posts_ge2_le5[] = { check_ge2, check_le5, NULL };
+static v_check posts_th[] = { check_ge2, check_le5, check_title, NULL };
static v_check posts_par[] = { check_par, NULL };
static v_check posts_part[] = { check_part, NULL };
static v_check posts_sec[] = { check_sec, NULL };
@@ -62,7 +63,7 @@ static v_check pres_roff[] = { check_bline, check_roff, NULL };
static const struct man_valid man_valids[MAN_MAX] = {
{ NULL, posts_eq0 }, /* br */
- { pres_bline, posts_ge2_le5 }, /* TH */ /* FIXME: make sure capitalised. */
+ { pres_bline, posts_th }, /* TH */
{ pres_bline, posts_sec }, /* SH */
{ pres_bline, posts_sec }, /* SS */
{ pres_bline, posts_par }, /* TP */
@@ -174,6 +175,24 @@ check_root(CHKARGS)
static int
+check_title(CHKARGS)
+{
+ const char *p;
+
+ assert(n->child);
+ if ('\0' == *n->child->string)
+ return(man_nerr(m, n, WNOTITLE));
+
+ for (p = n->child->string; '\0' != *p; p++)
+ if (isalpha((u_char)*p) && ! isupper((u_char)*p))
+ if ( ! man_nwarn(m, n, WTITLECASE))
+ return(0);
+
+ return(1);
+}
+
+
+static int
check_text(CHKARGS)
{
const char *p;