From 1a23231b7d2adf726b168ad0121186f58c9afc2c Mon Sep 17 00:00:00 2001 From: Kristaps Dzonsons Date: Sat, 27 Mar 2010 10:04:56 +0000 Subject: Fixed re-adjustment of scope in exiting roff instructions (libman). Added title-case check for (libman). Fixed premature closure of roff instruction scope (libman). Added documentation of ignored roff macros to man(3). --- Makefile | 4 +- index.sgml | 9 +++++ libman.h | 5 ++- man.3 | 116 +++++++++++++++++++++++++++++++++++++++++++-------------- man.c | 10 +++-- man_action.c | 1 - man_macro.c | 100 +++++++++++++++++++++++++++++++++++++++++-------- man_validate.c | 23 +++++++++++- 8 files changed, 215 insertions(+), 53 deletions(-) diff --git a/Makefile b/Makefile index 47ad3fef..432c4f41 100644 --- a/Makefile +++ b/Makefile @@ -10,8 +10,8 @@ INSTALL_DATA = install -m 0444 INSTALL_LIB = install -m 0644 INSTALL_MAN = $(INSTALL_DATA) -VERSION = 1.9.17 -VDATE = 22 March 2010 +VERSION = 1.9.18 +VDATE = 27 March 2010 VFLAGS = -DVERSION="\"$(VERSION)\"" -DHAVE_CONFIG_H WFLAGS = -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings diff --git a/index.sgml b/index.sgml index ed7b15c9..87c5e1df 100644 --- a/index.sgml +++ b/index.sgml @@ -218,6 +218,15 @@ + + + +
27-03-2010 + Version 1.9.18: many fixes (largely pertaining to + scope) and improvements (e.g., handling of apostrophe-control macros, which + fixes the strange BR seen in some macro output) to handling roff + instructions in -man documents. +
25-03-2010 diff --git a/libman.h b/libman.h index 747abfa9..ba410ae8 100644 --- a/libman.h +++ b/libman.h @@ -61,6 +61,8 @@ enum merr { WOLITERAL, WNLITERAL, WROFFNEST, + WROFFSCOPE, + WTITLECASE, WERRMAX }; @@ -111,7 +113,8 @@ int man_valid_post(struct man *); int man_valid_pre(struct man *, const struct man_node *); int man_action_post(struct man *); int man_action_pre(struct man *, struct man_node *); -int man_unscope(struct man *, const struct man_node *); +int man_unscope(struct man *, + const struct man_node *, enum merr); __END_DECLS diff --git a/man.3 b/man.3 index fd0b75a0..dc8c85ae 100644 --- a/man.3 +++ b/man.3 @@ -17,8 +17,10 @@ .Dd $Mdocdate$ .Dt MAN 3 .Os -.\" SECTION +. +. .Sh NAME +.Nm man , .Nm man_alloc , .Nm man_parseln , .Nm man_endparse , @@ -27,7 +29,8 @@ .Nm man_free , .Nm man_reset .Nd man macro compiler library -.\" SECTION +. +. .Sh SYNOPSIS .In man.h .Vt extern const char * const * man_macronames; @@ -45,16 +48,17 @@ .Fn man_meta "const struct man *man" .Ft int .Fn man_endparse "struct man *man" -.\" SECTION +. +. .Sh DESCRIPTION The -.Nm man +.Nm library parses lines of .Xr man 7 input (and .Em only man) into an abstract syntax tree (AST). -.\" PARAGRAPH +. .Pp In general, applications initiate a parsing sequence with .Fn man_alloc , @@ -74,8 +78,58 @@ function may be used in order to reset the parser for another input sequence. See the .Sx EXAMPLES section for a full example. -.\" PARAGRAPH +. +.Pp +Beyond the full set of macros defined in +.Xr man 7 , +the +.Nm +library also accepts the following macros: +. .Pp +.Bl -tag -width Ds -compact +.It am +.It ami +.It de +.It dei +.It ig +Instructional macros in the original roff language. Blocks begun by +these macros end with +.Sq .. +and may begin anywhere, although they may not break the next-line +scoping rules specified in +.Xr man 7 . +These blocks are discarded. +. +.It PD +Has no effect. Handled as a current-scope line macro. +. 
+.It Sp +A synonym for +.Sq sp 0.5v +.Pq part of the standard preamble for Perl documentation . +Handled as a line macro. +. +.It UC +Has no effect. Handled as a current-scope line macro. +. +.It Vb +A synonym for +.Sq nf +.Pq part of the standard preamble for Perl documentation . +Handled as a current-scope line macro. +. +.It Ve +A synonym for +.Sq fi , +closing +.Sq Vb +.Pq part of the standard preamble for Perl documentation . +Handled as a current-scope line macro. +.El +. +. +.Sh REFERENCE This section further defines the .Sx Types , .Sx Functions @@ -84,7 +138,8 @@ and available to programmers. Following that, the .Sx Abstract Syntax Tree section documents the output tree. -.\" SUBSECTION +. +. .Ss Types Both functions (see .Sx Functions ) @@ -92,16 +147,16 @@ and variables (see .Sx Variables ) may use the following types: .Bl -ohang -.\" LIST-ITEM +. .It Vt struct man An opaque type defined in .Pa man.c . Its values are only used privately within the library. -.\" LIST-ITEM +. .It Vt struct man_cb A set of message callbacks defined in .Pa man.h . -.\" LIST-ITEM +. .It Vt struct man_node A parsed node. Defined in .Pa man.h . @@ -109,11 +164,12 @@ See .Sx Abstract Syntax Tree for details. .El -.\" SUBSECTION +. +. .Ss Functions Function descriptions follow: .Bl -ohang -.\" LIST-ITEM +. .It Fn man_alloc Allocates a parsing structure. The .Fa data @@ -126,29 +182,29 @@ arguments are defined in .Pa man.h . Returns NULL on failure. If non-NULL, the pointer must be freed with .Fn man_free . -.\" LIST-ITEM +. .It Fn man_reset Reset the parser for another parse routine. After its use, .Fn man_parseln behaves as if invoked for the first time. -.\" LIST-ITEM +. .It Fn man_free Free all resources of a parser. The pointer is no longer valid after invocation. -.\" LIST-ITEM +. .It Fn man_parseln Parse a nil-terminated line of input. This line should not contain the trailing newline. Returns 0 on failure, 1 on success. 
The input buffer .Fa buf is modified by this function. -.\" LIST-ITEM +. .It Fn man_endparse Signals that the parse is complete. Note that if .Fn man_endparse is called subsequent to .Fn man_node , the resulting tree is incomplete. Returns 0 on failure, 1 on success. -.\" LIST-ITEM +. .It Fn man_node Returns the first node of the parse. Note that if .Fn man_parseln @@ -163,15 +219,17 @@ or .Fn man_endparse return 0, the data will be incomplete. .El -.\" SUBSECTION +. +. .Ss Variables The following variables are also defined: .Bl -ohang -.\" LIST-ITEM +. .It Va man_macronames An array of string-ified token names. .El -.\" SUBSECTION +. +. .Ss Abstract Syntax Tree The .Nm @@ -185,13 +243,13 @@ or after or .Fn man_parseln fail, it may be incomplete. -.\" PARAGRAPH +. .Pp This AST is governed by the ontological rules dictated in .Xr man 7 and derives its terminology accordingly. -.\" PARAGRAPH +. .Pp The AST is composed of .Vt struct man_node @@ -210,13 +268,12 @@ fields), its position in the tree (the and .Va prev fields) and some type-specific data. -.\" PARAGRAPH +. .Pp The tree itself is arranged according to the following normal form, where capitalised non-terminals represent nodes. .Pp .Bl -tag -width "ELEMENTXX" -compact -.\" LIST-ITEM .It ROOT \(<- mnode+ .It mnode @@ -232,12 +289,13 @@ where capitalised non-terminals represent nodes. .It TEXT \(<- [[:alpha:]]* .El -.\" PARAGRAPH +. .Pp The only elements capable of nesting other elements are those with next-line scope as documented in .Xr man 7 . -.\" SECTION +. +. .Sh EXAMPLES The following example reads lines from stdin and parses them, operating on the finished parse tree with @@ -273,11 +331,13 @@ if (NULL == (node = man_node(man))) parsed(man, node); man_free(man); .Ed -.\" SECTION +. +. .Sh SEE ALSO .Xr mandoc 1 , .Xr man 7 -.\" SECTION +. +. 
.Sh AUTHORS The .Nm diff --git a/man.c b/man.c index bf3754a8..1c85c0fe 100644 --- a/man.c +++ b/man.c @@ -51,6 +51,8 @@ const char *const __man_merrnames[WERRMAX] = { "literal context already open", /* WOLITERAL */ "no literal context open", /* WNLITERAL */ "invalid nesting of roff declarations", /* WROFFNEST */ + "scope in roff instructions broken", /* WROFFSCOPE */ + "document title should be uppercase", /* WTITLECASE */ }; const char *const __man_macronames[MAN_MAX] = { @@ -155,7 +157,7 @@ int man_parseln(struct man *m, int ln, char *buf) { - return('.' == *buf ? + return('.' == *buf || '\'' == *buf ? man_pmacro(m, ln, buf) : man_ptext(m, ln, buf)); } @@ -447,7 +449,7 @@ descope: if (MAN_ELINE & m->flags) { m->flags &= ~MAN_ELINE; - if ( ! man_unscope(m, m->last->parent)) + if ( ! man_unscope(m, m->last->parent, WERRMAX)) return(0); } @@ -455,7 +457,7 @@ descope: return(1); m->flags &= ~MAN_BLINE; - if ( ! man_unscope(m, m->last->parent)) + if ( ! man_unscope(m, m->last->parent, WERRMAX)) return(0); return(man_body_alloc(m, line, 0, m->last->tok)); } @@ -623,7 +625,7 @@ out: assert(MAN_BLINE & m->flags); m->flags &= ~MAN_BLINE; - if ( ! man_unscope(m, m->last->parent)) + if ( ! 
man_unscope(m, m->last->parent, WERRMAX)) return(0); return(man_body_alloc(m, ln, 0, m->last->tok)); diff --git a/man_action.c b/man_action.c index 6cd24666..48f4259e 100644 --- a/man_action.c +++ b/man_action.c @@ -124,7 +124,6 @@ post_de(struct man *m) */ if (MAN_BLOCK == m->last->type) man_node_delete(m, m->last); - return(1); } diff --git a/man_macro.c b/man_macro.c index da506e2d..1c83b6c6 100644 --- a/man_macro.c +++ b/man_macro.c @@ -43,6 +43,8 @@ static enum rew rew_dohalt(enum mant, enum man_type, const struct man_node *); static enum rew rew_block(enum mant, enum man_type, const struct man_node *); +static int rew_warn(struct man *, + struct man_node *, enum merr); const struct man_macro __man_macros[MAN_MAX] = { { in_line_eoln, MAN_NSCOPED }, /* br */ @@ -91,14 +93,39 @@ const struct man_macro __man_macros[MAN_MAX] = { const struct man_macro * const man_macros = __man_macros; +/* + * Warn when "n" is an explicit non-roff macro. + */ +static int +rew_warn(struct man *m, struct man_node *n, enum merr er) +{ + + if (er == WERRMAX || MAN_BLOCK != n->type) + return(1); + if (MAN_VALID & n->flags) + return(1); + if ( ! (MAN_EXPLICIT & man_macros[n->tok].flags)) + return(1); + if (MAN_NOCLOSE & man_macros[n->tok].flags) + return(1); + return(man_nwarn(m, n, er)); +} + + +/* + * Rewind scope. If a code "er" != WERRMAX has been provided, it will + * be used if an explicit block scope is being closed out. + */ int -man_unscope(struct man *m, const struct man_node *n) +man_unscope(struct man *m, const struct man_node *n, enum merr er) { assert(n); /* LINTED */ while (m->last != n) { + if ( ! rew_warn(m, m->last, er)) + return(0); if ( ! man_valid_post(m)) return(0); if ( ! man_action_post(m)) @@ -107,6 +134,8 @@ man_unscope(struct man *m, const struct man_node *n) assert(m->last); } + if ( ! rew_warn(m, m->last, er)) + return(0); if ( ! man_valid_post(m)) return(0); if ( ! 
man_action_post(m)) @@ -140,18 +169,47 @@ rew_dohalt(enum mant tok, enum man_type type, const struct man_node *n) { enum rew c; + /* We cannot progress beyond the root ever. */ if (MAN_ROOT == n->type) return(REW_HALT); + assert(n->parent); + + /* Normal nodes shouldn't go to the level of the root. */ if (MAN_ROOT == n->parent->type) return(REW_REWIND); + + /* Already-validated nodes should be closed out. */ if (MAN_VALID & n->flags) return(REW_NOHALT); - /* Rewind to ourselves, first. */ + /* First: rewind to ourselves. */ if (type == n->type && tok == n->tok) return(REW_REWIND); + /* + * If we're a roff macro, then we can close out anything that + * stands between us and our parent context. + */ + if (MAN_NOCLOSE & man_macros[tok].flags) + return(REW_NOHALT); + + /* + * Don't clobber roff macros: this is a bit complicated. If the + * current macro is a roff macro, halt immediately and don't + * rewind. If it's not, and the parent is, then close out the + * current scope and halt at the parent. + */ + if (MAN_NOCLOSE & man_macros[n->tok].flags) + return(REW_HALT); + if (MAN_NOCLOSE & man_macros[n->parent->tok].flags) + return(REW_REWIND); + + /* + * Next follow the implicit scope-smashings as defined by man.7: + * section, sub-section, etc. + */ + switch (tok) { case (MAN_SH): break; @@ -210,10 +268,15 @@ rew_scope(enum man_type type, struct man *m, enum mant tok) break; } - /* Rewind until the current point. */ - + /* + * Rewind until the current point. Warn if we're a roff + * instruction that's mowing over explicit scopes. + */ assert(n); - return(man_unscope(m, n)); + if (MAN_NOCLOSE & man_macros[tok].flags) + return(man_unscope(m, n, WROFFSCOPE)); + + return(man_unscope(m, n, WERRMAX)); } @@ -229,6 +292,8 @@ blk_dotted(MACRO_PROT_ARGS) enum mant ntok; struct man_node *nn; + /* Check for any of the following parents... 
*/ + for (nn = m->last->parent; nn; nn = nn->parent) if (nn->tok == MAN_de || nn->tok == MAN_dei || nn->tok == MAN_am || @@ -249,6 +314,20 @@ blk_dotted(MACRO_PROT_ARGS) if ( ! rew_scope(MAN_BLOCK, m, ntok)) return(0); + /* + * XXX: manually adjust our next-line status. roff macros are, + * for the moment, ignored, so we don't want to close out bodies + * and so on. + */ + + switch (m->last->type) { + case (MAN_BODY): + m->next = MAN_NEXT_CHILD; + break; + default: + break; + } + return(1); } @@ -485,15 +564,6 @@ man_macroend(struct man *m) n = MAN_VALID & m->last->flags ? m->last->parent : m->last; - for ( ; n; n = n->parent) { - if (MAN_BLOCK != n->type) - continue; - if ( ! (MAN_EXPLICIT & man_macros[n->tok].flags)) - continue; - if ( ! man_nwarn(m, n, WEXITSCOPE)) - return(0); - } - - return(man_unscope(m, m->first)); + return(man_unscope(m, m->first, WEXITSCOPE)); } diff --git a/man_validate.c b/man_validate.c index 6787d911..b69f9e68 100644 --- a/man_validate.c +++ b/man_validate.c @@ -50,9 +50,10 @@ static int check_roff(CHKARGS); static int check_root(CHKARGS); static int check_sec(CHKARGS); static int check_text(CHKARGS); +static int check_title(CHKARGS); static v_check posts_eq0[] = { check_eq0, NULL }; -static v_check posts_ge2_le5[] = { check_ge2, check_le5, NULL }; +static v_check posts_th[] = { check_ge2, check_le5, check_title, NULL }; static v_check posts_par[] = { check_par, NULL }; static v_check posts_part[] = { check_part, NULL }; static v_check posts_sec[] = { check_sec, NULL }; @@ -62,7 +63,7 @@ static v_check pres_roff[] = { check_bline, check_roff, NULL }; static const struct man_valid man_valids[MAN_MAX] = { { NULL, posts_eq0 }, /* br */ - { pres_bline, posts_ge2_le5 }, /* TH */ /* FIXME: make sure capitalised. 
*/ + { pres_bline, posts_th }, /* TH */ { pres_bline, posts_sec }, /* SH */ { pres_bline, posts_sec }, /* SS */ { pres_bline, posts_par }, /* TP */ @@ -173,6 +174,24 @@ check_root(CHKARGS) } +static int +check_title(CHKARGS) +{ + const char *p; + + assert(n->child); + if ('\0' == *n->child->string) + return(man_nerr(m, n, WNOTITLE)); + + for (p = n->child->string; '\0' != *p; p++) + if (isalpha((u_char)*p) && ! isupper((u_char)*p)) + if ( ! man_nwarn(m, n, WTITLECASE)) + return(0); + + return(1); +} + + static int check_text(CHKARGS) { -- cgit