diff options
author | Kristaps Dzonsons <kristaps@bsd.lv> | 2009-08-13 11:45:29 +0000 |
---|---|---|
committer | Kristaps Dzonsons <kristaps@bsd.lv> | 2009-08-13 11:45:29 +0000 |
commit | 1aa6cc37dcc72475cfe4c2a003d11b606284f790 (patch) | |
tree | e54eb36b944b08bce1d256d7c997c150ff8080b7 | |
parent | 0e0a1095bd43dfa6ae1e90b823dac732a6ed980b (diff) | |
download | mandoc-1aa6cc37dcc72475cfe4c2a003d11b606284f790.tar.gz |
Significant overhaul in libman. Macros are now block- and line-scoped (with
next-line scope extensions possible). man.7 reflects block and line scoping,
and also includes a REFERENCE section that will be used as a template for the
big mdoc reference. Many fixes in next-line behaviour for both inline and
block macros. Added some macros for compatibility (from me.7). Corrected
quoted-literal handling for libman.
-rw-r--r-- | Makefile | 9 | ||||
-rw-r--r-- | libman.h | 34 | ||||
-rw-r--r-- | man.7 | 299 | ||||
-rw-r--r-- | man.c | 161 | ||||
-rw-r--r-- | man.h | 12 | ||||
-rw-r--r-- | man_action.c | 5 | ||||
-rw-r--r-- | man_argv.c | 99 | ||||
-rw-r--r-- | man_macro.c | 363 | ||||
-rw-r--r-- | man_term.c | 233 | ||||
-rw-r--r-- | man_validate.c | 217 | ||||
-rw-r--r-- | tree.c | 15 |
11 files changed, 1060 insertions, 387 deletions
@@ -29,11 +29,11 @@ MDOCSRCS = mdoc_macro.c mdoc.c mdoc_hash.c mdoc_strings.c \ arch.c vol.c msec.c st.c mandoc.c MANLNS = man_macro.ln man.ln man_hash.ln man_validate.ln \ - man_action.ln mandoc.ln + man_action.ln mandoc.ln man_argv.ln MANOBJS = man_macro.o man.o man_hash.o man_validate.o \ - man_action.o mandoc.o + man_action.o mandoc.o man_argv.o MANSRCS = man_macro.c man.c man_hash.c man_validate.c \ - man_action.c mandoc.c + man_action.c mandoc.c man_argv.c MAINLNS = main.ln mdoc_term.ln ascii.ln term.ln tree.ln \ compat.ln man_term.ln @@ -159,6 +159,9 @@ term.o: term.c term.h man.h mdoc.h mdoc_argv.ln: mdoc_argv.c libmdoc.h mdoc_argv.o: mdoc_argv.c libmdoc.h +man_argv.ln: man_argv.c libman.h +man_argv.o: man_argv.c libman.h + man_validate.ln: man_validate.c libman.h man_validate.o: man_validate.c libman.h @@ -31,7 +31,8 @@ struct man { int pflags; int flags; #define MAN_HALT (1 << 0) -#define MAN_NLINE (1 << 1) +#define MAN_ELINE (1 << 1) /* Next-line element scope. */ +#define MAN_BLINE (1 << 2) /* Next-line block scope. */ enum man_next next; struct man_node *last; struct man_node *first; @@ -50,9 +51,26 @@ enum merr { WNOTITLE, WESCAPE, WNUMFMT, + WHEADARGS, + WBODYARGS, + WNHEADARGS, + WMACRO, + WMACROFORM, + WEXITSCOPE, WERRMAX }; +#define MACRO_PROT_ARGS struct man *m, int tok, int line, \ + int ppos, int *pos, char *buf + +struct man_macro { + int (*fp)(MACRO_PROT_ARGS); + int flags; +#define MAN_SCOPED (1 << 0) +}; + +extern const struct man_macro *const man_macros; + __BEGIN_DECLS #define man_perr(m, l, p, t) \ @@ -64,21 +82,29 @@ __BEGIN_DECLS #define man_nwarn(m, n, t) \ man_err((m), (n)->line, (n)->pos, 0, (t)) -int man_err(struct man *, int, int, int, enum merr); int man_word_alloc(struct man *, int, int, const char *); +int man_block_alloc(struct man *, int, int, int); +int man_head_alloc(struct man *, int, int, int); +int man_body_alloc(struct man *, int, int, int); int man_elem_alloc(struct man *, int, int, int); void man_node_free(struct man_node *); void man_node_freelist(struct man_node *); void *man_hash_alloc(void); -int man_macro(struct man *, int, - int, int, int *, char *); int man_hash_find(const void *, const char *); void man_hash_free(void *); int man_macroend(struct man *); +int man_args(struct man *, int, int *, char *, char **); +#define ARGS_ERROR (-1) +#define ARGS_EOLN (0) +#define ARGS_WORD (1) +#define ARGS_QWORD (1) +int man_err(struct man *, int, int, int, enum merr); int man_vwarn(struct man *, int, int, const char *, ...); int man_verr(struct man *, int, int, const char *, ...); int man_valid_post(struct man *); +int man_valid_pre(struct man *, const struct man_node *); int man_action_post(struct man *); +int man_unscope(struct man *, const struct man_node *); __END_DECLS @@ -71,11 +71,12 @@ subsequent word isn't off-set by whitespace. .\" SUB-SECTION .Ss Comments Text following a -.Sq \e" , +.Sq \e\*" , whether in a macro or free-form text line, is ignored to the end of line. A macro line with only a control character and comment escape, .Sq \&.\e" , -is also ignored. +is also ignored. Macro lines with only a control charater and +optionally whitespace are stripped from input. .\" SUB-SECTION .Ss Special Characters Special characters may occur in both macro and free-form lines. @@ -108,20 +109,47 @@ from input. These are later re-added, if applicable, by a front-end utility such as .Xr mandoc 1 . .\" SECTION -.Sh STRUCTURE +.Sh MANUAL STRUCTURE Each .Nm document must contain contains at least the -.Sq \&.TH +.Sq \&TH macro describing the document's section and title. It may occur anywhere in the document, although conventionally, it appears as the first macro. .Pp -Beyond the -.Sq \&.TH , -at least one macro or text node must appear in the document. +Beyond +.Sq \&TH , +at least one macro or text node must appear in the document. Documents +are generally structured as follows: +.Bd -literal -offset indent +\&.TH FOO 1 "13 Aug 2009" +\&. +\&.SH NAME +foo \e- a description goes here +\&. +\&.SH SYNOPSIS +\efBfoo\efR [\efB\e-options\efR] arguments... +\&. +\&.SH DESCRIPTION +The \efBfoo\efR utility does... +\&. +\&.\e\*q .SH RETURN VALUES +\&.\e\*q .SH ENVIRONMENT +\&.\e\*q .SH FILES +\&.\e\*q .SH EXAMPLES +\&.\e\*q .SH DIAGNOSTICS +\&.\e\*q .SH ERRORS +\&.\e\*q .SH SEE ALSO +\&.\e\*q \efBbar\efR(1) +\&.\e\*q .SH STANDARDS +\&.\e\*q .SH HISTORY +\&.\e\*q .SH AUTHORS +\&.\e\*q .SH CAVEATS +\&.\e\*q .SH BUGS +.Ed .\" SECTION -.Sh SYNTAX +.Sh MACRO SYNTAX Macros are one to three three characters in length and begin with a control character , .Sq \&. , @@ -132,96 +160,207 @@ and .Sq \&.\ \ \ \&PP are equivalent. .Pp -All -.Nm -macros follow the same structural rules: -.Bd -literal -offset indent -\&.YO \(lBbody...\(rB -.Ed -.Pp The -.Dq body -consists of zero or more arguments to the macro. -.Pp .Nm -has a primitive notion of multi-line scope for the following macros: -.Sq \&.TM , -.Sq \&.SM , -.Sq \&.SB , -.Sq \&.BI , -.Sq \&.IB , -.Sq \&.BR , -.Sq \&.RB , -.Sq \&.R , -.Sq \&.B , -.Sq \&.I , -.Sq \&.IR -and -.Sq \&.RI . -When these macros are invoked without arguments, the subsequent line is -considered a continuation of the macro. Thus: +macros are classified by scope: line scope or block scope. Line-scoped +macros are only scoped to the current line (and, in some situations, +the subsequent line). Block macros are scoped to the current line and +subsequent lines until closed by another block macro. +.\" SUBSECTION +.Ss Line Macros +Line-macros are scoped to the current line, with the body consisting of +zero or more arguments. If a macro is next-line scoped and the line +arguments are empty, the next line is used instead. Thus: .Bd -literal -offset indent \&.RI foo .Ed +.\" PARAGRAPH .Pp is equivalent to .Sq \&.RI foo . -If two consecutive lines exhibit the latter behaviour, -an error is raised. Thus, the following is not acceptable: +.\" PARAGRAPH +Consecutive next-line invocations are disallowed. .Bd -literal -offset indent -\&.RI -\&.I -Hello, world. +\&.YO \(lBbody...\(rB +\(lBbody...\(rB .Ed +.\" PARAGRAPH .Pp -The -.Sq \&.TP -macro is similar, but does not need an empty argument line to trigger -the behaviour. -.\" SECTION -.Sh MACROS -This section contains a complete list of all -.Nm -macros and corresponding number of arguments. -.Pp -.Bl -column "MacroX" "Arguments" -compact -offset indent -.It Em Macro Ta Em Arguments -.It \&.TH Ta >1, <6 -.It \&.SH Ta >0 -.It \&.SS Ta >0 -.It \&.TP Ta n -.It \&.LP Ta 0 -.It \&.PP Ta 0 -.It \&.P Ta 0 -.It \&.IP Ta <3 -.It \&.HP Ta <2 -.It \&.SM Ta n -.It \&.SB Ta n -.It \&.BI Ta n -.It \&.IB Ta n -.It \&.BR Ta n -.It \&.RB Ta n -.It \&.R Ta n -.It \&.B Ta n -.It \&.I Ta n -.It \&.IR Ta n -.It \&.RI Ta n +.Bl -column -compact -offset indent "MacroX" "ArgumentsX" "ScopeXXXXX" +.It Em Macro Ta Em Arguments Ta Em Scope +.It \&B Ta n Ta next-line +.It \&BI Ta n Ta current +.It \&BR Ta n Ta current +.It \&I Ta n Ta next-line +.It \&IB Ta n Ta current +.It \&IR Ta n Ta current +.It \&R Ta n Ta next-line +.It \&RB Ta n Ta current +.It \&RI Ta n Ta current +.It \&SB Ta n Ta next-line +.It \&SM Ta n Ta next-line +.It \&TH Ta >1, <6 Ta current +.It \&br Ta 0 Ta current +.It \&fi Ta 0 Ta current +.It \&i Ta n Ta current +.It \&na Ta 0 Ta current +.It \&nf Ta 0 Ta current +.It \&r Ta 0 Ta current +.It \&sp Ta 1 Ta current .El +.\" PARAGRAPH .Pp -Although not historically part of the +The lower-case +.Sq \&br , +.Sq \&fi , +.Sq \&i , +.Sq \&na , +.Sq \&nf , +.Sq \&r , +and +.Sq \&sp +macros aren't historically part of .Nm -system, the following macros are also supported: +and should not be used. They're included for compatibility. +.\" SUBSECTION +.Ss Block Macros +Block macros are comprised of a head and body. The head is scoped to +the current line and, in one circumstance, the next line; the body is +scoped to subsequent lines and is closed out by a subsequent block macro +invocation. +.Bd -literal -offset indent +\&.YO \(lBhead...\(rB +\(lBhead...\(rB +\(lBbody...\(rB +.Ed +.\" PARAGRAPH .Pp -.Bl -column "MacroX" "Arguments" -compact -offset indent -.It Em Macro Ta Em Arguments -.It \&.br Ta 0 -.It \&.i Ta n +If a block macro is next-line scoped, it may only be followed by in-line +macros (excluding +.Sq na , +.Sq sp , +.Sq nf , +.Sq fi , +and +.Sq TH ) . +.\" PARAGRAPH +.Pp +.Bl -column "MacroX" "Arguments" "ScopeXXXX" -compact -offset indent +.It Em Macro Ta Em Arguments Ta Em Scope +.It \&HP Ta <2 Ta current +.It \&IP Ta <3 Ta current +.It \&LP Ta 0 Ta current +.It \&P Ta 0 Ta current +.It \&PP Ta 0 Ta current +.It \&SH Ta >0 Ta current +.It \&SS Ta >0 Ta current +.It \&TP Ta n Ta next-line .El +.\" SECTION +.Sh REFERENCE +This section is a canonical reference to all macros, arranged +alphabetically. For the scoping of individual macros, see +.Sx MACRO SYNTAX . +.Bl -tag -width Ds -offset indent +.It \&B +Text is rendered in bold face. +.It \&BI +Text is rendered alternately in bold face and italic. Thus, +.Sq \&.BI this word and that +causes +.Sq this +and +.Sq and +to render in bold face, while +.Sq word +and +.Sq that +render in italics. Whitespace between arguments is omitted in output. +.It \&BR +Text is rendered alternately in bold face and roman (the default font). +Whitespace between arguments is omitted in output. +.It \&HP +.\" TODO. +.It \&I +Text is rendered in italics. +.It \&IB +Text is rendered alternately in italics and bold face. Whitespace +between arguments is omitted in output. +.It \&IP +.\" TODO. +.It \&IR +Text is rendered alternately in italics and roman (the default font). +Whitespace between arguments is omitted in output. +.It \&LP, \&P, \&PP +Begin an undecorated paragraph. The scope of a paragraph is closed by a +subsequent paragraph, sub-section, section, or end of file. +.It \&R +Text is rendered in roman (the default font). +.It \&RB +Text is rendered alternately in roman (the default font) and bold face. +Whitespace between arguments is omitted in output. +.It \&RI +Text is rendered alternately in roman (the default font) and italics. +Whitespace between arguments is omitted in output. +.It \&SB +Text is rendered in small size (one point smaller than the default font) +bold face. +.It \&SH +Begin a section. The scope of a section is only closed by another +section or the end of file. +.It \&SM +Text is rendered in small size (one point smaller than the default +font). +.It \&SS +Begin a sub-section. The scope of a sub-section is closed by a +subsequent sub-section, section, or end of file. +.It \&TH +Sets the title of the manual page with the following syntax: +.Bd -literal -offset indent +\&.TH title section date source volume +.Ed .Pp -These follow the same calling conventions as the above -.Nm -macros. +At least the +.Va title +and +.Va section +arguments must be provided. The +.Va date +argument should be formatted as +.Qq %b [%d] %Y +format, described in +.Xr strptime 3 . +The +.Va source +string specifies the organisation providing the utility. The +.Va volume +replaces the default rendered volume as dictated by the manual section. +.It \&TP +.\" TODO. +.It \&br +Breaks the current line. Consecutive invocations have no further effect. +.\" TODO. +.It \&fi +End literal mode begun by +.Sq \&nf . +.It \&i +Italicise arguments. If no arguments are specified, all subsequent text +is italicised. +.It \&na +No alignment to the right margin. +.It \&nf +Begin literal mode: all subsequent free-form lines have their end of +line boundaries preserved. May be ended by +.Sq \&fi . +.It \&r +Fonts and styles (bold face, italics) reset to roman (default font). +.It \&sp +Insert n spaces, where n is the macro's positive numeric argument. If +0, this is equivalent to the +.Sq br +macro. +.El .\" SECTION .Sh COMPATIBILITY See @@ -35,6 +35,12 @@ const char *const __man_merrnames[WERRMAX] = { "document has no title/section", /* WNOTITLE */ "invalid escape sequence", /* WESCAPE */ "invalid number format", /* WNUMFMT */ + "expected block head arguments", /* WHEADARGS */ + "expected block body arguments", /* WBODYARGS */ + "expected empty block head", /* WNHEADARGS */ + "unknown macro", /* WMACRO */ + "ill-formed macro", /* WMACROFORM */ + "scope open on exit" /* WEXITSCOPE */ }; const char *const __man_macronames[MAN_MAX] = { @@ -43,7 +49,8 @@ const char *const __man_macronames[MAN_MAX] = { "IP", "HP", "SM", "SB", "BI", "IB", "BR", "RB", "R", "B", "I", "IR", - "RI", "na", "i", "sp" + "RI", "na", "i", "sp", + "nf", "fi", "r" }; const char * const *man_macronames = __man_macronames; @@ -199,6 +206,22 @@ man_node_append(struct man *man, struct man_node *p) p->parent->nchild++; + if ( ! man_valid_pre(man, p)) + return(0); + + switch (p->type) { + case (MAN_HEAD): + assert(MAN_BLOCK == p->parent->type); + p->parent->head = p; + break; + case (MAN_BODY): + assert(MAN_BLOCK == p->parent->type); + p->parent->body = p; + break; + default: + break; + } + man->last = p; switch (p->type) { @@ -246,6 +269,51 @@ man_elem_alloc(struct man *man, int line, int pos, int tok) int +man_head_alloc(struct man *m, int line, int pos, int tok) +{ + struct man_node *p; + + p = man_node_alloc(line, pos, MAN_HEAD, tok); + if (NULL == p) + return(0); + if ( ! man_node_append(m, p)) + return(0); + m->next = MAN_NEXT_CHILD; + return(1); +} + + +int +man_body_alloc(struct man *m, int line, int pos, int tok) +{ + struct man_node *p; + + p = man_node_alloc(line, pos, MAN_BODY, tok); + if (NULL == p) + return(0); + if ( ! man_node_append(m, p)) + return(0); + m->next = MAN_NEXT_CHILD; + return(1); +} + + +int +man_block_alloc(struct man *m, int line, int pos, int tok) +{ + struct man_node *p; + + p = man_node_alloc(line, pos, MAN_BLOCK, tok); + if (NULL == p) + return(0); + if ( ! man_node_append(m, p)) + return(0); + m->next = MAN_NEXT_CHILD; + return(1); +} + + +int man_word_alloc(struct man *man, int line, int pos, const char *word) { @@ -290,29 +358,33 @@ static int man_ptext(struct man *m, int line, char *buf) { + /* First allocate word. */ + if ( ! man_word_alloc(m, line, 0, buf)) return(0); m->next = MAN_NEXT_SIBLING; /* - * If this is one of the zany NLINE macros that consumes the - * next line of input as being influenced, then close out the - * existing macro "scope" and continue processing. + * Co-ordinate what happens with having a next-line scope open: + * first close out the element scope (if applicable), then close + * out the block scope (also if applicable). */ - if ( ! (MAN_NLINE & m->flags)) - return(1); + /* XXX - this should be in man_action.c. */ - m->flags &= ~MAN_NLINE; - m->last = m->last->parent; + if (MAN_ELINE & m->flags) { + m->flags &= ~MAN_ELINE; + if ( ! man_unscope(m, m->last->parent)) + return(0); + } - assert(MAN_ROOT != m->last->type); - if ( ! man_valid_post(m)) - return(0); - if ( ! man_action_post(m)) - return(0); + if ( ! (MAN_BLINE & m->flags)) + return(1); + m->flags &= ~MAN_BLINE; - return(1); + if ( ! man_unscope(m, m->last->parent)) + return(0); + return(man_body_alloc(m, line, 0, m->last->tok)); } @@ -321,12 +393,10 @@ man_pmacro(struct man *m, int ln, char *buf) { int i, j, c, ppos, fl; char mac[5]; - struct man_node *n; /* Comments and empties are quickly ignored. */ - n = m->last; - fl = MAN_NLINE & m->flags; + fl = m->flags; if (0 == buf[1]) goto out; @@ -356,24 +426,20 @@ man_pmacro(struct man *m, int ln, char *buf) if (j == 4 || j < 1) { if ( ! (MAN_IGN_MACRO & m->pflags)) { - (void)man_verr(m, ln, ppos, - "ill-formed macro: %s", mac); + (void)man_perr(m, ln, ppos, WMACROFORM); goto err; } - if ( ! man_vwarn(m, ln, ppos, - "ill-formed macro: %s", mac)) + if ( ! man_pwarn(m, ln, ppos, WMACROFORM)) goto err; return(1); } if (MAN_MAX == (c = man_hash_find(m->htab, mac))) { if ( ! (MAN_IGN_MACRO & m->pflags)) { - (void)man_verr(m, ln, ppos, - "unknown macro: %s", mac); + (void)man_perr(m, ln, ppos, WMACRO); goto err; } - if ( ! man_vwarn(m, ln, ppos, - "unknown macro: %s", mac)) + if ( ! man_pwarn(m, ln, ppos, WMACRO)) goto err; return(1); } @@ -385,32 +451,33 @@ man_pmacro(struct man *m, int ln, char *buf) /* Begin recursive parse sequence. */ - if ( ! man_macro(m, c, ln, ppos, &i, buf)) + assert(man_macros[c].fp); + + if ( ! (*man_macros[c].fp)(m, c, ln, ppos, &i, buf)) goto err; out: - if (fl) { - /* - * A NLINE macro has been immediately followed with - * another. Close out the preceding macro's scope, and - * continue. - */ - assert(MAN_ROOT != m->last->type); - assert(m->last->parent); - assert(MAN_ROOT != m->last->parent->type); - - if (n != m->last) - m->last = m->last->parent; - - if ( ! man_valid_post(m)) - return(0); - if ( ! man_action_post(m)) - return(0); - m->next = MAN_NEXT_SIBLING; - m->flags &= ~MAN_NLINE; - } + if ( ! (MAN_BLINE & fl)) + return(1); - return(1); + /* + * If we've opened a new next-line element scope, then return + * now, as the next line will close out the block scope. + */ + + if (MAN_ELINE & m->flags) + return(1); + + /* Close out the block scope opened in the prior line. */ + + /* XXX - this should be in man_action.c. */ + + assert(MAN_BLINE & m->flags); + m->flags &= ~MAN_BLINE; + + if ( ! man_unscope(m, m->last->parent)) + return(0); + return(man_body_alloc(m, ln, 0, m->last->tok)); err: /* Error out. */ @@ -43,12 +43,18 @@ #define MAN_na 21 #define MAN_i 22 #define MAN_sp 23 -#define MAN_MAX 24 +#define MAN_nf 24 +#define MAN_fi 25 +#define MAN_r 26 +#define MAN_MAX 27 enum man_type { MAN_TEXT, MAN_ELEM, - MAN_ROOT + MAN_ROOT, + MAN_BLOCK, + MAN_HEAD, + MAN_BODY }; struct man_meta { @@ -73,6 +79,8 @@ struct man_node { #define MAN_ACTED (1 << 1) enum man_type type; char *string; + struct man_node *head; + struct man_node *body; }; #define MAN_IGN_MACRO (1 << 0) diff --git a/man_action.c b/man_action.c index 71f8dc3c..671246ab 100644 --- a/man_action.c +++ b/man_action.c @@ -60,6 +60,9 @@ const struct actions man_actions[MAN_MAX] = { { NULL }, /* na */ { NULL }, /* i */ { NULL }, /* sp */ + { NULL }, /* nf */ + { NULL }, /* fi */ + { NULL }, /* r*/ }; @@ -151,12 +154,10 @@ post_TH(struct man *m) */ if (m->last->parent->child == m->last) { - assert(MAN_ROOT == m->last->parent->type); m->last->parent->child = NULL; n = m->last; m->last = m->last->parent; m->next = MAN_NEXT_CHILD; - assert(m->last == m->first); } else { assert(m->last->prev); m->last->prev->next = NULL; diff --git a/man_argv.c b/man_argv.c new file mode 100644 index 00000000..65d088f7 --- /dev/null +++ b/man_argv.c @@ -0,0 +1,99 @@ +/* $Id$ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include "libman.h" + + +int +man_args(struct man *m, int line, int *pos, char *buf, char **v) +{ + + assert(*pos); + assert(' ' != buf[*pos]); + + if (0 == buf[*pos]) + return(ARGS_EOLN); + + *v = &buf[*pos]; + + /* + * Process a quoted literal. A quote begins with a double-quote + * and ends with a double-quote NOT preceded by a double-quote. + * Whitespace is NOT involved in literal termination. + */ + + if ('\"' == buf[*pos]) { + *v = &buf[++(*pos)]; + + for ( ; buf[*pos]; (*pos)++) { + if ('\"' != buf[*pos]) + continue; + if ('\"' != buf[*pos + 1]) + break; + (*pos)++; + } + + if (0 == buf[*pos]) { + if ( ! man_pwarn(m, line, *pos, WTQUOTE)) + return(ARGS_ERROR); + return(ARGS_QWORD); + } + + buf[(*pos)++] = 0; + + if (0 == buf[*pos]) + return(ARGS_QWORD); + + while (' ' == buf[*pos]) + (*pos)++; + + if (0 == buf[*pos]) + if ( ! man_pwarn(m, line, *pos, WTSPACE)) + return(ARGS_ERROR); + + return(ARGS_QWORD); + } + + /* + * A non-quoted term progresses until either the end of line or + * a non-escaped whitespace. + */ + + for ( ; buf[*pos]; (*pos)++) + if (' ' == buf[*pos] && '\\' != buf[*pos - 1]) + break; + + if (0 == buf[*pos]) + return(ARGS_WORD); + + buf[(*pos)++] = 0; + + while (' ' == buf[*pos]) + (*pos)++; + + if (0 == buf[*pos]) + if ( ! man_pwarn(m, line, *pos, WTSPACE)) + return(ARGS_ERROR); + + return(ARGS_WORD); +} + diff --git a/man_macro.c b/man_macro.c index 1532b9c5..3ded243a 100644 --- a/man_macro.c +++ b/man_macro.c @@ -21,204 +21,275 @@ #include "libman.h" -#define FL_NLINE (1 << 0) -#define FL_TLINE (1 << 1) - -static int man_args(struct man *, int, - int *, char *, char **); - -static int man_flags[MAN_MAX] = { - 0, /* br */ - 0, /* TH */ - 0, /* SH */ - 0, /* SS */ - FL_TLINE, /* TP */ - 0, /* LP */ - 0, /* PP */ - 0, /* P */ - 0, /* IP */ - 0, /* HP */ - FL_NLINE, /* SM */ - FL_NLINE, /* SB */ - FL_NLINE, /* BI */ - FL_NLINE, /* IB */ - FL_NLINE, /* BR */ - FL_NLINE, /* RB */ - FL_NLINE, /* R */ - FL_NLINE, /* B */ - FL_NLINE, /* I */ - FL_NLINE, /* IR */ - FL_NLINE, /* RI */ - 0, /* na */ - FL_NLINE, /* i */ - 0, /* sp */ +#define REW_REWIND (0) /* See rew_scope(). */ +#define REW_NOHALT (1) /* See rew_scope(). */ +#define REW_HALT (2) /* See rew_scope(). */ + +static int in_line_eoln(MACRO_PROT_ARGS); +static int blk_imp(MACRO_PROT_ARGS); + +static int rew_scope(enum man_type, struct man *, int); +static int rew_dohalt(int, enum man_type, + const struct man_node *); + +const struct man_macro __man_macros[MAN_MAX] = { + { in_line_eoln, 0 }, /* br */ + { in_line_eoln, 0 }, /* TH */ + { blk_imp, 0 }, /* SH */ + { blk_imp, 0 }, /* SS */ + { blk_imp, MAN_SCOPED }, /* TP */ + { blk_imp, 0 }, /* LP */ + { blk_imp, 0 }, /* PP */ + { blk_imp, 0 }, /* P */ + { blk_imp, 0 }, /* IP */ + { blk_imp, 0 }, /* HP */ + { in_line_eoln, MAN_SCOPED }, /* SM */ + { in_line_eoln, MAN_SCOPED }, /* SB */ + { in_line_eoln, 0 }, /* BI */ + { in_line_eoln, 0 }, /* IB */ + { in_line_eoln, 0 }, /* BR */ + { in_line_eoln, 0 }, /* RB */ + { in_line_eoln, MAN_SCOPED }, /* R */ + { in_line_eoln, MAN_SCOPED }, /* B */ + { in_line_eoln, MAN_SCOPED }, /* I */ + { in_line_eoln, 0 }, /* IR */ + { in_line_eoln, 0 }, /* RI */ + { in_line_eoln, 0 }, /* na */ + { in_line_eoln, 0 }, /* i */ + { in_line_eoln, 0 }, /* sp */ + { in_line_eoln, 0 }, /* nf */ + { in_line_eoln, 0 }, /* fi */ + { in_line_eoln, 0 }, /* r */ }; +const struct man_macro * const man_macros = __man_macros; + + int -man_macro(struct man *man, int tok, int line, - int ppos, int *pos, char *buf) +man_unscope(struct man *m, const struct man_node *n) { - int w, la; - char *p; - struct man_node *n; - - if ( ! man_elem_alloc(man, line, ppos, tok)) - return(0); - n = man->last; - man->next = MAN_NEXT_CHILD; - for (;;) { - la = *pos; - w = man_args(man, line, pos, buf, &p); + assert(n); + m->next = MAN_NEXT_SIBLING; - if (-1 == w) + /* LINTED */ + while (m->last != n) { + if ( ! man_valid_post(m)) return(0); - if (0 == w) - break; - - if ( ! man_word_alloc(man, line, la, p)) + if ( ! man_action_post(m)) return(0); - man->next = MAN_NEXT_SIBLING; + m->last = m->last->parent; + assert(m->last); } - if (n == man->last && (FL_NLINE & man_flags[tok])) { - if (MAN_NLINE & man->flags) - return(man_perr(man, line, ppos, WLNSCOPE)); - man->flags |= MAN_NLINE; - return(1); - } + if ( ! man_valid_post(m)) + return(0); + return(man_action_post(m)); +} - if (FL_TLINE & man_flags[tok]) { - if (MAN_NLINE & man->flags) - return(man_perr(man, line, ppos, WLNSCOPE)); - man->flags |= MAN_NLINE; - return(1); + +/* + * There are three scope levels: scoped to the root (all), scoped to the + * section (all less sections), and scoped to subsections (all less + * sections and subsections). + */ +static int +rew_dohalt(int tok, enum man_type type, const struct man_node *n) +{ + + if (MAN_ROOT == n->type) + return(REW_HALT); + assert(n->parent); + if (MAN_ROOT == n->parent->type) + return(REW_REWIND); + if (MAN_VALID & n->flags) + return(REW_NOHALT); + + switch (tok) { + case (MAN_SH): + /* Break at root. */ + if (type == n->type && tok == n->tok) + return(REW_REWIND); + break; + case (MAN_SS): + /* Break at section. */ + if (type == n->type && tok == n->tok) + return(REW_REWIND); + if (MAN_BODY == n->type && MAN_SH == n->tok) + return(REW_HALT); + break; + default: + /* Break at subsection. */ + if (type == n->type && tok == n->tok) + return(REW_REWIND); + if (MAN_BODY == n->type && MAN_SS == n->tok) + return(REW_HALT); + if (MAN_BODY == n->type && MAN_SH == n->tok) + return(REW_HALT); + break; } - /* - * Note that when TH is pruned, we'll be back at the root, so - * make sure that we don't clobber as its sibling. - */ + return(REW_NOHALT); +} - for ( ; man->last; man->last = man->last->parent) { - if (man->last == n) - break; - if (man->last->type == MAN_ROOT) + +/* + * Rewinding entails ascending the parse tree until a coherent point, + * for example, the `SH' macro will close out any intervening `SS' + * scopes. When a scope is closed, it must be validated and actioned. + */ +static int +rew_scope(enum man_type type, struct man *m, int tok) +{ + struct man_node *n; + int c; + + /* LINTED */ + for (n = m->last; n; n = n->parent) { + /* + * Whether we should stop immediately (REW_HALT), stop + * and rewind until this point (REW_REWIND), or keep + * rewinding (REW_NOHALT). + */ + c = rew_dohalt(tok, type, n); + if (REW_HALT == c) + return(1); + if (REW_REWIND == c) break; - if ( ! man_valid_post(man)) - return(0); - if ( ! man_action_post(man)) - return(0); } - assert(man->last); + /* Rewind until the current point. */ + + assert(n); + return(man_unscope(m, n)); +} - /* - * Same here regarding whether we're back at the root. - */ - if (man->last->type != MAN_ROOT && ! man_valid_post(man)) +/* + * Parse an implicit-block macro. These contain a MAN_HEAD and a + * MAN_BODY contained within a MAN_BLOCK. Rules for closing out other + * scopes, such as `SH' closing out an `SS', are defined in the rew + * routines. + */ +int +blk_imp(MACRO_PROT_ARGS) +{ + int w, la; + char *p; + + /* Close out prior scopes. */ + + if ( ! rew_scope(MAN_BODY, m, tok)) return(0); - if (man->last->type != MAN_ROOT && ! man_action_post(man)) + if ( ! rew_scope(MAN_BLOCK, m, tok)) return(0); - if (man->last->type != MAN_ROOT) - man->next = MAN_NEXT_SIBLING; - return(1); -} + /* Allocate new block & head scope. */ + if ( ! man_block_alloc(m, line, ppos, tok)) + return(0); + if ( ! man_head_alloc(m, line, ppos, tok)) + return(0); -int -man_macroend(struct man *m) -{ + /* Add line arguments. */ - for ( ; m->last && m->last != m->first; - m->last = m->last->parent) { - if ( ! man_valid_post(m)) + for (;;) { + la = *pos; + w = man_args(m, line, pos, buf, &p); + + if (-1 == w) return(0); - if ( ! man_action_post(m)) + if (0 == w) + break; + + if ( ! man_word_alloc(m, line, la, p)) return(0); + m->next = MAN_NEXT_SIBLING; } - assert(m->last == m->first); - if ( ! man_valid_post(m)) - return(0); - if ( ! man_action_post(m)) + /* Close out head and open body (unless MAN_SCOPE). */ + + if (MAN_SCOPED & man_macros[tok].flags) { + m->flags |= MAN_BLINE; + return(1); + } else if ( ! rew_scope(MAN_HEAD, m, tok)) return(0); - return(1); + return(man_body_alloc(m, line, ppos, tok)); } -/* ARGSUSED */ -static int -man_args(struct man *m, int line, - int *pos, char *buf, char **v) +int +in_line_eoln(MACRO_PROT_ARGS) { + int w, la; + char *p; + struct man_node *n; - if (0 == buf[*pos]) + if ( ! man_elem_alloc(m, line, ppos, tok)) return(0); - /* First parse non-quoted strings. */ - - if ('\"' != buf[*pos]) { - *v = &buf[*pos]; - - while (buf[*pos]) { - if (' ' == buf[*pos]) - if ('\\' != buf[*pos - 1]) - break; - (*pos)++; - } - - if (0 == buf[*pos]) - return(1); - - buf[(*pos)++] = 0; + n = m->last; + m->next = MAN_NEXT_CHILD; - if (0 == buf[*pos]) - return(1); - - while (buf[*pos] && ' ' == buf[*pos]) - (*pos)++; + for (;;) { + la = *pos; + w = man_args(m, line, pos, buf, &p); - if (buf[*pos]) - return(1); + if (-1 == w) + return(0); + if (0 == w) + break; - if ( ! man_pwarn(m, line, *pos, WTSPACE)) - return(-1); + if ( ! man_word_alloc(m, line, la, p)) + return(0); + m->next = MAN_NEXT_SIBLING; + } + if (n == m->last && (MAN_SCOPED & man_macros[tok].flags)) { + m->flags |= MAN_ELINE; return(1); - } + } /* - * If we're a quoted string (and quoted strings are allowed), - * then parse ahead to the next quote. If none's found, it's an - * error. After, parse to the next word. + * Note that when TH is pruned, we'll be back at the root, so + * make sure that we don't clobber as its sibling. */ - *v = &buf[++(*pos)]; - - while (buf[*pos] && '\"' != buf[*pos]) - (*pos)++; + /* FIXME: clean this to use man_unscope(). */ - if (0 == buf[*pos]) { - if ( ! man_pwarn(m, line, *pos, WTQUOTE)) - return(-1); - return(1); + for ( ; m->last; m->last = m->last->parent) { + if (m->last == n) + break; + if (m->last->type == MAN_ROOT) + break; + if ( ! man_valid_post(m)) + return(0); + if ( ! man_action_post(m)) + return(0); } - buf[(*pos)++] = 0; - if (0 == buf[*pos]) - return(1); + assert(m->last); - while (buf[*pos] && ' ' == buf[*pos]) - (*pos)++; + /* + * Same here regarding whether we're back at the root. + */ - if (buf[*pos]) - return(1); + if (m->last->type != MAN_ROOT && ! man_valid_post(m)) + return(0); + if (m->last->type != MAN_ROOT && ! man_action_post(m)) + return(0); + if (m->last->type != MAN_ROOT) + m->next = MAN_NEXT_SIBLING; - if ( ! man_pwarn(m, line, *pos, WTSPACE)) - return(-1); return(1); } + + +int +man_macroend(struct man *m) +{ + + return(man_unscope(m, m->first)); +} + @@ -32,7 +32,10 @@ extern size_t strlcpy(char *, const char *, size_t); extern size_t strlcat(char *, const char *, size_t); #endif +#define MANT_LITERAL (1 << 0) + #define DECL_ARGS struct termp *p, \ + int *fl, \ const struct man_node *n, \ const struct man_meta *m @@ -44,7 +47,7 @@ struct termact { static int pre_B(DECL_ARGS); static int pre_BI(DECL_ARGS); static int pre_BR(DECL_ARGS); -static int pre_br(DECL_ARGS); +static int pre_HP(DECL_ARGS); static int pre_I(DECL_ARGS); static int pre_IB(DECL_ARGS); static int pre_IP(DECL_ARGS); @@ -55,11 +58,17 @@ static int pre_RI(DECL_ARGS); static int pre_SH(DECL_ARGS); static int pre_SS(DECL_ARGS); static int pre_TP(DECL_ARGS); +static int pre_br(DECL_ARGS); +static int pre_fi(DECL_ARGS); +static int pre_nf(DECL_ARGS); +static int pre_r(DECL_ARGS); +static int pre_sp(DECL_ARGS); static void post_B(DECL_ARGS); static void post_I(DECL_ARGS); static void post_SH(DECL_ARGS); static void post_SS(DECL_ARGS); +static void post_i(DECL_ARGS); static const struct termact termacts[MAN_MAX] = { { pre_br, NULL }, /* br */ @@ -71,7 +80,7 @@ static const struct termact termacts[MAN_MAX] = { { pre_PP, NULL }, /* PP */ { pre_PP, NULL }, /* P */ { pre_IP, NULL }, /* IP */ - { pre_PP, NULL }, /* HP */ /* FIXME */ + { pre_HP, NULL }, /* HP */ { NULL, NULL }, /* SM */ { pre_B, post_B }, /* SB */ { pre_BI, NULL }, /* BI */ @@ -83,9 +92,12 @@ static const struct termact termacts[MAN_MAX] = { { pre_I, post_I }, /* I */ { pre_IR, NULL }, /* IR */ { pre_RI, NULL }, /* RI */ - { NULL, NULL }, /* na */ - { pre_I, post_I }, /* i */ - { NULL, NULL }, /* sp */ + { NULL, NULL }, /* na */ /* TODO: document that has no effect */ + { pre_I, post_i }, /* i */ + { pre_sp, NULL }, /* sp */ + { pre_nf, NULL }, /* nf */ + { pre_fi, NULL }, /* fi */ + { pre_r, NULL }, /* r */ }; static void print_head(struct termp *, @@ -102,13 +114,16 @@ static int arg_width(const struct man_node *); int man_run(struct termp *p, const struct man *m) { + int fl; print_head(p, man_meta(m)); p->flags |= TERMP_NOSPACE; assert(man_node(m)); assert(MAN_ROOT == man_node(m)->type); + + fl = 0; if (man_node(m)->child) - print_body(p, man_node(m)->child, man_meta(m)); + print_body(p, &fl, man_node(m)->child, man_meta(m)); print_foot(p, man_meta(m)); return(1); @@ -171,6 +186,27 @@ pre_I(DECL_ARGS) /* ARGSUSED */ +static int +pre_r(DECL_ARGS) +{ + + p->flags &= ~TERMP_UNDER; + p->flags &= ~TERMP_BOLD; + return(1); +} + + +/* ARGSUSED */ +static void +post_i(DECL_ARGS) +{ + + if (n->nchild) + p->flags &= ~TERMP_UNDER; +} + + +/* ARGSUSED */ static void post_I(DECL_ARGS) { @@ -181,6 +217,27 @@ post_I(DECL_ARGS) /* ARGSUSED */ static int +pre_fi(DECL_ARGS) +{ + + *fl &= ~MANT_LITERAL; + return(1); +} + + +/* ARGSUSED */ +static int +pre_nf(DECL_ARGS) +{ + + term_newln(p); + *fl |= MANT_LITERAL; + return(1); +} + + +/* ARGSUSED */ +static int pre_IR(DECL_ARGS) { const struct man_node *nn; @@ -191,7 +248,7 @@ pre_IR(DECL_ARGS) p->flags |= TERMP_UNDER; if (i > 0) p->flags |= TERMP_NOSPACE; - print_node(p, nn, m); + print_node(p, fl, nn, m); if ( ! (i % 2)) p->flags &= ~TERMP_UNDER; } @@ -210,7 +267,7 @@ pre_IB(DECL_ARGS) p->flags |= i % 2 ? TERMP_BOLD : TERMP_UNDER; if (i > 0) p->flags |= TERMP_NOSPACE; - print_node(p, nn, m); + print_node(p, fl, nn, m); p->flags &= i % 2 ? ~TERMP_BOLD : ~TERMP_UNDER; } return(0); @@ -229,7 +286,7 @@ pre_RB(DECL_ARGS) p->flags |= TERMP_BOLD; if (i > 0) p->flags |= TERMP_NOSPACE; - print_node(p, nn, m); + print_node(p, fl, nn, m); if (i % 2) p->flags &= ~TERMP_BOLD; } @@ -249,7 +306,7 @@ pre_RI(DECL_ARGS) p->flags |= TERMP_UNDER; if (i > 0) p->flags |= TERMP_NOSPACE; - print_node(p, nn, m); + print_node(p, fl, nn, m); if ( ! (i % 2)) p->flags &= ~TERMP_UNDER; } @@ -269,7 +326,7 @@ pre_BR(DECL_ARGS) p->flags |= TERMP_BOLD; if (i > 0) p->flags |= TERMP_NOSPACE; - print_node(p, nn, m); + print_node(p, fl, nn, m); if ( ! (i % 2)) p->flags &= ~TERMP_BOLD; } @@ -288,7 +345,7 @@ pre_BI(DECL_ARGS) p->flags |= i % 2 ? TERMP_UNDER : TERMP_BOLD; if (i > 0) p->flags |= TERMP_NOSPACE; - print_node(p, nn, m); + print_node(p, fl, nn, m); p->flags &= i % 2 ? ~TERMP_UNDER : ~TERMP_BOLD; } return(0); @@ -316,6 +373,27 @@ post_B(DECL_ARGS) /* ARGSUSED */ static int +pre_sp(DECL_ARGS) +{ + int i, len; + + if (NULL == n->child) { + term_vspace(p); + return(0); + } + + len = atoi(n->child->string); + if (0 == len) + term_newln(p); + for (i = 0; i < len; i++) + term_vspace(p); + + return(0); +} + + +/* ARGSUSED */ +static int pre_br(DECL_ARGS) { @@ -326,14 +404,29 @@ pre_br(DECL_ARGS) /* ARGSUSED */ static int +pre_HP(DECL_ARGS) +{ + + /* TODO */ + return(1); +} + + +/* ARGSUSED */ +static int pre_PP(DECL_ARGS) { - term_vspace(p); - term_vspace(p); - p->offset = INDENT; - p->flags |= TERMP_NOSPACE; - return(0); + switch (n->type) { + case (MAN_BLOCK): + fmt_block_vspace(p, n); + break; + default: + p->offset = INDENT; + break; + } + + return(1); } @@ -341,6 +434,8 @@ pre_PP(DECL_ARGS) static int pre_IP(DECL_ARGS) { + /* TODO */ +#if 0 const struct man_node *nn; size_t offs, sv; int ival; @@ -372,7 +467,7 @@ pre_IP(DECL_ARGS) break; } } - print_node(p, nn, m); + print_node(p, fl, nn, m); } p->rmargin = p->offset + offs; @@ -385,6 +480,8 @@ pre_IP(DECL_ARGS) p->flags |= TERMP_NOLPAD | TERMP_NOSPACE; return(0); +#endif + return(1); } @@ -392,6 +489,8 @@ pre_IP(DECL_ARGS) static int pre_TP(DECL_ARGS) { + /* TODO */ +#if 0 const struct man_node *nn; size_t offs; @@ -411,12 +510,14 @@ pre_TP(DECL_ARGS) offs = INDENT; for ( ; nn; nn = nn->next) - print_node(p, nn, m); + print_node(p, fl, nn, m); term_flushln(p); p->flags |= TERMP_NOSPACE; p->offset += offs; return(0); +#endif + return(1); } @@ -425,8 +526,21 @@ static int pre_SS(DECL_ARGS) { - term_vspace(p); - p->flags |= TERMP_BOLD; + switch (n->type) { + case (MAN_BLOCK): + term_newln(p); + if (n->prev) + term_vspace(p); + break; + case (MAN_HEAD): + p->flags |= TERMP_BOLD; + p->offset = HALFINDENT; + break; + default: + p->offset = INDENT; + break; + } + return(1); } @@ -436,9 +550,14 @@ static void post_SS(DECL_ARGS) { - term_flushln(p); - p->flags &= ~TERMP_BOLD; - p->flags |= TERMP_NOSPACE; + switch (n->type) { + case (MAN_HEAD): + term_newln(p); + p->flags &= ~TERMP_BOLD; + break; + default: + break; + } } @@ -446,10 +565,28 @@ post_SS(DECL_ARGS) static int pre_SH(DECL_ARGS) { + /* + * XXX: undocumented: using two `SH' macros in sequence has no + * vspace between calls, only a newline. + */ + switch (n->type) { + case (MAN_BLOCK): + if (n->prev && MAN_SH == n->prev->tok) + if (NULL == n->prev->body->child) + break; + term_vspace(p); + break; + case (MAN_HEAD): + p->flags |= TERMP_BOLD; + p->offset = 0; + break; + case (MAN_BODY): + p->offset = INDENT; + break; + default: + break; + } - term_vspace(p); - p->offset = 0; - p->flags |= TERMP_BOLD; return(1); } @@ -459,10 +596,17 @@ static void post_SH(DECL_ARGS) { - term_flushln(p); - p->offset = INDENT; - p->flags &= ~TERMP_BOLD; - p->flags |= TERMP_NOSPACE; + switch (n->type) { + case (MAN_HEAD): + term_newln(p); + p->flags &= ~TERMP_BOLD; + break; + case (MAN_BODY): + term_newln(p); + break; + default: + break; + } } @@ -474,10 +618,6 @@ print_node(DECL_ARGS) c = 1; switch (n->type) { - case(MAN_ELEM): - if (termacts[n->tok].pre) - c = (*termacts[n->tok].pre)(p, n, m); - break; case(MAN_TEXT): if (0 == *n->string) { term_vspace(p); @@ -494,32 +634,35 @@ print_node(DECL_ARGS) if (sz >= 2 && n->string[sz - 1] == 'c' && n->string[sz - 2] == '\\') p->flags |= TERMP_NOSPACE; + /* FIXME: this means that macro lines are munged! */ + if (MANT_LITERAL & *fl) { + p->flags |= TERMP_NOSPACE; + term_flushln(p); + } break; default: + if (termacts[n->tok].pre) + c = (*termacts[n->tok].pre)(p, fl, n, m); break; } if (c && n->child) - print_body(p, n->child, m); + print_body(p, fl, n->child, m); - switch (n->type) { - case (MAN_ELEM): + if (MAN_TEXT != n->type) if (termacts[n->tok].post) - (*termacts[n->tok].post)(p, n, m); - break; - default: - break; - } + (*termacts[n->tok].post)(p, fl, n, m); } static void print_body(DECL_ARGS) { - print_node(p, n, m); + + print_node(p, fl, n, m); if ( ! n->next) return; - print_body(p, n->next, m); + print_body(p, fl, n->next, m); } diff --git a/man_validate.c b/man_validate.c index 442bad54..72a2d154 100644 --- a/man_validate.c +++ b/man_validate.c @@ -26,64 +26,89 @@ #include "libman.h" #include "libmandoc.h" -#define POSTARGS struct man *m, const struct man_node *n +#define CHKARGS struct man *m, const struct man_node *n -typedef int (*v_post)(POSTARGS); +typedef int (*v_check)(CHKARGS); struct man_valid { - v_post *posts; + v_check *pres; + v_check *posts; }; -static int check_eq0(POSTARGS); -static int check_eq1(POSTARGS); -static int check_ge1(POSTARGS); -static int check_ge2(POSTARGS); -static int check_le1(POSTARGS); -static int check_le2(POSTARGS); -static int check_le5(POSTARGS); -static int check_root(POSTARGS); -static int check_sp(POSTARGS); -static int check_text(POSTARGS); - -static v_post posts_eq0[] = { check_eq0, NULL }; -static v_post posts_ge1[] = { check_ge1, NULL }; -static v_post posts_ge2_le5[] = { check_ge2, check_le5, NULL }; -static v_post posts_le1[] = { check_le1, NULL }; -static v_post posts_le2[] = { check_le2, NULL }; -static v_post posts_sp[] = { check_sp, NULL }; +static int check_bline(CHKARGS); +static int check_eline(CHKARGS); +static int check_eq0(CHKARGS); +static int check_eq1(CHKARGS); +static int check_ge2(CHKARGS); +static int check_le5(CHKARGS); +static int check_par(CHKARGS); +static int check_root(CHKARGS); +static int check_sec(CHKARGS); +static int check_sp(CHKARGS); +static int check_text(CHKARGS); + +static v_check posts_eq0[] = { check_eq0, NULL }; +static v_check posts_ge2_le5[] = { check_ge2, check_le5, NULL }; +static v_check posts_par[] = { check_par, NULL }; +static v_check posts_sec[] = { check_sec, NULL }; +static v_check posts_sp[] = { check_sp, NULL }; +static v_check pres_eline[] = { check_eline, NULL }; +static v_check pres_bline[] = { check_bline, NULL }; static const struct man_valid man_valids[MAN_MAX] = { - { posts_eq0 }, /* br */ - { posts_ge2_le5 }, /* TH */ - { posts_ge1 }, /* SH */ - { posts_ge1 }, /* SS */ - { NULL }, /* TP */ - { posts_eq0 }, /* LP */ - { posts_eq0 }, /* PP */ - { posts_eq0 }, /* P */ - { posts_le2 }, /* IP */ - { posts_le1 }, /* HP */ - { NULL }, /* SM */ - { NULL }, /* SB */ - { NULL }, /* BI */ - { NULL }, /* IB */ - { NULL }, /* BR */ - { NULL }, /* RB */ - { NULL }, /* R */ - { NULL }, /* B */ - { NULL }, /* I */ - { NULL }, /* IR */ - { NULL }, /* RI */ - { posts_eq0 }, /* na */ - { NULL }, /* i */ - { posts_sp }, /* sp */ + { pres_bline, posts_eq0 }, /* br */ + { pres_bline, posts_ge2_le5 }, /* TH */ + { pres_bline, posts_sec }, /* SH */ + { pres_bline, posts_sec }, /* SS */ + { pres_bline, posts_par }, /* TP */ + { pres_bline, posts_par }, /* LP */ + { pres_bline, posts_par }, /* PP */ + { pres_bline, posts_par }, /* P */ + { pres_bline, posts_par }, /* IP */ + { pres_bline, posts_par }, /* HP */ + { pres_eline, NULL }, /* SM */ + { pres_eline, NULL }, /* SB */ + { NULL, NULL }, /* BI */ + { NULL, NULL }, /* IB */ + { NULL, NULL }, /* BR */ + { NULL, NULL }, /* RB */ + { pres_eline, NULL }, /* R */ + { pres_eline, NULL }, /* B */ + { pres_eline, NULL }, /* I */ + { NULL, NULL }, /* IR */ + { NULL, NULL }, /* RI */ + { pres_bline, posts_eq0 }, /* na */ + { NULL, NULL }, /* i */ + { pres_bline, posts_sp }, /* sp */ + { pres_bline, posts_eq0 }, /* nf */ + { pres_bline, posts_eq0 }, /* fi */ + { NULL, NULL }, /* r */ }; int +man_valid_pre(struct man *m, const struct man_node *n) +{ + v_check *cp; + + if (MAN_TEXT == n->type) + return(1); + if (MAN_ROOT == n->type) + return(1); + + if (NULL == (cp = man_valids[n->tok].pres)) + return(1); + for ( ; *cp; cp++) + if ( ! (*cp)(m, n)) + return(0); + return(1); +} + + +int man_valid_post(struct man *m) { - v_post *cp; + v_check *cp; if (MAN_VALID & m->last->flags) return(1); @@ -109,9 +134,16 @@ man_valid_post(struct man *m) static int -check_root(POSTARGS) +check_root(CHKARGS) { - + + /* XXX - make this into a warning? */ + if (MAN_BLINE & m->flags) + return(man_nerr(m, n, WEXITSCOPE)); + /* XXX - make this into a warning? */ + if (MAN_ELINE & m->flags) + return(man_nerr(m, n, WEXITSCOPE)); + if (NULL == m->first->child) return(man_nerr(m, n, WNODATA)); if (NULL == m->meta.title) @@ -122,7 +154,7 @@ check_root(POSTARGS) static int -check_text(POSTARGS) +check_text(CHKARGS) { const char *p; int pos, c; @@ -158,7 +190,7 @@ check_text(POSTARGS) #define INEQ_DEFINE(x, ineq, name) \ static int \ -check_##name(POSTARGS) \ +check_##name(CHKARGS) \ { \ if (n->nchild ineq (x)) \ return(1); \ @@ -169,37 +201,106 @@ check_##name(POSTARGS) \ INEQ_DEFINE(0, ==, eq0) INEQ_DEFINE(1, ==, eq1) -INEQ_DEFINE(1, >=, ge1) INEQ_DEFINE(2, >=, ge2) -INEQ_DEFINE(1, <=, le1) -INEQ_DEFINE(2, <=, le2) INEQ_DEFINE(5, <=, le5) static int -check_sp(POSTARGS) +check_sp(CHKARGS) { long lval; char *ep, *buf; - if (NULL == m->last->child) + if (NULL == n->child) return(1); else if ( ! check_eq1(m, n)) return(0); - assert(MAN_TEXT == m->last->child->type); - buf = m->last->child->string; + assert(MAN_TEXT == n->child->type); + buf = n->child->string; assert(buf); /* From OpenBSD's strtol(3). */ + errno = 0; lval = strtol(buf, &ep, 10); if (buf[0] == '\0' || *ep != '\0') - return(man_nerr(m, m->last->child, WNUMFMT)); + return(man_nerr(m, n->child, WNUMFMT)); if ((errno == ERANGE && (lval == LONG_MAX || lval == LONG_MIN)) || (lval > INT_MAX || lval < 0)) - return(man_nerr(m, m->last->child, WNUMFMT)); + return(man_nerr(m, n->child, WNUMFMT)); + + return(1); +} + + +static int +check_sec(CHKARGS) +{ + if (MAN_BODY == n->type && 0 == n->nchild) + return(man_nwarn(m, n, WBODYARGS)); + if (MAN_HEAD == n->type && 0 == n->nchild) + return(man_nerr(m, n, WHEADARGS)); return(1); } + + +static int +check_par(CHKARGS) +{ + + if (MAN_BODY == n->type) + switch (n->tok) { + case (MAN_IP): + /* FALLTHROUGH */ + case (MAN_HP): + /* FALLTHROUGH */ + case (MAN_TP): + /* Body-less lists are ok. */ + break; + default: + if (n->nchild) + break; + return(man_nwarn(m, n, WBODYARGS)); + } + if (MAN_HEAD == n->type) + switch (n->tok) { + case (MAN_PP): + /* FALLTHROUGH */ + case (MAN_P): + /* FALLTHROUGH */ + case (MAN_LP): + if (0 == n->nchild) + break; + return(man_nwarn(m, n, WNHEADARGS)); + default: + if (n->nchild) + break; + return(man_nwarn(m, n, WHEADARGS)); + } + + return(1); +} + + +static int +check_eline(CHKARGS) +{ + + if ( ! (MAN_ELINE & m->flags)) + return(1); + return(man_nerr(m, n, WLNSCOPE)); +} + + +static int +check_bline(CHKARGS) +{ + + if ( ! (MAN_BLINE & m->flags)) + return(1); + return(man_nerr(m, n, WLNSCOPE)); +} + @@ -163,6 +163,15 @@ print_man(const struct man_node *n, int indent) case (MAN_TEXT): t = "text"; break; + case (MAN_BLOCK): + t = "block"; + break; + case (MAN_HEAD): + t = "block-head"; + break; + case (MAN_BODY): + t = "block-body"; + break; default: abort(); /* NOTREACHED */ @@ -173,6 +182,12 @@ print_man(const struct man_node *n, int indent) p = n->string; break; case (MAN_ELEM): + /* FALLTHROUGH */ + case (MAN_BLOCK): + /* FALLTHROUGH */ + case (MAN_HEAD): + /* FALLTHROUGH */ + case (MAN_BODY): p = man_macronames[n->tok]; break; case (MAN_ROOT): |