diff options
-rw-r--r-- | Makefile | 6 | ||||
-rw-r--r-- | mdoc.c | 25 | ||||
-rw-r--r-- | mdoc.h | 35 | ||||
-rw-r--r-- | mdocml.c | 220 |
4 files changed, 157 insertions, 129 deletions
@@ -3,7 +3,7 @@ VERSION = 1.2.0 CFLAGS += -W -Wall -Wno-unused-parameter -g LIBLNS = macro.ln mdoc.ln hash.ln strings.ln xstd.ln argv.ln \ - validate.ln action.ln + validate.ln action.ln BINLNS = tree.ln mdocml.ln @@ -14,14 +14,14 @@ LLNS = llib-llibmdoc.ln llib-lmdocml.ln LIBS = libmdoc.a LIBOBJS = macro.o mdoc.o hash.o strings.o xstd.o argv.o \ - validate.o action.o + validate.o action.o BINOBJS = tree.o mdocml.o OBJS = $(LIBOBJS) $(BINOBJS) SRCS = macro.c mdoc.c mdocml.c hash.c strings.c xstd.c argv.c \ - validate.c action.c tree.c + validate.c action.c tree.c HEADS = mdoc.h private.h @@ -212,12 +212,14 @@ static void argfree(size_t, struct mdoc_arg *); static void argcpy(struct mdoc_arg *, const struct mdoc_arg *); +static struct mdoc_node *mdoc_node_alloc(void); static int mdoc_node_append(struct mdoc *, struct mdoc_node *); static void mdoc_elem_free(struct mdoc_elem *); static void mdoc_text_free(struct mdoc_text *); + const struct mdoc_node * mdoc_node(struct mdoc *mdoc) { @@ -510,6 +512,14 @@ mdoc_node_append(struct mdoc *mdoc, struct mdoc_node *p) } +static struct mdoc_node * +mdoc_node_alloc(void) +{ + + return(xcalloc(1, sizeof(struct mdoc_node))); +} + + int mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok) { @@ -518,7 +528,7 @@ mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok) assert(mdoc->first); assert(mdoc->last); - p = xcalloc(1, sizeof(struct mdoc_node)); + p = mdoc_node_alloc(); p->line = line; p->pos = pos; @@ -537,7 +547,7 @@ mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok) assert(mdoc->first); assert(mdoc->last); - p = xcalloc(1, sizeof(struct mdoc_node)); + p = mdoc_node_alloc(); p->line = line; p->pos = pos; @@ -556,7 +566,7 @@ mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok) assert(mdoc->first); assert(mdoc->last); - p = xcalloc(1, sizeof(struct mdoc_node)); + p = mdoc_node_alloc(); p->line = line; p->pos = pos; @@ -572,7 +582,7 @@ mdoc_root_alloc(struct mdoc *mdoc) { struct mdoc_node *p; - p = xcalloc(1, sizeof(struct mdoc_node)); + p = mdoc_node_alloc(); p->type = MDOC_ROOT; @@ -586,7 +596,7 @@ mdoc_block_alloc(struct mdoc *mdoc, int line, int pos, { struct mdoc_node *p; - p = xcalloc(1, sizeof(struct mdoc_node)); + p = mdoc_node_alloc(); p->pos = pos; p->line = line; @@ -605,7 +615,7 @@ mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos, { struct mdoc_node *p; - p = xcalloc(1, sizeof(struct mdoc_node)); + p = mdoc_node_alloc(); p->line = line; p->pos = pos; @@ -624,7 +634,8 @@ mdoc_word_alloc(struct mdoc *mdoc, { struct mdoc_node *p; - p = xcalloc(1, sizeof(struct mdoc_node)); + p = mdoc_node_alloc(); + p->line = line; p->pos = pos; p->type = MDOC_TEXT; @@ -195,9 +195,10 @@ #define MDOC_Symbolic 61 #define MDOC_ARG_MAX 62 +/* Warnings are either syntax or groff-compatibility. */ enum mdoc_warn { - WARN_SYNTAX, /* Syntax warn (at line/col). */ - WARN_COMPAT /* Groff compat warn (at line/col). */ + WARN_SYNTAX, + WARN_COMPAT }; /* Possible values for the `At' macro. */ @@ -226,19 +227,6 @@ struct mdoc_arg { char **value; }; -/* - * Simplified grammar of syntax tree: - * - * MDOC_ROOT: root of tree - * MDOC_TEXT: free-form text - * MDOC_ELEM: elem [args] MDOC_TEXT... - * MDOC_BLOCK, MDOC_HEAD, MDOC_BODY, MDOC_TAIL: - * MDOC_BLOCK: - * MDOC_HEAD [args] (MDOC_TEXT|MDOC_ELEM|MDOC_BLOCK)... - * MDOC_BODY (MDOC_TEXT|MDOC_ELEM|MDOC_BLOCK)... - * MDOC_TAIL (optional) (MDOC_TEXT|MDOC_ELEM|MDOC_BLOCK)... - */ - /* Type of a syntax node. */ enum mdoc_type { MDOC_TEXT, @@ -340,7 +328,7 @@ enum mdoc_arch { ARCH_zaurus }; -/* Meta-information from prologue. */ +/* Information from prologue. */ struct mdoc_meta { enum mdoc_msec msec; enum mdoc_vol vol; @@ -351,10 +339,12 @@ struct mdoc_meta { char *name; }; +/* Text-only node. */ struct mdoc_text { char *string; }; +/* Block (scoped) node. */ struct mdoc_block { size_t argc; struct mdoc_arg *argv; @@ -363,6 +353,7 @@ struct mdoc_block { struct mdoc_node *tail; }; +/* In-line element node. */ struct mdoc_elem { size_t sz; char **args; @@ -370,13 +361,14 @@ struct mdoc_elem { struct mdoc_arg *argv; }; +/* Typed nodes of an AST node. */ union mdoc_data { struct mdoc_text text; struct mdoc_elem elem; struct mdoc_block block; }; -/* Syntax node in parse tree. */ +/* Node in AST. */ struct mdoc_node { struct mdoc_node *parent; struct mdoc_node *child; @@ -401,7 +393,10 @@ struct mdoc_cb { enum mdoc_warn, const char *); }; +/* Global table of macro names (`Bd', `Ed', etc.). */ extern const char *const *mdoc_macronames; + +/* Global table of argument names (`column', `tag', etc.). */ extern const char *const *mdoc_argnames; __BEGIN_DECLS @@ -414,13 +409,13 @@ void mdoc_free(struct mdoc *); /* Allocate a new parser instance. */ struct mdoc *mdoc_alloc(void *data, const struct mdoc_cb *); -/* Parse a single line (boolean retval). */ +/* Parse a single line in a stream (boolean retval). */ int mdoc_parseln(struct mdoc *, int, char *buf); -/* Get result first node. */ +/* Get result first node (after mdoc_endparse!). */ const struct mdoc_node *mdoc_node(struct mdoc *); -/* Get result meta-information. */ +/* Get result meta-information (after mdoc_endparse!). */ const struct mdoc_meta *mdoc_meta(struct mdoc *); /* Signal end of parse sequence (boolean retval). */ @@ -30,15 +30,8 @@ #include "mdoc.h" -#define xfprintf (void)fprintf - #define MD_LINE_SZ (256) /* Max input line size. */ -/* TODO: have a struct for each transformer. */ - -typedef int (*mdocprint)(const struct mdoc_node *); - - struct md_parse { int warn; /* Warning flags. */ #define MD_WARN_SYNTAX (1 << 0) /* Show syntax warnings. */ @@ -49,17 +42,17 @@ struct md_parse { struct mdoc *mdoc; /* Active parser. */ char *buf; /* Input buffer. */ u_long bufsz; /* Input buffer size. */ - char *name; /* Input file name. */ - int fd; /* Input file desc. */ - mdocprint print; /* Node-print function. */ + char *in; /* Input file name. */ + int fdin; /* Input file desc. */ }; extern char *__progname; -extern int treeprint(const struct mdoc_node *); - static void usage(void); +static int parse_opts(struct md_parse *, int, char *[]); +static int parse_subopts(struct md_parse *, char *); + static int parse_begin(struct md_parse *); static int parse_leave(struct md_parse *, int); static int io_begin(struct md_parse *); @@ -79,69 +72,12 @@ extern int getsubopt(char **, char *const *, char **); int main(int argc, char *argv[]) { - int c; struct md_parse parser; - char *opts, *v, *filter; -#define ALL 0 -#define COMPAT 1 -#define SYNTAX 2 -#define ERROR 3 - char *toks[] = { "all", "compat", "syntax", - "error", NULL }; - - extern char *optarg; - extern int optind; - - filter = NULL; (void)memset(&parser, 0, sizeof(struct md_parse)); - while (-1 != (c = getopt(argc, argv, "f:vW:"))) - switch (c) { - case ('f'): - filter = optarg; - break; - case ('v'): - parser.dbg++; - break; - case ('W'): - opts = optarg; - while (*opts) - switch (getsubopt(&opts, toks, &v)) { - case (ALL): - parser.warn |= MD_WARN_ALL; - break; - case (COMPAT): - parser.warn |= MD_WARN_COMPAT; - break; - case (SYNTAX): - parser.warn |= MD_WARN_SYNTAX; - break; - case (ERROR): - parser.warn |= MD_WARN_ERR; - break; - default: - usage(); - return(1); - } - break; - default: - usage(); - return(1); - } - - argv += optind; - argc -= optind; - - parser.name = "-"; - if (1 == argc) - parser.name = *argv++; - - if (filter) { - if (0 == strcmp(filter, "tree")) - parser.print = treeprint; - } - + if ( ! parse_opts(&parser, argc, argv)) + return(EXIT_FAILURE); if ( ! io_begin(&parser)) return(EXIT_FAILURE); @@ -153,11 +89,11 @@ static int io_leave(struct md_parse *p, int code) { - if (-1 == p->fd || STDIN_FILENO == p->fd) + if (-1 == p->fdin || STDIN_FILENO == p->fdin) return(code); - if (-1 == close(p->fd)) { - warn("%s", p->name); + if (-1 == close(p->fdin)) { + warn("%s", p->in); code = 0; } return(code); @@ -165,13 +101,82 @@ io_leave(struct md_parse *p, int code) static int +parse_subopts(struct md_parse *p, char *arg) +{ + char *v; + char *toks[] = { "all", "compat", + "syntax", "error", NULL }; + + /* + * Future -Wxxx levels and so on should be here. For now we + * only recognise syntax and compat warnings as categories, + * beyond the usually "all" and "error" (make warn error out). + */ + + while (*arg) + switch (getsubopt(&arg, toks, &v)) { + case (0): + p->warn |= MD_WARN_ALL; + break; + case (1): + p->warn |= MD_WARN_COMPAT; + break; + case (2): + p->warn |= MD_WARN_SYNTAX; + break; + case (3): + p->warn |= MD_WARN_ERR; + break; + default: + usage(); + return(0); + } + + return(1); +} + + +static int +parse_opts(struct md_parse *p, int argc, char *argv[]) +{ + int c; + + extern char *optarg; + extern int optind; + + p->in = "-"; + + while (-1 != (c = getopt(argc, argv, "vW:"))) + switch (c) { + case ('v'): + p->dbg++; + break; + case ('W'): + if ( ! parse_subopts(p, optarg)) + return(0); + break; + default: + usage(); + return(0); + } + + argv += optind; + if (0 == (argc -= optind)) + return(1); + + p->in = *argv++; + return(1); +} + + +static int io_begin(struct md_parse *p) { - p->fd = STDIN_FILENO; - if (0 != strncmp(p->name, "-", 1)) - if (-1 == (p->fd = open(p->name, O_RDONLY, 0))) { - warn("%s", p->name); + p->fdin = STDIN_FILENO; + if (0 != strncmp(p->in, "-", 1)) + if (-1 == (p->fdin = open(p->in, O_RDONLY, 0))) { + warn("%s", p->in); return(io_leave(p, 0)); } @@ -194,11 +199,17 @@ buf_begin(struct md_parse *p) { struct stat st; - if (-1 == fstat(p->fd, &st)) { - warn("%s", p->name); - return(1); + if (-1 == fstat(p->fdin, &st)) { + warn("%s", p->in); + return(0); } + /* + * Try to intuit the fastest way of sucking down buffered data + * by using either the block buffer size or the hard-coded one. + * This is inspired by bin/cat.c. + */ + p->bufsz = MAX(st.st_blksize, BUFSIZ); if (NULL == (p->buf = malloc(p->bufsz))) { @@ -213,18 +224,22 @@ buf_begin(struct md_parse *p) static int parse_leave(struct md_parse *p, int code) { - const struct mdoc_node *n; if (NULL == p->mdoc) return(code); if ( ! mdoc_endparse(p->mdoc)) code = 0; - if (p->print && (n = mdoc_node(p->mdoc))) - (*p->print)(n); - mdoc_free(p->mdoc); +#if 0 + /* TODO */ + if (code && ! mdoc_write(p->out, mdoc_node(p->mdoc))) { + warnx("%s: write error", p->out); + code = 0; + } +#endif + mdoc_free(p->mdoc); return(code); } @@ -245,9 +260,19 @@ parse_begin(struct md_parse *p) if (NULL == (p->mdoc = mdoc_alloc(p, &cb))) return(parse_leave(p, 0)); + /* + * This is a little more complicated than fgets. TODO: have + * some benchmarks that show it's faster (note that I want to + * check many, many manuals simultaneously, so speed is + * important). Fill a buffer (sized to the block size) with a + * single read, then parse \n-terminated lines into a line + * buffer, which is passed to the parser. Hard-code the line + * buffer to a particular size -- a reasonable assumption. + */ + for (lnn = 1, pos = 0; ; ) { - if (-1 == (sz = read(p->fd, p->buf, p->bufsz))) { - warn("%s", p->name); + if (-1 == (sz = read(p->fdin, p->buf, p->bufsz))) { + warn("%s", p->in); return(parse_leave(p, 0)); } else if (0 == sz) break; @@ -258,8 +283,7 @@ parse_begin(struct md_parse *p) line[(int)pos++] = p->buf[(int)i]; continue; } - warnx("%s: line %d too long", - p->name, lnn); + warnx("%s: line %d too long", p->in, lnn); return(parse_leave(p, 0)); } @@ -283,8 +307,8 @@ msg_err(void *arg, int line, int col, const char *msg) p = (struct md_parse *)arg; - xfprintf(stderr, "%s:%d: error: %s (column %d)\n", - p->name, line, msg, col); + warnx("%s:%d: error: %s (column %d)", + p->in, line, msg, col); return(0); } @@ -299,8 +323,8 @@ msg_msg(void *arg, int line, int col, const char *msg) if (0 == p->dbg) return; - xfprintf(stderr, "%s:%d: debug: %s (column %d)\n", - p->name, line, msg, col); + warnx("%s:%d: debug: %s (column %d)", + p->in, line, msg, col); } @@ -323,14 +347,13 @@ msg_warn(void *arg, int line, int col, return(1); } - xfprintf(stderr, "%s:%d: warning: %s (column %d)\n", - p->name, line, msg, col); + warnx("%s:%d: warning: %s (column %d)", + p->in, line, msg, col); if ( ! (p->warn & MD_WARN_ERR)) return(1); - xfprintf(stderr, "%s: considering warnings as errors\n", - __progname); + warnx("%s: considering warnings as errors", __progname); return(0); } @@ -339,7 +362,6 @@ static void usage(void) { - xfprintf(stderr, "usage: %s [-v] [-Wwarn...] [-ffilter] " - "[infile]\n", __progname); + warnx("usage: %s [-v] [-Wwarn...] [infile]", __progname); } |