summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile6
-rw-r--r--mdoc.c25
-rw-r--r--mdoc.h35
-rw-r--r--mdocml.c220
4 files changed, 157 insertions, 129 deletions
diff --git a/Makefile b/Makefile
index c559c725..e46e1d7d 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ VERSION = 1.2.0
CFLAGS += -W -Wall -Wno-unused-parameter -g
LIBLNS = macro.ln mdoc.ln hash.ln strings.ln xstd.ln argv.ln \
- validate.ln action.ln
+ validate.ln action.ln
BINLNS = tree.ln mdocml.ln
@@ -14,14 +14,14 @@ LLNS = llib-llibmdoc.ln llib-lmdocml.ln
LIBS = libmdoc.a
LIBOBJS = macro.o mdoc.o hash.o strings.o xstd.o argv.o \
- validate.o action.o
+ validate.o action.o
BINOBJS = tree.o mdocml.o
OBJS = $(LIBOBJS) $(BINOBJS)
SRCS = macro.c mdoc.c mdocml.c hash.c strings.c xstd.c argv.c \
- validate.c action.c tree.c
+ validate.c action.c tree.c
HEADS = mdoc.h private.h
diff --git a/mdoc.c b/mdoc.c
index 473f42b5..33372194 100644
--- a/mdoc.c
+++ b/mdoc.c
@@ -212,12 +212,14 @@ static void argfree(size_t, struct mdoc_arg *);
static void argcpy(struct mdoc_arg *,
const struct mdoc_arg *);
+static struct mdoc_node *mdoc_node_alloc(void);
static int mdoc_node_append(struct mdoc *,
struct mdoc_node *);
static void mdoc_elem_free(struct mdoc_elem *);
static void mdoc_text_free(struct mdoc_text *);
+
const struct mdoc_node *
mdoc_node(struct mdoc *mdoc)
{
@@ -510,6 +512,14 @@ mdoc_node_append(struct mdoc *mdoc, struct mdoc_node *p)
}
+static struct mdoc_node *
+mdoc_node_alloc(void)
+{
+
+ return(xcalloc(1, sizeof(struct mdoc_node)));
+}
+
+
int
mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
{
@@ -518,7 +528,7 @@ mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
assert(mdoc->first);
assert(mdoc->last);
- p = xcalloc(1, sizeof(struct mdoc_node));
+ p = mdoc_node_alloc();
p->line = line;
p->pos = pos;
@@ -537,7 +547,7 @@ mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
assert(mdoc->first);
assert(mdoc->last);
- p = xcalloc(1, sizeof(struct mdoc_node));
+ p = mdoc_node_alloc();
p->line = line;
p->pos = pos;
@@ -556,7 +566,7 @@ mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
assert(mdoc->first);
assert(mdoc->last);
- p = xcalloc(1, sizeof(struct mdoc_node));
+ p = mdoc_node_alloc();
p->line = line;
p->pos = pos;
@@ -572,7 +582,7 @@ mdoc_root_alloc(struct mdoc *mdoc)
{
struct mdoc_node *p;
- p = xcalloc(1, sizeof(struct mdoc_node));
+ p = mdoc_node_alloc();
p->type = MDOC_ROOT;
@@ -586,7 +596,7 @@ mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
{
struct mdoc_node *p;
- p = xcalloc(1, sizeof(struct mdoc_node));
+ p = mdoc_node_alloc();
p->pos = pos;
p->line = line;
@@ -605,7 +615,7 @@ mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
{
struct mdoc_node *p;
- p = xcalloc(1, sizeof(struct mdoc_node));
+ p = mdoc_node_alloc();
p->line = line;
p->pos = pos;
@@ -624,7 +634,8 @@ mdoc_word_alloc(struct mdoc *mdoc,
{
struct mdoc_node *p;
- p = xcalloc(1, sizeof(struct mdoc_node));
+ p = mdoc_node_alloc();
+
p->line = line;
p->pos = pos;
p->type = MDOC_TEXT;
diff --git a/mdoc.h b/mdoc.h
index a232128b..7d6da91b 100644
--- a/mdoc.h
+++ b/mdoc.h
@@ -195,9 +195,10 @@
#define MDOC_Symbolic 61
#define MDOC_ARG_MAX 62
+/* Warnings are either syntax or groff-compatibility. */
enum mdoc_warn {
- WARN_SYNTAX, /* Syntax warn (at line/col). */
- WARN_COMPAT /* Groff compat warn (at line/col). */
+ WARN_SYNTAX,
+ WARN_COMPAT
};
/* Possible values for the `At' macro. */
@@ -226,19 +227,6 @@ struct mdoc_arg {
char **value;
};
-/*
- * Simplified grammar of syntax tree:
- *
- * MDOC_ROOT: root of tree
- * MDOC_TEXT: free-form text
- * MDOC_ELEM: elem [args] MDOC_TEXT...
- * MDOC_BLOCK, MDOC_HEAD, MDOC_BODY, MDOC_TAIL:
- * MDOC_BLOCK:
- * MDOC_HEAD [args] (MDOC_TEXT|MDOC_ELEM|MDOC_BLOCK)...
- * MDOC_BODY (MDOC_TEXT|MDOC_ELEM|MDOC_BLOCK)...
- * MDOC_TAIL (optional) (MDOC_TEXT|MDOC_ELEM|MDOC_BLOCK)...
- */
-
/* Type of a syntax node. */
enum mdoc_type {
MDOC_TEXT,
@@ -340,7 +328,7 @@ enum mdoc_arch {
ARCH_zaurus
};
-/* Meta-information from prologue. */
+/* Information from prologue. */
struct mdoc_meta {
enum mdoc_msec msec;
enum mdoc_vol vol;
@@ -351,10 +339,12 @@ struct mdoc_meta {
char *name;
};
+/* Text-only node. */
struct mdoc_text {
char *string;
};
+/* Block (scoped) node. */
struct mdoc_block {
size_t argc;
struct mdoc_arg *argv;
@@ -363,6 +353,7 @@ struct mdoc_block {
struct mdoc_node *tail;
};
+/* In-line element node. */
struct mdoc_elem {
size_t sz;
char **args;
@@ -370,13 +361,14 @@ struct mdoc_elem {
struct mdoc_arg *argv;
};
+/* Type-specific data of an AST node. */
union mdoc_data {
struct mdoc_text text;
struct mdoc_elem elem;
struct mdoc_block block;
};
-/* Syntax node in parse tree. */
+/* Node in AST. */
struct mdoc_node {
struct mdoc_node *parent;
struct mdoc_node *child;
@@ -401,7 +393,10 @@ struct mdoc_cb {
enum mdoc_warn, const char *);
};
+/* Global table of macro names (`Bd', `Ed', etc.). */
extern const char *const *mdoc_macronames;
+
+/* Global table of argument names (`column', `tag', etc.). */
extern const char *const *mdoc_argnames;
__BEGIN_DECLS
@@ -414,13 +409,13 @@ void mdoc_free(struct mdoc *);
/* Allocate a new parser instance. */
struct mdoc *mdoc_alloc(void *data, const struct mdoc_cb *);
-/* Parse a single line (boolean retval). */
+/* Parse a single line in a stream (boolean retval). */
int mdoc_parseln(struct mdoc *, int, char *buf);
-/* Get result first node. */
+/* Get first node of result (after mdoc_endparse!). */
const struct mdoc_node *mdoc_node(struct mdoc *);
-/* Get result meta-information. */
+/* Get result meta-information (after mdoc_endparse!). */
const struct mdoc_meta *mdoc_meta(struct mdoc *);
/* Signal end of parse sequence (boolean retval). */
diff --git a/mdocml.c b/mdocml.c
index 87884080..f76ee0b5 100644
--- a/mdocml.c
+++ b/mdocml.c
@@ -30,15 +30,8 @@
#include "mdoc.h"
-#define xfprintf (void)fprintf
-
#define MD_LINE_SZ (256) /* Max input line size. */
-/* TODO: have a struct for each transformer. */
-
-typedef int (*mdocprint)(const struct mdoc_node *);
-
-
struct md_parse {
int warn; /* Warning flags. */
#define MD_WARN_SYNTAX (1 << 0) /* Show syntax warnings. */
@@ -49,17 +42,17 @@ struct md_parse {
struct mdoc *mdoc; /* Active parser. */
char *buf; /* Input buffer. */
u_long bufsz; /* Input buffer size. */
- char *name; /* Input file name. */
- int fd; /* Input file desc. */
- mdocprint print; /* Node-print function. */
+ char *in; /* Input file name. */
+ int fdin; /* Input file desc. */
};
extern char *__progname;
-extern int treeprint(const struct mdoc_node *);
-
static void usage(void);
+static int parse_opts(struct md_parse *, int, char *[]);
+static int parse_subopts(struct md_parse *, char *);
+
static int parse_begin(struct md_parse *);
static int parse_leave(struct md_parse *, int);
static int io_begin(struct md_parse *);
@@ -79,69 +72,12 @@ extern int getsubopt(char **, char *const *, char **);
int
main(int argc, char *argv[])
{
- int c;
struct md_parse parser;
- char *opts, *v, *filter;
-#define ALL 0
-#define COMPAT 1
-#define SYNTAX 2
-#define ERROR 3
- char *toks[] = { "all", "compat", "syntax",
- "error", NULL };
-
- extern char *optarg;
- extern int optind;
-
- filter = NULL;
(void)memset(&parser, 0, sizeof(struct md_parse));
- while (-1 != (c = getopt(argc, argv, "f:vW:")))
- switch (c) {
- case ('f'):
- filter = optarg;
- break;
- case ('v'):
- parser.dbg++;
- break;
- case ('W'):
- opts = optarg;
- while (*opts)
- switch (getsubopt(&opts, toks, &v)) {
- case (ALL):
- parser.warn |= MD_WARN_ALL;
- break;
- case (COMPAT):
- parser.warn |= MD_WARN_COMPAT;
- break;
- case (SYNTAX):
- parser.warn |= MD_WARN_SYNTAX;
- break;
- case (ERROR):
- parser.warn |= MD_WARN_ERR;
- break;
- default:
- usage();
- return(1);
- }
- break;
- default:
- usage();
- return(1);
- }
-
- argv += optind;
- argc -= optind;
-
- parser.name = "-";
- if (1 == argc)
- parser.name = *argv++;
-
- if (filter) {
- if (0 == strcmp(filter, "tree"))
- parser.print = treeprint;
- }
-
+ if ( ! parse_opts(&parser, argc, argv))
+ return(EXIT_FAILURE);
if ( ! io_begin(&parser))
return(EXIT_FAILURE);
@@ -153,11 +89,11 @@ static int
io_leave(struct md_parse *p, int code)
{
- if (-1 == p->fd || STDIN_FILENO == p->fd)
+ if (-1 == p->fdin || STDIN_FILENO == p->fdin)
return(code);
- if (-1 == close(p->fd)) {
- warn("%s", p->name);
+ if (-1 == close(p->fdin)) {
+ warn("%s", p->in);
code = 0;
}
return(code);
@@ -165,13 +101,82 @@ io_leave(struct md_parse *p, int code)
static int
+parse_subopts(struct md_parse *p, char *arg)
+{
+ char *v;
+ char *toks[] = { "all", "compat",
+ "syntax", "error", NULL };
+
+ /*
+ * Future -Wxxx levels and so on should be here. For now we
+ * only recognise syntax and compat warnings as categories,
+ * beyond the usual "all" and "error" (treat warnings as errors).
+ */
+
+ while (*arg)
+ switch (getsubopt(&arg, toks, &v)) {
+ case (0):
+ p->warn |= MD_WARN_ALL;
+ break;
+ case (1):
+ p->warn |= MD_WARN_COMPAT;
+ break;
+ case (2):
+ p->warn |= MD_WARN_SYNTAX;
+ break;
+ case (3):
+ p->warn |= MD_WARN_ERR;
+ break;
+ default:
+ usage();
+ return(0);
+ }
+
+ return(1);
+}
+
+
+static int
+parse_opts(struct md_parse *p, int argc, char *argv[])
+{
+ int c;
+
+ extern char *optarg;
+ extern int optind;
+
+ p->in = "-";
+
+ while (-1 != (c = getopt(argc, argv, "vW:")))
+ switch (c) {
+ case ('v'):
+ p->dbg++;
+ break;
+ case ('W'):
+ if ( ! parse_subopts(p, optarg))
+ return(0);
+ break;
+ default:
+ usage();
+ return(0);
+ }
+
+ argv += optind;
+ if (0 == (argc -= optind))
+ return(1);
+
+ p->in = *argv++;
+ return(1);
+}
+
+
+static int
io_begin(struct md_parse *p)
{
- p->fd = STDIN_FILENO;
- if (0 != strncmp(p->name, "-", 1))
- if (-1 == (p->fd = open(p->name, O_RDONLY, 0))) {
- warn("%s", p->name);
+ p->fdin = STDIN_FILENO;
+ if (0 != strncmp(p->in, "-", 1))
+ if (-1 == (p->fdin = open(p->in, O_RDONLY, 0))) {
+ warn("%s", p->in);
return(io_leave(p, 0));
}
@@ -194,11 +199,17 @@ buf_begin(struct md_parse *p)
{
struct stat st;
- if (-1 == fstat(p->fd, &st)) {
- warn("%s", p->name);
- return(1);
+ if (-1 == fstat(p->fdin, &st)) {
+ warn("%s", p->in);
+ return(0);
}
+ /*
+ * Try to intuit the fastest way of reading buffered data by
+ * using the larger of the file's block size and BUFSIZ.
+ * This is inspired by bin/cat.c.
+ */
+
p->bufsz = MAX(st.st_blksize, BUFSIZ);
if (NULL == (p->buf = malloc(p->bufsz))) {
@@ -213,18 +224,22 @@ buf_begin(struct md_parse *p)
static int
parse_leave(struct md_parse *p, int code)
{
- const struct mdoc_node *n;
if (NULL == p->mdoc)
return(code);
if ( ! mdoc_endparse(p->mdoc))
code = 0;
- if (p->print && (n = mdoc_node(p->mdoc)))
- (*p->print)(n);
- mdoc_free(p->mdoc);
+#if 0
+ /* TODO */
+ if (code && ! mdoc_write(p->out, mdoc_node(p->mdoc))) {
+ warnx("%s: write error", p->out);
+ code = 0;
+ }
+#endif
+ mdoc_free(p->mdoc);
return(code);
}
@@ -245,9 +260,19 @@ parse_begin(struct md_parse *p)
if (NULL == (p->mdoc = mdoc_alloc(p, &cb)))
return(parse_leave(p, 0));
+ /*
+ * This is a little more complicated than fgets. TODO: have
+ * some benchmarks that show it's faster (note that I want to
+ * check many, many manuals simultaneously, so speed is
+ * important). Fill a buffer (sized to the block size) with a
+ * single read, then parse \n-terminated lines into a line
+ * buffer, which is passed to the parser. Hard-code the line
+ * buffer to a particular size -- a reasonable assumption.
+ */
+
for (lnn = 1, pos = 0; ; ) {
- if (-1 == (sz = read(p->fd, p->buf, p->bufsz))) {
- warn("%s", p->name);
+ if (-1 == (sz = read(p->fdin, p->buf, p->bufsz))) {
+ warn("%s", p->in);
return(parse_leave(p, 0));
} else if (0 == sz)
break;
@@ -258,8 +283,7 @@ parse_begin(struct md_parse *p)
line[(int)pos++] = p->buf[(int)i];
continue;
}
- warnx("%s: line %d too long",
- p->name, lnn);
+ warnx("%s: line %d too long", p->in, lnn);
return(parse_leave(p, 0));
}
@@ -283,8 +307,8 @@ msg_err(void *arg, int line, int col, const char *msg)
p = (struct md_parse *)arg;
- xfprintf(stderr, "%s:%d: error: %s (column %d)\n",
- p->name, line, msg, col);
+ warnx("%s:%d: error: %s (column %d)",
+ p->in, line, msg, col);
return(0);
}
@@ -299,8 +323,8 @@ msg_msg(void *arg, int line, int col, const char *msg)
if (0 == p->dbg)
return;
- xfprintf(stderr, "%s:%d: debug: %s (column %d)\n",
- p->name, line, msg, col);
+ warnx("%s:%d: debug: %s (column %d)",
+ p->in, line, msg, col);
}
@@ -323,14 +347,13 @@ msg_warn(void *arg, int line, int col,
return(1);
}
- xfprintf(stderr, "%s:%d: warning: %s (column %d)\n",
- p->name, line, msg, col);
+ warnx("%s:%d: warning: %s (column %d)",
+ p->in, line, msg, col);
if ( ! (p->warn & MD_WARN_ERR))
return(1);
- xfprintf(stderr, "%s: considering warnings as errors\n",
- __progname);
+ warnx("%s: considering warnings as errors", __progname);
return(0);
}
@@ -339,7 +362,6 @@ static void
usage(void)
{
- xfprintf(stderr, "usage: %s [-v] [-Wwarn...] [-ffilter] "
- "[infile]\n", __progname);
+ warnx("usage: %s [-v] [-Wwarn...] [infile]", __progname);
}