diff options
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | html4_strict.c | 435 | ||||
-rw-r--r-- | libmdocml.c | 5 | ||||
-rw-r--r-- | libmdocml.h | 2 | ||||
-rw-r--r-- | mdocml.c | 3 | ||||
-rw-r--r-- | private.h | 4 |
6 files changed, 243 insertions, 208 deletions
@@ -1,4 +1,4 @@ -CFLAGS += -W -Wall -g +CFLAGS += -W -Wall -Wno-unused-parameter -g LINTFLAGS += -c -e -f -u LNS = mdocml.ln html4_strict.ln dummy.ln libmdocml.ln diff --git a/html4_strict.c b/html4_strict.c index 98934973..f46b3373 100644 --- a/html4_strict.c +++ b/html4_strict.c @@ -27,72 +27,78 @@ #include "libmdocml.h" #include "private.h" -enum roffd { - ROFF_ENTER = 0, - ROFF_EXIT +enum roffd { + ROFF_ENTER = 0, + ROFF_EXIT }; -enum rofftype { - ROFF_NONE = 0, - ROFF_LAYOUT +enum rofftype { + ROFF_TITLE, + ROFF_COMMENT, + ROFF_TEXT, + ROFF_LAYOUT }; -struct rofftree; - #define ROFFCALL_ARGS const struct md_args *arg, \ struct md_mbuf *out, \ const struct md_rbuf *in, \ const char *buf, size_t sz, \ size_t pos, enum roffd type, \ struct rofftree *tree -typedef int (*roffcall)(ROFFCALL_ARGS); -static int roff_Dd(ROFFCALL_ARGS); -static int roff_Dt(ROFFCALL_ARGS); -static int roff_Os(ROFFCALL_ARGS); -static int roff_Sh(ROFFCALL_ARGS); +struct rofftree; struct rofftok { - char id; -#define ROFF___ 0 -#define ROFF_Dd 1 -#define ROFF_Dt 2 -#define ROFF_Os 3 -#define ROFF_Sh 4 -#define ROFF_Max 5 + int id; char name[2]; - roffcall cb; + int (*cb)(ROFFCALL_ARGS); enum rofftype type; int flags; -#define ROFF_NESTED (1 << 0) -}; - -static const struct rofftok tokens[ROFF_Max] = { - { ROFF___, "\\\"", NULL, ROFF_NONE, 0 }, - { ROFF_Dd, "Dd", roff_Dd, ROFF_NONE, 0 }, - { ROFF_Dt, "Dt", roff_Dt, ROFF_NONE, 0 }, - { ROFF_Os, "Os", roff_Os, ROFF_LAYOUT, 0 }, - { ROFF_Sh, "Sh", roff_Sh, ROFF_LAYOUT, 0 }, +#define ROFF_NESTED (1 << 0) /* FIXME: test. */ +#define ROFF_PARSED (1 << 1) /* FIXME: test. */ +#define ROFF_CALLABLE (1 << 2) /* FIXME: test. */ }; struct roffnode { int tok; struct roffnode *parent; - /* TODO: line number at acquisition. */ + size_t line; }; -struct rofftree { +struct rofftree { struct roffnode *last; time_t date; char title[256]; char section[256]; char volume[256]; int state; -#define ROFF_PRELUDE_Os (1 << 1) -#define ROFF_PRELUDE_Dt (1 << 2) -#define ROFF_PRELUDE_Dd (1 << 3) +#define ROFF_PRELUDE (1 << 1) +#define ROFF_PRELUDE_Os (1 << 2) +#define ROFF_PRELUDE_Dt (1 << 3) +#define ROFF_PRELUDE_Dd (1 << 4) +#define ROFF_BODY (1 << 5) }; +#define ROFF___ 0 +#define ROFF_Dd 1 +#define ROFF_Dt 2 +#define ROFF_Os 3 +#define ROFF_Sh 4 +#define ROFF_An 5 +#define ROFF_Li 6 +#define ROFF_Max 7 + +static int roff_Dd(ROFFCALL_ARGS); +static int roff_Dt(ROFFCALL_ARGS); +static int roff_Os(ROFFCALL_ARGS); +static int roff_Sh(ROFFCALL_ARGS); +static int roff_An(ROFFCALL_ARGS); +static int roff_Li(ROFFCALL_ARGS); + +static struct roffnode *roffnode_new(int, size_t, + struct rofftree *); +static void roffnode_free(int, struct rofftree *); + static int rofffind(const char *); static int roffparse(const struct md_args *, struct md_mbuf *, @@ -108,25 +114,44 @@ static void dbg_enter(const struct md_args *, int); static void dbg_leave(const struct md_args *, int); +static const struct rofftok tokens[ROFF_Max] = +{ +{ ROFF___, "\\\"", NULL, ROFF_COMMENT, 0 }, +{ ROFF_Dd, "Dd", roff_Dd, ROFF_TITLE, 0 }, +{ ROFF_Dt, "Dt", roff_Dt, ROFF_TITLE, 0 }, +{ ROFF_Os, "Os", roff_Os, ROFF_TITLE, 0 }, +{ ROFF_Sh, "Sh", roff_Sh, ROFF_LAYOUT, 0 }, +{ ROFF_An, "An", roff_An, ROFF_TEXT, ROFF_PARSED }, +{ ROFF_Li, "Li", roff_Li, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, +}; + + int md_exit_html4_strict(const struct md_args *args, struct md_mbuf *out, - const struct md_rbuf *in, void *data) + const struct md_rbuf *in, int error, void *data) { struct rofftree *tree; - int error; assert(args); assert(data); tree = (struct rofftree *)data; - error = 0; + if (-1 == error) + out = NULL; + + /* LINTED */ while (tree->last) - if ( ! (*tokens[tree->last->tok].cb) - (args, error ? NULL : out, in, NULL, - 0, 0, ROFF_EXIT, tree)) - error = 1; + if ( ! (*tokens[tree->last->tok].cb)(args, out, in, + NULL, 0, 0, ROFF_EXIT, tree)) + out = NULL; + + if (out && (ROFF_PRELUDE & tree->state)) { + warnx("%s: prelude never finished", in->name); + error = 1; + } free(tree); + return(error ? 0 : 1); } @@ -149,6 +174,8 @@ md_init_html4_strict(const struct md_args *args, struct md_mbuf *out, return(0); } + tree->state = ROFF_PRELUDE; + *data = tree; return(1); } @@ -229,18 +256,53 @@ roffparse(const struct md_args *args, struct md_mbuf *out, */ if (3 > sz) { - warnx("%s: malformed input (line %zu, col 1)", + warnx("%s: malformed line (line %zu)", in->name, in->line); return(0); } else if (ROFF_Max == (tokid = rofffind(buf + 1))) { - warnx("%s: unknown token `%c%c' (line %zu, col 1)", + warnx("%s: unknown line token `%c%c' (line %zu)", in->name, *(buf + 1), *(buf + 2), in->line); return(0); - } else if (NULL == tokens[tokid].cb) - return(1); /* Skip token. */ + } - pos = 3; + /* Domain cross-contamination (and sanity) checks. */ + + switch (tokens[tokid].type) { + case (ROFF_TITLE): + if (ROFF_PRELUDE & tree->state) { + assert( ! (ROFF_BODY & tree->state)); + break; + } + assert(ROFF_BODY & tree->state); + warnx("%s: prelude token `%s' in body (line %zu)", + in->name, tokens[tokid].name, in->line); + return(0); + case (ROFF_LAYOUT): + /* FALLTHROUGH */ + case (ROFF_TEXT): + if (ROFF_BODY & tree->state) { + assert( ! (ROFF_PRELUDE & tree->state)); + break; + } + assert(ROFF_PRELUDE & tree->state); + warnx("%s: text token `%s' in prelude (line %zu)", + in->name, tokens[tokid].name, in->line); + return(0); + default: + return(1); + } + + /* + * Text-domain checks. + */ + + if (ROFF_TEXT == tokens[tokid].type && + ! (ROFF_PARSED & tokens[tokid].flags)) { + warnx("%s: text token `%s' not callable (line %zu)", + in->name, tokens[tokid].name, in->line); + return(0); + } /* * If this is a non-nestable layout token and we're below a @@ -253,6 +315,7 @@ roffparse(const struct md_args *args, struct md_mbuf *out, */ node = NULL; + pos = 3; if (ROFF_LAYOUT == tokens[tokid].type && ! (ROFF_NESTED & tokens[tokid].flags)) { @@ -264,14 +327,16 @@ roffparse(const struct md_args *args, struct md_mbuf *out, if ( ! (ROFF_NESTED & tokens[node->tok].flags)) continue; - warnx("%s: scope of %s broken by %s " - "(line %zu, col %zu)", - in->name, tokens[tokid].name, + warnx("%s: scope of %s (line %zu) broken by " + "%s (line %zu)", in->name, + tokens[tokid].name, + node->line, tokens[node->tok].name, - in->line, pos); + in->line); return(0); } } + if (node) { assert(ROFF_LAYOUT == tokens[tokid].type); assert( ! (ROFF_NESTED & tokens[tokid].flags)); @@ -279,6 +344,7 @@ roffparse(const struct md_args *args, struct md_mbuf *out, /* Clear up to last scoped token. */ + /* LINTED */ do { t = tree->last->tok; if ( ! (*tokens[tree->last->tok].cb) @@ -302,84 +368,134 @@ rofffind(const char *name) assert(name); /* FIXME: use a table, this is slow but ok for now. */ + + /* LINTED */ for (i = 0; i < ROFF_Max; i++) + /* LINTED */ if (0 == strncmp(name, tokens[i].name, 2)) - return(i); + return((int)i); return(ROFF_Max); } -/* ARGUSED */ -static int -roff_Dd(ROFFCALL_ARGS) +static struct roffnode * +roffnode_new(int tokid, size_t line, struct rofftree *tree) { + struct roffnode *p; + + if (NULL == (p = malloc(sizeof(struct roffnode)))) { + warn("malloc"); + return(NULL); + } - assert(in); - assert(tree); - assert(arg); - assert(out); - assert(buf); - assert(sz > 0); - assert(pos > 0); - assert(type == ROFF_ENTER); + p->line = line; + p->tok = tokid; + p->parent = tree->last; + tree->last = p; + return(p); +} - if (tree->last) { - warnx("%s: superfluous prelude (line %zu, col %zu)", - in->name, in->line, pos); - return(0); - } - if (0 != tree->state) { - warnx("%s: bad manual prelude (line %zu, col %zu)", - in->name, in->line, pos); - return(1); - } +static void +roffnode_free(int tokid, struct rofftree *tree) +{ + struct roffnode *p; - /* TODO: parse date from buffer. */ + assert(tree->last); + assert(tree->last->tok == tokid); - tree->date = time(NULL); - tree->state |= ROFF_PRELUDE_Dd; + p = tree->last; + tree->last = tree->last->parent; + free(p); +} - (void)printf("Dd\n"); - return(1); +static int dbg_lvl = 0; /* FIXME: de-globalise. */ + + +static void +dbg_enter(const struct md_args *args, int tokid) +{ + int i; + + assert(args); + if ( ! (args->dbg & MD_DBG_TREE)) + return; + + assert(tokid >= 0 && tokid <= ROFF_Max); + + /* LINTED */ + for (i = 0; i < dbg_lvl; i++) + (void)printf(" "); + + (void)printf("%s\n", tokens[tokid].name); + + if (ROFF_LAYOUT == tokens[tokid].type) + dbg_lvl++; } -static int -roff_Dt(ROFFCALL_ARGS) +static void +dbg_leave(const struct md_args *args, int tokid) { + int i; - assert(in); - assert(tree); - assert(arg); - assert(out); - assert(buf); - assert(sz > 0); - assert(pos > 0); - assert(type == ROFF_ENTER); + assert(args); + if ( ! (args->dbg & MD_DBG_TREE)) + return; + if (ROFF_LAYOUT != tokens[tokid].type) + return; - if (tree->last) { - warnx("%s: superfluous prelude (line %zu, col %zu)", - in->name, in->line, pos); + assert(tokid >= 0 && tokid <= ROFF_Max); + assert(dbg_lvl > 0); + + dbg_lvl--; + + /* LINTED */ + for (i = 0; i < dbg_lvl; i++) + (void)printf(" "); + + (void)printf("%s\n", tokens[tokid].name); +} + + +static int +roff_Dd(ROFFCALL_ARGS) +{ + + assert(ROFF_PRELUDE & tree->state); + if (ROFF_PRELUDE_Dt & tree->state || + ROFF_PRELUDE_Dd & tree->state) { + warnx("%s: bad prelude ordering (line %zu)", + in->name, in->line); return(0); } + assert(NULL == tree->last); + tree->state |= ROFF_PRELUDE_Dd; + + dbg_enter(arg, ROFF_Dd); + return(1); +} + + +static int +roff_Dt(ROFFCALL_ARGS) +{ + + assert(ROFF_PRELUDE & tree->state); if ( ! (ROFF_PRELUDE_Dd & tree->state) || - (ROFF_PRELUDE_Os & tree->state) || (ROFF_PRELUDE_Dt & tree->state)) { - warnx("%s: bad manual prelude (line %zu, col %zu)", - in->name, in->line, pos); - return(1); + warnx("%s: bad prelude ordering (line %zu)", + in->name, in->line); + return(0); } - /* TODO: parse titles from buffer. */ - + assert(NULL == tree->last); tree->state |= ROFF_PRELUDE_Dt; - (void)printf("Dt\n"); - + dbg_enter(arg, ROFF_Dt); return(1); } @@ -387,146 +503,63 @@ roff_Dt(ROFFCALL_ARGS) static int roff_Os(ROFFCALL_ARGS) { - struct roffnode *node; - - assert(arg); - assert(tree); - assert(in); if (ROFF_EXIT == type) { - assert(tree->last); - assert(tree->last->tok == ROFF_Os); - - /* TODO: flush out ML footer. */ - - node = tree->last; - tree->last = node->parent; - free(node); - + roffnode_free(ROFF_Os, tree); dbg_leave(arg, ROFF_Os); - return(1); } - assert(out); - assert(buf); - assert(sz > 0); - assert(pos > 0); - - if (tree->last) { - warnx("%s: superfluous prelude (line %zu, col %zu)", - in->name, in->line, pos); - return(0); - } - - if ((ROFF_PRELUDE_Os & tree->state) || - ! (ROFF_PRELUDE_Dt & tree->state) || + assert(ROFF_PRELUDE & tree->state); + if ( ! (ROFF_PRELUDE_Dt & tree->state) || ! (ROFF_PRELUDE_Dd & tree->state)) { - warnx("%s: bad manual prelude (line %zu, col %zu)", - in->name, in->line, pos); - return(1); + warnx("%s: bad prelude ordering (line %zu)", + in->name, in->line); + return(0); } - node = malloc(sizeof(struct roffnode)); - if (NULL == node) { - warn("malloc"); + assert(NULL == tree->last); + if (NULL == roffnode_new(ROFF_Os, in->line, tree)) return(0); - } - node->tok = ROFF_Os; - node->parent = NULL; tree->state |= ROFF_PRELUDE_Os; - tree->last = node; + tree->state &= ~ROFF_PRELUDE; + tree->state |= ROFF_BODY; dbg_enter(arg, ROFF_Os); - return(1); } -static int +static int roff_Sh(ROFFCALL_ARGS) { - struct roffnode *node; - - assert(arg); - assert(tree); - assert(tree->last); - assert(in); if (ROFF_EXIT == type) { - assert(tree->last->tok == ROFF_Sh); - - node = tree->last; - tree->last = node->parent; - free(node); - + roffnode_free(ROFF_Sh, tree); dbg_leave(arg, ROFF_Sh); - return(1); } - assert(out); - assert(buf); - assert(sz > 0); - assert(pos > 0); - - node = malloc(sizeof(struct roffnode)); - if (NULL == node) { - warn("malloc"); + if (NULL == roffnode_new(ROFF_Sh, in->line, tree)) return(0); - } - node->tok = ROFF_Sh; - node->parent = tree->last; - - tree->last = node; dbg_enter(arg, ROFF_Sh); - return(1); } -static int dbg_lvl = 0; /* FIXME: de-globalise. */ - - -static void -dbg_enter(const struct md_args *args, int tokid) +static int +roff_Li(ROFFCALL_ARGS) { - int i; - - assert(args); - if ( ! (args->dbg & MD_DBG_TREE)) - return; - assert(tokid >= 0 && tokid <= ROFF_Max); - - for (i = 0; i < dbg_lvl; i++) - (void)printf(" "); - - (void)printf("%s\n", tokens[tokid].name); - - if (ROFF_LAYOUT == tokens[tokid].type) - dbg_lvl++; + return(1); } -static void -dbg_leave(const struct md_args *args, int tokid) +static int +roff_An(ROFFCALL_ARGS) { - int i; - assert(args); - if ( ! (args->dbg & MD_DBG_TREE)) - return; - - assert(tokid >= 0 && tokid <= ROFF_Max); - assert(dbg_lvl > 0); - - dbg_lvl--; - for (i = 0; i < dbg_lvl; i++) - (void)printf(" "); - - (void)printf("%s\n", tokens[tokid].name); + return(1); } - diff --git a/libmdocml.c b/libmdocml.c index 7317b6d9..a38678d0 100644 --- a/libmdocml.c +++ b/libmdocml.c @@ -135,7 +135,7 @@ md_run_leave(const struct md_args *args, struct md_mbuf *mbuf, /* Run exiters. */ switch (args->type) { case (MD_HTML4_STRICT): - if ( ! md_exit_html4_strict(args, mbuf, rbuf, data)) + if ( ! md_exit_html4_strict(args, mbuf, rbuf, c, data)) return(-1); break; case (MD_DUMMY): @@ -235,8 +235,7 @@ md_run(const struct md_args *args, /* Run initialisers. */ switch (args->type) { case (MD_HTML4_STRICT): - if ( ! md_init_html4_strict - (args, &mbuf, &rbuf, &data)) + if ( ! md_init_html4_strict(args, &mbuf, &rbuf, &data)) return(-1); break; case (MD_DUMMY): diff --git a/libmdocml.h b/libmdocml.h index 6b96094d..c25602fa 100644 --- a/libmdocml.h +++ b/libmdocml.h @@ -43,7 +43,7 @@ struct md_args { union md_params params;/* Parameters for parser. */ enum md_type type; /* Type of parser. */ int dbg; /* Debug level. */ -#define MD_DBG_TREE (1 << 0) +#define MD_DBG_TREE (1 << 0)/* Print the parse tree to stdout. */ }; struct md_buf { @@ -167,6 +167,9 @@ begin_bufs(const struct md_args *args, if (-1 == fstat(in->fd, &stin)) { warn("%s", in->name); return(1); + } else if (0 == stin.st_size) { + warnx("%s: empty file", in->name); + return(1); } else if (-1 == fstat(out->fd, &stout)) { warn("%s", out->name); return(1); @@ -40,7 +40,7 @@ __BEGIN_DECLS typedef int (*md_init)(const struct md_args *, struct md_mbuf *, const struct md_rbuf *, void **); typedef int (*md_exit)(const struct md_args *, struct md_mbuf *, - const struct md_rbuf *, void *); + const struct md_rbuf *, int, void *); typedef int (*md_line)(const struct md_args *, struct md_mbuf *, const struct md_rbuf *, const char *, size_t, void *); @@ -53,7 +53,7 @@ int md_init_html4_strict(const struct md_args *, void **); int md_exit_html4_strict(const struct md_args *, struct md_mbuf *, const struct md_rbuf *, - void *); + int, void *); int md_line_dummy(const struct md_args *, struct md_mbuf *, const struct md_rbuf *, |