diff options
Diffstat (limited to 'mdoc.c')
-rw-r--r-- | mdoc.c | 734 |
1 files changed, 734 insertions, 0 deletions
@@ -0,0 +1,734 @@ +/* $Id$ */ +/* + * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the + * above copyright notice and this permission notice appear in all + * copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL + * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE + * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL + * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR + * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ +#include <assert.h> +#include <ctype.h> +#include <err.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "private.h" + +extern int macro_text(struct mdoc *, int, int, int *, char *); +extern int macro_scoped_implicit(struct mdoc *, + int, int, int *, char *); + +const char *const __mdoc_macronames[MDOC_MAX] = { + "\\\"", "Dd", "Dt", "Os", + "Sh", "Ss", "Pp", "D1", + "Dl", "Bd", "Ed", "Bl", + "El", "It", "Ad", "An", + "Ar", "Cd", "Cm", "Dv", + "Er", "Ev", "Ex", "Fa", + "Fd", "Fl", "Fn", "Ft", + "Ic", "In", "Li", "Nd", + "Nm", "Op", "Ot", "Pa", + "Rv", "St", "Va", "Vt", + /* LINTED */ + "Xr", "\%A", "\%B", "\%D", + /* LINTED */ + "\%I", "\%J", "\%N", "\%O", + /* LINTED */ + "\%P", "\%R", "\%T", "\%V", + "Ac", "Ao", "Aq", "At", + "Bc", "Bf", "Bo", "Bq", + "Bsx", "Bx", "Db", "Dc", + "Do", "Dq", "Ec", "Ef", + "Em", "Eo", "Fx", "Ms", + "No", "Ns", "Nx", "Ox", + "Pc", "Pf", "Po", "Pq", + "Qc", "Ql", "Qo", "Qq", + "Re", "Rs", "Sc", "So", + "Sq", "Sm", "Sx", "Sy", + "Tn", "Ux", "Xc", "Xo", + "Fo", "Fc", "Oo", "Oc", + "Bk", "Ek", "Bt", "Hf", + "Fr", "Ud", + }; + +const char *const __mdoc_argnames[MDOC_ARG_MAX] = { + "split", "nosplit", "ragged", + "unfilled", "literal", "file", + "offset", "bullet", "dash", + "hyphen", "item", "enum", + "tag", "diag", "hang", + "ohang", "inset", "column", + "width", "compact", "std", + "p1003.1-88", "p1003.1-90", "p1003.1-96", + "p1003.1-2001", "p1003.1-2004", "p1003.1", + "p1003.1b", "p1003.1b-93", "p1003.1c-95", + "p1003.1g-2000", "p1003.2-92", "p1387.2-95", + "p1003.2", "p1387.2", "isoC-90", + "isoC-amd1", "isoC-tcor1", "isoC-tcor2", + "isoC-99", "ansiC", "ansiC-89", + "ansiC-99", "ieee754", "iso8802-3", + "xpg3", "xpg4", "xpg4.2", + "xpg4.3", "xbd5", "xcu5", + "xsh5", "xns5", "xns5.2d2.0", + "xcurses4.2", "susv2", "susv3", + "svid4", "filled", "words", + }; + +const struct mdoc_macro __mdoc_macros[MDOC_MAX] = { + { NULL, 0 }, /* \" */ + { NULL, 0 }, /* Dd */ + { NULL, 0 }, /* Dt */ + { NULL, 0 }, /* Os */ + { macro_scoped_implicit, 0 }, /* Sh */ + { macro_scoped_implicit, 0 }, /* Ss */ + { NULL, 0 }, /* Pp */ + { NULL, 0 }, /* D1 */ + { NULL, 0 }, /* Dl */ + { NULL, 0 }, /* Bd */ + { NULL, 0 }, /* Ed */ + { NULL, 0 }, /* Bl */ + { NULL, 0 }, /* El */ + { NULL, 0 }, /* It */ + { macro_text, MDOC_CALLABLE }, /* Ad */ + { NULL, 0 }, /* An */ + { macro_text, MDOC_CALLABLE }, /* Ar */ + { NULL, 0 }, /* Cd */ + { macro_text, MDOC_CALLABLE }, /* Cm */ + { macro_text, MDOC_CALLABLE }, /* Dv */ + { macro_text, MDOC_CALLABLE }, /* Er */ + { macro_text, MDOC_CALLABLE }, /* Ev */ + { NULL, 0 }, /* Ex */ + { macro_text, MDOC_CALLABLE }, /* Fa */ + { NULL, 0 }, /* Fd */ + { macro_text, MDOC_CALLABLE }, /* Fl */ + { NULL, 0 }, /* Fn */ + { macro_text, 0 }, /* Ft */ + { macro_text, MDOC_CALLABLE }, /* Ic */ + { NULL, 0 }, /* In */ + { macro_text, MDOC_CALLABLE }, /* Li */ + { NULL, 0 }, /* Nd */ + { NULL, 0 }, /* Nm */ + { NULL, 0 }, /* Op */ + { NULL, 0 }, /* Ot */ + { macro_text, MDOC_CALLABLE }, /* Pa */ + { NULL, 0 }, /* Rv */ + { NULL, 0 }, /* St */ + { macro_text, MDOC_CALLABLE }, /* Va */ + { macro_text, MDOC_CALLABLE }, /* Vt */ + { NULL, 0 }, /* Xr */ + { NULL, 0 }, /* %A */ + { NULL, 0 }, /* %B */ + { NULL, 0 }, /* %D */ + { NULL, 0 }, /* %I */ + { NULL, 0 }, /* %J */ + { NULL, 0 }, /* %N */ + { NULL, 0 }, /* %O */ + { NULL, 0 }, /* %P */ + { NULL, 0 }, /* %R */ + { NULL, 0 }, /* %T */ + { NULL, 0 }, /* %V */ + { NULL, 0 }, /* Ac */ + { NULL, 0 }, /* Ao */ + { NULL, 0 }, /* Aq */ + { NULL, 0 }, /* At */ + { NULL, 0 }, /* Bc */ + { NULL, 0 }, /* Bf */ + { NULL, 0 }, /* Bo */ + { NULL, 0 }, /* Bq */ + { NULL, 0 }, /* Bsx */ + { NULL, 0 }, /* Bx */ + { NULL, 0 }, /* Db */ + { NULL, 0 }, /* Dc */ + { NULL, 0 }, /* Do */ + { NULL, 0 }, /* Dq */ + { NULL, 0 }, /* Ec */ + { NULL, 0 }, /* Ef */ + { macro_text, MDOC_CALLABLE }, /* Em */ + { NULL, 0 }, /* Eo */ + { NULL, 0 }, /* Fx */ + { macro_text, 0 }, /* Ms */ + { NULL, 0 }, /* No */ + { NULL, 0 }, /* Ns */ + { NULL, 0 }, /* Nx */ + { NULL, 0 }, /* Ox */ + { NULL, 0 }, /* Pc */ + { NULL, 0 }, /* Pf */ + { NULL, 0 }, /* Po */ + { NULL, 0 }, /* Pq */ + { NULL, 0 }, /* Qc */ + { NULL, 0 }, /* Ql */ + { NULL, 0 }, /* Qo */ + { NULL, 0 }, /* Qq */ + { NULL, 0 }, /* Re */ + { NULL, 0 }, /* Rs */ + { NULL, 0 }, /* Sc */ + { NULL, 0 }, /* So */ + { NULL, 0 }, /* Sq */ + { NULL, 0 }, /* Sm */ + { NULL, 0 }, /* Sx */ + { NULL, 0 }, /* Sy */ + { macro_text, MDOC_CALLABLE }, /* Tn */ + { NULL, 0 }, /* Ux */ + { NULL, 0 }, /* Xc */ + { NULL, 0 }, /* Xo */ + { NULL, 0 }, /* Fo */ + { NULL, 0 }, /* Fc */ + { NULL, 0 }, /* Oo */ + { NULL, 0 }, /* Oc */ + { NULL, 0 }, /* Bk */ + { NULL, 0 }, /* Ek */ + { NULL, 0 }, /* Bt */ + { NULL, 0 }, /* Hf */ + { NULL, 0 }, /* Fr */ + { NULL, 0 }, /* Ud */ +}; + +const char * const *mdoc_macronames = __mdoc_macronames; +const char * const *mdoc_argnames = __mdoc_argnames; +const struct mdoc_macro * const mdoc_macros = __mdoc_macros; + + +static void *xcalloc(size_t, size_t); +static char *xstrdup(const char *); + +static struct mdoc_arg *argdup(size_t, const struct mdoc_arg *); +static void argfree(size_t, struct mdoc_arg *); +static void argcpy(struct mdoc_arg *, + const struct mdoc_arg *); +static char **paramdup(size_t, const char **); +static void paramfree(size_t, char **); + +static void mdoc_node_freelist(struct mdoc_node *); +static void mdoc_node_append(struct mdoc *, int, + struct mdoc_node *); +static void mdoc_elem_free(struct mdoc_elem *); +static void mdoc_text_free(struct mdoc_text *); + + +const struct mdoc_node * +mdoc_result(struct mdoc *mdoc) +{ + + return(mdoc->first); +} + + +void +mdoc_free(struct mdoc *mdoc) +{ + + if (mdoc->first) + mdoc_node_freelist(mdoc->first); + if (mdoc->htab) + mdoc_hash_free(mdoc->htab); + + free(mdoc); +} + + +struct mdoc * +mdoc_alloc(void *data, const struct mdoc_cb *cb) +{ + struct mdoc *p; + + p = xcalloc(1, sizeof(struct mdoc)); + + p->data = data; + (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb)); + + p->htab = mdoc_hash_alloc(); + return(p); +} + + +static void * +xcalloc(size_t num, size_t sz) +{ + void *p; + + if (NULL == (p = calloc(num, sz))) + err(EXIT_FAILURE, "calloc"); + return(p); +} + + +static char * +xstrdup(const char *p) +{ + char *pp; + + if (NULL == (pp = strdup(p))) + err(EXIT_FAILURE, "strdup"); + return(pp); +} + + +int +mdoc_parseln(struct mdoc *mdoc, char *buf) +{ + int c, i; + char tmp[5]; + + if ('.' != *buf) { + /* TODO. */ + return(1); + } + + if (buf[1] && '\\' == buf[1]) + if (buf[2] && '\"' == buf[2]) + return(1); + + i = 1; + while (buf[i] && ! isspace(buf[i]) && i < (int)sizeof(tmp)) + i++; + + if (i == (int)sizeof(tmp)) + return(mdoc_err(mdoc, -1, 1, ERR_MACRO_NOTSUP)); + else if (i <= 2) + return(mdoc_err(mdoc, -1, 1, ERR_MACRO_NOTSUP)); + + i--; + + (void)memcpy(tmp, buf + 1, i); + tmp[i++] = 0; + + if (MDOC_MAX == (c = mdoc_find(mdoc, tmp))) + return(mdoc_err(mdoc, c, 1, ERR_MACRO_NOTSUP)); + + while (buf[i] && isspace(buf[i])) + i++; + + if (NULL == (mdoc_macros[c].fp)) { + (void)mdoc_err(mdoc, c, 1, ERR_MACRO_NOTSUP); + return(0); + } + + return((*mdoc_macros[c].fp)(mdoc, c, 1, &i, buf)); +} + + +void +mdoc_msg(struct mdoc *mdoc, int pos, const char *fmt, ...) +{ + va_list ap; + char buf[256]; + + if (NULL == mdoc->cb.mdoc_msg) + return; + + va_start(ap, fmt); + (void)vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + (*mdoc->cb.mdoc_msg)(mdoc->data, pos, buf); +} + + +int +mdoc_err(struct mdoc *mdoc, int tok, int pos, enum mdoc_err type) +{ + + if (NULL == mdoc->cb.mdoc_err) + return(0); + return((*mdoc->cb.mdoc_err)(mdoc->data, tok, pos, type)); +} + + +int +mdoc_warn(struct mdoc *mdoc, int tok, int pos, enum mdoc_warn type) +{ + + if (NULL == mdoc->cb.mdoc_warn) + return(0); + return((*mdoc->cb.mdoc_warn)(mdoc->data, tok, pos, type)); +} + + +int +mdoc_macro(struct mdoc *mdoc, int tok, int ppos, int *pos, char *buf) +{ + + if (NULL == (mdoc_macros[tok].fp)) { + (void)mdoc_err(mdoc, tok, ppos, ERR_MACRO_NOTSUP); + return(0); + } else if ( ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) { + (void)mdoc_err(mdoc, tok, ppos, ERR_MACRO_NOTCALL); + return(0); + } + + return((*mdoc_macros[tok].fp)(mdoc, tok, ppos, pos, buf)); +} + + +static void +mdoc_node_append(struct mdoc *mdoc, int pos, struct mdoc_node *p) +{ + const char *nn, *on, *nt, *ot, *act; + + switch (p->type) { + case (MDOC_TEXT): + nn = "<text>"; + nt = "text"; + break; + case (MDOC_BODY): + nn = mdoc_macronames[p->data.body.tok]; + nt = "body"; + break; + case (MDOC_ELEM): + nn = mdoc_macronames[p->data.elem.tok]; + nt = "elem"; + break; + case (MDOC_HEAD): + nn = mdoc_macronames[p->data.head.tok]; + nt = "head"; + break; + case (MDOC_BLOCK): + nn = mdoc_macronames[p->data.block.tok]; + nt = "block"; + break; + } + + if (NULL == mdoc->first) { + assert(NULL == mdoc->last); + mdoc->first = p; + mdoc->last = p; + mdoc_msg(mdoc, pos, "parse: root %s `%s'", nt, nn); + return; + } + + switch (mdoc->last->type) { + case (MDOC_TEXT): + on = "<text>"; + ot = "text"; + break; + case (MDOC_BODY): + on = mdoc_macronames[mdoc->last->data.body.tok]; + ot = "body"; + break; + case (MDOC_ELEM): + on = mdoc_macronames[mdoc->last->data.elem.tok]; + ot = "elem"; + break; + case (MDOC_HEAD): + on = mdoc_macronames[mdoc->last->data.head.tok]; + ot = "head"; + break; + case (MDOC_BLOCK): + on = mdoc_macronames[mdoc->last->data.block.tok]; + ot = "block"; + break; + } + + switch (p->type) { + case (MDOC_BODY): + switch (mdoc->last->type) { + case (MDOC_BLOCK): + p->parent = mdoc->last; + mdoc->last->child = p; + act = "child"; + break; + case (MDOC_HEAD): + p->parent = mdoc->last->parent; + mdoc->last->next = p; + act = "sibling"; + break; + default: + abort(); + /* NOTREACHED */ + } + break; + case (MDOC_HEAD): + assert(mdoc->last->type == MDOC_BLOCK); + p->parent = mdoc->last; + mdoc->last->child = p; + act = "child"; + break; + default: + switch (mdoc->last->type) { + case (MDOC_BODY): + /* FALLTHROUGH */ + case (MDOC_HEAD): + p->parent = mdoc->last->parent; + mdoc->last->child = p; + act = "child"; + break; + default: + p->parent = mdoc->last->parent; + mdoc->last->next = p; + act = "sibling"; + break; + } + break; + } + + mdoc_msg(mdoc, pos, "parse: %s `%s' %s %s `%s'", + nt, nn, act, ot, on); + mdoc->last = p; +} + + +void +mdoc_head_alloc(struct mdoc *mdoc, int pos, int tok, + size_t paramsz, const char **params) +{ + struct mdoc_node *p; + + assert(mdoc->first); + assert(mdoc->last); + assert(mdoc->last->type == MDOC_BLOCK); + assert(mdoc->last->data.block.tok == tok); + + p = xcalloc(1, sizeof(struct mdoc_node)); + p->type = MDOC_HEAD; + p->data.head.tok = tok; + p->data.head.sz = paramsz; + p->data.head.args = paramdup(paramsz, params); + + mdoc_node_append(mdoc, pos, p); +} + + +void +mdoc_body_alloc(struct mdoc *mdoc, int pos, int tok) +{ + struct mdoc_node *p; + + assert(mdoc->first); + assert(mdoc->last); + assert((mdoc->last->type == MDOC_BLOCK) || + (mdoc->last->type == MDOC_HEAD)); + if (mdoc->last->type == MDOC_BLOCK) + assert(mdoc->last->data.block.tok == tok); + else + assert(mdoc->last->data.head.tok == tok); + + p = xcalloc(1, sizeof(struct mdoc_node)); + + p->type = MDOC_BODY; + p->data.body.tok = tok; + + mdoc_node_append(mdoc, pos, p); +} + + +void +mdoc_block_alloc(struct mdoc *mdoc, int pos, int tok, + size_t argsz, const struct mdoc_arg *args) +{ + struct mdoc_node *p; + + p = xcalloc(1, sizeof(struct mdoc_node)); + + p->type = MDOC_BLOCK; + p->data.block.tok = tok; + p->data.block.argc = argsz; + p->data.block.argv = argdup(argsz, args); + + mdoc_node_append(mdoc, pos, p); +} + + +void +mdoc_elem_alloc(struct mdoc *mdoc, int pos, int tok, + size_t argsz, const struct mdoc_arg *args, + size_t paramsz, const char **params) +{ + struct mdoc_node *p; + + p = xcalloc(1, sizeof(struct mdoc_node)); + p->type = MDOC_ELEM; + p->data.elem.tok = tok; + p->data.elem.sz = paramsz; + p->data.elem.args = paramdup(paramsz, params); + p->data.elem.argc = argsz; + p->data.elem.argv = argdup(argsz, args); + + mdoc_node_append(mdoc, pos, p); +} + + +void +mdoc_word_alloc(struct mdoc *mdoc, int pos, const char *word) +{ + struct mdoc_node *p; + + p = xcalloc(1, sizeof(struct mdoc_node)); + p->type = MDOC_TEXT; + p->data.text.string = xstrdup(word); + + mdoc_node_append(mdoc, pos, p); +} + + +static void +argfree(size_t sz, struct mdoc_arg *p) +{ + size_t i, j; + + if (0 == sz) + return; + + assert(p); + for (i = 0; i < sz; i++) + if (p[i].sz > 0) { + assert(p[i].value); + for (j = 0; j < p[i].sz; j++) + free(p[i].value[j]); + } + free(p); +} + + +static void +mdoc_elem_free(struct mdoc_elem *p) +{ + + paramfree(p->sz, p->args); + argfree(p->argc, p->argv); +} + + +static void +mdoc_block_free(struct mdoc_block *p) +{ + + argfree(p->argc, p->argv); +} + + +static void +mdoc_text_free(struct mdoc_text *p) +{ + + if (p->string) + free(p->string); +} + + +static void +mdoc_head_free(struct mdoc_head *p) +{ + + paramfree(p->sz, p->args); +} + + +void +mdoc_node_free(struct mdoc_node *p) +{ + + switch (p->type) { + case (MDOC_TEXT): + mdoc_text_free(&p->data.text); + break; + case (MDOC_ELEM): + mdoc_elem_free(&p->data.elem); + break; + case (MDOC_BLOCK): + mdoc_block_free(&p->data.block); + break; + case (MDOC_HEAD): + mdoc_head_free(&p->data.head); + break; + default: + break; + } + + free(p); +} + + +static void +mdoc_node_freelist(struct mdoc_node *p) +{ + + if (p->child) + mdoc_node_freelist(p->child); + if (p->next) + mdoc_node_freelist(p->next); + + mdoc_node_free(p); +} + + +int +mdoc_find(const struct mdoc *mdoc, const char *key) +{ + + return(mdoc_hash_find(mdoc->htab, key)); +} + + +static void +argcpy(struct mdoc_arg *dst, const struct mdoc_arg *src) +{ + size_t i; + + dst->arg = src->arg; + if (0 == (dst->sz = src->sz)) + return; + dst->value = xcalloc(dst->sz, sizeof(char *)); + for (i = 0; i < dst->sz; i++) + dst->value[i] = xstrdup(src->value[i]); +} + + +static struct mdoc_arg * +argdup(size_t argsz, const struct mdoc_arg *args) +{ + struct mdoc_arg *pp; + size_t i; + + if (0 == argsz) + return(NULL); + + pp = xcalloc((size_t)argsz, sizeof(struct mdoc_arg)); + for (i = 0; i < argsz; i++) + argcpy(&pp[i], &args[i]); + + return(pp); +} + + +static void +paramfree(size_t sz, char **p) +{ + size_t i; + + if (0 == sz) + return; + + assert(p); + for (i = 0; i < sz; i++) + free(p[i]); + free(p); +} + + +static char ** +paramdup(size_t sz, const char **p) +{ + char **pp; + size_t i; + + if (0 == sz) + return(NULL); + + pp = xcalloc(sz, sizeof(char *)); + for (i = 0; i < sz; i++) + pp[i] = xstrdup(p[i]); + + return(pp); +} |