/* $Id$ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/utsname.h>
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libmdoc.h"
/* Parameter list shared by every pre-action callback. */
#define PRE_ARGS struct mdoc *m, const struct mdoc_node *n
/* Parameter list shared by every post-action callback. */
#define POST_ARGS struct mdoc *m
/*
 * Pair of optional callbacks attached to one macro.  Either member may
 * be NULL, in which case the matching dispatcher (mdoc_action_pre() or
 * mdoc_action_post()) simply returns 1 for that macro.
 */
struct actions {
int (*pre)(PRE_ARGS); /* called through mdoc_action_pre() */
int (*post)(POST_ARGS); /* called through mdoc_action_post() */
};
/* Post-action callbacks and their helpers; all operate on m->last. */
static int post_ar(POST_ARGS);
static int post_at(POST_ARGS);
static int post_bl(POST_ARGS);
static int post_bl_head(POST_ARGS);
static int post_bl_tagwidth(POST_ARGS);
static int post_bl_width(POST_ARGS);
static int post_dd(POST_ARGS);
static int post_display(POST_ARGS);
static int post_dt(POST_ARGS);
static int post_lb(POST_ARGS);
static int post_nm(POST_ARGS);
static int post_os(POST_ARGS);
static int post_prol(POST_ARGS);
static int post_sh(POST_ARGS);
static int post_st(POST_ARGS);
static int post_std(POST_ARGS);
static int post_tilde(POST_ARGS);
/* Pre-action callbacks; these receive the node explicitly. */
static int pre_bd(PRE_ARGS);
static int pre_dl(PRE_ARGS);
/*
 * Pre- and post-action callbacks, one row per macro.  The dispatchers
 * index this table with a node's `tok' value, so the row order must
 * match the macro token numbering; the trailing comment on each row
 * names the macro the row is meant for.
 */
const struct actions mdoc_actions[MDOC_MAX] = {
{ NULL, NULL }, /* Ap */
{ NULL, post_dd }, /* Dd */
{ NULL, post_dt }, /* Dt */
{ NULL, post_os }, /* Os */
{ NULL, post_sh }, /* Sh */
{ NULL, NULL }, /* Ss */
{ NULL, NULL }, /* Pp */
{ NULL, NULL }, /* D1 */
{ pre_dl, post_display }, /* Dl */
{ pre_bd, post_display }, /* Bd */
{ NULL, NULL }, /* Ed */
{ NULL, post_bl }, /* Bl */
{ NULL, NULL }, /* El */
{ NULL, NULL }, /* It */
{ NULL, NULL }, /* Ad */
{ NULL, NULL }, /* An */
{ NULL, post_ar }, /* Ar */
{ NULL, NULL }, /* Cd */ /* FIXME: tabs are accepted! */
{ NULL, NULL }, /* Cm */
{ NULL, NULL }, /* Dv */
{ NULL, NULL }, /* Er */
{ NULL, NULL }, /* Ev */
{ NULL, post_std }, /* Ex */
{ NULL, NULL }, /* Fa */
{ NULL, NULL }, /* Fd */
{ NULL, NULL }, /* Fl */
{ NULL, NULL }, /* Fn */
{ NULL, NULL }, /* Ft */
{ NULL, NULL }, /* Ic */
{ NULL, NULL }, /* In */
{ NULL, NULL }, /* Li */
{ NULL, NULL }, /* Nd */
{ NULL, post_nm }, /* Nm */
{ NULL, NULL }, /* Op */
{ NULL, NULL }, /* Ot */
{ NULL, post_tilde }, /* Pa */
{ NULL, post_std }, /* Rv */
{ NULL, post_st }, /* St */
{ NULL, NULL }, /* Va */
{ NULL, NULL }, /* Vt */
{ NULL, NULL }, /* Xr */
{ NULL, NULL }, /* %A */
{ NULL, NULL }, /* %B */
{ NULL, NULL }, /* %D */
{ NULL, NULL }, /* %I */
{ NULL, NULL }, /* %J */
{ NULL, NULL }, /* %N */
{ NULL, NULL }, /* %O */
{ NULL, NULL }, /* %P */
{ NULL, NULL }, /* %R */
{ NULL, NULL }, /* %T */
{ NULL, NULL }, /* %V */
{ NULL, NULL }, /* Ac */
{ NULL, NULL }, /* Ao */
{ NULL, NULL }, /* Aq */
{ NULL, post_at }, /* At */
{ NULL, NULL }, /* Bc */
{ NULL, NULL }, /* Bf */
{ NULL, NULL }, /* Bo */
{ NULL, NULL }, /* Bq */
{ NULL, NULL }, /* Bsx */
{ NULL, NULL }, /* Bx */
{ NULL, NULL }, /* Db */
{ NULL, NULL }, /* Dc */
{ NULL, NULL }, /* Do */
{ NULL, NULL }, /* Dq */
{ NULL, NULL }, /* Ec */
{ NULL, NULL }, /* Ef */
{ NULL, NULL }, /* Em */
{ NULL, NULL }, /* Eo */
{ NULL, NULL }, /* Fx */
{ NULL, NULL }, /* Ms */
{ NULL, NULL }, /* No */
{ NULL, NULL }, /* Ns */
{ NULL, NULL }, /* Nx */
{ NULL, NULL }, /* Ox */
{ NULL, NULL }, /* Pc */
{ NULL, NULL }, /* Pf */
{ NULL, NULL }, /* Po */
{ NULL, NULL }, /* Pq */
{ NULL, NULL }, /* Qc */
{ NULL, NULL }, /* Ql */
{ NULL, NULL }, /* Qo */
{ NULL, NULL }, /* Qq */
{ NULL, NULL }, /* Re */
{ NULL, NULL }, /* Rs */
{ NULL, NULL }, /* Sc */
{ NULL, NULL }, /* So */
{ NULL, NULL }, /* Sq */
{ NULL, NULL }, /* Sm */
{ NULL, NULL }, /* Sx */
{ NULL, NULL }, /* Sy */
{ NULL, NULL }, /* Tn */
{ NULL, NULL }, /* Ux */
{ NULL, NULL }, /* Xc */
{ NULL, NULL }, /* Xo */
{ NULL, NULL }, /* Fo */
{ NULL, NULL }, /* Fc */
{ NULL, NULL }, /* Oo */
{ NULL, NULL }, /* Oc */
{ NULL, NULL }, /* Bk */
{ NULL, NULL }, /* Ek */
{ NULL, NULL }, /* Bt */
{ NULL, NULL }, /* Hf */
{ NULL, NULL }, /* Fr */
{ NULL, NULL }, /* Ud */
{ NULL, post_lb }, /* Lb */
{ NULL, NULL }, /* Lp */
{ NULL, post_tilde }, /* Lk */
{ NULL, NULL }, /* Mt */
{ NULL, NULL }, /* Brq */
{ NULL, NULL }, /* Bro */
{ NULL, NULL }, /* Brc */
{ NULL, NULL }, /* %C */
{ NULL, NULL }, /* Es */
{ NULL, NULL }, /* En */
{ NULL, NULL }, /* Dx */
{ NULL, NULL }, /* %Q */
{ NULL, NULL }, /* br */
{ NULL, NULL }, /* sp */
};
/* Joins the strings of a sibling list of text nodes into a buffer. */
static int concat(struct mdoc *, const struct mdoc_node *,
char *, size_t);
/*
 * strlcat() is declared by hand when building for Linux.
 * NOTE(review): presumably the system headers there do not declare it
 * and the definition comes from elsewhere in the build -- confirm.
 */
#ifdef __linux__
extern size_t strlcat(char *, const char *, size_t);
#endif
int
mdoc_action_pre(struct mdoc *m, const struct mdoc_node *n)
{
switch (n->type) {
case (MDOC_ROOT):
/* FALLTHROUGH */
case (MDOC_TEXT):
return(1);
default:
break;
}
if (NULL == mdoc_actions[n->tok].pre)
return(1);
return((*mdoc_actions[n->tok].pre)(m, n));
}
int
mdoc_action_post(struct mdoc *m)
{
if (MDOC_ACTED & m->last->flags)
return(1);
m->last->flags |= MDOC_ACTED;
switch (m->last->type) {
case (MDOC_TEXT):
/* FALLTHROUGH */
case (MDOC_ROOT):
return(1);
default:
break;
}
if (NULL == mdoc_actions[m->last->tok].post)
return(1);
return((*mdoc_actions[m->last->tok].post)(m));
}
/*
 * Append the string of every node in the sibling list starting at `n'
 * to `buf' (capacity `sz'), separating consecutive strings with one
 * space.  The caller must hand in a NUL-terminated buffer.  Every node
 * is asserted to be a text node.  Returns 1 on success; when the
 * result would not fit, reports ETOOLONG through mdoc_nerr() and
 * returns its result.
 */
static int
concat(struct mdoc *m, const struct mdoc_node *n,
		char *buf, size_t sz)
{
	const struct mdoc_node	*np;

	for (np = n; NULL != np; np = np->next) {
		assert(MDOC_TEXT == np->type);

		if (strlcat(buf, np->string, sz) >= sz)
			return(mdoc_nerr(m, np, ETOOLONG));

		/* A separator is only wanted between two words. */
		if (np->next && strlcat(buf, " ", sz) >= sz)
			return(mdoc_nerr(m, np, ETOOLONG));
	}

	return(1);
}
static int
post_std(POST_ARGS)
{
/*
* If '-std' is invoked without an argument, fill it in with our
* name (if it's been set).
*/
if (NULL == m->last->args)
return(1);
if (m->last->args->argv[0].sz)
return(1);
assert(m->meta.name);
m->last->args->argv[0].value = calloc(1, sizeof(char *));
if (NULL == m->last->args->argv[0].value)
return(mdoc_nerr(m, m->last, EMALLOC));
m->last->args->argv[0].sz = 1;
m->last->args->argv[0].value[0] = strdup(m->meta.name);
if (NULL == m->last->args->argv[0].value[0])
return(mdoc_nerr(m, m->last, EMALLOC));
return(1);
}
/*
 * Record the document name the first time `Nm' is seen: the children
 * of the node are joined into one string and stored in m->meta.name.
 * Later invocations leave an already-set name untouched.  Returns 1 on
 * success, 0 if the joined name does not fit, or the EMALLOC error
 * result.
 */
static int
post_nm(POST_ARGS)
{
	char		 name[64];

	if (NULL != m->meta.name)
		return(1);

	name[0] = '\0';
	if (0 == concat(m, m->last->child, name, sizeof(name)))
		return(0);

	m->meta.name = strdup(name);
	if (NULL == m->meta.name)
		return(mdoc_nerr(m, m->last, EMALLOC));

	return(1);
}
static int
post_lb(POST_ARGS)
{
const char *p;
char *buf;
size_t sz;
assert(MDOC_TEXT == m->last->child->type);
p = mdoc_a2lib(m->last->child->string);
if (NULL == p) {
sz = strlen(m->last->child->string) +
2 + strlen("\\(lqlibrary\\(rq");
buf = malloc(sz);
if (NULL == buf)
return(mdoc_nerr(m, m->last, EMALLOC));
(void)snprintf(buf, sz, "library \\(lq%s\\(rq",
m->last->child->string);
free(m->last->child->string);
m->last->child->string = buf;
return(1);
}
free(m->last->child->string);
m->last->child->string = strdup(p);
if (NULL == m->last->child->string)
return(mdoc_nerr(m, m->last, EMALLOC));
return(1);
}
static int
post_st(POST_ARGS)
{
const char *p;
assert(MDOC_TEXT == m->last->child->type);
p = mdoc_a2st(m->last->child->string);
assert(p);
free(m->last->child->string);
m->last->child->string = strdup(p);
if (NULL == m->last->child->string)
return(mdoc_nerr(m, m->last, EMALLOC));
return(1);
}
/*
 * Expand `At'.  With a child, its text is replaced by a copy of the
 * string mdoc_a2att() maps it to (asserted to exist).  Without one, the
 * word "AT&T UNIX" is allocated as a child via mdoc_word_alloc() and
 * the parse position is then put back on the `At' node.  Returns 1 on
 * success, 0 if the word could not be allocated, or the EMALLOC error
 * result.
 */
static int
post_at(POST_ARGS)
{
	struct mdoc_node *self, *kid;
	const char	*att;

	self = m->last;
	kid = self->child;

	if (NULL == kid) {
		m->next = MDOC_NEXT_CHILD;
		if ( ! mdoc_word_alloc(m, self->line, self->pos, "AT&T UNIX"))
			return(0);
		/* Step back so parsing continues after the macro itself. */
		m->last = self;
		m->next = MDOC_NEXT_SIBLING;
		return(1);
	}

	assert(MDOC_TEXT == kid->type);
	att = mdoc_a2att(kid->string);
	assert(att);

	free(kid->string);
	kid->string = strdup(att);
	if (NULL == kid->string)
		return(mdoc_nerr(m, m->last, EMALLOC));

	return(1);
}
/*
 * Track the section entered by a `Sh' head.  m->lastsec always receives
 * the section mdoc_atosec() derives from the title; m->lastnamed only
 * changes when that section is not SEC_CUSTOM, so the last conventional
 * section stays available for order checks.  RETURN VALUES and ERRORS
 * outside manual sections 2, 3 and 9 raise the EBADSEC warning, whose
 * result is returned.  Returns 0 if the title does not fit the buffer
 * and 1 otherwise.
 */
static int
post_sh(POST_ARGS)
{
	char		 title[64];
	enum mdoc_sec	 sec;

	if (MDOC_HEAD != m->last->type)
		return(1);

	title[0] = '\0';
	if ( ! concat(m, m->last->child, title, sizeof(title)))
		return(0);

	sec = mdoc_atosec(title);
	if (SEC_CUSTOM != sec)
		m->lastnamed = sec;
	m->lastsec = sec;

	/* Only these two sections are tied to particular manual sections. */
	if (SEC_RETURN_VALUES != m->lastsec && SEC_ERRORS != m->lastsec)
		return(1);

	if (2 == m->meta.msec || 3 == m->meta.msec || 9 == m->meta.msec)
		return(1);

	return(mdoc_nwarn(m, m->last, EBADSEC));
}
/*
 * Fill in title, volume, manual section and architecture from the
 * arguments of `Dt', then drop the node through post_prol().  Earlier
 * values are released first.  Accepted forms:
 *
 *   .Dt               title "unknown", volume "local"
 *   .Dt TITLE         volume "local"
 *   .Dt TITLE SEC     volume is the expansion of SEC if mdoc_a2msec()
 *                     knows it (msec is then its numeric value when SEC
 *                     is purely numeric), else SEC verbatim
 *   .Dt TITLE SEC X   X replaces the volume if mdoc_a2vol() knows it;
 *                     else X sets the architecture if mdoc_a2arch()
 *                     knows it; else X becomes the volume verbatim
 *
 * Further arguments are ignored.  Returns the result of post_prol() or
 * the EMALLOC error result.
 */
static int
post_dt(POST_ARGS)
{
	struct mdoc_node *nn;
	const char	*known;
	char		*end;
	long		 num;

	free(m->meta.title);
	free(m->meta.vol);
	free(m->meta.arch);
	m->meta.title = NULL;
	m->meta.vol = NULL;
	m->meta.arch = NULL;
	m->meta.msec = 0;

	/* No arguments at all. */

	nn = m->last->child;
	if (NULL == nn) {
		m->meta.title = strdup("unknown");
		if (NULL == m->meta.title)
			return(mdoc_nerr(m, m->last, EMALLOC));
		m->meta.vol = strdup("local");
		if (NULL == m->meta.vol)
			return(mdoc_nerr(m, m->last, EMALLOC));
		return(post_prol(m));
	}

	/* First argument: the title. */

	m->meta.title = strdup(nn->string);
	if (NULL == m->meta.title)
		return(mdoc_nerr(m, m->last, EMALLOC));

	nn = nn->next;
	if (NULL == nn) {
		m->meta.vol = strdup("local");
		if (NULL == m->meta.vol)
			return(mdoc_nerr(m, m->last, EMALLOC));
		return(post_prol(m));
	}

	/* Second argument: the manual section. */

	known = mdoc_a2msec(nn->string);
	if (NULL == known) {
		m->meta.vol = strdup(nn->string);
		if (NULL == m->meta.vol)
			return(mdoc_nerr(m, m->last, EMALLOC));
	} else {
		m->meta.vol = strdup(known);
		if (NULL == m->meta.vol)
			return(mdoc_nerr(m, m->last, EMALLOC));
		/* Only a fully numeric section sets msec. */
		errno = 0;
		num = strtol(nn->string, &end, 10);
		if ('\0' != nn->string[0] && '\0' == *end)
			m->meta.msec = (int)num;
	}

	nn = nn->next;
	if (NULL == nn)
		return(post_prol(m));

	/* Third argument: a volume or an architecture. */

	known = mdoc_a2vol(nn->string);
	if (NULL != known) {
		free(m->meta.vol);
		m->meta.vol = strdup(known);
		if (NULL == m->meta.vol)
			return(mdoc_nerr(m, m->last, EMALLOC));
	} else if (NULL != (known = mdoc_a2arch(nn->string))) {
		m->meta.arch = strdup(known);
		if (NULL == m->meta.arch)
			return(mdoc_nerr(m, m->last, EMALLOC));
	} else {
		free(m->meta.vol);
		m->meta.vol = strdup(nn->string);
		if (NULL == m->meta.vol)
			return(mdoc_nerr(m, m->last, EMALLOC));
	}

	/* Anything after the third argument is ignored. */

	return(post_prol(m));
}
/*
 * Set m->meta.os from the arguments of `Os'.  With no arguments the
 * string is built as "<sysname> <release>" from uname(2).  On success
 * MDOC_PBODY is set and the node is dropped through post_prol(), whose
 * result is returned.  Returns 0 if the arguments do not fit the
 * buffer, or the result of mdoc_nerr() for EUTSNAME, ETOOLONG and
 * EMALLOC.
 */
static int
post_os(POST_ARGS)
{
	char		 buf[64];
	struct utsname	 utsname;

	/*
	 * Release any earlier value and clear the slot right away, so
	 * that none of the error returns below leaves m->meta.os
	 * pointing at freed memory.
	 */
	free(m->meta.os);
	m->meta.os = NULL;

	buf[0] = '\0';
	if ( ! concat(m, m->last->child, buf, sizeof(buf)))
		return(0);

	if ('\0' == buf[0]) {
		/* No operating system given: describe the running one. */
		if (-1 == uname(&utsname))
			return(mdoc_nerr(m, m->last, EUTSNAME));
		if (strlcat(buf, utsname.sysname, sizeof(buf)) >= sizeof(buf))
			return(mdoc_nerr(m, m->last, ETOOLONG));
		if (strlcat(buf, " ", sizeof(buf)) >= sizeof(buf))
			return(mdoc_nerr(m, m->last, ETOOLONG));
		if (strlcat(buf, utsname.release, sizeof(buf)) >= sizeof(buf))
			return(mdoc_nerr(m, m->last, ETOOLONG));
	}

	if (NULL == (m->meta.os = strdup(buf)))
		return(mdoc_nerr(m, m->last, EMALLOC));

	m->flags |= MDOC_PBODY;
	return(post_prol(m));
}
/*
* Calculate the -width for a `Bl -tag' list if it hasn't been provided.
* Uses the first head macro.
*/
static int
post_bl_tagwidth(struct mdoc *m)
{
struct mdoc_node *n;
int sz;
char buf[32];
/*
* Use the text width, if a text node, or the default macro
* width if a macro.
*/
n = m->last->body->child;
if (n) {
assert(MDOC_BLOCK == n->type);
assert(MDOC_It == n->tok);
n = n->head->child;
}
sz = 10; /* Default size. */
if (n) {
if (MDOC_TEXT != n->type) {
if (0 == (sz = (int)mdoc_macro2len(n->tok)))
if ( ! mdoc_nwarn(m, m->last, ENOWIDTH))
return(0);
} else
sz = (int)strlen(n->string) + 1;
}
if (-1 == snprintf(buf, sizeof(buf), "%dn", sz))
return(mdoc_nerr(m, m->last, ENUMFMT));
/*
* We have to dynamically add this to the macro's argument list.
* We're guaranteed that a MDOC_Width doesn't already exist.
*/
n = m->last;
assert(n->args);
sz = (int)(n->args->argc)++;
n->args->argv = realloc(n->args->argv,
n->args->argc * sizeof(struct mdoc_argv));
if (NULL == n->args->argv)
return(mdoc_nerr(m, m->last, EMALLOC));
n->args->argv[sz].arg = MDOC_Width;
n->args->argv[sz].line = m->last->line;
n->args->argv[sz].pos = m->last->pos;
n->args->argv[sz].sz = 1;
n->args->argv[sz].value = calloc(1, sizeof(char *));
if (NULL == n->args->argv[sz].value)
return(mdoc_nerr(m, m->last, EMALLOC));
if (NULL == (n->args->argv[sz].value[0] = strdup(buf)))
return(mdoc_nerr(m, m->last, EMALLOC));
return(1);
}
static int
post_bl_width(struct mdoc *m)
{
size_t width;
int i, tok;
char buf[32];
char *p;
if (NULL == m->last->args)
return(1);
for (i = 0; i < (int)m->last->args->argc; i++)
if (MDOC_Width == m->last->args->argv[i].arg)
break;
if (i == (int)m->last->args->argc)
return(1);
p = m->last->args->argv[i].value[0];
/*
* If the value to -width is a macro, then we re-write it to be
* the macro's width as set in share/tmac/mdoc/doc-common.
*/
if (0 == strcmp(p, "Ds"))
width = 6;
else if (MDOC_MAX == (tok = mdoc_hash_find(m->htab, p)))
return(1);
else if (0 == (width = mdoc_macro2len(tok)))
return(mdoc_nwarn(m, m->last, ENOWIDTH));
/* The value already exists: free and reallocate it. */
if (-1 == snprintf(buf, sizeof(buf), "%zun", width))
return(mdoc_nerr(m, m->last, ENUMFMT));
free(m->last->args->argv[i].value[0]);
m->last->args->argv[i].value[0] = strdup(buf);
if (NULL == m->last->args->argv[i].value[0])
return(mdoc_nerr(m, m->last, EMALLOC));
return(1);
}
static int
post_bl_head(POST_ARGS)
{
int i, c;
struct mdoc_node *n, *nn, *nnp;
if (NULL == m->last->child)
return(1);
n = m->last->parent;
assert(n->args);
for (c = 0; c < (int)n->args->argc; c++)
if (MDOC_Column == n->args->argv[c].arg)
break;
/* Only process -column. */
if (c == (int)n->args->argc)
return(1);
assert(0 == n->args->argv[c].sz);
/*
* Accomodate for new-style groff column syntax. Shuffle the
* child nodes, all of which must be TEXT, as arguments for the
* column field. Then, delete the head children.
*/
n->args->argv[c].sz = (size_t)m->last->nchild;
n->args->argv[c].value = malloc
((size_t)m->last->nchild * sizeof(char *));
for (i = 0, nn = m->last->child; nn; i++) {
n->args->argv[c].value[i] = nn->string;
nn->string = NULL;
nnp = nn;
nn = nn->next;
mdoc_node_free(nnp);
}
m->last->nchild = 0;
m->last->child = NULL;
return(1);
}
/*
 * Post-process `Bl'.  The head is handed to post_bl_head().  For the
 * block itself the arguments are scanned: when -width is present its
 * value is normalised by post_bl_width(); when only -tag is present a
 * width is derived by post_bl_tagwidth().  All other node types are
 * ignored.  Returns 0 if a helper returned 0, the result of
 * post_bl_head() for a head, and 1 otherwise.
 */
static int
post_bl(POST_ARGS)
{
	int		 i, argc, has_tag, has_width;

	if (MDOC_HEAD == m->last->type)
		return(post_bl_head(m));
	if (MDOC_BLOCK != m->last->type)
		return(1);

	has_tag = 0;
	has_width = 0;

	argc = m->last->args ? (int)m->last->args->argc : 0;
	for (i = 0; i < argc; i++) {
		if (MDOC_Tag == m->last->args->argv[i].arg)
			has_tag = 1;
		if (MDOC_Width == m->last->args->argv[i].arg)
			has_width = 1;
	}

	/* An explicit -width always takes precedence over guessing. */

	if (has_width)
		return(post_bl_width(m) ? 1 : 0);
	if (has_tag)
		return(post_bl_tagwidth(m) ? 1 : 0);

	return(1);
}
static int
post_tilde(POST_ARGS)
{
struct mdoc_node *n;
if (m->last->child)
return(1);
n = m->last;
m->next = MDOC_NEXT_CHILD;
/* XXX: not documented for `Lk'. */
if ( ! mdoc_word_alloc(m, m->last->line, m->last->pos, "~"))
return(0);
m->last = n;
m->next = MDOC_NEXT_SIBLING;
return(1);
}
static int
post_ar(POST_ARGS)
{
struct mdoc_node *n;
if (m->last->child)
return(1);
n = m->last;
m->next = MDOC_NEXT_CHILD;
if ( ! mdoc_word_alloc(m, m->last->line, m->last->pos, "file"))
return(0);
m->next = MDOC_NEXT_SIBLING;
if ( ! mdoc_word_alloc(m, m->last->line, m->last->pos, "..."))
return(0);
m->last = n;
m->next = MDOC_NEXT_SIBLING;
return(1);
}
/*
 * Set m->meta.date from the arguments of `Dd'.  If mdoc_atotime()
 * yields 0 for the joined arguments, EBADDATE is warned about and the
 * current time is used instead.  The node is then dropped through
 * post_prol(), whose result is returned.  Returns 0 if the arguments
 * do not fit the buffer or the warning is fatal.
 */
static int
post_dd(POST_ARGS)
{
	char		 date[64];

	date[0] = '\0';
	if ( ! concat(m, m->last->child, date, sizeof(date)))
		return(0);

	m->meta.date = mdoc_atotime(date);
	if (0 != m->meta.date)
		return(post_prol(m));

	/* Unparseable date: warn, then fall back to "now". */

	if ( ! mdoc_nwarn(m, m->last, EBADDATE))
		return(0);
	m->meta.date = time(NULL);

	return(post_prol(m));
}
static int
post_prol(POST_ARGS)
{
struct mdoc_node *n;
/*
* The end document shouldn't have the prologue macros as part
* of the syntax tree (they encompass only meta-data).
*/
if (m->last->parent->child == m->last)
m->last->parent->child = m->last->prev;
if (m->last->prev)
m->last->prev->next = NULL;
n = m->last;
assert(NULL == m->last->next);
if (m->last->prev) {
m->last = m->last->prev;
m->next = MDOC_NEXT_SIBLING;
} else {
m->last = m->last->parent;
m->next = MDOC_NEXT_CHILD;
}
mdoc_node_freelist(n);
return(1);
}
/*
 * Set MDOC_LITERAL in the parser flags when the body of a `Dl' is
 * entered.  Other node types are ignored.  Always returns 1.
 */
static int
pre_dl(PRE_ARGS)
{

	if (MDOC_BODY != n->type)
		return(1);

	m->flags |= MDOC_LITERAL;
	return(1);
}
static int
pre_bd(PRE_ARGS)
{
int i;
if (MDOC_BODY != n->type)
return(1);
/* Enter literal context if `Bd -literal' or `-unfilled'. */
/*
* TODO: `-offset' without an argument should be the width of
* the literal "<string>".
*/
for (n = n->parent, i = 0; i < (int)n->args->argc; i++)
if (MDOC_Literal == n->args->argv[i].arg)
break;
else if (MDOC_Unfilled == n->args->argv[i].arg)
break;
if (i < (int)n->args->argc)
m->flags |= MDOC_LITERAL;
return(1);
}
/*
 * Clear MDOC_LITERAL from the parser flags when the body of a display
 * (registered for `Dl' and `Bd') is closed.  Other node types are
 * ignored.  Always returns 1.
 */
static int
post_display(POST_ARGS)
{

	if (MDOC_BODY != m->last->type)
		return(1);

	m->flags &= ~MDOC_LITERAL;
	return(1);
}