/* $Id$ */
/*
* Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "config.h"
#include <sys/types.h>
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "mandoc.h"
#include "mandoc_aux.h"
#include "libmandoc.h"
#include "libroff.h"
/* Upper bound on the nesting of "define" substitutions. */
#define EQN_NEST_MAX 128
/*
 * Compare two counted strings: true if and only if both lengths are
 * equal and the first sz1 bytes of p1 and p2 compare equal.
 */
#define STRNEQ(p1, sz1, p2, sz2) \
	((sz1) == (sz2) && strncmp((p1), (p2), (sz1)) == 0)
enum eqn_tok {
EQN_TOK_DYAD = 0,
EQN_TOK_VEC,
EQN_TOK_UNDER,
EQN_TOK_BAR,
EQN_TOK_TILDE,
EQN_TOK_HAT,
EQN_TOK_DOT,
EQN_TOK_DOTDOT,
EQN_TOK_FWD,
EQN_TOK_BACK,
EQN_TOK_DOWN,
EQN_TOK_UP,
EQN_TOK_FAT,
EQN_TOK_ROMAN,
EQN_TOK_ITALIC,
EQN_TOK_BOLD,
EQN_TOK_SIZE,
EQN_TOK_SUB,
EQN_TOK_SUP,
EQN_TOK_SQRT,
EQN_TOK_OVER,
EQN_TOK_FROM,
EQN_TOK_TO,
EQN_TOK_BRACE_OPEN,
EQN_TOK_BRACE_CLOSE,
EQN_TOK_GSIZE,
EQN_TOK_GFONT,
EQN_TOK_MARK,
EQN_TOK_LINEUP,
EQN_TOK_LEFT,
EQN_TOK_RIGHT,
EQN_TOK_PILE,
EQN_TOK_LPILE,
EQN_TOK_RPILE,
EQN_TOK_CPILE,
EQN_TOK_MATRIX,
EQN_TOK_CCOL,
EQN_TOK_LCOL,
EQN_TOK_RCOL,
EQN_TOK_DELIM,
EQN_TOK_DEFINE,
EQN_TOK_TDEFINE,
EQN_TOK_NDEFINE,
EQN_TOK_UNDEF,
EQN_TOK_ABOVE,
EQN_TOK__MAX,
EQN_TOK_FUNC,
EQN_TOK_QUOTED,
EQN_TOK_SYM,
EQN_TOK_EOF
};
/*
 * Spelling of each keyword token, indexed by enum eqn_tok.
 * The table is read-only: both the strings and the pointers are
 * const, matching the declaration of eqn_func[].
 */
static const char *const eqn_toks[EQN_TOK__MAX] = {
	"dyad", /* EQN_TOK_DYAD */
	"vec", /* EQN_TOK_VEC */
	"under", /* EQN_TOK_UNDER */
	"bar", /* EQN_TOK_BAR */
	"tilde", /* EQN_TOK_TILDE */
	"hat", /* EQN_TOK_HAT */
	"dot", /* EQN_TOK_DOT */
	"dotdot", /* EQN_TOK_DOTDOT */
	"fwd", /* EQN_TOK_FWD */
	"back", /* EQN_TOK_BACK */
	"down", /* EQN_TOK_DOWN */
	"up", /* EQN_TOK_UP */
	"fat", /* EQN_TOK_FAT */
	"roman", /* EQN_TOK_ROMAN */
	"italic", /* EQN_TOK_ITALIC */
	"bold", /* EQN_TOK_BOLD */
	"size", /* EQN_TOK_SIZE */
	"sub", /* EQN_TOK_SUB */
	"sup", /* EQN_TOK_SUP */
	"sqrt", /* EQN_TOK_SQRT */
	"over", /* EQN_TOK_OVER */
	"from", /* EQN_TOK_FROM */
	"to", /* EQN_TOK_TO */
	"{", /* EQN_TOK_BRACE_OPEN */
	"}", /* EQN_TOK_BRACE_CLOSE */
	"gsize", /* EQN_TOK_GSIZE */
	"gfont", /* EQN_TOK_GFONT */
	"mark", /* EQN_TOK_MARK */
	"lineup", /* EQN_TOK_LINEUP */
	"left", /* EQN_TOK_LEFT */
	"right", /* EQN_TOK_RIGHT */
	"pile", /* EQN_TOK_PILE */
	"lpile", /* EQN_TOK_LPILE */
	"rpile", /* EQN_TOK_RPILE */
	"cpile", /* EQN_TOK_CPILE */
	"matrix", /* EQN_TOK_MATRIX */
	"ccol", /* EQN_TOK_CCOL */
	"lcol", /* EQN_TOK_LCOL */
	"rcol", /* EQN_TOK_RCOL */
	"delim", /* EQN_TOK_DELIM */
	"define", /* EQN_TOK_DEFINE */
	"tdefine", /* EQN_TOK_TDEFINE */
	"ndefine", /* EQN_TOK_NDEFINE */
	"undef", /* EQN_TOK_UNDEF */
	"above", /* EQN_TOK_ABOVE */
};
/*
 * Function names: a token spelled exactly like one of these makes
 * eqn_next() return EQN_TOK_FUNC in MODE_TOK.
 */
static const char *const eqn_func[] = {
	"acos", "acsc", "and", "arc", "asec", "asin", "atan",
	"cos", "cosh", "coth", "csc", "det", "exp", "for",
	"if", "lim", "ln", "log", "max", "min",
	"sec", "sin", "sinh", "tan", "tanh", "Im", "Re",
};
/*
 * Indices into the eqnsyms[] table, one per predefined symbol name,
 * in the same order as the table entries.
 * EQNSYM__MAX is the number of entries.
 */
enum eqn_symt {
	EQNSYM_alpha = 0,
	EQNSYM_beta,
	EQNSYM_chi,
	EQNSYM_delta,
	EQNSYM_epsilon,
	EQNSYM_eta,
	EQNSYM_gamma,
	EQNSYM_iota,
	EQNSYM_kappa,
	EQNSYM_lambda,
	EQNSYM_mu,
	EQNSYM_nu,
	EQNSYM_omega,
	EQNSYM_omicron,
	EQNSYM_phi,
	EQNSYM_pi,
	EQNSYM_psi,
	EQNSYM_rho,
	EQNSYM_sigma,
	EQNSYM_tau,
	EQNSYM_theta,
	EQNSYM_upsilon,
	EQNSYM_xi,
	EQNSYM_zeta,
	EQNSYM_DELTA,
	EQNSYM_GAMMA,
	EQNSYM_LAMBDA,
	EQNSYM_OMEGA,
	EQNSYM_PHI,
	EQNSYM_PI,
	EQNSYM_PSI,
	EQNSYM_SIGMA,
	EQNSYM_THETA,
	EQNSYM_UPSILON,
	EQNSYM_XI,
	EQNSYM_inter,
	EQNSYM_union,
	EQNSYM_prod,
	EQNSYM_int,
	EQNSYM_sum,
	EQNSYM_grad,
	EQNSYM_del,
	EQNSYM_times,
	EQNSYM_cdot,
	EQNSYM_nothing,
	EQNSYM_approx,
	EQNSYM_prime,
	EQNSYM_half,
	EQNSYM_partial,
	EQNSYM_inf,
	EQNSYM_muchgreat,
	EQNSYM_muchless,
	EQNSYM_larrow,
	EQNSYM_rarrow,
	EQNSYM_pm,
	EQNSYM_nequal,
	EQNSYM_equiv,
	EQNSYM_lessequal,
	EQNSYM_moreequal,
	EQNSYM_minus,
	EQNSYM__MAX
};
struct eqnsym {
const char *str;
const char *sym;
};
/*
 * Table of predefined symbols.
 * In MODE_TOK, eqn_next() compares each token against the first
 * member and, on a match, replaces the token by "\[" + the second
 * member + "]".
 */
static const struct eqnsym eqnsyms[EQNSYM__MAX] = {
	{ "alpha", "*a" }, /* EQNSYM_alpha */
	{ "beta", "*b" }, /* EQNSYM_beta */
	{ "chi", "*x" }, /* EQNSYM_chi */
	{ "delta", "*d" }, /* EQNSYM_delta */
	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
	{ "eta", "*y" }, /* EQNSYM_eta */
	{ "gamma", "*g" }, /* EQNSYM_gamma */
	{ "iota", "*i" }, /* EQNSYM_iota */
	{ "kappa", "*k" }, /* EQNSYM_kappa */
	{ "lambda", "*l" }, /* EQNSYM_lambda */
	{ "mu", "*m" }, /* EQNSYM_mu */
	{ "nu", "*n" }, /* EQNSYM_nu */
	{ "omega", "*w" }, /* EQNSYM_omega */
	{ "omicron", "*o" }, /* EQNSYM_omicron */
	{ "phi", "*f" }, /* EQNSYM_phi */
	{ "pi", "*p" }, /* EQNSYM_pi */
	{ "psi", "*q" }, /* EQNSYM_psi */
	{ "rho", "*r" }, /* EQNSYM_rho */
	{ "sigma", "*s" }, /* EQNSYM_sigma */
	{ "tau", "*t" }, /* EQNSYM_tau */
	{ "theta", "*h" }, /* EQNSYM_theta */
	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
	{ "xi", "*c" }, /* EQNSYM_xi */
	{ "zeta", "*z" }, /* EQNSYM_zeta */
	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
	{ "PHI", "*F" }, /* EQNSYM_PHI */
	{ "PI", "*P" }, /* EQNSYM_PI */
	{ "PSI", "*Q" }, /* EQNSYM_PSI */
	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
	{ "THETA", "*H" }, /* EQNSYM_THETA */
	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
	{ "XI", "*C" }, /* EQNSYM_XI */
	{ "inter", "ca" }, /* EQNSYM_inter */
	{ "union", "cu" }, /* EQNSYM_union */
	{ "prod", "product" }, /* EQNSYM_prod */
	{ "int", "integral" }, /* EQNSYM_int */
	{ "sum", "sum" }, /* EQNSYM_sum */
	{ "grad", "gr" }, /* EQNSYM_grad */
	{ "del", "gr" }, /* EQNSYM_del */
	{ "times", "mu" }, /* EQNSYM_times */
	{ "cdot", "pc" }, /* EQNSYM_cdot */
	{ "nothing", "&" }, /* EQNSYM_nothing */
	{ "approx", "~~" }, /* EQNSYM_approx */
	{ "prime", "fm" }, /* EQNSYM_prime */
	{ "half", "12" }, /* EQNSYM_half */
	{ "partial", "pd" }, /* EQNSYM_partial */
	{ "inf", "if" }, /* EQNSYM_inf */
	{ ">>", ">>" }, /* EQNSYM_muchgreat */
	{ "<<", "<<" }, /* EQNSYM_muchless */
	{ "<-", "<-" }, /* EQNSYM_larrow */
	{ "->", "->" }, /* EQNSYM_rarrow */
	{ "+-", "+-" }, /* EQNSYM_pm */
	{ "!=", "!=" }, /* EQNSYM_nequal */
	{ "==", "==" }, /* EQNSYM_equiv */
	{ "<=", "<=" }, /* EQNSYM_lessequal */
	{ ">=", ">=" }, /* EQNSYM_moreequal */
	{ "-", "mi" }, /* EQNSYM_minus */
};
enum parse_mode {
MODE_QUOTED,
MODE_NOSUB,
MODE_SUB,
MODE_TOK
};
/* Forward declarations of the file-local helpers defined below. */
static struct eqn_box *eqn_box_alloc(struct eqn_node *, struct eqn_box *);
static void eqn_box_free(struct eqn_box *);
static struct eqn_box *eqn_box_makebinary(struct eqn_node *,
    enum eqn_post, struct eqn_box *);
static void eqn_def(struct eqn_node *);
static struct eqn_def *eqn_def_find(struct eqn_node *);
static void eqn_delim(struct eqn_node *);
static enum eqn_tok eqn_next(struct eqn_node *, enum parse_mode);
static enum rofferr eqn_parse(struct eqn_node *, struct eqn_box *);
static void eqn_undef(struct eqn_node *);
enum rofferr
eqn_read(struct eqn_node **epp, int ln,
const char *p, int pos, int *offs)
{
size_t sz;
struct eqn_node *ep;
enum rofferr er;
ep = *epp;
/*
* If we're the terminating mark, unset our equation status and
* validate the full equation.
*/
if (0 == strncmp(p, ".EN", 3)) {
er = eqn_end(epp);
p += 3;
while (' ' == *p || '\t' == *p)
p++;
if ('\0' == *p)
return er;
mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse,
ln, pos, "EN %s", p);
return er;
}
/*
* Build up the full string, replacing all newlines with regular
* whitespace.
*/
sz = strlen(p + pos) + 1;
ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1);
/* First invocation: nil terminate the string. */
if (0 == ep->sz)
*ep->data = '\0';
ep->sz += sz;
strlcat(ep->data, p + pos, ep->sz + 1);
strlcat(ep->data, " ", ep->sz + 1);
return ROFF_IGN;
}
/*
 * Allocate a zeroed equation node and record the parser it belongs
 * to, the input position where it starts, and the default size.
 */
struct eqn_node *
eqn_alloc(int pos, int line, struct mparse *parse)
{
	struct eqn_node	*node;

	node = mandoc_calloc(1, sizeof(*node));
	node->gsize = EQN_DEFSIZE;
	node->eqn.pos = pos;
	node->eqn.ln = line;
	node->parse = parse;
	return node;
}
/*
* Find the key "key" of the give size within our eqn-defined values.
*/
static struct eqn_def *
eqn_def_find(struct eqn_node *ep)
{
int i;
for (i = 0; i < (int)ep->defsz; i++)
if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
ep->defs[i].keysz, ep->start, ep->toksz))
return &ep->defs[i];
return NULL;
}
/*
 * Parse a token from the input text, starting at ep->end.
 * On return, ep->start and ep->toksz describe the token and ep->end
 * points to the beginning of the next one.  The modes are:
 * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
 *   before its next occurrence.  Do not interpret the token in any
 *   way and return EQN_TOK_QUOTED.  All other modes behave like
 *   MODE_QUOTED when *ep->start is '"'.
 * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
 *   otherwise, it ends before the next whitespace or brace.
 *   Do not interpret the token and return EQN_TOK__MAX.
 * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
 *   alias created with define.  If it is an alias, replace it with
 *   its string value and reparse.
 * MODE_TOK: Like MODE_SUB, but also check the token against the list
 *   of tokens, and if there is a match, return that token.  Otherwise,
 *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
 *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
 *   a token match and for EQN_TOK_EOF, ep->start is set to an
 *   allocated string that the caller is expected to free.
 * All modes skip whitespace following the end of the token.
 * All modes return EQN_TOK_EOF at the end of the input text; the
 * substituting modes also return it when more than EQN_NEST_MAX
 * substitutions happen without the input position advancing beyond
 * the end of the previous replacement text.
 */
static enum eqn_tok
eqn_next(struct eqn_node *ep, enum parse_mode mode)
{
	/*
	 * last_len: offset in ep->data of the end of the most recent
	 * replacement text; lim: number of substitutions since the
	 * input position last passed that offset.
	 * Both are function-static and hence not per-equation.
	 */
	static int last_len, lim;
	struct eqn_def *def;
	size_t start;
	int diff, i, quoted;
	enum eqn_tok tok;

	/*
	 * Reset the recursion counter after advancing
	 * beyond the end of the previous substitution.
	 */
	if (ep->end - ep->data >= last_len)
		lim = 0;
	ep->start = ep->end;
	quoted = mode == MODE_QUOTED;
	/* Each iteration scans one token; it repeats after a substitution. */
	for (;;) {
		switch (*ep->start) {
		case '\0':
			ep->toksz = 0;
			return EQN_TOK_EOF;
		case '"':
			quoted = 1;
			break;
		default:
			break;
		}
		if (quoted) {
			/* The token extends up to the matching delimiter. */
			ep->end = strchr(ep->start + 1, *ep->start);
			ep->start++; /* Skip opening quote. */
			if (ep->end == NULL) {
				/* Unterminated: take the rest of the text. */
				mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse,
				    ep->eqn.ln, ep->eqn.pos, NULL);
				ep->end = strchr(ep->start, '\0');
			}
		} else {
			/* A brace is a token by itself, else scan a word. */
			ep->end = ep->start + 1;
			if (*ep->start != '{' && *ep->start != '}')
				ep->end += strcspn(ep->end, " ^~\"{}\t");
		}
		ep->toksz = ep->end - ep->start;
		if (quoted && *ep->end != '\0')
			ep->end++; /* Skip closing quote. */
		/* Skip the separators following the token. */
		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
			ep->end++;
		if (quoted) /* Cannot return, may have to strndup. */
			break;
		if (mode == MODE_NOSUB)
			return EQN_TOK__MAX;
		if ((def = eqn_def_find(ep)) == NULL)
			break;
		if (++lim > EQN_NEST_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse,
			    ep->eqn.ln, ep->eqn.pos, NULL);
			return EQN_TOK_EOF;
		}
		/* Replace a defined name with its string value. */
		if ((diff = def->valsz - ep->toksz) > 0) {
			/*
			 * The value is longer than the name: grow the
			 * buffer and re-derive ep->start, because the
			 * reallocation may have moved the data.
			 */
			start = ep->start - ep->data;
			ep->sz += diff;
			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
			ep->start = ep->data + start;
		}
		/* Shift the tail, including the NUL, then copy the value in. */
		if (diff)
			memmove(ep->start + def->valsz, ep->start + ep->toksz,
			    strlen(ep->start + ep->toksz) + 1);
		memcpy(ep->start, def->val, def->valsz);
		last_len = ep->start - ep->data + def->valsz;
	}
	/* Only MODE_TOK interprets the token and hands out a copy. */
	if (mode != MODE_TOK)
		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
	if (quoted) {
		ep->start = mandoc_strndup(ep->start, ep->toksz);
		return EQN_TOK_QUOTED;
	}
	/* Keywords are returned without allocating anything. */
	for (tok = 0; tok < EQN_TOK__MAX; tok++)
		if (STRNEQ(ep->start, ep->toksz,
		    eqn_toks[tok], strlen(eqn_toks[tok])))
			return tok;
	/* Predefined symbols become special character escapes. */
	for (i = 0; i < EQNSYM__MAX; i++) {
		if (STRNEQ(ep->start, ep->toksz,
		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
			mandoc_asprintf(&ep->start,
			    "\\[%s]", eqnsyms[i].sym);
			return EQN_TOK_SYM;
		}
	}
	/* Anything else is copied; function names are merely flagged. */
	ep->start = mandoc_strndup(ep->start, ep->toksz);
	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
		if (STRNEQ(ep->start, ep->toksz,
		    eqn_func[i], strlen(eqn_func[i])))
			return EQN_TOK_FUNC;
	return EQN_TOK__MAX;
}
/*
 * Free a box, all its following siblings, and all their descendants,
 * including the strings they own.
 * A NULL argument is accepted and ignored, like with free(3).
 * Siblings are walked iteratively, so the recursion depth is bounded
 * by the nesting depth of the tree and not by the length of a
 * sibling chain.
 */
static void
eqn_box_free(struct eqn_box *bp)
{
	struct eqn_box	*next;

	while (bp != NULL) {
		/* Remember the sibling before bp itself goes away. */
		next = bp->next;
		eqn_box_free(bp->first);
		free(bp->text);
		free(bp->left);
		free(bp->right);
		free(bp->top);
		free(bp->bottom);
		free(bp);
		bp = next;
	}
}
/*
 * Create a new, zeroed box and append it to the children of parent.
 * The box inherits the font of the parent, takes the current global
 * size of the equation, and accepts any number of arguments.
 * The argument count of the parent is incremented.
 */
static struct eqn_box *
eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
{
	struct eqn_box	*box, *tail;

	box = mandoc_calloc(1, sizeof(*box));
	box->parent = parent;
	box->expectargs = UINT_MAX;
	box->font = parent->font;
	box->size = ep->gsize;

	/* Link the box in behind the current last child, if any. */

	if (parent->first == NULL) {
		parent->first = box;
	} else {
		tail = parent->last;
		tail->next = box;
		box->prev = tail;
	}
	parent->last = box;
	parent->args++;
	return box;
}
/*
 * Take the last child of parent out of the tree, put a new
 * EQN_SUBEXPR box with the given position in its place, and make
 * the detached box the first and only child of that new box.
 * The new box expects two arguments, of which one is now present.
 * Return the new box.  The parent must have at least one child.
 */
static struct eqn_box *
eqn_box_makebinary(struct eqn_node *ep,
    enum eqn_post pos, struct eqn_box *parent)
{
	struct eqn_box	*child, *pivot;

	child = parent->last;
	assert(child != NULL);

	/* Detach the last child from its parent. */

	if (child == parent->first)
		parent->first = NULL;
	parent->last = child->prev;
	parent->args--;
	child->prev = NULL;

	/* The pivot takes over the vacated position. */

	pivot = eqn_box_alloc(ep, parent);
	pivot->type = EQN_SUBEXPR;
	pivot->pos = pos;
	pivot->expectargs = 2;
	pivot->args = 1;

	/* Hang the detached box below the pivot. */

	pivot->first = child;
	pivot->last = child;
	child->next = NULL;
	child->parent = pivot;
	return pivot;
}
/*
 * Handle the argument of the "delim" control statement, found at
 * ep->end: "off" disables the delimiters, "on" re-enables them if
 * both have been set before, and any other argument of at least two
 * characters sets the opening and closing delimiter and enables
 * them.  A shorter argument is reported as missing.
 * In all cases, ep->end is advanced past the characters consumed.
 */
static void
eqn_delim(struct eqn_node *ep)
{
	/* Fewer than two characters left: nothing usable. */

	if (ep->end[0] == '\0' || ep->end[1] == '\0') {
		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
		    ep->eqn.ln, ep->eqn.pos, "delim");
		if (ep->end[0] != '\0')
			ep->end += 1;
		return;
	}

	if (strncmp(ep->end, "off", 3) == 0) {
		ep->delim = 0;
		ep->end += 3;
		return;
	}

	if (strncmp(ep->end, "on", 2) == 0) {
		if (ep->odelim && ep->cdelim)
			ep->delim = 1;
		ep->end += 2;
		return;
	}

	/* Two arbitrary characters: the new delimiters. */

	ep->odelim = ep->end[0];
	ep->cdelim = ep->end[1];
	ep->end += 2;
	ep->delim = 1;
}
/*
 * Handle the "undef" control statement: read the next word without
 * substitution and, if a definition with that name exists, release
 * its key and value and mark its slot as unused.
 * A missing name is reported; an unknown name is silently ignored.
 */
static void
eqn_undef(struct eqn_node *ep)
{
	struct eqn_def	*victim;

	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
		    ep->eqn.ln, ep->eqn.pos, "undef");
		return;
	}

	victim = eqn_def_find(ep);
	if (victim != NULL) {
		free(victim->key);
		free(victim->val);
		victim->key = NULL;
		victim->val = NULL;
		victim->keysz = 0;
		victim->valsz = 0;
	}
}
/*
 * Handle the "define" and "ndefine" control statements: read a name
 * without substitution, then a value delimited by its first
 * character, and store the pair.  An existing definition of the same
 * name gets its value replaced; otherwise an unused slot is reused
 * or the array of definitions is grown by one.
 * If the name is missing, nothing changes; if the value is missing,
 * the definition is removed again.  Both cases are reported.
 */
static void
eqn_def(struct eqn_node *ep)
{
	struct eqn_def	*slot;
	int		 idx, nslots;

	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
		    ep->eqn.ln, ep->eqn.pos, "define");
		return;
	}

	slot = eqn_def_find(ep);
	if (slot == NULL) {

		/* Not yet defined: look for the first unused slot. */

		nslots = (int)ep->defsz;
		idx = 0;
		while (idx < nslots && ep->defs[idx].keysz != 0)
			idx++;

		/* No unused slot: append one to the array. */

		if (idx == nslots) {
			ep->defsz++;
			ep->defs = mandoc_reallocarray(ep->defs,
			    ep->defsz, sizeof(struct eqn_def));
			ep->defs[idx].key = NULL;
			ep->defs[idx].val = NULL;
		}

		slot = &ep->defs[idx];
		free(slot->key);
		slot->key = mandoc_strndup(ep->start, ep->toksz);
		slot->keysz = ep->toksz;
	}

	if (eqn_next(ep, MODE_QUOTED) != EQN_TOK_EOF) {
		free(slot->val);
		slot->val = mandoc_strndup(ep->start, ep->toksz);
		slot->valsz = ep->toksz;
		return;
	}

	/* The value is missing: report it and drop the definition. */

	mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse,
	    ep->eqn.ln, ep->eqn.pos, "define %s", slot->key);
	free(slot->key);
	free(slot->val);
	slot->key = NULL;
	slot->val = NULL;
	slot->keysz = 0;
	slot->valsz = 0;
}
/*
* Recursively parse an eqn(7) expression.
*/
static enum rofferr
eqn_parse(struct eqn_node *ep, struct eqn_box *parent)
{
struct eqn_box *cur, *nbox, *split;
const char *cp, *cpn;
char *p;
enum eqn_tok tok;
enum eqn_post pos;
enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
int size;
assert(parent != NULL);
/*
* Empty equation.
* Do not add it to the high-level syntax tree.
*/
if (ep->data == NULL)
return ROFF_IGN;
ep->start = ep->end = ep->data + strspn(ep->data, " ^~");
next_tok:
tok = eqn_next(ep, MODE_TOK);
switch (tok) {
case EQN_TOK_UNDEF:
eqn_undef(ep);
break;
case EQN_TOK_NDEFINE:
case EQN_TOK_DEFINE:
eqn_def(ep);
break;
case EQN_TOK_TDEFINE:
if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
ep->eqn.ln, ep->eqn.pos, "tdefine");
break;
case EQN_TOK_DELIM:
eqn_delim(ep);
break;
case EQN_TOK_GFONT:
if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
break;
case EQN_TOK_MARK:
case EQN_TOK_LINEUP:
/* Ignore these. */
break;
case EQN_TOK_DYAD:
case EQN_TOK_VEC:
case EQN_TOK_UNDER:
case EQN_TOK_BAR:
case EQN_TOK_TILDE:
case EQN_TOK_HAT:
case EQN_TOK_DOT:
case EQN_TOK_DOTDOT:
if (parent->last == NULL) {
mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
cur = eqn_box_alloc(ep, parent);
cur->type = EQN_TEXT;
cur->text = mandoc_strdup("");
}
parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent);
parent->type = EQN_LISTONE;
parent->expectargs = 1;
parent->font = EQNFONT_ROMAN;
switch (tok) {
case EQN_TOK_DOTDOT:
parent->top = mandoc_strdup("\\[ad]");
break;
case EQN_TOK_VEC:
parent->top = mandoc_strdup("\\[->]");
break;
case EQN_TOK_DYAD:
parent->top = mandoc_strdup("\\[<>]");
break;
case EQN_TOK_TILDE:
parent->top = mandoc_strdup("\\[a~]");
break;
case EQN_TOK_UNDER:
parent->bottom = mandoc_strdup("\\[ul]");
break;
case EQN_TOK_BAR:
parent->top = mandoc_strdup("\\[rl]");
break;
case EQN_TOK_DOT:
parent->top = mandoc_strdup("\\[a.]");
break;
case EQN_TOK_HAT:
parent->top = mandoc_strdup("\\[ha]");
break;
default:
abort();
}
parent = parent->parent;
break;
case EQN_TOK_FWD:
case EQN_TOK_BACK:
case EQN_TOK_DOWN:
case EQN_TOK_UP:
if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
break;
case EQN_TOK_FAT:
case EQN_TOK_ROMAN:
case EQN_TOK_ITALIC:
case EQN_TOK_BOLD:
while (parent->args == parent->expectargs)
parent = parent->parent;
/*
* These values apply to the next word or sequence of
* words; thus, we mark that we'll have a child with
* exactly one of those.
*/
parent = eqn_box_alloc(ep, parent);
parent->type = EQN_LISTONE;
parent->expectargs = 1;
switch (tok) {
case EQN_TOK_FAT:
parent->font = EQNFONT_FAT;
break;
case EQN_TOK_ROMAN:
parent->font = EQNFONT_ROMAN;
break;
case EQN_TOK_ITALIC:
parent->font = EQNFONT_ITALIC;
break;
case EQN_TOK_BOLD:
parent->font = EQNFONT_BOLD;
break;
default:
abort();
}
break;
case EQN_TOK_SIZE:
case EQN_TOK_GSIZE:
/* Accept two values: integral size and a single. */
if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
break;
}
size = mandoc_strntoi(ep->start, ep->toksz, 10);
if (-1 == size) {
mandoc_msg(MANDOCERR_IT_NONUM, ep->parse,
ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
break;
}
if (EQN_TOK_GSIZE == tok) {
ep->gsize = size;
break;
}
parent = eqn_box_alloc(ep, parent);
parent->type = EQN_LISTONE;
parent->expectargs = 1;
parent->size = size;
break;
case EQN_TOK_FROM:
case EQN_TOK_TO:
case EQN_TOK_SUB:
case EQN_TOK_SUP:
/*
* We have a left-right-associative expression.
* Repivot under a positional node, open a child scope
* and keep on reading.
*/
if (parent->last == NULL) {
mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
cur = eqn_box_alloc(ep, parent);
cur->type = EQN_TEXT;
cur->text = mandoc_strdup("");
}
/* Handle the "subsup" and "fromto" positions. */
if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) {
parent->expectargs = 3;
parent->pos = EQNPOS_SUBSUP;
break;
}
if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) {
parent->expectargs = 3;
parent->pos = EQNPOS_FROMTO;
break;
}
switch (tok) {
case EQN_TOK_FROM:
pos = EQNPOS_FROM;
break;
case EQN_TOK_TO:
pos = EQNPOS_TO;
break;
case EQN_TOK_SUP:
pos = EQNPOS_SUP;
break;
case EQN_TOK_SUB:
pos = EQNPOS_SUB;
break;
default:
abort();
}
parent = eqn_box_makebinary(ep, pos, parent);
break;
case EQN_TOK_SQRT:
while (parent->args == parent->expectargs)
parent = parent->parent;
/*
* Accept a left-right-associative set of arguments just
* like sub and sup and friends but without rebalancing
* under a pivot.
*/
parent = eqn_box_alloc(ep, parent);
parent->type = EQN_SUBEXPR;
parent->pos = EQNPOS_SQRT;
parent->expectargs = 1;
break;
case EQN_TOK_OVER:
/*
* We have a right-left-associative fraction.
* Close out anything that's currently open, then
* rebalance and continue reading.
*/
if (parent->last == NULL) {
mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
cur = eqn_box_alloc(ep, parent);
cur->type = EQN_TEXT;
cur->text = mandoc_strdup("");
}
while (EQN_SUBEXPR == parent->type)
parent = parent->parent;
parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent);
break;
case EQN_TOK_RIGHT:
case EQN_TOK_BRACE_CLOSE:
/*
* Close out the existing brace.
* FIXME: this is a shitty sentinel: we should really
* have a native EQN_BRACE type or whatnot.
*/
for (cur = parent; cur != NULL; cur = cur->parent)
if (cur->type == EQN_LIST &&
(tok == EQN_TOK_BRACE_CLOSE ||
cur->left != NULL))
break;
if (cur == NULL) {
mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse,
ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
break;
}
parent = cur;
if (EQN_TOK_RIGHT == tok) {
if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
mandoc_msg(MANDOCERR_REQ_EMPTY,
ep->parse, ep->eqn.ln,
ep->eqn.pos, eqn_toks[tok]);
break;
}
/* Handling depends on right/left. */
if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
parent->right = mandoc_strdup("\\[rc]");
else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
parent->right = mandoc_strdup("\\[rf]");
else
parent->right =
mandoc_strndup(ep->start, ep->toksz);
}
parent = parent->parent;
if (tok == EQN_TOK_BRACE_CLOSE &&
(parent->type == EQN_PILE ||
parent->type == EQN_MATRIX))
parent = parent->parent;
/* Close out any "singleton" lists. */
while (parent->type == EQN_LISTONE &&
parent->args == parent->expectargs)
parent = parent->parent;
break;
case EQN_TOK_BRACE_OPEN:
case EQN_TOK_LEFT:
/*
* If we already have something in the stack and we're
* in an expression, then rewind til we're not any more
* (just like with the text node).
*/
while (parent->args == parent->expectargs)
parent = parent->parent;
if (EQN_TOK_LEFT == tok &&
eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
break;
}
parent = eqn_box_alloc(ep, parent);
parent->type = EQN_LIST;
if (EQN_TOK_LEFT == tok) {
if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
parent->left = mandoc_strdup("\\[lc]");
else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
parent->left = mandoc_strdup("\\[lf]");
else
parent->left =
mandoc_strndup(ep->start, ep->toksz);
}
break;
case EQN_TOK_PILE:
case EQN_TOK_LPILE:
case EQN_TOK_RPILE:
case EQN_TOK_CPILE:
case EQN_TOK_CCOL:
case EQN_TOK_LCOL:
case EQN_TOK_RCOL:
while (parent->args == parent->expectargs)
parent = parent->parent;
parent = eqn_box_alloc(ep, parent);
parent->type = EQN_PILE;
parent->expectargs = 1;
break;
case EQN_TOK_ABOVE:
for (cur = parent; cur != NULL; cur = cur->parent)
if (cur->type == EQN_PILE)
break;
if (cur == NULL) {
mandoc_msg(MANDOCERR_IT_STRAY, ep->parse,
ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
break;
}
parent = eqn_box_alloc(ep, cur);
parent->type = EQN_LIST;
break;
case EQN_TOK_MATRIX:
while (parent->args == parent->expectargs)
parent = parent->parent;
parent = eqn_box_alloc(ep, parent);
parent->type = EQN_MATRIX;
parent->expectargs = 1;
break;
case EQN_TOK_EOF:
/*
* End of file!
* TODO: make sure we're not in an open subexpression.
*/
return ROFF_EQN;
case EQN_TOK__MAX:
case EQN_TOK_FUNC:
case EQN_TOK_QUOTED:
case EQN_TOK_SYM:
p = ep->start;
assert(p != NULL);
/*
* If we already have something in the stack and we're
* in an expression, then rewind til we're not any more.
*/
while (parent->args == parent->expectargs)
parent = parent->parent;
cur = eqn_box_alloc(ep, parent);
cur->type = EQN_TEXT;
cur->text = p;
switch (tok) {
case EQN_TOK_FUNC:
cur->font = EQNFONT_ROMAN;
break;
case EQN_TOK_QUOTED:
if (cur->font == EQNFONT_NONE)
cur->font = EQNFONT_ITALIC;
break;
case EQN_TOK_SYM:
break;
default:
if (cur->font != EQNFONT_NONE || *p == '\0')
break;
cpn = p - 1;
ccln = CCL_LET;
split = NULL;
for (;;) {
/* Advance to next character. */
cp = cpn++;
ccl = ccln;
ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
isdigit((unsigned char)*cpn) ||
(*cpn == '.' && (ccl == CCL_DIG ||
isdigit((unsigned char)cpn[1]))) ?
CCL_DIG : CCL_PUN;
/* No boundary before first character. */
if (cp < p)
continue;
cur->font = ccl == CCL_LET ?
EQNFONT_ITALIC : EQNFONT_ROMAN;
if (*cp == '\\')
mandoc_escape(&cpn, NULL, NULL);
/* No boundary after last character. */
if (*cpn == '\0')
break;
if (ccln == ccl)
continue;
/* Boundary found, split the text. */
if (parent->args == parent->expectargs) {
/* Remove the text from the tree. */
if (cur->prev == NULL)
parent->first = cur->next;
else
cur->prev->next = NULL;
parent->last = cur->prev;
parent->args--;
/* Set up a list instead. */
split = eqn_box_alloc(ep, parent);
split->type = EQN_LIST;
/* Insert the word into the list. */
split->first = split->last = cur;
cur->parent = split;
cur->prev = NULL;
parent = split;
}
/* Append a new text box. */
nbox = eqn_box_alloc(ep, parent);
nbox->type = EQN_TEXT;
nbox->text = mandoc_strdup(cpn);
/* Truncate the old box. */
p = mandoc_strndup(cur->text,
cpn - cur->text);
free(cur->text);
cur->text = p;
/* Setup to process the new box. */
cur = nbox;
p = nbox->text;
cpn = p - 1;
ccln = CCL_LET;
}
if (split != NULL)
parent = split->parent;
break;
}
/*
* Post-process list status.
*/
while (parent->type == EQN_LISTONE &&
parent->args == parent->expectargs)
parent = parent->parent;
break;
default:
abort();
}
goto next_tok;
}
enum rofferr
eqn_end(struct eqn_node **epp)
{
struct eqn_node *ep;
ep = *epp;
*epp = NULL;
ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box));
ep->eqn.root->expectargs = UINT_MAX;
return eqn_parse(ep, ep->eqn.root);
}
/*
 * Release an equation node together with its box tree, its
 * definitions, and its collected text.
 * A NULL argument is accepted and ignored.
 */
void
eqn_free(struct eqn_node *p)
{
	int		 i;

	if (p == NULL)
		return;

	/*
	 * The root box is only created by eqn_end(), so it is still
	 * NULL for a node that eqn_end() was never called on.
	 */
	if (p->eqn.root != NULL)
		eqn_box_free(p->eqn.root);

	for (i = 0; i < (int)p->defsz; i++) {
		free(p->defs[i].key);
		free(p->defs[i].val);
	}

	free(p->data);
	free(p->defs);
	free(p);
}