diff options
author | Ingo Schwarze <schwarze@openbsd.org> | 2014-10-12 19:31:41 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@openbsd.org> | 2014-10-12 19:31:41 +0000 |
commit | 3aae5aea6108aec8f9c8d9e61ef797f5291a0c60 (patch) | |
tree | 10b6c19833cb61eade9dbb7d75c53a1dc079c393 /eqn.c | |
parent | bb74cbfe07e7341a1b5980e1331be5009b5eca0e (diff) | |
download | mandoc-3aae5aea6108aec8f9c8d9e61ef797f5291a0c60.tar.gz |
Improve error handling in the eqn(7) parser.
Get rid of the first fatal error, MANDOCERR_EQNSYNT.
In eqn(7), there is no need to be bug-compatible with groff, so there
is no need to abandon the whole equation in case of a syntax error.
In particular:
* Skip "back", "delim", "down", "fwd", "gfont", "gsize", "left",
"right", "size", and "up" without arguments.
* Skip "gsize" and "size" with a non-numeric argument.
* Skip closing delimiters that are not open.
* Skip "above" outside piles.
* For diacritic marks and binary operators without a left operand,
default to an empty box.
* Let piles and matrices take one argument rather than insisting
on a braced list. Let HTML output handle that, too.
* When rewinding, if the root box is guaranteed to match
the termination condition, no error handling is needed.
Diffstat (limited to 'eqn.c')
-rw-r--r-- | eqn.c | 184 |
1 files changed, 83 insertions, 101 deletions
@@ -656,7 +656,7 @@ static int eqn_parse(struct eqn_node *ep, struct eqn_box *parent) { char *p; - enum eqn_tok tok; + enum eqn_tok tok, subtok; enum eqn_post pos; struct eqn_box *cur; int rc, size; @@ -665,9 +665,12 @@ eqn_parse(struct eqn_node *ep, struct eqn_box *parent) const char *start; assert(NULL != parent); -again: - switch ((tok = eqn_tok_parse(ep, &p))) { +next_tok: + tok = eqn_tok_parse(ep, &p); + +this_tok: + switch (tok) { case (EQN_TOK_UNDEF): if ((rc = eqn_undef(ep)) <= 0) return(rc); @@ -686,10 +689,9 @@ again: break; case (EQN_TOK_DELIM): case (EQN_TOK_GFONT): - if (NULL == eqn_nextrawtok(ep, NULL)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + if (eqn_nextrawtok(ep, NULL) == NULL) + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); break; case (EQN_TOK_MARK): case (EQN_TOK_LINEUP): @@ -703,9 +705,12 @@ again: case (EQN_TOK_HAT): case (EQN_TOK_DOT): case (EQN_TOK_DOTDOT): - if (NULL == parent->last) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); + if (parent->last == NULL) { + mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + cur = eqn_box_alloc(ep, parent); + cur->type = EQN_TEXT; + cur->text = mandoc_strdup(""); } parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent); parent->type = EQN_LISTONE; @@ -761,10 +766,12 @@ again: case (EQN_TOK_BACK): case (EQN_TOK_DOWN): case (EQN_TOK_UP): - tok = eqn_tok_parse(ep, NULL); - if (EQN_TOK__MAX != tok) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); + subtok = eqn_tok_parse(ep, NULL); + if (subtok != EQN_TOK__MAX) { + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + tok = subtok; + goto this_tok; } break; case (EQN_TOK_FAT): @@ -772,10 +779,7 @@ again: case (EQN_TOK_ITALIC): case (EQN_TOK_BOLD): while (parent->args == parent->expectargs) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent = parent->parent; /* * These values apply 
to the next word or sequence of * words; thus, we mark that we'll have a child with @@ -805,13 +809,15 @@ again: case (EQN_TOK_GSIZE): /* Accept two values: integral size and a single. */ if (NULL == (start = eqn_nexttok(ep, &sz))) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + break; } size = mandoc_strntoi(start, sz, 10); if (-1 == size) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); + mandoc_msg(MANDOCERR_IT_NONUM, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + break; } if (EQN_TOK_GSIZE == tok) { ep->gsize = size; @@ -831,9 +837,12 @@ again: * Repivot under a positional node, open a child scope * and keep on reading. */ - if (NULL == parent->last) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); + if (parent->last == NULL) { + mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + cur = eqn_box_alloc(ep, parent); + cur->type = EQN_TEXT; + cur->text = mandoc_strdup(""); } /* Handle the "subsup" and "fromto" positions. */ if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) { @@ -866,10 +875,7 @@ again: break; case (EQN_TOK_SQRT): while (parent->args == parent->expectargs) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent = parent->parent; /* * Accept a left-right-associative set of arguments just * like sub and sup and friends but without rebalancing @@ -886,15 +892,15 @@ again: * Close out anything that's currently open, then * rebalance and continue reading. 
*/ - if (NULL == parent->last) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); + if (parent->last == NULL) { + mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + cur = eqn_box_alloc(ep, parent); + cur->type = EQN_TEXT; + cur->text = mandoc_strdup(""); } while (EQN_SUBEXPR == parent->type) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent = parent->parent; parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent); break; case (EQN_TOK_RIGHT): @@ -904,19 +910,23 @@ again: * FIXME: this is a shitty sentinel: we should really * have a native EQN_BRACE type or whatnot. */ - while (parent->type != EQN_LIST) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + for (cur = parent; cur != NULL; cur = cur->parent) + if (cur->type == EQN_LIST && + (tok == EQN_TOK_BRACE_CLOSE || + cur->left != NULL)) + break; + if (cur == NULL) { + mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + break; + } + parent = cur; if (EQN_TOK_RIGHT == tok) { - if (NULL == parent->left) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } if (NULL == (start = eqn_nexttok(ep, &sz))) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); + mandoc_msg(MANDOCERR_REQ_EMPTY, + ep->parse, ep->eqn.ln, + ep->eqn.pos, eqn_toks[tok]); + break; } /* Handling depends on right/left. */ if (STRNEQ(start, sz, "ceiling", 7)) { @@ -928,10 +938,7 @@ again: } else parent->right = mandoc_strndup(start, sz); } - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent = parent->parent; if (EQN_TOK_BRACE_CLOSE == tok && parent && (parent->type == EQN_PILE || parent->type == EQN_MATRIX)) @@ -939,10 +946,7 @@ again: /* Close out any "singleton" lists. 
*/ while (parent->type == EQN_LISTONE && parent->args == parent->expectargs) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent = parent->parent; break; case (EQN_TOK_BRACE_OPEN): case (EQN_TOK_LEFT): @@ -952,18 +956,16 @@ again: * (just like with the text node). */ while (parent->args == parent->expectargs) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent = parent->parent; + if (EQN_TOK_LEFT == tok && + (start = eqn_nexttok(ep, &sz)) == NULL) { + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + break; + } parent = eqn_box_alloc(ep, parent); parent->type = EQN_LIST; if (EQN_TOK_LEFT == tok) { - if (NULL == (start = eqn_nexttok(ep, &sz))) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } - /* Handling depends on right/left. */ if (STRNEQ(start, sz, "ceiling", 7)) { strlcpy(sym, "\\[lc]", sizeof(sym)); parent->left = mandoc_strdup(sym); @@ -982,42 +984,29 @@ again: case (EQN_TOK_LCOL): case (EQN_TOK_RCOL): while (parent->args == parent->expectargs) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } - if (EQN_TOK_BRACE_OPEN != eqn_tok_parse(ep, NULL)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent = parent->parent; parent = eqn_box_alloc(ep, parent); parent->type = EQN_PILE; - parent = eqn_box_alloc(ep, parent); - parent->type = EQN_LIST; + parent->expectargs = 1; break; case (EQN_TOK_ABOVE): - while (parent->type != EQN_PILE) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } - parent = eqn_box_alloc(ep, parent); + for (cur = parent; cur != NULL; cur = cur->parent) + if (cur->type == EQN_PILE) + break; + if (cur == NULL) { + mandoc_msg(MANDOCERR_IT_STRAY, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + break; + } + parent = eqn_box_alloc(ep, cur); parent->type = EQN_LIST; break; case (EQN_TOK_MATRIX): while 
(parent->args == parent->expectargs) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } - if (EQN_TOK_BRACE_OPEN != eqn_tok_parse(ep, NULL)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent = parent->parent; parent = eqn_box_alloc(ep, parent); parent->type = EQN_MATRIX; - parent = eqn_box_alloc(ep, parent); - parent->type = EQN_LIST; + parent->expectargs = 1; break; case (EQN_TOK_EOF): /* @@ -1033,11 +1022,7 @@ again: * in an expression, then rewind til we're not any more. */ while (parent->args == parent->expectargs) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - free(p); - return(-1); - } + parent = parent->parent; cur = eqn_box_alloc(ep, parent); cur->type = EQN_TEXT; for (i = 0; i < EQNSYM__MAX; i++) @@ -1055,14 +1040,11 @@ again: * Post-process list status. */ while (parent->type == EQN_LISTONE && - parent->args == parent->expectargs) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent->args == parent->expectargs) + parent = parent->parent; break; } - goto again; + goto next_tok; } enum rofferr |