summary refs log tree commit diff stats
path: root/tbl_layout.c
diff options
context:
space:
mode:
author Ingo Schwarze <schwarze@openbsd.org> 2015-01-26 18:42:30 +0000
committer Ingo Schwarze <schwarze@openbsd.org> 2015-01-26 18:42:30 +0000
commit fb1f845c9ebafe8648f3df58f8a4581041ff067e (patch)
tree 07765bfe61b2b3ed91ec2f3e112706b0a88c6a8b /tbl_layout.c
parent ba5583393dfa12234c2752e45824ac58a3851700 (diff)
download mandoc-fb1f845c9ebafe8648f3df58f8a4581041ff067e.tar.gz
Rework tbl(7) layout parsing:
* Continue parsing even if part of the input is invalid.
* Do not require whitespace between cell specifications.
* Allow tabs as well as blanks between modifiers.
* Mark the 'm' modifier as unsupported.
* Parse and ignore the 'p' and 'v' modifiers.
* Better warning and error messages.
* Get rid of a static buffer.
Improved functionality but minus 50 lines of code.
Diffstat (limited to 'tbl_layout.c')
-rw-r--r-- tbl_layout.c 236
1 files changed, 82 insertions, 154 deletions
diff --git a/tbl_layout.c b/tbl_layout.c
index ec85b56b..7ca81659 100644
--- a/tbl_layout.c
+++ b/tbl_layout.c
@@ -1,7 +1,7 @@
/* $Id$ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2012, 2014 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2012, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -34,15 +34,7 @@ struct tbl_phrase {
enum tbl_cellt key;
};
-/*
- * FIXME: we can make this parse a lot nicer by, when an error is
- * encountered in a layout key, bailing to the next key (i.e. to the
- * next whitespace then continuing).
- */
-
-#define KEYS_MAX 11
-
-static const struct tbl_phrase keys[KEYS_MAX] = {
+static const struct tbl_phrase keys[] = {
{ 'c', TBL_CELL_CENTRE },
{ 'r', TBL_CELL_RIGHT },
{ 'l', TBL_CELL_LEFT },
@@ -55,57 +47,30 @@ static const struct tbl_phrase keys[KEYS_MAX] = {
{ '=', TBL_CELL_DHORIZ }
};
-static int mods(struct tbl_node *, struct tbl_cell *,
+#define KEYS_MAX ((int)(sizeof(keys)/sizeof(keys[0])))
+
+static void mods(struct tbl_node *, struct tbl_cell *,
int, const char *, int *);
-static int cell(struct tbl_node *, struct tbl_row *,
+static void cell(struct tbl_node *, struct tbl_row *,
int, const char *, int *);
static struct tbl_cell *cell_alloc(struct tbl_node *, struct tbl_row *,
enum tbl_cellt, int vert);
-static int
+static void
mods(struct tbl_node *tbl, struct tbl_cell *cp,
int ln, const char *p, int *pos)
{
- char buf[5];
- int i;
+ char *endptr;
- /* Not all types accept modifiers. */
+mod:
+ while (p[*pos] == ' ' || p[*pos] == '\t')
+ (*pos)++;
- switch (cp->pos) {
- case TBL_CELL_DOWN:
- /* FALLTHROUGH */
- case TBL_CELL_HORIZ:
- /* FALLTHROUGH */
- case TBL_CELL_DHORIZ:
- return(1);
- default:
- break;
- }
+ /* Row delimiters and cell specifiers end modifier lists. */
-mod:
- /*
- * XXX: since, at least for now, modifiers are non-conflicting
- * (are separable by value, regardless of position), we let
- * modifiers come in any order. The existing tbl doesn't let
- * this happen.
- */
- switch (p[*pos]) {
- case '\0':
- /* FALLTHROUGH */
- case ' ':
- /* FALLTHROUGH */
- case '\t':
- /* FALLTHROUGH */
- case ',':
- /* FALLTHROUGH */
- case '.':
- /* FALLTHROUGH */
- case '|':
- return(1);
- default:
- break;
- }
+ if (strchr(".,-=^_ACLNRSaclnrs|", p[*pos]) != NULL)
+ return;
/* Throw away parenthesised expression. */
@@ -117,72 +82,65 @@ mod:
(*pos)++;
goto mod;
}
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
+ mandoc_msg(MANDOCERR_TBLLAYOUT_PAR, tbl->parse,
ln, *pos, NULL);
- return(0);
+ return;
}
/* Parse numerical spacing from modifier string. */
if (isdigit((unsigned char)p[*pos])) {
- for (i = 0; i < 4; i++) {
- if ( ! isdigit((unsigned char)p[*pos + i]))
- break;
- buf[i] = p[*pos + i];
- }
- buf[i] = '\0';
-
- /* No greater than 4 digits. */
-
- if (4 == i) {
- mandoc_msg(MANDOCERR_TBLLAYOUT,
- tbl->parse, ln, *pos, NULL);
- return(0);
- }
-
- *pos += i;
- cp->spacing = (size_t)atoi(buf);
-
+ cp->spacing = strtoull(p + *pos, &endptr, 10);
+ *pos = endptr - p;
goto mod;
- /* NOTREACHED */
}
- /* TODO: GNU has many more extensions. */
-
switch (tolower((unsigned char)p[(*pos)++])) {
- case 'z':
- cp->flags |= TBL_CELL_WIGN;
- goto mod;
- case 'u':
- cp->flags |= TBL_CELL_UP;
+ case 'b':
+ /* FALLTHROUGH */
+ case 'i':
+ /* FALLTHROUGH */
+ case 'r':
+ (*pos)--;
+ break;
+ case 'd':
+ cp->flags |= TBL_CELL_BALIGN;
goto mod;
case 'e':
cp->flags |= TBL_CELL_EQUAL;
goto mod;
+ case 'f':
+ break;
+ case 'm':
+ mandoc_msg(MANDOCERR_TBLLAYOUT_MOD, tbl->parse,
+ ln, *pos, "m");
+ goto mod;
+ case 'p':
+ /* FALLTHROUGH */
+ case 'v':
+ if (p[*pos] == '-' || p[*pos] == '+')
+ (*pos)++;
+ while (isdigit((unsigned char)p[*pos]))
+ (*pos)++;
+ goto mod;
case 't':
cp->flags |= TBL_CELL_TALIGN;
goto mod;
- case 'd':
- cp->flags |= TBL_CELL_BALIGN;
+ case 'u':
+ cp->flags |= TBL_CELL_UP;
goto mod;
case 'w': /* XXX for now, ignore minimal column width */
goto mod;
case 'x':
cp->flags |= TBL_CELL_WMAX;
goto mod;
- case 'f':
- break;
- case 'r':
- /* FALLTHROUGH */
- case 'b':
- /* FALLTHROUGH */
- case 'i':
- (*pos)--;
- break;
+ case 'z':
+ cp->flags |= TBL_CELL_WIGN;
+ goto mod;
default:
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
- ln, *pos - 1, NULL);
- return(0);
+ mandoc_vmsg(MANDOCERR_TBLLAYOUT_CHAR, tbl->parse,
+ ln, *pos - 1, "%c", p[*pos - 1]);
+ goto mod;
}
switch (tolower((unsigned char)p[(*pos)++])) {
@@ -201,20 +159,13 @@ mod:
case 'r':
goto mod;
default:
- break;
- }
- if (isalnum((unsigned char)p[*pos - 1])) {
mandoc_vmsg(MANDOCERR_FT_BAD, tbl->parse,
ln, *pos - 1, "TS f%c", p[*pos - 1]);
goto mod;
}
-
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
- ln, *pos - 1, NULL);
- return(0);
}
-static int
+static void
cell(struct tbl_node *tbl, struct tbl_row *rp,
int ln, const char *p, int *pos)
{
@@ -223,16 +174,24 @@ cell(struct tbl_node *tbl, struct tbl_row *rp,
/* Handle vertical lines. */
- for (vert = 0; '|' == p[*pos]; ++*pos)
- vert++;
- while (' ' == p[*pos])
+ vert = 0;
+again:
+ while (p[*pos] == ' ' || p[*pos] == '\t' || p[*pos] == '|') {
+ if (p[*pos] == '|') {
+ if (vert < 2)
+ vert++;
+ else
+ mandoc_msg(MANDOCERR_TBLLAYOUT_VERT,
+ tbl->parse, ln, *pos, NULL);
+ }
(*pos)++;
+ }
/* Handle trailing vertical lines */
if ('.' == p[*pos] || '\0' == p[*pos]) {
rp->vert = vert;
- return(1);
+ return;
}
/* Parse the column position (`c', `l', `r', ...). */
@@ -241,62 +200,32 @@ cell(struct tbl_node *tbl, struct tbl_row *rp,
if (tolower((unsigned char)p[*pos]) == keys[i].name)
break;
- if (KEYS_MAX == i) {
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
- ln, *pos, NULL);
- return(0);
+ if (i == KEYS_MAX) {
+ mandoc_vmsg(MANDOCERR_TBLLAYOUT_CHAR, tbl->parse,
+ ln, *pos, "%c", p[*pos]);
+ (*pos)++;
+ goto again;
}
-
c = keys[i].key;
- /*
- * If a span cell is found first, raise a warning and abort the
- * parse. If a span cell is found and the last layout element
- * isn't a "normal" layout, bail.
- *
- * FIXME: recover from this somehow?
- */
-
- if (TBL_CELL_SPAN == c) {
- if (NULL == rp->first) {
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
- ln, *pos, NULL);
- return(0);
- } else if (rp->last)
- switch (rp->last->pos) {
- case TBL_CELL_HORIZ:
- /* FALLTHROUGH */
- case TBL_CELL_DHORIZ:
- mandoc_msg(MANDOCERR_TBLLAYOUT,
- tbl->parse, ln, *pos, NULL);
- return(0);
- default:
- break;
- }
- }
+ /* Special cases of spanners. */
- /*
- * If a vertical spanner is found, we may not be in the first
- * row.
- */
-
- if (TBL_CELL_DOWN == c && rp == tbl->first_row) {
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos, NULL);
- return(0);
- }
+ if (c == TBL_CELL_SPAN) {
+ if (rp->last == NULL)
+ mandoc_msg(MANDOCERR_TBLLAYOUT_SPAN,
+ tbl->parse, ln, *pos, NULL);
+ else if (rp->last->pos == TBL_CELL_HORIZ ||
+ rp->last->pos == TBL_CELL_DHORIZ)
+ c = rp->last->pos;
+ } else if (c == TBL_CELL_DOWN && rp == tbl->first_row)
+ mandoc_msg(MANDOCERR_TBLLAYOUT_DOWN,
+ tbl->parse, ln, *pos, NULL);
(*pos)++;
- /* Disallow adjacent spacers. */
-
- if (vert > 2) {
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos - 1, NULL);
- return(0);
- }
-
/* Allocate cell then parse its modifiers. */
- return(mods(tbl, cell_alloc(tbl, rp, c, vert), ln, p, pos));
+ mods(tbl, cell_alloc(tbl, rp, c, vert), ln, p, pos);
}
void
@@ -311,7 +240,7 @@ tbl_layout(struct tbl_node *tbl, int ln, const char *p)
for (;;) {
/* Skip whitespace before and after each cell. */
- while (isspace((unsigned char)p[pos]))
+ while (p[pos] == ' ' || p[pos] == '\t')
pos++;
switch (p[pos]) {
@@ -326,7 +255,7 @@ tbl_layout(struct tbl_node *tbl, int ln, const char *p)
tbl->part = TBL_PART_DATA;
if (tbl->first_row != NULL)
return;
- mandoc_msg(MANDOCERR_TBLNOLAYOUT,
+ mandoc_msg(MANDOCERR_TBLLAYOUT_NONE,
tbl->parse, ln, pos, NULL);
rp = mandoc_calloc(1, sizeof(*rp));
cell_alloc(tbl, rp, TBL_CELL_LEFT, 0);
@@ -344,8 +273,7 @@ tbl_layout(struct tbl_node *tbl, int ln, const char *p)
tbl->first_row = rp;
tbl->last_row = rp;
}
- if ( ! cell(tbl, rp, ln, p, &pos))
- return;
+ cell(tbl, rp, ln, p, &pos);
}
}