summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Kristaps Dzonsons <kristaps@bsd.lv> 2010-12-29 01:16:57 +0000
committer Kristaps Dzonsons <kristaps@bsd.lv> 2010-12-29 01:16:57 +0000
commit d5144ffde6d84d22eacbce42462bf2486d995d91 (patch)
tree 28406a7c095ee0bce715cd92848a1380533df824
parent 752a79323a3403cb04e5f83d296286dbb0457369 (diff)
download mandoc-d5144ffde6d84d22eacbce42462bf2486d995d91.tar.gz
Significant update to options handling, which now departs almost
completely from the BSD.lv code due to performance issues and flat-out errors. Performance issues: functions called per character. Ugly. Flat-out errors: disallowing "reserved" tokens as arguments to those options accepting arguments. Also added are two mandoc.h error codes for general tbl syntax errors and for bad options.
-rw-r--r-- libroff.h 21
-rw-r--r-- main.c 2
-rw-r--r-- mandoc.h 2
-rw-r--r-- roff.c 2
-rw-r--r-- tbl.c 85
-rw-r--r-- tbl_opts.c 197
6 files changed, 165 insertions, 144 deletions
diff --git a/libroff.h b/libroff.h
index 755a3e1d..50c59485 100644
--- a/libroff.h
+++ b/libroff.h
@@ -19,18 +19,6 @@
__BEGIN_DECLS
-enum tbl_tok {
- TBL_TOK_OPENPAREN = 0,
- TBL_TOK_CLOSEPAREN,
- TBL_TOK_COMMA,
- TBL_TOK_SEMICOLON,
- TBL_TOK_PERIOD,
- TBL_TOK_SPACE,
- TBL_TOK_TAB,
- TBL_TOK_NIL,
- TBL_TOK__MAX
-};
-
enum tbl_part {
TBL_PART_OPTS, /* in options (first line) */
TBL_PART_LAYOUT, /* describing layout */
@@ -38,8 +26,9 @@ enum tbl_part {
};
struct tbl {
+ mandocmsg msg; /* status messages */
+ void *data; /* privdata for messages */
enum tbl_part part;
- char buf[BUFSIZ];
char tab; /* cell-separator */
char decimal; /* decimal point */
int linesize;
@@ -54,11 +43,15 @@ struct tbl {
#define TBL_OPT_NOSPACE (1 << 6)
};
-struct tbl *tbl_alloc(void);
+#define TBL_MSG(tblp, type, line, col) \
+ (*(tblp)->msg)((type), (tblp)->data, (line), (col), NULL)
+
+struct tbl *tbl_alloc(void *, mandocmsg);
void tbl_free(struct tbl *);
void tbl_reset(struct tbl *);
enum rofferr tbl_read(struct tbl *, int, const char *, int);
enum tbl_tok tbl_next(struct tbl *, const char *, int *);
+int tbl_option(struct tbl *, int, const char *);
__END_DECLS
diff --git a/main.c b/main.c
index c60984cd..3776c0fd 100644
--- a/main.c
+++ b/main.c
@@ -179,6 +179,8 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"generic error",
+ "bad table syntax",
+ "bad table option",
"input stack limit exceeded, infinite loop?",
"skipping bad character",
"skipping text before the first section header",
diff --git a/mandoc.h b/mandoc.h
index 58c339fd..e5edc7b0 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -101,6 +101,8 @@ enum mandocerr {
MANDOCERR_ERROR, /* ===== start of errors ===== */
+ MANDOCERR_TBL, /* bad table syntax */
+ MANDOCERR_TBLOPT, /* bad table option */
MANDOCERR_ROFFLOOP, /* input stack limit exceeded, infinite loop? */
MANDOCERR_BADCHAR, /* skipping bad character */
MANDOCERR_NOTEXT, /* skipping text before the first section header */
diff --git a/roff.c b/roff.c
index d8a503f9..bf1dbf0c 100644
--- a/roff.c
+++ b/roff.c
@@ -1130,7 +1130,7 @@ roff_TS(ROFF_ARGS)
(*r->msg)(MANDOCERR_SCOPEBROKEN, r->data, ln, ppos, NULL);
tbl_reset(r->tbl);
} else
- r->tbl = tbl_alloc();
+ r->tbl = tbl_alloc(r->data, r->msg);
return(ROFF_IGN);
}
diff --git a/tbl.c b/tbl.c
index 99e61884..771266c3 100644
--- a/tbl.c
+++ b/tbl.c
@@ -25,14 +25,8 @@
#include "libmandoc.h"
#include "libroff.h"
-static const char tbl_toks[TBL_TOK__MAX] = {
- '(', ')', ',', ';', '.',
- ' ', '\t', '\0'
-};
-
static void tbl_init(struct tbl *);
static void tbl_clear(struct tbl *);
-static enum tbl_tok tbl_next_char(char);
static void
tbl_clear(struct tbl *tbl)
@@ -45,6 +39,9 @@ tbl_init(struct tbl *tbl)
{
tbl->part = TBL_PART_OPTS;
+ tbl->tab = '\t';
+ tbl->linesize = 12;
+ tbl->decimal = '.';
}
enum rofferr
@@ -56,19 +53,37 @@ tbl_read(struct tbl *tbl, int ln, const char *p, int offs)
cp = &p[offs];
len = (int)strlen(cp);
- if (len && TBL_PART_OPTS == tbl->part)
+ /*
+ * If we're in the options section and we don't have a
+ * terminating semicolon, assume we've moved directly into the
+ * layout section. No need to report a warning: this is,
+ * apparently, standard behaviour.
+ */
+
+ if (TBL_PART_OPTS == tbl->part && len)
if (';' != cp[len - 1])
tbl->part = TBL_PART_LAYOUT;
+
+ /* Now process each logical section of the table. */
+
+ switch (tbl->part) {
+ case (TBL_PART_OPTS):
+ return(tbl_option(tbl, ln, p) ? ROFF_IGN : ROFF_ERR);
+ default:
+ break;
+ }
return(ROFF_CONT);
}
struct tbl *
-tbl_alloc(void)
+tbl_alloc(void *data, const mandocmsg msg)
{
struct tbl *p;
p = mandoc_malloc(sizeof(struct tbl));
+ p->data = data;
+ p->msg = msg;
tbl_init(p);
return(p);
}
@@ -89,57 +104,3 @@ tbl_reset(struct tbl *tbl)
tbl_init(tbl);
}
-static enum tbl_tok
-tbl_next_char(char c)
-{
- int i;
-
- /*
- * These are delimiting tokens. They separate out words in the
- * token stream.
- *
- * FIXME: make this into a hashtable for faster lookup.
- */
- for (i = 0; i < TBL_TOK__MAX; i++)
- if (c == tbl_toks[i])
- return((enum tbl_tok)i);
-
- return(TBL_TOK__MAX);
-}
-
-enum tbl_tok
-tbl_next(struct tbl *tbl, const char *p, int *pos)
-{
- int i;
- enum tbl_tok c;
-
- tbl->buf[0] = '\0';
-
- if (TBL_TOK__MAX != (c = tbl_next_char(p[*pos]))) {
- if (TBL_TOK_NIL != c) {
- tbl->buf[0] = p[*pos];
- tbl->buf[1] = '\0';
- (*pos)++;
- }
- return(c);
- }
-
- /*
- * Copy words into a nil-terminated buffer. For now, we use a
- * static buffer. FIXME: eventually this should be made into a
- * dynamic one living in struct tbl.
- */
-
- for (i = 0; i < BUFSIZ; i++, (*pos)++)
- if (TBL_TOK__MAX == tbl_next_char(p[*pos]))
- tbl->buf[i] = p[*pos];
- else
- break;
-
- assert(i < BUFSIZ);
- tbl->buf[i] = '\0';
-
- return(TBL_TOK__MAX);
-}
-
-
diff --git a/tbl_opts.c b/tbl_opts.c
index 0b5b230b..f946279e 100644
--- a/tbl_opts.c
+++ b/tbl_opts.c
@@ -14,10 +14,12 @@
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include "mandoc.h"
#include "libroff.h"
enum tbl_ident {
@@ -46,6 +48,12 @@ struct tbl_phrase {
/* Handle Commonwealth/American spellings. */
#define KEY_MAXKEYS 14
+/* Maximum length of key name string. */
+#define KEY_MAXNAME 13
+
+/* Maximum length of key number size. */
+#define KEY_MAXNUMSZ 10
+
static const struct tbl_phrase keys[KEY_MAXKEYS] = {
{ "center", TBL_OPT_CENTRE, KEY_CENTRE},
{ "centre", TBL_OPT_CENTRE, KEY_CENTRE},
@@ -64,86 +72,99 @@ static const struct tbl_phrase keys[KEY_MAXKEYS] = {
};
static int arg(struct tbl *, int, const char *, int *, int);
-static int opt(struct tbl *, int, const char *, int *);
+static void opt(struct tbl *, int, const char *, int *);
static int
arg(struct tbl *tbl, int ln, const char *p, int *pos, int key)
{
- int sv;
+ int i;
+ char buf[KEY_MAXNUMSZ];
-again:
- sv = *pos;
+ while (isspace((unsigned char)p[*pos]))
+ (*pos)++;
- switch (tbl_next(tbl, p, pos)) {
- case (TBL_TOK_OPENPAREN):
- break;
- case (TBL_TOK_SPACE):
- /* FALLTHROUGH */
- case (TBL_TOK_TAB):
- goto again;
- default:
+ /* Arguments always begin with a parenthesis. */
+
+ if ('(' != p[*pos]) {
+ TBL_MSG(tbl, MANDOCERR_TBL, ln, *pos);
return(0);
}
- sv = *pos;
+ (*pos)++;
- switch (tbl_next(tbl, p, pos)) {
- case (TBL_TOK__MAX):
- break;
- default:
- return(0);
- }
+ /*
+ * The arguments can be ANY value, so we can't just stop at the
+ * next close parenthesis (the argument can be a closed
+ * parenthesis itself).
+ */
switch (key) {
case (KEY_DELIM):
- /* FIXME: cache this value. */
- if (2 != strlen(tbl->buf))
+ if ('\0' == (tbl->delims[0] = p[(*pos)++])) {
+ TBL_MSG(tbl, MANDOCERR_TBL, ln, *pos - 1);
return(0);
- tbl->delims[0] = tbl->buf[0];
- tbl->delims[1] = tbl->buf[1];
- break;
- case (KEY_TAB):
- /* FIXME: cache this value. */
- if (1 != strlen(tbl->buf))
+ }
+
+ if ('\0' == (tbl->delims[1] = p[(*pos)++])) {
+ TBL_MSG(tbl, MANDOCERR_TBL, ln, *pos - 1);
return(0);
- tbl->tab = tbl->buf[0];
+ }
break;
+ case (KEY_TAB):
+ if ('\0' != (tbl->tab = p[(*pos)++]))
+ break;
+
+ TBL_MSG(tbl, MANDOCERR_TBL, ln, *pos - 1);
+ return(0);
case (KEY_LINESIZE):
- if ((tbl->linesize = atoi(tbl->buf)) <= 0)
- return(0);
- break;
+ for (i = 0; i < KEY_MAXNUMSZ && p[*pos]; i++, (*pos)++) {
+ buf[i] = p[*pos];
+ if ( ! isdigit((unsigned char)buf[i]))
+ break;
+ }
+
+ if (i < KEY_MAXNUMSZ) {
+ buf[i] = '\0';
+ tbl->linesize = atoi(buf);
+ break;
+ }
+
+ (*tbl->msg)(MANDOCERR_TBL, tbl->data, ln, *pos, NULL);
+ return(0);
case (KEY_DPOINT):
- /* FIXME: cache this value. */
- if (1 != strlen(tbl->buf))
- return(0);
- tbl->decimal = tbl->buf[0];
- break;
+ if ('\0' != (tbl->decimal = p[(*pos)++]))
+ break;
+
+ TBL_MSG(tbl, MANDOCERR_TBL, ln, *pos - 1);
+ return(0);
default:
abort();
+ /* NOTREACHED */
}
- sv = *pos;
+ /* End with a close parenthesis. */
- switch (tbl_next(tbl, p, pos)) {
- case (TBL_TOK_CLOSEPAREN):
- break;
- default:
- return(0);
- }
+ if (')' == p[(*pos)++])
+ return(1);
- return(1);
+ TBL_MSG(tbl, MANDOCERR_TBL, ln, *pos - 1);
+ return(0);
}
-
-static int
+static void
opt(struct tbl *tbl, int ln, const char *p, int *pos)
{
int i, sv;
-
-again:
- sv = *pos;
+ char buf[KEY_MAXNAME];
/*
+ * Parse individual options from the stream as surrounded by
+ * this goto. Each pass through the routine parses out a single
+ * option and registers it. Option arguments are processed in
+ * the arg() function.
+ */
+
+again: /*
* EBNF describing this section:
*
* options ::= option_list [:space:]* [;][\n]
@@ -154,36 +175,69 @@ again:
* args ::= [:space:]* [(] [:alpha:]+ [)]
*/
- switch (tbl_next(tbl, p, pos)) {
- case (TBL_TOK__MAX):
- break;
- case (TBL_TOK_SPACE):
- /* FALLTHROUGH */
- case (TBL_TOK_TAB):
- goto again;
- case (TBL_TOK_SEMICOLON):
- tbl->part = TBL_PART_LAYOUT;
- return(1);
- default:
- return(0);
+ while (isspace((unsigned char)p[*pos]))
+ (*pos)++;
+
+ /* Safe exit point. */
+
+ if (';' == p[*pos])
+ return;
+
+ /* Copy up to first non-alpha character. */
+
+ for (sv = *pos, i = 0; i < KEY_MAXNAME; i++, (*pos)++) {
+ buf[i] = tolower(p[*pos]);
+ if ( ! isalpha((unsigned char)buf[i]))
+ break;
+ }
+
+ /* Exit if buffer is empty (or overrun). */
+
+ if (KEY_MAXNAME == i || 0 == i) {
+ TBL_MSG(tbl, MANDOCERR_TBL, ln, *pos);
+ return;
}
+ buf[i] = '\0';
+
+ while (isspace((unsigned char)p[*pos]))
+ (*pos)++;
+
+ /*
+ * Look through all of the available keys to find one that
+ * matches the input. FIXME: hashtable this.
+ */
+
for (i = 0; i < KEY_MAXKEYS; i++) {
- /* FIXME: hashtable this? */
- if (strcasecmp(tbl->buf, keys[i].name))
+ if (strcmp(buf, keys[i].name))
continue;
+
+ /*
+ * Note: this is more difficult to recover from, as we
+ * can be anywhere in the option sequence and it's
+ * harder to jump to the next. Meanwhile, just bail out
+ * of the sequence altogether.
+ */
+
if (keys[i].key)
tbl->opts |= keys[i].key;
else if ( ! arg(tbl, ln, p, pos, keys[i].ident))
- return(0);
+ return;
break;
}
+ /*
+ * Allow us to recover from bad options by continuing to another
+ * parse sequence.
+ */
+
if (KEY_MAXKEYS == i)
- return(0);
+ TBL_MSG(tbl, MANDOCERR_TBLOPT, ln, sv);
- return(opt(tbl, ln, p, pos));
+ /* Try again... */
+
+ goto again;
}
int
@@ -191,6 +245,15 @@ tbl_option(struct tbl *tbl, int ln, const char *p)
{
int pos;
+ /*
+ * Table options are always on just one line, so automatically
+ * switch into the next input mode here.
+ */
+ tbl->part = TBL_PART_LAYOUT;
+
pos = 0;
- return(opt(tbl, ln, p, &pos));
+ opt(tbl, ln, p, &pos);
+
+ /* Always succeed. */
+ return(1);
}