diff options
author | Kristaps Dzonsons <kristaps@bsd.lv> | 2011-06-18 16:18:04 +0000 |
---|---|---|
committer | Kristaps Dzonsons <kristaps@bsd.lv> | 2011-06-18 16:18:04 +0000 |
commit | 1319b92b1f8192d5db7ce4e0e07f0b617dc9e1ea (patch) | |
tree | 9b07c52f53dde27f9dc2b31252e56b0f7db55ee7 /mdoc_argv.c | |
parent | 1936f3f811dbbd2a8a94c22d8f57901f399e7ec5 (diff) | |
download | mandoc-1319b92b1f8192d5db7ce4e0e07f0b617dc9e1ea.tar.gz |
Fix an assertion failure raised by the following interesting scenario: an
auto-opened `It' (i.e., a column list with a free-text first line) with
leading spaces in the line triggered an assertion when searching for
arguments.
This led to a fix giving a nice performance speed-up (a few percent,
with some quick trials): the search for flags immediately exits if the
macro has no flags, instead of having to first parse the leading word
and then look it up. I also cleaned up the argv parsing stuff a little bit
and added more documentation.
This comes from a TODO by joerg@.
Diffstat (limited to 'mdoc_argv.c')
-rw-r--r-- | mdoc_argv.c | 354 |
1 files changed, 163 insertions, 191 deletions
diff --git a/mdoc_argv.c b/mdoc_argv.c index efb23ccc..65bef1c4 100644 --- a/mdoc_argv.c +++ b/mdoc_argv.c @@ -47,7 +47,11 @@ enum argvflag { ARGV_OPT_SINGLE /* optional arg (e.g., -offset [xxx]) */ }; -static enum mdocargt argv_a2arg(enum mdoct, const char *); +struct mdocarg { + enum argsflag flags; + const enum mdocargt *argvs; +}; + static enum margserr args(struct mdoc *, int, int *, char *, enum argsflag, char **); static int args_checkpunct(const char *, int); @@ -90,131 +94,6 @@ static const enum argvflag argvflags[MDOC_ARG_MAX] = { ARGV_NONE /* MDOC_Symbolic */ }; -static const enum argsflag argflags[MDOC_MAX] = { - ARGSFL_NONE, /* Ap */ - ARGSFL_NONE, /* Dd */ - ARGSFL_NONE, /* Dt */ - ARGSFL_NONE, /* Os */ - ARGSFL_NONE, /* Sh */ - ARGSFL_NONE, /* Ss */ - ARGSFL_NONE, /* Pp */ - ARGSFL_DELIM, /* D1 */ - ARGSFL_DELIM, /* Dl */ - ARGSFL_NONE, /* Bd */ - ARGSFL_NONE, /* Ed */ - ARGSFL_NONE, /* Bl */ - ARGSFL_NONE, /* El */ - ARGSFL_NONE, /* It */ - ARGSFL_DELIM, /* Ad */ - ARGSFL_DELIM, /* An */ - ARGSFL_DELIM, /* Ar */ - ARGSFL_NONE, /* Cd */ - ARGSFL_DELIM, /* Cm */ - ARGSFL_DELIM, /* Dv */ - ARGSFL_DELIM, /* Er */ - ARGSFL_DELIM, /* Ev */ - ARGSFL_NONE, /* Ex */ - ARGSFL_DELIM, /* Fa */ - ARGSFL_NONE, /* Fd */ - ARGSFL_DELIM, /* Fl */ - ARGSFL_DELIM, /* Fn */ - ARGSFL_DELIM, /* Ft */ - ARGSFL_DELIM, /* Ic */ - ARGSFL_NONE, /* In */ - ARGSFL_DELIM, /* Li */ - ARGSFL_NONE, /* Nd */ - ARGSFL_DELIM, /* Nm */ - ARGSFL_DELIM, /* Op */ - ARGSFL_NONE, /* Ot */ - ARGSFL_DELIM, /* Pa */ - ARGSFL_NONE, /* Rv */ - ARGSFL_DELIM, /* St */ - ARGSFL_DELIM, /* Va */ - ARGSFL_DELIM, /* Vt */ - ARGSFL_DELIM, /* Xr */ - ARGSFL_NONE, /* %A */ - ARGSFL_NONE, /* %B */ - ARGSFL_NONE, /* %D */ - ARGSFL_NONE, /* %I */ - ARGSFL_NONE, /* %J */ - ARGSFL_NONE, /* %N */ - ARGSFL_NONE, /* %O */ - ARGSFL_NONE, /* %P */ - ARGSFL_NONE, /* %R */ - ARGSFL_NONE, /* %T */ - ARGSFL_NONE, /* %V */ - ARGSFL_DELIM, /* Ac */ - ARGSFL_NONE, /* Ao */ - ARGSFL_DELIM, /* Aq */ - ARGSFL_DELIM, /* At */ 
- ARGSFL_DELIM, /* Bc */ - ARGSFL_NONE, /* Bf */ - ARGSFL_NONE, /* Bo */ - ARGSFL_DELIM, /* Bq */ - ARGSFL_DELIM, /* Bsx */ - ARGSFL_DELIM, /* Bx */ - ARGSFL_NONE, /* Db */ - ARGSFL_DELIM, /* Dc */ - ARGSFL_NONE, /* Do */ - ARGSFL_DELIM, /* Dq */ - ARGSFL_DELIM, /* Ec */ - ARGSFL_NONE, /* Ef */ - ARGSFL_DELIM, /* Em */ - ARGSFL_NONE, /* Eo */ - ARGSFL_DELIM, /* Fx */ - ARGSFL_DELIM, /* Ms */ - ARGSFL_DELIM, /* No */ - ARGSFL_DELIM, /* Ns */ - ARGSFL_DELIM, /* Nx */ - ARGSFL_DELIM, /* Ox */ - ARGSFL_DELIM, /* Pc */ - ARGSFL_DELIM, /* Pf */ - ARGSFL_NONE, /* Po */ - ARGSFL_DELIM, /* Pq */ - ARGSFL_DELIM, /* Qc */ - ARGSFL_DELIM, /* Ql */ - ARGSFL_NONE, /* Qo */ - ARGSFL_DELIM, /* Qq */ - ARGSFL_NONE, /* Re */ - ARGSFL_NONE, /* Rs */ - ARGSFL_DELIM, /* Sc */ - ARGSFL_NONE, /* So */ - ARGSFL_DELIM, /* Sq */ - ARGSFL_NONE, /* Sm */ - ARGSFL_DELIM, /* Sx */ - ARGSFL_DELIM, /* Sy */ - ARGSFL_DELIM, /* Tn */ - ARGSFL_DELIM, /* Ux */ - ARGSFL_DELIM, /* Xc */ - ARGSFL_NONE, /* Xo */ - ARGSFL_NONE, /* Fo */ - ARGSFL_NONE, /* Fc */ - ARGSFL_NONE, /* Oo */ - ARGSFL_DELIM, /* Oc */ - ARGSFL_NONE, /* Bk */ - ARGSFL_NONE, /* Ek */ - ARGSFL_NONE, /* Bt */ - ARGSFL_NONE, /* Hf */ - ARGSFL_NONE, /* Fr */ - ARGSFL_NONE, /* Ud */ - ARGSFL_NONE, /* Lb */ - ARGSFL_NONE, /* Lp */ - ARGSFL_DELIM, /* Lk */ - ARGSFL_DELIM, /* Mt */ - ARGSFL_DELIM, /* Brq */ - ARGSFL_NONE, /* Bro */ - ARGSFL_DELIM, /* Brc */ - ARGSFL_NONE, /* %C */ - ARGSFL_NONE, /* Es */ - ARGSFL_NONE, /* En */ - ARGSFL_NONE, /* Dx */ - ARGSFL_NONE, /* %Q */ - ARGSFL_NONE, /* br */ - ARGSFL_NONE, /* sp */ - ARGSFL_NONE, /* %U */ - ARGSFL_NONE, /* Ta */ -}; - static const enum mdocargt args_Ex[] = { MDOC_Std, MDOC_ARG_MAX @@ -269,6 +148,132 @@ static const enum mdocargt args_Bl[] = { MDOC_ARG_MAX }; +static const struct mdocarg mdocargs[MDOC_MAX] = { + { ARGSFL_NONE, NULL }, /* Ap */ + { ARGSFL_NONE, NULL }, /* Dd */ + { ARGSFL_NONE, NULL }, /* Dt */ + { ARGSFL_NONE, NULL }, /* Os */ + { ARGSFL_NONE, NULL }, /* Sh */ + { 
ARGSFL_NONE, NULL }, /* Ss */ + { ARGSFL_NONE, NULL }, /* Pp */ + { ARGSFL_DELIM, NULL }, /* D1 */ + { ARGSFL_DELIM, NULL }, /* Dl */ + { ARGSFL_NONE, args_Bd }, /* Bd */ + { ARGSFL_NONE, NULL }, /* Ed */ + { ARGSFL_NONE, args_Bl }, /* Bl */ + { ARGSFL_NONE, NULL }, /* El */ + { ARGSFL_NONE, NULL }, /* It */ + { ARGSFL_DELIM, NULL }, /* Ad */ + { ARGSFL_DELIM, args_An }, /* An */ + { ARGSFL_DELIM, NULL }, /* Ar */ + { ARGSFL_NONE, NULL }, /* Cd */ + { ARGSFL_DELIM, NULL }, /* Cm */ + { ARGSFL_DELIM, NULL }, /* Dv */ + { ARGSFL_DELIM, NULL }, /* Er */ + { ARGSFL_DELIM, NULL }, /* Ev */ + { ARGSFL_NONE, args_Ex }, /* Ex */ + { ARGSFL_DELIM, NULL }, /* Fa */ + { ARGSFL_NONE, NULL }, /* Fd */ + { ARGSFL_DELIM, NULL }, /* Fl */ + { ARGSFL_DELIM, NULL }, /* Fn */ + { ARGSFL_DELIM, NULL }, /* Ft */ + { ARGSFL_DELIM, NULL }, /* Ic */ + { ARGSFL_NONE, NULL }, /* In */ + { ARGSFL_DELIM, NULL }, /* Li */ + { ARGSFL_NONE, NULL }, /* Nd */ + { ARGSFL_DELIM, NULL }, /* Nm */ + { ARGSFL_DELIM, NULL }, /* Op */ + { ARGSFL_NONE, NULL }, /* Ot */ + { ARGSFL_DELIM, NULL }, /* Pa */ + { ARGSFL_NONE, args_Ex }, /* Rv */ + { ARGSFL_DELIM, NULL }, /* St */ + { ARGSFL_DELIM, NULL }, /* Va */ + { ARGSFL_DELIM, NULL }, /* Vt */ + { ARGSFL_DELIM, NULL }, /* Xr */ + { ARGSFL_NONE, NULL }, /* %A */ + { ARGSFL_NONE, NULL }, /* %B */ + { ARGSFL_NONE, NULL }, /* %D */ + { ARGSFL_NONE, NULL }, /* %I */ + { ARGSFL_NONE, NULL }, /* %J */ + { ARGSFL_NONE, NULL }, /* %N */ + { ARGSFL_NONE, NULL }, /* %O */ + { ARGSFL_NONE, NULL }, /* %P */ + { ARGSFL_NONE, NULL }, /* %R */ + { ARGSFL_NONE, NULL }, /* %T */ + { ARGSFL_NONE, NULL }, /* %V */ + { ARGSFL_DELIM, NULL }, /* Ac */ + { ARGSFL_NONE, NULL }, /* Ao */ + { ARGSFL_DELIM, NULL }, /* Aq */ + { ARGSFL_DELIM, NULL }, /* At */ + { ARGSFL_DELIM, NULL }, /* Bc */ + { ARGSFL_NONE, args_Bf }, /* Bf */ + { ARGSFL_NONE, NULL }, /* Bo */ + { ARGSFL_DELIM, NULL }, /* Bq */ + { ARGSFL_DELIM, NULL }, /* Bsx */ + { ARGSFL_DELIM, NULL }, /* Bx */ + { ARGSFL_NONE, 
NULL }, /* Db */ + { ARGSFL_DELIM, NULL }, /* Dc */ + { ARGSFL_NONE, NULL }, /* Do */ + { ARGSFL_DELIM, NULL }, /* Dq */ + { ARGSFL_DELIM, NULL }, /* Ec */ + { ARGSFL_NONE, NULL }, /* Ef */ + { ARGSFL_DELIM, NULL }, /* Em */ + { ARGSFL_NONE, NULL }, /* Eo */ + { ARGSFL_DELIM, NULL }, /* Fx */ + { ARGSFL_DELIM, NULL }, /* Ms */ + { ARGSFL_DELIM, NULL }, /* No */ + { ARGSFL_DELIM, NULL }, /* Ns */ + { ARGSFL_DELIM, NULL }, /* Nx */ + { ARGSFL_DELIM, NULL }, /* Ox */ + { ARGSFL_DELIM, NULL }, /* Pc */ + { ARGSFL_DELIM, NULL }, /* Pf */ + { ARGSFL_NONE, NULL }, /* Po */ + { ARGSFL_DELIM, NULL }, /* Pq */ + { ARGSFL_DELIM, NULL }, /* Qc */ + { ARGSFL_DELIM, NULL }, /* Ql */ + { ARGSFL_NONE, NULL }, /* Qo */ + { ARGSFL_DELIM, NULL }, /* Qq */ + { ARGSFL_NONE, NULL }, /* Re */ + { ARGSFL_NONE, NULL }, /* Rs */ + { ARGSFL_DELIM, NULL }, /* Sc */ + { ARGSFL_NONE, NULL }, /* So */ + { ARGSFL_DELIM, NULL }, /* Sq */ + { ARGSFL_NONE, NULL }, /* Sm */ + { ARGSFL_DELIM, NULL }, /* Sx */ + { ARGSFL_DELIM, NULL }, /* Sy */ + { ARGSFL_DELIM, NULL }, /* Tn */ + { ARGSFL_DELIM, NULL }, /* Ux */ + { ARGSFL_DELIM, NULL }, /* Xc */ + { ARGSFL_NONE, NULL }, /* Xo */ + { ARGSFL_NONE, NULL }, /* Fo */ + { ARGSFL_NONE, NULL }, /* Fc */ + { ARGSFL_NONE, NULL }, /* Oo */ + { ARGSFL_DELIM, NULL }, /* Oc */ + { ARGSFL_NONE, args_Bk }, /* Bk */ + { ARGSFL_NONE, NULL }, /* Ek */ + { ARGSFL_NONE, NULL }, /* Bt */ + { ARGSFL_NONE, NULL }, /* Hf */ + { ARGSFL_NONE, NULL }, /* Fr */ + { ARGSFL_NONE, NULL }, /* Ud */ + { ARGSFL_NONE, NULL }, /* Lb */ + { ARGSFL_NONE, NULL }, /* Lp */ + { ARGSFL_DELIM, NULL }, /* Lk */ + { ARGSFL_DELIM, NULL }, /* Mt */ + { ARGSFL_DELIM, NULL }, /* Brq */ + { ARGSFL_NONE, NULL }, /* Bro */ + { ARGSFL_DELIM, NULL }, /* Brc */ + { ARGSFL_NONE, NULL }, /* %C */ + { ARGSFL_NONE, NULL }, /* Es */ + { ARGSFL_NONE, NULL }, /* En */ + { ARGSFL_NONE, NULL }, /* Dx */ + { ARGSFL_NONE, NULL }, /* %Q */ + { ARGSFL_NONE, NULL }, /* br */ + { ARGSFL_NONE, NULL }, /* sp */ + { 
ARGSFL_NONE, NULL }, /* %U */ + { ARGSFL_NONE, NULL }, /* Ta */ +}; + + /* * Parse an argument from line text. This comes in the form of -key * [value0...], which may either have a single mandatory value, at least @@ -281,47 +286,62 @@ mdoc_argv(struct mdoc *m, int line, enum mdoct tok, char *p, sv; struct mdoc_argv tmp; struct mdoc_arg *arg; + const enum mdocargt *ap; if ('\0' == buf[*pos]) return(ARGV_EOLN); + else if (NULL == (ap = mdocargs[tok].argvs)) + return(ARGV_WORD); assert(' ' != buf[*pos]); - /* Parse through to the first unescaped space. */ + /* Seek to the first unescaped space. */ p = &buf[++(*pos)]; assert(*pos > 0); - /* LINTED */ - while (buf[*pos]) { - if (' ' == buf[*pos]) - if ('\\' != buf[*pos - 1]) - break; - (*pos)++; - } + for ( ; buf[*pos] ; (*pos)++) + if (' ' == buf[*pos] && '\\' != buf[*pos - 1]) + break; - /* XXX - save zeroed byte, if not an argument. */ + /* + * We want to nil-terminate the word to look it up (it's easier + * that way). But we may not have a flag, in which case we need + * to restore the line as-is. So keep around the stray byte, + * which we'll reset upon exiting (if necessary). + */ - sv = '\0'; - if (buf[*pos]) { - sv = buf[*pos]; + if ('\0' != (sv = buf[*pos])) buf[(*pos)++] = '\0'; - } + + /* + * Now look up the word as a flag. Use temporary storage that + * we'll copy into the node's flags, if necessary. + */ memset(&tmp, 0, sizeof(struct mdoc_argv)); + tmp.line = line; tmp.pos = *pos; + tmp.arg = MDOC_ARG_MAX; - /* See if our token accepts the argument. */ + while (MDOC_ARG_MAX != (tmp.arg = *ap++)) + if (0 == strcmp(p, mdoc_argnames[tmp.arg])) + break; - if (MDOC_ARG_MAX == (tmp.arg = argv_a2arg(tok, p))) { - /* XXX - restore saved zeroed byte. */ + if (MDOC_ARG_MAX == tmp.arg) { + /* + * The flag was not found. + * Restore saved zeroed byte and return as a word. + */ if (sv) buf[*pos - 1] = sv; return(ARGV_WORD); } + /* Read to the next word (the argument). 
*/ + while (buf[*pos] && ' ' == buf[*pos]) (*pos)++; @@ -395,7 +415,7 @@ mdoc_args(struct mdoc *m, int line, int *pos, enum argsflag fl; struct mdoc_node *n; - fl = argflags[tok]; + fl = mdocargs[tok].flags; if (MDOC_It != tok) return(args(m, line, pos, buf, fl, v)); @@ -424,8 +444,6 @@ args(struct mdoc *m, int line, int *pos, char *p, *pp; enum margserr rc; - assert(' ' != buf[*pos]); - if ('\0' == buf[*pos]) { if (MDOC_PPHRASE & m->flags) return(ARGS_EOLN); @@ -613,52 +631,6 @@ args_checkpunct(const char *buf, int i) return('\0' == buf[i]); } -/* - * Match up an argument string (e.g., `-foo bar' having "foo") with the - * correrct identifier. It must apply to the given macro. If none was - * found (including bad matches), return MDOC_ARG_MAX. - */ -static enum mdocargt -argv_a2arg(enum mdoct tok, const char *p) -{ - const enum mdocargt *argsp; - - argsp = NULL; - - switch (tok) { - case (MDOC_An): - argsp = args_An; - break; - case (MDOC_Bd): - argsp = args_Bd; - break; - case (MDOC_Bf): - argsp = args_Bf; - break; - case (MDOC_Bk): - argsp = args_Bk; - break; - case (MDOC_Bl): - argsp = args_Bl; - break; - case (MDOC_Rv): - /* FALLTHROUGH */ - case (MDOC_Ex): - argsp = args_Ex; - break; - default: - return(MDOC_ARG_MAX); - } - - assert(argsp); - - for ( ; MDOC_ARG_MAX != *argsp ; argsp++) - if (0 == strcmp(p, mdoc_argnames[*argsp])) - return(*argsp); - - return(MDOC_ARG_MAX); -} - static int argv_multi(struct mdoc *m, int line, struct mdoc_argv *v, int *pos, char *buf) |