diff options
author | Kristaps Dzonsons <kristaps@bsd.lv> | 2011-07-21 10:24:35 +0000 |
---|---|---|
committer | Kristaps Dzonsons <kristaps@bsd.lv> | 2011-07-21 10:24:35 +0000 |
commit | 49de222b7899f01575a6ac00a26bc345864dc826 (patch) | |
tree | e8b6249505f745bf14ced273989037807624f076 | |
parent | f1454c7cbdfb3ec91f3bcce1e004d4c81d3b08cf (diff) | |
download | mandoc-49de222b7899f01575a6ac00a26bc345864dc826.tar.gz |
Finish the eqn syntactic parser. This correctly parses terms and does
the proper `define' dance, which amounts to pure word-replace (you can,
say, define `foo' as `define' then define `define' as something else).
eqn.c is now ready for some semantic parsing of `box' and `eqn'
productions as defined by the grammar.
-rw-r--r-- | eqn.7 | 71 | ||||
-rw-r--r-- | eqn.c | 364 | ||||
-rw-r--r-- | libroff.h | 19 | ||||
-rw-r--r-- | man.c | 4 | ||||
-rw-r--r-- | mandoc.h | 2 | ||||
-rw-r--r-- | mdoc.c | 4 | ||||
-rw-r--r-- | roff.c | 2 |
7 files changed, 253 insertions, 213 deletions
@@ -71,8 +71,8 @@ text : TEXT .Pp Data in TEXT form is a non-empty sequence of non-space characters or a non-empty quoted string. -White-space (and enclosing literal quote pairs) is thrown away and -productions may not be broken by newlines. +Unless within a quoted string, white-space (and enclosing literal quote +pairs) is thrown away. .Pp The following control statements are available: .Bl -tag -width Ds @@ -80,11 +80,20 @@ The following control statements are available: Replace all occurances of a key with a value. Its syntax is as follows: .Pp -.D1 define Ar key val +.D1 define Ar key cvalc +.Pp +The first character of the value string, +.Ar c , +is used as the delimiter for the value +.Ar val . +This allows for arbitrary enclosure of terms (not just quotes), such as +.Pp +.D1 define Ar foo 'bar baz' +.D1 define Ar foo cbar bazc .Pp It is an error to have an empty .Ar key or -.Ar value . +.Ar val . Note that a quoted .Ar key causes errors in some @@ -93,6 +102,14 @@ implementations and should not be considered portable. Definitions may refer to other definitions; these are evaluated recursively when text replacement occurs and not when the definition is created. +.Pp +Definitions can create arbitrary strings, for example, the following is +a legal construction. +.Bd -literal -offset indent +define foo 'define' +foo bar 'baz' +.Ed +.Pp Self-referencing definitions will raise an error. .It Cm set Set an equation mode. @@ -107,6 +124,20 @@ Once invoked, the definition for .Ar key is discarded. .El +.Sh COMPATIBILITY +This section documents the compatibility of mandoc +.Nm +and the troff +.Nm +implementation (including GNU troff). +.Pp +.Bl -dash -compact +.It +The text string +.Sq \e\*q +is interpreted as a literal quote in troff. +In mandoc, this is interpreted as a comment. +.El .Sh SEE ALSO .Xr mandoc 1 , .Xr man 7 , @@ -122,18 +153,28 @@ is discarded. .%P 151\(en157 .%D March, 1975 .Re -.\" .Sh HISTORY -.\" The tbl utility, a preprocessor for troff, was originally written by M. -.\" E. Lesk at Bell Labs in 1975. -.\" The GNU reimplementation of tbl, part of the groff package, was released -.\" in 1990 by James Clark. -.\" A standalone tbl implementation was written by Kristaps Dzonsons in -.\" 2010. -.\" This formed the basis of the implementation that is part of the -.\" .Xr mandoc 1 -.\" utility. +.Rs +.%A Brian W. Kernighan +.%A Lorinda L. Cherry +.%T Typesetting Mathematics, User's Guide +.%D 1976 +.Re +.Rs +.%A Brian W. Kernighan +.%A Lorinda L. Cherry +.%T Typesetting Mathematics, User's Guide (Second Edition) +.%D 1978 +.Re +.Sh HISTORY +The eqn utility, a preprocessor for troff, was originally written by +Brian W. Kernighan and Lorinda L. Cherry in 1975. +The GNU reimplementation of eqn, part of the GNU troff package, was +released in 1989 by James Clark. +The eqn component of +.Xr mandoc 1 +was added in 2011. .Sh AUTHORS -This partial +This .Nm reference was written by .An Kristaps Dzonsons Aq kristaps@bsd.lv . @@ -29,16 +29,12 @@ #include "libroff.h" #define EQN_NEST_MAX 128 /* maximum nesting of defines */ - -#define EQN_ARGS struct eqn_node *ep, \ - int ln, \ - int pos, \ - const char **end +#define EQN_MSG(t, x) mandoc_msg((t), (x)->parse, (x)->eqn.ln, (x)->eqn.pos, NULL) struct eqnpart { const char *name; size_t sz; - int (*fp)(EQN_ARGS); + int (*fp)(struct eqn_node *); }; enum eqnpartt { @@ -48,14 +44,14 @@ enum eqnpartt { EQN__MAX }; -static void eqn_append(struct eqn_node *, - struct mparse *, int, - int, const char *, int); -static int eqn_do_define(EQN_ARGS); -static int eqn_do_ign2(EQN_ARGS); -static int eqn_do_undef(EQN_ARGS); -static const char *eqn_nexttok(struct mparse *, int, int, - const char **, size_t *); +static struct eqn_def *eqn_def_find(struct eqn_node *, + const char *, size_t); +static int eqn_do_define(struct eqn_node *); +static int eqn_do_ign2(struct eqn_node *); +static int eqn_do_undef(struct eqn_node *); +static const char *eqn_nexttok(struct eqn_node *, size_t *); +static const char *eqn_next(struct eqn_node *, char, size_t *); +static int eqn_box(struct eqn_node *); static const struct eqnpart eqnparts[EQN__MAX] = { { "define", 6, eqn_do_define }, /* EQN_DEFINE */ @@ -70,89 +66,40 @@ eqn_read(struct eqn_node **epp, int ln, { size_t sz; struct eqn_node *ep; - struct mparse *mp; - const char *start, *end; - int i, c; + enum rofferr er; + + ep = *epp; + + /* + * If we're the terminating mark, unset our equation status and + * validate the full equation. + */ if (0 == strcmp(p, ".EN")) { + er = eqn_end(ep); *epp = NULL; - return(ROFF_EQN); + return(er); } - ep = *epp; - mp = ep->parse; - end = p + pos; - - if (NULL == (start = eqn_nexttok(mp, ln, pos, &end, &sz))) - return(ROFF_IGN); - - for (i = 0; i < (int)EQN__MAX; i++) { - if (eqnparts[i].sz != sz) - continue; - if (strncmp(eqnparts[i].name, start, sz)) - continue; + /* + * Build up the full string, replacing all newlines with regular + * whitespace. + */ - if ((c = (*eqnparts[i].fp)(ep, ln, pos, &end)) < 0) - return(ROFF_ERR); - else if (0 == c || '\0' == *end) - return(ROFF_IGN); + sz = strlen(p + pos) + 1; + ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1); - /* - * Re-calculate offset and rerun, if trailing text. - * This allows multiple definitions (say) on each line. - */ + /* First invocation: nil terminate the string. */ - *offs = end - (p + pos); - return(ROFF_RERUN); - } + if (0 == ep->sz) + *ep->data = '\0'; - eqn_append(ep, mp, ln, pos, p + pos, 0); + ep->sz += sz; + strlcat(ep->data, p + pos, ep->sz + 1); + strlcat(ep->data, " ", ep->sz + 1); return(ROFF_IGN); } -static void -eqn_append(struct eqn_node *ep, struct mparse *mp, - int ln, int pos, const char *end, int re) -{ - const char *start; - size_t sz; - int i; - - if (re >= EQN_NEST_MAX) { - mandoc_msg(MANDOCERR_BADQUOTE, mp, ln, pos, NULL); - return; - } - - while (NULL != (start = eqn_nexttok(mp, ln, pos, &end, &sz))) { - if (0 == sz) - continue; - for (i = 0; i < (int)ep->defsz; i++) { - if (0 == ep->defs[i].keysz) - continue; - if (ep->defs[i].keysz != sz) - continue; - if (strncmp(ep->defs[i].key, start, sz)) - continue; - start = ep->defs[i].val; - sz = ep->defs[i].valsz; - - eqn_append(ep, mp, ln, pos, start, re + 1); - break; - } - if (i < (int)ep->defsz) - continue; - - ep->eqn.data = mandoc_realloc - (ep->eqn.data, ep->eqn.sz + sz + 1); - - if (0 == ep->eqn.sz) - *ep->eqn.data = '\0'; - - ep->eqn.sz += sz; - strlcat(ep->eqn.data, start, ep->eqn.sz + 1); - } -} - struct eqn_node * eqn_alloc(int pos, int line, struct mparse *parse) { @@ -160,18 +107,61 @@ eqn_alloc(int pos, int line, struct mparse *parse) p = mandoc_calloc(1, sizeof(struct eqn_node)); p->parse = parse; - p->eqn.line = line; + p->eqn.ln = line; p->eqn.pos = pos; return(p); } -/* ARGSUSED */ -void -eqn_end(struct eqn_node *e) +enum rofferr +eqn_end(struct eqn_node *ep) { + int c; + + /* + * Validate the expression. + * Use the grammar found in the literature. + */ + + if (0 == ep->sz) + return(ROFF_IGN); + + while (1 == (c = eqn_box(ep))) + /* Keep parsing. */ ; + + return(c < 0 ? ROFF_IGN : ROFF_EQN); +} + +static int +eqn_box(struct eqn_node *ep) +{ + size_t sz; + const char *start; + int i; + + if (NULL == (start = eqn_nexttok(ep, &sz))) + return(0); + + for (i = 0; i < (int)EQN__MAX; i++) { + if (eqnparts[i].sz != sz) + continue; + if (strncmp(eqnparts[i].name, start, sz)) + continue; + if ( ! (*eqnparts[i].fp)(ep)) + return(-1); + + return(1); + } - /* Nothing to do. */ + ep->eqn.data = mandoc_realloc + (ep->eqn.data, ep->eqn.sz + sz + 1); + + if (0 == ep->eqn.sz) + *ep->eqn.data = '\0'; + + ep->eqn.sz += sz; + strlcat(ep->eqn.data, start, ep->eqn.sz + 1); + return(1); } void @@ -186,115 +176,122 @@ eqn_free(struct eqn_node *p) free(p->defs[i].val); } + free(p->data); free(p->defs); free(p); } -/* - * Return the current equation token setting "next" on the next one, - * setting the token size in "sz". - * This does the Right Thing for quoted strings, too. - * Returns NULL if no more tokens exist. - */ static const char * -eqn_nexttok(struct mparse *mp, int ln, int pos, - const char **next, size_t *sz) +eqn_nexttok(struct eqn_node *ep, size_t *sz) { - const char *start; - int q; - start = *next; + return(eqn_next(ep, '"', sz)); +} + +static const char * +eqn_next(struct eqn_node *ep, char quote, size_t *sz) +{ + char *start, *next; + int q, diff, lim; + size_t sv, ssz; + struct eqn_def *def; + + if (NULL == sz) + sz = &ssz; + + start = &ep->data[(int)ep->cur]; q = 0; if ('\0' == *start) return(NULL); - if ('"' == *start) { - start++; + if (quote == *start) { + ep->cur++; q = 1; } - *next = q ? strchr(start, '"') : strchr(start, ' '); + lim = 0; - if (NULL != *next) { - *sz = (size_t)(*next - start); + sv = ep->cur; +again: + if (lim >= EQN_NEST_MAX) { + EQN_MSG(MANDOCERR_EQNNEST, ep); + return(NULL); + } + + ep->cur = sv; + start = &ep->data[(int)ep->cur]; + next = q ? strchr(start, quote) : strchr(start, ' '); + + if (NULL != next) { + *sz = (size_t)(next - start); + ep->cur += *sz; if (q) - (*next)++; - while (' ' == **next) - (*next)++; + ep->cur++; + while (' ' == ep->data[(int)ep->cur]) + ep->cur++; } else { - /* - * XXX: groff gets confused by this and doesn't always - * do the "right thing" (just terminate it and warn - * about it). - */ if (q) - mandoc_msg(MANDOCERR_BADQUOTE, - mp, ln, pos, NULL); - *next = strchr(start, '\0'); - *sz = (size_t)(*next - start); + EQN_MSG(MANDOCERR_BADQUOTE, ep); + next = strchr(start, '\0'); + *sz = (size_t)(next - start); + ep->cur += *sz; + } + + if (NULL != (def = eqn_def_find(ep, start, *sz))) { + diff = def->valsz - *sz; + + if (def->valsz > *sz) { + ep->sz += diff; + ep->data = mandoc_realloc(ep->data, ep->sz + 1); + ep->data[ep->sz] = '\0'; + start = &ep->data[(int)sv]; + } + + diff = def->valsz - *sz; + memmove(start + *sz + diff, start + *sz, + (strlen(start) - *sz) + 1); + memcpy(start, def->val, def->valsz); + goto again; } return(start); } static int -eqn_do_ign2(struct eqn_node *ep, int ln, int pos, const char **end) +eqn_do_ign2(struct eqn_node *ep) { const char *start; - struct mparse *mp; - size_t sz; - - mp = ep->parse; - start = eqn_nexttok(ep->parse, ln, pos, end, &sz); - if (NULL == start || 0 == sz) { - mandoc_msg(MANDOCERR_EQNARGS, mp, ln, pos, NULL); - return(0); - } + if (NULL == (start = eqn_nexttok(ep, NULL))) + EQN_MSG(MANDOCERR_EQNARGS, ep); + else if (NULL == (start = eqn_nexttok(ep, NULL))) + EQN_MSG(MANDOCERR_EQNARGS, ep); + else + return(1); - start = eqn_nexttok(ep->parse, ln, pos, end, &sz); - if (NULL == start || 0 == sz) { - mandoc_msg(MANDOCERR_EQNARGS, mp, ln, pos, NULL); - return(0); - } - - return(1); + return(0); } static int -eqn_do_define(struct eqn_node *ep, int ln, int pos, const char **end) +eqn_do_define(struct eqn_node *ep) { const char *start; - struct mparse *mp; size_t sz; + struct eqn_def *def; int i; - mp = ep->parse; - - start = eqn_nexttok(mp, ln, pos, end, &sz); - if (NULL == start || 0 == sz) { - mandoc_msg(MANDOCERR_EQNARGS, mp, ln, pos, NULL); + if (NULL == (start = eqn_nexttok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNARGS, ep); return(0); } - /* TODO: merge this code with roff_getstr(). */ - /* * Search for a key that already exists. - * Note that the string array can have "holes" (null key). + * Create a new key if none is found. */ - for (i = 0; i < (int)ep->defsz; i++) { - if (0 == ep->defs[i].keysz || ep->defs[i].keysz != sz) - continue; - if (0 == strncmp(ep->defs[i].key, start, sz)) - break; - } - - /* Create a new key. */ - - if (i == (int)ep->defsz) { + if (NULL == (def = eqn_def_find(ep, start, sz))) { /* Find holes in string array. */ for (i = 0; i < (int)ep->defsz; i++) if (0 == ep->defs[i].keysz) @@ -314,49 +311,48 @@ eqn_do_define(struct eqn_node *ep, int ln, int pos, const char **end) memcpy(ep->defs[i].key, start, sz); ep->defs[i].key[(int)sz] = '\0'; + def = &ep->defs[i]; } - start = eqn_nexttok(mp, ln, pos, end, &sz); + start = eqn_next(ep, ep->data[(int)ep->cur], &sz); - if (NULL == start || 0 == sz) { - ep->defs[i].keysz = 0; - mandoc_msg(MANDOCERR_EQNARGS, mp, ln, pos, NULL); + if (NULL == start) { + EQN_MSG(MANDOCERR_EQNARGS, ep); return(0); } - ep->defs[i].valsz = sz; - ep->defs[i].val = mandoc_realloc - (ep->defs[i].val, sz + 1); - memcpy(ep->defs[i].val, start, sz); - ep->defs[i].val[(int)sz] = '\0'; - - return(sz ? 1 : 0); + def->valsz = sz; + def->val = mandoc_realloc(ep->defs[i].val, sz + 1); + memcpy(def->val, start, sz); + def->val[(int)sz] = '\0'; + return(1); } static int -eqn_do_undef(struct eqn_node *ep, int ln, int pos, const char **end) +eqn_do_undef(struct eqn_node *ep) { const char *start; - struct mparse *mp; + struct eqn_def *def; size_t sz; - int i; - - mp = ep->parse; - start = eqn_nexttok(mp, ln, pos, end, &sz); - if (NULL == start || 0 == sz) { - mandoc_msg(MANDOCERR_EQNARGS, mp, ln, pos, NULL); + if (NULL == (start = eqn_nexttok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNARGS, ep); return(0); - } - - for (i = 0; i < (int)ep->defsz; i++) { - if (0 == ep->defs[i].keysz || ep->defs[i].keysz != sz) - continue; - if (strncmp(ep->defs[i].key, start, sz)) - continue; - ep->defs[i].keysz = 0; - break; - } + } else if (NULL != (def = eqn_def_find(ep, start, sz))) + def->keysz = 0; return(1); } + +static struct eqn_def * +eqn_def_find(struct eqn_node *ep, const char *key, size_t sz) +{ + int i; + + for (i = 0; i < (int)ep->defsz; i++) + if (ep->defs[i].keysz && ep->defs[i].keysz == sz && + 0 == strncmp(ep->defs[i].key, key, sz)) + return(&ep->defs[i]); + + return(NULL); +} @@ -42,21 +42,24 @@ struct tbl_node { struct tbl_node *next; }; -struct eqn_def { - char *key; - size_t keysz; - char *val; - size_t valsz; -}; - struct eqn_node { struct eqn_def *defs; size_t defsz; + char *data; + size_t cur; + size_t sz; struct eqn eqn; struct mparse *parse; struct eqn_node *next; }; +struct eqn_def { + char *key; + size_t keysz; + char *val; + size_t valsz; +}; + struct tbl_node *tbl_alloc(int, int, struct mparse *); void tbl_restart(int, int, struct tbl_node *); void tbl_free(struct tbl_node *); @@ -69,7 +72,7 @@ int tbl_cdata(struct tbl_node *, int, const char *); const struct tbl_span *tbl_span(struct tbl_node *); void tbl_end(struct tbl_node *); struct eqn_node *eqn_alloc(int, int, struct mparse *); -void eqn_end(struct eqn_node *); +enum rofferr eqn_end(struct eqn_node *); void eqn_free(struct eqn_node *); enum rofferr eqn_read(struct eqn_node **, int, const char *, int, int *); @@ -371,14 +371,14 @@ man_addeqn(struct man *m, const struct eqn *ep) assert( ! (MAN_HALT & m->flags)); - n = man_node_alloc(m, ep->line, ep->pos, MAN_EQN, MAN_MAX); + n = man_node_alloc(m, ep->ln, ep->pos, MAN_EQN, MAN_MAX); n->eqn = ep; if ( ! man_node_append(m, n)) return(0); m->next = MAN_NEXT_SIBLING; - return(man_descope(m, ep->line, ep->pos)); + return(man_descope(m, ep->ln, ep->pos)); } int @@ -280,7 +280,7 @@ struct tbl_span { struct eqn { size_t sz; char *data; - int line; /* invocation line */ + int ln; /* invocation line */ int pos; /* invocation position */ }; @@ -233,11 +233,11 @@ mdoc_addeqn(struct mdoc *m, const struct eqn *ep) /* No text before an initial macro. */ if (SEC_NONE == m->lastnamed) { - mdoc_pmsg(m, ep->line, ep->pos, MANDOCERR_NOTEXT); + mdoc_pmsg(m, ep->ln, ep->pos, MANDOCERR_NOTEXT); return(1); } - n = node_alloc(m, ep->line, ep->pos, MDOC_MAX, MDOC_EQN); + n = node_alloc(m, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN); n->eqn = ep; if ( ! node_append(m, n)) @@ -583,7 +583,7 @@ roff_endparse(struct roff *r) if (r->eqn) { mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, - r->eqn->eqn.line, r->eqn->eqn.pos, NULL); + r->eqn->eqn.ln, r->eqn->eqn.pos, NULL); eqn_end(r->eqn); r->eqn = NULL; } |