summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKristaps Dzonsons <kristaps@bsd.lv>2011-07-21 23:30:39 +0000
committerKristaps Dzonsons <kristaps@bsd.lv>2011-07-21 23:30:39 +0000
commit561fdaf67b6be484562a755697a834c21b10e4d9 (patch)
tree3f7792cc5662c919c2fc20e0490f6ec09d7e41a4
parentff6ddfd72fee08b3063dffb618af31325e51beea (diff)
downloadmandoc-561fdaf67b6be484562a755697a834c21b10e4d9.tar.gz
Complete eqn.7 parsing. Features all productions from the original 1975
CACM paper in an LR(1) parse (1 -> eqn_rewind()). Right now the code is a little jungly, but will clear up as I consolidate parse components. The AST structure will also be cleaned up, as right now it's pretty ad hoc (this won't change the parse itself). I added the mandoc_strndup() function while here.
-rw-r--r--eqn.76
-rw-r--r--eqn.c347
-rw-r--r--libroff.h1
-rw-r--r--mandoc.c10
-rw-r--r--mandoc.h25
-rw-r--r--read.c3
-rw-r--r--tree.c11
7 files changed, 279 insertions, 124 deletions
diff --git a/eqn.7 b/eqn.7
index 3f8b7f7c..b4910b2e 100644
--- a/eqn.7
+++ b/eqn.7
@@ -69,9 +69,14 @@ box : text
| UNDEF text
| box pos box
| box mark
+ | pile { list }
| font box
| SIZE text box
+ | LEFT text box [RIGHT text]
text : TEXT
+pile : LPILE
+ | CPILE
+ | RPILE
pos : OVER
| SUP
| SUB
@@ -88,6 +93,7 @@ mark : DOT
font : ROMAN
| ITALIC
| BOLD
+list : eqn | list ABOVE eqn
.Ed
.Pp
Data in TEXT form is a non-empty sequence of non-space characters or a
diff --git a/eqn.c b/eqn.c
index ca68b1b9..4e4f4f5d 100644
--- a/eqn.c
+++ b/eqn.c
@@ -32,6 +32,13 @@
#define EQN_NEST_MAX 128 /* maximum nesting of defines */
#define EQN_MSG(t, x) mandoc_msg((t), (x)->parse, (x)->eqn.ln, (x)->eqn.pos, NULL)
+enum eqn_rest {
+ EQN_DESCOPE,
+ EQN_ERR,
+ EQN_OK,
+ EQN_EOF
+};
+
struct eqnstr {
const char *name;
size_t sz;
@@ -49,6 +56,7 @@ enum eqnpartt {
EQN__MAX
};
+static struct eqn_box *eqn_box_alloc(struct eqn_box *);
static void eqn_box_free(struct eqn_box *);
static struct eqn_def *eqn_def_find(struct eqn_node *,
const char *, size_t);
@@ -59,8 +67,9 @@ static const char *eqn_nexttok(struct eqn_node *, size_t *);
static const char *eqn_nextrawtok(struct eqn_node *, size_t *);
static const char *eqn_next(struct eqn_node *,
char, size_t *, int);
-static int eqn_box(struct eqn_node *,
- struct eqn_box *, struct eqn_box **);
+static void eqn_rewind(struct eqn_node *);
+static enum eqn_rest eqn_eqn(struct eqn_node *, struct eqn_box *);
+static enum eqn_rest eqn_box(struct eqn_node *, struct eqn_box *);
static const struct eqnpart eqnparts[EQN__MAX] = {
{ { "define", 6 }, eqn_do_define }, /* EQN_DEFINE */
@@ -81,20 +90,26 @@ static const struct eqnstr eqnmarks[EQNMARK__MAX] = {
};
static const struct eqnstr eqnfonts[EQNFONT__MAX] = {
- { "", 0 },
- { "roman", 5 },
- { "bold", 4 },
- { "italic", 6 },
+ { "", 0 }, /* EQNFONT_NONE */
+ { "roman", 5 }, /* EQNFONT_ROMAN */
+ { "bold", 4 }, /* EQNFONT_BOLD */
+ { "italic", 6 }, /* EQNFONT_ITALIC */
};
static const struct eqnstr eqnposs[EQNPOS__MAX] = {
- { "", 0 },
- { "over", 4 },
- { "sup", 3 },
- { "sub", 3 },
- { "to", 2 },
- { "from", 4 },
- { "above", 5 },
+ { "", 0 }, /* EQNPOS_NONE */
+ { "over", 4 }, /* EQNPOS_OVER */
+ { "sup", 3 }, /* EQNPOS_SUP */
+ { "sub", 3 }, /* EQNPOS_SUB */
+ { "to", 2 }, /* EQNPOS_TO */
+ { "from", 4 }, /* EQNPOS_FROM */
+};
+
+static const struct eqnstr eqnpiles[EQNPILE__MAX] = {
+ { "", 0 }, /* EQNPILE_NONE */
+ { "cpile", 5 }, /* EQNPILE_CPILE */
+ { "rpile", 5 }, /* EQNPILE_RPILE */
+ { "lpile", 5 }, /* EQNPILE_LPILE */
};
/* ARGSUSED */
@@ -154,142 +169,215 @@ eqn_alloc(int pos, int line, struct mparse *parse)
enum rofferr
eqn_end(struct eqn_node *ep)
{
- struct eqn_box *root, *last;
- int c;
+ struct eqn_box *root;
+ enum eqn_rest c;
- ep->eqn.root = root =
- mandoc_calloc(1, sizeof(struct eqn_box));
+ ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box));
+
+ root = ep->eqn.root;
root->type = EQN_ROOT;
if (0 == ep->sz)
return(ROFF_IGN);
- /*
- * Run the parser.
- * If we return before reaching the end of our input, our scope
- * is still open somewhere.
- * If we return alright but don't have a symmetric scoping, then
- * something's not right either.
- * Otherwise, return the equation.
- */
-
- if (0 == (c = eqn_box(ep, root, &last))) {
- if (last != root) {
- EQN_MSG(MANDOCERR_EQNSCOPE, ep);
- c = 0;
- }
- } else if (c > 0)
+ if (EQN_DESCOPE == (c = eqn_eqn(ep, root))) {
EQN_MSG(MANDOCERR_EQNNSCOPE, ep);
+ c = EQN_ERR;
+ }
- return(0 == c ? ROFF_EQN : ROFF_IGN);
+ return(EQN_EOF == c ? ROFF_EQN : ROFF_IGN);
}
-static int
-eqn_box(struct eqn_node *ep, struct eqn_box *last, struct eqn_box **sv)
+static enum eqn_rest
+eqn_eqn(struct eqn_node *ep, struct eqn_box *last)
+{
+ struct eqn_box *bp;
+ enum eqn_rest c;
+
+ bp = eqn_box_alloc(last);
+ bp->type = EQN_SUBEXPR;
+
+ while (EQN_OK == (c = eqn_box(ep, bp)))
+ /* Spin! */ ;
+
+ return(c);
+}
+
+static enum eqn_rest
+eqn_box(struct eqn_node *ep, struct eqn_box *last)
{
size_t sz;
const char *start;
- int c, i, nextc, size;
- enum eqn_fontt font;
+ char *left;
+ enum eqn_rest c;
+ int i, size;
struct eqn_box *bp;
- /*
- * Mark our last level of subexpression.
- * Also mark whether that the next node should be a
- * subexpression node.
- */
-
- *sv = last;
- nextc = 1;
- font = EQNFONT_NONE;
- size = EQN_DEFSIZE;
-again:
if (NULL == (start = eqn_nexttok(ep, &sz)))
- return(0);
+ return(EQN_EOF);
- for (i = 0; i < (int)EQNFONT__MAX; i++) {
- if (eqnfonts[i].sz != sz)
+ if (1 == sz && 0 == strncmp("}", start, 1))
+ return(EQN_DESCOPE);
+ else if (5 == sz && 0 == strncmp("right", start, 5))
+ return(EQN_DESCOPE);
+ else if (5 == sz && 0 == strncmp("above", start, 5))
+ return(EQN_DESCOPE);
+
+ for (i = 0; i < (int)EQN__MAX; i++) {
+ if (eqnparts[i].str.sz != sz)
continue;
- if (strncmp(eqnfonts[i].name, start, sz))
+ if (strncmp(eqnparts[i].str.name, start, sz))
continue;
- font = (enum eqn_fontt)i;
- goto again;
- }
+ return((*eqnparts[i].fp)(ep) ? EQN_OK : EQN_ERR);
+ }
- for (i = 0; i < (int)EQNFONT__MAX; i++) {
- if (eqnposs[i].sz != sz)
+ if (1 == sz && 0 == strncmp("{", start, 1)) {
+ if (EQN_DESCOPE != (c = eqn_eqn(ep, last))) {
+ if (EQN_ERR != c)
+ EQN_MSG(MANDOCERR_EQNSCOPE, ep);
+ return(EQN_ERR);
+ }
+ eqn_rewind(ep);
+ start = eqn_nexttok(ep, &sz);
+ assert(start);
+ if (1 == sz && 0 == strncmp("}", start, 1))
+ return(EQN_OK);
+ EQN_MSG(MANDOCERR_EQNBADSCOPE, ep);
+ return(EQN_ERR);
+ }
+
+ for (i = 0; i < (int)EQNPILE__MAX; i++) {
+ if (eqnpiles[i].sz != sz)
continue;
- if (strncmp(eqnposs[i].name, start, sz))
+ if (strncmp(eqnpiles[i].name, start, sz))
continue;
- last->pos = (enum eqn_post)i;
- goto again;
+ if (NULL == (start = eqn_nexttok(ep, &sz))) {
+ EQN_MSG(MANDOCERR_EQNEOF, ep);
+ return(EQN_ERR);
+ }
+ if (1 != sz || strncmp("{", start, 1)) {
+ EQN_MSG(MANDOCERR_EQNSYNT, ep);
+ return(EQN_ERR);
+ }
+ if (EQN_DESCOPE != (c = eqn_eqn(ep, last))) {
+ if (EQN_ERR != c)
+ EQN_MSG(MANDOCERR_EQNSCOPE, ep);
+ return(EQN_ERR);
+ }
+ assert(last->last);
+ last->last->pile = (enum eqn_pilet)i;
+ eqn_rewind(ep);
+ start = eqn_nexttok(ep, &sz);
+ assert(start);
+ if (1 == sz && 0 == strncmp("}", start, 1))
+ return(EQN_OK);
+ if (5 != sz || strncmp("above", start, 5)) {
+ EQN_MSG(MANDOCERR_EQNSYNT, ep);
+ return(EQN_ERR);
+ }
+ last->last->above = 1;
+ if (EQN_DESCOPE != (c = eqn_eqn(ep, last))) {
+ if (EQN_ERR != c)
+ EQN_MSG(MANDOCERR_EQNSCOPE, ep);
+ return(EQN_ERR);
+ }
+ eqn_rewind(ep);
+ start = eqn_nexttok(ep, &sz);
+ assert(start);
+ if (1 == sz && 0 == strncmp("}", start, 1))
+ return(EQN_OK);
+ EQN_MSG(MANDOCERR_EQNBADSCOPE, ep);
+ return(EQN_ERR);
}
- for (i = 0; i < (int)EQN__MAX; i++) {
- if (eqnparts[i].str.sz != sz)
+ if (4 == sz && 0 == strncmp("left", start, 4)) {
+ if (NULL == (start = eqn_nexttok(ep, &sz))) {
+ EQN_MSG(MANDOCERR_EQNEOF, ep);
+ return(EQN_ERR);
+ }
+ left = mandoc_strndup(start, sz);
+ if (EQN_DESCOPE != (c = eqn_eqn(ep, last)))
+ return(c);
+ assert(last->last);
+ last->last->left = left;
+ eqn_rewind(ep);
+ start = eqn_nexttok(ep, &sz);
+ assert(start);
+ if (5 != sz || strncmp("right", start, 5))
+ return(EQN_DESCOPE);
+ if (NULL == (start = eqn_nexttok(ep, &sz))) {
+ EQN_MSG(MANDOCERR_EQNEOF, ep);
+ return(EQN_ERR);
+ }
+ last->last->right = mandoc_strndup(start, sz);
+ return(EQN_OK);
+ }
+
+ for (i = 0; i < (int)EQNPOS__MAX; i++) {
+ if (eqnposs[i].sz != sz)
continue;
- if (strncmp(eqnparts[i].str.name, start, sz))
+ if (strncmp(eqnposs[i].name, start, sz))
continue;
- if ( ! (*eqnparts[i].fp)(ep))
- return(-1);
- goto again;
- }
+ if (NULL == last->last) {
+ EQN_MSG(MANDOCERR_EQNSYNT, ep);
+ return(EQN_ERR);
+ }
+ last->last->pos = (enum eqn_post)i;
+ if (EQN_EOF == (c = eqn_box(ep, last))) {
+ EQN_MSG(MANDOCERR_EQNEOF, ep);
+ return(EQN_ERR);
+ }
+ return(c);
+ }
for (i = 0; i < (int)EQNMARK__MAX; i++) {
if (eqnmarks[i].sz != sz)
continue;
if (strncmp(eqnmarks[i].name, start, sz))
continue;
- last->mark = (enum eqn_markt)i;
- goto again;
+ if (NULL == last->last) {
+ EQN_MSG(MANDOCERR_EQNSYNT, ep);
+ return(EQN_ERR);
+ }
+ last->last->mark = (enum eqn_markt)i;
+ if (EQN_EOF == (c = eqn_box(ep, last))) {
+ EQN_MSG(MANDOCERR_EQNEOF, ep);
+ return(EQN_ERR);
+ }
+ return(c);
}
- if (sz == 4 && 0 == strncmp("size", start, 1)) {
- if (NULL == (start = eqn_nexttok(ep, &sz)))
- return(0);
- size = mandoc_strntoi(start, sz, 10);
- goto again;
+ for (i = 0; i < (int)EQNFONT__MAX; i++) {
+ if (eqnfonts[i].sz != sz)
+ continue;
+ if (strncmp(eqnfonts[i].name, start, sz))
+ continue;
+ if (EQN_EOF == (c = eqn_box(ep, last))) {
+ EQN_MSG(MANDOCERR_EQNEOF, ep);
+ return(EQN_ERR);
+ } else if (EQN_OK == c)
+ last->last->font = (enum eqn_fontt)i;
+ return(c);
}
- if (sz == 1 && 0 == strncmp("}", start, 1))
- return(1);
-
- bp = mandoc_calloc(1, sizeof(struct eqn_box));
- bp->font = font;
- bp->size = size;
-
- font = EQNFONT_NONE;
- size = EQN_DEFSIZE;
-
- if (nextc)
- last->child = bp;
- else
- last->next = bp;
-
- last = bp;
-
- /*
- * See if we're to open a new subexpression.
- * If so, mark our node as such and descend.
- */
-
- if (sz == 1 && 0 == strncmp("{", start, 1)) {
- bp->type = EQN_SUBEXPR;
- c = eqn_box(ep, bp, sv);
-
- nextc = 0;
- goto again;
+ if (4 == sz && 0 == strncmp("size", start, 4)) {
+ if (NULL == (start = eqn_nexttok(ep, &sz))) {
+ EQN_MSG(MANDOCERR_EQNEOF, ep);
+ return(EQN_ERR);
+ }
+ size = mandoc_strntoi(start, sz, 10);
+ if (EQN_EOF == (c = eqn_box(ep, last))) {
+ EQN_MSG(MANDOCERR_EQNEOF, ep);
+ return(EQN_ERR);
+ } else if (EQN_OK != c)
+ return(c);
+ last->last->size = size;
}
- /* A regular text node. */
-
+ bp = eqn_box_alloc(last);
bp->type = EQN_TEXT;
- bp->text = mandoc_malloc(sz + 1);
- *bp->text = '\0';
- strlcat(bp->text, start, sz + 1);
-
- nextc = 0;
- goto again;
+ bp->text = mandoc_strndup(start, sz);
+ return(EQN_OK);
}
void
@@ -309,16 +397,36 @@ eqn_free(struct eqn_node *p)
free(p);
}
+static struct eqn_box *
+eqn_box_alloc(struct eqn_box *parent)
+{
+ struct eqn_box *bp;
+
+ bp = mandoc_calloc(1, sizeof(struct eqn_box));
+ bp->parent = parent;
+ bp->size = EQN_DEFSIZE;
+
+ if (NULL == parent->first)
+ parent->first = bp;
+ else
+ parent->last->next = bp;
+
+ parent->last = bp;
+ return(bp);
+}
+
static void
eqn_box_free(struct eqn_box *bp)
{
- if (bp->child)
- eqn_box_free(bp->child);
+ if (bp->first)
+ eqn_box_free(bp->first);
if (bp->next)
eqn_box_free(bp->next);
free(bp->text);
+ free(bp->left);
+ free(bp->right);
free(bp);
}
@@ -336,19 +444,26 @@ eqn_nexttok(struct eqn_node *ep, size_t *sz)
return(eqn_next(ep, '"', sz, 1));
}
+static void
+eqn_rewind(struct eqn_node *ep)
+{
+
+ ep->cur = ep->rew;
+}
+
static const char *
eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl)
{
char *start, *next;
int q, diff, lim;
- size_t sv, ssz;
+ size_t ssz;
struct eqn_def *def;
if (NULL == sz)
sz = &ssz;
lim = 0;
- sv = ep->cur;
+ ep->rew = ep->cur;
again:
/* Prevent self-definitions. */
@@ -357,7 +472,7 @@ again:
return(NULL);
}
- ep->cur = sv;
+ ep->cur = ep->rew;
start = &ep->data[(int)ep->cur];
q = 0;
@@ -399,7 +514,7 @@ again:
ep->sz += diff;
ep->data = mandoc_realloc(ep->data, ep->sz + 1);
ep->data[ep->sz] = '\0';
- start = &ep->data[(int)sv];
+ start = &ep->data[(int)ep->rew];
}
diff = def->valsz - *sz;
diff --git a/libroff.h b/libroff.h
index 1cb2f534..3fa0e95a 100644
--- a/libroff.h
+++ b/libroff.h
@@ -46,6 +46,7 @@ struct eqn_node {
struct eqn_def *defs;
size_t defsz;
char *data;
+ size_t rew;
size_t cur;
size_t sz;
struct eqn eqn;
diff --git a/mandoc.c b/mandoc.c
index 6d11e9b7..0f8e5394 100644
--- a/mandoc.c
+++ b/mandoc.c
@@ -432,6 +432,16 @@ mandoc_realloc(void *ptr, size_t size)
return(ptr);
}
+char *
+mandoc_strndup(const char *ptr, size_t sz)
+{
+ char *p;
+
+ p = mandoc_malloc(sz + 1);
+ memcpy(p, ptr, sz);
+ p[(int)sz] = '\0';
+ return(p);
+}
char *
mandoc_strdup(const char *ptr)
diff --git a/mandoc.h b/mandoc.h
index 681a0f88..41c8ad6c 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -114,6 +114,9 @@ enum mandocerr {
MANDOCERR_EQNNEST, /* too many nested equation defines */
MANDOCERR_EQNNSCOPE, /* unexpected equation scope closure*/
MANDOCERR_EQNSCOPE, /* equation scope open on exit */
+ MANDOCERR_EQNBADSCOPE, /* overlapping equation scopes */
+ MANDOCERR_EQNEOF, /* unexpected end of equation */
+ MANDOCERR_EQNSYNT, /* equation syntax error */
/* related to tables */
MANDOCERR_TBL, /* bad table syntax */
@@ -313,10 +316,17 @@ enum eqn_post {
EQNPOS_SUB,
EQNPOS_TO,
EQNPOS_FROM,
- EQNPOS_ABOVE,
EQNPOS__MAX
};
+enum eqn_pilet {
+ EQNPILE_NONE = 0,
+ EQNPILE_CPILE,
+ EQNPILE_RPILE,
+ EQNPILE_LPILE,
+ EQNPILE__MAX
+};
+
/*
* A "box" is a parsed mathematical expression as defined by the eqn.7
* grammar.
@@ -325,12 +335,18 @@ struct eqn_box {
int size; /* font size of expression */
#define EQN_DEFSIZE INT_MIN
enum eqn_boxt type; /* type of node */
- struct eqn_box *child; /* child node */
- struct eqn_box *next; /* next in tree */
- enum eqn_post pos; /* position of next box */
+ struct eqn_box *first; /* first child node */
+ struct eqn_box *last; /* last child node */
+ struct eqn_box *next; /* node sibling */
+ struct eqn_box *parent; /* node parent */
char *text; /* text (or NULL) */
+ char *left;
+ char *right;
+ enum eqn_post pos; /* position of next box */
enum eqn_markt mark; /* a mark about the box */
enum eqn_fontt font; /* font of box */
+ enum eqn_pilet pile; /* equation piling */
+ int above; /* next node is above */
};
/*
@@ -391,6 +407,7 @@ void *mandoc_calloc(size_t, size_t);
void *mandoc_malloc(size_t);
void *mandoc_realloc(void *, size_t);
char *mandoc_strdup(const char *);
+char *mandoc_strndup(const char *, size_t);
enum mandoc_esc mandoc_escape(const char **, const char **, int *);
diff --git a/read.c b/read.c
index 9984cc3c..eee2c6f1 100644
--- a/read.c
+++ b/read.c
@@ -156,6 +156,9 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"too many nested equation defines",
"unexpected equation scope closure",
"equation scope open on exit",
+ "overlapping equation scopes",
+ "unexpected end of equation",
+ "equation syntax error",
/* related to tables */
"bad table syntax",
diff --git a/tree.c b/tree.c
index c1d7937e..96ca7eb3 100644
--- a/tree.c
+++ b/tree.c
@@ -274,13 +274,16 @@ print_box(const struct eqn_box *ep, int indent)
printf("eqn-root(%d, %d, %d, %d)\n",
EQN_DEFSIZE == ep->size ? 0 : ep->size,
ep->pos, ep->font, ep->mark);
- print_box(ep->child, indent + 1);
+ print_box(ep->first, indent + 1);
break;
case (EQN_SUBEXPR):
- printf("eqn-subxpr(%d, %d, %d, %d)\n",
+ printf("eqn-subxpr(%d, %d, %d, %d, %d, %d, \"%s\", \"%s\")\n",
EQN_DEFSIZE == ep->size ? 0 : ep->size,
- ep->pos, ep->font, ep->mark);
- print_box(ep->child, indent + 1);
+ ep->pos, ep->font, ep->mark,
+ ep->pile, ep->above,
+ ep->left ? ep->left : "",
+ ep->right ? ep->right : "");
+ print_box(ep->first, indent + 1);
break;
case (EQN_TEXT):
printf("eqn-text(%d, %d, %d, %d): [%s]\n",