diff options
author | Kristaps Dzonsons <kristaps@bsd.lv> | 2010-05-16 10:59:36 +0000 |
---|---|---|
committer | Kristaps Dzonsons <kristaps@bsd.lv> | 2010-05-16 10:59:36 +0000 |
commit | 110e3743cb6c70af4d1a25c2527e9d923e1f163b (patch) | |
tree | aad4e38ad6261d3903a43823cbfc9c9ee51dc014 | |
parent | 110ec8e74f50c3c583a7b5930b26eede65e6f8e7 (diff) | |
download | mandoc-110e3743cb6c70af4d1a25c2527e9d923e1f163b.tar.gz |
Allow roff_parseln() to be re-run.
Allow roff_parseln() to manipulate the line buffer offset. This is used
in situations like `.ie n .TH FOO 1' or `.ie n .ie n', where the line
buffer offset is recalculated and the roff parser is then re-run.
Fix mdoc_parseln() and man_parseln() to accept the initial line offset.
WARNING: backed out ALL roff macros whilst accommodating how roff
handles multi-line conditionals (in short, re-running the parser).
-rw-r--r-- | main.c | 38 | ||||
-rw-r--r-- | man.c | 56 | ||||
-rw-r--r-- | man.h | 2 | ||||
-rw-r--r-- | mdoc.c | 58 | ||||
-rw-r--r-- | mdoc.h | 2 | ||||
-rw-r--r-- | roff.c | 139 | ||||
-rw-r--r-- | roff.h | 10 |
7 files changed, 206 insertions, 99 deletions
@@ -386,7 +386,7 @@ static void fdesc(struct curparse *curp) { struct buf ln, blk; - int i, pos, lnn, lnn_start, with_mmap; + int i, pos, lnn, lnn_start, with_mmap, of; enum rofferr re; struct man *man; struct mdoc *mdoc; @@ -467,22 +467,42 @@ fdesc(struct curparse *curp) goto bailout; ln.buf[pos] = '\0'; - re = roff_parseln(roff, lnn_start, &ln.buf, &ln.sz); + /* + * A significant amount of complexity is contained by + * the roff preprocessor. It's line-oriented but can be + * expressed on one line, so we need at times to + * readjust our starting point and re-run it. The roff + * preprocessor can also readjust the buffers with new + * data, so we pass them in wholesale. + */ + + of = 0; + do { + re = roff_parseln(roff, lnn_start, + &ln.buf, &ln.sz, of, &of); + } while (ROFF_RERUN == re); + if (ROFF_IGN == re) continue; else if (ROFF_ERR == re) goto bailout; - /* If unset, assign parser in pset(). */ + /* + * If input parsers have not been allocated, do so now. + * We keep these instanced betwen parsers, but set them + * locally per parse routine since we can use different + * parsers with each one. + */ - if ( ! (man || mdoc) && ! pset(ln.buf, pos, curp, &man, &mdoc)) - goto bailout; + if ( ! (man || mdoc)) + if ( ! pset(ln.buf + of, pos - of, curp, &man, &mdoc)) + goto bailout; - /* Pass down into parsers. */ + /* Lastly, push down into the parsers themselves. */ - if (man && ! man_parseln(man, lnn_start, ln.buf)) + if (man && ! man_parseln(man, lnn_start, ln.buf, of)) goto bailout; - if (mdoc && ! mdoc_parseln(mdoc, lnn_start, ln.buf)) + if (mdoc && ! mdoc_parseln(mdoc, lnn_start, ln.buf, of)) goto bailout; } @@ -493,6 +513,8 @@ fdesc(struct curparse *curp) goto bailout; } + /* Clean up the parse routine ASTs. */ + if (mdoc && ! mdoc_endparse(mdoc)) goto bailout; if (man && ! 
man_endparse(man)) @@ -74,11 +74,11 @@ static int man_node_append(struct man *, static void man_node_free(struct man_node *); static void man_node_unlink(struct man *, struct man_node *); -static int man_ptext(struct man *, int, char *); -static int man_pmacro(struct man *, int, char *); +static int man_ptext(struct man *, int, char *, int); +static int man_pmacro(struct man *, int, char *, int); static void man_free1(struct man *); static void man_alloc1(struct man *); -static int macrowarn(struct man *, int, const char *); +static int macrowarn(struct man *, int, const char *, int); const struct man_node * @@ -148,12 +148,15 @@ man_endparse(struct man *m) int -man_parseln(struct man *m, int ln, char *buf) +man_parseln(struct man *m, int ln, char *buf, int offs) { - return(('.' == *buf || '\'' == *buf) ? - man_pmacro(m, ln, buf) : - man_ptext(m, ln, buf)); + if (MAN_HALT & m->flags) + return(0); + + return(('.' == buf[offs] || '\'' == buf[offs]) ? + man_pmacro(m, ln, buf, offs) : + man_ptext(m, ln, buf, offs)); } @@ -363,31 +366,33 @@ man_node_delete(struct man *m, struct man_node *p) static int -man_ptext(struct man *m, int line, char *buf) +man_ptext(struct man *m, int line, char *buf, int offs) { int i; /* Ignore bogus comments. */ - if ('\\' == buf[0] && '.' == buf[1] && '\"' == buf[2]) - return(man_pwarn(m, line, 0, WBADCOMMENT)); + if ('\\' == buf[offs] && + '.' == buf[offs + 1] && + '"' == buf[offs + 2]) + return(man_pwarn(m, line, offs, WBADCOMMENT)); /* Literal free-form text whitespace is preserved. */ if (MAN_LITERAL & m->flags) { - if ( ! man_word_alloc(m, line, 0, buf)) + if ( ! man_word_alloc(m, line, offs, buf + offs)) return(0); goto descope; } /* Pump blank lines directly into the backend. */ - for (i = 0; ' ' == buf[i]; i++) + for (i = offs; ' ' == buf[i]; i++) /* Skip leading whitespace. */ ; if ('\0' == buf[i]) { /* Allocate a blank entry. */ - if ( ! man_word_alloc(m, line, 0, "")) + if ( ! 
man_word_alloc(m, line, offs, "")) return(0); goto descope; } @@ -414,7 +419,7 @@ man_ptext(struct man *m, int line, char *buf) buf[i] = '\0'; } - if ( ! man_word_alloc(m, line, 0, buf)) + if ( ! man_word_alloc(m, line, offs, buf + offs)) return(0); /* @@ -423,10 +428,7 @@ man_ptext(struct man *m, int line, char *buf) * sentence. The front-end will know how to interpret this. */ - /* FIXME: chain of close delims. */ - assert(i); - if (mandoc_eos(buf, (size_t)i)) m->last->flags |= MAN_EOS; @@ -449,23 +451,23 @@ descope: if ( ! man_unscope(m, m->last->parent, WERRMAX)) return(0); - return(man_body_alloc(m, line, 0, m->last->tok)); + return(man_body_alloc(m, line, offs, m->last->tok)); } static int -macrowarn(struct man *m, int ln, const char *buf) +macrowarn(struct man *m, int ln, const char *buf, int offs) { if ( ! (MAN_IGN_MACRO & m->pflags)) - return(man_verr(m, ln, 0, "unknown macro: %s%s", + return(man_verr(m, ln, offs, "unknown macro: %s%s", buf, strlen(buf) > 3 ? "..." : "")); - return(man_vwarn(m, ln, 0, "unknown macro: %s%s", + return(man_vwarn(m, ln, offs, "unknown macro: %s%s", buf, strlen(buf) > 3 ? "..." : "")); } int -man_pmacro(struct man *m, int ln, char *buf) +man_pmacro(struct man *m, int ln, char *buf, int offs) { int i, j, ppos; enum mant tok; @@ -474,10 +476,12 @@ man_pmacro(struct man *m, int ln, char *buf) /* Comments and empties are quickly ignored. */ - if ('\0' == buf[1]) + offs++; + + if ('\0' == buf[offs]) return(1); - i = 1; + i = offs; /* * Skip whitespace between the control character and initial @@ -522,7 +526,7 @@ man_pmacro(struct man *m, int ln, char *buf) } if (MAN_MAX == (tok = man_hash_find(mac))) { - if ( ! macrowarn(m, ln, mac)) + if ( ! macrowarn(m, ln, mac, ppos)) goto err; return(1); } @@ -628,7 +632,7 @@ out: if ( ! man_unscope(m, m->last->parent, WERRMAX)) return(0); - return(man_body_alloc(m, ln, 0, m->last->tok)); + return(man_body_alloc(m, ln, offs, m->last->tok)); err: /* Error out. 
*/ @@ -111,7 +111,7 @@ struct man; void man_free(struct man *); struct man *man_alloc(void *, int, const struct man_cb *); void man_reset(struct man *); -int man_parseln(struct man *, int, char *buf); +int man_parseln(struct man *, int, char *, int); int man_endparse(struct man *); const struct man_node *man_node(const struct man *); @@ -150,9 +150,10 @@ static struct mdoc_node *node_alloc(struct mdoc *, int, int, enum mdoct, enum mdoc_type); static int node_append(struct mdoc *, struct mdoc_node *); -static int mdoc_ptext(struct mdoc *, int, char *); -static int mdoc_pmacro(struct mdoc *, int, char *); -static int macrowarn(struct mdoc *, int, const char *); +static int mdoc_ptext(struct mdoc *, int, char *, int); +static int mdoc_pmacro(struct mdoc *, int, char *, int); +static int macrowarn(struct mdoc *, int, + const char *, int); const struct mdoc_node * @@ -284,16 +285,16 @@ mdoc_endparse(struct mdoc *m) * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). */ int -mdoc_parseln(struct mdoc *m, int ln, char *buf) +mdoc_parseln(struct mdoc *m, int ln, char *buf, int offs) { if (MDOC_HALT & m->flags) return(0); m->flags |= MDOC_NEWLINE; - return(('.' == *buf || '\'' == *buf) ? - mdoc_pmacro(m, ln, buf) : - mdoc_ptext(m, ln, buf)); + return(('.' == buf[offs] || '\'' == buf[offs]) ? + mdoc_pmacro(m, ln, buf, offs) : + mdoc_ptext(m, ln, buf, offs)); } @@ -630,32 +631,34 @@ mdoc_node_delete(struct mdoc *m, struct mdoc_node *p) * control character. */ static int -mdoc_ptext(struct mdoc *m, int line, char *buf) +mdoc_ptext(struct mdoc *m, int line, char *buf, int offs) { int i; /* Ignore bogus comments. */ - if ('\\' == buf[0] && '.' == buf[1] && '\"' == buf[2]) - return(mdoc_pwarn(m, line, 0, EBADCOMMENT)); + if ('\\' == buf[offs] && + '.' == buf[offs + 1] && + '"' == buf[offs + 2]) + return(mdoc_pwarn(m, line, offs, EBADCOMMENT)); /* No text before an initial macro. 
*/ if (SEC_NONE == m->lastnamed) - return(mdoc_perr(m, line, 0, ETEXTPROL)); + return(mdoc_perr(m, line, offs, ETEXTPROL)); /* Literal just gets pulled in as-is. */ if (MDOC_LITERAL & m->flags) - return(mdoc_word_alloc(m, line, 0, buf)); + return(mdoc_word_alloc(m, line, offs, buf + offs)); /* Check for a blank line, which may also consist of spaces. */ - for (i = 0; ' ' == buf[i]; i++) + for (i = offs; ' ' == buf[i]; i++) /* Skip to first non-space. */ ; if ('\0' == buf[i]) { - if ( ! mdoc_pwarn(m, line, 0, ENOBLANK)) + if ( ! mdoc_pwarn(m, line, offs, ENOBLANK)) return(0); /* @@ -663,7 +666,7 @@ mdoc_ptext(struct mdoc *m, int line, char *buf) * blank lines aren't allowed, but enough manuals assume this * behaviour that we want to work around it. */ - if ( ! mdoc_elem_alloc(m, line, 0, MDOC_Pp, NULL)) + if ( ! mdoc_elem_alloc(m, line, offs, MDOC_Pp, NULL)) return(0); m->next = MDOC_NEXT_SIBLING; @@ -694,7 +697,7 @@ mdoc_ptext(struct mdoc *m, int line, char *buf) /* Allocate the whole word. */ - if ( ! mdoc_word_alloc(m, line, 0, buf)) + if ( ! mdoc_word_alloc(m, line, offs, buf + offs)) return(0); /* @@ -703,10 +706,7 @@ mdoc_ptext(struct mdoc *m, int line, char *buf) * sentence. The front-end will know how to interpret this. */ - /* FIXME: chain of close delims. */ - assert(i); - if (mandoc_eos(buf, (size_t)i)) m->last->flags |= MDOC_EOS; @@ -715,12 +715,12 @@ mdoc_ptext(struct mdoc *m, int line, char *buf) static int -macrowarn(struct mdoc *m, int ln, const char *buf) +macrowarn(struct mdoc *m, int ln, const char *buf, int offs) { if ( ! (MDOC_IGN_MACRO & m->pflags)) - return(mdoc_verr(m, ln, 0, "unknown macro: %s%s", + return(mdoc_verr(m, ln, offs, "unknown macro: %s%s", buf, strlen(buf) > 3 ? "..." : "")); - return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s", + return(mdoc_vwarn(m, ln, offs, "unknown macro: %s%s", buf, strlen(buf) > 3 ? "..." : "")); } @@ -730,7 +730,7 @@ macrowarn(struct mdoc *m, int ln, const char *buf) * character. 
*/ int -mdoc_pmacro(struct mdoc *m, int ln, char *buf) +mdoc_pmacro(struct mdoc *m, int ln, char *buf, int offs) { enum mdoct tok; int i, j, sv; @@ -738,10 +738,12 @@ mdoc_pmacro(struct mdoc *m, int ln, char *buf) /* Empty lines are ignored. */ - if ('\0' == buf[1]) + offs++; + + if ('\0' == buf[offs]) return(1); - i = 1; + i = offs; /* Accept whitespace after the initial control char. */ @@ -770,16 +772,16 @@ mdoc_pmacro(struct mdoc *m, int ln, char *buf) return(mdoc_perr(m, ln, i, EPRINT)); } - mac[j] = 0; + mac[j] = '\0'; if (j == 4 || j < 2) { - if ( ! macrowarn(m, ln, mac)) + if ( ! macrowarn(m, ln, mac, sv)) goto err; return(1); } if (MDOC_MAX == (tok = mdoc_hash_find(mac))) { - if ( ! macrowarn(m, ln, mac)) + if ( ! macrowarn(m, ln, mac, sv)) goto err; return(1); } @@ -299,7 +299,7 @@ struct mdoc; void mdoc_free(struct mdoc *); struct mdoc *mdoc_alloc(void *, int, const struct mdoc_cb *); void mdoc_reset(struct mdoc *); -int mdoc_parseln(struct mdoc *, int, char *buf); +int mdoc_parseln(struct mdoc *, int, char *, int); const struct mdoc_node *mdoc_node(const struct mdoc *); const struct mdoc_meta *mdoc_meta(const struct mdoc *); int mdoc_endparse(struct mdoc *); @@ -26,12 +26,15 @@ #include "roff.h" enum rofft { - ROFF_de, - ROFF_dei, +#if 0 ROFF_am, ROFF_ami, + ROFF_de, + ROFF_dei, + ROFF_if, ROFF_ig, ROFF_close, +#endif ROFF_MAX }; @@ -44,9 +47,11 @@ struct roff { struct roffnode { enum rofft tok; /* type of node */ struct roffnode *parent; /* up one in stack */ - char *end; /* custom end-token */ + char *end; /* end-token: custom */ int line; /* parse line */ int col; /* parse col */ + int flags; +#define ROFF_PARSEONLY (1 << 0) }; #define ROFF_ARGS struct roff *r, /* parse ctx */ \ @@ -54,7 +59,8 @@ struct roffnode { char **bufp, /* input buffer */ \ size_t *szp, /* size of input buffer */ \ int ln, /* parse line */ \ - int ppos /* current pos in buffer */ + int ppos, /* current pos in buffer */ \ + int *offs /* reset offset of buffer data */ typedef 
enum rofferr (*roffproc)(ROFF_ARGS); @@ -64,17 +70,24 @@ struct roffmac { roffproc new; /* root of stack (type = ROFF_MAX) */ }; +#if 0 static enum rofferr roff_new_close(ROFF_ARGS); +static enum rofferr roff_new_if(ROFF_ARGS); +static enum rofferr roff_sub_if(ROFF_ARGS); static enum rofferr roff_new_ig(ROFF_ARGS); static enum rofferr roff_sub_ig(ROFF_ARGS); +#endif const struct roffmac roffs[ROFF_MAX] = { - { "de", roff_sub_ig, roff_new_ig }, - { "dei", roff_sub_ig, roff_new_ig }, +#if 0 { "am", roff_sub_ig, roff_new_ig }, { "ami", roff_sub_ig, roff_new_ig }, + { "de", roff_sub_ig, roff_new_ig }, + { "dei", roff_sub_ig, roff_new_ig }, + { "if", roff_sub_if, roff_new_if }, { "ig", roff_sub_ig, roff_new_ig }, { ".", NULL, roff_new_close }, +#endif }; static void roff_free1(struct roff *); @@ -116,6 +129,8 @@ roffnode_pop(struct roff *r) if (NULL == (p = r->last)) return; r->last = p->parent; + if (p->end) + free(p->end); free(p); } @@ -187,30 +202,42 @@ roff_alloc(const mandocmsg msg, void *data) enum rofferr -roff_parseln(struct roff *r, int ln, char **bufp, size_t *szp) +roff_parseln(struct roff *r, int ln, + char **bufp, size_t *szp, int pos, int *offs) { enum rofft t; - int ppos; + + /* If stacked, jump directly into its processing function. */ if (NULL != r->last) { - /* - * If there's a node on the stack, then jump directly - * into its processing function. - */ t = r->last->tok; assert(roffs[t].sub); - return((*roffs[t].sub)(r, t, bufp, szp, ln, 0)); - } else if ('.' != (*bufp)[0] && NULL == r->last) - /* Return when in free text without a context. */ + return((*roffs[t].sub)(r, t, bufp, szp, ln, pos, offs)); + } + + /* Return when in free text without a context. */ + + if ('.' != (*bufp)[0] && '\'' != (*bufp)[0] && NULL == r->last) return(ROFF_CONT); /* There's nothing on the stack: make us anew. 
*/ - if (ROFF_MAX == (t = roff_parse(*bufp, &ppos))) + if (ROFF_MAX == (t = roff_parse(*bufp, &pos))) return(ROFF_CONT); assert(roffs[t].new); - return((*roffs[t].new)(r, t, bufp, szp, ln, ppos)); + return((*roffs[t].new)(r, t, bufp, szp, ln, pos, offs)); +} + + +int +roff_endparse(struct roff *r) +{ + + if (NULL == r->last) + return(1); + return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line, + r->last->col, NULL)); } @@ -225,7 +252,7 @@ roff_parse(const char *buf, int *pos) char mac[5]; enum rofft t; - assert('.' == buf[0]); + assert('.' == buf[0] || '\'' == buf[0]); *pos = 1; while (buf[*pos] && (' ' == buf[*pos] || '\t' == buf[*pos])) @@ -255,6 +282,7 @@ roff_parse(const char *buf, int *pos) } +#if 0 /* ARGSUSED */ static enum rofferr roff_sub_ig(ROFF_ARGS) @@ -263,7 +291,7 @@ roff_sub_ig(ROFF_ARGS) /* Ignore free-text lines. */ - if ('.' != (*bufp)[ppos]) + if ('.' != (*bufp)[ppos] && '\'' != (*bufp)[ppos]) return(ROFF_IGN); if (r->last->end) { @@ -310,7 +338,61 @@ roff_new_close(ROFF_ARGS) } -/* ARGSUSED */ +static enum rofferr +roff_sub_if(ROFF_ARGS) +{ + int i; + enum rofft t; + + i = (int)strlen(*bufp); + + if (i > 1 && '}' == (*bufp)[i - 1] && '\\' == (*bufp)[i - 2]) + roffnode_pop(r); + + return(ROFF_IGN); +} + + +static enum rofferr +roff_new_if(ROFF_ARGS) +{ + struct roffnode *n; + enum rofferr re; + + /* + * Read ahead past the conditional. + * FIXME: this does not work, as conditionals don't end on + * whitespace, but are parsed according to a formal grammar. + * It's good enough for now, however. + */ + + while ((*bufp)[ppos] && ' ' != (*bufp)[ppos]) + ppos++; + while (' ' == (*bufp)[ppos]) + ppos++; + + if ( ! roffnode_push(r, tok, ln, ppos)) + return(ROFF_ERR); + + n = r->last; + + /* Don't evaluate: just assume NO. 
*/ + + r->last->flags |= ROFF_PARSEONLY; + + if ('\\' == (*bufp)[ppos] && '{' == (*bufp)[ppos + 1]) { + re = roff_parseln(r, ln, bufp, szp, pos); + if (ROFF_ERR == re) + return(re); + if (r->last == n) + roffnode_pop(r, tok, ln, ppos); + return(re); + } + + return(ROFF_IGN); +} + + static enum rofferr roff_new_ig(ROFF_ARGS) { @@ -318,6 +400,11 @@ roff_new_ig(ROFF_ARGS) if ( ! roffnode_push(r, tok, ln, ppos)) return(ROFF_ERR); + + /* + * Other macros (not `ig') using this routine have additional + * crap here that we discard. + */ if (ROFF_ig != tok) { while ((*bufp)[ppos] && ' ' != (*bufp)[ppos]) @@ -360,14 +447,4 @@ roff_new_ig(ROFF_ARGS) return(ROFF_IGN); } - - -int -roff_endparse(struct roff *r) -{ - - if (NULL == r->last) - return(1); - return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, - r->last->line, r->last->col, NULL)); -} +#endif @@ -18,9 +18,10 @@ #define ROFF_H enum rofferr { - ROFF_CONT, /* re-process line with libmdoc or libman */ - ROFF_IGN, /* ignore line */ - ROFF_ERR, /* badness */ + ROFF_CONT, /* continue processing line */ + ROFF_RERUN, /* re-run roff interpreter with offset */ + ROFF_IGN, /* ignore current line */ + ROFF_ERR, /* badness: puke and stop */ }; __BEGIN_DECLS @@ -30,7 +31,8 @@ struct roff; void roff_free(struct roff *); struct roff *roff_alloc(mandocmsg, void *); void roff_reset(struct roff *); -enum rofferr roff_parseln(struct roff *, int, char **, size_t *); +enum rofferr roff_parseln(struct roff *, int, + char **, size_t *, int, int *); int roff_endparse(struct roff *); __END_DECLS |