diff options
-rw-r--r-- | html.c | 87 | ||||
-rw-r--r-- | libmandoc.h | 1 | ||||
-rw-r--r-- | man_validate.c | 54 | ||||
-rw-r--r-- | mandoc.c | 444 | ||||
-rw-r--r-- | mandoc.h | 16 | ||||
-rw-r--r-- | mdoc_validate.c | 30 | ||||
-rw-r--r-- | out.c | 237 | ||||
-rw-r--r-- | out.h | 17 | ||||
-rw-r--r-- | read.c | 2 | ||||
-rw-r--r-- | term.c | 107 |
10 files changed, 470 insertions, 525 deletions
@@ -94,14 +94,13 @@ static const char *const htmlattrs[ATTR_MAX] = { }; static void print_num(struct html *, const char *, size_t); -static void print_spec(struct html *, enum roffdeco, - const char *, size_t); +static void print_spec(struct html *, const char *, size_t); static void print_res(struct html *, const char *, size_t); static void print_ctag(struct html *, enum htmltag); static void print_doctype(struct html *); static void print_xmltype(struct html *); static int print_encode(struct html *, const char *, int); -static void print_metaf(struct html *, enum roffdeco); +static void print_metaf(struct html *, enum mandoc_esc); static void print_attr(struct html *, const char *, const char *); static void *ml_alloc(char *, enum htmltype); @@ -221,7 +220,7 @@ print_num(struct html *h, const char *p, size_t len) } static void -print_spec(struct html *h, enum roffdeco d, const char *p, size_t len) +print_spec(struct html *h, const char *p, size_t len) { int cp; const char *rhs; @@ -230,7 +229,7 @@ print_spec(struct html *h, enum roffdeco d, const char *p, size_t len) if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) { printf("&#%d;", cp); return; - } else if (-1 == cp && DECO_SSPECIAL == d) { + } else if (-1 == cp && 1 == len) { fwrite(p, 1, len, stdout); return; } else if (-1 == cp) @@ -260,21 +259,21 @@ print_res(struct html *h, const char *p, size_t len) static void -print_metaf(struct html *h, enum roffdeco deco) +print_metaf(struct html *h, enum mandoc_esc deco) { enum htmlfont font; switch (deco) { - case (DECO_PREVIOUS): + case (ESCAPE_FONTPREV): font = h->metal; break; - case (DECO_ITALIC): + case (ESCAPE_FONTITALIC): font = HTMLFONT_ITALIC; break; - case (DECO_BOLD): + case (ESCAPE_FONTBOLD): font = HTMLFONT_BOLD; break; - case (DECO_ROMAN): + case (ESCAPE_FONTROMAN): font = HTMLFONT_NONE; break; default: @@ -303,73 +302,69 @@ print_encode(struct html *h, const char *p, int norecurse) size_t sz; int len, nospace; const char *seq; - enum roffdeco deco; + 
enum mandoc_esc esc; static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' }; nospace = 0; - for (; *p; p++) { + while ('\0' != *p) { sz = strcspn(p, rejs); fwrite(p, 1, sz, stdout); - p += /* LINTED */ - sz; + p += (int)sz; - if ('<' == *p) { + if ('\0' == *p) + break; + + switch (*p++) { + case ('<'): printf("&lt;"); continue; - } else if ('>' == *p) { + case ('>'): printf("&gt;"); continue; - } else if ('&' == *p) { + case ('&'): printf("&amp;"); continue; - } else if (ASCII_HYPH == *p) { - /* - * Note: "soft hyphens" aren't graphically - * displayed when not breaking the text; we want - * them to be displayed. - */ - /*printf("&#173;");*/ + case (ASCII_HYPH): putchar('-'); continue; - } else if ('\0' == *p) + default: break; + } - seq = ++p; - len = a2roffdeco(&deco, &seq, &sz); + esc = mandoc_escape(&p, &seq, &len); + if (ESCAPE_ERROR == esc) + break; - switch (deco) { - case (DECO_NUMBERED): - print_num(h, seq, sz); + switch (esc) { + case (ESCAPE_NUMBERED): + print_num(h, seq, len); break; - case (DECO_RESERVED): - print_res(h, seq, sz); + case (ESCAPE_PREDEF): + print_res(h, seq, len); break; - case (DECO_SSPECIAL): - /* FALLTHROUGH */ - case (DECO_SPECIAL): - print_spec(h, deco, seq, sz); + case (ESCAPE_SPECIAL): + print_spec(h, seq, len); break; - case (DECO_PREVIOUS): + case (ESCAPE_FONTPREV): /* FALLTHROUGH */ - case (DECO_BOLD): + case (ESCAPE_FONTBOLD): /* FALLTHROUGH */ - case (DECO_ITALIC): + case (ESCAPE_FONTITALIC): /* FALLTHROUGH */ - case (DECO_ROMAN): + case (ESCAPE_FONTROMAN): if (norecurse) break; - print_metaf(h, deco); + print_metaf(h, esc); + break; + case (ESCAPE_NOSPACE): + if ('\0' == *p) + nospace = 1; break; default: break; } - - p += len - 1; - - if (DECO_NOSPACE == deco && '\0' == *(p + 1)) - nospace = 1; } return(nospace); diff --git a/libmandoc.h b/libmandoc.h index 8a801bd7..cdfa2c6e 100644 --- a/libmandoc.h +++ b/libmandoc.h @@ -73,7 +73,6 @@ void mandoc_msg(enum mandocerr, struct mparse *, int, int, const char *); void 
mandoc_vmsg(enum mandocerr, struct mparse *, int, int, const char *, ...); -int mandoc_special(char *); char *mandoc_strdup(const char *); char *mandoc_getarg(struct mparse *, char **, int, int *); char *mandoc_normdate(struct mparse *, char *, int, int); diff --git a/man_validate.c b/man_validate.c index b9e1ff51..c1968989 100644 --- a/man_validate.c +++ b/man_validate.c @@ -54,7 +54,7 @@ static int check_par(CHKARGS); static int check_part(CHKARGS); static int check_root(CHKARGS); static int check_sec(CHKARGS); -static int check_text(CHKARGS); +static void check_text(CHKARGS); static int post_AT(CHKARGS); static int post_fi(CHKARGS); @@ -151,7 +151,8 @@ man_valid_post(struct man *m) switch (m->last->type) { case (MAN_TEXT): - return(check_text(m, m->last)); + check_text(m, m->last); + return(1); case (MAN_ROOT): return(check_root(m, m->last)); case (MAN_EQN): @@ -204,43 +205,48 @@ check_root(CHKARGS) return(1); } - -static int +static void check_text(CHKARGS) { - char *p; - int pos, c; + char *p, *pp, *cpp; + int pos; size_t sz; - for (p = n->string, pos = n->pos + 1; *p; p++, pos++) { - sz = strcspn(p, "\t\\"); - p += (int)sz; + p = n->string; + pos = n->pos + 1; - if ('\0' == *p) - break; + while ('\0' != *p) { + sz = strcspn(p, "\t\\"); + p += (int)sz; pos += (int)sz; if ('\t' == *p) { - if (MAN_LITERAL & m->flags) - continue; - man_pmsg(m, n->line, pos, MANDOCERR_BADTAB); + if ( ! (MAN_LITERAL & m->flags)) + man_pmsg(m, n->line, pos, MANDOCERR_BADTAB); + p++; + pos++; continue; - } + } else if ('\0' == *p) + break; - /* Check the special character. 
*/ + pos++; + pp = ++p; - c = mandoc_special(p); - if (c) { - p += c - 1; - pos += c - 1; - } else + if (ESCAPE_ERROR == mandoc_escape + ((const char **)&pp, NULL, NULL)) { man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE); - } + break; + } - return(1); -} + cpp = p; + while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp))) + *cpp = '-'; + pos += pp - p; + p = pp; + } +} #define INEQ_DEFINE(x, ineq, name) \ static int \ @@ -35,199 +35,363 @@ static int a2time(time_t *, const char *, const char *); static char *time2a(time_t); +static int numescape(const char *); -int -mandoc_special(char *p) +/* + * Pass over recursive numerical expressions. The context of this + * function is important: it's only called within character-terminating + * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial + * recursion: we don't care about what's in these blocks. + * This returns the number of characters skipped or -1 if an error + * occurs (the caller should bail). + */ +static int +numescape(const char *start) +{ + int i; + size_t sz; + const char *cp; + + i = 0; + + /* The expression consists of a subexpression. */ + + if ('\\' == start[i]) { + cp = &start[++i]; + /* + * Read past the end of the subexpression. + * Bail immediately on errors. + */ + if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) + return(-1); + return(i + cp - &start[i]); + } + + if ('(' != start[i++]) + return(0); + + /* + * A parenthesised subexpression. Read until the closing + * parenthesis, making sure to handle any nested subexpressions + * that might ruin our parse. + */ + + while (')' != start[i]) { + sz = strcspn(&start[i], ")\\"); + i += (int)sz; + + if ('\0' == start[i]) + return(-1); + else if ('\\' != start[i]) + continue; + + cp = &start[++i]; + if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) + return(-1); + i += cp - &start[i]; + } + + /* Read past the terminating ')'. */ + return(++i); +} + +/* + * Handle an escaped sequence. 
This should be called with any + * string subsequent to a `\'. Pass a pointer to this substring as "end"; + * it will be set to the supremum of the parsed escape sequence. If + * this returns ESCAPE_ERROR, the string is bogus and should be thrown + * away. If not ESCAPE_ERROR or ESCAPE_IGNORE, "start" is set to the + * first relevant character of the substring (font, glyph, whatever) of + * length sz. Both "start" and "sz" may be NULL. + */ +enum mandoc_esc +mandoc_escape(const char **end, const char **start, int *sz) { - int len, i; - char term; - char *sv; - - len = 0; + char c, term, numeric; + int i, lim, ssz, rlim; + const char *cp, *rstart; + enum mandoc_esc gly; + + cp = *end; + rstart = cp; + if (start) + *start = rstart; + i = 0; + gly = ESCAPE_ERROR; term = '\0'; - sv = p; + numeric = 0; - assert('\\' == *p); - p++; + switch ((c = cp[i++])) { + /* + * First the glyphs. There are several different forms of + * these, but each eventually returns a substring of the glyph + * name. + */ + case ('('): + gly = ESCAPE_SPECIAL; + lim = 2; + break; + case ('['): + gly = ESCAPE_SPECIAL; + term = ']'; + break; + case ('C'): + if ('\'' != cp[i]) + return(ESCAPE_ERROR); + gly = ESCAPE_SPECIAL; + term = '\''; + break; - switch (*p++) { -#if 0 - case ('Z'): + /* + * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where + * 'X' is the trigger. These have opaque sub-strings. 
+ */ + case ('F'): /* FALLTHROUGH */ - case ('X'): + case ('g'): /* FALLTHROUGH */ - case ('x'): + case ('k'): /* FALLTHROUGH */ - case ('S'): + case ('M'): /* FALLTHROUGH */ - case ('R'): + case ('m'): /* FALLTHROUGH */ - case ('N'): + case ('n'): /* FALLTHROUGH */ - case ('l'): + case ('V'): /* FALLTHROUGH */ - case ('L'): + case ('Y'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_IGNORE; /* FALLTHROUGH */ - case ('H'): + case ('*'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_PREDEF; /* FALLTHROUGH */ - case ('h'): + case ('f'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_FONT; + + rstart= &cp[i]; + if (start) + *start = rstart; + + switch (cp[i++]) { + case ('('): + lim = 2; + break; + case ('['): + term = ']'; + break; + default: + lim = 1; + i--; + break; + } + break; + + /* + * These escapes are of the form \X'Y', where 'X' is the trigger + * and 'Y' is any string. These have opaque sub-strings. + */ + case ('A'): /* FALLTHROUGH */ - case ('D'): + case ('b'): /* FALLTHROUGH */ - case ('C'): + case ('D'): /* FALLTHROUGH */ - case ('b'): + case ('o'): /* FALLTHROUGH */ - case ('B'): + case ('R'): /* FALLTHROUGH */ - case ('a'): + case ('X'): /* FALLTHROUGH */ - case ('A'): - if (*p++ != '\'') - return(0); + case ('Z'): + if ('\'' != cp[i++]) + return(ESCAPE_ERROR); + gly = ESCAPE_IGNORE; term = '\''; break; -#endif + + /* + * These escapes are of the form \X'N', where 'X' is the trigger + * and 'N' resolves to a numerical expression. 
+ */ + case ('B'): + /* FALLTHROUGH */ case ('h'): /* FALLTHROUGH */ + case ('H'): + /* FALLTHROUGH */ + case ('L'): + /* FALLTHROUGH */ + case ('l'): + /* FALLTHROUGH */ + case ('N'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_NUMBERED; + /* FALLTHROUGH */ + case ('S'): + /* FALLTHROUGH */ case ('v'): /* FALLTHROUGH */ + case ('w'): + /* FALLTHROUGH */ + case ('x'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_IGNORE; + if ('\'' != cp[i++]) + return(ESCAPE_ERROR); + term = numeric = '\''; + break; + + /* + * Sizes get a special category of their own. + */ case ('s'): - if (ASCII_HYPH == *p) - *p = '-'; + gly = ESCAPE_IGNORE; - i = 0; - if ('+' == *p || '-' == *p) { - p++; - i = 1; - } + rstart = &cp[i]; + if (start) + *start = rstart; + + /* See +/- counts as a sign. */ + c = cp[i]; + if ('+' == c || '-' == c || ASCII_HYPH == c) + ++i; - switch (*p++) { + switch (cp[i++]) { case ('('): - len = 2; + lim = 2; break; case ('['): - term = ']'; + term = numeric = ']'; break; case ('\''): - term = '\''; + term = numeric = '\''; break; - case ('0'): - i = 1; - /* FALLTHROUGH */ default: - len = 1; - p--; + lim = 1; + i--; break; } - if (ASCII_HYPH == *p) - *p = '-'; - if ('+' == *p || '-' == *p) { - if (i) - return(0); - p++; - } - - /* Handle embedded numerical subexp or escape. */ - - if ('(' == *p) { - while (*p && ')' != *p) - if ('\\' == *p++) { - i = mandoc_special(--p); - if (0 == i) - return(0); - p += i; - } - - if (')' == *p++) - break; + /* See +/- counts as a sign. */ + c = cp[i]; + if ('+' == c || '-' == c || ASCII_HYPH == c) + ++i; - return(0); - } else if ('\\' == *p) { - if (0 == (i = mandoc_special(p))) - return(0); - p += i; - } + break; + /* + * Anything else is assumed to be a glyph. 
+ */ + default: + gly = ESCAPE_SPECIAL; + lim = 1; + i--; break; -#if 0 - case ('Y'): - /* FALLTHROUGH */ - case ('V'): - /* FALLTHROUGH */ - case ('$'): - /* FALLTHROUGH */ - case ('n'): - /* FALLTHROUGH */ -#endif - case ('k'): - /* FALLTHROUGH */ - case ('M'): - /* FALLTHROUGH */ - case ('m'): - /* FALLTHROUGH */ - case ('f'): - /* FALLTHROUGH */ - case ('F'): - /* FALLTHROUGH */ - case ('*'): - switch (*p++) { - case ('('): - len = 2; + } + + assert(ESCAPE_ERROR != gly); + + rstart = &cp[i]; + if (start) + *start = rstart; + + /* + * If a terminating block has been specified, we need to + * handle the case of recursion, which could have their + * own terminating blocks that mess up our parse. This, by the + * way, means that the "start" and "size" values will be + * effectively meaningless. + */ + + ssz = 0; + if (numeric && -1 == (ssz = numescape(&cp[i]))) + return(ESCAPE_ERROR); + + i += ssz; + rlim = -1; + + /* + * We have a character terminator. Try to read up to that + * character. If we can't (i.e., we hit the nil), then return + * an error; if we can, calculate our length, read past the + * terminating character, and exit. + */ + + if ('\0' != term) { + *end = strchr(&cp[i], term); + if ('\0' == *end) + return(ESCAPE_ERROR); + + rlim = *end - &cp[i]; + if (sz) + *sz = rlim; + (*end)++; + goto out; + } + + assert(lim > 0); + + /* + * We have a numeric limit. If the string is shorter than that, + * stop and return an error. Else adjust our endpoint, length, + * and return the current glyph. + */ + + if ((size_t)lim > strlen(&cp[i])) + return(ESCAPE_ERROR); + + rlim = lim; + if (sz) + *sz = rlim; + + *end = &cp[i] + lim; + +out: + assert(rlim >= 0 && rstart); + + /* Run post-processors. 
*/ + + switch (gly) { + case (ESCAPE_FONT): + if (1 != rlim) break; - case ('['): - term = ']'; + switch (*rstart) { + case ('3'): + /* FALLTHROUGH */ + case ('B'): + gly = ESCAPE_FONTBOLD; break; - default: - len = 1; - p--; + case ('2'): + /* FALLTHROUGH */ + case ('I'): + gly = ESCAPE_FONTITALIC; break; - } - break; - case ('('): - len = 2; - break; - case ('['): - term = ']'; - break; - case ('z'): - len = 1; - if ('\\' == *p) { - if (0 == (i = mandoc_special(p))) - return(0); - p += i; - return(*p ? (int)(p - sv) : 0); - } - break; - case ('o'): - /* FALLTHROUGH */ - case ('w'): - if ('\'' == *p++) { - term = '\''; + case ('P'): + gly = ESCAPE_FONTPREV; + break; + case ('1'): + /* FALLTHROUGH */ + case ('R'): + gly = ESCAPE_FONTROMAN; break; } - /* FALLTHROUGH */ + case (ESCAPE_SPECIAL): + if (1 != rlim) + break; + if ('c' == *rstart) + gly = ESCAPE_NOSPACE; + break; default: - len = 1; - p--; break; } - if (term) { - for ( ; *p && term != *p; p++) - if (ASCII_HYPH == *p) - *p = '-'; - return(*p ? (int)(p - sv) : 0); - } - - for (i = 0; *p && i < len; i++, p++) - if (ASCII_HYPH == *p) - *p = '-'; - return(i == len ? (int)(p - sv) : 0); + return(gly); } - void * mandoc_calloc(size_t num, size_t size) { @@ -288,6 +288,20 @@ enum mparset { MPARSE_MAN /* assume -man */ }; +enum mandoc_esc { + ESCAPE_ERROR = 0, /* bail! 
unparsable escape */ + ESCAPE_IGNORE, /* escape to be ignored */ + ESCAPE_SPECIAL, /* a regular special character */ + ESCAPE_PREDEF, /* a predefined special character */ + ESCAPE_FONT, /* a generic font mode */ + ESCAPE_FONTBOLD, /* bold font mode */ + ESCAPE_FONTITALIC, /* italic font mode */ + ESCAPE_FONTROMAN, /* roman font mode */ + ESCAPE_FONTPREV, /* previous font mode */ + ESCAPE_NUMBERED, /* a numbered glyph */ + ESCAPE_NOSPACE /* suppress space if the last on a line */ +}; + typedef void (*mandocmsg)(enum mandocerr, enum mandoclevel, const char *, int, int, const char *); @@ -310,6 +324,8 @@ void *mandoc_calloc(size_t, size_t); void *mandoc_malloc(size_t); void *mandoc_realloc(void *, size_t); +enum mandoc_esc mandoc_escape(const char **, const char **, int *); + __END_DECLS #endif /*!MANDOC_H*/ diff --git a/mdoc_validate.c b/mdoc_validate.c index 9180b580..acd855eb 100644 --- a/mdoc_validate.c +++ b/mdoc_validate.c @@ -545,31 +545,39 @@ check_argv(struct mdoc *m, struct mdoc_node *n, struct mdoc_argv *v) static void check_text(struct mdoc *m, int ln, int pos, char *p) { - int c; + char *cpp, *pp; size_t sz; - for ( ; *p; p++, pos++) { + while ('\0' != *p) { sz = strcspn(p, "\t\\"); - p += (int)sz; - - if ('\0' == *p) - break; + p += (int)sz; pos += (int)sz; if ('\t' == *p) { if ( ! 
(MDOC_LITERAL & m->flags)) mdoc_pmsg(m, ln, pos, MANDOCERR_BADTAB); + p++; + pos++; continue; - } + } else if ('\0' == *p) + break; + + pos++; + pp = ++p; - if (0 == (c = mandoc_special(p))) { + if (ESCAPE_ERROR == mandoc_escape + ((const char **)&pp, NULL, NULL)) { mdoc_pmsg(m, ln, pos, MANDOCERR_BADESCAPE); - continue; + break; } - p += c - 1; - pos += c - 1; + cpp = p; + while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp))) + *cpp = '-'; + + pos += pp - p; + p = pp; } } @@ -174,243 +174,6 @@ time2a(time_t t, char *dst, size_t sz) (void)strftime(p, sz, "%Y", &tm); } - -int -a2roffdeco(enum roffdeco *d, const char **word, size_t *sz) -{ - int i, j, lim; - char term, c; - const char *wp; - enum roffdeco dd; - - *d = DECO_NONE; - lim = i = 0; - term = '\0'; - wp = *word; - - switch ((c = wp[i++])) { - case ('('): - *d = DECO_SPECIAL; - lim = 2; - break; - case ('F'): - /* FALLTHROUGH */ - case ('f'): - *d = 'F' == c ? DECO_FFONT : DECO_FONT; - - switch (wp[i++]) { - case ('('): - lim = 2; - break; - case ('['): - term = ']'; - break; - case ('3'): - /* FALLTHROUGH */ - case ('B'): - *d = DECO_BOLD; - return(i); - case ('2'): - /* FALLTHROUGH */ - case ('I'): - *d = DECO_ITALIC; - return(i); - case ('P'): - *d = DECO_PREVIOUS; - return(i); - case ('1'): - /* FALLTHROUGH */ - case ('R'): - *d = DECO_ROMAN; - return(i); - default: - i--; - lim = 1; - break; - } - break; - case ('k'): - /* FALLTHROUGH */ - case ('M'): - /* FALLTHROUGH */ - case ('m'): - /* FALLTHROUGH */ - case ('*'): - if ('*' == c) - *d = DECO_RESERVED; - - switch (wp[i++]) { - case ('('): - lim = 2; - break; - case ('['): - term = ']'; - break; - default: - i--; - lim = 1; - break; - } - break; - - case ('N'): - - /* - * Sequence of characters: backslash, 'N' (i = 0), - * starting delimiter (i = 1), character number (i = 2). - */ - - *word = wp + 2; - *sz = 0; - - /* - * Cannot use a digit as a starting delimiter; - * but skip the digit anyway. 
- */ - - if (isdigit((int)wp[1])) - return(2); - - /* - * Any non-digit terminates the character number. - * That is, the terminating delimiter need not - * match the starting delimiter. - */ - - for (i = 2; isdigit((int)wp[i]); i++) - (*sz)++; - - /* - * This is only a numbered character - * if the character number has at least one digit. - */ - - if (*sz) - *d = DECO_NUMBERED; - - /* - * Skip the terminating delimiter, even if it does not - * match, and even if there is no character number. - */ - - return(++i); - - case ('h'): - /* FALLTHROUGH */ - case ('v'): - /* FALLTHROUGH */ - case ('s'): - j = 0; - if ('+' == wp[i] || '-' == wp[i]) { - i++; - j = 1; - } - - switch (wp[i++]) { - case ('('): - lim = 2; - break; - case ('['): - term = ']'; - break; - case ('\''): - term = '\''; - break; - case ('0'): - j = 1; - /* FALLTHROUGH */ - default: - i--; - lim = 1; - break; - } - - if ('+' == wp[i] || '-' == wp[i]) { - if (j) - return(i); - i++; - } - - /* Handle embedded numerical subexp or escape. */ - - if ('(' == wp[i]) { - while (wp[i] && ')' != wp[i]) - if ('\\' == wp[i++]) { - /* Handle embedded escape. 
*/ - *word = &wp[i]; - i += a2roffdeco(&dd, word, sz); - } - - if (')' == wp[i++]) - break; - - *d = DECO_NONE; - return(i - 1); - } else if ('\\' == wp[i]) { - *word = &wp[++i]; - i += a2roffdeco(&dd, word, sz); - } - - break; - case ('['): - *d = DECO_SPECIAL; - term = ']'; - break; - case ('c'): - *d = DECO_NOSPACE; - return(i); - case ('z'): - *d = DECO_NONE; - if ('\\' == wp[i]) { - *word = &wp[++i]; - return(i + a2roffdeco(&dd, word, sz)); - } else - lim = 1; - break; - case ('o'): - /* FALLTHROUGH */ - case ('w'): - if ('\'' == wp[i++]) { - term = '\''; - break; - } - /* FALLTHROUGH */ - default: - *d = DECO_SSPECIAL; - i--; - lim = 1; - break; - } - - assert(term || lim); - *word = &wp[i]; - - if (term) { - j = i; - while (wp[i] && wp[i] != term) - i++; - if ('\0' == wp[i]) { - *d = DECO_NONE; - return(i); - } - - assert(i >= j); - *sz = (size_t)(i - j); - - return(i + 1); - } - - assert(lim > 0); - *sz = (size_t)lim; - - for (j = 0; wp[i] && j < lim; j++) - i++; - if (j < lim) - *d = DECO_NONE; - - return(i); -} - /* * Calculate the abstract widths and decimal positions of columns in a * table. 
This routine allocates the columns structures then runs over @@ -31,22 +31,6 @@ enum roffscale { SCALE_MAX }; -enum roffdeco { - DECO_NONE, - DECO_NUMBERED, /* numbered character */ - DECO_SPECIAL, /* special character */ - DECO_SSPECIAL, /* single-char special */ - DECO_RESERVED, /* reserved word */ - DECO_BOLD, /* bold font */ - DECO_ITALIC, /* italic font */ - DECO_ROMAN, /* "normal" undecorated font */ - DECO_PREVIOUS, /* revert to previous font */ - DECO_NOSPACE, /* suppress spacing */ - DECO_FONT, /* font */ - DECO_FFONT, /* font family */ - DECO_MAX -}; - enum chars { CHARS_ASCII, /* 7-bit ascii representation */ CHARS_HTML /* unicode values */ @@ -85,7 +69,6 @@ __BEGIN_DECLS while (/* CONSTCOND */ 0) int a2roffsu(const char *, struct roffsu *, enum roffscale); -int a2roffdeco(enum roffdeco *, const char **, size_t *); void time2a(time_t, char *, size_t); void tblcalc(struct rofftbl *tbl, const struct tbl_span *); @@ -142,7 +142,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { "tab in non-literal context", "end of line whitespace", "bad comment style", - "unknown escape sequence", + "bad escape sequence", "unterminated quoted string", "generic error", @@ -33,8 +33,7 @@ #include "term.h" #include "main.h" -static void spec(struct termp *, enum roffdeco, - const char *, size_t); +static void spec(struct termp *, const char *, size_t); static void res(struct termp *, const char *, size_t); static void bufferc(struct termp *, char); static void adjbuf(struct termp *p, size_t); @@ -358,7 +357,7 @@ numbered(struct termp *p, const char *word, size_t len) static void -spec(struct termp *p, enum roffdeco d, const char *word, size_t len) +spec(struct termp *p, const char *word, size_t len) { const char *rhs; size_t sz; @@ -366,7 +365,7 @@ spec(struct termp *p, enum roffdeco d, const char *word, size_t len) rhs = chars_spec2str(p->symtab, word, len, &sz); if (rhs) encode(p, rhs, sz); - else if (DECO_SSPECIAL == d) + else if (1 == len) encode(p, word, len); 
} @@ -457,8 +456,9 @@ void term_word(struct termp *p, const char *word) { const char *seq; + int sz; size_t ssz; - enum roffdeco deco; + enum mandoc_esc esc; if ( ! (TERMP_NOSPACE & p->flags)) { if ( ! (TERMP_KEEP & p->flags)) { @@ -478,7 +478,7 @@ term_word(struct termp *p, const char *word) p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM); - while (*word) { + while ('\0' != *word) { if ((ssz = strcspn(word, "\\")) > 0) encode(p, word, ssz); @@ -486,39 +486,40 @@ term_word(struct termp *p, const char *word) if ('\\' != *word) continue; - seq = ++word; - word += a2roffdeco(&deco, &seq, &ssz); + word++; + esc = mandoc_escape(&word, &seq, &sz); + if (ESCAPE_ERROR == esc) + break; - switch (deco) { - case (DECO_NUMBERED): - numbered(p, seq, ssz); + switch (esc) { + case (ESCAPE_NUMBERED): + numbered(p, seq, sz); break; - case (DECO_RESERVED): - res(p, seq, ssz); + case (ESCAPE_PREDEF): + res(p, seq, sz); break; - case (DECO_SPECIAL): - /* FALLTHROUGH */ - case (DECO_SSPECIAL): - spec(p, deco, seq, ssz); + case (ESCAPE_SPECIAL): + spec(p, seq, sz); break; - case (DECO_BOLD): + case (ESCAPE_FONTBOLD): term_fontrepl(p, TERMFONT_BOLD); break; - case (DECO_ITALIC): + case (ESCAPE_FONTITALIC): term_fontrepl(p, TERMFONT_UNDER); break; - case (DECO_ROMAN): + case (ESCAPE_FONTROMAN): term_fontrepl(p, TERMFONT_NONE); break; - case (DECO_PREVIOUS): + case (ESCAPE_FONTPREV): term_fontlast(p); break; + case (ESCAPE_NOSPACE): + if ('\0' == *word) + p->flags |= TERMP_NOSPACE; + break; default: break; } - - if (DECO_NOSPACE == deco && '\0' == *word) - p->flags |= TERMP_NOSPACE; } } @@ -600,33 +601,36 @@ term_len(const struct termp *p, size_t sz) size_t term_strlen(const struct termp *p, const char *cp) { - size_t sz, ssz, rsz, i; - enum roffdeco d; + size_t sz, rsz, i; + int ssz; + enum mandoc_esc esc; const char *seq, *rhs; - for (sz = 0; '\0' != *cp; ) - /* - * Account for escaped sequences within string length - * calculations. 
This follows the logic in term_word() - * as we must calculate the width of produced strings. - */ - if ('\\' == *cp) { - seq = ++cp; - cp += a2roffdeco(&d, &seq, &ssz); + /* + * Account for escaped sequences within string length + * calculations. This follows the logic in term_word() as we + * must calculate the width of produced strings. + */ - switch (d) { - case (DECO_RESERVED): + sz = 0; + while ('\0' != *cp) + switch (*cp) { + case ('\\'): + ++cp; + esc = mandoc_escape(&cp, &seq, &ssz); + if (ESCAPE_ERROR == esc) + return(sz); + + switch (esc) { + case (ESCAPE_PREDEF): rhs = chars_res2str (p->symtab, seq, ssz, &rsz); break; - case (DECO_SPECIAL): - /* FALLTHROUGH */ - case (DECO_SSPECIAL): + case (ESCAPE_SPECIAL): rhs = chars_spec2str (p->symtab, seq, ssz, &rsz); - /* Allow for one-char escapes. */ - if (DECO_SSPECIAL != d || rhs) + if (ssz != 1 || rhs) break; rhs = seq; @@ -637,17 +641,24 @@ term_strlen(const struct termp *p, const char *cp) break; } - if (rhs) - for (i = 0; i < rsz; i++) - sz += (*p->width)(p, *rhs++); - } else if (ASCII_NBRSP == *cp) { + if (NULL == rhs) + break; + + for (i = 0; i < rsz; i++) + sz += (*p->width)(p, *rhs++); + break; + case (ASCII_NBRSP): sz += (*p->width)(p, ' '); cp++; - } else if (ASCII_HYPH == *cp) { + break; + case (ASCII_HYPH): sz += (*p->width)(p, '-'); cp++; - } else + break; + default: sz += (*p->width)(p, *cp++); + break; + } return(sz); } |