summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKristaps Dzonsons <kristaps@bsd.lv>2011-04-09 15:29:40 +0000
committerKristaps Dzonsons <kristaps@bsd.lv>2011-04-09 15:29:40 +0000
commit523c7db3c47602feb761c95b387c6c93138264f5 (patch)
treeb36433935eca0c86910b6a754aef3fa1363fe8bb
parente99813aa631956ea6bc1323879f2817f2fe6f761 (diff)
downloadmandoc-523c7db3c47602feb761c95b387c6c93138264f5.tar.gz
Remove a2roffdeco() and mandoc_special() functions and replace them with
a public (mandoc.h) function mandoc_escape(), which merges the functionality of both prior functions. Reason: code duplication. The a2roffdeco() and mandoc_special() functions were pretty much the same thing and both quite complex. This allows one function to receive improvements in (e.g.) subexpression handling and performance, instead of having to replicate functionality. As such, the mandoc_escape() function already handles a superset of the escapes handled in previous versions and has improvements in performance (using strcspn(), for example) and reliable handling of subexpressions. This code Works For Me, but may need work to catch any regressions. Since the benefits are great (leaner code, simpler API), I'd rather have it in-tree than floating as a patch.
-rw-r--r--html.c87
-rw-r--r--libmandoc.h1
-rw-r--r--man_validate.c54
-rw-r--r--mandoc.c444
-rw-r--r--mandoc.h16
-rw-r--r--mdoc_validate.c30
-rw-r--r--out.c237
-rw-r--r--out.h17
-rw-r--r--read.c2
-rw-r--r--term.c107
10 files changed, 470 insertions, 525 deletions
diff --git a/html.c b/html.c
index b80846db..bc5049c7 100644
--- a/html.c
+++ b/html.c
@@ -94,14 +94,13 @@ static const char *const htmlattrs[ATTR_MAX] = {
};
static void print_num(struct html *, const char *, size_t);
-static void print_spec(struct html *, enum roffdeco,
- const char *, size_t);
+static void print_spec(struct html *, const char *, size_t);
static void print_res(struct html *, const char *, size_t);
static void print_ctag(struct html *, enum htmltag);
static void print_doctype(struct html *);
static void print_xmltype(struct html *);
static int print_encode(struct html *, const char *, int);
-static void print_metaf(struct html *, enum roffdeco);
+static void print_metaf(struct html *, enum mandoc_esc);
static void print_attr(struct html *,
const char *, const char *);
static void *ml_alloc(char *, enum htmltype);
@@ -221,7 +220,7 @@ print_num(struct html *h, const char *p, size_t len)
}
static void
-print_spec(struct html *h, enum roffdeco d, const char *p, size_t len)
+print_spec(struct html *h, const char *p, size_t len)
{
int cp;
const char *rhs;
@@ -230,7 +229,7 @@ print_spec(struct html *h, enum roffdeco d, const char *p, size_t len)
if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) {
printf("&#%d;", cp);
return;
- } else if (-1 == cp && DECO_SSPECIAL == d) {
+ } else if (-1 == cp && 1 == len) {
fwrite(p, 1, len, stdout);
return;
} else if (-1 == cp)
@@ -260,21 +259,21 @@ print_res(struct html *h, const char *p, size_t len)
static void
-print_metaf(struct html *h, enum roffdeco deco)
+print_metaf(struct html *h, enum mandoc_esc deco)
{
enum htmlfont font;
switch (deco) {
- case (DECO_PREVIOUS):
+ case (ESCAPE_FONTPREV):
font = h->metal;
break;
- case (DECO_ITALIC):
+ case (ESCAPE_FONTITALIC):
font = HTMLFONT_ITALIC;
break;
- case (DECO_BOLD):
+ case (ESCAPE_FONTBOLD):
font = HTMLFONT_BOLD;
break;
- case (DECO_ROMAN):
+ case (ESCAPE_FONTROMAN):
font = HTMLFONT_NONE;
break;
default:
@@ -303,73 +302,69 @@ print_encode(struct html *h, const char *p, int norecurse)
size_t sz;
int len, nospace;
const char *seq;
- enum roffdeco deco;
+ enum mandoc_esc esc;
static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
nospace = 0;
- for (; *p; p++) {
+ while ('\0' != *p) {
sz = strcspn(p, rejs);
fwrite(p, 1, sz, stdout);
- p += /* LINTED */
- sz;
+ p += (int)sz;
- if ('<' == *p) {
+ if ('\0' == *p)
+ break;
+
+ switch (*p++) {
+ case ('<'):
printf("&lt;");
continue;
- } else if ('>' == *p) {
+ case ('>'):
printf("&gt;");
continue;
- } else if ('&' == *p) {
+ case ('&'):
printf("&amp;");
continue;
- } else if (ASCII_HYPH == *p) {
- /*
- * Note: "soft hyphens" aren't graphically
- * displayed when not breaking the text; we want
- * them to be displayed.
- */
- /*printf("&#173;");*/
+ case (ASCII_HYPH):
putchar('-');
continue;
- } else if ('\0' == *p)
+ default:
break;
+ }
- seq = ++p;
- len = a2roffdeco(&deco, &seq, &sz);
+ esc = mandoc_escape(&p, &seq, &len);
+ if (ESCAPE_ERROR == esc)
+ break;
- switch (deco) {
- case (DECO_NUMBERED):
- print_num(h, seq, sz);
+ switch (esc) {
+ case (ESCAPE_NUMBERED):
+ print_num(h, seq, len);
break;
- case (DECO_RESERVED):
- print_res(h, seq, sz);
+ case (ESCAPE_PREDEF):
+ print_res(h, seq, len);
break;
- case (DECO_SSPECIAL):
- /* FALLTHROUGH */
- case (DECO_SPECIAL):
- print_spec(h, deco, seq, sz);
+ case (ESCAPE_SPECIAL):
+ print_spec(h, seq, len);
break;
- case (DECO_PREVIOUS):
+ case (ESCAPE_FONTPREV):
/* FALLTHROUGH */
- case (DECO_BOLD):
+ case (ESCAPE_FONTBOLD):
/* FALLTHROUGH */
- case (DECO_ITALIC):
+ case (ESCAPE_FONTITALIC):
/* FALLTHROUGH */
- case (DECO_ROMAN):
+ case (ESCAPE_FONTROMAN):
if (norecurse)
break;
- print_metaf(h, deco);
+ print_metaf(h, esc);
+ break;
+ case (ESCAPE_NOSPACE):
+ if ('\0' == *p)
+ nospace = 1;
break;
default:
break;
}
-
- p += len - 1;
-
- if (DECO_NOSPACE == deco && '\0' == *(p + 1))
- nospace = 1;
}
return(nospace);
diff --git a/libmandoc.h b/libmandoc.h
index 8a801bd7..cdfa2c6e 100644
--- a/libmandoc.h
+++ b/libmandoc.h
@@ -73,7 +73,6 @@ void mandoc_msg(enum mandocerr, struct mparse *,
int, int, const char *);
void mandoc_vmsg(enum mandocerr, struct mparse *,
int, int, const char *, ...);
-int mandoc_special(char *);
char *mandoc_strdup(const char *);
char *mandoc_getarg(struct mparse *, char **, int, int *);
char *mandoc_normdate(struct mparse *, char *, int, int);
diff --git a/man_validate.c b/man_validate.c
index b9e1ff51..c1968989 100644
--- a/man_validate.c
+++ b/man_validate.c
@@ -54,7 +54,7 @@ static int check_par(CHKARGS);
static int check_part(CHKARGS);
static int check_root(CHKARGS);
static int check_sec(CHKARGS);
-static int check_text(CHKARGS);
+static void check_text(CHKARGS);
static int post_AT(CHKARGS);
static int post_fi(CHKARGS);
@@ -151,7 +151,8 @@ man_valid_post(struct man *m)
switch (m->last->type) {
case (MAN_TEXT):
- return(check_text(m, m->last));
+ check_text(m, m->last);
+ return(1);
case (MAN_ROOT):
return(check_root(m, m->last));
case (MAN_EQN):
@@ -204,43 +205,48 @@ check_root(CHKARGS)
return(1);
}
-
-static int
+static void
check_text(CHKARGS)
{
- char *p;
- int pos, c;
+ char *p, *pp, *cpp;
+ int pos;
size_t sz;
- for (p = n->string, pos = n->pos + 1; *p; p++, pos++) {
- sz = strcspn(p, "\t\\");
- p += (int)sz;
+ p = n->string;
+ pos = n->pos + 1;
- if ('\0' == *p)
- break;
+ while ('\0' != *p) {
+ sz = strcspn(p, "\t\\");
+ p += (int)sz;
pos += (int)sz;
if ('\t' == *p) {
- if (MAN_LITERAL & m->flags)
- continue;
- man_pmsg(m, n->line, pos, MANDOCERR_BADTAB);
+ if ( ! (MAN_LITERAL & m->flags))
+ man_pmsg(m, n->line, pos, MANDOCERR_BADTAB);
+ p++;
+ pos++;
continue;
- }
+ } else if ('\0' == *p)
+ break;
- /* Check the special character. */
+ pos++;
+ pp = ++p;
- c = mandoc_special(p);
- if (c) {
- p += c - 1;
- pos += c - 1;
- } else
+ if (ESCAPE_ERROR == mandoc_escape
+ ((const char **)&pp, NULL, NULL)) {
man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE);
- }
+ break;
+ }
- return(1);
-}
+ cpp = p;
+ while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp)))
+ *cpp = '-';
+ pos += pp - p;
+ p = pp;
+ }
+}
#define INEQ_DEFINE(x, ineq, name) \
static int \
diff --git a/mandoc.c b/mandoc.c
index 3d1d0f99..e53b19f2 100644
--- a/mandoc.c
+++ b/mandoc.c
@@ -35,199 +35,363 @@
static int a2time(time_t *, const char *, const char *);
static char *time2a(time_t);
+static int numescape(const char *);
-int
-mandoc_special(char *p)
+/*
+ * Pass over recursive numerical expressions. The context of this
+ * function is important: it's only called within character-terminating
+ * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial
+ * recursion: we don't care about what's in these blocks.
+ * This returns the number of characters skipped or -1 if an error
+ * occurs (the caller should bail).
+ */
+static int
+numescape(const char *start)
+{
+ int i;
+ size_t sz;
+ const char *cp;
+
+ i = 0;
+
+ /* The expression consists of a subexpression. */
+
+ if ('\\' == start[i]) {
+ cp = &start[++i];
+ /*
+ * Read past the end of the subexpression.
+ * Bail immediately on errors.
+ */
+ if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
+ return(-1);
+ return(i + cp - &start[i]);
+ }
+
+ if ('(' != start[i++])
+ return(0);
+
+ /*
+ * A parenthesised subexpression. Read until the closing
+ * parenthesis, making sure to handle any nested subexpressions
+ * that might ruin our parse.
+ */
+
+ while (')' != start[i]) {
+ sz = strcspn(&start[i], ")\\");
+ i += (int)sz;
+
+ if ('\0' == start[i])
+ return(-1);
+ else if ('\\' != start[i])
+ continue;
+
+ cp = &start[++i];
+ if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
+ return(-1);
+ i += cp - &start[i];
+ }
+
+ /* Read past the terminating ')'. */
+ return(++i);
+}
+
+/*
+ * Handle an escaped sequence. This should be called with any
+ * string subsequent to a `\'. Pass a pointer to this substring as "end";
+ * it will be set to the supremum of the parsed escape sequence. If
+ * this returns ESCAPE_ERROR, the string is bogus and should be thrown
+ * away. If not ESCAPE_ERROR or ESCAPE_IGNORE, "start" is set to the
+ * first relevant character of the substring (font, glyph, whatever) of
+ * length sz. Both "start" and "sz" may be NULL.
+ */
+enum mandoc_esc
+mandoc_escape(const char **end, const char **start, int *sz)
{
- int len, i;
- char term;
- char *sv;
-
- len = 0;
+ char c, term, numeric;
+ int i, lim, ssz, rlim;
+ const char *cp, *rstart;
+ enum mandoc_esc gly;
+
+ cp = *end;
+ rstart = cp;
+ if (start)
+ *start = rstart;
+ i = 0;
+ gly = ESCAPE_ERROR;
term = '\0';
- sv = p;
+ numeric = 0;
- assert('\\' == *p);
- p++;
+ switch ((c = cp[i++])) {
+ /*
+ * First the glyphs. There are several different forms of
+ * these, but each eventually returns a substring of the glyph
+ * name.
+ */
+ case ('('):
+ gly = ESCAPE_SPECIAL;
+ lim = 2;
+ break;
+ case ('['):
+ gly = ESCAPE_SPECIAL;
+ term = ']';
+ break;
+ case ('C'):
+ if ('\'' != cp[i])
+ return(ESCAPE_ERROR);
+ gly = ESCAPE_SPECIAL;
+ term = '\'';
+ break;
- switch (*p++) {
-#if 0
- case ('Z'):
+ /*
+ * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
+ * 'X' is the trigger. These have opaque sub-strings.
+ */
+ case ('F'):
/* FALLTHROUGH */
- case ('X'):
+ case ('g'):
/* FALLTHROUGH */
- case ('x'):
+ case ('k'):
/* FALLTHROUGH */
- case ('S'):
+ case ('M'):
/* FALLTHROUGH */
- case ('R'):
+ case ('m'):
/* FALLTHROUGH */
- case ('N'):
+ case ('n'):
/* FALLTHROUGH */
- case ('l'):
+ case ('V'):
/* FALLTHROUGH */
- case ('L'):
+ case ('Y'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_IGNORE;
/* FALLTHROUGH */
- case ('H'):
+ case ('*'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_PREDEF;
/* FALLTHROUGH */
- case ('h'):
+ case ('f'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_FONT;
+
+ rstart= &cp[i];
+ if (start)
+ *start = rstart;
+
+ switch (cp[i++]) {
+ case ('('):
+ lim = 2;
+ break;
+ case ('['):
+ term = ']';
+ break;
+ default:
+ lim = 1;
+ i--;
+ break;
+ }
+ break;
+
+ /*
+ * These escapes are of the form \X'Y', where 'X' is the trigger
+ * and 'Y' is any string. These have opaque sub-strings.
+ */
+ case ('A'):
/* FALLTHROUGH */
- case ('D'):
+ case ('b'):
/* FALLTHROUGH */
- case ('C'):
+ case ('D'):
/* FALLTHROUGH */
- case ('b'):
+ case ('o'):
/* FALLTHROUGH */
- case ('B'):
+ case ('R'):
/* FALLTHROUGH */
- case ('a'):
+ case ('X'):
/* FALLTHROUGH */
- case ('A'):
- if (*p++ != '\'')
- return(0);
+ case ('Z'):
+ if ('\'' != cp[i++])
+ return(ESCAPE_ERROR);
+ gly = ESCAPE_IGNORE;
term = '\'';
break;
-#endif
+
+ /*
+ * These escapes are of the form \X'N', where 'X' is the trigger
+ * and 'N' resolves to a numerical expression.
+ */
+ case ('B'):
+ /* FALLTHROUGH */
case ('h'):
/* FALLTHROUGH */
+ case ('H'):
+ /* FALLTHROUGH */
+ case ('L'):
+ /* FALLTHROUGH */
+ case ('l'):
+ /* FALLTHROUGH */
+ case ('N'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_NUMBERED;
+ /* FALLTHROUGH */
+ case ('S'):
+ /* FALLTHROUGH */
case ('v'):
/* FALLTHROUGH */
+ case ('w'):
+ /* FALLTHROUGH */
+ case ('x'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_IGNORE;
+ if ('\'' != cp[i++])
+ return(ESCAPE_ERROR);
+ term = numeric = '\'';
+ break;
+
+ /*
+ * Sizes get a special category of their own.
+ */
case ('s'):
- if (ASCII_HYPH == *p)
- *p = '-';
+ gly = ESCAPE_IGNORE;
- i = 0;
- if ('+' == *p || '-' == *p) {
- p++;
- i = 1;
- }
+ rstart = &cp[i];
+ if (start)
+ *start = rstart;
+
+ /* Skip a sign (+, - or hyphen) preceding the delimiter. */
+ c = cp[i];
+ if ('+' == c || '-' == c || ASCII_HYPH == c)
+ ++i;
- switch (*p++) {
+ switch (cp[i++]) {
case ('('):
- len = 2;
+ lim = 2;
break;
case ('['):
- term = ']';
+ term = numeric = ']';
break;
case ('\''):
- term = '\'';
+ term = numeric = '\'';
break;
- case ('0'):
- i = 1;
- /* FALLTHROUGH */
default:
- len = 1;
- p--;
+ lim = 1;
+ i--;
break;
}
- if (ASCII_HYPH == *p)
- *p = '-';
- if ('+' == *p || '-' == *p) {
- if (i)
- return(0);
- p++;
- }
-
- /* Handle embedded numerical subexp or escape. */
-
- if ('(' == *p) {
- while (*p && ')' != *p)
- if ('\\' == *p++) {
- i = mandoc_special(--p);
- if (0 == i)
- return(0);
- p += i;
- }
-
- if (')' == *p++)
- break;
+ /* Skip a sign (+, - or hyphen) following the delimiter. */
+ c = cp[i];
+ if ('+' == c || '-' == c || ASCII_HYPH == c)
+ ++i;
- return(0);
- } else if ('\\' == *p) {
- if (0 == (i = mandoc_special(p)))
- return(0);
- p += i;
- }
+ break;
+ /*
+ * Anything else is assumed to be a glyph.
+ */
+ default:
+ gly = ESCAPE_SPECIAL;
+ lim = 1;
+ i--;
break;
-#if 0
- case ('Y'):
- /* FALLTHROUGH */
- case ('V'):
- /* FALLTHROUGH */
- case ('$'):
- /* FALLTHROUGH */
- case ('n'):
- /* FALLTHROUGH */
-#endif
- case ('k'):
- /* FALLTHROUGH */
- case ('M'):
- /* FALLTHROUGH */
- case ('m'):
- /* FALLTHROUGH */
- case ('f'):
- /* FALLTHROUGH */
- case ('F'):
- /* FALLTHROUGH */
- case ('*'):
- switch (*p++) {
- case ('('):
- len = 2;
+ }
+
+ assert(ESCAPE_ERROR != gly);
+
+ rstart = &cp[i];
+ if (start)
+ *start = rstart;
+
+ /*
+ * If a terminating block has been specified, we need to
+ * handle the case of nested escapes, which could have their
+ * own terminating blocks that mess up our parse. This, by the
+ * way, means that the "start" and "size" values will be
+ * effectively meaningless.
+ */
+
+ ssz = 0;
+ if (numeric && -1 == (ssz = numescape(&cp[i])))
+ return(ESCAPE_ERROR);
+
+ i += ssz;
+ rlim = -1;
+
+ /*
+ * We have a character terminator. Try to read up to that
+ * character. If we can't (i.e., we hit the nil), then return
+ * an error; if we can, calculate our length, read past the
+ * terminating character, and exit.
+ */
+
+ if ('\0' != term) {
+ *end = strchr(&cp[i], term);
+ if ('\0' == *end)
+ return(ESCAPE_ERROR);
+
+ rlim = *end - &cp[i];
+ if (sz)
+ *sz = rlim;
+ (*end)++;
+ goto out;
+ }
+
+ assert(lim > 0);
+
+ /*
+ * We have a numeric limit. If the string is shorter than that,
+ * stop and return an error. Else adjust our endpoint, length,
+ * and return the current glyph.
+ */
+
+ if ((size_t)lim > strlen(&cp[i]))
+ return(ESCAPE_ERROR);
+
+ rlim = lim;
+ if (sz)
+ *sz = rlim;
+
+ *end = &cp[i] + lim;
+
+out:
+ assert(rlim >= 0 && rstart);
+
+ /* Run post-processors. */
+
+ switch (gly) {
+ case (ESCAPE_FONT):
+ if (1 != rlim)
break;
- case ('['):
- term = ']';
+ switch (*rstart) {
+ case ('3'):
+ /* FALLTHROUGH */
+ case ('B'):
+ gly = ESCAPE_FONTBOLD;
break;
- default:
- len = 1;
- p--;
+ case ('2'):
+ /* FALLTHROUGH */
+ case ('I'):
+ gly = ESCAPE_FONTITALIC;
break;
- }
- break;
- case ('('):
- len = 2;
- break;
- case ('['):
- term = ']';
- break;
- case ('z'):
- len = 1;
- if ('\\' == *p) {
- if (0 == (i = mandoc_special(p)))
- return(0);
- p += i;
- return(*p ? (int)(p - sv) : 0);
- }
- break;
- case ('o'):
- /* FALLTHROUGH */
- case ('w'):
- if ('\'' == *p++) {
- term = '\'';
+ case ('P'):
+ gly = ESCAPE_FONTPREV;
+ break;
+ case ('1'):
+ /* FALLTHROUGH */
+ case ('R'):
+ gly = ESCAPE_FONTROMAN;
break;
}
- /* FALLTHROUGH */
+ case (ESCAPE_SPECIAL):
+ if (1 != rlim)
+ break;
+ if ('c' == *rstart)
+ gly = ESCAPE_NOSPACE;
+ break;
default:
- len = 1;
- p--;
break;
}
- if (term) {
- for ( ; *p && term != *p; p++)
- if (ASCII_HYPH == *p)
- *p = '-';
- return(*p ? (int)(p - sv) : 0);
- }
-
- for (i = 0; *p && i < len; i++, p++)
- if (ASCII_HYPH == *p)
- *p = '-';
- return(i == len ? (int)(p - sv) : 0);
+ return(gly);
}
-
void *
mandoc_calloc(size_t num, size_t size)
{
diff --git a/mandoc.h b/mandoc.h
index 60e05a34..a838c325 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -288,6 +288,20 @@ enum mparset {
MPARSE_MAN /* assume -man */
};
+enum mandoc_esc {
+ ESCAPE_ERROR = 0, /* bail! unparsable escape */
+ ESCAPE_IGNORE, /* escape to be ignored */
+ ESCAPE_SPECIAL, /* a regular special character */
+ ESCAPE_PREDEF, /* a predefined special character */
+ ESCAPE_FONT, /* a generic font mode */
+ ESCAPE_FONTBOLD, /* bold font mode */
+ ESCAPE_FONTITALIC, /* italic font mode */
+ ESCAPE_FONTROMAN, /* roman font mode */
+ ESCAPE_FONTPREV, /* previous font mode */
+ ESCAPE_NUMBERED, /* a numbered glyph */
+ ESCAPE_NOSPACE /* suppress space if the last on a line */
+};
+
typedef void (*mandocmsg)(enum mandocerr, enum mandoclevel,
const char *, int, int, const char *);
@@ -310,6 +324,8 @@ void *mandoc_calloc(size_t, size_t);
void *mandoc_malloc(size_t);
void *mandoc_realloc(void *, size_t);
+enum mandoc_esc mandoc_escape(const char **, const char **, int *);
+
__END_DECLS
#endif /*!MANDOC_H*/
diff --git a/mdoc_validate.c b/mdoc_validate.c
index 9180b580..acd855eb 100644
--- a/mdoc_validate.c
+++ b/mdoc_validate.c
@@ -545,31 +545,39 @@ check_argv(struct mdoc *m, struct mdoc_node *n, struct mdoc_argv *v)
static void
check_text(struct mdoc *m, int ln, int pos, char *p)
{
- int c;
+ char *cpp, *pp;
size_t sz;
- for ( ; *p; p++, pos++) {
+ while ('\0' != *p) {
sz = strcspn(p, "\t\\");
- p += (int)sz;
-
- if ('\0' == *p)
- break;
+ p += (int)sz;
pos += (int)sz;
if ('\t' == *p) {
if ( ! (MDOC_LITERAL & m->flags))
mdoc_pmsg(m, ln, pos, MANDOCERR_BADTAB);
+ p++;
+ pos++;
continue;
- }
+ } else if ('\0' == *p)
+ break;
+
+ pos++;
+ pp = ++p;
- if (0 == (c = mandoc_special(p))) {
+ if (ESCAPE_ERROR == mandoc_escape
+ ((const char **)&pp, NULL, NULL)) {
mdoc_pmsg(m, ln, pos, MANDOCERR_BADESCAPE);
- continue;
+ break;
}
- p += c - 1;
- pos += c - 1;
+ cpp = p;
+ while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp)))
+ *cpp = '-';
+
+ pos += pp - p;
+ p = pp;
}
}
diff --git a/out.c b/out.c
index 97da8498..e57077fc 100644
--- a/out.c
+++ b/out.c
@@ -174,243 +174,6 @@ time2a(time_t t, char *dst, size_t sz)
(void)strftime(p, sz, "%Y", &tm);
}
-
-int
-a2roffdeco(enum roffdeco *d, const char **word, size_t *sz)
-{
- int i, j, lim;
- char term, c;
- const char *wp;
- enum roffdeco dd;
-
- *d = DECO_NONE;
- lim = i = 0;
- term = '\0';
- wp = *word;
-
- switch ((c = wp[i++])) {
- case ('('):
- *d = DECO_SPECIAL;
- lim = 2;
- break;
- case ('F'):
- /* FALLTHROUGH */
- case ('f'):
- *d = 'F' == c ? DECO_FFONT : DECO_FONT;
-
- switch (wp[i++]) {
- case ('('):
- lim = 2;
- break;
- case ('['):
- term = ']';
- break;
- case ('3'):
- /* FALLTHROUGH */
- case ('B'):
- *d = DECO_BOLD;
- return(i);
- case ('2'):
- /* FALLTHROUGH */
- case ('I'):
- *d = DECO_ITALIC;
- return(i);
- case ('P'):
- *d = DECO_PREVIOUS;
- return(i);
- case ('1'):
- /* FALLTHROUGH */
- case ('R'):
- *d = DECO_ROMAN;
- return(i);
- default:
- i--;
- lim = 1;
- break;
- }
- break;
- case ('k'):
- /* FALLTHROUGH */
- case ('M'):
- /* FALLTHROUGH */
- case ('m'):
- /* FALLTHROUGH */
- case ('*'):
- if ('*' == c)
- *d = DECO_RESERVED;
-
- switch (wp[i++]) {
- case ('('):
- lim = 2;
- break;
- case ('['):
- term = ']';
- break;
- default:
- i--;
- lim = 1;
- break;
- }
- break;
-
- case ('N'):
-
- /*
- * Sequence of characters: backslash, 'N' (i = 0),
- * starting delimiter (i = 1), character number (i = 2).
- */
-
- *word = wp + 2;
- *sz = 0;
-
- /*
- * Cannot use a digit as a starting delimiter;
- * but skip the digit anyway.
- */
-
- if (isdigit((int)wp[1]))
- return(2);
-
- /*
- * Any non-digit terminates the character number.
- * That is, the terminating delimiter need not
- * match the starting delimiter.
- */
-
- for (i = 2; isdigit((int)wp[i]); i++)
- (*sz)++;
-
- /*
- * This is only a numbered character
- * if the character number has at least one digit.
- */
-
- if (*sz)
- *d = DECO_NUMBERED;
-
- /*
- * Skip the terminating delimiter, even if it does not
- * match, and even if there is no character number.
- */
-
- return(++i);
-
- case ('h'):
- /* FALLTHROUGH */
- case ('v'):
- /* FALLTHROUGH */
- case ('s'):
- j = 0;
- if ('+' == wp[i] || '-' == wp[i]) {
- i++;
- j = 1;
- }
-
- switch (wp[i++]) {
- case ('('):
- lim = 2;
- break;
- case ('['):
- term = ']';
- break;
- case ('\''):
- term = '\'';
- break;
- case ('0'):
- j = 1;
- /* FALLTHROUGH */
- default:
- i--;
- lim = 1;
- break;
- }
-
- if ('+' == wp[i] || '-' == wp[i]) {
- if (j)
- return(i);
- i++;
- }
-
- /* Handle embedded numerical subexp or escape. */
-
- if ('(' == wp[i]) {
- while (wp[i] && ')' != wp[i])
- if ('\\' == wp[i++]) {
- /* Handle embedded escape. */
- *word = &wp[i];
- i += a2roffdeco(&dd, word, sz);
- }
-
- if (')' == wp[i++])
- break;
-
- *d = DECO_NONE;
- return(i - 1);
- } else if ('\\' == wp[i]) {
- *word = &wp[++i];
- i += a2roffdeco(&dd, word, sz);
- }
-
- break;
- case ('['):
- *d = DECO_SPECIAL;
- term = ']';
- break;
- case ('c'):
- *d = DECO_NOSPACE;
- return(i);
- case ('z'):
- *d = DECO_NONE;
- if ('\\' == wp[i]) {
- *word = &wp[++i];
- return(i + a2roffdeco(&dd, word, sz));
- } else
- lim = 1;
- break;
- case ('o'):
- /* FALLTHROUGH */
- case ('w'):
- if ('\'' == wp[i++]) {
- term = '\'';
- break;
- }
- /* FALLTHROUGH */
- default:
- *d = DECO_SSPECIAL;
- i--;
- lim = 1;
- break;
- }
-
- assert(term || lim);
- *word = &wp[i];
-
- if (term) {
- j = i;
- while (wp[i] && wp[i] != term)
- i++;
- if ('\0' == wp[i]) {
- *d = DECO_NONE;
- return(i);
- }
-
- assert(i >= j);
- *sz = (size_t)(i - j);
-
- return(i + 1);
- }
-
- assert(lim > 0);
- *sz = (size_t)lim;
-
- for (j = 0; wp[i] && j < lim; j++)
- i++;
- if (j < lim)
- *d = DECO_NONE;
-
- return(i);
-}
-
/*
* Calculate the abstract widths and decimal positions of columns in a
* table. This routine allocates the columns structures then runs over
diff --git a/out.h b/out.h
index 8b2a083b..77414d71 100644
--- a/out.h
+++ b/out.h
@@ -31,22 +31,6 @@ enum roffscale {
SCALE_MAX
};
-enum roffdeco {
- DECO_NONE,
- DECO_NUMBERED, /* numbered character */
- DECO_SPECIAL, /* special character */
- DECO_SSPECIAL, /* single-char special */
- DECO_RESERVED, /* reserved word */
- DECO_BOLD, /* bold font */
- DECO_ITALIC, /* italic font */
- DECO_ROMAN, /* "normal" undecorated font */
- DECO_PREVIOUS, /* revert to previous font */
- DECO_NOSPACE, /* suppress spacing */
- DECO_FONT, /* font */
- DECO_FFONT, /* font family */
- DECO_MAX
-};
-
enum chars {
CHARS_ASCII, /* 7-bit ascii representation */
CHARS_HTML /* unicode values */
@@ -85,7 +69,6 @@ __BEGIN_DECLS
while (/* CONSTCOND */ 0)
int a2roffsu(const char *, struct roffsu *, enum roffscale);
-int a2roffdeco(enum roffdeco *, const char **, size_t *);
void time2a(time_t, char *, size_t);
void tblcalc(struct rofftbl *tbl, const struct tbl_span *);
diff --git a/read.c b/read.c
index 78ff681f..a43b60d8 100644
--- a/read.c
+++ b/read.c
@@ -142,7 +142,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"tab in non-literal context",
"end of line whitespace",
"bad comment style",
- "unknown escape sequence",
+ "bad escape sequence",
"unterminated quoted string",
"generic error",
diff --git a/term.c b/term.c
index 4b468e0b..742b9877 100644
--- a/term.c
+++ b/term.c
@@ -33,8 +33,7 @@
#include "term.h"
#include "main.h"
-static void spec(struct termp *, enum roffdeco,
- const char *, size_t);
+static void spec(struct termp *, const char *, size_t);
static void res(struct termp *, const char *, size_t);
static void bufferc(struct termp *, char);
static void adjbuf(struct termp *p, size_t);
@@ -358,7 +357,7 @@ numbered(struct termp *p, const char *word, size_t len)
static void
-spec(struct termp *p, enum roffdeco d, const char *word, size_t len)
+spec(struct termp *p, const char *word, size_t len)
{
const char *rhs;
size_t sz;
@@ -366,7 +365,7 @@ spec(struct termp *p, enum roffdeco d, const char *word, size_t len)
rhs = chars_spec2str(p->symtab, word, len, &sz);
if (rhs)
encode(p, rhs, sz);
- else if (DECO_SSPECIAL == d)
+ else if (1 == len)
encode(p, word, len);
}
@@ -457,8 +456,9 @@ void
term_word(struct termp *p, const char *word)
{
const char *seq;
+ int sz;
size_t ssz;
- enum roffdeco deco;
+ enum mandoc_esc esc;
if ( ! (TERMP_NOSPACE & p->flags)) {
if ( ! (TERMP_KEEP & p->flags)) {
@@ -478,7 +478,7 @@ term_word(struct termp *p, const char *word)
p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM);
- while (*word) {
+ while ('\0' != *word) {
if ((ssz = strcspn(word, "\\")) > 0)
encode(p, word, ssz);
@@ -486,39 +486,40 @@ term_word(struct termp *p, const char *word)
if ('\\' != *word)
continue;
- seq = ++word;
- word += a2roffdeco(&deco, &seq, &ssz);
+ word++;
+ esc = mandoc_escape(&word, &seq, &sz);
+ if (ESCAPE_ERROR == esc)
+ break;
- switch (deco) {
- case (DECO_NUMBERED):
- numbered(p, seq, ssz);
+ switch (esc) {
+ case (ESCAPE_NUMBERED):
+ numbered(p, seq, sz);
break;
- case (DECO_RESERVED):
- res(p, seq, ssz);
+ case (ESCAPE_PREDEF):
+ res(p, seq, sz);
break;
- case (DECO_SPECIAL):
- /* FALLTHROUGH */
- case (DECO_SSPECIAL):
- spec(p, deco, seq, ssz);
+ case (ESCAPE_SPECIAL):
+ spec(p, seq, sz);
break;
- case (DECO_BOLD):
+ case (ESCAPE_FONTBOLD):
term_fontrepl(p, TERMFONT_BOLD);
break;
- case (DECO_ITALIC):
+ case (ESCAPE_FONTITALIC):
term_fontrepl(p, TERMFONT_UNDER);
break;
- case (DECO_ROMAN):
+ case (ESCAPE_FONTROMAN):
term_fontrepl(p, TERMFONT_NONE);
break;
- case (DECO_PREVIOUS):
+ case (ESCAPE_FONTPREV):
term_fontlast(p);
break;
+ case (ESCAPE_NOSPACE):
+ if ('\0' == *word)
+ p->flags |= TERMP_NOSPACE;
+ break;
default:
break;
}
-
- if (DECO_NOSPACE == deco && '\0' == *word)
- p->flags |= TERMP_NOSPACE;
}
}
@@ -600,33 +601,36 @@ term_len(const struct termp *p, size_t sz)
size_t
term_strlen(const struct termp *p, const char *cp)
{
- size_t sz, ssz, rsz, i;
- enum roffdeco d;
+ size_t sz, rsz, i;
+ int ssz;
+ enum mandoc_esc esc;
const char *seq, *rhs;
- for (sz = 0; '\0' != *cp; )
- /*
- * Account for escaped sequences within string length
- * calculations. This follows the logic in term_word()
- * as we must calculate the width of produced strings.
- */
- if ('\\' == *cp) {
- seq = ++cp;
- cp += a2roffdeco(&d, &seq, &ssz);
+ /*
+ * Account for escaped sequences within string length
+ * calculations. This follows the logic in term_word() as we
+ * must calculate the width of produced strings.
+ */
- switch (d) {
- case (DECO_RESERVED):
+ sz = 0;
+ while ('\0' != *cp)
+ switch (*cp) {
+ case ('\\'):
+ ++cp;
+ esc = mandoc_escape(&cp, &seq, &ssz);
+ if (ESCAPE_ERROR == esc)
+ return(sz);
+
+ switch (esc) {
+ case (ESCAPE_PREDEF):
rhs = chars_res2str
(p->symtab, seq, ssz, &rsz);
break;
- case (DECO_SPECIAL):
- /* FALLTHROUGH */
- case (DECO_SSPECIAL):
+ case (ESCAPE_SPECIAL):
rhs = chars_spec2str
(p->symtab, seq, ssz, &rsz);
- /* Allow for one-char escapes. */
- if (DECO_SSPECIAL != d || rhs)
+ if (ssz != 1 || rhs)
break;
rhs = seq;
@@ -637,17 +641,24 @@ term_strlen(const struct termp *p, const char *cp)
break;
}
- if (rhs)
- for (i = 0; i < rsz; i++)
- sz += (*p->width)(p, *rhs++);
- } else if (ASCII_NBRSP == *cp) {
+ if (NULL == rhs)
+ break;
+
+ for (i = 0; i < rsz; i++)
+ sz += (*p->width)(p, *rhs++);
+ break;
+ case (ASCII_NBRSP):
sz += (*p->width)(p, ' ');
cp++;
- } else if (ASCII_HYPH == *cp) {
+ break;
+ case (ASCII_HYPH):
sz += (*p->width)(p, '-');
cp++;
- } else
+ break;
+ default:
sz += (*p->width)(p, *cp++);
+ break;
+ }
return(sz);
}