summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- html.c 87
-rw-r--r-- libmandoc.h 1
-rw-r--r-- man_validate.c 54
-rw-r--r-- mandoc.c 444
-rw-r--r-- mandoc.h 16
-rw-r--r-- mdoc_validate.c 30
-rw-r--r-- out.c 237
-rw-r--r-- out.h 17
-rw-r--r-- read.c 2
-rw-r--r-- term.c 107
10 files changed, 470 insertions, 525 deletions
diff --git a/html.c b/html.c
index b80846db..bc5049c7 100644
--- a/html.c
+++ b/html.c
@@ -94,14 +94,13 @@ static const char *const htmlattrs[ATTR_MAX] = {
};
static void print_num(struct html *, const char *, size_t);
-static void print_spec(struct html *, enum roffdeco,
- const char *, size_t);
+static void print_spec(struct html *, const char *, size_t);
static void print_res(struct html *, const char *, size_t);
static void print_ctag(struct html *, enum htmltag);
static void print_doctype(struct html *);
static void print_xmltype(struct html *);
static int print_encode(struct html *, const char *, int);
-static void print_metaf(struct html *, enum roffdeco);
+static void print_metaf(struct html *, enum mandoc_esc);
static void print_attr(struct html *,
const char *, const char *);
static void *ml_alloc(char *, enum htmltype);
@@ -221,7 +220,7 @@ print_num(struct html *h, const char *p, size_t len)
}
static void
-print_spec(struct html *h, enum roffdeco d, const char *p, size_t len)
+print_spec(struct html *h, const char *p, size_t len)
{
int cp;
const char *rhs;
@@ -230,7 +229,7 @@ print_spec(struct html *h, enum roffdeco d, const char *p, size_t len)
if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) {
printf("&#%d;", cp);
return;
- } else if (-1 == cp && DECO_SSPECIAL == d) {
+ } else if (-1 == cp && 1 == len) {
fwrite(p, 1, len, stdout);
return;
} else if (-1 == cp)
@@ -260,21 +259,21 @@ print_res(struct html *h, const char *p, size_t len)
static void
-print_metaf(struct html *h, enum roffdeco deco)
+print_metaf(struct html *h, enum mandoc_esc deco)
{
enum htmlfont font;
switch (deco) {
- case (DECO_PREVIOUS):
+ case (ESCAPE_FONTPREV):
font = h->metal;
break;
- case (DECO_ITALIC):
+ case (ESCAPE_FONTITALIC):
font = HTMLFONT_ITALIC;
break;
- case (DECO_BOLD):
+ case (ESCAPE_FONTBOLD):
font = HTMLFONT_BOLD;
break;
- case (DECO_ROMAN):
+ case (ESCAPE_FONTROMAN):
font = HTMLFONT_NONE;
break;
default:
@@ -303,73 +302,69 @@ print_encode(struct html *h, const char *p, int norecurse)
size_t sz;
int len, nospace;
const char *seq;
- enum roffdeco deco;
+ enum mandoc_esc esc;
static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
nospace = 0;
- for (; *p; p++) {
+ while ('\0' != *p) {
sz = strcspn(p, rejs);
fwrite(p, 1, sz, stdout);
- p += /* LINTED */
- sz;
+ p += (int)sz;
- if ('<' == *p) {
+ if ('\0' == *p)
+ break;
+
+ switch (*p++) {
+ case ('<'):
printf("&lt;");
continue;
- } else if ('>' == *p) {
+ case ('>'):
printf("&gt;");
continue;
- } else if ('&' == *p) {
+ case ('&'):
printf("&amp;");
continue;
- } else if (ASCII_HYPH == *p) {
- /*
- * Note: "soft hyphens" aren't graphically
- * displayed when not breaking the text; we want
- * them to be displayed.
- */
- /*printf("&#173;");*/
+ case (ASCII_HYPH):
putchar('-');
continue;
- } else if ('\0' == *p)
+ default:
break;
+ }
- seq = ++p;
- len = a2roffdeco(&deco, &seq, &sz);
+ esc = mandoc_escape(&p, &seq, &len);
+ if (ESCAPE_ERROR == esc)
+ break;
- switch (deco) {
- case (DECO_NUMBERED):
- print_num(h, seq, sz);
+ switch (esc) {
+ case (ESCAPE_NUMBERED):
+ print_num(h, seq, len);
break;
- case (DECO_RESERVED):
- print_res(h, seq, sz);
+ case (ESCAPE_PREDEF):
+ print_res(h, seq, len);
break;
- case (DECO_SSPECIAL):
- /* FALLTHROUGH */
- case (DECO_SPECIAL):
- print_spec(h, deco, seq, sz);
+ case (ESCAPE_SPECIAL):
+ print_spec(h, seq, len);
break;
- case (DECO_PREVIOUS):
+ case (ESCAPE_FONTPREV):
/* FALLTHROUGH */
- case (DECO_BOLD):
+ case (ESCAPE_FONTBOLD):
/* FALLTHROUGH */
- case (DECO_ITALIC):
+ case (ESCAPE_FONTITALIC):
/* FALLTHROUGH */
- case (DECO_ROMAN):
+ case (ESCAPE_FONTROMAN):
if (norecurse)
break;
- print_metaf(h, deco);
+ print_metaf(h, esc);
+ break;
+ case (ESCAPE_NOSPACE):
+ if ('\0' == *p)
+ nospace = 1;
break;
default:
break;
}
-
- p += len - 1;
-
- if (DECO_NOSPACE == deco && '\0' == *(p + 1))
- nospace = 1;
}
return(nospace);
diff --git a/libmandoc.h b/libmandoc.h
index 8a801bd7..cdfa2c6e 100644
--- a/libmandoc.h
+++ b/libmandoc.h
@@ -73,7 +73,6 @@ void mandoc_msg(enum mandocerr, struct mparse *,
int, int, const char *);
void mandoc_vmsg(enum mandocerr, struct mparse *,
int, int, const char *, ...);
-int mandoc_special(char *);
char *mandoc_strdup(const char *);
char *mandoc_getarg(struct mparse *, char **, int, int *);
char *mandoc_normdate(struct mparse *, char *, int, int);
diff --git a/man_validate.c b/man_validate.c
index b9e1ff51..c1968989 100644
--- a/man_validate.c
+++ b/man_validate.c
@@ -54,7 +54,7 @@ static int check_par(CHKARGS);
static int check_part(CHKARGS);
static int check_root(CHKARGS);
static int check_sec(CHKARGS);
-static int check_text(CHKARGS);
+static void check_text(CHKARGS);
static int post_AT(CHKARGS);
static int post_fi(CHKARGS);
@@ -151,7 +151,8 @@ man_valid_post(struct man *m)
switch (m->last->type) {
case (MAN_TEXT):
- return(check_text(m, m->last));
+ check_text(m, m->last);
+ return(1);
case (MAN_ROOT):
return(check_root(m, m->last));
case (MAN_EQN):
@@ -204,43 +205,48 @@ check_root(CHKARGS)
return(1);
}
-
-static int
+static void
check_text(CHKARGS)
{
- char *p;
- int pos, c;
+ char *p, *pp, *cpp;
+ int pos;
size_t sz;
- for (p = n->string, pos = n->pos + 1; *p; p++, pos++) {
- sz = strcspn(p, "\t\\");
- p += (int)sz;
+ p = n->string;
+ pos = n->pos + 1;
- if ('\0' == *p)
- break;
+ while ('\0' != *p) {
+ sz = strcspn(p, "\t\\");
+ p += (int)sz;
pos += (int)sz;
if ('\t' == *p) {
- if (MAN_LITERAL & m->flags)
- continue;
- man_pmsg(m, n->line, pos, MANDOCERR_BADTAB);
+ if ( ! (MAN_LITERAL & m->flags))
+ man_pmsg(m, n->line, pos, MANDOCERR_BADTAB);
+ p++;
+ pos++;
continue;
- }
+ } else if ('\0' == *p)
+ break;
- /* Check the special character. */
+ pos++;
+ pp = ++p;
- c = mandoc_special(p);
- if (c) {
- p += c - 1;
- pos += c - 1;
- } else
+ if (ESCAPE_ERROR == mandoc_escape
+ ((const char **)&pp, NULL, NULL)) {
man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE);
- }
+ break;
+ }
- return(1);
-}
+ cpp = p;
+ while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp)))
+ *cpp = '-';
+ pos += pp - p;
+ p = pp;
+ }
+}
#define INEQ_DEFINE(x, ineq, name) \
static int \
diff --git a/mandoc.c b/mandoc.c
index 3d1d0f99..e53b19f2 100644
--- a/mandoc.c
+++ b/mandoc.c
@@ -35,199 +35,363 @@
static int a2time(time_t *, const char *, const char *);
static char *time2a(time_t);
+static int numescape(const char *);
-int
-mandoc_special(char *p)
+/*
+ * Pass over recursive numerical expressions. The context of this
+ * function is important: it's only called within character-terminating
+ * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial
+ * recursion: we don't care about what's in these blocks.
+ * This returns the number of characters skipped or -1 if an error
+ * occurs (the caller should bail).
+ */
+static int
+numescape(const char *start)
+{
+ int i;
+ size_t sz;
+ const char *cp;
+
+ i = 0;
+
+ /* The expression consists of a subexpression. */
+
+ if ('\\' == start[i]) {
+ cp = &start[++i];
+ /*
+ * Read past the end of the subexpression.
+ * Bail immediately on errors.
+ */
+ if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
+ return(-1);
+ return(i + cp - &start[i]);
+ }
+
+ if ('(' != start[i++])
+ return(0);
+
+ /*
+ * A parenthesised subexpression. Read until the closing
+ * parenthesis, making sure to handle any nested subexpressions
+ * that might ruin our parse.
+ */
+
+ while (')' != start[i]) {
+ sz = strcspn(&start[i], ")\\");
+ i += (int)sz;
+
+ if ('\0' == start[i])
+ return(-1);
+ else if ('\\' != start[i])
+ continue;
+
+ cp = &start[++i];
+ if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
+ return(-1);
+ i += cp - &start[i];
+ }
+
+ /* Read past the terminating ')'. */
+ return(++i);
+}
+
+/*
+ * Handle an escaped sequence. This should be called with any
+ * string subsequent to a `\'. Pass a pointer to this substring as "end";
+ * it will be set to the supremum of the parsed escape sequence. If
+ * this returns ESCAPE_ERROR, the string is bogus and should be thrown
+ * away. If not ESCAPE_ERROR or ESCAPE_IGNORE, "start" is set to the
+ * first relevant character of the substring (font, glyph, whatever) of
+ * length sz. Both "start" and "sz" may be NULL.
+ */
+enum mandoc_esc
+mandoc_escape(const char **end, const char **start, int *sz)
{
- int len, i;
- char term;
- char *sv;
-
- len = 0;
+ char c, term, numeric;
+ int i, lim, ssz, rlim;
+ const char *cp, *rstart;
+ enum mandoc_esc gly;
+
+ cp = *end;
+ rstart = cp;
+ if (start)
+ *start = rstart;
+ i = 0;
+ gly = ESCAPE_ERROR;
term = '\0';
- sv = p;
+ numeric = 0;
- assert('\\' == *p);
- p++;
+ switch ((c = cp[i++])) {
+ /*
+ * First the glyphs. There are several different forms of
+ * these, but each eventually returns a substring of the glyph
+ * name.
+ */
+ case ('('):
+ gly = ESCAPE_SPECIAL;
+ lim = 2;
+ break;
+ case ('['):
+ gly = ESCAPE_SPECIAL;
+ term = ']';
+ break;
+ case ('C'):
+ if ('\'' != cp[i])
+ return(ESCAPE_ERROR);
+ gly = ESCAPE_SPECIAL;
+ term = '\'';
+ break;
- switch (*p++) {
-#if 0
- case ('Z'):
+ /*
+ * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
+ * 'X' is the trigger. These have opaque sub-strings.
+ */
+ case ('F'):
/* FALLTHROUGH */
- case ('X'):
+ case ('g'):
/* FALLTHROUGH */
- case ('x'):
+ case ('k'):
/* FALLTHROUGH */
- case ('S'):
+ case ('M'):
/* FALLTHROUGH */
- case ('R'):
+ case ('m'):
/* FALLTHROUGH */
- case ('N'):
+ case ('n'):
/* FALLTHROUGH */
- case ('l'):
+ case ('V'):
/* FALLTHROUGH */
- case ('L'):
+ case ('Y'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_IGNORE;
/* FALLTHROUGH */
- case ('H'):
+ case ('*'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_PREDEF;
/* FALLTHROUGH */
- case ('h'):
+ case ('f'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_FONT;
+
+ rstart= &cp[i];
+ if (start)
+ *start = rstart;
+
+ switch (cp[i++]) {
+ case ('('):
+ lim = 2;
+ break;
+ case ('['):
+ term = ']';
+ break;
+ default:
+ lim = 1;
+ i--;
+ break;
+ }
+ break;
+
+ /*
+ * These escapes are of the form \X'Y', where 'X' is the trigger
+ * and 'Y' is any string. These have opaque sub-strings.
+ */
+ case ('A'):
/* FALLTHROUGH */
- case ('D'):
+ case ('b'):
/* FALLTHROUGH */
- case ('C'):
+ case ('D'):
/* FALLTHROUGH */
- case ('b'):
+ case ('o'):
/* FALLTHROUGH */
- case ('B'):
+ case ('R'):
/* FALLTHROUGH */
- case ('a'):
+ case ('X'):
/* FALLTHROUGH */
- case ('A'):
- if (*p++ != '\'')
- return(0);
+ case ('Z'):
+ if ('\'' != cp[i++])
+ return(ESCAPE_ERROR);
+ gly = ESCAPE_IGNORE;
term = '\'';
break;
-#endif
+
+ /*
+ * These escapes are of the form \X'N', where 'X' is the trigger
+ * and 'N' resolves to a numerical expression.
+ */
+ case ('B'):
+ /* FALLTHROUGH */
case ('h'):
/* FALLTHROUGH */
+ case ('H'):
+ /* FALLTHROUGH */
+ case ('L'):
+ /* FALLTHROUGH */
+ case ('l'):
+ /* FALLTHROUGH */
+ case ('N'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_NUMBERED;
+ /* FALLTHROUGH */
+ case ('S'):
+ /* FALLTHROUGH */
case ('v'):
/* FALLTHROUGH */
+ case ('w'):
+ /* FALLTHROUGH */
+ case ('x'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_IGNORE;
+ if ('\'' != cp[i++])
+ return(ESCAPE_ERROR);
+ term = numeric = '\'';
+ break;
+
+ /*
+ * Sizes get a special category of their own.
+ */
case ('s'):
- if (ASCII_HYPH == *p)
- *p = '-';
+ gly = ESCAPE_IGNORE;
- i = 0;
- if ('+' == *p || '-' == *p) {
- p++;
- i = 1;
- }
+ rstart = &cp[i];
+ if (start)
+ *start = rstart;
+
+ /* A leading '+' or '-' counts as a sign. */
+ c = cp[i];
+ if ('+' == c || '-' == c || ASCII_HYPH == c)
+ ++i;
- switch (*p++) {
+ switch (cp[i++]) {
case ('('):
- len = 2;
+ lim = 2;
break;
case ('['):
- term = ']';
+ term = numeric = ']';
break;
case ('\''):
- term = '\'';
+ term = numeric = '\'';
break;
- case ('0'):
- i = 1;
- /* FALLTHROUGH */
default:
- len = 1;
- p--;
+ lim = 1;
+ i--;
break;
}
- if (ASCII_HYPH == *p)
- *p = '-';
- if ('+' == *p || '-' == *p) {
- if (i)
- return(0);
- p++;
- }
-
- /* Handle embedded numerical subexp or escape. */
-
- if ('(' == *p) {
- while (*p && ')' != *p)
- if ('\\' == *p++) {
- i = mandoc_special(--p);
- if (0 == i)
- return(0);
- p += i;
- }
-
- if (')' == *p++)
- break;
+ /* A '+' or '-' after the delimiter also counts as a sign. */
+ c = cp[i];
+ if ('+' == c || '-' == c || ASCII_HYPH == c)
+ ++i;
- return(0);
- } else if ('\\' == *p) {
- if (0 == (i = mandoc_special(p)))
- return(0);
- p += i;
- }
+ break;
+ /*
+ * Anything else is assumed to be a glyph.
+ */
+ default:
+ gly = ESCAPE_SPECIAL;
+ lim = 1;
+ i--;
break;
-#if 0
- case ('Y'):
- /* FALLTHROUGH */
- case ('V'):
- /* FALLTHROUGH */
- case ('$'):
- /* FALLTHROUGH */
- case ('n'):
- /* FALLTHROUGH */
-#endif
- case ('k'):
- /* FALLTHROUGH */
- case ('M'):
- /* FALLTHROUGH */
- case ('m'):
- /* FALLTHROUGH */
- case ('f'):
- /* FALLTHROUGH */
- case ('F'):
- /* FALLTHROUGH */
- case ('*'):
- switch (*p++) {
- case ('('):
- len = 2;
+ }
+
+ assert(ESCAPE_ERROR != gly);
+
+ rstart = &cp[i];
+ if (start)
+ *start = rstart;
+
+ /*
+ * If a terminating block has been specified, we need to
+ * handle the case of recursion, which could have its
+ * own terminating blocks that mess up our parse. This, by the
+ * way, means that the "start" and "size" values will be
+ * effectively meaningless.
+ */
+
+ ssz = 0;
+ if (numeric && -1 == (ssz = numescape(&cp[i])))
+ return(ESCAPE_ERROR);
+
+ i += ssz;
+ rlim = -1;
+
+ /*
+ * We have a character terminator. Try to read up to that
+ * character. If we can't (i.e., we hit the nil), then return
+ * an error; if we can, calculate our length, read past the
+ * terminating character, and exit.
+ */
+
+ if ('\0' != term) {
+ *end = strchr(&cp[i], term);
+ if ('\0' == *end)
+ return(ESCAPE_ERROR);
+
+ rlim = *end - &cp[i];
+ if (sz)
+ *sz = rlim;
+ (*end)++;
+ goto out;
+ }
+
+ assert(lim > 0);
+
+ /*
+ * We have a numeric limit. If the string is shorter than that,
+ * stop and return an error. Else adjust our endpoint, length,
+ * and return the current glyph.
+ */
+
+ if ((size_t)lim > strlen(&cp[i]))
+ return(ESCAPE_ERROR);
+
+ rlim = lim;
+ if (sz)
+ *sz = rlim;
+
+ *end = &cp[i] + lim;
+
+out:
+ assert(rlim >= 0 && rstart);
+
+ /* Run post-processors. */
+
+ switch (gly) {
+ case (ESCAPE_FONT):
+ if (1 != rlim)
break;
- case ('['):
- term = ']';
+ switch (*rstart) {
+ case ('3'):
+ /* FALLTHROUGH */
+ case ('B'):
+ gly = ESCAPE_FONTBOLD;
break;
- default:
- len = 1;
- p--;
+ case ('2'):
+ /* FALLTHROUGH */
+ case ('I'):
+ gly = ESCAPE_FONTITALIC;
break;
- }
- break;
- case ('('):
- len = 2;
- break;
- case ('['):
- term = ']';
- break;
- case ('z'):
- len = 1;
- if ('\\' == *p) {
- if (0 == (i = mandoc_special(p)))
- return(0);
- p += i;
- return(*p ? (int)(p - sv) : 0);
- }
- break;
- case ('o'):
- /* FALLTHROUGH */
- case ('w'):
- if ('\'' == *p++) {
- term = '\'';
+ case ('P'):
+ gly = ESCAPE_FONTPREV;
+ break;
+ case ('1'):
+ /* FALLTHROUGH */
+ case ('R'):
+ gly = ESCAPE_FONTROMAN;
break;
}
- /* FALLTHROUGH */
+ case (ESCAPE_SPECIAL):
+ if (1 != rlim)
+ break;
+ if ('c' == *rstart)
+ gly = ESCAPE_NOSPACE;
+ break;
default:
- len = 1;
- p--;
break;
}
- if (term) {
- for ( ; *p && term != *p; p++)
- if (ASCII_HYPH == *p)
- *p = '-';
- return(*p ? (int)(p - sv) : 0);
- }
-
- for (i = 0; *p && i < len; i++, p++)
- if (ASCII_HYPH == *p)
- *p = '-';
- return(i == len ? (int)(p - sv) : 0);
+ return(gly);
}
-
void *
mandoc_calloc(size_t num, size_t size)
{
diff --git a/mandoc.h b/mandoc.h
index 60e05a34..a838c325 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -288,6 +288,20 @@ enum mparset {
MPARSE_MAN /* assume -man */
};
+enum mandoc_esc {
+ ESCAPE_ERROR = 0, /* bail! unparsable escape */
+ ESCAPE_IGNORE, /* escape to be ignored */
+ ESCAPE_SPECIAL, /* a regular special character */
+ ESCAPE_PREDEF, /* a predefined special character */
+ ESCAPE_FONT, /* a generic font mode */
+ ESCAPE_FONTBOLD, /* bold font mode */
+ ESCAPE_FONTITALIC, /* italic font mode */
+ ESCAPE_FONTROMAN, /* roman font mode */
+ ESCAPE_FONTPREV, /* previous font mode */
+ ESCAPE_NUMBERED, /* a numbered glyph */
+ ESCAPE_NOSPACE /* suppress space if the last on a line */
+};
+
typedef void (*mandocmsg)(enum mandocerr, enum mandoclevel,
const char *, int, int, const char *);
@@ -310,6 +324,8 @@ void *mandoc_calloc(size_t, size_t);
void *mandoc_malloc(size_t);
void *mandoc_realloc(void *, size_t);
+enum mandoc_esc mandoc_escape(const char **, const char **, int *);
+
__END_DECLS
#endif /*!MANDOC_H*/
diff --git a/mdoc_validate.c b/mdoc_validate.c
index 9180b580..acd855eb 100644
--- a/mdoc_validate.c
+++ b/mdoc_validate.c
@@ -545,31 +545,39 @@ check_argv(struct mdoc *m, struct mdoc_node *n, struct mdoc_argv *v)
static void
check_text(struct mdoc *m, int ln, int pos, char *p)
{
- int c;
+ char *cpp, *pp;
size_t sz;
- for ( ; *p; p++, pos++) {
+ while ('\0' != *p) {
sz = strcspn(p, "\t\\");
- p += (int)sz;
-
- if ('\0' == *p)
- break;
+ p += (int)sz;
pos += (int)sz;
if ('\t' == *p) {
if ( ! (MDOC_LITERAL & m->flags))
mdoc_pmsg(m, ln, pos, MANDOCERR_BADTAB);
+ p++;
+ pos++;
continue;
- }
+ } else if ('\0' == *p)
+ break;
+
+ pos++;
+ pp = ++p;
- if (0 == (c = mandoc_special(p))) {
+ if (ESCAPE_ERROR == mandoc_escape
+ ((const char **)&pp, NULL, NULL)) {
mdoc_pmsg(m, ln, pos, MANDOCERR_BADESCAPE);
- continue;
+ break;
}
- p += c - 1;
- pos += c - 1;
+ cpp = p;
+ while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp)))
+ *cpp = '-';
+
+ pos += pp - p;
+ p = pp;
}
}
diff --git a/out.c b/out.c
index 97da8498..e57077fc 100644
--- a/out.c
+++ b/out.c
@@ -174,243 +174,6 @@ time2a(time_t t, char *dst, size_t sz)
(void)strftime(p, sz, "%Y", &tm);
}
-
-int
-a2roffdeco(enum roffdeco *d, const char **word, size_t *sz)
-{
- int i, j, lim;
- char term, c;
- const char *wp;
- enum roffdeco dd;
-
- *d = DECO_NONE;
- lim = i = 0;
- term = '\0';
- wp = *word;
-
- switch ((c = wp[i++])) {
- case ('('):
- *d = DECO_SPECIAL;
- lim = 2;
- break;
- case ('F'):
- /* FALLTHROUGH */
- case ('f'):
- *d = 'F' == c ? DECO_FFONT : DECO_FONT;
-
- switch (wp[i++]) {
- case ('('):
- lim = 2;
- break;
- case ('['):
- term = ']';
- break;
- case ('3'):
- /* FALLTHROUGH */
- case ('B'):
- *d = DECO_BOLD;
- return(i);
- case ('2'):
- /* FALLTHROUGH */
- case ('I'):
- *d = DECO_ITALIC;
- return(i);
- case ('P'):
- *d = DECO_PREVIOUS;
- return(i);
- case ('1'):
- /* FALLTHROUGH */
- case ('R'):
- *d = DECO_ROMAN;
- return(i);
- default:
- i--;
- lim = 1;
- break;
- }
- break;
- case ('k'):
- /* FALLTHROUGH */
- case ('M'):
- /* FALLTHROUGH */
- case ('m'):
- /* FALLTHROUGH */
- case ('*'):
- if ('*' == c)
- *d = DECO_RESERVED;
-
- switch (wp[i++]) {
- case ('('):
- lim = 2;
- break;
- case ('['):
- term = ']';
- break;
- default:
- i--;
- lim = 1;
- break;
- }
- break;
-
- case ('N'):
-
- /*
- * Sequence of characters: backslash, 'N' (i = 0),
- * starting delimiter (i = 1), character number (i = 2).
- */
-
- *word = wp + 2;
- *sz = 0;
-
- /*
- * Cannot use a digit as a starting delimiter;
- * but skip the digit anyway.
- */
-
- if (isdigit((int)wp[1]))
- return(2);
-
- /*
- * Any non-digit terminates the character number.
- * That is, the terminating delimiter need not
- * match the starting delimiter.
- */
-
- for (i = 2; isdigit((int)wp[i]); i++)
- (*sz)++;
-
- /*
- * This is only a numbered character
- * if the character number has at least one digit.
- */
-
- if (*sz)
- *d = DECO_NUMBERED;
-
- /*
- * Skip the terminating delimiter, even if it does not
- * match, and even if there is no character number.
- */
-
- return(++i);
-
- case ('h'):
- /* FALLTHROUGH */
- case ('v'):
- /* FALLTHROUGH */
- case ('s'):
- j = 0;
- if ('+' == wp[i] || '-' == wp[i]) {
- i++;
- j = 1;
- }
-
- switch (wp[i++]) {
- case ('('):
- lim = 2;
- break;
- case ('['):
- term = ']';
- break;
- case ('\''):
- term = '\'';
- break;
- case ('0'):
- j = 1;
- /* FALLTHROUGH */
- default:
- i--;
- lim = 1;
- break;
- }
-
- if ('+' == wp[i] || '-' == wp[i]) {
- if (j)
- return(i);
- i++;
- }
-
- /* Handle embedded numerical subexp or escape. */
-
- if ('(' == wp[i]) {
- while (wp[i] && ')' != wp[i])
- if ('\\' == wp[i++]) {
- /* Handle embedded escape. */
- *word = &wp[i];
- i += a2roffdeco(&dd, word, sz);
- }
-
- if (')' == wp[i++])
- break;
-
- *d = DECO_NONE;
- return(i - 1);
- } else if ('\\' == wp[i]) {
- *word = &wp[++i];
- i += a2roffdeco(&dd, word, sz);
- }
-
- break;
- case ('['):
- *d = DECO_SPECIAL;
- term = ']';
- break;
- case ('c'):
- *d = DECO_NOSPACE;
- return(i);
- case ('z'):
- *d = DECO_NONE;
- if ('\\' == wp[i]) {
- *word = &wp[++i];
- return(i + a2roffdeco(&dd, word, sz));
- } else
- lim = 1;
- break;
- case ('o'):
- /* FALLTHROUGH */
- case ('w'):
- if ('\'' == wp[i++]) {
- term = '\'';
- break;
- }
- /* FALLTHROUGH */
- default:
- *d = DECO_SSPECIAL;
- i--;
- lim = 1;
- break;
- }
-
- assert(term || lim);
- *word = &wp[i];
-
- if (term) {
- j = i;
- while (wp[i] && wp[i] != term)
- i++;
- if ('\0' == wp[i]) {
- *d = DECO_NONE;
- return(i);
- }
-
- assert(i >= j);
- *sz = (size_t)(i - j);
-
- return(i + 1);
- }
-
- assert(lim > 0);
- *sz = (size_t)lim;
-
- for (j = 0; wp[i] && j < lim; j++)
- i++;
- if (j < lim)
- *d = DECO_NONE;
-
- return(i);
-}
-
/*
* Calculate the abstract widths and decimal positions of columns in a
* table. This routine allocates the columns structures then runs over
diff --git a/out.h b/out.h
index 8b2a083b..77414d71 100644
--- a/out.h
+++ b/out.h
@@ -31,22 +31,6 @@ enum roffscale {
SCALE_MAX
};
-enum roffdeco {
- DECO_NONE,
- DECO_NUMBERED, /* numbered character */
- DECO_SPECIAL, /* special character */
- DECO_SSPECIAL, /* single-char special */
- DECO_RESERVED, /* reserved word */
- DECO_BOLD, /* bold font */
- DECO_ITALIC, /* italic font */
- DECO_ROMAN, /* "normal" undecorated font */
- DECO_PREVIOUS, /* revert to previous font */
- DECO_NOSPACE, /* suppress spacing */
- DECO_FONT, /* font */
- DECO_FFONT, /* font family */
- DECO_MAX
-};
-
enum chars {
CHARS_ASCII, /* 7-bit ascii representation */
CHARS_HTML /* unicode values */
@@ -85,7 +69,6 @@ __BEGIN_DECLS
while (/* CONSTCOND */ 0)
int a2roffsu(const char *, struct roffsu *, enum roffscale);
-int a2roffdeco(enum roffdeco *, const char **, size_t *);
void time2a(time_t, char *, size_t);
void tblcalc(struct rofftbl *tbl, const struct tbl_span *);
diff --git a/read.c b/read.c
index 78ff681f..a43b60d8 100644
--- a/read.c
+++ b/read.c
@@ -142,7 +142,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"tab in non-literal context",
"end of line whitespace",
"bad comment style",
- "unknown escape sequence",
+ "bad escape sequence",
"unterminated quoted string",
"generic error",
diff --git a/term.c b/term.c
index 4b468e0b..742b9877 100644
--- a/term.c
+++ b/term.c
@@ -33,8 +33,7 @@
#include "term.h"
#include "main.h"
-static void spec(struct termp *, enum roffdeco,
- const char *, size_t);
+static void spec(struct termp *, const char *, size_t);
static void res(struct termp *, const char *, size_t);
static void bufferc(struct termp *, char);
static void adjbuf(struct termp *p, size_t);
@@ -358,7 +357,7 @@ numbered(struct termp *p, const char *word, size_t len)
static void
-spec(struct termp *p, enum roffdeco d, const char *word, size_t len)
+spec(struct termp *p, const char *word, size_t len)
{
const char *rhs;
size_t sz;
@@ -366,7 +365,7 @@ spec(struct termp *p, enum roffdeco d, const char *word, size_t len)
rhs = chars_spec2str(p->symtab, word, len, &sz);
if (rhs)
encode(p, rhs, sz);
- else if (DECO_SSPECIAL == d)
+ else if (1 == len)
encode(p, word, len);
}
@@ -457,8 +456,9 @@ void
term_word(struct termp *p, const char *word)
{
const char *seq;
+ int sz;
size_t ssz;
- enum roffdeco deco;
+ enum mandoc_esc esc;
if ( ! (TERMP_NOSPACE & p->flags)) {
if ( ! (TERMP_KEEP & p->flags)) {
@@ -478,7 +478,7 @@ term_word(struct termp *p, const char *word)
p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM);
- while (*word) {
+ while ('\0' != *word) {
if ((ssz = strcspn(word, "\\")) > 0)
encode(p, word, ssz);
@@ -486,39 +486,40 @@ term_word(struct termp *p, const char *word)
if ('\\' != *word)
continue;
- seq = ++word;
- word += a2roffdeco(&deco, &seq, &ssz);
+ word++;
+ esc = mandoc_escape(&word, &seq, &sz);
+ if (ESCAPE_ERROR == esc)
+ break;
- switch (deco) {
- case (DECO_NUMBERED):
- numbered(p, seq, ssz);
+ switch (esc) {
+ case (ESCAPE_NUMBERED):
+ numbered(p, seq, sz);
break;
- case (DECO_RESERVED):
- res(p, seq, ssz);
+ case (ESCAPE_PREDEF):
+ res(p, seq, sz);
break;
- case (DECO_SPECIAL):
- /* FALLTHROUGH */
- case (DECO_SSPECIAL):
- spec(p, deco, seq, ssz);
+ case (ESCAPE_SPECIAL):
+ spec(p, seq, sz);
break;
- case (DECO_BOLD):
+ case (ESCAPE_FONTBOLD):
term_fontrepl(p, TERMFONT_BOLD);
break;
- case (DECO_ITALIC):
+ case (ESCAPE_FONTITALIC):
term_fontrepl(p, TERMFONT_UNDER);
break;
- case (DECO_ROMAN):
+ case (ESCAPE_FONTROMAN):
term_fontrepl(p, TERMFONT_NONE);
break;
- case (DECO_PREVIOUS):
+ case (ESCAPE_FONTPREV):
term_fontlast(p);
break;
+ case (ESCAPE_NOSPACE):
+ if ('\0' == *word)
+ p->flags |= TERMP_NOSPACE;
+ break;
default:
break;
}
-
- if (DECO_NOSPACE == deco && '\0' == *word)
- p->flags |= TERMP_NOSPACE;
}
}
@@ -600,33 +601,36 @@ term_len(const struct termp *p, size_t sz)
size_t
term_strlen(const struct termp *p, const char *cp)
{
- size_t sz, ssz, rsz, i;
- enum roffdeco d;
+ size_t sz, rsz, i;
+ int ssz;
+ enum mandoc_esc esc;
const char *seq, *rhs;
- for (sz = 0; '\0' != *cp; )
- /*
- * Account for escaped sequences within string length
- * calculations. This follows the logic in term_word()
- * as we must calculate the width of produced strings.
- */
- if ('\\' == *cp) {
- seq = ++cp;
- cp += a2roffdeco(&d, &seq, &ssz);
+ /*
+ * Account for escaped sequences within string length
+ * calculations. This follows the logic in term_word() as we
+ * must calculate the width of produced strings.
+ */
- switch (d) {
- case (DECO_RESERVED):
+ sz = 0;
+ while ('\0' != *cp)
+ switch (*cp) {
+ case ('\\'):
+ ++cp;
+ esc = mandoc_escape(&cp, &seq, &ssz);
+ if (ESCAPE_ERROR == esc)
+ return(sz);
+
+ switch (esc) {
+ case (ESCAPE_PREDEF):
rhs = chars_res2str
(p->symtab, seq, ssz, &rsz);
break;
- case (DECO_SPECIAL):
- /* FALLTHROUGH */
- case (DECO_SSPECIAL):
+ case (ESCAPE_SPECIAL):
rhs = chars_spec2str
(p->symtab, seq, ssz, &rsz);
- /* Allow for one-char escapes. */
- if (DECO_SSPECIAL != d || rhs)
+ if (ssz != 1 || rhs)
break;
rhs = seq;
@@ -637,17 +641,24 @@ term_strlen(const struct termp *p, const char *cp)
break;
}
- if (rhs)
- for (i = 0; i < rsz; i++)
- sz += (*p->width)(p, *rhs++);
- } else if (ASCII_NBRSP == *cp) {
+ if (NULL == rhs)
+ break;
+
+ for (i = 0; i < rsz; i++)
+ sz += (*p->width)(p, *rhs++);
+ break;
+ case (ASCII_NBRSP):
sz += (*p->width)(p, ' ');
cp++;
- } else if (ASCII_HYPH == *cp) {
+ break;
+ case (ASCII_HYPH):
sz += (*p->width)(p, '-');
cp++;
- } else
+ break;
+ default:
sz += (*p->width)(p, *cp++);
+ break;
+ }
return(sz);
}