summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Schwarze <schwarze@openbsd.org>2017-06-04 00:13:15 +0000
committerIngo Schwarze <schwarze@openbsd.org>2017-06-04 00:13:15 +0000
commitafcc0a6de7e295d807dddcf20b26811ab1e7803c (patch)
tree76794e8ec7bb9e27f2b19bec6d03de49933df4fa
parent90897efbceb54855c1943f8e19d5bba5a542ff65 (diff)
downloadmandoc-afcc0a6de7e295d807dddcf20b26811ab1e7803c.tar.gz
Pure preprocessor implementation of the roff(7) .ec and .eo requests
(escape character control), touching nothing after the preprocessing stage and keeping even the state variable local to the preprocessor. Since the escape character is also used for line continuation, this requires pulling the implementation of line continuation from the input reader to the preprocessor, which also considerably shortens the code required for that. When the escape character is changed, simply let the preprocessor replace bare by escaped backslashes and instances of the non-standard escape character with bare backslashes - that's all we need. Oh, and if anybody dares to use these requests in OpenBSD manuals, sending a medium-sized pack of axe-murderers after them might be a worthwhile part of the punishment, but probably insufficient on its own.
-rw-r--r--read.c72
-rw-r--r--roff.78
-rw-r--r--roff.c112
3 files changed, 107 insertions, 85 deletions
diff --git a/read.c b/read.c
index 5d85a576..edbdc08c 100644
--- a/read.c
+++ b/read.c
@@ -326,7 +326,6 @@ mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
const char *save_file;
char *cp;
size_t pos; /* byte number in the ln buffer */
- size_t j; /* auxiliary byte number in the blk buffer */
enum rofferr rr;
int of;
int lnn; /* line number in the real file */
@@ -408,79 +407,14 @@ mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
continue;
}
- /* Trailing backslash = a plain char. */
-
- if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
- ln.buf[pos++] = blk.buf[i++];
- continue;
- }
-
- /*
- * Found escape and at least one other character.
- * When it's a newline character, skip it.
- * When there is a carriage return in between,
- * skip that one as well.
- */
-
- if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
- '\n' == blk.buf[i + 2])
- ++i;
- if ('\n' == blk.buf[i + 1]) {
- i += 2;
- ++lnn;
- continue;
- }
-
- if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
- j = i;
- i += 2;
- /* Comment, skip to end of line */
- for (; i < blk.sz; ++i) {
- if (blk.buf[i] != '\n')
- continue;
- if (blk.buf[i - 1] == ' ' ||
- blk.buf[i - 1] == '\t')
- mandoc_msg(
- MANDOCERR_SPACE_EOL,
- curp, curp->line,
- pos + i-1 - j, NULL);
- ++i;
- ++lnn;
- break;
- }
-
- /* Backout trailing whitespaces */
- for (; pos > 0; --pos) {
- if (ln.buf[pos - 1] != ' ')
- break;
- if (pos > 2 && ln.buf[pos - 2] == '\\')
- break;
- }
- break;
- }
-
- /* Catch escaped bogus characters. */
-
- c = (unsigned char) blk.buf[i+1];
-
- if ( ! (isascii(c) &&
- (isgraph(c) || isblank(c)))) {
- mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
- curp->line, pos, "0x%x", c);
- i += 2;
- ln.buf[pos++] = '?';
- continue;
- }
-
- /* Some other escape sequence, copy & cont. */
-
- ln.buf[pos++] = blk.buf[i++];
ln.buf[pos++] = blk.buf[i++];
}
- if (pos >= ln.sz)
+ if (pos + 1 >= ln.sz)
resize_buf(&ln, 256);
+ if (i == blk.sz || blk.buf[i] == '\0')
+ ln.buf[pos++] = '\n';
ln.buf[pos] = '\0';
/*
diff --git a/roff.7 b/roff.7
index d2e45027..86d6cb84 100644
--- a/roff.7
+++ b/roff.7
@@ -808,8 +808,11 @@ This is a Heirloom extension and currently unsupported.
Set a trap within a diversion.
Currently unsupported.
.It Ic \&ec Op Ar char
-Change the escape character.
-Currently unsupported.
+Enable the escape mechanism and change the escape character.
+The
+.Ar char
+argument defaults to the backslash
+.Pq Sq \e .
.It Ic \&ecr
Restore the escape character.
Currently unsupported.
@@ -839,7 +842,6 @@ See
.Ic \&EQ .
.It Ic \&eo
Disable the escape mechanism completely.
-Currently unsupported.
.It Ic \&EP
End a picture started by
.Ic \&BP .
diff --git a/roff.c b/roff.c
index ff6d9806..83376e90 100644
--- a/roff.c
+++ b/roff.c
@@ -99,6 +99,7 @@ struct roff {
int format; /* current file in mdoc or man format */
int argc; /* number of args of the last macro */
char control; /* control character */
+ char escape; /* escape character */
};
struct roffnode {
@@ -155,6 +156,8 @@ static enum rofferr roff_cond(ROFF_ARGS);
static enum rofferr roff_cond_text(ROFF_ARGS);
static enum rofferr roff_cond_sub(ROFF_ARGS);
static enum rofferr roff_ds(ROFF_ARGS);
+static enum rofferr roff_ec(ROFF_ARGS);
+static enum rofferr roff_eo(ROFF_ARGS);
static enum rofferr roff_eqndelim(struct roff *, struct buf *, int);
static int roff_evalcond(struct roff *r, int, char *, int *);
static int roff_evalnum(struct roff *, int,
@@ -385,13 +388,13 @@ static struct roffmac roffs[TOKEN_NONE] = {
{ roff_ds, NULL, NULL, 0 }, /* ds1 */
{ roff_unsupp, NULL, NULL, 0 }, /* dwh */
{ roff_unsupp, NULL, NULL, 0 }, /* dt */
- { roff_unsupp, NULL, NULL, 0 }, /* ec */
+ { roff_ec, NULL, NULL, 0 }, /* ec */
{ roff_unsupp, NULL, NULL, 0 }, /* ecr */
{ roff_unsupp, NULL, NULL, 0 }, /* ecs */
{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
{ roff_unsupp, NULL, NULL, 0 }, /* em */
{ roff_EN, NULL, NULL, 0 }, /* EN */
- { roff_unsupp, NULL, NULL, 0 }, /* eo */
+ { roff_eo, NULL, NULL, 0 }, /* eo */
{ roff_unsupp, NULL, NULL, 0 }, /* EP */
{ roff_EQ, NULL, NULL, 0 }, /* EQ */
{ roff_line_ignore, NULL, NULL, 0 }, /* errprint */
@@ -751,7 +754,8 @@ roff_reset(struct roff *r)
{
roff_free1(r);
r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
- r->control = 0;
+ r->control = '\0';
+ r->escape = '\\';
}
void
@@ -773,6 +777,7 @@ roff_alloc(struct mparse *parse, int options)
r->options = options;
r->format = options & (MPARSE_MDOC | MPARSE_MAN);
r->rstackpos = -1;
+ r->escape = '\\';
return r;
}
@@ -1149,27 +1154,80 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
int expand_count; /* to avoid infinite loops */
int npos; /* position in numeric expression */
int arg_complete; /* argument not interrupted by eol */
+ int done; /* no more input available */
char term; /* character terminating the escape */
- expand_count = 0;
+ /* Search forward for comments. */
+
+ done = 0;
start = buf->buf + pos;
- stesc = strchr(start, '\0') - 1;
- while (stesc-- > start) {
+ for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
+ if (stesc[0] != r->escape || stesc[1] == '\0')
+ continue;
+ stesc++;
+ if (*stesc != '"' && *stesc != '#')
+ continue;
+ cp = strchr(stesc--, '\0') - 1;
+ if (*cp == '\n') {
+ done = 1;
+ cp--;
+ }
+ if (*cp == ' ' || *cp == '\t')
+ mandoc_msg(MANDOCERR_SPACE_EOL, r->parse,
+ ln, cp - buf->buf, NULL);
+ while (stesc > start && stesc[-1] == ' ')
+ stesc--;
+ *stesc = '\0';
+ break;
+ }
+ if (stesc == start)
+ return ROFF_CONT;
+ stesc--;
+
+ /* Notice the end of the input. */
+
+ if (*stesc == '\n') {
+ *stesc-- = '\0';
+ done = 1;
+ }
+
+ expand_count = 0;
+ while (stesc >= start) {
/* Search backwards for the next backslash. */
- if (*stesc != '\\')
+ if (*stesc != r->escape) {
+ if (*stesc == '\\') {
+ *stesc = '\0';
+ buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
+ buf->buf, stesc + 1) + 1;
+ start = nbuf + pos;
+ stesc = nbuf + (stesc - buf->buf);
+ free(buf->buf);
+ buf->buf = nbuf;
+ }
+ stesc--;
continue;
+ }
/* If it is escaped, skip it. */
for (cp = stesc - 1; cp >= start; cp--)
- if (*cp != '\\')
+ if (*cp != r->escape)
break;
if ((stesc - cp) % 2 == 0) {
- stesc = (char *)cp;
+ while (stesc > cp)
+ *stesc-- = '\\';
continue;
+ } else if (stesc[1] != '\0') {
+ *stesc = '\\';
+ } else {
+ *stesc-- = '\0';
+ if (done)
+ continue;
+ else
+ return ROFF_APPEND;
}
/* Decide whether to expand or to check only. */
@@ -1195,6 +1253,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
mandoc_vmsg(MANDOCERR_ESC_BAD,
r->parse, ln, (int)(stesc - buf->buf),
"%.*s", (int)(cp - stesc), stesc);
+ stesc--;
continue;
}
@@ -1409,7 +1468,7 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
/* Expand some escape sequences. */
e = roff_res(r, buf, ln, pos);
- if (e == ROFF_IGN)
+ if (e == ROFF_IGN || e == ROFF_APPEND)
return e;
assert(e == ROFF_CONT);
@@ -2849,7 +2908,7 @@ roff_cc(ROFF_ARGS)
p = buf->buf + pos;
if (*p == '\0' || (r->control = *p++) == '.')
- r->control = 0;
+ r->control = '\0';
if (*p != '\0')
mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
@@ -2859,6 +2918,33 @@ roff_cc(ROFF_ARGS)
}
static enum rofferr
+roff_ec(ROFF_ARGS)
+{
+ const char *p;
+
+ p = buf->buf + pos;
+ if (*p == '\0')
+ r->escape = '\\';
+ else {
+ r->escape = *p;
+ if (*++p != '\0')
+ mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
+ ln, p - buf->buf, "ec ... %s", p);
+ }
+ return ROFF_IGN;
+}
+
+static enum rofferr
+roff_eo(ROFF_ARGS)
+{
+ r->escape = '\0';
+ if (buf->buf[pos] != '\0')
+ mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse,
+ ln, pos, "eo %s", buf->buf + pos);
+ return ROFF_IGN;
+}
+
+static enum rofferr
roff_tr(ROFF_ARGS)
{
const char *p, *first, *second;
@@ -3385,9 +3471,9 @@ roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
pos = *ppos;
- if (0 != r->control && cp[pos] == r->control)
+ if (r->control != '\0' && cp[pos] == r->control)
pos++;
- else if (0 != r->control)
+ else if (r->control != '\0')
return 0;
else if ('\\' == cp[pos] && '.' == cp[pos + 1])
pos += 2;