summary | refs | log | tree | commit | diff | stats
path: root/roff.c
diff options
context:
space:
mode:
author: Ingo Schwarze <schwarze@openbsd.org> 2022-06-03 12:15:55 +0000
committer: Ingo Schwarze <schwarze@openbsd.org> 2022-06-03 12:15:55 +0000
commit: f1a1415f2ac83d7454ab33bab918fe77f90664bd (patch)
tree: cca4619f0ecb728de9a8697ed509838d8c4186bd /roff.c
parent: 4de2134e6e8bcf00651ea443b38238baef53467e (diff)
download: mandoc-f1a1415f2ac83d7454ab33bab918fe77f90664bd.tar.gz
During identifier parsing, handle undefined escape sequences
in the same way as groff:

 * \\ is always reduced to \
 * \. is always reduced to .
 * other undefined escape sequences are usually reduced to the
   escape name, for example \G to G, except during the expansion
   of expanding escape sequences having the standard argument form
   (in particular \* and \n), in which case the backslash
   is preserved literally.

Yes, this is confusing indeed.
For example, the following have the same meaning:

 * .ds \. and .ds . which is not the same as .ds \\.
 * \*[\.] and \*[.] which is not the same as \*[\\.]
 * .ds \G and .ds G which is not the same as .ds \\G
 * \*[\G] and \*[\\G] which is not the same as \*[G]  <- sic!

To feel less dirty, have a leaning toothpick, if you are so inclined.

This patch also slightly improves the string shown by the
"escaped character not allowed in a name" error message.
Diffstat (limited to 'roff.c')
-rw-r--r--  roff.c  58
1 files changed, 48 insertions, 10 deletions
diff --git a/roff.c b/roff.c
index dbe47b43..785097b6 100644
--- a/roff.c
+++ b/roff.c
@@ -1375,6 +1375,7 @@ roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec)
int iarg; /* index beginning the argument */
int iendarg; /* index right after the argument */
int iend; /* index right after the sequence */
+ int isrc, idst; /* to reduce \\ and \. in names */
int deftype; /* type of definition to paste */
int argi; /* macro argument index */
int quote_args; /* true for \\$@, false for \\$* */
@@ -1428,6 +1429,21 @@ roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec)
continue;
}
+ /* Reduce \\ and \. in names. */
+
+ if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') {
+ isrc = idst = iarg;
+ while (isrc < iendarg) {
+ if (isrc + 1 < iendarg &&
+ buf->buf[isrc] == '\\' &&
+ (buf->buf[isrc + 1] == '\\' ||
+ buf->buf[isrc + 1] == '.'))
+ isrc++;
+ buf->buf[idst++] = buf->buf[isrc++];
+ }
+ iendarg -= isrc - idst;
+ }
+
/* Handle expansion. */
res = NULL;
@@ -4002,7 +4018,7 @@ static size_t
roff_getname(struct roff *r, char **cpp, int ln, int pos)
{
char *name, *cp;
- size_t namesz;
+ int namesz, inam, iend;
name = *cpp;
if (*name == '\0')
@@ -4010,24 +4026,46 @@ roff_getname(struct roff *r, char **cpp, int ln, int pos)
/* Advance cp to the byte after the end of the name. */
- for (cp = name; 1; cp++) {
- namesz = cp - name;
+ cp = name;
+ namesz = 0;
+ for (;;) {
if (*cp == '\0')
break;
if (*cp == ' ' || *cp == '\t') {
cp++;
break;
}
- if (*cp != '\\')
+ if (*cp != '\\') {
+ if (name + namesz < cp) {
+ name[namesz] = *cp;
+ *cp = ' ';
+ }
+ namesz++;
+ cp++;
continue;
+ }
if (cp[1] == '{' || cp[1] == '}')
break;
- if (*++cp == '\\')
- continue;
- mandoc_msg(MANDOCERR_NAMESC, ln, pos,
- "%.*s", (int)(cp - name + 1), name);
- mandoc_escape((const char **)&cp, NULL, NULL);
- break;
+ if (roff_escape(cp, 0, 0, NULL, &inam,
+ NULL, NULL, &iend) != ESCAPE_UNDEF) {
+ mandoc_msg(MANDOCERR_NAMESC, ln, pos,
+ "%.*s%.*s", namesz, name, iend, cp);
+ cp += iend;
+ break;
+ }
+
+ /*
+ * In an identifier, \\, \., \G and so on
+ * are reduced to \, ., G and so on,
+ * vaguely similar to copy mode.
+ */
+
+ name[namesz++] = cp[inam];
+ while (iend--) {
+ if (cp >= name + namesz)
+ *cp = ' ';
+ cp++;
+ }
}
/* Read past spaces. */