summary | refs | log | tree | commit | diff | stats
path: root/roff_escape.c
diff options
context:
space:
mode:
author: Ingo Schwarze <schwarze@openbsd.org> 2022-06-05 13:54:09 +0000
committer: Ingo Schwarze <schwarze@openbsd.org> 2022-06-05 13:54:09 +0000
commit: 4051ca4c1c6b3fe1989bc59d0c584464e8f8d84b (patch)
tree: f7dbab9b16049f04a8729b95a4186ce290d13224 /roff_escape.c
parent: 7e17b66dfe3fa638b5bdaa235a97312af792f229 (diff)
download: mandoc-4051ca4c1c6b3fe1989bc59d0c584464e8f8d84b.tar.gz
With the improved escape sequence parser, it becomes easy to also improve
diagnostics. Distinguish "incomplete escape sequence", "invalid special character", and "unknown special character" from the generic "invalid escape sequence", also promoting them from WARNING to ERROR because incomplete escape sequences are severe syntax violations and because encountering an invalid or unknown special character makes it likely that part of the document content intended by the authors gets lost.
Diffstat (limited to 'roff_escape.c')
-rw-r--r-- roff_escape.c 33
1 files changed, 22 insertions, 11 deletions
diff --git a/roff_escape.c b/roff_escape.c
index 8145a9dd..777d753b 100644
--- a/roff_escape.c
+++ b/roff_escape.c
@@ -310,13 +310,12 @@ roff_escape(const char *buf, const int ln, const int aesc,
iendarg = iarg;
while (maxl > 0) {
if (buf[iendarg] == '\0') {
+ err = MANDOCERR_ESC_INCOMPLETE;
+ if (rval != ESCAPE_EXPAND)
+ rval = ESCAPE_ERROR;
/* Ignore an incomplete argument except for \w. */
if (buf[inam] != 'w')
iendarg = iarg;
- if (rval == ESCAPE_EXPAND)
- err = MANDOCERR_ESC_BAD;
- else
- rval = ESCAPE_ERROR;
break;
}
if (buf[iendarg] == term) {
@@ -401,6 +400,7 @@ roff_escape(const char *buf, const int ln, const int aesc,
*/
if (term != '\0' && argl == 1 && buf[iarg] != '-') {
+ err = MANDOCERR_ESC_BADCHAR;
rval = ESCAPE_ERROR;
break;
}
@@ -416,8 +416,10 @@ roff_escape(const char *buf, const int ln, const int aesc,
c = 0;
for (i = iarg; i < iendarg; i++)
c = 10 * c + (buf[i] - '0');
- if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff)
+ if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) {
+ err = MANDOCERR_ESC_BADCHAR;
break;
+ }
iarg += 4;
rval = ESCAPE_NUMBERED;
break;
@@ -433,13 +435,19 @@ roff_escape(const char *buf, const int ln, const int aesc,
if (buf[iarg] != 'u' || argl < 5 || argl > 7)
break;
if (argl == 7 &&
- (buf[iarg + 1] != '1' || buf[iarg + 2] != '0'))
+ (buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) {
+ err = MANDOCERR_ESC_BADCHAR;
break;
- if (argl == 6 && buf[iarg + 1] == '0')
+ }
+ if (argl == 6 && buf[iarg + 1] == '0') {
+ err = MANDOCERR_ESC_BADCHAR;
break;
+ }
if (argl == 5 && buf[iarg + 1] == 'D' &&
- strchr("89ABCDEF", buf[iarg + 2]) != NULL)
+ strchr("89ABCDEF", buf[iarg + 2]) != NULL) {
+ err = MANDOCERR_ESC_BADCHAR;
break;
+ }
if ((int)strspn(buf + iarg + 1, "0123456789ABCDEFabcdef")
+ 1 == argl)
rval = ESCAPE_UNICODE;
@@ -477,7 +485,8 @@ out:
*resc = iesc;
switch (rval) {
case ESCAPE_ERROR:
- err = MANDOCERR_ESC_BAD;
+ if (err == MANDOCERR_OK)
+ err = MANDOCERR_ESC_BAD;
break;
case ESCAPE_UNSUPP:
err = MANDOCERR_ESC_UNSUPP;
@@ -487,8 +496,10 @@ out:
err = MANDOCERR_ESC_UNDEF;
break;
case ESCAPE_SPECIAL:
- if (mchars_spec2cp(buf + iarg, argl) < 0)
- err = MANDOCERR_ESC_BAD;
+ if (mchars_spec2cp(buf + iarg, argl) >= 0)
+ err = MANDOCERR_OK;
+ else if (err == MANDOCERR_OK)
+ err = MANDOCERR_ESC_UNKCHAR;
break;
default:
break;