diff options
35 files changed, 515 insertions, 92 deletions
diff --git a/libmandoc.h b/libmandoc.h index c2422590..ad76f021 100644 --- a/libmandoc.h +++ b/libmandoc.h @@ -48,7 +48,6 @@ struct buf { struct roff; struct roff_man; -char *mandoc_getarg(char **, int, int *); char *mandoc_normdate(struct roff_man *, char *, int, int); int mandoc_eos(const char *, size_t); int mandoc_strntoi(const char *, size_t, int); @@ -76,6 +75,7 @@ void roff_endparse(struct roff *); void roff_setreg(struct roff *, const char *, int, char sign); int roff_getreg(struct roff *, const char *); char *roff_strdup(const struct roff *, const char *); +char *roff_getarg(struct roff *, char **, int, int *); int roff_getcontrol(const struct roff *, const char *, int *); int roff_getformat(const struct roff *); @@ -38,6 +38,7 @@ enum margserr { ARGS_ERROR, ARGS_EOLN, /* end-of-line */ ARGS_WORD, /* normal word */ + ARGS_ALLOC, /* normal word from roff_getarg() */ ARGS_PUNCT, /* series of punctuation */ ARGS_PHRASE /* Bl -column phrase */ }; diff --git a/man_macro.c b/man_macro.c index 3fb04dac..36701eac 100644 --- a/man_macro.c +++ b/man_macro.c @@ -202,22 +202,25 @@ blk_close(MACRO_PROT_ARGS) { enum roff_tok ctok, ntok; const struct roff_node *nn; - char *p; - int cline, cpos, nrew, target; + char *p, *ep; + int cline, cpos, la, nrew, target; nrew = 1; switch (tok) { case MAN_RE: ntok = MAN_RS; + la = *pos; if ( ! man_args(man, line, pos, buf, &p)) break; for (nn = man->last->parent; nn; nn = nn->parent) if (nn->tok == ntok && nn->type == ROFFT_BLOCK) nrew++; - target = strtol(p, &p, 10); - if (*p != '\0') + target = strtol(p, &ep, 10); + if (*ep != '\0') mandoc_msg(MANDOCERR_ARG_EXCESS, line, - (int)(p - buf), "RE ... %s", p); + la + (buf[la] == '"') + (int)(ep - p), + "RE ... %s", ep); + free(p); if (target == 0) target = 1; nrew -= target; @@ -312,6 +315,7 @@ blk_exp(MACRO_PROT_ARGS) roff_setreg(man->roff, "an-margin", head->aux, '+'); } + free(p); } if (buf[*pos] != '\0') @@ -348,6 +352,7 @@ blk_imp(MACRO_PROT_ARGS) if ( ! man_args(man, line, pos, buf, &p)) break; roff_word_alloc(man, line, la, p); + free(p); } /* @@ -397,6 +402,7 @@ in_line_eoln(MACRO_PROT_ARGS) roff_word_append(man, p); else roff_word_alloc(man, line, la, p); + free(p); } /* @@ -456,6 +462,6 @@ man_args(struct roff_man *man, int line, int *pos, char *buf, char **v) if ('\0' == *start) return 0; - *v = mandoc_getarg(v, line, pos); + *v = roff_getarg(man->roff, v, line, pos); return 1; } diff --git a/mdoc_argv.c b/mdoc_argv.c index 752fb327..5ca2bc04 100644 --- a/mdoc_argv.c +++ b/mdoc_argv.c @@ -416,11 +416,8 @@ mdoc_args(struct roff_man *mdoc, int line, int *pos, char *buf, enum roff_tok tok, char **v) { struct roff_node *n; - char *v_local; enum argsflag fl; - if (v == NULL) - v = &v_local; fl = tok == TOKEN_NONE ? ARGSFL_NONE : mdocargs[tok - MDOC_Dd].flags; /* @@ -448,6 +445,7 @@ args(struct roff_man *mdoc, int line, int *pos, char *buf, enum argsflag fl, char **v) { char *p; + char *v_local; int pairs; if (buf[*pos] == '\0') { @@ -459,6 +457,8 @@ args(struct roff_man *mdoc, int line, int *pos, return ARGS_EOLN; } + if (v == NULL) + v = &v_local; *v = buf + *pos; if (fl == ARGSFL_DELIM && args_checkpunct(buf, *pos)) @@ -525,13 +525,12 @@ args(struct roff_man *mdoc, int line, int *pos, * Whitespace is NOT involved in literal termination. */ - if (mdoc->flags & MDOC_PHRASELIT || buf[*pos] == '\"') { - if ( ! (mdoc->flags & MDOC_PHRASELIT)) + if (mdoc->flags & MDOC_PHRASELIT || + (mdoc->flags & MDOC_PHRASE && buf[*pos] == '\"')) { + if ((mdoc->flags & MDOC_PHRASELIT) == 0) { *v = &buf[++(*pos)]; - - if (mdoc->flags & MDOC_PHRASE) mdoc->flags |= MDOC_PHRASELIT; - + } pairs = 0; for ( ; buf[*pos]; (*pos)++) { /* Move following text left after quoted quotes. */ @@ -572,7 +571,9 @@ args(struct roff_man *mdoc, int line, int *pos, } p = &buf[*pos]; - *v = mandoc_getarg(&p, line, pos); + *v = roff_getarg(mdoc->roff, &p, line, pos); + if (v == &v_local) + free(*v); /* * After parsing the last word in this phrase, @@ -583,7 +584,7 @@ args(struct roff_man *mdoc, int line, int *pos, mdoc->flags &= ~MDOC_PHRASEQL; mdoc->flags |= MDOC_PHRASEQF; } - return ARGS_WORD; + return ARGS_ALLOC; } /* @@ -654,7 +655,9 @@ argv_multi(struct roff_man *mdoc, int line, v->value = mandoc_reallocarray(v->value, v->sz + MULTI_STEP, sizeof(char *)); - v->value[(int)v->sz] = mandoc_strdup(p); + if (ac != ARGS_ALLOC) + p = mandoc_strdup(p); + v->value[(int)v->sz] = p; } } @@ -669,7 +672,10 @@ argv_single(struct roff_man *mdoc, int line, if (ac == ARGS_EOLN) return; + if (ac != ARGS_ALLOC) + p = mandoc_strdup(p); + v->sz = 1; v->value = mandoc_malloc(sizeof(char *)); - v->value[0] = mandoc_strdup(p); + v->value[0] = p; } diff --git a/mdoc_macro.c b/mdoc_macro.c index f858f42e..8fc08336 100644 --- a/mdoc_macro.c +++ b/mdoc_macro.c @@ -49,7 +49,7 @@ static void dword(struct roff_man *, int, int, const char *, static int find_pending(struct roff_man *, enum roff_tok, int, int, struct roff_node *); static int lookup(struct roff_man *, int, int, int, const char *); -static int macro_or_word(MACRO_PROT_ARGS, int); +static int macro_or_word(MACRO_PROT_ARGS, char *, int); static void break_intermediate(struct roff_node *, struct roff_node *); static int parse_rest(struct roff_man *, enum roff_tok, @@ -474,14 +474,15 @@ append_delims(struct roff_man *mdoc, int line, int *pos, char *buf) { char *p; int la; + enum margserr ac; if (buf[*pos] == '\0') return; for (;;) { la = *pos; - if (mdoc_args(mdoc, line, pos, buf, TOKEN_NONE, &p) == - ARGS_EOLN) + ac = mdoc_args(mdoc, line, pos, buf, TOKEN_NONE, &p); + if (ac == ARGS_EOLN) break; dword(mdoc, line, la, p, DELIM_MAX, 1); @@ -499,6 +500,8 @@ append_delims(struct roff_man *mdoc, int line, int *pos, char *buf) if (mandoc_eos(p, strlen(p))) mdoc->last->flags |= NODE_EOS; + if (ac == ARGS_ALLOC) + free(p); } } @@ -508,17 +511,13 @@ append_delims(struct roff_man *mdoc, int line, int *pos, char *buf) * Otherwise, allocate it and return 0. */ static int -macro_or_word(MACRO_PROT_ARGS, int parsed) +macro_or_word(MACRO_PROT_ARGS, char *p, int parsed) { - char *p; int ntok; - p = buf + ppos; - ntok = TOKEN_NONE; - if (*p == '"') - p++; - else if (parsed && ! (mdoc->flags & MDOC_PHRASELIT)) - ntok = lookup(mdoc, tok, line, ppos, p); + ntok = buf[ppos] == '"' || parsed == 0 || + mdoc->flags & MDOC_PHRASELIT ? TOKEN_NONE : + lookup(mdoc, tok, line, ppos, p); if (ntok == TOKEN_NONE) { dword(mdoc, line, ppos, p, DELIM_MAX, tok == TOKEN_NONE || @@ -720,8 +719,12 @@ blk_exp_close(MACRO_PROT_ARGS) if (ntok == TOKEN_NONE) { dword(mdoc, line, lastarg, p, DELIM_MAX, mdoc_macro(tok)->flags & MDOC_JOIN); + if (ac == ARGS_ALLOC) + free(p); continue; } + if (ac == ARGS_ALLOC) + free(p); if (n != NULL) rew_last(mdoc, n); @@ -836,6 +839,8 @@ in_line(MACRO_PROT_ARGS) line, la, pos, buf); if (nl) append_delims(mdoc, line, pos, buf); + if (ac == ARGS_ALLOC) + free(p); return; } @@ -879,6 +884,9 @@ in_line(MACRO_PROT_ARGS) dword(mdoc, line, la, p, d, mdoc_macro(tok)->flags & MDOC_JOIN); + if (ac == ARGS_ALLOC) + free(p); + /* * If the first argument is a closing delimiter, * do not suppress spacing before it. @@ -929,7 +937,7 @@ in_line(MACRO_PROT_ARGS) static void blk_full(MACRO_PROT_ARGS) { - int la, nl, parsed; + int done, la, nl, parsed; struct mdoc_arg *arg; struct roff_node *blk; /* Our own or a broken block. */ struct roff_node *head; /* Our own head. */ @@ -1095,11 +1103,15 @@ blk_full(MACRO_PROT_ARGS) if (tok == MDOC_Bd || tok == MDOC_Bk) { mandoc_msg(MANDOCERR_ARG_EXCESS, line, la, "%s ... %s", roff_name[tok], buf + la); + if (ac == ARGS_ALLOC) + free(p); break; } if (tok == MDOC_Rs) { mandoc_msg(MANDOCERR_ARG_SKIP, line, la, "Rs %s", buf + la); + if (ac == ARGS_ALLOC) + free(p); break; } if (ac == ARGS_PUNCT) @@ -1114,6 +1126,8 @@ blk_full(MACRO_PROT_ARGS) ac != ARGS_PHRASE && mdoc_isdelim(p) == DELIM_OPEN) { dword(mdoc, line, la, p, DELIM_OPEN, 0); + if (ac == ARGS_ALLOC) + free(p); continue; } @@ -1145,7 +1159,10 @@ blk_full(MACRO_PROT_ARGS) continue; } - if (macro_or_word(mdoc, tok, line, la, pos, buf, parsed)) + done = macro_or_word(mdoc, tok, line, la, pos, buf, p, parsed); + if (ac == ARGS_ALLOC) + free(p); + if (done) break; } @@ -1175,7 +1192,7 @@ out: static void blk_part_imp(MACRO_PROT_ARGS) { - int la, nl; + int done, la, nl; enum margserr ac; char *p; struct roff_node *blk; /* saved block context */ @@ -1210,13 +1227,18 @@ blk_part_imp(MACRO_PROT_ARGS) if (body == NULL && mdoc_isdelim(p) == DELIM_OPEN) { dword(mdoc, line, la, p, DELIM_OPEN, 0); + if (ac == ARGS_ALLOC) + free(p); continue; } if (body == NULL) body = roff_body_alloc(mdoc, line, ppos, tok); - if (macro_or_word(mdoc, tok, line, la, pos, buf, 1)) + done = macro_or_word(mdoc, tok, line, la, pos, buf, p, 1); + if (ac == ARGS_ALLOC) + free(p); + if (done) break; } if (body == NULL) @@ -1241,7 +1263,7 @@ blk_part_imp(MACRO_PROT_ARGS) static void blk_part_exp(MACRO_PROT_ARGS) { - int la, nl; + int done, la, nl; enum margserr ac; struct roff_node *head; /* keep track of head */ char *p; @@ -1266,6 +1288,8 @@ blk_part_exp(MACRO_PROT_ARGS) if (head == NULL && mdoc_isdelim(p) == DELIM_OPEN) { dword(mdoc, line, la, p, DELIM_OPEN, 0); + if (ac == ARGS_ALLOC) + free(p); continue; } @@ -1275,11 +1299,17 @@ blk_part_exp(MACRO_PROT_ARGS) dword(mdoc, line, la, p, DELIM_MAX, 0); rew_last(mdoc, head); roff_body_alloc(mdoc, line, ppos, tok); - if (tok == MDOC_Eo) + if (tok == MDOC_Eo) { + if (ac == ARGS_ALLOC) + free(p); continue; + } } - if (macro_or_word(mdoc, tok, line, la, pos, buf, 1)) + done = macro_or_word(mdoc, tok, line, la, pos, buf, p, 1); + if (ac == ARGS_ALLOC) + free(p); + if (done) break; } @@ -1337,10 +1367,12 @@ in_line_argn(MACRO_PROT_ARGS) la = *pos; ac = mdoc_args(mdoc, line, pos, buf, tok, &p); - if (ac == ARGS_WORD && state == -1 && + if ((ac == ARGS_WORD || ac == ARGS_ALLOC) && state == -1 && (mdoc_macro(tok)->flags & MDOC_IGNDELIM) == 0 && mdoc_isdelim(p) == DELIM_OPEN) { dword(mdoc, line, la, p, DELIM_OPEN, 0); + if (ac == ARGS_ALLOC) + free(p); continue; } @@ -1373,6 +1405,8 @@ in_line_argn(MACRO_PROT_ARGS) } (*mdoc_macro(ntok)->fp)(mdoc, ntok, line, la, pos, buf); + if (ac == ARGS_ALLOC) + free(p); break; } @@ -1390,6 +1424,9 @@ in_line_argn(MACRO_PROT_ARGS) dword(mdoc, line, la, p, DELIM_MAX, mdoc_macro(tok)->flags & MDOC_JOIN); + if (ac == ARGS_ALLOC) + free(p); + p = mdoc->last->string; } if (state == -1) { @@ -1444,13 +1481,19 @@ static int parse_rest(struct roff_man *mdoc, enum roff_tok tok, int line, int *pos, char *buf) { - int la; + char *p; + int done, la; + enum margserr ac; for (;;) { la = *pos; - if (mdoc_args(mdoc, line, pos, buf, tok, NULL) == ARGS_EOLN) + ac = mdoc_args(mdoc, line, pos, buf, tok, &p); + if (ac == ARGS_EOLN) return 0; - if (macro_or_word(mdoc, tok, line, la, pos, buf, 1)) + done = macro_or_word(mdoc, tok, line, la, pos, buf, p, 1); + if (ac == ARGS_ALLOC) + free(p); + if (done) return 1; } } diff --git a/regress/man/RS/REarg.in b/regress/man/RS/REarg.in index 27af5017..7901d3cf 100644 --- a/regress/man/RS/REarg.in +++ b/regress/man/RS/REarg.in @@ -1,8 +1,9 @@ -.\" $OpenBSD: REarg.in,v 1.3 2017/07/04 14:53:23 schwarze Exp $ -.TH RS-REARG 1 "January 24, 2015" +.\" $OpenBSD: REarg.in,v 1.4 2018/12/21 16:58:49 schwarze Exp $ +.TH RS-REARG 1 "December 21, 2018" .SH NAME RS-REarg \- arguments to the RE macro .SH DESCRIPTION +.nr one 1 level 1 .RS 4n 2i level 2 @@ -10,13 +11,13 @@ level 2 level 3 .RE 2a back to 2 -.RE 1b +.RE \n[one]b back to 1 .RS 4n level 2 .RS 2n level 3 -.RE 1c +.RE "\\n[one]c" back to 1 .RS 4n level 2 @@ -24,4 +25,4 @@ level 2 level 3 .RE 0d back to 1 -.RE 1e +.RE \\n[one]e diff --git a/regress/man/RS/REarg.out_ascii b/regress/man/RS/REarg.out_ascii index 747e037b..45326a88 100644 --- a/regress/man/RS/REarg.out_ascii +++ b/regress/man/RS/REarg.out_ascii @@ -20,4 +20,4 @@ DDEESSCCRRIIPPTTIIOONN -OpenBSD January 24, 2015 RS-REARG(1) +OpenBSD December 21, 2018 RS-REARG(1) diff --git a/regress/man/RS/REarg.out_lint b/regress/man/RS/REarg.out_lint index 457ccb15..a912a7e5 100644 --- a/regress/man/RS/REarg.out_lint +++ b/regress/man/RS/REarg.out_lint @@ -1,7 +1,7 @@ -mandoc: REarg.in:7:8: ERROR: skipping excess arguments: RS ... 2i -mandoc: REarg.in:11:6: ERROR: skipping excess arguments: RE ... a -mandoc: REarg.in:13:6: ERROR: skipping excess arguments: RE ... b -mandoc: REarg.in:19:6: ERROR: skipping excess arguments: RE ... c -mandoc: REarg.in:25:6: ERROR: skipping excess arguments: RE ... d -mandoc: REarg.in:27:6: ERROR: skipping excess arguments: RE ... e -mandoc: REarg.in:27:2: ERROR: fewer RS blocks open, skipping: RE 1 +mandoc: REarg.in:8:8: ERROR: skipping excess arguments: RS ... 2i +mandoc: REarg.in:12:6: ERROR: skipping excess arguments: RE ... a +mandoc: REarg.in:14:6: ERROR: skipping excess arguments: RE ... b +mandoc: REarg.in:20:7: ERROR: skipping excess arguments: RE ... c +mandoc: REarg.in:26:6: ERROR: skipping excess arguments: RE ... d +mandoc: REarg.in:28:6: ERROR: skipping excess arguments: RE ... e +mandoc: REarg.in:28:2: ERROR: fewer RS blocks open, skipping: RE 1 diff --git a/regress/mdoc/Bl/Makefile b/regress/mdoc/Bl/Makefile index aeb6bd56..8fde5d9e 100644 --- a/regress/mdoc/Bl/Makefile +++ b/regress/mdoc/Bl/Makefile @@ -1,8 +1,8 @@ -# $OpenBSD: Makefile,v 1.31 2016/10/17 19:00:16 schwarze Exp $ +# $OpenBSD: Makefile,v 1.40 2018/12/21 16:58:49 schwarze Exp $ REGRESS_TARGETS = item inset diag ohang bullet dash enum hang tag REGRESS_TARGETS += column column_nogroff colNoIt -REGRESS_TARGETS += extend nested offset secstart +REGRESS_TARGETS += esc extend nested offset secstart REGRESS_TARGETS += notype multitype badargs REGRESS_TARGETS += empty noIt emptyhead emptytag emptyitem multitag diff --git a/regress/mdoc/Bl/esc.in b/regress/mdoc/Bl/esc.in new file mode 100644 index 00000000..60eb8a30 --- /dev/null +++ b/regress/mdoc/Bl/esc.in @@ -0,0 +1,19 @@ +.\" $OpenBSD: esc.in,v 1.1 2018/12/21 16:58:49 schwarze Exp $ +.Dd $Mdocdate$ +.Dt BL-ESC 1 +.Os +.Sh NAME +.Nm Bl-esc +.Nd escape sequences in full block macro heads +.Sh DESCRIPTION +.ds a \(at +.Bl -tag -width 2n +.It \*a +unquoted unescaped +.It "\*a" +quoted unescaped +.It \\*a +unquoted escaped +.It "\\*a" +quoted escaped +.El diff --git a/regress/mdoc/Bl/esc.out_ascii b/regress/mdoc/Bl/esc.out_ascii new file mode 100644 index 00000000..e73c929f --- /dev/null +++ b/regress/mdoc/Bl/esc.out_ascii @@ -0,0 +1,15 @@ +BL-ESC(1) General Commands Manual BL-ESC(1) + +NNAAMMEE + BBll--eesscc - escape sequences in full block macro heads + +DDEESSCCRRIIPPTTIIOONN + @ unquoted unescaped + + @ quoted unescaped + + @ unquoted escaped + + @ quoted escaped + +OpenBSD December 21, 2018 OpenBSD diff --git a/regress/mdoc/Bl/esc.out_markdown b/regress/mdoc/Bl/esc.out_markdown new file mode 100644 index 00000000..780a61b8 --- /dev/null +++ b/regress/mdoc/Bl/esc.out_markdown @@ -0,0 +1,25 @@ +BL-ESC(1) - General Commands Manual + +# NAME + +**Bl-esc** - escape sequences in full block macro heads + +# DESCRIPTION + +@ + +> unquoted unescaped + +@ + +> quoted unescaped + +@ + +> unquoted escaped + +@ + +> quoted escaped + +OpenBSD - December 21, 2018 diff --git a/regress/mdoc/Eo/Makefile b/regress/mdoc/Eo/Makefile index 0034e271..1e101ace 100644 --- a/regress/mdoc/Eo/Makefile +++ b/regress/mdoc/Eo/Makefile @@ -1,6 +1,7 @@ -# $OpenBSD: Makefile,v 1.4 2015/02/11 14:14:53 schwarze Exp $ +# $OpenBSD: Makefile,v 1.8 2018/12/21 16:58:49 schwarze Exp $ -REGRESS_TARGETS = break empty obsolete unclosed +REGRESS_TARGETS = arg break empty obsolete unclosed +UTF8_TARGETS = arg LINT_TARGETS = break obsolete unclosed # groff-1.22.3 defect: diff --git a/regress/mdoc/Eo/arg.in b/regress/mdoc/Eo/arg.in new file mode 100644 index 00000000..ecb30ed2 --- /dev/null +++ b/regress/mdoc/Eo/arg.in @@ -0,0 +1,25 @@ +.\" $OpenBSD: arg.in,v 1.1 2018/12/21 16:58:49 schwarze Exp $ +.Dd $Mdocdate$ +.Dt EO-ARG 1 +.Os +.Sh NAME +.Nm Eo-arg +.Nd escapes in arguments of enclosure macros +.Sh DESCRIPTION +.ds o \(Fo +.ds c \(Fc +.Eo \*o +unquoted unescaped +.Ec \*c +.Pp +.Eo "\*o" +quoted unescaped +.Ec "\*c" +.Pp +.Eo \\*o +unquoted escaped +.Ec \\*c +.Pp +.Eo "\\*o" +quoted escaped +.Ec "\\*c" diff --git a/regress/mdoc/Eo/arg.out_ascii b/regress/mdoc/Eo/arg.out_ascii new file mode 100644 index 00000000..004c3fe5 --- /dev/null +++ b/regress/mdoc/Eo/arg.out_ascii @@ -0,0 +1,15 @@ +EO-ARG(1) General Commands Manual EO-ARG(1) + +NNAAMMEE + EEoo--aarrgg - escapes in arguments of enclosure macros + +DDEESSCCRRIIPPTTIIOONN + <<unquoted unescaped>> + + <<quoted unescaped>> + + <<unquoted escaped>> + + <<quoted escaped>> + +OpenBSD December 21, 2018 OpenBSD diff --git a/regress/mdoc/Eo/arg.out_markdown b/regress/mdoc/Eo/arg.out_markdown new file mode 100644 index 00000000..9da6ff4f --- /dev/null +++ b/regress/mdoc/Eo/arg.out_markdown @@ -0,0 +1,17 @@ +EO-ARG(1) - General Commands Manual + +# NAME + +**Eo-arg** - escapes in arguments of enclosure macros + +# DESCRIPTION + +«unquoted unescaped» + +«quoted unescaped» + +«unquoted escaped» + +«quoted escaped» + +OpenBSD - December 21, 2018 diff --git a/regress/mdoc/Eo/arg.out_utf8 b/regress/mdoc/Eo/arg.out_utf8 new file mode 100644 index 00000000..3b4a30bd --- /dev/null +++ b/regress/mdoc/Eo/arg.out_utf8 @@ -0,0 +1,15 @@ +EO-ARG(1) General Commands Manual EO-ARG(1) + +NNAAMMEE + EEoo--aarrgg – escapes in arguments of enclosure macros + +DDEESSCCRRIIPPTTIIOONN + «unquoted unescaped» + + «quoted unescaped» + + «unquoted escaped» + + «quoted escaped» + +OpenBSD December 21, 2018 OpenBSD diff --git a/regress/mdoc/Fd/Makefile b/regress/mdoc/Fd/Makefile index 7cd78c65..39b07f1d 100644 --- a/regress/mdoc/Fd/Makefile +++ b/regress/mdoc/Fd/Makefile @@ -1,6 +1,6 @@ -# $OpenBSD: Makefile,v 1.3 2015/02/06 03:31:11 schwarze Exp $ +# $OpenBSD: Makefile,v 1.7 2018/12/21 16:58:49 schwarze Exp $ -REGRESS_TARGETS = break empty eos font +REGRESS_TARGETS = arg break empty eos font LINT_TARGETS = empty SKIP_TMAN = eos diff --git a/regress/mdoc/Fd/arg.in b/regress/mdoc/Fd/arg.in new file mode 100644 index 00000000..c54e399c --- /dev/null +++ b/regress/mdoc/Fd/arg.in @@ -0,0 +1,16 @@ +.\" $OpenBSD: arg.in,v 1.1 2018/12/21 16:58:49 schwarze Exp $ +.Dd $Mdocdate$ +.Dt FD-ARG 2 +.Os +.Sh NAME +.Nm Fd-arg +.Nd escape sequences in the arguments of in-line EOL macros +.Sh DESCRIPTION +.ds s \(sh +.Fd \*sunquoted unescaped +.Pp +.Fd "\*squoted" unescaped +.Pp +.Fd \\*sunquoted escaped +.Pp +.Fd "\\*squoted" escaped diff --git a/regress/mdoc/Fd/arg.out_ascii b/regress/mdoc/Fd/arg.out_ascii new file mode 100644 index 00000000..42836e61 --- /dev/null +++ b/regress/mdoc/Fd/arg.out_ascii @@ -0,0 +1,15 @@ +FD-ARG(2) System Calls Manual FD-ARG(2) + +NNAAMMEE + FFdd--aarrgg - escape sequences in the arguments of in-line EOL macros + +DDEESSCCRRIIPPTTIIOONN + ##uunnqquuootteedd uunneessccaappeedd + + ##qquuootteedd uunneessccaappeedd + + ##uunnqquuootteedd eessccaappeedd + + ##qquuootteedd eessccaappeedd + +OpenBSD December 21, 2018 OpenBSD diff --git a/regress/mdoc/Fd/arg.out_markdown b/regress/mdoc/Fd/arg.out_markdown new file mode 100644 index 00000000..14dba6fc --- /dev/null +++ b/regress/mdoc/Fd/arg.out_markdown @@ -0,0 +1,17 @@ +FD-ARG(2) - System Calls Manual + +# NAME + +**Fd-arg** - escape sequences in the arguments of in-line EOL macros + +# DESCRIPTION + +**#unquoted unescaped** + +**#quoted unescaped** + +**#unquoted escaped** + +**#quoted escaped** + +OpenBSD - December 21, 2018 diff --git a/regress/mdoc/Li/Makefile b/regress/mdoc/Li/Makefile index f5afd199..7b2a3313 100644 --- a/regress/mdoc/Li/Makefile +++ b/regress/mdoc/Li/Makefile @@ -1,6 +1,6 @@ -# $OpenBSD: Makefile,v 1.4 2014/07/02 20:18:42 schwarze Exp $ +# $OpenBSD: Makefile,v 1.7 2018/12/21 16:58:49 schwarze Exp $ -REGRESS_TARGETS = punct font +REGRESS_TARGETS = arg punct font LINT_TARGETS = punct .include <bsd.regress.mk> diff --git a/regress/mdoc/Li/arg.in b/regress/mdoc/Li/arg.in new file mode 100644 index 00000000..199a1210 --- /dev/null +++ b/regress/mdoc/Li/arg.in @@ -0,0 +1,20 @@ +.\" $OpenBSD: arg.in,v 1.1 2018/12/21 16:58:49 schwarze Exp $ +.Dd $Mdocdate$ +.Dt LI-ARG 1 +.Os +.Sh NAME +.Nm Li-arg +.Nd escape sequences in arguments of in-line macros +.Sh DESCRIPTION +.ds a \(at +unquoted unescaped: +.Li \*a +.Pp +quoted unescaped: +.Li "\*a" +.Pp +unquoted escaped: +.Li \\*a +.Pp +quoted escaped: +.Li "\\*a" diff --git a/regress/mdoc/Li/arg.out_ascii b/regress/mdoc/Li/arg.out_ascii new file mode 100644 index 00000000..f0a69f18 --- /dev/null +++ b/regress/mdoc/Li/arg.out_ascii @@ -0,0 +1,15 @@ +LI-ARG(1) General Commands Manual LI-ARG(1) + +NNAAMMEE + LLii--aarrgg - escape sequences in arguments of in-line macros + +DDEESSCCRRIIPPTTIIOONN + unquoted unescaped: @ + + quoted unescaped: @ + + unquoted escaped: @ + + quoted escaped: @ + +OpenBSD December 21, 2018 OpenBSD diff --git a/regress/mdoc/Li/arg.out_markdown b/regress/mdoc/Li/arg.out_markdown new file mode 100644 index 00000000..1b0ba54e --- /dev/null +++ b/regress/mdoc/Li/arg.out_markdown @@ -0,0 +1,21 @@ +LI-ARG(1) - General Commands Manual + +# NAME + +**Li-arg** - escape sequences in arguments of in-line macros + +# DESCRIPTION + +unquoted unescaped: +`@` + +quoted unescaped: +`@` + +unquoted escaped: +`@` + +quoted escaped: +`@` + +OpenBSD - December 21, 2018 diff --git a/regress/mdoc/Ns/Makefile b/regress/mdoc/Ns/Makefile index 45db66cc..b2c409ab 100644 --- a/regress/mdoc/Ns/Makefile +++ b/regress/mdoc/Ns/Makefile @@ -1,6 +1,6 @@ -# $OpenBSD: Makefile,v 1.5 2014/07/02 11:42:56 schwarze Exp $ +# $OpenBSD: Makefile,v 1.8 2018/12/21 16:58:49 schwarze Exp $ -REGRESS_TARGETS = position punct +REGRESS_TARGETS = arg position punct LINT_TARGETS = position .include <bsd.regress.mk> diff --git a/regress/mdoc/Ns/arg.in b/regress/mdoc/Ns/arg.in new file mode 100644 index 00000000..117e7140 --- /dev/null +++ b/regress/mdoc/Ns/arg.in @@ -0,0 +1,13 @@ +.\" $OpenBSD: arg.in,v 1.1 2018/12/21 16:58:49 schwarze Exp $ +.Dd $Mdocdate$ +.Dt NS-ARG 1 +.Os +.Sh NAME +.Nm Ns-arg +.Nd escape sequences in the arguments of in-line macros with an argument limit +.Sh DESCRIPTION +.ds a \(at +.No unquoted unescaped Ns \*a +.No quoted unescaped Ns "\*a" +.No unquoted escaped Ns \\*a +.No quoted escaped Ns "\\*a" diff --git a/regress/mdoc/Ns/arg.out_ascii b/regress/mdoc/Ns/arg.out_ascii new file mode 100644 index 00000000..2b8a5104 --- /dev/null +++ b/regress/mdoc/Ns/arg.out_ascii @@ -0,0 +1,10 @@ +NS-ARG(1) General Commands Manual NS-ARG(1) + +NNAAMMEE + NNss--aarrgg - escape sequences in the arguments of in-line macros with an + argument limit + +DDEESSCCRRIIPPTTIIOONN + unquoted unescaped@ quoted unescaped@ unquoted escaped@ quoted escaped@ + +OpenBSD December 21, 2018 OpenBSD diff --git a/regress/mdoc/Ns/arg.out_markdown b/regress/mdoc/Ns/arg.out_markdown new file mode 100644 index 00000000..7d600590 --- /dev/null +++ b/regress/mdoc/Ns/arg.out_markdown @@ -0,0 +1,14 @@ +NS-ARG(1) - General Commands Manual + +# NAME + +**Ns-arg** - escape sequences in the arguments of in-line macros with an argument limit + +# DESCRIPTION + +unquoted unescaped@ +quoted unescaped@ +unquoted escaped@ +quoted escaped@ + +OpenBSD - December 21, 2018 diff --git a/regress/mdoc/Op/Makefile b/regress/mdoc/Op/Makefile index 4a2d6fda..b2a79566 100644 --- a/regress/mdoc/Op/Makefile +++ b/regress/mdoc/Op/Makefile @@ -1,6 +1,6 @@ -# $OpenBSD: Makefile,v 1.3 2014/07/02 11:42:56 schwarze Exp $ +# $OpenBSD: Makefile,v 1.7 2018/12/21 16:58:49 schwarze Exp $ -REGRESS_TARGETS = break broken punct +REGRESS_TARGETS = arg break broken punct LINT_TARGETS = break broken punct SKIP_GROFF = break diff --git a/regress/mdoc/Op/arg.in b/regress/mdoc/Op/arg.in new file mode 100644 index 00000000..ed2ed567 --- /dev/null +++ b/regress/mdoc/Op/arg.in @@ -0,0 +1,20 @@ +.\" $OpenBSD: arg.in,v 1.1 2018/12/21 16:58:49 schwarze Exp $ +.Dd $Mdocdate$ +.Dt OP-ARG 1 +.Os +.Sh NAME +.Nm Op-arg +.Nd escape sequences in the arguments of partial implicit macros +.Sh DESCRIPTION +.ds a \(at +unquoted unescaped: +.Op \*a +.Pp +quoted unescaped: +.Op "\*a" +.Pp +unquoted escaped: +.Op \\*a +.Pp +quoted escaped: +.Op "\\*a" diff --git a/regress/mdoc/Op/arg.out_ascii b/regress/mdoc/Op/arg.out_ascii new file mode 100644 index 00000000..534917cb --- /dev/null +++ b/regress/mdoc/Op/arg.out_ascii @@ -0,0 +1,15 @@ +OP-ARG(1) General Commands Manual OP-ARG(1) + +NNAAMMEE + OOpp--aarrgg - escape sequences in the arguments of partial implicit macros + +DDEESSCCRRIIPPTTIIOONN + unquoted unescaped: [@] + + quoted unescaped: [@] + + unquoted escaped: [@] + + quoted escaped: [@] + +OpenBSD December 21, 2018 OpenBSD diff --git a/regress/mdoc/Op/arg.out_markdown b/regress/mdoc/Op/arg.out_markdown new file mode 100644 index 00000000..5ad86f7d --- /dev/null +++ b/regress/mdoc/Op/arg.out_markdown @@ -0,0 +1,21 @@ +OP-ARG(1) - General Commands Manual + +# NAME + +**Op-arg** - escape sequences in the arguments of partial implicit macros + +# DESCRIPTION + +unquoted unescaped: +\[@] + +quoted unescaped: +\[@] + +unquoted escaped: +\[@] + +quoted escaped: +\[@] + +OpenBSD - December 21, 2018 diff --git a/regress/roff/args/mdoc.out_lint b/regress/roff/args/mdoc.out_lint index f4715ba2..4422d754 100644 --- a/regress/roff/args/mdoc.out_lint +++ b/regress/roff/args/mdoc.out_lint @@ -4,10 +4,14 @@ mandoc: mdoc.in:46:15: STYLE: whitespace at end of input line mandoc: mdoc.in:86:25: STYLE: whitespace at end of input line mandoc: mdoc.in:90:26: STYLE: whitespace at end of input line mandoc: mdoc.in:94:27: STYLE: whitespace at end of input line -mandoc: mdoc.in:102:9: STYLE: unterminated quoted argument -mandoc: mdoc.in:103:13: STYLE: unterminated quoted argument -mandoc: mdoc.in:107:10: STYLE: unterminated quoted argument -mandoc: mdoc.in:108:14: STYLE: unterminated quoted argument -mandoc: mdoc.in:112:11: STYLE: unterminated quoted argument -mandoc: mdoc.in:113:15: STYLE: unterminated quoted argument +mandoc: mdoc.in:102:5: STYLE: unterminated quoted argument +mandoc: mdoc.in:103:9: STYLE: unterminated quoted argument +mandoc: mdoc.in:107:5: STYLE: unterminated quoted argument +mandoc: mdoc.in:107:10: STYLE: whitespace at end of input line +mandoc: mdoc.in:108:9: STYLE: unterminated quoted argument +mandoc: mdoc.in:108:14: STYLE: whitespace at end of input line +mandoc: mdoc.in:112:5: STYLE: unterminated quoted argument +mandoc: mdoc.in:112:11: STYLE: whitespace at end of input line +mandoc: mdoc.in:113:9: STYLE: unterminated quoted argument +mandoc: mdoc.in:113:15: STYLE: whitespace at end of input line mandoc: mdoc.in:121:1: ERROR: escaped character not allowed in a name: Fl\( @@ -38,6 +38,14 @@ #include "tbl_parse.h" #include "eqn_parse.h" +/* + * ASCII_ESC is used to signal from roff_getarg() to roff_expand() + * that an escape sequence resulted from copy-in processing and + * needs to be checked or interpolated. As it is used nowhere + * else, it is defined here rather than in a header file. + */ +#define ASCII_ESC 27 + /* Maximum number of string expansions per line, to break infinite loops. */ #define EXPAND_LIMIT 1000 @@ -191,6 +199,8 @@ static int roff_evalnum(struct roff *, int, static int roff_evalpar(struct roff *, int, const char *, int *, int *, int); static int roff_evalstrcond(const char *, int *); +static int roff_expand(struct roff *, struct buf *, + int, int, char); static void roff_free1(struct roff *); static void roff_freereg(struct roffreg *); static void roff_freestr(struct roffkv *); @@ -219,7 +229,6 @@ static enum roff_tok roff_parse(struct roff *, char *, int *, static int roff_parsetext(struct roff *, struct buf *, int, int *); static int roff_renamed(ROFF_ARGS); -static int roff_res(struct roff *, struct buf *, int, int); static int roff_return(ROFF_ARGS); static int roff_rm(ROFF_ARGS); static int roff_rn(ROFF_ARGS); @@ -1142,12 +1151,12 @@ deroff(char **dest, const struct roff_node *n) /* --- main functions of the roff parser ---------------------------------- */ /* - * In the current line, expand escape sequences that tend to get - * used in numerical expressions and conditional requests. - * Also check the syntax of the remaining escape sequences. + * In the current line, expand escape sequences that produce parsable + * input text. Also check the syntax of the remaining escape sequences, + * which typically produce output glyphs or change formatter state. */ static int -roff_res(struct roff *r, struct buf *buf, int ln, int pos) +roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc) { struct mctx *ctx; /* current macro call context */ char ubuf[24]; /* buffer to print the number */ @@ -1181,7 +1190,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos) done = 0; start = buf->buf + pos; for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) { - if (stesc[0] != r->escape || stesc[1] == '\0') + if (stesc[0] != newesc || stesc[1] == '\0') continue; stesc++; if (*stesc != '"' && *stesc != '#') @@ -1223,7 +1232,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos) * in the syntax tree. */ - if (r->format == 0) { + if (newesc != ASCII_ESC && r->format == 0) { while (*ep == ' ' || *ep == '\t') ep--; ep[1] = '\0'; @@ -1264,11 +1273,16 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos) expand_count = 0; while (stesc >= start) { + if (*stesc != newesc) { - /* Search backwards for the next backslash. */ + /* + * If we have a non-standard escape character, + * escape literal backslashes because all + * processing in subsequent functions uses + * the standard escaping rules. + */ - if (*stesc != r->escape) { - if (*stesc == '\\') { + if (newesc != ASCII_ESC && *stesc == '\\') { *stesc = '\0'; buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s", buf->buf, stesc + 1) + 1; @@ -1277,6 +1291,9 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos) free(buf->buf); buf->buf = nbuf; } + + /* Search backwards for the next escape. */ + stesc--; continue; } @@ -1556,10 +1573,11 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos) * or to the NUL byte terminating the argument line. */ char * -mandoc_getarg(char **cpp, int ln, int *pos) +roff_getarg(struct roff *r, char **cpp, int ln, int *pos) { - char *start, *cp; - int quoted, pairs, white; + struct buf buf; + char *cp, *start; + int newesc, pairs, quoted, white; /* Quoting can only start with a new word. */ start = *cpp; @@ -1569,8 +1587,7 @@ mandoc_getarg(char **cpp, int ln, int *pos) start++; } - pairs = 0; - white = 0; + newesc = pairs = white = 0; for (cp = start; '\0' != *cp; cp++) { /* @@ -1589,8 +1606,12 @@ mandoc_getarg(char **cpp, int ln, int *pos) case 'a': case 't': cp[-pairs] = '\t'; - /* FALLTHROUGH */ + pairs++; + cp++; + break; case '\\': + newesc = 1; + cp[-pairs] = ASCII_ESC; pairs++; cp++; break; @@ -1639,7 +1660,18 @@ mandoc_getarg(char **cpp, int ln, int *pos) if ('\0' == *cp && (white || ' ' == cp[-1])) mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL); - return start; + start = mandoc_strdup(start); + if (newesc == 0) + return start; + + buf.buf = start; + buf.sz = strlen(start) + 1; + buf.next = NULL; + if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) { + free(buf.buf); + buf.buf = mandoc_strdup(""); + } + return buf.buf; } @@ -1737,7 +1769,7 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) /* Expand some escape sequences. */ - e = roff_res(r, buf, ln, pos); + e = roff_expand(r, buf, ln, pos, r->escape); if ((e & ROFF_MASK) == ROFF_IGN) return e; assert(e == ROFF_CONT); @@ -3771,7 +3803,7 @@ roff_userdef(ROFF_ARGS) ctx->argv = mandoc_reallocarray(ctx->argv, ctx->argsz, sizeof(*ctx->argv)); } - arg = mandoc_getarg(&src, ln, &pos); + arg = roff_getarg(r, &src, ln, &pos); sz = 1; /* For the terminating NUL. */ for (ap = arg; *ap != '\0'; ap++) sz += *ap == '"' ? 4 : 1; @@ -3784,6 +3816,7 @@ roff_userdef(ROFF_ARGS) *dst++ = *ap; } *dst = '\0'; + free(arg); } /* Replace the macro invocation by the macro definition. */ @@ -4133,7 +4166,7 @@ roff_strdup(const struct roff *r, const char *p) /* * We bail out on bad escapes. * No need to warn: we already did so when - * roff_res() was called. + * roff_expand() was called. */ sz = (int)(p - pp); res = mandoc_realloc(res, ssz + sz + 1); |