From e0ab88fccf99335566d214b53e13683b1391271c Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Tue, 17 Jan 2017 15:32:43 +0000 Subject: Completely delete the buf field of struct html and all the buf*() interfaces. Such a static buffer was a bad idea in the first place, causing unfixable truncation that was only prevented by triggering an assertion failure. Instead, let the small number of remaining users allocate and free their own, temporary dynamic buffers, or for the case of .Xr and .In, pass the original data to be assembled in print_otag(). --- TODO | 9 --- html.c | 226 +++++++++++++++++++--------------------------------------- html.h | 15 ---- man_html.c | 9 ++- mandoc_html.3 | 46 ++++++------ mdoc_html.c | 150 +++++++++++++++++++------------------- 6 files changed, 174 insertions(+), 281 deletions(-) diff --git a/TODO b/TODO index 46e0290b..e79d3614 100644 --- a/TODO +++ b/TODO @@ -32,15 +32,6 @@ Many issues are annotated for difficulty as follows: Obviously, as the issues have not been solved yet, these annotations are mere guesses, and some may be wrong. -************************************************************************ -* crashes -************************************************************************ - -- The abort() in bufcat(), html.c, can be triggered via buffmt_includes() - by running -Thtml -Oincludes on a file containing a long .In argument. - Fixing this will probably require reworking the whole bufcat() concept. 
- loc ** exist * algo * size ** imp ** - ************************************************************************ * missing features ************************************************************************ diff --git a/html.c b/html.c index 45f03dfd..2439f460 100644 --- a/html.c +++ b/html.c @@ -106,12 +106,11 @@ static const char *const roffscales[SCALE_MAX] = { }; static void a2width(const char *, struct roffsu *); -static void bufncat(struct html *, const char *, size_t); static void print_ctag(struct html *, struct tag *); static int print_escape(char); -static int print_encode(struct html *, const char *, int); +static int print_encode(struct html *, const char *, const char *, int); +static void print_href(struct html *, const char *, const char *, int); static void print_metaf(struct html *, enum mandoc_esc); -static void print_attr(struct html *, const char *, const char *); void * @@ -304,7 +303,7 @@ print_escape(char c) } static int -print_encode(struct html *h, const char *p, int norecurse) +print_encode(struct html *h, const char *p, const char *pend, int norecurse) { size_t sz; int c, len, nospace; @@ -313,9 +312,12 @@ print_encode(struct html *h, const char *p, int norecurse) static const char rejs[9] = { '\\', '<', '>', '&', '"', ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' }; + if (pend == NULL) + pend = strchr(p, '\0'); + nospace = 0; - while ('\0' != *p) { + while (p < pend) { if (HTML_SKIPCHAR & h->flags && '\\' != *p) { h->flags &= ~HTML_SKIPCHAR; p++; @@ -323,11 +325,13 @@ print_encode(struct html *h, const char *p, int norecurse) } sz = strcspn(p, rejs); + if (p + sz > pend) + sz = pend - p; fwrite(p, 1, sz, stdout); p += (int)sz; - if ('\0' == *p) + if (p >= pend) break; if (print_escape(*p++)) @@ -399,11 +403,27 @@ print_encode(struct html *h, const char *p, int norecurse) } static void -print_attr(struct html *h, const char *key, const char *val) +print_href(struct html *h, const char *name, const char *sec, int man) { - printf(" %s=\"", 
key); - (void)print_encode(h, val, 1); - putchar('\"'); + const char *p, *pp; + + pp = man ? h->base_man : h->base_includes; + while ((p = strchr(pp, '%')) != NULL) { + print_encode(h, pp, p, 1); + if (man && p[1] == 'S') { + if (sec == NULL) + putchar('1'); + else + print_encode(h, sec, NULL, 1); + } else if ((man && p[1] == 'N') || + (man == 0 && p[1] == 'I')) + print_encode(h, name, NULL, 1); + else + print_encode(h, p, p + 2, 1); + pp = p + 2; + } + if (*pp != '\0') + print_encode(h, pp, NULL, 1); } struct tag * @@ -412,7 +432,9 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) va_list ap; struct roffsu mysu, *su; struct tag *t; + const char *attr; char *s; + double v; int i, have_style; /* Push this tags onto the stack of open scopes. */ @@ -458,20 +480,40 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) s = va_arg(ap, char *); switch (*fmt++) { case 'c': - print_attr(h, "class", s); + attr = "class"; break; case 'h': - print_attr(h, "href", s); + attr = "href"; break; case 'i': - print_attr(h, "id", s); + attr = "id"; break; case '?': - print_attr(h, s, va_arg(ap, char *)); + attr = s; + s = va_arg(ap, char *); break; default: abort(); } + printf(" %s=\"", attr); + switch (*fmt) { + case 'M': + print_href(h, s, va_arg(ap, char *), 1); + fmt++; + break; + case 'I': + print_href(h, s, NULL, 0); + fmt++; + break; + case 'R': + putchar('#'); + fmt++; + /* FALLTHROUGH */ + default: + print_encode(h, s, NULL, 1); + break; + } + putchar('"'); } /* Print out styles. */ @@ -507,36 +549,40 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) /* Second letter: style name. 
*/ - bufinit(h); switch (*fmt++) { case 'b': - bufcat_su(h, "margin-bottom", su); + attr = "margin-bottom"; break; case 'h': - bufcat_su(h, "height", su); + attr = "height"; break; case 'i': - bufcat_su(h, "text-indent", su); + attr = "text-indent"; break; case 'l': - bufcat_su(h, "margin-left", su); + attr = "margin-left"; break; case 't': - bufcat_su(h, "margin-top", su); + attr = "margin-top"; break; case 'w': - bufcat_su(h, "width", su); + attr = "width"; break; case 'W': - bufcat_su(h, "min-width", su); + attr = "min-width"; break; case '?': - bufcat_style(h, s, va_arg(ap, char *)); - break; + printf("%s: %s;", s, va_arg(ap, char *)); + continue; default: abort(); } - printf("%s", h->buf); + v = su->scale; + if (su->unit == SCALE_MM && (v /= 100.0) == 0.0) + v = 1.0; + else if (su->unit == SCALE_BU) + v /= 24.0; + printf("%s: %.2f%s;", attr, v, roffscales[su->unit]); } if (have_style) putchar('"'); @@ -619,7 +665,7 @@ print_text(struct html *h, const char *word) } assert(word); - if ( ! print_encode(h, word, 0)) { + if ( ! print_encode(h, word, NULL, 0)) { if ( ! (h->flags & HTML_NONOSPACE)) h->flags &= ~HTML_NOSPACE; h->flags &= ~HTML_NONEWLINE; @@ -682,131 +728,3 @@ a2width(const char *p, struct roffsu *su) } else if (su->scale < 0.0) su->scale = 0.0; } - -void -bufinit(struct html *h) -{ - - h->buf[0] = '\0'; - h->buflen = 0; -} - -void -bufcat_style(struct html *h, const char *key, const char *val) -{ - - bufcat(h, key); - bufcat(h, ":"); - bufcat(h, val); - bufcat(h, ";"); -} - -void -bufcat(struct html *h, const char *p) -{ - - /* - * XXX This is broken and not easy to fix. - * When using the -Oincludes option, buffmt_includes() - * may pass in strings overrunning BUFSIZ, causing a crash. - */ - - h->buflen = strlcat(h->buf, p, BUFSIZ); - assert(h->buflen < BUFSIZ); -} - -void -bufcat_fmt(struct html *h, const char *fmt, ...) 
-{ - va_list ap; - - va_start(ap, fmt); - (void)vsnprintf(h->buf + (int)h->buflen, - BUFSIZ - h->buflen - 1, fmt, ap); - va_end(ap); - h->buflen = strlen(h->buf); -} - -static void -bufncat(struct html *h, const char *p, size_t sz) -{ - - assert(h->buflen + sz + 1 < BUFSIZ); - strncat(h->buf, p, sz); - h->buflen += sz; -} - -void -buffmt_includes(struct html *h, const char *name) -{ - const char *p, *pp; - - pp = h->base_includes; - - bufinit(h); - while (NULL != (p = strchr(pp, '%'))) { - bufncat(h, pp, (size_t)(p - pp)); - switch (*(p + 1)) { - case 'I': - bufcat(h, name); - break; - default: - bufncat(h, p, 2); - break; - } - pp = p + 2; - } - if (pp) - bufcat(h, pp); -} - -void -buffmt_man(struct html *h, const char *name, const char *sec) -{ - const char *p, *pp; - - pp = h->base_man; - - bufinit(h); - while (NULL != (p = strchr(pp, '%'))) { - bufncat(h, pp, (size_t)(p - pp)); - switch (*(p + 1)) { - case 'S': - bufcat(h, sec ? sec : "1"); - break; - case 'N': - bufcat_fmt(h, "%s", name); - break; - default: - bufncat(h, p, 2); - break; - } - pp = p + 2; - } - if (pp) - bufcat(h, pp); -} - -void -bufcat_su(struct html *h, const char *p, const struct roffsu *su) -{ - double v; - - v = su->scale; - if (SCALE_MM == su->unit && 0.0 == (v /= 100.0)) - v = 1.0; - else if (SCALE_BU == su->unit) - v /= 24.0; - - bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]); -} - -void -bufcat_id(struct html *h, const char *src) -{ - - /* Cf. . */ - - for (; '\0' != *src; src++) - bufncat(h, *src == ' ' ? 
"_" : src, 1); -} diff --git a/html.h b/html.h index c6095ea8..5f3e063d 100644 --- a/html.h +++ b/html.h @@ -101,8 +101,6 @@ struct html { char *base_man; /* base for manpage href */ char *base_includes; /* base for include href */ char *style; /* style-sheet URI */ - char buf[BUFSIZ]; /* see bufcat and friends */ - size_t buflen; struct tag *metaf; /* current open font scope */ enum htmlfont metal; /* last used font */ enum htmlfont metac; /* current font mode */ @@ -125,17 +123,4 @@ void print_tbl(struct html *, const struct tbl_span *); void print_eqn(struct html *, const struct eqn *); void print_paragraph(struct html *); -void bufcat_fmt(struct html *, const char *, ...) - __attribute__((__format__ (printf, 2, 3))); -void bufcat(struct html *, const char *); -void bufcat_id(struct html *, const char *); -void bufcat_style(struct html *, - const char *, const char *); -void bufcat_su(struct html *, const char *, - const struct roffsu *); -void bufinit(struct html *); -void buffmt_man(struct html *, - const char *, const char *); -void buffmt_includes(struct html *, const char *); - int html_strlen(const char *); diff --git a/man_html.c b/man_html.c index a1163627..7b3285cc 100644 --- a/man_html.c +++ b/man_html.c @@ -172,14 +172,13 @@ html_man(void *arg, const struct roff_man *man) static void print_man_head(MAN_ARGS) { + char *cp; print_gen_head(h); - assert(man->title); - assert(man->msec); - bufinit(h); - bufcat_fmt(h, "%s(%s)", man->title, man->msec); + mandoc_asprintf(&cp, "%s(%s)", man->title, man->msec); print_otag(h, TAG_TITLE, ""); - print_text(h, h->buf); + print_text(h, cp); + free(cp); } static void diff --git a/mandoc_html.3 b/mandoc_html.3 index 5e132979..517b0eff 100644 --- a/mandoc_html.3 +++ b/mandoc_html.3 @@ -1,6 +1,6 @@ .\" $Id$ .\" -.\" Copyright (c) 2014 Ingo Schwarze +.\" Copyright (c) 2014, 2017 Ingo Schwarze .\" .\" Permission to use, copy, modify, and distribute this software for any .\" purpose with or without fee is hereby granted, 
provided that the above @@ -146,6 +146,26 @@ attribute. Print a .Cm href attribute. +This attribute letter can optionally be followed by a modifier letter. +If followed by +.Cm R , +it formats the link as a local one by prefixing a +.Sq # +character. +If followed by +.Cm I , +it interprets the argument as a header file name +and generates a link using the +.Xr mandoc 1 +.Fl O Cm includes +option. +If followed by +.Cm M , +it takes two arguments instead of one, a manual page name and +section, and formats them as a link to a manual page using the +.Xr mandoc 1 +.Fl O Cm man +option. .It Cm i Print an .Cm id @@ -235,8 +255,6 @@ The first is the style name, the second its value. .Pp .Fn print_otag uses the private function -.Fn print_attr -which in turn uses the private function .Fn print_encode to take care of HTML encoding. If required by the element type, it remembers in @@ -270,28 +288,6 @@ and functions. .Pp The functions -.Fn bufinit , -.Fn bufcat* , -and -.Fn buffmt* -do not directly produce output but buffer text in the -.Fa buf -member of -.Fa h . -They are not used internally by -.Pa html.c -but intended for use by the language-specific formatters -to ease preparation of strings for the -.Fa p -argument of -.Fn print_otag -and for the -.Fa word -argument of -.Fn print_text . -Consequently, these functions do not do any HTML encoding. 
-.Pp -The functions .Fn html_strlen , .Fn print_eqn , .Fn print_tbl , diff --git a/mdoc_html.c b/mdoc_html.c index 40a78138..f613e6a1 100644 --- a/mdoc_html.c +++ b/mdoc_html.c @@ -324,17 +324,23 @@ html_mdoc(void *arg, const struct roff_man *mdoc) static void print_mdoc_head(MDOC_ARGS) { + char *cp; print_gen_head(h); - bufinit(h); - bufcat(h, meta->title); - if (meta->msec) - bufcat_fmt(h, "(%s)", meta->msec); - if (meta->arch) - bufcat_fmt(h, " (%s)", meta->arch); + + if (meta->arch != NULL && meta->msec != NULL) + mandoc_asprintf(&cp, "%s(%s) (%s)", meta->title, + meta->msec, meta->arch); + else if (meta->msec != NULL) + mandoc_asprintf(&cp, "%s(%s)", meta->title, meta->msec); + else if (meta->arch != NULL) + mandoc_asprintf(&cp, "%s (%s)", meta->title, meta->arch); + else + cp = mandoc_strdup(meta->title); print_otag(h, TAG_TITLE, ""); - print_text(h, h->buf); + print_text(h, cp); + free(cp); } static void @@ -489,9 +495,33 @@ mdoc_root_pre(MDOC_ARGS) return 1; } +char * +make_id(const struct roff_node *n) +{ + const struct roff_node *nch; + char *buf, *cp; + + for (nch = n->child; nch != NULL; nch = nch->next) + if (nch->type != ROFFT_TEXT) + return NULL; + + buf = NULL; + deroff(&buf, n); + + /* http://www.w3.org/TR/html5/dom.html#the-id-attribute */ + + for (cp = buf; *cp != '\0'; cp++) + if (*cp == ' ') + *cp = '_'; + + return buf; +} + static int mdoc_sh_pre(MDOC_ARGS) { + char *id; + switch (n->type) { case ROFFT_BLOCK: print_otag(h, TAG_DIV, "c", "section"); @@ -504,17 +534,10 @@ mdoc_sh_pre(MDOC_ARGS) break; } - bufinit(h); - - for (n = n->child; n != NULL && n->type == ROFFT_TEXT; ) { - bufcat_id(h, n->string); - if (NULL != (n = n->next)) - bufcat_id(h, " "); - } - - if (NULL == n) - print_otag(h, TAG_H1, "i", h->buf); - else + if ((id = make_id(n)) != NULL) { + print_otag(h, TAG_H1, "i", id); + free(id); + } else print_otag(h, TAG_H1, ""); return 1; @@ -523,23 +546,18 @@ mdoc_sh_pre(MDOC_ARGS) static int mdoc_ss_pre(MDOC_ARGS) { + char *id; + if 
(n->type == ROFFT_BLOCK) { print_otag(h, TAG_DIV, "c", "subsection"); return 1; } else if (n->type == ROFFT_BODY) return 1; - bufinit(h); - - for (n = n->child; n != NULL && n->type == ROFFT_TEXT; ) { - bufcat_id(h, n->string); - if (NULL != (n = n->next)) - bufcat_id(h, " "); - } - - if (NULL == n) - print_otag(h, TAG_H2, "i", h->buf); - else + if ((id = make_id(n)) != NULL) { + print_otag(h, TAG_H2, "i", id); + free(id); + } else print_otag(h, TAG_H2, ""); return 1; @@ -623,12 +641,11 @@ mdoc_xr_pre(MDOC_ARGS) if (NULL == n->child) return 0; - if (h->base_man) { - buffmt_man(h, n->child->string, - n->child->next ? - n->child->next->string : NULL); - print_otag(h, TAG_A, "ch", "link-man", h->buf); - } else + if (h->base_man) + print_otag(h, TAG_A, "chM", "link-man", + n->child->string, n->child->next == NULL ? + NULL : n->child->next->string); + else print_otag(h, TAG_A, "c", "link-man"); n = n->child; @@ -850,17 +867,15 @@ mdoc_d1_pre(MDOC_ARGS) static int mdoc_sx_pre(MDOC_ARGS) { - bufinit(h); - bufcat(h, "#"); - - for (n = n->child; n; ) { - bufcat_id(h, n->string); - if (NULL != (n = n->next)) - bufcat_id(h, " "); - } + char *id; print_otag(h, TAG_I, "c", "link-sec"); - print_otag(h, TAG_A, "ch", "link-sec", h->buf); + if ((id = make_id(n)) != NULL) { + print_otag(h, TAG_A, "chR", "link-sec", id); + free(id); + } else + print_otag(h, TAG_A, "c", "link-sec"); + return 1; } @@ -1051,9 +1066,8 @@ mdoc_fa_pre(MDOC_ARGS) static int mdoc_fd_pre(MDOC_ARGS) { - char buf[BUFSIZ]; - size_t sz; struct tag *t; + char *buf, *cp; synopsis_pre(h, n); @@ -1073,25 +1087,16 @@ mdoc_fd_pre(MDOC_ARGS) if (NULL != (n = n->next)) { assert(n->type == ROFFT_TEXT); - /* - * XXX This is broken and not easy to fix. - * When using -Oincludes, truncation may occur. - * Dynamic allocation wouldn't help because - * passing long strings to buffmt_includes() - * does not work either. - */ - - strlcpy(buf, '<' == *n->string || '"' == *n->string ? 
- n->string + 1 : n->string, BUFSIZ); - - sz = strlen(buf); - if (sz && ('>' == buf[sz - 1] || '"' == buf[sz - 1])) - buf[sz - 1] = '\0'; - if (h->base_includes) { - buffmt_includes(h, buf); - t = print_otag(h, TAG_A, "ch", "link-includes", - h->buf); + cp = n->string; + if (*cp == '<' || *cp == '"') + cp++; + buf = mandoc_strdup(cp); + cp = strchr(buf, '\0') - 1; + if (cp >= buf && (*cp == '>' || *cp == '"')) + *cp = '\0'; + t = print_otag(h, TAG_A, "chI", "link-includes", buf); + free(buf); } else t = print_otag(h, TAG_A, "c", "link-includes"); @@ -1279,16 +1284,16 @@ static int mdoc_mt_pre(MDOC_ARGS) { struct tag *t; + char *cp; for (n = n->child; n; n = n->next) { assert(n->type == ROFFT_TEXT); - bufinit(h); - bufcat(h, "mailto:"); - bufcat(h, n->string); - t = print_otag(h, TAG_A, "ch", "link-mail", h->buf); + mandoc_asprintf(&cp, "mailto:%s", n->string); + t = print_otag(h, TAG_A, "ch", "link-mail", cp); print_text(h, n->string); print_tagq(h, t); + free(cp); } return 0; @@ -1355,11 +1360,10 @@ mdoc_in_pre(MDOC_ARGS) if (NULL != (n = n->child)) { assert(n->type == ROFFT_TEXT); - if (h->base_includes) { - buffmt_includes(h, n->string); - t = print_otag(h, TAG_A, "ch", "link-includes", - h->buf); - } else + if (h->base_includes) + t = print_otag(h, TAG_A, "chI", "link-includes", + n->string); + else t = print_otag(h, TAG_A, "c", "link-includes"); print_text(h, n->string); print_tagq(h, t); -- cgit