From 42d5f8ddfdcc19abca690eba6b3c1bc230e43079 Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Fri, 13 Mar 2020 15:32:28 +0000 Subject: Split tagging into a validation part including prioritization in tag.{h,c} and {mdoc,man}_validate.c and into a formatting part including command line argument checking in term_tag.{h,c}, html.c, and {mdoc,man}_{term,html}.c. Immediate functional benefits include: * Improved prioritization of automatic tags for .Em and .Sy. * Avoiding bogus automatic tags when .Em, .Fn, or .Sy are explicitly tagged. * Explicit tagging of .Er and .Fl now works in HTML output. * Automatic tagging of .IP and .TP now works in HTML output. But mainly, this patch provides clean ground on which to build further improvements. Technical changes: * Main program: Write a tag file for ASCII and UTF-8 output only. * All formatters: There is no more need to delay writing the tags. * mdoc(7)+man(7) formatters: No more need for elaborate syntax tree inspection. * HTML formatter: If available, use the "string" attribute as the tag. * HTML formatter: New function to write permalinks, to reduce code duplication. Style cleanup in the vicinity while here: * mdoc(7) terminal formatter: To set up bold font for children, defer to termp_bold_pre() rather than calling term_fontpush() manually. * mdoc(7) terminal formatter: Garbage collect some duplicate functions. * mdoc(7) HTML formatter: Unify handling, delete redundant functions. * Where possible, use switch statements rather than if cascades. * Get rid of some more Yoda notation. The necessity for such changes was first discussed with kn@, but I didn't bother him with a request to review the resulting -673/+782 line patch. 
--- mdoc_term.c | 284 ++++++++++++++++++------------------------------------------ 1 file changed, 82 insertions(+), 202 deletions(-) (limited to 'mdoc_term.c') diff --git a/mdoc_term.c b/mdoc_term.c index 35e05809..f7e59cca 100644 --- a/mdoc_term.c +++ b/mdoc_term.c @@ -1,7 +1,7 @@ -/* $Id$ */ +/* $Id$ */ /* - * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010, 2012-2020 Ingo Schwarze + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2013 Franco Fichtner * * Permission to use, copy, modify, and distribute this software for any @@ -15,6 +15,9 @@ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Plain text formatter for mdoc(7), used by mandoc(1) + * for ASCII, UTF-8, PostScript, and PDF output. */ #include "config.h" @@ -33,7 +36,7 @@ #include "mdoc.h" #include "out.h" #include "term.h" -#include "tag.h" +#include "term_tag.h" #include "main.h" struct termpair { @@ -89,11 +92,8 @@ static int termp_bf_pre(DECL_ARGS); static int termp_bk_pre(DECL_ARGS); static int termp_bl_pre(DECL_ARGS); static int termp_bold_pre(DECL_ARGS); -static int termp_cd_pre(DECL_ARGS); static int termp_d1_pre(DECL_ARGS); static int termp_eo_pre(DECL_ARGS); -static int termp_em_pre(DECL_ARGS); -static int termp_er_pre(DECL_ARGS); static int termp_ex_pre(DECL_ARGS); static int termp_fa_pre(DECL_ARGS); static int termp_fd_pre(DECL_ARGS); @@ -115,8 +115,6 @@ static int termp_skip_pre(DECL_ARGS); static int termp_sm_pre(DECL_ARGS); static int termp_pp_pre(DECL_ARGS); static int termp_ss_pre(DECL_ARGS); -static int termp_sy_pre(DECL_ARGS); -static int termp_tag_pre(DECL_ARGS); static int termp_under_pre(DECL_ARGS); static int termp_vt_pre(DECL_ARGS); static int termp_xr_pre(DECL_ARGS); @@ -140,11 +138,11 @@ static const struct mdoc_term_act mdoc_term_acts[MDOC_MAX - MDOC_Dd] = { 
{ termp_an_pre, NULL }, /* An */ { termp_ap_pre, NULL }, /* Ap */ { termp_under_pre, NULL }, /* Ar */ - { termp_cd_pre, NULL }, /* Cd */ + { termp_fd_pre, NULL }, /* Cd */ { termp_bold_pre, NULL }, /* Cm */ { termp_li_pre, NULL }, /* Dv */ - { termp_er_pre, NULL }, /* Er */ - { termp_tag_pre, NULL }, /* Ev */ + { NULL, NULL }, /* Er */ + { NULL, NULL }, /* Ev */ { termp_ex_pre, NULL }, /* Ex */ { termp_fa_pre, NULL }, /* Fa */ { termp_fd_pre, termp_fd_post }, /* Fd */ @@ -191,7 +189,7 @@ static const struct mdoc_term_act mdoc_term_acts[MDOC_MAX - MDOC_Dd] = { { termp_quote_pre, termp_quote_post }, /* Dq */ { NULL, NULL }, /* Ec */ /* FIXME: no space */ { NULL, NULL }, /* Ef */ - { termp_em_pre, NULL }, /* Em */ + { termp_under_pre, NULL }, /* Em */ { termp_eo_pre, termp_eo_post }, /* Eo */ { termp_xx_pre, termp_xx_post }, /* Fx */ { termp_bold_pre, NULL }, /* Ms */ @@ -214,7 +212,7 @@ static const struct mdoc_term_act mdoc_term_acts[MDOC_MAX - MDOC_Dd] = { { termp_quote_pre, termp_quote_post }, /* Sq */ { termp_sm_pre, NULL }, /* Sm */ { termp_under_pre, NULL }, /* Sx */ - { termp_sy_pre, NULL }, /* Sy */ + { termp_bold_pre, NULL }, /* Sy */ { NULL, NULL }, /* Tn */ { termp_xx_pre, termp_xx_post }, /* Ux */ { NULL, NULL }, /* Xc */ @@ -246,8 +244,6 @@ static const struct mdoc_term_act mdoc_term_acts[MDOC_MAX - MDOC_Dd] = { { termp_skip_pre, NULL }, /* Tg */ }; -static int fn_prio = TAG_STRONG; - void terminal_mdoc(void *arg, const struct roff_meta *mdoc) @@ -300,7 +296,6 @@ terminal_mdoc(void *arg, const struct roff_meta *mdoc) static void print_mdoc_nodelist(DECL_ARGS) { - while (n != NULL) { print_mdoc_node(p, pair, meta, n); n = n->next; @@ -341,8 +336,7 @@ print_mdoc_node(DECL_ARGS) npair.ppair = pair; if (n->flags & NODE_ID) - tag_put(n->string == NULL ? n->child->string : n->string, - TAG_MANUAL, p->line); + term_tag_write(n, p->line); /* * Keeps only work until the end of a line. 
If a keep was @@ -1008,24 +1002,30 @@ termp_nm_pre(DECL_ARGS) p->flags |= TERMP_HANG; } } - - term_fontpush(p, TERMFONT_BOLD); - return 1; + return termp_bold_pre(p, pair, meta, n); } static void termp_nm_post(DECL_ARGS) { - - if (n->type == ROFFT_BLOCK) { + switch (n->type) { + case ROFFT_BLOCK: p->flags &= ~(TERMP_KEEP | TERMP_PREKEEP); - } else if (n->type == ROFFT_HEAD && - NULL != n->next && NULL != n->next->child) { + break; + case ROFFT_HEAD: + if (n->next == NULL || n->next->child == NULL) + break; term_flushln(p); p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND | TERMP_HANG); p->trailspace = 0; - } else if (n->type == ROFFT_BODY && n->child != NULL) - term_flushln(p); + break; + case ROFFT_BODY: + if (n->child != NULL) + term_flushln(p); + break; + default: + break; + } } static int @@ -1033,7 +1033,6 @@ termp_fl_pre(DECL_ARGS) { struct roff_node *nn; - termp_tag_pre(p, pair, meta, n); term_fontpush(p, TERMFONT_BOLD); term_word(p, "\\-"); @@ -1219,24 +1218,22 @@ synopsis_pre(struct termp *p, struct roff_node *n) static int termp_vt_pre(DECL_ARGS) { - - if (n->type == ROFFT_ELEM) { - synopsis_pre(p, n); - return termp_under_pre(p, pair, meta, n); - } else if (n->type == ROFFT_BLOCK) { + switch (n->type) { + case ROFFT_ELEM: + return termp_ft_pre(p, pair, meta, n); + case ROFFT_BLOCK: synopsis_pre(p, n); return 1; - } else if (n->type == ROFFT_HEAD) + case ROFFT_HEAD: return 0; - - return termp_under_pre(p, pair, meta, n); + default: + return termp_under_pre(p, pair, meta, n); + } } static int termp_bold_pre(DECL_ARGS) { - - termp_tag_pre(p, pair, meta, n); term_fontpush(p, TERMFONT_BOLD); return 1; } @@ -1244,7 +1241,6 @@ termp_bold_pre(DECL_ARGS) static int termp_fd_pre(DECL_ARGS) { - synopsis_pre(p, n); return termp_bold_pre(p, pair, meta, n); } @@ -1252,7 +1248,6 @@ termp_fd_pre(DECL_ARGS) static void termp_fd_post(DECL_ARGS) { - term_newln(p); } @@ -1273,23 +1268,14 @@ termp_sh_pre(DECL_ARGS) term_vspace(p); break; case ROFFT_HEAD: - term_fontpush(p, 
TERMFONT_BOLD); - break; + return termp_bold_pre(p, pair, meta, n); case ROFFT_BODY: p->tcol->offset = term_len(p, p->defindent); term_tab_set(p, NULL); term_tab_set(p, "T"); term_tab_set(p, ".5i"); - switch (n->sec) { - case SEC_DESCRIPTION: - fn_prio = TAG_STRONG; - break; - case SEC_AUTHORS: + if (n->sec == SEC_AUTHORS) p->flags &= ~(TERMP_SPLIT|TERMP_NOSPLIT); - break; - default: - break; - } break; default: break; @@ -1300,7 +1286,6 @@ termp_sh_pre(DECL_ARGS) static void termp_sh_post(DECL_ARGS) { - switch (n->type) { case ROFFT_HEAD: term_newln(p); @@ -1317,15 +1302,13 @@ termp_sh_post(DECL_ARGS) static void termp_lb_post(DECL_ARGS) { - - if (SEC_LIBRARY == n->sec && NODE_LINE & n->flags) + if (n->sec == SEC_LIBRARY && n->flags & NODE_LINE) term_newln(p); } static int termp_d1_pre(DECL_ARGS) { - if (n->type != ROFFT_BLOCK) return 1; term_newln(p); @@ -1339,11 +1322,8 @@ termp_d1_pre(DECL_ARGS) static int termp_ft_pre(DECL_ARGS) { - - /* NB: NODE_LINE does not effect this! */ synopsis_pre(p, n); - term_fontpush(p, TERMFONT_UNDER); - return 1; + return termp_under_pre(p, pair, meta, n); } static int @@ -1352,11 +1332,9 @@ termp_fn_pre(DECL_ARGS) size_t rmargin = 0; int pretty; - pretty = NODE_SYNPRETTY & n->flags; - synopsis_pre(p, n); - - if (NULL == (n = n->child)) + pretty = n->flags & NODE_SYNPRETTY; + if ((n = n->child) == NULL) return 0; if (pretty) { @@ -1370,9 +1348,6 @@ termp_fn_pre(DECL_ARGS) term_word(p, n->string); term_fontpop(p); - if (n->sec == SEC_DESCRIPTION || n->sec == SEC_CUSTOM) - tag_put(n->string, fn_prio++, p->line); - if (pretty) { term_flushln(p); p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND | TERMP_HANG); @@ -1407,7 +1382,6 @@ termp_fn_pre(DECL_ARGS) term_word(p, ";"); term_flushln(p); } - return 0; } @@ -1416,10 +1390,9 @@ termp_fa_pre(DECL_ARGS) { const struct roff_node *nn; - if (n->parent->tok != MDOC_Fo) { - term_fontpush(p, TERMFONT_UNDER); - return 1; - } + if (n->parent->tok != MDOC_Fo) + return termp_under_pre(p, pair, meta, n); 
+ for (nn = n->child; nn != NULL; nn = nn->next) { term_fontpush(p, TERMFONT_UNDER); p->flags |= TERMP_NBRWORD; @@ -1530,9 +1503,8 @@ termp_ss_pre(DECL_ARGS) term_vspace(p); break; case ROFFT_HEAD: - term_fontpush(p, TERMFONT_BOLD); p->tcol->offset = term_len(p, (p->defindent+1)/2); - break; + return termp_bold_pre(p, pair, meta, n); case ROFFT_BODY: p->tcol->offset = term_len(p, p->defindent); term_tab_set(p, NULL); @@ -1552,22 +1524,11 @@ termp_ss_post(DECL_ARGS) term_newln(p); } -static int -termp_cd_pre(DECL_ARGS) -{ - - synopsis_pre(p, n); - term_fontpush(p, TERMFONT_BOLD); - return 1; -} - static int termp_in_pre(DECL_ARGS) { - synopsis_pre(p, n); - - if (NODE_SYNPRETTY & n->flags && NODE_LINE & n->flags) { + if (n->flags & NODE_SYNPRETTY && n->flags & NODE_LINE) { term_fontpush(p, TERMFONT_BOLD); term_word(p, "#include"); term_word(p, "<"); @@ -1575,7 +1536,6 @@ termp_in_pre(DECL_ARGS) term_word(p, "<"); term_fontpush(p, TERMFONT_UNDER); } - p->flags |= TERMP_NOSPACE; return 1; } @@ -1583,21 +1543,17 @@ termp_in_pre(DECL_ARGS) static void termp_in_post(DECL_ARGS) { - - if (NODE_SYNPRETTY & n->flags) + if (n->flags & NODE_SYNPRETTY) term_fontpush(p, TERMFONT_BOLD); - p->flags |= TERMP_NOSPACE; term_word(p, ">"); - - if (NODE_SYNPRETTY & n->flags) + if (n->flags & NODE_SYNPRETTY) term_fontpop(p); } static int termp_pp_pre(DECL_ARGS) { - fn_prio = TAG_STRONG; term_vspace(p); return 0; } @@ -1605,14 +1561,12 @@ termp_pp_pre(DECL_ARGS) static int termp_skip_pre(DECL_ARGS) { - return 0; } static int termp_quote_pre(DECL_ARGS) { - if (n->type != ROFFT_BODY && n->type != ROFFT_ELEM) return 1; @@ -1769,17 +1723,15 @@ termp_eo_post(DECL_ARGS) static int termp_fo_pre(DECL_ARGS) { - size_t rmargin = 0; - int pretty; - - pretty = NODE_SYNPRETTY & n->flags; + size_t rmargin; - if (n->type == ROFFT_BLOCK) { + switch (n->type) { + case ROFFT_BLOCK: synopsis_pre(p, n); return 1; - } else if (n->type == ROFFT_BODY) { - if (pretty) { - rmargin = p->tcol->rmargin; + case 
ROFFT_BODY: + rmargin = p->tcol->rmargin; + if (n->flags & NODE_SYNPRETTY) { p->tcol->rmargin = p->tcol->offset + term_len(p, 4); p->flags |= TERMP_NOBREAK | TERMP_BRIND | TERMP_HANG; @@ -1787,7 +1739,7 @@ termp_fo_pre(DECL_ARGS) p->flags |= TERMP_NOSPACE; term_word(p, "("); p->flags |= TERMP_NOSPACE; - if (pretty) { + if (n->flags & NODE_SYNPRETTY) { term_flushln(p); p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND | TERMP_HANG); @@ -1796,30 +1748,21 @@ termp_fo_pre(DECL_ARGS) p->tcol->rmargin = rmargin; } return 1; + default: + return termp_bold_pre(p, pair, meta, n); } - - if (NULL == n->child) - return 0; - - /* XXX: we drop non-initial arguments as per groff. */ - - assert(n->child->string); - term_fontpush(p, TERMFONT_BOLD); - term_word(p, n->child->string); - return 0; } static void termp_fo_post(DECL_ARGS) { - if (n->type != ROFFT_BODY) return; p->flags |= TERMP_NOSPACE; term_word(p, ")"); - if (NODE_SYNPRETTY & n->flags) { + if (n->flags & NODE_SYNPRETTY) { p->flags |= TERMP_NOSPACE; term_word(p, ";"); term_flushln(p); @@ -1829,29 +1772,30 @@ termp_fo_post(DECL_ARGS) static int termp_bf_pre(DECL_ARGS) { - - if (n->type == ROFFT_HEAD) + switch (n->type) { + case ROFFT_HEAD: return 0; - else if (n->type != ROFFT_BODY) + case ROFFT_BODY: + break; + default: return 1; - - if (FONT_Em == n->norm->Bf.font) - term_fontpush(p, TERMFONT_UNDER); - else if (FONT_Sy == n->norm->Bf.font) - term_fontpush(p, TERMFONT_BOLD); - else - term_fontpush(p, TERMFONT_NONE); - - return 1; + } + switch (n->norm->Bf.font) { + case FONT_Em: + return termp_under_pre(p, pair, meta, n); + case FONT_Sy: + return termp_bold_pre(p, pair, meta, n); + default: + return termp_li_pre(p, pair, meta, n); + } } static int termp_sm_pre(DECL_ARGS) { - - if (NULL == n->child) + if (n->child == NULL) p->flags ^= TERMP_NONOSPACE; - else if (0 == strcmp("on", n->child->string)) + else if (strcmp(n->child->string, "on") == 0) p->flags &= ~TERMP_NONOSPACE; else p->flags |= TERMP_NONOSPACE; @@ -1865,7 +1809,6 @@ 
termp_sm_pre(DECL_ARGS) static int termp_ap_pre(DECL_ARGS) { - p->flags |= TERMP_NOSPACE; term_word(p, "'"); p->flags |= TERMP_NOSPACE; @@ -1904,8 +1847,6 @@ termp____post(DECL_ARGS) static int termp_li_pre(DECL_ARGS) { - - termp_tag_pre(p, pair, meta, n); term_fontpush(p, TERMFONT_NONE); return 1; } @@ -1955,7 +1896,6 @@ termp_lk_pre(DECL_ARGS) static int termp_bk_pre(DECL_ARGS) { - switch (n->type) { case ROFFT_BLOCK: break; @@ -1968,106 +1908,46 @@ termp_bk_pre(DECL_ARGS) default: abort(); } - return 1; } static void termp_bk_post(DECL_ARGS) { - if (n->type == ROFFT_BODY) p->flags &= ~(TERMP_KEEP | TERMP_PREKEEP); } +/* + * If we are in an `Rs' and there is a journal present, + * then quote us instead of underlining us (for disambiguation). + */ static void termp__t_post(DECL_ARGS) { - - /* - * If we're in an `Rs' and there's a journal present, then quote - * us instead of underlining us (for disambiguation). - */ - if (n->parent && MDOC_Rs == n->parent->tok && + if (n->parent != NULL && n->parent->tok == MDOC_Rs && n->parent->norm->Rs.quote_T) termp_quote_post(p, pair, meta, n); - termp____post(p, pair, meta, n); } static int termp__t_pre(DECL_ARGS) { - - /* - * If we're in an `Rs' and there's a journal present, then quote - * us instead of underlining us (for disambiguation). 
- */ - if (n->parent && MDOC_Rs == n->parent->tok && + if (n->parent != NULL && n->parent->tok == MDOC_Rs && n->parent->norm->Rs.quote_T) return termp_quote_pre(p, pair, meta, n); - - term_fontpush(p, TERMFONT_UNDER); - return 1; + else + return termp_under_pre(p, pair, meta, n); } static int termp_under_pre(DECL_ARGS) { - - term_fontpush(p, TERMFONT_UNDER); - return 1; -} - -static int -termp_em_pre(DECL_ARGS) -{ - if (n->child != NULL && - n->child->type == ROFFT_TEXT) - tag_put(n->child->string, TAG_FALLBACK, p->line); term_fontpush(p, TERMFONT_UNDER); return 1; } -static int -termp_sy_pre(DECL_ARGS) -{ - if (n->child != NULL && - n->child->type == ROFFT_TEXT) - tag_put(n->child->string, TAG_FALLBACK, p->line); - term_fontpush(p, TERMFONT_BOLD); - return 1; -} - -static int -termp_er_pre(DECL_ARGS) -{ - - if (n->sec == SEC_ERRORS && - (n->parent->tok == MDOC_It || - (n->parent->tok == MDOC_Bq && - n->parent->parent->parent->tok == MDOC_It))) - tag_put(n->child->string, TAG_STRONG, p->line); - return 1; -} - -static int -termp_tag_pre(DECL_ARGS) -{ - - if (n->child != NULL && - n->child->type == ROFFT_TEXT && - (n->prev == NULL || - (n->prev->type == ROFFT_TEXT && - strcmp(n->prev->string, "|") == 0)) && - (n->parent->tok == MDOC_It || - (n->parent->tok == MDOC_Xo && - n->parent->parent->prev == NULL && - n->parent->parent->parent->tok == MDOC_It))) - tag_put(n->child->string, TAG_STRONG, p->line); - return 1; -} - static int termp_abort_pre(DECL_ARGS) { -- cgit