summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Ingo Schwarze <schwarze@openbsd.org> 2023-11-24 05:02:18 +0000
committer: Ingo Schwarze <schwarze@openbsd.org> 2023-11-24 05:02:18 +0000
commit: 7c821146e12f71433cc5a7c3ee230f03a36997e1 (patch)
tree: ad2b5e4d2b2134325706db645fd0ae3d090685fd
parent: 501a9474db7fcfba9e8035ca739d8e215ad93fa6 (diff)
download: mandoc-7c821146e12f71433cc5a7c3ee230f03a36997e1.tar.gz
1. Do not put ASCII_HYPH (0x1c) into the tag file.
That happened when tagging a string containing '-' on an input text line, most commonly in man(7) .TP next line scope.
2. Do not let "\-" end the tag.
In both cases, translate ASCII_HYPH and "\-" to plain '-' for output.
For example, this improves handling of unbound.conf(5).
These two bugs were found thanks to a posting by weerd@.
-rw-r--r-- TODO 11
-rw-r--r-- regress/mdoc/Cm/tag.out_html 2
-rw-r--r-- regress/mdoc/Cm/tag.out_tag 2
-rw-r--r-- tag.c 57
4 files changed, 47 insertions, 25 deletions
diff --git a/TODO b/TODO
index 2789747b..5882c8ad 100644
--- a/TODO
+++ b/TODO
@@ -76,11 +76,6 @@ are mere guesses, and some may be wrong.
to Nab 8 Aug 2023 20:05:32 +0200 Subject: if/ie d condition always true
loc ** exist *** algo *** size ** imp *
-- tag.c, tag_put() should not put ASCII_HYPH into the tag file,
- which happens when the tag contains "-" on the input side
- weerd@ 28 Sep 2021 12:44:07 +0200
- loc * exist * algo * size * imp ***
-
************************************************************************
* missing features
@@ -324,12 +319,6 @@ are mere guesses, and some may be wrong.
mail to sternenseemann 19 Aug 2021 19:11:50 +0200
loc * exist ** algo ** size * imp **
-- tag.c, tag_put() and callers like man_validate.c, check_tag()
- should not mistake "\-" as a word-ending escape sequence but
- instead translate it to plain "-" in the tag name
- weerd@ 28 Sep 2021 12:44:07 +0200
- loc ** exist * algo * size * imp ***
-
- handle Unicode letters in tags in both HTML and terminal output
thread "section headers with diacritics" starting with
Mario Blaettermann 24 Mar 2022 18:13:23 +0100
diff --git a/regress/mdoc/Cm/tag.out_html b/regress/mdoc/Cm/tag.out_html
index 5141f52a..ceadb4b5 100644
--- a/regress/mdoc/Cm/tag.out_html
+++ b/regress/mdoc/Cm/tag.out_html
@@ -7,7 +7,7 @@
<dd>text</dd>
<dt id="hyphen"><a class="permalink" href="#hyphen"><code class="Cm">-hyphen</code></a></dt>
<dd>text</dd>
- <dt id="minus"><a class="permalink" href="#minus"><code class="Cm">-minus-sign</code></a></dt>
+ <dt id="minus-sign"><a class="permalink" href="#minus-sign"><code class="Cm">-minus-sign</code></a></dt>
<dd>text</dd>
<dt id="backslash"><a class="permalink" href="#backslash"><code class="Cm">\backslash</code></a></dt>
<dd>text</dd>
diff --git a/regress/mdoc/Cm/tag.out_tag b/regress/mdoc/Cm/tag.out_tag
index a59da516..d6bd49a5 100644
--- a/regress/mdoc/Cm/tag.out_tag
+++ b/regress/mdoc/Cm/tag.out_tag
@@ -4,6 +4,6 @@ one tag.mandoc_ascii 9
two tag.mandoc_ascii 9
three tag.mandoc_ascii 12
hyphen tag.mandoc_ascii 14
-minus tag.mandoc_ascii 17
+minus-sign tag.mandoc_ascii 17
backslash tag.mandoc_ascii 20
four tag.mandoc_ascii 22
diff --git a/tag.c b/tag.c
index 40a41eac..343edb67 100644
--- a/tag.c
+++ b/tag.c
@@ -1,6 +1,6 @@
/* $Id$ */
/*
- * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022
+ * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023
* Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
@@ -26,11 +26,13 @@
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mandoc_aux.h"
#include "mandoc_ohash.h"
+#include "mandoc.h"
#include "roff.h"
#include "mdoc.h"
#include "roff_int.h"
@@ -88,9 +90,11 @@ tag_put(const char *s, int prio, struct roff_node *n)
{
struct tag_entry *entry;
struct roff_node *nold;
- const char *se;
+ const char *se, *src;
+ char *cpy;
size_t len;
unsigned int slot;
+ int changed;
assert(prio <= TAG_FALLBACK);
@@ -106,6 +110,7 @@ tag_put(const char *s, int prio, struct roff_node *n)
/* Determine the implicit tag. */
+ changed = 1;
if (s == NULL) {
if (n->child == NULL || n->child->type != ROFFT_TEXT)
return;
@@ -122,27 +127,53 @@ tag_put(const char *s, int prio, struct roff_node *n)
s += 2;
break;
default:
- break;
+ return;
}
break;
default:
+ changed = 0;
break;
}
}
/*
+ * Translate \- and ASCII_HYPH to plain '-'.
* Skip whitespace and escapes and whatever follows,
* and if there is any, downgrade the priority.
*/
- len = strcspn(s, " \t\\");
+ cpy = mandoc_malloc(strlen(s) + 1);
+ for (src = s, len = 0; *src != '\0'; src++, len++) {
+ switch (*src) {
+ case '\t':
+ case ' ':
+ changed = 1;
+ break;
+ case ASCII_HYPH:
+ cpy[len] = '-';
+ changed = 1;
+ continue;
+ case '\\':
+ if (src[1] != '-')
+ break;
+ src++;
+ changed = 1;
+ /* FALLTHROUGH */
+ default:
+ cpy[len] = *src;
+ continue;
+ }
+ break;
+ }
if (len == 0)
- return;
+ goto out;
+ cpy[len] = '\0';
- se = s + len;
- if (*se != '\0' && prio < TAG_WEAK)
+ if (*src != '\0' && prio < TAG_WEAK)
prio = TAG_WEAK;
+ s = cpy;
+ se = cpy + len;
slot = ohash_qlookupi(&tag_data, s, &se);
entry = ohash_find(&tag_data, slot);
@@ -150,8 +181,7 @@ tag_put(const char *s, int prio, struct roff_node *n)
if (entry == NULL) {
entry = mandoc_malloc(sizeof(*entry) + len + 1);
- memcpy(entry->s, s, len);
- entry->s[len] = '\0';
+ memcpy(entry->s, s, len + 1);
entry->nodes = NULL;
entry->maxnodes = entry->nnodes = 0;
ohash_insert(&tag_data, slot, entry);
@@ -163,7 +193,7 @@ tag_put(const char *s, int prio, struct roff_node *n)
*/
else if (entry->prio < prio)
- return;
+ goto out;
/*
* If the existing entry is worse, clear it.
@@ -180,7 +210,7 @@ tag_put(const char *s, int prio, struct roff_node *n)
}
if (prio == TAG_FALLBACK) {
entry->prio = TAG_DELETE;
- return;
+ goto out;
}
}
@@ -194,10 +224,13 @@ tag_put(const char *s, int prio, struct roff_node *n)
entry->nodes[entry->nnodes++] = n;
entry->prio = prio;
n->flags |= NODE_ID;
- if (n->child == NULL || n->child->string != s || *se != '\0') {
+ if (changed) {
assert(n->tag == NULL);
n->tag = mandoc_strndup(s, len);
}
+
+ out:
+ free(cpy);
}
int