/* $Id$ */
/*
* Copyright (c) 2015,2016,2018,2019,2020 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Functions to tag syntax tree nodes.
* For internal use by mandoc(1) validation modules only.
*/
#include "config.h"
#include <sys/types.h>
#include <assert.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "mandoc_aux.h"
#include "mandoc_ohash.h"
#include "roff.h"
#include "mdoc.h"
#include "roff_int.h"
#include "tag.h"
struct tag_entry {
struct roff_node **nodes;
size_t maxnodes;
size_t nnodes;
int prio;
char s[];
};
static void tag_move_href(struct roff_man *,
struct roff_node *, const char *);
static void tag_move_id(struct roff_node *);
static struct ohash tag_data;
/*
* Set up the ohash table to collect nodes
* where various marked-up terms are documented.
*/
void
tag_alloc(void)
{
mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
}
void
tag_free(void)
{
struct tag_entry *entry;
unsigned int slot;
if (tag_data.info.free == NULL)
return;
entry = ohash_first(&tag_data, &slot);
while (entry != NULL) {
free(entry->nodes);
free(entry);
entry = ohash_next(&tag_data, &slot);
}
ohash_delete(&tag_data);
tag_data.info.free = NULL;
}
/*
* Set a node where a term is defined,
* unless it is already defined at a lower priority.
*/
void
tag_put(const char *s, int prio, struct roff_node *n)
{
struct tag_entry *entry;
struct roff_node *nold;
const char *se;
size_t len;
unsigned int slot;
assert(prio <= TAG_FALLBACK);
if (s == NULL) {
if (n->child == NULL || n->child->type != ROFFT_TEXT)
return;
s = n->child->string;
switch (s[0]) {
case '-':
s++;
break;
case '\\':
switch (s[1]) {
case '&':
case '-':
case 'e':
s += 2;
break;
default:
break;
}
break;
default:
break;
}
}
/*
* Skip whitespace and escapes and whatever follows,
* and if there is any, downgrade the priority.
*/
len = strcspn(s, " \t\\");
if (len == 0)
return;
se = s + len;
if (*se != '\0' && prio < TAG_WEAK)
prio = TAG_WEAK;
slot = ohash_qlookupi(&tag_data, s, &se);
entry = ohash_find(&tag_data, slot);
/* Build a new entry. */
if (entry == NULL) {
entry = mandoc_malloc(sizeof(*entry) + len + 1);
memcpy(entry->s, s, len);
entry->s[len] = '\0';
entry->nodes = NULL;
entry->maxnodes = entry->nnodes = 0;
ohash_insert(&tag_data, slot, entry);
}
/*
* Lower priority numbers take precedence.
* If a better entry is already present, ignore the new one.
*/
else if (entry->prio < prio)
return;
/*
* If the existing entry is worse, clear it.
* In addition, a tag with priority TAG_FALLBACK
* is only used if the tag occurs exactly once.
*/
else if (entry->prio > prio || prio == TAG_FALLBACK) {
while (entry->nnodes > 0) {
nold = entry->nodes[--entry->nnodes];
nold->flags &= ~NODE_ID;
free(nold->tag);
nold->tag = NULL;
}
if (prio == TAG_FALLBACK) {
entry->prio = TAG_DELETE;
return;
}
}
/* Remember the new node. */
if (entry->maxnodes == entry->nnodes) {
entry->maxnodes += 4;
entry->nodes = mandoc_reallocarray(entry->nodes,
entry->maxnodes, sizeof(*entry->nodes));
}
entry->nodes[entry->nnodes++] = n;
entry->prio = prio;
n->flags |= NODE_ID;
if (n->child == NULL || n->child->string != s || *se != '\0') {
assert(n->tag == NULL);
n->tag = mandoc_strndup(s, len);
}
}
int
tag_exists(const char *tag)
{
return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
}
/*
* For in-line elements, move the link target
* to the enclosing paragraph when appropriate.
*/
static void
tag_move_id(struct roff_node *n)
{
struct roff_node *np;
np = n;
for (;;) {
if (np->prev != NULL)
np = np->prev;
else if ((np = np->parent) == NULL)
return;
switch (np->tok) {
case MDOC_It:
switch (np->parent->parent->norm->Bl.type) {
case LIST_column:
/* Target the ROFFT_BLOCK = <tr>. */
np = np->parent;
break;
case LIST_diag:
case LIST_hang:
case LIST_inset:
case LIST_ohang:
case LIST_tag:
/* Target the ROFFT_HEAD = <dt>. */
np = np->parent->head;
break;
default:
/* Target the ROFF_BODY = <li>. */
break;
}
/* FALLTHROUGH */
case MDOC_Pp: /* Target the ROFFT_ELEM = <p>. */
if (np->tag == NULL) {
np->tag = mandoc_strdup(n->tag == NULL ?
n->child->string : n->tag);
np->flags |= NODE_ID;
n->flags &= ~NODE_ID;
}
return;
case MDOC_Sh:
case MDOC_Ss:
case MDOC_Bd:
case MDOC_Bl:
case MDOC_D1:
case MDOC_Dl:
case MDOC_Rs:
/* Do not move past major blocks. */
return;
default:
/*
* Move past in-line content and partial
* blocks, for example .It Xo or .It Bq Er.
*/
break;
}
}
}
/*
* When a paragraph is tagged and starts with text,
* move the permalink to the first few words.
*/
static void
tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
{
char *cp;
if (n == NULL || n->type != ROFFT_TEXT ||
*n->string == '\0' || *n->string == ' ')
return;
cp = n->string;
while (cp != NULL && cp - n->string < 5)
cp = strchr(cp + 1, ' ');
/* If the first text node is longer, split it. */
if (cp != NULL && cp[1] != '\0') {
man->last = n;
man->next = ROFF_NEXT_SIBLING;
roff_word_alloc(man, n->line,
n->pos + (cp - n->string), cp + 1);
man->last->flags = n->flags & ~NODE_LINE;
*cp = '\0';
}
assert(n->tag == NULL);
n->tag = mandoc_strdup(tag);
n->flags |= NODE_HREF;
}
/*
* When all tags have been set, decide where to put
* the associated permalinks, and maybe move some tags
* to the beginning of the respective paragraphs.
*/
void
tag_postprocess(struct roff_man *man, struct roff_node *n)
{
if (n->flags & NODE_ID) {
switch (n->tok) {
case MDOC_Pp:
tag_move_href(man, n->next, n->tag);
break;
case MDOC_Bd:
case MDOC_D1:
case MDOC_Dl:
tag_move_href(man, n->child, n->tag);
break;
case MDOC_Bl:
/* XXX No permalink for now. */
break;
default:
if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
tag_move_id(n);
if (n->tok != MDOC_Tg)
n->flags |= NODE_HREF;
else if ((n->flags & NODE_ID) == 0) {
n->flags |= NODE_NOPRT;
free(n->tag);
n->tag = NULL;
}
break;
}
}
for (n = n->child; n != NULL; n = n->next)
tag_postprocess(man, n);
}