summaryrefslogblamecommitdiffstats
path: root/tag.c
blob: 343edb67213705ab26c8119f838c4f992817f48a (plain) (tree)
1
2
3
4
          
  
                                                         
                                                     











                                                                           


                                                         
   

                   

                      
                   
                   
                   
                   
                  

                   
 
                       
                         
                   
                 
                 
                     


                  


                                 
                      


                     

                                                                  

                                                         
                                  


  

                                                
   

               
 

                                                                       
 




                                       
 

                                       




                                                     
         
                                
                                  


  
                                      
                                                          

    
                                                     

                                       
                                      

                                          
                                     
                                      
                                         
 
                                     
 











                                                                  
                    



                                                                     











                                       
                                       


                              
                                    

                              
         

          
                                                    
                                                            


                                                       






















                                                            
                     

                         
 
                                            
                                
 

                       
                                                 
                                            
 
                                
 
                            
                                                                
                                             

                                                    
                                                     
         
 



                                                                    
 
                                    
                         
 




                                                        
 
                                                              





                                                             

                                                 
                                 
                 

         
                                    
 



                                                                
         
                                          
                           
                            
                      

                                                
         


                  

 

                           
 
                                                                            
 




































                                                                    


                                                                        























                                                                   
































                                                                         




                                                      
                                                          


                                 
                             
                                                            
                              
                             



                                                             
                             






                                                                       
                                                             
                                                       


                                              



                                                  
                                        
 
/* $Id$ */
/*
 * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023
 *               Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Functions to tag syntax tree nodes.
 * For internal use by mandoc(1) validation modules only.
 */
#include "config.h"

#include <sys/types.h>

#include <assert.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "mandoc_aux.h"
#include "mandoc_ohash.h"
#include "mandoc.h"
#include "roff.h"
#include "mdoc.h"
#include "roff_int.h"
#include "tag.h"

/*
 * One entry of the tag table:
 * a term together with the nodes that are tagged with it.
 * The term is stored inline after the fixed-size members,
 * so entries are allocated as sizeof(struct) + strlen + 1.
 */
struct tag_entry {
	struct roff_node	**nodes;	/* nodes tagged with this term */
	size_t			  maxnodes;	/* slots allocated in nodes[] */
	size_t			  nnodes;	/* slots in use in nodes[] */
	int			  prio;		/* lower numbers take precedence */
	char			  s[];		/* the term, NUL-terminated */
};

/* Helpers called from tag_postprocess() to relocate permalinks and tags. */
static void		 tag_move_href(struct roff_man *man,
				struct roff_node *n, const char *tag);
static void		 tag_move_id(struct roff_node *n);

/* Table of struct tag_entry objects, one per term tagged so far. */
static struct ohash	 tag_data;


/*
 * Initialize the file-wide ohash table that collects the nodes
 * where various marked-up terms are documented.
 * The table locates the term inside each entry by means of
 * the offset of the trailing string member of struct tag_entry.
 */
void
tag_alloc(void)
{
	const size_t	 term_offset = offsetof(struct tag_entry, s);

	mandoc_ohash_init(&tag_data, 4, term_offset);
}

/*
 * Release all entries of the tag table and the table itself.
 * The info.free pointer doubles as the "table is in use" marker:
 * it is reset to NULL at the end, and a call that finds it NULL
 * returns without touching the table.
 */
void
tag_free(void)
{
	struct tag_entry	*ent;
	unsigned int		 idx;

	if (tag_data.info.free == NULL)
		return;

	for (ent = ohash_first(&tag_data, &idx); ent != NULL;
	    ent = ohash_next(&tag_data, &idx)) {
		/* The node array is owned by the entry, the nodes are not. */
		free(ent->nodes);
		free(ent);
	}
	ohash_delete(&tag_data);
	tag_data.info.free = NULL;
}

/*
 * Record that the node n documents the term s with priority prio.
 *
 * s:    The term.  If NULL, the term is taken from the first child
 *       of n, which has to be a text node; one leading "-", "\&",
 *       "\-", or "\e" is skipped, and any other leading escape
 *       sequence prevents tagging.
 * prio: The priority, at most TAG_FALLBACK.
 *       Lower priority numbers take precedence.
 * n:    The node to tag.  Nodes that already have NODE_ID set
 *       are left alone.
 *
 * Nothing is recorded if the term is already defined with a better
 * priority, that is, with a lower priority number.  Existing nodes
 * of worse priority lose their tags.  On success, NODE_ID is set
 * on n, and n->tag is filled in unless the tag is exactly the
 * string of the first child of n.
 */
void
tag_put(const char *s, int prio, struct roff_node *n)
{
	struct tag_entry	*entry;
	struct roff_node	*nold;
	const char		*se, *src;
	char			*cpy;
	size_t			 len;
	unsigned int		 slot;
	int			 changed;  /* Tag differs from child text. */

	assert(prio <= TAG_FALLBACK);

	/*
	 * If the node is already tagged, the existing tag is
	 * explicit and we are now about to add an implicit tag.
	 * Don't do that; just skip implicit tagging if the author
	 * specified an explicit tag.
	 */

	if (n->flags & NODE_ID)
		return;

	/* Determine the implicit tag. */

	changed = 1;
	if (s == NULL) {
		if (n->child == NULL || n->child->type != ROFFT_TEXT)
			return;
		s = n->child->string;
		switch (s[0]) {
		case '-':
			s++;
			break;
		case '\\':
			switch (s[1]) {
			case '&':
			case '-':
			case 'e':
				s += 2;
				break;
			default:
				return;
			}
			break;
		default:
			/* So far, the tag is the unmodified child text. */
			changed = 0;
			break;
		}
	}

	/*
	 * Translate \- and ASCII_HYPH to plain '-'.
	 * Skip whitespace and escapes and whatever follows,
	 * and if there is any, downgrade the priority.
	 * Inside the switch, "continue" means that one byte
	 * was copied, while "break" ends the tag.
	 */

	cpy = mandoc_malloc(strlen(s) + 1);
	for (src = s, len = 0; *src != '\0'; src++, len++) {
		switch (*src) {
		case '\t':
		case ' ':
			changed = 1;
			break;
		case ASCII_HYPH:
			cpy[len] = '-';
			changed = 1;
			continue;
		case '\\':
			if (src[1] != '-') {
				/*
				 * The tag ends before this escape, so
				 * it is shorter than the child text
				 * and must be stored in n->tag, just
				 * like when it ends at whitespace.
				 */
				changed = 1;
				break;
			}
			src++;
			changed = 1;
			/* FALLTHROUGH */
		default:
			cpy[len] = *src;
			continue;
		}
		break;
	}
	if (len == 0)
		goto out;
	cpy[len] = '\0';

	/* Input remaining after the tag means it was truncated. */

	if (*src != '\0' && prio < TAG_WEAK)
		prio = TAG_WEAK;

	s = cpy;
	se = cpy + len;
	slot = ohash_qlookupi(&tag_data, s, &se);
	entry = ohash_find(&tag_data, slot);

	/* Build a new entry. */

	if (entry == NULL) {
		entry = mandoc_malloc(sizeof(*entry) + len + 1);
		memcpy(entry->s, s, len + 1);
		entry->nodes = NULL;
		entry->maxnodes = entry->nnodes = 0;
		ohash_insert(&tag_data, slot, entry);
	}

	/*
	 * Lower priority numbers take precedence.
	 * If a better entry is already present, ignore the new one.
	 */

	else if (entry->prio < prio)
		goto out;

	/*
	 * If the existing entry is worse, clear it.
	 * In addition, a tag with priority TAG_FALLBACK
	 * is only used if the tag occurs exactly once.
	 */

	else if (entry->prio > prio || prio == TAG_FALLBACK) {
		while (entry->nnodes > 0) {
			nold = entry->nodes[--entry->nnodes];
			nold->flags &= ~NODE_ID;
			free(nold->tag);
			nold->tag = NULL;
		}
		if (prio == TAG_FALLBACK) {
			entry->prio = TAG_DELETE;
			goto out;
		}
	}

	/* Remember the new node, growing the array four slots at a time. */

	if (entry->maxnodes == entry->nnodes) {
		entry->maxnodes += 4;
		entry->nodes = mandoc_reallocarray(entry->nodes,
		    entry->maxnodes, sizeof(*entry->nodes));
	}
	entry->nodes[entry->nnodes++] = n;
	entry->prio = prio;
	n->flags |= NODE_ID;
	if (changed) {
		assert(n->tag == NULL);
		n->tag = mandoc_strndup(s, len);
	}

 out:
	free(cpy);
}

/*
 * Report whether the tag table contains an entry for the given term.
 * Returns non-zero if it does and zero otherwise.
 */
int
tag_exists(const char *tag)
{
	unsigned int	 slot;

	slot = ohash_qlookup(&tag_data, tag);
	return ohash_find(&tag_data, slot) != NULL;
}

/*
 * For an in-line element, walk backwards through the preceding
 * siblings and upwards through the parents, looking for the
 * enclosing paragraph or list item.  If one is found before
 * hitting a major block and it has no tag yet, hand the link
 * target over to it and take NODE_ID away from the element.
 */
static void
tag_move_id(struct roff_node *n)
{
	struct roff_node	*dest;
	const char		*name;

	dest = n;
	for (;;) {
		/* Step to the previous sibling, or else to the parent. */
		if (dest->prev != NULL)
			dest = dest->prev;
		else {
			dest = dest->parent;
			if (dest == NULL)
				return;
		}

		switch (dest->tok) {
		case MDOC_Sh:
		case MDOC_Ss:
		case MDOC_Bd:
		case MDOC_Bl:
		case MDOC_D1:
		case MDOC_Dl:
		case MDOC_Rs:
			/* Do not move past major blocks. */
			return;
		case MDOC_Pp:
			/* Target the ROFFT_ELEM = <p>. */
			break;
		case MDOC_It:
			switch (dest->parent->parent->norm->Bl.type) {
			case LIST_column:
				/* Target the ROFFT_BLOCK = <tr>. */
				dest = dest->parent;
				break;
			case LIST_diag:
			case LIST_hang:
			case LIST_inset:
			case LIST_ohang:
			case LIST_tag:
				/* Target the ROFFT_HEAD = <dt>. */
				dest = dest->parent->head;
				break;
			default:
				/* Target the ROFFT_BODY = <li>. */
				break;
			}
			break;
		default:
			/*
			 * Move past in-line content and partial
			 * blocks, for example .It Xo or .It Bq Er.
			 */
			continue;
		}
		break;
	}

	/* A destination that is already tagged keeps its own tag. */

	if (dest->tag != NULL)
		return;

	/* Without an explicit tag, the element is named by its text. */

	name = n->tag != NULL ? n->tag : n->child->string;
	dest->tag = mandoc_strdup(name);
	dest->flags |= NODE_ID;
	n->flags &= ~NODE_ID;
}

/*
 * When a tagged paragraph starts with text, attach the permalink
 * to the first few words of that text: the node n receives a copy
 * of the tag and the NODE_HREF flag.  Nothing happens unless n is
 * a non-empty text node that does not start with a blank.
 */
static void
tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
{
	char	*text, *blank;

	if (n == NULL || n->type != ROFFT_TEXT)
		return;
	text = n->string;
	if (text[0] == '\0' || text[0] == ' ')
		return;

	/* Find the first blank that is at least five bytes in. */

	for (blank = text; blank != NULL && blank - text < 5; )
		blank = strchr(blank + 1, ' ');

	/*
	 * If more text follows that blank, split the node there:
	 * the rest becomes a new sibling right after n, inheriting
	 * the flags of n except NODE_LINE, and n is cut at the blank.
	 * NOTE(review): the position passed for the new word is the
	 * column of the blank, not of the word itself - confirm
	 * whether that is intended.
	 */

	if (blank != NULL && blank[1] != '\0') {
		man->last = n;
		man->next = ROFF_NEXT_SIBLING;
		roff_word_alloc(man, n->line,
		    n->pos + (blank - text), blank + 1);
		man->last->flags = n->flags & ~NODE_LINE;
		*blank = '\0';
	}

	assert(n->tag == NULL);
	n->tag = mandoc_strdup(tag);
	n->flags |= NODE_HREF;
}

/*
 * After all tags have been set, walk the subtree rooted at n
 * and decide where to put the associated permalinks, moving
 * some tags to the beginning of the respective paragraphs.
 * Each node is handled before its children.
 */
void
tag_postprocess(struct roff_man *man, struct roff_node *n)
{
	struct roff_node	*kid;

	if ((n->flags & NODE_ID) == 0)
		goto children;

	switch (n->tok) {
	case MDOC_Bl:
		/* XXX No permalink for now. */
		break;
	case MDOC_Pp:
		/* The paragraph text follows the macro as a sibling. */
		tag_move_href(man, n->next, n->tag);
		break;
	case MDOC_Bd:
	case MDOC_D1:
	case MDOC_Dl:
		/* The displayed text is inside the block. */
		tag_move_href(man, n->child, n->tag);
		break;
	default:
		if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
			tag_move_id(n);
		if (n->tok == MDOC_Tg) {
			/*
			 * If the ID was moved away from a Tg node,
			 * the node is marked NODE_NOPRT and its
			 * tag is discarded.
			 */
			if ((n->flags & NODE_ID) == 0) {
				n->flags |= NODE_NOPRT;
				free(n->tag);
				n->tag = NULL;
			}
		} else
			n->flags |= NODE_HREF;
		break;
	}

 children:
	/*
	 * Read the next pointer only after handling the child
	 * because handling it may insert a new sibling after it.
	 */
	kid = n->child;
	while (kid != NULL) {
		tag_postprocess(man, kid);
		kid = kid->next;
	}
}