summaryrefslogblamecommitdiffstats
path: root/mandoc-db.c
blob: 944ae35ca6d2c2ce057a0f2e62c9eb85e8128a0c (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16















                                                                           



                    



















                                    
                                       
                                
                                                









                        




                                                  









                                                                    
                                      



                                                            
                                                    
                                                        












































































































































                                                             

                                                   
                                                 




                                                                    
                                                                    
                           





                                                                   
                                                        
                             


                                





                                         
                 
 
                                                    
                            

                                     
                              








                                                        



                                                                      

           



                                                        


                                             
 


                                         





                                              
                                           



                                                                 


                                              
          


                                                                      



                                            
                                                                

                         
                              


                                              
                                                                  
 




                                              




                                                                       
                                                           

           

                                                                      

                                     








                                      

                                        
                                 
 

                                                                     
                                 
                 
 

                                                













                                                                  



                                                                     
                      


                         

                               


                        


                                                                 

                                      

                                      




































                                                                       
















                                                                        


                                   
                          



                                                              
                                          














                                                           




                                                                   

                                          
 


                                                          




                                                                      
                          
                                           





                                      
                                       











































                                                            
                                        

























                                                            
                                                                  














































































































                                                                        




























































                                                                        

                               
                                                 

                                                 




                                                         



                                    
                                    


                                       
/*	$Id$ */
/*
 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <sys/param.h>

#include <assert.h>
#ifdef __linux__
# include <db_185.h>
#else
# include <db.h>
#endif
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "man.h"
#include "mdoc.h"
#include "mandoc.h"

#define	MANDOC_DB	 "mandoc.db"
#define	MANDOC_IDX	 "mandoc.index"
#define	MANDOC_BUFSZ	  BUFSIZ
#define	MANDOC_FLAGS	  O_CREAT|O_TRUNC|O_RDWR

/*
 * Category of a keyword entry.  The handlers in this file copy one of
 * these values into the first four bytes of the value stored with each
 * keyword.
 */
enum	type {
	MANDOC_NONE = 0, /* no category; main() zeroes the value bytes per file */
	MANDOC_NAME, /* manual name: `Nm' in NAME, or first NAME word in -man */
	MANDOC_FUNCTION, /* function name: `Fn' or `Fo' in SYNOPSIS */
	MANDOC_UTILITY, /* utility name: `Nm' head in SYNOPSIS */
	MANDOC_INCLUDES, /* included file: `Fd' or `In' in SYNOPSIS */
	MANDOC_VARIABLE /* variable or type name: `Va' or `Vt' in SYNOPSIS */
};

/*
 * Shared parameter lists for the node walkers and per-macro handlers.
 *   db   - keyword database that entries are put into
 *   dbn  - name of that database, used when reporting errors
 *   key  - shared key whose buffer is reused between entries
 *   ksz  - allocated capacity of the key's buffer
 *   val  - value stored with the key; its first four bytes carry the
 *          entry category (enum type)
 *   n    - node currently being examined
 * MAN_ARGS is the -man variant, MDOC_ARGS the -mdoc variant; they
 * differ only in the node type.
 */
#define	MAN_ARGS	  DB *db, \
			  const char *dbn, \
			  DBT *key, size_t *ksz, \
			  DBT *val, \
			  const struct man_node *n
#define	MDOC_ARGS	  DB *db, \
			  const char *dbn, \
			  DBT *key, size_t *ksz, \
			  DBT *val, \
			  const struct mdoc_node *n

/* Helpers managing the shared, growable key buffer. */
static	void		  dbt_append(DBT *, size_t *, const char *);
static	void		  dbt_appendb(DBT *, size_t *, 
				const void *, size_t);
static	void		  dbt_init(DBT *, size_t *);
static	void		  usage(void);
/* Entry points and tree walkers for parsed -man and -mdoc documents. */
static	void		  pman(DB *, const char *, 
				DBT *, size_t *, DBT *, 
				const char *, struct man *);
static	int		  pman_node(MAN_ARGS);
static	void		  pmdoc(DB *, const char *, 
				DBT *, size_t *, DBT *, 
				const char *, struct mdoc *);
static	void		  pmdoc_node(MDOC_ARGS);
/* Per-macro -mdoc handlers; each may fill in the key and value. */
static	void		  pmdoc_Fd(MDOC_ARGS);
static	void		  pmdoc_In(MDOC_ARGS);
static	void		  pmdoc_Fn(MDOC_ARGS);
static	void		  pmdoc_Fo(MDOC_ARGS);
static	void		  pmdoc_Nm(MDOC_ARGS);
static	void		  pmdoc_Vt(MDOC_ARGS);

/* Signature shared by all per-macro -mdoc handlers. */
typedef	void		(*pmdoc_nf)(MDOC_ARGS);

/* Basename of argv[0], set in main() and used in diagnostics. */
static	const char	 *progname;

/*
 * Dispatch table of per-macro handlers, indexed by a node's macro token
 * (n->tok) in pmdoc_node().  A NULL entry means the macro contributes
 * no keyword.  The trailing comment on each row names the macro the
 * slot is meant for.
 * NOTE(review): the row order presumably has to match the macro token
 * enumeration declared in mdoc.h -- confirm before reordering or
 * inserting rows.
 */
static	const pmdoc_nf	  mdocs[MDOC_MAX] = {
	NULL, /* Ap */
	NULL, /* Dd */
	NULL, /* Dt */
	NULL, /* Os */
	NULL, /* Sh */ 
	NULL, /* Ss */ 
	NULL, /* Pp */ 
	NULL, /* D1 */
	NULL, /* Dl */
	NULL, /* Bd */
	NULL, /* Ed */
	NULL, /* Bl */ 
	NULL, /* El */
	NULL, /* It */
	NULL, /* Ad */ 
	NULL, /* An */ 
	NULL, /* Ar */
	NULL, /* Cd */ 
	NULL, /* Cm */
	NULL, /* Dv */ 
	NULL, /* Er */ 
	NULL, /* Ev */ 
	NULL, /* Ex */ 
	NULL, /* Fa */ 
	pmdoc_Fd, /* Fd */
	NULL, /* Fl */
	pmdoc_Fn, /* Fn */ 
	NULL, /* Ft */ 
	NULL, /* Ic */ 
	pmdoc_In, /* In */ 
	NULL, /* Li */
	NULL, /* Nd */
	pmdoc_Nm, /* Nm */
	NULL, /* Op */
	NULL, /* Ot */
	NULL, /* Pa */
	NULL, /* Rv */
	NULL, /* St */ 
	pmdoc_Vt, /* Va */
	pmdoc_Vt, /* Vt */ 
	NULL, /* Xr */ 
	NULL, /* %A */
	NULL, /* %B */
	NULL, /* %D */
	NULL, /* %I */
	NULL, /* %J */
	NULL, /* %N */
	NULL, /* %O */
	NULL, /* %P */
	NULL, /* %R */
	NULL, /* %T */
	NULL, /* %V */
	NULL, /* Ac */
	NULL, /* Ao */
	NULL, /* Aq */
	NULL, /* At */ 
	NULL, /* Bc */
	NULL, /* Bf */
	NULL, /* Bo */
	NULL, /* Bq */
	NULL, /* Bsx */
	NULL, /* Bx */
	NULL, /* Db */
	NULL, /* Dc */
	NULL, /* Do */
	NULL, /* Dq */
	NULL, /* Ec */
	NULL, /* Ef */ 
	NULL, /* Em */ 
	NULL, /* Eo */
	NULL, /* Fx */
	NULL, /* Ms */ 
	NULL, /* No */
	NULL, /* Ns */
	NULL, /* Nx */
	NULL, /* Ox */
	NULL, /* Pc */
	NULL, /* Pf */
	NULL, /* Po */
	NULL, /* Pq */
	NULL, /* Qc */
	NULL, /* Ql */
	NULL, /* Qo */
	NULL, /* Qq */
	NULL, /* Re */
	NULL, /* Rs */
	NULL, /* Sc */
	NULL, /* So */
	NULL, /* Sq */
	NULL, /* Sm */ 
	NULL, /* Sx */
	NULL, /* Sy */
	NULL, /* Tn */
	NULL, /* Ux */
	NULL, /* Xc */
	NULL, /* Xo */
	pmdoc_Fo, /* Fo */ 
	NULL, /* Fc */ 
	NULL, /* Oo */
	NULL, /* Oc */
	NULL, /* Bk */
	NULL, /* Ek */
	NULL, /* Bt */
	NULL, /* Hf */
	NULL, /* Fr */
	NULL, /* Ud */
	NULL, /* Lb */
	NULL, /* Lp */ 
	NULL, /* Lk */ 
	NULL, /* Mt */ 
	NULL, /* Brq */ 
	NULL, /* Bro */ 
	NULL, /* Brc */ 
	NULL, /* %C */
	NULL, /* Es */
	NULL, /* En */
	NULL, /* Dx */
	NULL, /* %Q */
	NULL, /* br */
	NULL, /* sp */
	NULL, /* %U */
	NULL, /* Ta */
};

/*
 * Build the keyword database (a btree allowing duplicate keys) and the
 * file index (a recno database) from the manuals named on the command
 * line.
 *
 * Options:
 *   -d path   prefix prepended verbatim to the output file names; no
 *             separator is inserted, so a directory needs a trailing
 *             slash.  Defaults to the empty string.
 *
 * Both databases are first written to "<name>~" and renamed onto their
 * final names once all input has been processed.
 *
 * Returns MANDOCLEVEL_BADARG on bad options, otherwise MANDOCLEVEL_OK;
 * exits with MANDOCLEVEL_SYSERR if a path is too long or a database
 * cannot be opened.
 */
int
main(int argc, char *argv[])
{
	struct mparse	*mp; /* parse sequence */
	struct mdoc	*mdoc; /* resulting mdoc */
	struct man	*man; /* resulting man */
	char		*fn;
	const char	*dir; /* result dir (default: cwd) */
	char		 ibuf[MAXPATHLEN], /* index fname */
			 ibbuf[MAXPATHLEN], /* index backup fname */
			 fbuf[MAXPATHLEN],  /* btree fname */
			 fbbuf[MAXPATHLEN]; /* btree backup fname */
	int		 c;
	DB		*idx, /* index database */
			*db; /* keyword database */
	DBT		 rkey, rval, /* recno entries */
			 key, val; /* persistent keyword entries */
	size_t		 ksz; /* entry buffer size */
	char		 vbuf[8]; /* value: 4 bytes category, 4 bytes recno */
	BTREEINFO	 info; /* btree configuration */
	recno_t		 rec;
	extern int	 optind;
	extern char	*optarg;

	progname = strrchr(argv[0], '/');
	if (progname == NULL)
		progname = argv[0];
	else
		++progname;

	dir = "";

	while (-1 != (c = getopt(argc, argv, "d:")))
		switch (c) {
		case ('d'):
			dir = optarg;
			break;
		default:
			usage();
			return((int)MANDOCLEVEL_BADARG);
		}

	argc -= optind;
	argv += optind;

	/*
	 * Set up temporary file-names into which we're going to write
	 * all of our data (both for the index and database).  These
	 * will be renamed to the real file-names after we've written
	 * all of our data.
	 *
	 * The byte at MAXPATHLEN - 2 is a truncation sentinel: it is
	 * cleared here and is only overwritten if a name fills the
	 * whole buffer.
	 */

	ibuf[0] = ibuf[MAXPATHLEN - 2] =
		ibbuf[0] = ibbuf[MAXPATHLEN - 2] = 
		fbuf[0] = fbuf[MAXPATHLEN - 2] = 
		fbbuf[0] = fbbuf[MAXPATHLEN - 2] = '\0';

	strlcat(fbuf, dir, MAXPATHLEN);
	strlcat(fbuf, MANDOC_DB, MAXPATHLEN);

	strlcat(fbbuf, fbuf, MAXPATHLEN);
	strlcat(fbbuf, "~", MAXPATHLEN);

	strlcat(ibuf, dir, MAXPATHLEN);
	strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);

	strlcat(ibbuf, ibuf, MAXPATHLEN);
	strlcat(ibbuf, "~", MAXPATHLEN);

	if ('\0' != fbuf[MAXPATHLEN - 2] ||
			'\0' != fbbuf[MAXPATHLEN - 2] ||
			'\0' != ibuf[MAXPATHLEN - 2] ||
			'\0' != ibbuf[MAXPATHLEN - 2]) {
		fprintf(stderr, "%s: Path too long\n", progname);
		exit((int)MANDOCLEVEL_SYSERR);
	}

	/*
	 * For the keyword database, open a BTREE database that allows
	 * duplicates.  For the index database, use a standard RECNO
	 * database type.
	 */

	memset(&info, 0, sizeof(BTREEINFO));
	info.flags = R_DUP;
	db = dbopen(fbbuf, MANDOC_FLAGS, 0644, DB_BTREE, &info);

	if (NULL == db) {
		perror(fbbuf);
		exit((int)MANDOCLEVEL_SYSERR);
	}

	idx = dbopen(ibbuf, MANDOC_FLAGS, 0644, DB_RECNO, NULL);

	/* Check the index handle, not the already-validated btree. */

	if (NULL == idx) {
		perror(ibbuf);
		(*db->close)(db);
		exit((int)MANDOCLEVEL_SYSERR);
	}

	/*
	 * Try parsing the manuals given on the command line.  If we
	 * totally fail, then just keep on going.  Take resulting trees
	 * and push them down into the database code.
	 * Use the auto-parser and don't report any errors.
	 */

	mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);

	memset(&key, 0, sizeof(DBT));
	memset(&val, 0, sizeof(DBT));
	memset(&rkey, 0, sizeof(DBT));
	memset(&rval, 0, sizeof(DBT));

	val.size = sizeof(vbuf);
	val.data = vbuf;
	rkey.size = sizeof(recno_t);

	rec = 1;
	ksz = 0;

	while (NULL != (fn = *argv++)) {
		mparse_reset(mp);

		if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) {
			fprintf(stderr, "%s: Parse failure\n", fn);
			continue;
		}

		mparse_result(mp, &mdoc, &man);
		if (NULL == mdoc && NULL == man)
			continue;

		/* Index record: record number -> file name. */

		rkey.data = &rec;
		rval.data = fn;
		rval.size = strlen(fn) + 1;

		if (-1 == (*idx->put)(idx, &rkey, &rval, 0)) {
			perror(ibbuf);
			break;
		}

		/*
		 * Reset the category and stamp the record number into
		 * the value.  Go through vbuf (which val.data points
		 * at) so the offset is computed on a char pointer.
		 */

		memset(vbuf, 0, sizeof(uint32_t));
		memcpy(vbuf + sizeof(uint32_t), &rec, sizeof(uint32_t));

		if (mdoc)
			pmdoc(db, fbbuf, &key, &ksz, &val, fn, mdoc);
		else 
			pman(db, fbbuf, &key, &ksz, &val, fn, man);
		rec++;
	}

	(*db->close)(db);
	(*idx->close)(idx);

	mparse_free(mp);

	free(key.data);

	/* Atomically replace the file with our temporary one. */

	if (-1 == rename(fbbuf, fbuf))
		perror(fbuf);
	if (-1 == rename(ibbuf, ibuf))
		perror(ibuf);

	return((int)MANDOCLEVEL_OK);
}

/*
 * Prepare the shared database key for a new entry.  The key's data
 * buffer is reused between entries: it is allocated here on first use
 * (when the recorded capacity is still zero) and afterwards only the
 * key's length is reset to zero.
 */
static void
dbt_init(DBT *key, size_t *ksz)
{

	if (0 != *ksz) {
		/* Buffer already exists: just start over at length zero. */
		key->size = 0;
		return;
	}

	/* First use: the key must still be pristine. */

	assert(0 == key->size);
	assert(NULL == key->data);

	*ksz = MANDOC_BUFSZ;
	key->data = mandoc_malloc(MANDOC_BUFSZ);
	key->size = 0;
}

/*
 * Append sz bytes from cp to a database entry.  This can be invoked
 * multiple times; the buffer is grown as needed and *ksz is updated to
 * the new capacity.  The entry must already have a buffer (see
 * dbt_init()).
 */
static void
dbt_appendb(DBT *key, size_t *ksz, const void *cp, size_t sz)
{

	assert(key->data);

	/*
	 * Grow as soon as the result would reach the capacity,
	 * overshooting by MANDOC_BUFSZ.  One resize always suffices,
	 * since the new capacity exceeds the required size.
	 */

	if (key->size + sz >= *ksz) {
		*ksz = key->size + sz + MANDOC_BUFSZ;
		key->data = mandoc_realloc(key->data, *ksz);
	}

	/* Offset through a char pointer: void * arithmetic is not ISO C. */

	memcpy((char *)key->data + key->size, cp, sz);
	key->size += sz;
}

/*
 * Append a nil-terminated string, terminator included, to the database
 * entry.  When the entry already holds data, its final byte (the
 * previous terminator) is turned into a space first, so repeated calls
 * build one space-separated, nil-terminated string.  Empty strings are
 * ignored.
 */
static void
dbt_append(DBT *key, size_t *ksz, const char *cp)
{
	size_t		 len;
	char		*buf;

	len = strlen(cp);
	if (0 == len)
		return;

	assert(key->data);

	if (0 != key->size) {
		buf = key->data;
		buf[(int)key->size - 1] = ' ';
	}

	dbt_appendb(key, ksz, cp, len + 1);
}

/* ARGSUSED */
/*
 * Handle `Fd' in the SYNOPSIS: for `.Fd #include <file>' (or "file"),
 * store the file name, without its enclosing angle brackets or quotes,
 * as a MANDOC_INCLUDES key.  Any other `Fd' leaves the key untouched.
 */
static void
pmdoc_Fd(MDOC_ARGS)
{
	uint32_t	 fl;
	const char	*start;
	size_t		 sz;
	char		 nil;
	
	if (SEC_SYNOPSIS != n->sec)
		return;
	if (NULL == (n = n->child) || MDOC_TEXT != n->type)
		return;

	/*
	 * Only consider those `Fd' macro fields that begin with an
	 * "inclusion" token (versus, e.g., #define).
	 */
	if (strcmp("#include", n->string))
		return;

	if (NULL == (n = n->next) || MDOC_TEXT != n->type)
		return;

	/*
	 * Strip away the enclosing angle brackets or quotes and make
	 * sure we're not zero-length, neither before nor after dropping
	 * the closing delimiter.  Working with a length instead of an
	 * end pointer keeps us from ever pointing before the string.
	 */

	start = n->string;
	if ('<' == *start || '"' == *start)
		start++;

	if (0 == (sz = strlen(start)))
		return;

	if ('>' == start[sz - 1] || '"' == start[sz - 1])
		sz--;

	if (0 == sz)
		return;

	nil = '\0';
	dbt_appendb(key, ksz, start, sz);
	dbt_appendb(key, ksz, &nil, 1);

	fl = MANDOC_INCLUDES;
	memcpy(val->data, &fl, 4);
}

/* ARGSUSED */
static void
pmdoc_In(MDOC_ARGS)
{
	uint32_t	 fl;
	
	if (SEC_SYNOPSIS != n->sec)
		return;
	if (NULL == n->child || MDOC_TEXT != n->child->type)
		return;

	dbt_append(key, ksz, n->child->string);
	fl = MANDOC_INCLUDES;
	memcpy(val->data, &fl, 4);
}

/* ARGSUSED */
/*
 * Handle `Fn' in the SYNOPSIS: store the function name as a
 * MANDOC_FUNCTION key.  The first argument may carry a return type in
 * front of the name (e.g. "struct type *name"); only the last
 * space-separated word, minus leading pointer symbols, is stored.
 */
static void
pmdoc_Fn(MDOC_ARGS)
{
	uint32_t	 fl;
	const char	*cp;
	
	if (SEC_SYNOPSIS != n->sec)
		return;
	if (NULL == n->child || MDOC_TEXT != n->child->type)
		return;

	/* .Fn "struct type *name" "arg" */

	cp = strrchr(n->child->string, ' ');
	if (NULL == cp)
		cp = n->child->string;
	else
		cp++;	/* step over the space itself */

	/* Strip away pointer symbol. */

	while ('*' == *cp)
		cp++;

	/* An empty remainder is ignored by dbt_append(). */

	dbt_append(key, ksz, cp);
	fl = MANDOC_FUNCTION;
	memcpy(val->data, &fl, 4);
}

/* ARGSUSED */
/*
 * Handle `Va' and `Vt' in the SYNOPSIS: store the last argument, minus
 * leading pointer symbols and trailing semicolons, as a MANDOC_VARIABLE
 * key.  For `Vt' only the body node is considered.
 */
static void
pmdoc_Vt(MDOC_ARGS)
{
	uint32_t	 fl;
	const char	*start;
	size_t		 sz;
	char		 nil;
	
	if (SEC_SYNOPSIS != n->sec)
		return;
	if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
		return;
	if (NULL == n->child || MDOC_TEXT != n->child->type)
		return;

	/* The name is read from the last child, so that must be text too. */

	if (NULL == n->last || MDOC_TEXT != n->last->type)
		return;

	/*
	 * Strip away leading pointer symbol '*' and trailing ';'.
	 */

	start = n->last->string;

	while ('*' == *start)
		start++;

	sz = strlen(start);
	while (sz > 0 && ';' == start[sz - 1])
		sz--;

	/* Nothing left to store (single-character names are fine). */

	if (0 == sz)
		return;

	nil = '\0';
	dbt_appendb(key, ksz, start, sz);
	dbt_appendb(key, ksz, &nil, 1);
	fl = MANDOC_VARIABLE;
	memcpy(val->data, &fl, 4);
}

/* ARGSUSED */
static void
pmdoc_Fo(MDOC_ARGS)
{
	uint32_t	 fl;
	
	if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
		return;
	if (NULL == n->child || MDOC_TEXT != n->child->type)
		return;

	dbt_append(key, ksz, n->child->string);
	fl = MANDOC_FUNCTION;
	memcpy(val->data, &fl, 4);
}

/* ARGSUSED */
static void
pmdoc_Nm(MDOC_ARGS)
{
	uint32_t	 fl;
	
	if (SEC_NAME == n->sec) {
		for (n = n->child; n; n = n->next) {
			if (MDOC_TEXT != n->type)
				continue;
			dbt_append(key, ksz, n->string);
		}
		fl = MANDOC_NAME;
		memcpy(val->data, &fl, 4);
		return;
	} else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
		return;

	for (n = n->child; n; n = n->next) {
		if (MDOC_TEXT != n->type)
			continue;
		dbt_append(key, ksz, n->string);
	}

	fl = MANDOC_UTILITY;
	memcpy(val->data, &fl, 4);
}

/*
 * Call out to per-macro handlers after clearing the persistent database
 * key.  If the macro sets the database key, flush it to the database.
 */
static void
pmdoc_node(MDOC_ARGS)
{

	if (NULL == n)
		return;

	switch (n->type) {
	case (MDOC_HEAD):
		/* FALLTHROUGH */
	case (MDOC_BODY):
		/* FALLTHROUGH */
	case (MDOC_TAIL):
		/* FALLTHROUGH */
	case (MDOC_BLOCK):
		/* FALLTHROUGH */
	case (MDOC_ELEM):
		if (NULL == mdocs[n->tok])
			break;

		dbt_init(key, ksz);
		(*mdocs[n->tok])(db, dbn, key, ksz, val, n);

		if (0 == key->size)
			break;
		if (0 == (*db->put)(db, key, val, 0))
			break;
		
		perror(dbn);
		exit((int)MANDOCLEVEL_SYSERR);
		/* NOTREACHED */
	default:
		break;
	}

	pmdoc_node(db, dbn, key, ksz, val, n->child);
	pmdoc_node(db, dbn, key, ksz, val, n->next);
}

/*
 * Search a -man tree (node, then children, then siblings) for the
 * manual's name: the first word of the first text child in the body of
 * the NAME section.  On success the shared key is set to that word
 * (nil-terminated), the value's category is set to MANDOC_NAME and 1 is
 * returned; the caller is responsible for storing the pair.  Returns 0
 * if no name was found.
 */
static int
pman_node(MAN_ARGS)
{
	const struct man_node *head, *body;
	const char	*end, *start;
	char		 nil;
	uint32_t	 fl;

	if (NULL == n)
		return(0);

	/*
	 * We're only searching for one thing: the first text child in
	 * the BODY of a NAME section.  Since we don't keep track of
	 * sections in -man, run some hoops to find out whether we're in
	 * the correct section or not.
	 */

	if (MAN_BODY == n->type && MAN_SH == n->tok) {
		body = n;
		assert(body->parent);
		if (NULL != (head = body->parent->head) &&
				1 == head->nchild &&
				NULL != (head = (head->child)) &&
				MAN_TEXT == head->type &&
				0 == strcmp(head->string, "NAME") &&
				NULL != (body = body->child) &&
				MAN_TEXT == body->type) {
			/*
			 * `end' points one past the name (at the space
			 * or the terminator), so the name is exactly
			 * end - start bytes long.
			 */

			start = body->string;
			if (NULL == (end = strchr(start, ' ')))
				end = start + strlen(start);

			/* Ignore an empty name and keep searching. */

			if (end > start) {
				nil = '\0';
				dbt_init(key, ksz);
				dbt_appendb(key, ksz, start, 
						(size_t)(end - start));
				dbt_appendb(key, ksz, &nil, 1);
				fl = MANDOC_NAME;
				memcpy(val->data, &fl, 4);
				return(1);
			}
		}
	}

	if (pman_node(db, dbn, key, ksz, val, n->child))
		return(1);
	if (pman_node(db, dbn, key, ksz, val, n->next))
		return(1);

	return(0);
}

/*
 * Index a parsed -man document: look up the manual's name in the tree
 * and, if one was found, write the resulting key/value pair to the
 * keyword database.  As with -mdoc entries, a failed write is fatal.
 * The path argument is currently unused.
 */
static void
pman(DB *db, const char *dbn, 
		DBT *key, size_t *ksz, DBT *val, 
		const char *path, struct man *m)
{

	(void)path;

	/* Nothing to store unless a name was found and the key filled in. */

	if (0 == pman_node(db, dbn, key, ksz, val, man_node(m)))
		return;
	if (0 == (*db->put)(db, key, val, 0))
		return;

	perror(dbn);
	exit((int)MANDOCLEVEL_SYSERR);
}


/*
 * Index a parsed -mdoc document by walking its tree from the root; the
 * walker stores every keyword it finds.  The path argument is unused.
 */
static void
pmdoc(DB *db, const char *dbn, 
		DBT *key, size_t *ksz, DBT *val, 
		const char *path, struct mdoc *m)
{
	const struct mdoc_node *root;

	root = mdoc_node(m);
	pmdoc_node(db, dbn, key, ksz, val, root);
}

/* Print a one-line usage synopsis to standard error. */
static void
usage(void)
{

	fprintf(stderr, "usage: %s [-d path] [file...]\n", progname);
}