diff options
author | Kristaps Dzonsons <kristaps@bsd.lv> | 2011-07-14 10:57:02 +0000 |
---|---|---|
committer | Kristaps Dzonsons <kristaps@bsd.lv> | 2011-07-14 10:57:02 +0000 |
commit | 2b87c6b1a975b795f88ce8dddfbfc2f746d3437c (patch) | |
tree | ee56a4a9464a8d3256cd14d3e26fd7112a2e59ae | |
parent | ad65fd82badffccc03d23a670fcca78d0441df6b (diff) | |
download | mandoc-2b87c6b1a975b795f88ce8dddfbfc2f746d3437c.tar.gz |
Rename makewhatis [back] into mandocdb. This is to maintain consistency
with OpenBSD, which is sandboxing the code for merge. It makes sense
because it doesn't really make a `makewhatis' file in the traditional
sense, so it may be confusing.
-rw-r--r-- | Makefile | 32 | ||||
-rw-r--r-- | index.sgml | 24 | ||||
-rw-r--r-- | mandocdb.1 | 191 | ||||
-rw-r--r-- | mandocdb.c | 1105 |
4 files changed, 1324 insertions, 28 deletions
@@ -73,8 +73,8 @@ SRCS = Makefile \ mandoc.3 \ mandoc.c \ mandoc.h \ - makewhatis.1 \ - makewhatis.c \ + mandocdb.1 \ + mandocdb.c \ mandoc_char.7 \ mdoc.h \ mdoc.7 \ @@ -227,21 +227,21 @@ $(MANDOC_OBJS) $(MANDOC_LNS): main.h mandoc.h mdoc.h man.h config.h out.h compat.o compat.ln: config.h -MAKEWHATIS_OBJS = makewhatis.o -MAKEWHATIS_LNS = makewhatis.ln +MANDOCDB_OBJS = mandocdb.o +MANDOCDB_LNS = mandocdb.ln -$(MAKEWHATIS_OBJS) $(MAKEWHATIS_LNS): mandoc.h mdoc.h man.h config.h +$(MANDOCDB_OBJS) $(MANDOCDB_LNS): mandoc.h mdoc.h man.h config.h PRECONV_OBJS = preconv.o PRECONV_LNS = preconv.ln $(PRECONV_OBJS) $(PRECONV_LNS): config.h -INDEX_MANS = makewhatis.1.html \ - makewhatis.1.xhtml \ - makewhatis.1.ps \ - makewhatis.1.pdf \ - makewhatis.1.txt \ +INDEX_MANS = mandocdb.1.html \ + mandocdb.1.xhtml \ + mandocdb.1.ps \ + mandocdb.1.pdf \ + mandocdb.1.txt \ mandoc.1.html \ mandoc.1.xhtml \ mandoc.1.ps \ @@ -304,8 +304,8 @@ lint: llib-llibmandoc.ln llib-lmandoc.ln llib-lpreconv.ln clean: rm -f libmandoc.a $(LIBMANDOC_OBJS) rm -f llib-llibmandoc.ln $(LIBMANDOC_LNS) - rm -f makewhatis $(MAKEWHATIS_OBJS) - rm -f llib-lmakewhatis.ln $(MAKEWHATIS_LNS) + rm -f mandocdb $(MANDOCDB_OBJS) + rm -f llib-lmandocdb.ln $(MANDOCDB_LNS) rm -f preconv $(PRECONV_OBJS) rm -f llib-lpreconv.ln $(PRECONV_LNS) rm -f mandoc $(MANDOC_OBJS) @@ -353,11 +353,11 @@ llib-lmandoc.ln: $(MANDOC_LNS) $(LINT) $(LINTFLAGS) -Cmandoc $(MANDOC_LNS) # You'll need -ldb for Linux. 
-makewhatis: $(MAKEWHATIS_OBJS) libmandoc.a - $(CC) -o $@ $(MAKEWHATIS_OBJS) libmandoc.a +mandocdb: $(MANDOCDB_OBJS) libmandoc.a + $(CC) -o $@ $(MANDOCDB_OBJS) libmandoc.a -llib-lmakewhatis.ln: $(MAKEWHATIS_LNS) - $(LINT) $(LINTFLAGS) -Cmakewhatis $(MAKEWHATIS_LNS) +llib-lmandocdb.ln: $(MANDOCDB_LNS) + $(LINT) $(LINTFLAGS) -Cmandocdb $(MANDOCDB_LNS) preconv: $(PRECONV_OBJS) $(CC) -o $@ $(PRECONV_OBJS) @@ -29,7 +29,7 @@ HREF="mandoc.1.html">mandoc</A>, which interfaces with the compiler library to format output for UNIX terminals (with support for wide-character locales), XHTML, HTML, PostScript, and PDF. It also includes <A HREF="preconv.1.html">preconv</A>, for recoding multibyte manuals; and <A - HREF="makewhatis.1.html">makewhatis</A>, for indexing manuals. + HREF="mandocdb.1.html">mandocdb</A>, for indexing manuals. It is a <A CLASS="external" HREF="http://bsd.lv/">BSD.lv</A> project. </P> <P> @@ -40,15 +40,15 @@ </H1> <P> <SPAN CLASS="nm">mdocml</SPAN> is in plain-old ANSI C and should build and run on any UNIX system, although <A - HREF="makewhatis.1.html">makewhatis</A> requires <A CLASS="external" + HREF="mandocdb.1.html">mandocdb</A> requires <A CLASS="external" HREF="http://www.oracle.com/technetwork/database/berkeleydb/overview/index.html">Berkeley Database</A> (this is installed by default on all BSD operating systems). To compile <SPAN CLASS="nm">mdocml</SPAN>, run <CODE>make</CODE>, then <CODE>make install</CODE> to install into <I>/usr/local</I>. Be aware: if you have an existing <A HREF="http://www.gnu.org/software/groff/" CLASS="external">groff</A> installation, this may overwrite its <B>preconv</B> binary. - The <A HREF="makewhatis.1.html">makewhatis</A> utility is not yet linked to the build. You must run <CODE>make - makewhatis</CODE> to build it (it does not install). + The <A HREF="mandocdb.1.html">mandocdb</A> utility is not yet linked to the build. You must run <CODE>make + mandocdb</CODE> to build it (it does not install). 
</P> <P> The most current version of <SPAN CLASS="nm">mdocml</SPAN> is <SPAN CLASS="attn">@VERSION@</SPAN>, dated <SPAN @@ -152,14 +152,14 @@ </TD> </TR> <TR> - <TD VALIGN="top"><A HREF="makewhatis.1.html">makewhatis(1)</A></TD> + <TD VALIGN="top"><A HREF="mandocdb.1.html">mandocdb(1)</A></TD> <TD VALIGN="top"> index UNIX manuals <SMALL> - (<A HREF="makewhatis.1.txt">text</A> | - <A HREF="makewhatis.1.xhtml">xhtml</A> | - <A HREF="makewhatis.1.pdf">pdf</A> | - <A HREF="makewhatis.1.ps">postscript</A>) + (<A HREF="mandocdb.1.txt">text</A> | + <A HREF="mandocdb.1.xhtml">xhtml</A> | + <A HREF="mandocdb.1.pdf">pdf</A> | + <A HREF="mandocdb.1.ps">postscript</A>) </SMALL> </TD> </TR> @@ -314,8 +314,8 @@ 12-07-2011: version 1.11.4 </P> <P> - Bug-fixes and clean-ups across all systems, especially in <A HREF="makewhatis.1.html">makewhatis</A> (note: still not - connected to the general build and must be compiled with <CODE>make makewhatis</CODE>) and the <A + Bug-fixes and clean-ups across all systems, especially in <INS><A HREF="mandocdb.1.html">mandocdb</A></INS> <DEL>makewhatis</DEL> (note: still not + connected to the general build and must be compiled with <INS><CODE>make mandocdb</CODE></INS> <DEL><CODE>make makewhatis</CODE></DEL>) and the <A HREF="man.7.html">man</A> parser. This release was significantly assisted by participants in <A CLASS="external" HREF="http://www.openbsd.org">OpenBSD</A>'s c2k11. Thanks! </P> @@ -341,7 +341,7 @@ <P> Corrected some installation issues in version 1.11.1. Further migration to <A HREF="mandoc.3.html">libmandoc</A>. - Initial public release (this utility is very much under development) of <A HREF="makewhatis.1.html">makewhatis</A>, + Initial public release (this utility is very much under development) of <INS><A HREF="mandocdb.1.html">mandocdb</A></INS><DEL>makewhatis</DEL>, initially named mandoc-db. 
This utility produces keyword databases of manual content <DEL>mandoc-cgi</DEL>, diff --git a/mandocdb.1 b/mandocdb.1 new file mode 100644 index 00000000..3f0ac150 --- /dev/null +++ b/mandocdb.1 @@ -0,0 +1,191 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt MANDOCDB 1 +.Os +.Sh NAME +.Nm mandocdb +.Nd index UNIX manuals +.Sh SYNOPSIS +.Nm +.Op Fl ruv +.Op Fl d Ar dir +.Ar +.Sh DESCRIPTION +The +.Nm +utility extracts keywords from +.Ux +manuals and indexes them for fast retrieval. +The arguments are as follows: +.Bl -tag -width Ds +.It Fl d Ar dir +The directory into which to write the keyword and index databases. +.It Ar +Read input from zero or more files in +.Xr mdoc 7 +or +.Xr man 7 +.Ux +manual format. +.It Fl r +Remove entries. +This will remove the index and keyword references. +If the record is not found, it is ignored. +.It Fl u +Update the record. +This will first remove the record (as in +.Fl r ) +then re-add it. +.It Fl v +Verbose output. +If specified once, prints the name of each indexed file. +If twice, prints keywords for each file. 
+.El +.Pp +By default, +.Nm +constructs a new +.Sx Index Database +and +.Sx Keyword Database +in the current working directory. +Existing databases are truncated. +.Pp +If fatal parse errors are encountered, the offending file is printed to +stderr, omitted from the index, and the parse continues with the next +input file. +.Ss Index Database +The index database, +.Pa mandoc.index , +is a +.Xr recno 3 +database with record values consisting of +.Pp +.Bl -enum -compact +.It +a nil-terminated filename, +.It +a nil-terminated manual section, +.It +a nil-terminated manual title, +.It +a nil-terminated architecture +.Pq this is not often available +.It +and a nil-terminated description. +.El +.Pp +Both the manual section and description may be zero-length. +Entries are sequentially-numbered, but the filenames are unordered. +.Ss Keyword Database +The keyword database, +.Pa mandoc.db , +is a +.Xr btree 3 +database of nil-terminated keywords (record length is non-zero string +length plus one) mapping to an 8-byte binary field consisting of the +keyword type and source +.Sx Index Database +record number. +The type, a 32-bit bit-mask in host order, consists of the following +fields: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It Li 0x01 +The name of a manual page as given in the NAME section. +.It Li 0x02 +A function prototype name as given in the SYNOPSIS section. +.It Li 0x04 +A utility name as given in the SYNOPSIS section. +.It Li 0x08 +An include file as given in the SYNOPSIS section. +.It Li 0x10 +A variable name as given in the SYNOPSIS section. +.It Li 0x20 +A standard as given in the STANDARDS section. +.It Li 0x40 +An author as given in the AUTHORS section. +.It Li 0x80 +A configuration as given in the SYNOPSIS section. +.It Li 0x100 +Free-form descriptive text as given in the NAME section. +.It Li 0x200 +Cross-links between manuals. +Listed as the link name, then a period, then the link section. +If the link has no section, the period terminates the string. 
+.It Li 0x400 +Path reference as given in the FILES section. +.It Li 0x800 +Environment variable as given in the ENVIRONMENT section. +.It Li 0x1000 +Error codes as given in the ERRORS section. +.El +.Pp +The last four bytes are a host-ordered record number within the +.Sx Index Database . +.Pp +The +.Nm +utility is +.Ud +.Sh IMPLEMENTATION NOTES +The time to construct a new database pair grows linearly with the +number of keywords in the input. +However, removing or updating entries with +.Fl r +or +.Fl u , +respectively, grows as a multiple of the index length and input size. +.Sh FILES +.Bl -tag -width Ds +.It Pa mandoc.db +A +.Xr btree 3 +keyword database mapping keywords to a type and file reference in +.Pa mandoc.index . +.It Pa mandoc.index +A +.Xr recno 3 +database of indexed file-names. +.El +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values: +.Pp +.Bl -tag -width Ds -compact +.It 0 +No errors occurred. +.It 5 +Invalid command line arguments were specified. +No input files have been read. +.It 6 +An operating system error occurred, for example memory exhaustion or an +error accessing input files. +Such errors cause +.Nm +to exit at once, possibly in the middle of parsing or formatting a file. +The output databases are corrupt and should be removed. +.El +.Sh SEE ALSO +.Xr mandoc 1 +.Sh AUTHORS +The +.Nm +utility was written by +.An Kristaps Dzonsons Aq kristaps@bsd.lv . diff --git a/mandocdb.c b/mandocdb.c new file mode 100644 index 00000000..e2282eda --- /dev/null +++ b/mandocdb.c @@ -0,0 +1,1105 @@ +/* $Id$ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/param.h> + +#include <assert.h> +#include <fcntl.h> +#include <getopt.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#ifdef __linux__ +# include <db_185.h> +#else +# include <db.h> +#endif + +#include "man.h" +#include "mdoc.h" +#include "mandoc.h" + +#define MANDOC_DB "mandoc.db" +#define MANDOC_IDX "mandoc.index" +#define MANDOC_BUFSZ BUFSIZ +#define MANDOC_FLAGS O_CREAT|O_TRUNC|O_RDWR +#define MANDOC_SLOP 1024 + +/* Bit-fields. See mandocdb.1. */ + +#define TYPE_NAME 0x01 +#define TYPE_FUNCTION 0x02 +#define TYPE_UTILITY 0x04 +#define TYPE_INCLUDES 0x08 +#define TYPE_VARIABLE 0x10 +#define TYPE_STANDARD 0x20 +#define TYPE_AUTHOR 0x40 +#define TYPE_CONFIG 0x80 +#define TYPE_DESC 0x100 +#define TYPE_XREF 0x200 +#define TYPE_PATH 0x400 +#define TYPE_ENV 0x800 +#define TYPE_ERR 0x1000 + +/* Buffer for storing growable data. */ + +struct buf { + char *cp; + size_t len; + size_t size; +}; + +/* Operation we're going to perform. 
*/ + +enum op { + OP_NEW = 0, /* new database */ + OP_UPDATE, /* update entries in existing database */ + OP_DELETE /* delete entries from existing database */ +}; + +#define MAN_ARGS DB *hash, \ + struct buf *buf, \ + struct buf *dbuf, \ + const struct man_node *n +#define MDOC_ARGS DB *hash, \ + struct buf *buf, \ + struct buf *dbuf, \ + const struct mdoc_node *n, \ + const struct mdoc_meta *m + +static void buf_appendmdoc(struct buf *, + const struct mdoc_node *, int); +static void buf_append(struct buf *, const char *); +static void buf_appendb(struct buf *, + const void *, size_t); +static void dbt_put(DB *, const char *, DBT *, DBT *); +static void hash_put(DB *, const struct buf *, int); +static void hash_reset(DB **); +static int pman_node(MAN_ARGS); +static void pmdoc_node(MDOC_ARGS); +static void pmdoc_An(MDOC_ARGS); +static void pmdoc_Cd(MDOC_ARGS); +static void pmdoc_Er(MDOC_ARGS); +static void pmdoc_Ev(MDOC_ARGS); +static void pmdoc_Fd(MDOC_ARGS); +static void pmdoc_In(MDOC_ARGS); +static void pmdoc_Fn(MDOC_ARGS); +static void pmdoc_Fo(MDOC_ARGS); +static void pmdoc_Nd(MDOC_ARGS); +static void pmdoc_Nm(MDOC_ARGS); +static void pmdoc_Pa(MDOC_ARGS); +static void pmdoc_St(MDOC_ARGS); +static void pmdoc_Vt(MDOC_ARGS); +static void pmdoc_Xr(MDOC_ARGS); +static void usage(void); + +typedef void (*pmdoc_nf)(MDOC_ARGS); + +static const pmdoc_nf mdocs[MDOC_MAX] = { + NULL, /* Ap */ + NULL, /* Dd */ + NULL, /* Dt */ + NULL, /* Os */ + NULL, /* Sh */ + NULL, /* Ss */ + NULL, /* Pp */ + NULL, /* D1 */ + NULL, /* Dl */ + NULL, /* Bd */ + NULL, /* Ed */ + NULL, /* Bl */ + NULL, /* El */ + NULL, /* It */ + NULL, /* Ad */ + pmdoc_An, /* An */ + NULL, /* Ar */ + pmdoc_Cd, /* Cd */ + NULL, /* Cm */ + NULL, /* Dv */ + pmdoc_Er, /* Er */ + pmdoc_Ev, /* Ev */ + NULL, /* Ex */ + NULL, /* Fa */ + pmdoc_Fd, /* Fd */ + NULL, /* Fl */ + pmdoc_Fn, /* Fn */ + NULL, /* Ft */ + NULL, /* Ic */ + pmdoc_In, /* In */ + NULL, /* Li */ + pmdoc_Nd, /* Nd */ + pmdoc_Nm, /* Nm */ + NULL, /* 
Op */ + NULL, /* Ot */ + pmdoc_Pa, /* Pa */ + NULL, /* Rv */ + pmdoc_St, /* St */ + pmdoc_Vt, /* Va */ + pmdoc_Vt, /* Vt */ + pmdoc_Xr, /* Xr */ + NULL, /* %A */ + NULL, /* %B */ + NULL, /* %D */ + NULL, /* %I */ + NULL, /* %J */ + NULL, /* %N */ + NULL, /* %O */ + NULL, /* %P */ + NULL, /* %R */ + NULL, /* %T */ + NULL, /* %V */ + NULL, /* Ac */ + NULL, /* Ao */ + NULL, /* Aq */ + NULL, /* At */ + NULL, /* Bc */ + NULL, /* Bf */ + NULL, /* Bo */ + NULL, /* Bq */ + NULL, /* Bsx */ + NULL, /* Bx */ + NULL, /* Db */ + NULL, /* Dc */ + NULL, /* Do */ + NULL, /* Dq */ + NULL, /* Ec */ + NULL, /* Ef */ + NULL, /* Em */ + NULL, /* Eo */ + NULL, /* Fx */ + NULL, /* Ms */ + NULL, /* No */ + NULL, /* Ns */ + NULL, /* Nx */ + NULL, /* Ox */ + NULL, /* Pc */ + NULL, /* Pf */ + NULL, /* Po */ + NULL, /* Pq */ + NULL, /* Qc */ + NULL, /* Ql */ + NULL, /* Qo */ + NULL, /* Qq */ + NULL, /* Re */ + NULL, /* Rs */ + NULL, /* Sc */ + NULL, /* So */ + NULL, /* Sq */ + NULL, /* Sm */ + NULL, /* Sx */ + NULL, /* Sy */ + NULL, /* Tn */ + NULL, /* Ux */ + NULL, /* Xc */ + NULL, /* Xo */ + pmdoc_Fo, /* Fo */ + NULL, /* Fc */ + NULL, /* Oo */ + NULL, /* Oc */ + NULL, /* Bk */ + NULL, /* Ek */ + NULL, /* Bt */ + NULL, /* Hf */ + NULL, /* Fr */ + NULL, /* Ud */ + NULL, /* Lb */ + NULL, /* Lp */ + NULL, /* Lk */ + NULL, /* Mt */ + NULL, /* Brq */ + NULL, /* Bro */ + NULL, /* Brc */ + NULL, /* %C */ + NULL, /* Es */ + NULL, /* En */ + NULL, /* Dx */ + NULL, /* %Q */ + NULL, /* br */ + NULL, /* sp */ + NULL, /* %U */ + NULL, /* Ta */ +}; + +static const char *progname; + +int +main(int argc, char *argv[]) +{ + struct mparse *mp; /* parse sequence */ + struct mdoc *mdoc; /* resulting mdoc */ + struct man *man; /* resulting man */ + enum op op; /* current operation */ + char *fn; /* current file being parsed */ + const char *msec, /* manual section */ + *mtitle, /* manual title */ + *arch, /* manual architecture */ + *dir; /* result dir (default: cwd) */ + char ibuf[MAXPATHLEN], /* index fname */ 
+ fbuf[MAXPATHLEN], /* btree fname */ + vbuf[8]; /* stringified record number */ + int ch, seq, sseq, verb, i; + DB *idx, /* index database */ + *db, /* keyword database */ + *hash; /* temporary keyword hashtable */ + DBT key, val; + enum mandoclevel ec; /* exit status */ + size_t sv; + BTREEINFO info; /* btree configuration */ + recno_t rec, + maxrec; /* supremum of all records */ + recno_t *recs; /* buffer of empty records */ + size_t recsz, /* buffer size of recs */ + reccur; /* valid number of recs */ + struct buf buf, /* keyword buffer */ + dbuf; /* description buffer */ + extern int optind; + extern char *optarg; + + progname = strrchr(argv[0], '/'); + if (progname == NULL) + progname = argv[0]; + else + ++progname; + + dir = ""; + verb = 0; + db = idx = NULL; + mp = NULL; + hash = NULL; + recs = NULL; + recsz = reccur = 0; + maxrec = 0; + op = OP_NEW; + ec = MANDOCLEVEL_SYSERR; + + memset(&buf, 0, sizeof(struct buf)); + memset(&dbuf, 0, sizeof(struct buf)); + + while (-1 != (ch = getopt(argc, argv, "d:ruv"))) + switch (ch) { + case ('d'): + dir = optarg; + break; + case ('r'): + op = OP_DELETE; + break; + case ('u'): + op = OP_UPDATE; + break; + case ('v'): + verb++; + break; + default: + usage(); + return((int)MANDOCLEVEL_BADARG); + } + + argc -= optind; + argv += optind; + + ibuf[0] = ibuf[MAXPATHLEN - 2] = + fbuf[0] = fbuf[MAXPATHLEN - 2] = '\0'; + + strlcat(fbuf, dir, MAXPATHLEN); + strlcat(fbuf, MANDOC_DB, MAXPATHLEN); + + strlcat(ibuf, dir, MAXPATHLEN); + strlcat(ibuf, MANDOC_IDX, MAXPATHLEN); + + if ('\0' != fbuf[MAXPATHLEN - 2] || + '\0' != ibuf[MAXPATHLEN - 2]) { + fprintf(stderr, "%s: Path too long\n", dir); + goto out; + } + + /* + * For the keyword database, open a BTREE database that allows + * duplicates. + * For the index database, use a standard RECNO database type. + * Truncate the database if we're creating a new one. 
+ */ + + memset(&info, 0, sizeof(BTREEINFO)); + info.flags = R_DUP; + + if (OP_NEW == op) { + db = dbopen(fbuf, MANDOC_FLAGS, 0644, DB_BTREE, &info); + idx = dbopen(ibuf, MANDOC_FLAGS, 0644, DB_RECNO, NULL); + } else { + db = dbopen(fbuf, O_CREAT|O_RDWR, 0644, DB_BTREE, &info); + idx = dbopen(ibuf, O_CREAT|O_RDWR, 0644, DB_RECNO, NULL); + } + + /* Check each handle on its own: a failed index open must not fall through. */ + + if (NULL == db) { + perror(fbuf); + goto out; + } else if (NULL == idx) { + perror(ibuf); + goto out; + } + + /* + * If we're going to delete or update a database, remove the + * entries now (both the index and all keywords pointing to it). + * This doesn't actually remove them: it only sets their record + * value lengths to zero. + * While doing so, add the empty records to a list we'll access + * later in re-adding entries to the database. + */ + + if (OP_DELETE == op || OP_UPDATE == op) { + seq = R_FIRST; + while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) { + seq = R_NEXT; + maxrec = *(recno_t *)key.data; + if (0 == val.size && OP_UPDATE == op) { + if (reccur >= recsz) { + recsz += MANDOC_SLOP; + recs = mandoc_realloc + (recs, recsz * sizeof(recno_t)); + } + recs[(int)reccur] = maxrec; + reccur++; + continue; + } + + fn = (char *)val.data; + for (i = 0; i < argc; i++) + if (0 == strcmp(fn, argv[i])) + break; + + if (i == argc) + continue; + + sseq = R_FIRST; + while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) { + sseq = R_NEXT; + assert(8 == val.size); + if (maxrec != *(recno_t *)(val.data + 4)) + continue; + if (verb > 1) + printf("%s: Deleted keyword: %s\n", + fn, (char *)key.data); + ch = (*db->del)(db, &key, R_CURSOR); + if (ch < 0) + break; + } + if (ch < 0) { + perror(fbuf); + exit((int)MANDOCLEVEL_SYSERR); + } + + if (verb) + printf("%s: Deleted index\n", fn); + + val.size = 0; + ch = (*idx->put)(idx, &key, &val, R_CURSOR); + if (ch < 0) { + perror(ibuf); + exit((int)MANDOCLEVEL_SYSERR); + } + + if (OP_UPDATE == op) { + if 
(reccur >= recsz) { + recsz += MANDOC_SLOP; + recs = mandoc_realloc + (recs, recsz * 
sizeof(recno_t)); + } + recs[(int)reccur] = maxrec; + reccur++; + } + } + maxrec++; + } + + if (OP_DELETE == op) { + ec = MANDOCLEVEL_OK; + goto out; + } + + /* + * Add records to the database. + * Try parsing each manual given on the command line. + * If we fail, then emit an error and keep on going. + * Take resulting trees and push them down into the database code. + * Use the auto-parser and don't report any errors. + */ + + mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL); + + buf.size = dbuf.size = MANDOC_BUFSZ; + buf.cp = mandoc_malloc(buf.size); + dbuf.cp = mandoc_malloc(dbuf.size); + + for (rec = 0, i = 0; i < argc; i++) { + fn = argv[i]; + if (OP_UPDATE == op) { + if (reccur > 0) { + --reccur; + rec = recs[(int)reccur]; + } else if (maxrec > 0) { + rec = maxrec; + maxrec = 0; + } else + rec++; + } else + rec++; + + mparse_reset(mp); + hash_reset(&hash); + + if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) { + fprintf(stderr, "%s: Parse failure\n", fn); + continue; + } + + mparse_result(mp, &mdoc, &man); + if (NULL == mdoc && NULL == man) + continue; + + msec = NULL != mdoc ? + mdoc_meta(mdoc)->msec : man_meta(man)->msec; + mtitle = NULL != mdoc ? + mdoc_meta(mdoc)->title : man_meta(man)->title; + arch = NULL != mdoc ? mdoc_meta(mdoc)->arch : NULL; + + if (NULL == arch) + arch = ""; + + /* + * The index record value consists of a nil-terminated + * filename, manual section, manual title, architecture and + * description, in that order. Since the description + * may not be set, we set a sentinel to see if we're + * going to write a nil byte in its place. + */ + + dbuf.len = 0; + buf_appendb(&dbuf, fn, strlen(fn) + 1); + buf_appendb(&dbuf, msec, strlen(msec) + 1); + buf_appendb(&dbuf, mtitle, strlen(mtitle) + 1); + buf_appendb(&dbuf, arch, strlen(arch) + 1); + + sv = dbuf.len; + + /* Fix the record number in the btree value. 
*/ + + if (mdoc) + pmdoc_node(hash, &buf, &dbuf, + mdoc_node(mdoc), mdoc_meta(mdoc)); + else + pman_node(hash, &buf, &dbuf, man_node(man)); + + /* + * Copy from the in-memory hashtable of pending keywords + * into the database. + */ + + memset(vbuf, 0, sizeof(uint32_t)); + memcpy(vbuf + 4, &rec, sizeof(uint32_t)); + + seq = R_FIRST; + while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { + seq = R_NEXT; + + memcpy(vbuf, val.data, sizeof(uint32_t)); + val.size = sizeof(vbuf); + val.data = vbuf; + + if (verb > 1) + printf("%s: Added keyword: %s, 0x%x\n", + fn, (char *)key.data, + *(int *)val.data); + dbt_put(db, fbuf, &key, &val); + } + if (ch < 0) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } + + /* + * Apply to the index. If we haven't had a description + * set, put an empty one in now. + */ + + if (dbuf.len == sv) + buf_appendb(&dbuf, "", 1); + + key.data = &rec; + key.size = sizeof(recno_t); + + val.data = dbuf.cp; + val.size = dbuf.len; + + if (verb > 0) + printf("%s: Added index\n", fn); + + dbt_put(idx, ibuf, &key, &val); + } + + ec = MANDOCLEVEL_OK; +out: + if (db) + (*db->close)(db); + if (idx) + (*idx->close)(idx); + if (hash) + (*hash->close)(hash); + if (mp) + mparse_free(mp); + + free(buf.cp); + free(dbuf.cp); + free(recs); + + return((int)ec); +} + +/* + * Grow the buffer (if necessary) and copy in a binary string. + */ +static void +buf_appendb(struct buf *buf, const void *cp, size_t sz) +{ + + /* Overshoot by MANDOC_BUFSZ. */ + + while (buf->len + sz >= buf->size) { + buf->size = buf->len + sz + MANDOC_BUFSZ; + buf->cp = mandoc_realloc(buf->cp, buf->size); + } + + memcpy(buf->cp + (int)buf->len, cp, sz); + buf->len += sz; +} + +/* + * Append a nil-terminated string to the buffer. + * This can be invoked multiple times. + * The buffer string will be nil-terminated. + * If invoked multiple times, a space is put between strings. 
+ */ +static void +buf_append(struct buf *buf, const char *cp) +{ + size_t sz; + + if (0 == (sz = strlen(cp))) + return; + + if (buf->len) + buf->cp[(int)buf->len - 1] = ' '; + + buf_appendb(buf, cp, sz + 1); +} + +/* + * Recursively add all text from a given node. + * This is optimised for general mdoc nodes in this context, which do + * not consist of subexpressions and having a recursive call for n->next + * would be wasteful. + * The "f" variable should be 0 unless called from pmdoc_Nd for the + * description buffer, which does not start at the beginning of the + * buffer. + */ +static void +buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f) +{ + + for ( ; n; n = n->next) { + if (n->child) + buf_appendmdoc(buf, n->child, f); + + if (MDOC_TEXT == n->type && f) { + f = 0; + buf_appendb(buf, n->string, + strlen(n->string) + 1); + } else if (MDOC_TEXT == n->type) + buf_append(buf, n->string); + + } +} + +/* ARGSUSED */ +static void +pmdoc_An(MDOC_ARGS) +{ + + if (SEC_AUTHORS != n->sec) + return; + + buf_appendmdoc(buf, n->child, 0); + hash_put(hash, buf, TYPE_AUTHOR); +} + +static void +hash_reset(DB **db) +{ + DB *hash; + + if (NULL != (hash = *db)) + (*hash->close)(hash); + + *db = dbopen(NULL, MANDOC_FLAGS, 0644, DB_HASH, NULL); + if (NULL == *db) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } +} + +/* ARGSUSED */ +static void +pmdoc_Fd(MDOC_ARGS) +{ + const char *start, *end; + size_t sz; + + if (SEC_SYNOPSIS != n->sec) + return; + if (NULL == (n = n->child) || MDOC_TEXT != n->type) + return; + + /* + * Only consider those `Fd' macro fields that begin with an + * "inclusion" token (versus, e.g., #define). + */ + if (strcmp("#include", n->string)) + return; + + if (NULL == (n = n->next) || MDOC_TEXT != n->type) + return; + + /* + * Strip away the enclosing angle brackets and make sure we're + * not zero-length. 
+ */ + + start = n->string; + if ('<' == *start || '"' == *start) + start++; + + if (0 == (sz = strlen(start))) + return; + + end = &start[(int)sz - 1]; + if ('>' == *end || '"' == *end) + end--; + + assert(end >= start); + + buf_appendb(buf, start, (size_t)(end - start + 1)); + buf_appendb(buf, "", 1); + + hash_put(hash, buf, TYPE_INCLUDES); +} + +/* ARGSUSED */ +static void +pmdoc_Cd(MDOC_ARGS) +{ + + if (SEC_SYNOPSIS != n->sec) + return; + + buf_appendmdoc(buf, n->child, 0); + hash_put(hash, buf, TYPE_CONFIG); +} + +/* ARGSUSED */ +static void +pmdoc_In(MDOC_ARGS) +{ + + if (SEC_SYNOPSIS != n->sec) + return; + if (NULL == n->child || MDOC_TEXT != n->child->type) + return; + + buf_append(buf, n->child->string); + hash_put(hash, buf, TYPE_INCLUDES); +} + +/* ARGSUSED */ +static void +pmdoc_Fn(MDOC_ARGS) +{ + const char *cp; + + if (SEC_SYNOPSIS != n->sec) + return; + if (NULL == n->child || MDOC_TEXT != n->child->type) + return; + + /* .Fn "struct type *arg" "foo" */ + + cp = strrchr(n->child->string, ' '); + if (NULL == cp) + cp = n->child->string; + + /* Strip away pointer symbol. 
*/ + + while ('*' == *cp) + cp++; + + buf_append(buf, cp); + hash_put(hash, buf, TYPE_FUNCTION); +} + +/* ARGSUSED */ +static void +pmdoc_St(MDOC_ARGS) +{ + + if (SEC_STANDARDS != n->sec) + return; + if (NULL == n->child || MDOC_TEXT != n->child->type) + return; + + buf_append(buf, n->child->string); + hash_put(hash, buf, TYPE_STANDARD); +} + +/* ARGSUSED */ +static void +pmdoc_Xr(MDOC_ARGS) +{ + + if (NULL == (n = n->child)) + return; + + buf_appendb(buf, n->string, strlen(n->string)); + + if (NULL != (n = n->next)) { + buf_appendb(buf, ".", 1); + buf_appendb(buf, n->string, strlen(n->string) + 1); + } else + buf_appendb(buf, ".", 2); + + hash_put(hash, buf, TYPE_XREF); +} + +/* ARGSUSED */ +static void +pmdoc_Vt(MDOC_ARGS) +{ + const char *start; + size_t sz; + + if (SEC_SYNOPSIS != n->sec) + return; + if (MDOC_Vt == n->tok && MDOC_BODY != n->type) + return; + if (NULL == n->last || MDOC_TEXT != n->last->type) + return; + + /* + * Strip away leading pointer symbol '*' and trailing ';'. 
+ */ + + start = n->last->string; + + while ('*' == *start) + start++; + + if (0 == (sz = strlen(start))) + return; + + if (';' == start[(int)sz - 1]) + sz--; + + if (0 == sz) + return; + + buf_appendb(buf, start, sz); + buf_appendb(buf, "", 1); + hash_put(hash, buf, TYPE_VARIABLE); +} + +/* ARGSUSED */ +static void +pmdoc_Fo(MDOC_ARGS) +{ + + if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) + return; + if (NULL == n->child || MDOC_TEXT != n->child->type) + return; + + buf_append(buf, n->child->string); + hash_put(hash, buf, TYPE_FUNCTION); +} + + +/* ARGSUSED */ +static void +pmdoc_Nd(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + + buf_appendmdoc(dbuf, n->child, 1); + buf_appendmdoc(buf, n->child, 0); + + hash_put(hash, buf, TYPE_DESC); +} + +/* ARGSUSED */ +static void +pmdoc_Er(MDOC_ARGS) +{ + + if (SEC_ERRORS != n->sec) + return; + + buf_appendmdoc(buf, n->child, 0); + hash_put(hash, buf, TYPE_ERR); +} + +/* ARGSUSED */ +static void +pmdoc_Ev(MDOC_ARGS) +{ + + if (SEC_ENVIRONMENT != n->sec) + return; + + buf_appendmdoc(buf, n->child, 0); + hash_put(hash, buf, TYPE_ENV); +} + +/* ARGSUSED */ +static void +pmdoc_Pa(MDOC_ARGS) +{ + + if (SEC_FILES != n->sec) + return; + + buf_appendmdoc(buf, n->child, 0); + hash_put(hash, buf, TYPE_PATH); +} + +/* ARGSUSED */ +static void +pmdoc_Nm(MDOC_ARGS) +{ + + if (SEC_NAME == n->sec) { + buf_appendmdoc(buf, n->child, 0); + hash_put(hash, buf, TYPE_NAME); + return; + } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) + return; + + if (NULL == n->child) + buf_append(buf, m->name); + + buf_appendmdoc(buf, n->child, 0); + hash_put(hash, buf, TYPE_UTILITY); +} + +static void +hash_put(DB *db, const struct buf *buf, int mask) +{ + DBT key, val; + int rc; + + if (buf->len < 2) + return; + + key.data = buf->cp; + key.size = buf->len; + + if ((rc = (*db->get)(db, &key, &val, 0)) < 0) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } else if (0 == rc) + mask |= *(int *)val.data; + + val.data = &mask; + 
val.size = sizeof(int); + + if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } +} + +static void +dbt_put(DB *db, const char *dbn, DBT *key, DBT *val) +{ + + assert(key->size); + assert(val->size); + + if (0 == (*db->put)(db, key, val, 0)) + return; + + perror(dbn); + exit((int)MANDOCLEVEL_SYSERR); + /* NOTREACHED */ +} + +/* + * Call out to per-macro handlers after clearing the persistent database + * key. If the macro sets the database key, flush it to the database. + */ +static void +pmdoc_node(MDOC_ARGS) +{ + + if (NULL == n) + return; + + switch (n->type) { + case (MDOC_HEAD): + /* FALLTHROUGH */ + case (MDOC_BODY): + /* FALLTHROUGH */ + case (MDOC_TAIL): + /* FALLTHROUGH */ + case (MDOC_BLOCK): + /* FALLTHROUGH */ + case (MDOC_ELEM): + if (NULL == mdocs[n->tok]) + break; + + buf->len = 0; + (*mdocs[n->tok])(hash, buf, dbuf, n, m); + break; + default: + break; + } + + pmdoc_node(hash, buf, dbuf, n->child, m); + pmdoc_node(hash, buf, dbuf, n->next, m); +} + +static int +pman_node(MAN_ARGS) +{ + const struct man_node *head, *body; + const char *start, *sv; + size_t sz; + + if (NULL == n) + return(0); + + /* + * We're only searching for one thing: the first text child in + * the BODY of a NAME section. Since we don't keep track of + * sections in -man, run some hoops to find out whether we're in + * the correct section or not. + */ + + if (MAN_BODY == n->type && MAN_SH == n->tok) { + body = n; + assert(body->parent); + if (NULL != (head = body->parent->head) && + 1 == head->nchild && + NULL != (head = (head->child)) && + MAN_TEXT == head->type && + 0 == strcmp(head->string, "NAME") && + NULL != (body = body->child) && + MAN_TEXT == body->type) { + + assert(body->string); + start = sv = body->string; + + /* + * Go through a special heuristic dance here. + * This is why -man manuals are great! + * (I'm being sarcastic: my eyes are bleeding.) 
+ * Conventionally, one or more manual names are + * comma-specified prior to a whitespace, then a + * dash, then a description. Try to puzzle out + * the name parts here. + */ + + for ( ;; ) { + sz = strcspn(start, " ,"); + if ('\0' == start[(int)sz]) + break; + + buf->len = 0; + buf_appendb(buf, start, sz); + buf_appendb(buf, "", 1); + + hash_put(hash, buf, TYPE_NAME); + + if (' ' == start[(int)sz]) { + start += (int)sz + 1; + break; + } + + assert(',' == start[(int)sz]); + start += (int)sz + 1; + while (' ' == *start) + start++; + } + + buf->len = 0; + + if (sv == start) { + buf_append(buf, start); + return(1); + } + + while (' ' == *start) + start++; + + if (0 == strncmp(start, "-", 1)) + start += 1; + else if (0 == strncmp(start, "\\-", 2)) + start += 2; + else if (0 == strncmp(start, "\\(en", 4)) + start += 4; + else if (0 == strncmp(start, "\\(em", 4)) + start += 4; + + while (' ' == *start) + start++; + + sz = strlen(start) + 1; + buf_appendb(dbuf, start, sz); + buf_appendb(buf, start, sz); + + hash_put(hash, buf, TYPE_DESC); + } + } + + if (pman_node(hash, buf, dbuf, n->child)) + return(1); + if (pman_node(hash, buf, dbuf, n->next)) + return(1); + + return(0); +} + +static void +usage(void) +{ + + fprintf(stderr, "usage: %s [-ruv] [-d path] [file...]\n", + progname); +} |