diff options
-rw-r--r-- | mandocdb.c | 3032 |
1 files changed, 1432 insertions, 1600 deletions
@@ -1,7 +1,7 @@ /* $Id$ */ /* * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -19,313 +19,301 @@ #include "config.h" #endif -#include <sys/stat.h> +#include <sys/types.h> #include <assert.h> #include <ctype.h> +#include <dirent.h> #include <errno.h> #include <fcntl.h> -#include <fts.h> #include <getopt.h> #include <limits.h> -#include <stddef.h> #include <stdio.h> #include <stdint.h> #include <stdlib.h> #include <string.h> #include <unistd.h> -#ifdef HAVE_OHASH -#include <ohash.h> +#if defined(__linux__) +# include <endian.h> +# include <db_185.h> +#elif defined(__APPLE__) +# include <libkern/OSByteOrder.h> +# include <db.h> #else -#include "compat_ohash.h" +# include <db.h> #endif -#include <sqlite3.h> -#include "mdoc.h" #include "man.h" +#include "mdoc.h" #include "mandoc.h" +#include "mandocdb.h" #include "manpath.h" -#include "mansearch.h" - -#define SQL_EXEC(_v) \ - if (SQLITE_OK != sqlite3_exec(db, (_v), NULL, NULL, NULL)) \ - fprintf(stderr, "%s\n", sqlite3_errmsg(db)) -#define SQL_BIND_TEXT(_s, _i, _v) \ - if (SQLITE_OK != sqlite3_bind_text \ - ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \ - fprintf(stderr, "%s\n", sqlite3_errmsg(db)) -#define SQL_BIND_INT(_s, _i, _v) \ - if (SQLITE_OK != sqlite3_bind_int \ - ((_s), (_i)++, (_v))) \ - fprintf(stderr, "%s\n", sqlite3_errmsg(db)) -#define SQL_BIND_INT64(_s, _i, _v) \ - if (SQLITE_OK != sqlite3_bind_int64 \ - ((_s), (_i)++, (_v))) \ - fprintf(stderr, "%s\n", sqlite3_errmsg(db)) -#define SQL_STEP(_s) \ - if (SQLITE_DONE != sqlite3_step((_s))) \ - fprintf(stderr, "%s\n", sqlite3_errmsg(db)) -enum op { - OP_DEFAULT = 0, /* new dbs from dir list or default config */ - OP_CONFFILE, /* new databases from custom config file */ - OP_UPDATE, /* delete/add entries in existing database */ - OP_DELETE, /* delete entries from existing database */ - OP_TEST /* change no databases, report potential problems */ +#define MANDOC_BUFSZ BUFSIZ +#define MANDOC_SLOP 1024 + +#define MANDOC_SRC 0x1 +#define MANDOC_FORM 0x2 + +#define WARNING(_f, _b, _fmt, _args...) \ + do if (warnings) { \ + fprintf(stderr, "%s: ", (_b)); \ + fprintf(stderr, (_fmt), ##_args); \ + if ('\0' != *(_f)) \ + fprintf(stderr, ": %s", (_f)); \ + fprintf(stderr, "\n"); \ + } while (/* CONSTCOND */ 0) + +/* Access to the mandoc database on disk. */ + +struct mdb { + char idxn[PATH_MAX]; /* index db filename */ + char dbn[PATH_MAX]; /* keyword db filename */ + DB *idx; /* index recno database */ + DB *db; /* keyword btree database */ }; -enum form { - FORM_SRC, /* format is -man or -mdoc */ - FORM_CAT, /* format is cat */ - FORM_NONE /* format is unknown */ -}; +/* Stack of temporarily unused index records. */ -struct str { - char *utf8; /* key in UTF-8 form */ - const struct of *of; /* if set, the owning parse */ - uint64_t mask; /* bitmask in sequence */ - char key[]; /* the string itself */ +struct recs { + recno_t *stack; /* pointer to a malloc'ed array */ + size_t size; /* number of allocated slots */ + size_t cur; /* current number of empty records */ + recno_t last; /* last record number in the index */ }; -struct id { - ino_t ino; - dev_t dev; -}; +/* Tiny list for files. No need to bring in QUEUE. */ struct of { - struct id id; /* used for hashing routine */ - struct of *next; /* next in ofs */ - enum form dform; /* path-cued form */ - enum form sform; /* suffix-cued form */ - char file[PATH_MAX]; /* filename rel. to manpath */ - char *desc; /* parsed description */ - char *name; /* name (from filename) (not empty) */ - char *sec; /* suffix-cued section (or empty) */ - char *dsec; /* path-cued section (or empty) */ - char *arch; /* path-cued arch. (or empty) */ + char *fname; /* heap-allocated */ + char *sec; + char *arch; + char *title; + int src_form; + struct of *next; /* NULL for last one */ + struct of *first; /* first in list */ }; -struct title { - char *title; /* name(sec/arch) given inside the file */ - char *file; /* file name in case of mismatch */ +/* Buffer for storing growable data. */ + +struct buf { + char *cp; + size_t len; /* current length */ + size_t size; /* total buffer size */ }; -enum stmt { - STMT_DELETE = 0, /* delete manpage */ - STMT_INSERT_DOC, /* insert manpage */ - STMT_INSERT_KEY, /* insert parsed key */ - STMT__MAX +/* Operation we're going to perform. */ + +enum op { + OP_DEFAULT = 0, /* new dbs from dir list or default config */ + OP_CONFFILE, /* new databases from custom config file */ + OP_UPDATE, /* delete/add entries in existing database */ + OP_DELETE, /* delete entries from existing database */ + OP_TEST /* change no databases, report potential problems */ }; -typedef int (*mdoc_fp)(struct of *, const struct mdoc_node *); +#define MAN_ARGS DB *hash, \ + struct buf *buf, \ + struct buf *dbuf, \ + const struct man_node *n +#define MDOC_ARGS DB *hash, \ + struct buf *buf, \ + struct buf *dbuf, \ + const struct mdoc_node *n, \ + const struct mdoc_meta *m + +static void buf_appendmdoc(struct buf *, + const struct mdoc_node *, int); +static void buf_append(struct buf *, const char *); +static void buf_appendb(struct buf *, + const void *, size_t); +static void dbt_put(DB *, const char *, DBT *, DBT *); +static void hash_put(DB *, const struct buf *, uint64_t); +static void hash_reset(DB **); +static void index_merge(const struct of *, struct mparse *, + struct buf *, struct buf *, DB *, + struct mdb *, struct recs *, + const char *); +static void index_prune(const struct of *, struct mdb *, + struct recs *, const char *); +static void ofile_argbuild(int, char *[], + struct of **, const char *); +static void ofile_dirbuild(const char *, const char *, + const char *, int, struct of **, char *); +static void ofile_free(struct of *); +static void pformatted(DB *, struct buf *, struct buf *, + const struct of *, const char *); +static int pman_node(MAN_ARGS); +static void pmdoc_node(MDOC_ARGS); +static int pmdoc_head(MDOC_ARGS); +static int pmdoc_body(MDOC_ARGS); +static int pmdoc_Fd(MDOC_ARGS); +static int pmdoc_In(MDOC_ARGS); +static int pmdoc_Fn(MDOC_ARGS); +static int pmdoc_Nd(MDOC_ARGS); +static int pmdoc_Nm(MDOC_ARGS); +static int pmdoc_Sh(MDOC_ARGS); +static int pmdoc_St(MDOC_ARGS); +static int pmdoc_Xr(MDOC_ARGS); + +#define MDOCF_CHILD 0x01 /* Automatically index child nodes. */ struct mdoc_handler { - mdoc_fp fp; /* optional handler */ - uint64_t mask; /* set unless handler returns 0 */ + int (*fp)(MDOC_ARGS); /* Optional handler. */ + uint64_t mask; /* Set unless handler returns 0. */ + int flags; /* For use by pmdoc_node. */ }; -static void dbclose(int); -static void dbindex(struct mchars *, int, const struct of *); -static int dbopen(int); -static void dbprune(void); -static void fileadd(struct of *); -static int filecheck(const char *); -static void filescan(const char *); -static void *hash_alloc(size_t, void *); -static void hash_free(void *, size_t, void *); -static void *hash_halloc(size_t, void *); -static void inoadd(const struct stat *, struct of *); -static int inocheck(const struct stat *); -static void ofadd(int, const char *, const char *, const char *, - const char *, const char *, const struct stat *); -static void offree(void); -static void ofmerge(struct mchars *, struct mparse *, int); -static void parse_catpage(struct of *); -static void parse_man(struct of *, const struct man_node *); -static void parse_mdoc(struct of *, const struct mdoc_node *); -static int parse_mdoc_body(struct of *, const struct mdoc_node *); -static int parse_mdoc_head(struct of *, const struct mdoc_node *); -static int parse_mdoc_Fd(struct of *, const struct mdoc_node *); -static int parse_mdoc_Fn(struct of *, const struct mdoc_node *); -static int parse_mdoc_In(struct of *, const struct mdoc_node *); -static int parse_mdoc_Nd(struct of *, const struct mdoc_node *); -static int parse_mdoc_Nm(struct of *, const struct mdoc_node *); -static int parse_mdoc_Sh(struct of *, const struct mdoc_node *); -static int parse_mdoc_St(struct of *, const struct mdoc_node *); -static int parse_mdoc_Xr(struct of *, const struct mdoc_node *); -static int set_basedir(const char *); -static void putkey(const struct of *, - const char *, uint64_t); -static void putkeys(const struct of *, - const char *, size_t, uint64_t); -static void putmdockey(const struct of *, - const struct mdoc_node *, uint64_t); -static void say(const char *, const char *, ...); -static int treescan(void); -static size_t utf8(unsigned int, char [7]); -static void utf8key(struct mchars *, struct str *); - -static char *progname; -static int use_all; /* use all found files */ -static int nodb; /* no database changes */ -static int verb; /* print what we're doing */ -static int warnings; /* warn about crap */ -static int exitcode; /* to be returned by main */ -static enum op op; /* operational mode */ -static char basedir[PATH_MAX]; /* current base directory */ -static struct ohash inos; /* table of inodes/devices */ -static struct ohash filenames; /* table of filenames */ -static struct ohash strings; /* table of all strings */ -static struct of *ofs = NULL; /* vector of files to parse */ -static sqlite3 *db = NULL; /* current database */ -static sqlite3_stmt *stmts[STMT__MAX]; /* current statements */ - static const struct mdoc_handler mdocs[MDOC_MAX] = { - { NULL, 0 }, /* Ap */ - { NULL, 0 }, /* Dd */ - { NULL, 0 }, /* Dt */ - { NULL, 0 }, /* Os */ - { parse_mdoc_Sh, TYPE_Sh }, /* Sh */ - { parse_mdoc_head, TYPE_Ss }, /* Ss */ - { NULL, 0 }, /* Pp */ - { NULL, 0 }, /* D1 */ - { NULL, 0 }, /* Dl */ - { NULL, 0 }, /* Bd */ - { NULL, 0 }, /* Ed */ - { NULL, 0 }, /* Bl */ - { NULL, 0 }, /* El */ - { NULL, 0 }, /* It */ - { NULL, 0 }, /* Ad */ - { NULL, TYPE_An }, /* An */ - { NULL, TYPE_Ar }, /* Ar */ - { NULL, TYPE_Cd }, /* Cd */ - { NULL, TYPE_Cm }, /* Cm */ - { NULL, TYPE_Dv }, /* Dv */ - { NULL, TYPE_Er }, /* Er */ - { NULL, TYPE_Ev }, /* Ev */ - { NULL, 0 }, /* Ex */ - { NULL, TYPE_Fa }, /* Fa */ - { parse_mdoc_Fd, 0 }, /* Fd */ - { NULL, TYPE_Fl }, /* Fl */ - { parse_mdoc_Fn, 0 }, /* Fn */ - { NULL, TYPE_Ft }, /* Ft */ - { NULL, TYPE_Ic }, /* Ic */ - { parse_mdoc_In, TYPE_In }, /* In */ - { NULL, TYPE_Li }, /* Li */ - { parse_mdoc_Nd, TYPE_Nd }, /* Nd */ - { parse_mdoc_Nm, TYPE_Nm }, /* Nm */ - { NULL, 0 }, /* Op */ - { NULL, 0 }, /* Ot */ - { NULL, TYPE_Pa }, /* Pa */ - { NULL, 0 }, /* Rv */ - { parse_mdoc_St, 0 }, /* St */ - { NULL, TYPE_Va }, /* Va */ - { parse_mdoc_body, TYPE_Va }, /* Vt */ - { parse_mdoc_Xr, 0 }, /* Xr */ - { NULL, 0 }, /* %A */ - { NULL, 0 }, /* %B */ - { NULL, 0 }, /* %D */ - { NULL, 0 }, /* %I */ - { NULL, 0 }, /* %J */ - { NULL, 0 }, /* %N */ - { NULL, 0 }, /* %O */ - { NULL, 0 }, /* %P */ - { NULL, 0 }, /* %R */ - { NULL, 0 }, /* %T */ - { NULL, 0 }, /* %V */ - { NULL, 0 }, /* Ac */ - { NULL, 0 }, /* Ao */ - { NULL, 0 }, /* Aq */ - { NULL, TYPE_At }, /* At */ - { NULL, 0 }, /* Bc */ - { NULL, 0 }, /* Bf */ - { NULL, 0 }, /* Bo */ - { NULL, 0 }, /* Bq */ - { NULL, TYPE_Bsx }, /* Bsx */ - { NULL, TYPE_Bx }, /* Bx */ - { NULL, 0 }, /* Db */ - { NULL, 0 }, /* Dc */ - { NULL, 0 }, /* Do */ - { NULL, 0 }, /* Dq */ - { NULL, 0 }, /* Ec */ - { NULL, 0 }, /* Ef */ - { NULL, TYPE_Em }, /* Em */ - { NULL, 0 }, /* Eo */ - { NULL, TYPE_Fx }, /* Fx */ - { NULL, TYPE_Ms }, /* Ms */ - { NULL, 0 }, /* No */ - { NULL, 0 }, /* Ns */ - { NULL, TYPE_Nx }, /* Nx */ - { NULL, TYPE_Ox }, /* Ox */ - { NULL, 0 }, /* Pc */ - { NULL, 0 }, /* Pf */ - { NULL, 0 }, /* Po */ - { NULL, 0 }, /* Pq */ - { NULL, 0 }, /* Qc */ - { NULL, 0 }, /* Ql */ - { NULL, 0 }, /* Qo */ - { NULL, 0 }, /* Qq */ - { NULL, 0 }, /* Re */ - { NULL, 0 }, /* Rs */ - { NULL, 0 }, /* Sc */ - { NULL, 0 }, /* So */ - { NULL, 0 }, /* Sq */ - { NULL, 0 }, /* Sm */ - { NULL, 0 }, /* Sx */ - { NULL, TYPE_Sy }, /* Sy */ - { NULL, TYPE_Tn }, /* Tn */ - { NULL, 0 }, /* Ux */ - { NULL, 0 }, /* Xc */ - { NULL, 0 }, /* Xo */ - { parse_mdoc_head, 0 }, /* Fo */ - { NULL, 0 }, /* Fc */ - { NULL, 0 }, /* Oo */ - { NULL, 0 }, /* Oc */ - { NULL, 0 }, /* Bk */ - { NULL, 0 }, /* Ek */ - { NULL, 0 }, /* Bt */ - { NULL, 0 }, /* Hf */ - { NULL, 0 }, /* Fr */ - { NULL, 0 }, /* Ud */ - { NULL, TYPE_Lb }, /* Lb */ - { NULL, 0 }, /* Lp */ - { NULL, TYPE_Lk }, /* Lk */ - { NULL, TYPE_Mt }, /* Mt */ - { NULL, 0 }, /* Brq */ - { NULL, 0 }, /* Bro */ - { NULL, 0 }, /* Brc */ - { NULL, 0 }, /* %C */ - { NULL, 0 }, /* Es */ - { NULL, 0 }, /* En */ - { NULL, TYPE_Dx }, /* Dx */ - { NULL, 0 }, /* %Q */ - { NULL, 0 }, /* br */ - { NULL, 0 }, /* sp */ - { NULL, 0 }, /* %U */ - { NULL, 0 }, /* Ta */ + { NULL, 0, 0 }, /* Ap */ + { NULL, 0, 0 }, /* Dd */ + { NULL, 0, 0 }, /* Dt */ + { NULL, 0, 0 }, /* Os */ + { pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */ + { pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */ + { NULL, 0, 0 }, /* Pp */ + { NULL, 0, 0 }, /* D1 */ + { NULL, 0, 0 }, /* Dl */ + { NULL, 0, 0 }, /* Bd */ + { NULL, 0, 0 }, /* Ed */ + { NULL, 0, 0 }, /* Bl */ + { NULL, 0, 0 }, /* El */ + { NULL, 0, 0 }, /* It */ + { NULL, 0, 0 }, /* Ad */ + { NULL, TYPE_An, MDOCF_CHILD }, /* An */ + { NULL, TYPE_Ar, MDOCF_CHILD }, /* Ar */ + { NULL, TYPE_Cd, MDOCF_CHILD }, /* Cd */ + { NULL, TYPE_Cm, MDOCF_CHILD }, /* Cm */ + { NULL, TYPE_Dv, MDOCF_CHILD }, /* Dv */ + { NULL, TYPE_Er, MDOCF_CHILD }, /* Er */ + { NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */ + { NULL, 0, 0 }, /* Ex */ + { NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */ + { pmdoc_Fd, TYPE_In, 0 }, /* Fd */ + { NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */ + { pmdoc_Fn, 0, 0 }, /* Fn */ + { NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */ + { NULL, TYPE_Ic, MDOCF_CHILD }, /* Ic */ + { pmdoc_In, TYPE_In, 0 }, /* In */ + { NULL, TYPE_Li, MDOCF_CHILD }, /* Li */ + { pmdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */ + { pmdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */ + { NULL, 0, 0 }, /* Op */ + { NULL, 0, 0 }, /* Ot */ + { NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */ + { NULL, 0, 0 }, /* Rv */ + { pmdoc_St, TYPE_St, 0 }, /* St */ + { NULL, TYPE_Va, MDOCF_CHILD }, /* Va */ + { pmdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */ + { pmdoc_Xr, TYPE_Xr, 0 }, /* Xr */ + { NULL, 0, 0 }, /* %A */ + { NULL, 0, 0 }, /* %B */ + { NULL, 0, 0 }, /* %D */ + { NULL, 0, 0 }, /* %I */ + { NULL, 0, 0 }, /* %J */ + { NULL, 0, 0 }, /* %N */ + { NULL, 0, 0 }, /* %O */ + { NULL, 0, 0 }, /* %P */ + { NULL, 0, 0 }, /* %R */ + { NULL, 0, 0 }, /* %T */ + { NULL, 0, 0 }, /* %V */ + { NULL, 0, 0 }, /* Ac */ + { NULL, 0, 0 }, /* Ao */ + { NULL, 0, 0 }, /* Aq */ + { NULL, TYPE_At, MDOCF_CHILD }, /* At */ + { NULL, 0, 0 }, /* Bc */ + { NULL, 0, 0 }, /* Bf */ + { NULL, 0, 0 }, /* Bo */ + { NULL, 0, 0 }, /* Bq */ + { NULL, TYPE_Bsx, MDOCF_CHILD }, /* Bsx */ + { NULL, TYPE_Bx, MDOCF_CHILD }, /* Bx */ + { NULL, 0, 0 }, /* Db */ + { NULL, 0, 0 }, /* Dc */ + { NULL, 0, 0 }, /* Do */ + { NULL, 0, 0 }, /* Dq */ + { NULL, 0, 0 }, /* Ec */ + { NULL, 0, 0 }, /* Ef */ + { NULL, TYPE_Em, MDOCF_CHILD }, /* Em */ + { NULL, 0, 0 }, /* Eo */ + { NULL, TYPE_Fx, MDOCF_CHILD }, /* Fx */ + { NULL, TYPE_Ms, MDOCF_CHILD }, /* Ms */ + { NULL, 0, 0 }, /* No */ + { NULL, 0, 0 }, /* Ns */ + { NULL, TYPE_Nx, MDOCF_CHILD }, /* Nx */ + { NULL, TYPE_Ox, MDOCF_CHILD }, /* Ox */ + { NULL, 0, 0 }, /* Pc */ + { NULL, 0, 0 }, /* Pf */ + { NULL, 0, 0 }, /* Po */ + { NULL, 0, 0 }, /* Pq */ + { NULL, 0, 0 }, /* Qc */ + { NULL, 0, 0 }, /* Ql */ + { NULL, 0, 0 }, /* Qo */ + { NULL, 0, 0 }, /* Qq */ + { NULL, 0, 0 }, /* Re */ + { NULL, 0, 0 }, /* Rs */ + { NULL, 0, 0 }, /* Sc */ + { NULL, 0, 0 }, /* So */ + { NULL, 0, 0 }, /* Sq */ + { NULL, 0, 0 }, /* Sm */ + { NULL, 0, 0 }, /* Sx */ + { NULL, TYPE_Sy, MDOCF_CHILD }, /* Sy */ + { NULL, TYPE_Tn, MDOCF_CHILD }, /* Tn */ + { NULL, 0, 0 }, /* Ux */ + { NULL, 0, 0 }, /* Xc */ + { NULL, 0, 0 }, /* Xo */ + { pmdoc_head, TYPE_Fn, 0 }, /* Fo */ + { NULL, 0, 0 }, /* Fc */ + { NULL, 0, 0 }, /* Oo */ + { NULL, 0, 0 }, /* Oc */ + { NULL, 0, 0 }, /* Bk */ + { NULL, 0, 0 }, /* Ek */ + { NULL, 0, 0 }, /* Bt */ + { NULL, 0, 0 }, /* Hf */ + { NULL, 0, 0 }, /* Fr */ + { NULL, 0, 0 }, /* Ud */ + { NULL, TYPE_Lb, MDOCF_CHILD }, /* Lb */ + { NULL, 0, 0 }, /* Lp */ + { NULL, TYPE_Lk, MDOCF_CHILD }, /* Lk */ + { NULL, TYPE_Mt, MDOCF_CHILD }, /* Mt */ + { NULL, 0, 0 }, /* Brq */ + { NULL, 0, 0 }, /* Bro */ + { NULL, 0, 0 }, /* Brc */ + { NULL, 0, 0 }, /* %C */ + { NULL, 0, 0 }, /* Es */ + { NULL, 0, 0 }, /* En */ + { NULL, TYPE_Dx, MDOCF_CHILD }, /* Dx */ + { NULL, 0, 0 }, /* %Q */ + { NULL, 0, 0 }, /* br */ + { NULL, 0, 0 }, /* sp */ + { NULL, 0, 0 }, /* %U */ + { NULL, 0, 0 }, /* Ta */ }; +static const char *progname; +static int use_all; /* Use all directories and files. */ +static int verb; /* Output verbosity level. */ +static int warnings; /* Potential problems in manuals. */ + int main(int argc, char *argv[]) { - int ch, i; - size_t j, sz; - const char *path_arg; - struct mchars *mc; - struct manpaths dirs; - struct mparse *mp; - struct ohash_info ino_info, filename_info; - - memset(stmts, 0, STMT__MAX * sizeof(sqlite3_stmt *)); - memset(&dirs, 0, sizeof(struct manpaths)); - - ino_info.alloc = filename_info.alloc = hash_alloc; - ino_info.halloc = filename_info.halloc = hash_halloc; - ino_info.hfree = filename_info.hfree = hash_free; - - ino_info.key_offset = offsetof(struct of, id); - filename_info.key_offset = offsetof(struct of, file); + struct mparse *mp; /* parse sequence */ + struct manpaths dirs; + struct mdb mdb; + struct recs recs; + enum op op; /* current operation */ + const char *dir; + int ch, i, flags; + char dirbuf[PATH_MAX]; + DB *hash; /* temporary keyword hashtable */ + BTREEINFO info; /* btree configuration */ + size_t sz1, sz2; + struct buf buf, /* keyword buffer */ + dbuf; /* description buffer */ + struct of *of; /* list of files for processing */ + extern int optind; + extern char *optarg; progname = strrchr(argv[0], '/'); if (progname == NULL) @@ -333,47 +321,57 @@ main(int argc, char *argv[]) else ++progname; - /* - * We accept a few different invocations. - * The CHECKOP macro makes sure that invocation styles don't - * clobber each other. - */ -#define CHECKOP(_op, _ch) do \ - if (OP_DEFAULT != (_op)) { \ - fprintf(stderr, "-%c: Conflicting option\n", (_ch)); \ - goto usage; \ - } while (/*CONSTCOND*/0) + memset(&dirs, 0, sizeof(struct manpaths)); + memset(&mdb, 0, sizeof(struct mdb)); + memset(&recs, 0, sizeof(struct recs)); - path_arg = NULL; + of = NULL; + mp = NULL; + hash = NULL; op = OP_DEFAULT; + dir = NULL; - while (-1 != (ch = getopt(argc, argv, "aC:d:ntu:vW"))) + while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW"))) switch (ch) { case ('a'): use_all = 1; break; case ('C'): - CHECKOP(op, ch); - path_arg = optarg; + if (op) { + fprintf(stderr, + "-C: conflicting options\n"); + goto usage; + } + dir = optarg; op = OP_CONFFILE; break; case ('d'): - CHECKOP(op, ch); - path_arg = optarg; + if (op) { + fprintf(stderr, + "-d: conflicting options\n"); + goto usage; + } + dir = optarg; op = OP_UPDATE; break; - case ('n'): - nodb = 1; - break; case ('t'): - CHECKOP(op, ch); dup2(STDOUT_FILENO, STDERR_FILENO); + if (op) { + fprintf(stderr, + "-t: conflicting options\n"); + goto usage; + } op = OP_TEST; - nodb = warnings = 1; + use_all = 1; + warnings = 1; break; case ('u'): - CHECKOP(op, ch); - path_arg = optarg; + if (op) { + fprintf(stderr, + "-u: conflicting options\n"); + goto usage; + } + dir = optarg; op = OP_DELETE; break; case ('v'): @@ -390,601 +388,233 @@ main(int argc, char *argv[]) argv += optind; if (OP_CONFFILE == op && argc > 0) { - fprintf(stderr, "-C: Too many arguments\n"); + fprintf(stderr, "-C: too many arguments\n"); goto usage; } - exitcode = (int)MANDOCLEVEL_OK; - mp = mparse_alloc(MPARSE_AUTO, - MANDOCLEVEL_FATAL, NULL, NULL, NULL); - mc = mchars_alloc(); + memset(&info, 0, sizeof(BTREEINFO)); + info.lorder = 4321; + info.flags = R_DUP; - ohash_init(&inos, 6, &ino_info); - ohash_init(&filenames, 6, &filename_info); + mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL); - if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) { - /* - * Force processing all files. - */ - use_all = 1; + memset(&buf, 0, sizeof(struct buf)); + memset(&dbuf, 0, sizeof(struct buf)); - /* - * All of these deal with a specific directory. - * Jump into that directory then collect files specified - * on the command-line. - */ - if (0 == set_basedir(path_arg)) - goto out; - for (i = 0; i < argc; i++) - filescan(argv[i]); - if (0 == dbopen(1)) - goto out; - if (OP_TEST != op) - dbprune(); - if (OP_DELETE != op) - ofmerge(mc, mp, 0); - dbclose(1); - } else { - /* - * If we have arguments, use them as our manpaths. - * If we don't, grok from manpath(1) or however else - * manpath_parse() wants to do it. - */ - if (argc > 0) { - dirs.paths = mandoc_calloc - (argc, sizeof(char *)); - dirs.sz = (size_t)argc; - for (i = 0; i < argc; i++) - dirs.paths[i] = mandoc_strdup(argv[i]); - } else - manpath_parse(&dirs, path_arg, NULL, NULL); + buf.size = dbuf.size = MANDOC_BUFSZ; - /* - * First scan the tree rooted at a base directory, then - * build a new database and finally move it into place. - * Ignore zero-length directories and strip trailing - * slashes. - */ - for (j = 0; j < dirs.sz; j++) { - sz = strlen(dirs.paths[j]); - if (sz && '/' == dirs.paths[j][sz - 1]) - dirs.paths[j][--sz] = '\0'; - if (0 == sz) - continue; + buf.cp = mandoc_malloc(buf.size); + dbuf.cp = mandoc_malloc(dbuf.size); - if (j) { - ohash_init(&inos, 6, &ino_info); - ohash_init(&filenames, 6, &filename_info); - } - - if (0 == set_basedir(dirs.paths[j])) - goto out; - if (0 == treescan()) - goto out; - if (0 == set_basedir(dirs.paths[j])) - goto out; - if (0 == dbopen(0)) - goto out; - - ofmerge(mc, mp, warnings && !use_all); - dbclose(0); - - if (j + 1 < dirs.sz) { - ohash_delete(&inos); - ohash_delete(&filenames); - offree(); - } - } + if (OP_TEST == op) { + ofile_argbuild(argc, argv, &of, "."); + if (NULL == of) + goto out; + index_merge(of, mp, &dbuf, &buf, + hash, &mdb, &recs, "."); + goto out; } -out: - set_basedir(NULL); - manpath_free(&dirs); - mchars_free(mc); - mparse_free(mp); - ohash_delete(&inos); - ohash_delete(&filenames); - offree(); - return(exitcode); -usage: - fprintf(stderr, "usage: %s [-anvW] [-C file]\n" - " %s [-anvW] dir ...\n" - " %s [-nvW] -d dir [file ...]\n" - " %s [-nvW] -u dir [file ...]\n" - " %s -t file ...\n", - progname, progname, progname, - progname, progname); - - return((int)MANDOCLEVEL_BADARG); -} - -/* - * Scan a directory tree rooted at "basedir" for manpages. - * We use fts(), scanning directory parts along the way for clues to our - * section and architecture. - * - * If use_all has been specified, grok all files. - * If not, sanitise paths to the following: - * - * [./]man*[/<arch>]/<name>.<section> - * or - * [./]cat<section>[/<arch>]/<name>.0 - * - * TODO: accomodate for multi-language directories. - */ -static int -treescan(void) -{ - FTS *f; - FTSENT *ff; - int dform; - char *sec; - const char *dsec, *arch, *cp, *path; - const char *argv[2]; - argv[0] = "."; - argv[1] = (char *)NULL; + if (OP_UPDATE == op || OP_DELETE == op) { + strlcat(mdb.dbn, dir, PATH_MAX); + strlcat(mdb.dbn, "/", PATH_MAX); + sz1 = strlcat(mdb.dbn, MANDOC_DB, PATH_MAX); - /* - * Walk through all components under the directory, using the - * logical descent of files. - */ - f = fts_open((char * const *)argv, FTS_LOGICAL, NULL); - if (NULL == f) { - exitcode = (int)MANDOCLEVEL_SYSERR; - say("", NULL); - return(0); - } + strlcat(mdb.idxn, dir, PATH_MAX); + strlcat(mdb.idxn, "/", PATH_MAX); + sz2 = strlcat(mdb.idxn, MANDOC_IDX, PATH_MAX); - dsec = arch = NULL; - dform = FORM_NONE; - - while (NULL != (ff = fts_read(f))) { - path = ff->fts_path + 2; - /* - * If we're a regular file, add an "of" by using the - * stored directory data and handling the filename. - * Disallow duplicate (hard-linked) files. - */ - if (FTS_F == ff->fts_info) { - if (0 == strcmp(path, MANDOC_DB)) - continue; - if ( ! use_all && ff->fts_level < 2) { - if (warnings) - say(path, "Extraneous file"); - continue; - } else if (inocheck(ff->fts_statp)) { - if (warnings) - say(path, "Duplicate file"); - continue; - } else if (NULL == (sec = - strrchr(ff->fts_name, '.'))) { - if ( ! use_all) { - if (warnings) - say(path, - "No filename suffix"); - continue; - } - } else if (0 == strcmp(++sec, "html")) { - if (warnings) - say(path, "Skip html"); - continue; - } else if (0 == strcmp(sec, "gz")) { - if (warnings) - say(path, "Skip gz"); - continue; - } else if (0 == strcmp(sec, "ps")) { - if (warnings) - say(path, "Skip ps"); - continue; - } else if (0 == strcmp(sec, "pdf")) { - if (warnings) - say(path, "Skip pdf"); - continue; - } else if ( ! use_all && - ((FORM_SRC == dform && strcmp(sec, dsec)) || - (FORM_CAT == dform && strcmp(sec, "0")))) { - if (warnings) - say(path, "Wrong filename suffix"); - continue; - } else - sec[-1] = '\0'; - ofadd(dform, path, ff->fts_name, dsec, sec, - arch, ff->fts_statp); - continue; - } else if (FTS_D != ff->fts_info && - FTS_DP != ff->fts_info) { - if (warnings) - say(path, "Not a regular file"); - continue; + if (sz1 >= PATH_MAX || sz2 >= PATH_MAX) { + fprintf(stderr, "%s: path too long\n", dir); + exit((int)MANDOCLEVEL_BADARG); } - switch (ff->fts_level) { - case (0): - /* Ignore the root directory. */ - break; - case (1): - /* - * This might contain manX/ or catX/. - * Try to infer this from the name. - * If we're not in use_all, enforce it. - */ - dsec = NULL; - dform = FORM_NONE; - cp = ff->fts_name; - if (FTS_DP == ff->fts_info) - break; - - if (0 == strncmp(cp, "man", 3)) { - dform = FORM_SRC; - dsec = cp + 3; - } else if (0 == strncmp(cp, "cat", 3)) { - dform = FORM_CAT; - dsec = cp + 3; - } - - if (NULL != dsec || use_all) - break; + flags = O_CREAT | O_RDWR; + mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info); + mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL); - if (warnings) - say(path, "Unknown directory part"); - fts_set(f, ff, FTS_SKIP); - break; - case (2): - /* - * Possibly our architecture. - * If we're descending, keep tabs on it. - */ - arch = NULL; - if (FTS_DP != ff->fts_info && NULL != dsec) - arch = ff->fts_name; - break; - default: - if (FTS_DP == ff->fts_info || use_all) - break; - if (warnings) - say(path, "Extraneous directory part"); - fts_set(f, ff, FTS_SKIP); - break; + if (NULL == mdb.db) { + perror(mdb.dbn); + exit((int)MANDOCLEVEL_SYSERR); + } else if (NULL == mdb.idx) { + perror(mdb.idxn); + exit((int)MANDOCLEVEL_SYSERR); } - } - - fts_close(f); - return(1); -} -/* - * Add a file to the file vector. - * Do not verify that it's a "valid" looking manpage (we'll do that - * later). - * - * Try to infer the manual section, architecture, and page name from the - * path, assuming it looks like - * - * [./]man*[/<arch>]/<name>.<section> - * or - * [./]cat<section>[/<arch>]/<name>.0 - * - * Stuff this information directly into the "of" vector. - * See treescan() for the fts(3) version of this. - */ -static void -filescan(const char *file) -{ - char buf[PATH_MAX]; - const char *sec, *arch, *name, *dsec; - char *p, *start; - int dform; - struct stat st; + ofile_argbuild(argc, argv, &of, dir); - assert(use_all); + if (NULL == of) + goto out; - if (0 == strncmp(file, "./", 2)) - file += 2; + index_prune(of, &mdb, &recs, dir); - if (NULL == realpath(file, buf)) { - exitcode = (int)MANDOCLEVEL_BADARG; - say(file, NULL); - return; - } else if (strstr(buf, basedir) != buf) { - exitcode = (int)MANDOCLEVEL_BADARG; - say("", "%s: outside base directory", buf); - return; - } else if (-1 == stat(buf, &st)) { - exitcode = (int)MANDOCLEVEL_BADARG; - say(file, NULL); - return; - } else if ( ! (S_IFREG & st.st_mode)) { - exitcode = (int)MANDOCLEVEL_BADARG; - say(file, "Not a regular file"); - return; - } else if (inocheck(&st)) { - if (warnings) - say(file, "Duplicate file"); - return; - } - start = buf + strlen(basedir); - sec = arch = name = dsec = NULL; - dform = FORM_NONE; + /* + * Go to the root of the respective manual tree. + * This must work or no manuals may be found (they're + * indexed relative to the root). + */ - /* - * First try to guess our directory structure. - * If we find a separator, try to look for man* or cat*. - * If we find one of these and what's underneath is a directory, - * assume it's an architecture. - */ - if (NULL != (p = strchr(start, '/'))) { - *p++ = '\0'; - if (0 == strncmp(start, "man", 3)) { - dform = FORM_SRC; - dsec = start + 3; - } else if (0 == strncmp(start, "cat", 3)) { - dform = FORM_CAT; - dsec = start + 3; + if (OP_UPDATE == op) { + if (-1 == chdir(dir)) { + perror(dir); + exit((int)MANDOCLEVEL_SYSERR); + } + index_merge(of, mp, &dbuf, &buf, hash, + &mdb, &recs, dir); } - start = p; - if (NULL != dsec && NULL != (p = strchr(start, '/'))) { - *p++ = '\0'; - arch = start; - start = p; - } + goto out; } /* - * Now check the file suffix. - * Suffix of `.0' indicates a catpage, `.1-9' is a manpage. + * Configure the directories we're going to scan. + * If we have command-line arguments, use them. + * If not, we use man(1)'s method (see mandocdb.8). */ - p = strrchr(start, '\0'); - while (p-- > start && '/' != *p && '.' != *p) - /* Loop. */ ; - - if ('.' == *p) { - *p++ = '\0'; - sec = p; - } - /* - * Now try to parse the name. - * Use the filename portion of the path. - */ - name = start; - if (NULL != (p = strrchr(start, '/'))) { - name = p + 1; - *p = '\0'; - } + if (argc > 0) { + dirs.paths = mandoc_calloc(argc, sizeof(char *)); + dirs.sz = argc; + for (i = 0; i < argc; i++) + dirs.paths[i] = mandoc_strdup(argv[i]); + } else + manpath_parse(&dirs, dir, NULL, NULL); - ofadd(dform, file, name, dsec, sec, arch, &st); -} + for (i = 0; i < dirs.sz; i++) { + /* + * Go to the root of the respective manual tree. + * This must work or no manuals may be found: + * They are indexed relative to the root. + */ -/* - * See fileadd(). - */ -static int -filecheck(const char *name) -{ + if (-1 == chdir(dirs.paths[i])) { + perror(dirs.paths[i]); + exit((int)MANDOCLEVEL_SYSERR); + } - return(NULL != ohash_find(&filenames, - ohash_qlookup(&filenames, name))); -} + strlcpy(mdb.dbn, MANDOC_DB, PATH_MAX); + strlcpy(mdb.idxn, MANDOC_IDX, PATH_MAX); -/* - * Use the standard hashing mechanism (K&R) to see if the given filename - * already exists. - */ -static void -fileadd(struct of *of) -{ - unsigned int slot; + flags = O_CREAT | O_TRUNC | O_RDWR; + mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info); + mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL); - slot = ohash_qlookup(&filenames, of->file); - assert(NULL == ohash_find(&filenames, slot)); - ohash_insert(&filenames, slot, of); -} + if (NULL == mdb.db) { + perror(mdb.dbn); + exit((int)MANDOCLEVEL_SYSERR); + } else if (NULL == mdb.idx) { + perror(mdb.idxn); + exit((int)MANDOCLEVEL_SYSERR); + } -/* - * See inoadd(). - */ -static int -inocheck(const struct stat *st) -{ - struct id id; - uint32_t hash; + /* + * Search for manuals and fill the new database. + */ - memset(&id, 0, sizeof(id)); - id.ino = hash = st->st_ino; - id.dev = st->st_dev; + strlcpy(dirbuf, dirs.paths[i], PATH_MAX); + ofile_dirbuild(".", "", "", 0, &of, dirbuf); - return(NULL != ohash_find(&inos, ohash_lookup_memory( - &inos, (char *)&id, sizeof(id), hash))); -} + if (NULL != of) { + index_merge(of, mp, &dbuf, &buf, hash, + &mdb, &recs, dirs.paths[i]); + ofile_free(of); + of = NULL; + } -/* - * The hashing function used here is quite simple: simply take the inode - * and use uint32_t of its bits. - * Then when we do the lookup, use both the inode and device identifier. - */ -static void -inoadd(const struct stat *st, struct of *of) -{ - uint32_t hash; - unsigned int slot; + (*mdb.db->close)(mdb.db); + (*mdb.idx->close)(mdb.idx); + mdb.db = NULL; + mdb.idx = NULL; + } - of->id.ino = hash = st->st_ino; - of->id.dev = st->st_dev; - slot = ohash_lookup_memory - (&inos, (char *)&of->id, sizeof(of->id), hash); +out: + if (mdb.db) + (*mdb.db->close)(mdb.db); + if (mdb.idx) + (*mdb.idx->close)(mdb.idx); + if (hash) + (*hash->close)(hash); + if (mp) + mparse_free(mp); - assert(NULL == ohash_find(&inos, slot)); - ohash_insert(&inos, slot, of); -} + manpath_free(&dirs); + ofile_free(of); + free(buf.cp); + free(dbuf.cp); + free(recs.stack); -static void -ofadd(int dform, const char *file, const char *name, const char *dsec, - const char *sec, const char *arch, const struct stat *st) -{ - struct of *of; - int sform; - - assert(NULL != file); - - if (NULL == name) - name = ""; - if (NULL == sec) - sec = ""; - if (NULL == dsec) - dsec = ""; - if (NULL == arch) - arch = ""; - - sform = FORM_NONE; - if (NULL != sec && *sec <= '9' && *sec >= '1') - sform = FORM_SRC; - else if (NULL != sec && *sec == '0') { - sec = dsec; - sform = FORM_CAT; - } + return(MANDOCLEVEL_OK); - of = mandoc_calloc(1, sizeof(struct of)); - strlcpy(of->file, file, PATH_MAX); - of->name = mandoc_strdup(name); - of->sec = mandoc_strdup(sec); - of->dsec = mandoc_strdup(dsec); - of->arch = mandoc_strdup(arch); - of->sform = sform; - of->dform = dform; - of->next = ofs; - ofs = of; +usage: + fprintf(stderr, + "usage: %s [-av] [-C file] | dir ... | -t file ...\n" + " -d dir [file ...] | " + "-u dir [file ...]\n", + progname); - /* - * Add to unique identifier hash. - * Then if it's a source manual and we're going to use source in - * favour of catpages, add it to that hash. - */ - inoadd(st, of); - fileadd(of); + return((int)MANDOCLEVEL_BADARG); } -static void -offree(void) +void +index_merge(const struct of *of, struct mparse *mp, + struct buf *dbuf, struct buf *buf, DB *hash, + struct mdb *mdb, struct recs *recs, + const char *basedir) { - struct of *of; - - while (NULL != (of = ofs)) { - ofs = of->next; - free(of->name); - free(of->sec); - free(of->dsec); - free(of->arch); - free(of); + recno_t rec; + int ch, skip; + DBT key, val; + DB *files; /* temporary file name table */ + char emptystring[1] = {'\0'}; + struct mdoc *mdoc; + struct man *man; + char *p; + const char *fn, *msec, *march, *mtitle; + uint64_t mask; + size_t sv; + unsigned seq; + uint64_t vbuf[2]; + char type; + + if (warnings) { + files = NULL; + hash_reset(&files); } -} -/* - * Run through the files in the global vector "ofs" and add them to the - * database specified in "basedir". - * - * This handles the parsing scheme itself, using the cues of directory - * and filename to determine whether the file is parsable or not. - */ -static void -ofmerge(struct mchars *mc, struct mparse *mp, int check_reachable) -{ - struct ohash title_table; - struct ohash_info title_info, str_info; - char buf[PATH_MAX]; - struct of *of; - struct mdoc *mdoc; - struct man *man; - struct title *title_entry; - char *bufp, *title_str; - const char *msec, *march, *mtitle, *cp; - size_t sz; - int form; - int match; - unsigned int slot; - enum mandoclevel lvl; - - str_info.alloc = hash_alloc; - str_info.halloc = hash_halloc; - str_info.hfree = hash_free; - str_info.key_offset = offsetof(struct str, key); - - if (check_reachable) { - title_info.alloc = hash_alloc; - title_info.halloc = hash_halloc; - title_info.hfree = hash_free; - title_info.key_offset = offsetof(struct title, title); - ohash_init(&title_table, 6, &title_info); - } + rec = 0; + for (of = of->first; of; of = of->next) { + fn = of->fname; - for (of = ofs; NULL != of; of = of->next) { /* - * If we're a catpage (as defined by our path), then see - * if a manpage exists by the same name (ignoring the - * suffix). - * If it does, then we want to use it instead of our - * own. + * Try interpreting the file as mdoc(7) or man(7) + * source code, unless it is already known to be + * formatted. Fall back to formatted mode. */ - if ( ! use_all && FORM_CAT == of->dform) { - sz = strlcpy(buf, of->file, PATH_MAX); - if (sz >= PATH_MAX) { - if (warnings) - say(of->file, "Filename too long"); - continue; - } - bufp = strstr(buf, "cat"); - assert(NULL != bufp); - memcpy(bufp, "man", 3); - if (NULL != (bufp = strrchr(buf, '.'))) - *++bufp = '\0'; - strlcat(buf, of->dsec, PATH_MAX); - if (filecheck(buf)) { - if (warnings) - say(of->file, "Man " - "source exists: %s", buf); - continue; - } - } - ohash_init(&strings, 6, &str_info); mparse_reset(mp); mdoc = NULL; man = NULL; - form = 0; - msec = of->dsec; - march = of->arch; - mtitle = of->name; - match = 1; - /* - * Try interpreting the file as mdoc(7) or man(7) - * source code, unless it is already known to be - * formatted. Fall back to formatted mode. - */ - if (FORM_SRC == of->dform || FORM_SRC == of->sform) { - lvl = mparse_readfd(mp, -1, of->file); - if (lvl < MANDOCLEVEL_FATAL) - mparse_result(mp, &mdoc, &man); - } + if ((MANDOC_SRC & of->src_form || + ! (MANDOC_FORM & of->src_form)) && + MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn)) + mparse_result(mp, &mdoc, &man); if (NULL != mdoc) { - form = 1; msec = mdoc_meta(mdoc)->msec; march = mdoc_meta(mdoc)->arch; + if (NULL == march) + march = ""; mtitle = mdoc_meta(mdoc)->title; } else if (NULL != man) { - form = 1; msec = man_meta(man)->msec; march = ""; mtitle = man_meta(man)->title; - } - - if (NULL == msec) - msec = ""; - if (NULL == march) - march = ""; - if (NULL == mtitle) - mtitle = ""; + } else { + msec = of->sec; + march = of->arch; + mtitle = of->title; + } /* * Check whether the manual section given in a file @@ -995,14 +625,13 @@ ofmerge(struct mchars *mc, struct mparse *mp, int check_reachable) * section, like encrypt(1) = makekey(8). Do not skip * manuals for such reasons. */ - if (warnings && !use_all && form && - strcasecmp(msec, of->dsec)) { - match = 0; - say(of->file, "Section \"%s\" " - "manual in %s directory", - msec, of->dsec); - } + skip = 0; + assert(of->sec); + assert(msec); + if (strcasecmp(msec, of->sec)) + WARNING(fn, basedir, "Section \"%s\" manual " + "in \"%s\" directory", msec, of->sec); /* * Manual page directories exist for each kernel * architecture as returned by machine(1). @@ -1017,28 +646,24 @@ ofmerge(struct mchars *mc, struct mparse *mp, int check_reachable) * Thus, warn about architecture mismatches, * but don't skip manuals for this reason. */ - if (warnings && !use_all && strcasecmp(march, of->arch)) { - match = 0; - say(of->file, "Architecture \"%s\" " + + assert(of->arch); + assert(march); + if (strcasecmp(march, of->arch)) + WARNING(fn, basedir, "Architecture \"%s\" " "manual in \"%s\" directory", march, of->arch); - } - if (warnings && !use_all && strcasecmp(mtitle, of->name)) - match = 0; - putkey(of, of->name, TYPE_Nm); + /* + * By default, skip a file if the title given + * in the file disagrees with the file name. + * Do not warn, this happens for all MLINKs. + */ - if (NULL != mdoc) { - if (NULL != (cp = mdoc_meta(mdoc)->name)) - putkey(of, cp, TYPE_Nm); - assert(NULL == of->desc); - parse_mdoc(of, mdoc_node(mdoc)); - putkey(of, NULL != of->desc ? - of->desc : of->name, TYPE_Nd); - } else if (NULL != man) - parse_man(of, man_node(man)); - else - parse_catpage(of); + assert(of->title); + assert(mtitle); + if (strcasecmp(mtitle, of->title)) + skip = 1; /* * Build a title string for the file. If it matches @@ -1046,376 +671,390 @@ ofmerge(struct mchars *mc, struct mparse *mp, int check_reachable) * found; else, remember it as missing. */ - if (check_reachable) { - if (-1 == asprintf(&title_str, "%s(%s%s%s)", mtitle, - msec, '\0' == *march ? "" : "/", march)) { - perror(NULL); - exit((int)MANDOCLEVEL_SYSERR); + if (warnings) { + buf->len = 0; + buf_appendb(buf, mtitle, strlen(mtitle)); + buf_appendb(buf, "(", 1); + buf_appendb(buf, msec, strlen(msec)); + if ('\0' != *march) { + buf_appendb(buf, "/", 1); + buf_appendb(buf, march, strlen(march)); } - slot = ohash_qlookup(&title_table, title_str); - title_entry = ohash_find(&title_table, slot); - if (NULL == title_entry) { - title_entry = mandoc_malloc( - sizeof(struct title)); - title_entry->title = title_str; - title_entry->file = mandoc_strdup( - match ? "" : of->file); - ohash_insert(&title_table, slot, - title_entry); - } else { - if (match) - *title_entry->file = '\0'; - free(title_str); + buf_appendb(buf, ")", 2); + for (p = buf->cp; '\0' != *p; p++) + *p = tolower(*p); + key.data = buf->cp; + key.size = buf->len; + val.data = NULL; + val.size = 0; + if (0 == skip) + val.data = emptystring; + else { + ch = (*files->get)(files, &key, &val, 0); + if (ch < 0) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } else if (ch > 0) { + val.data = (void *)fn; + val.size = strlen(fn) + 1; + } else + val.data = NULL; + } + if (NULL != val.data && + (*files->put)(files, &key, &val, 0) < 0) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); } } - dbindex(mc, form, of); - ohash_delete(&strings); - } + if (skip && !use_all) + continue; - if (check_reachable) { - title_entry = ohash_first(&title_table, &slot); - while (NULL != title_entry) { - if ('\0' != *title_entry->file) - say(title_entry->file, - "Probably unreachable, title is %s", - title_entry->title); - free(title_entry->title); - free(title_entry->file); - free(title_entry); - title_entry = ohash_next(&title_table, &slot); - } - ohash_delete(&title_table); - } -} + /* + * The index record value consists of a nil-terminated + * filename, a nil-terminated manual section, and a + * nil-terminated description. Use the actual + * location of the file, such that the user can find + * it with man(1). Since the description may not be + * set, we set a sentinel to see if we're going to + * write a nil byte in its place. + */ -static void -parse_catpage(struct of *of) -{ - FILE *stream; - char *line, *p, *title; - size_t len, plen, titlesz; + dbuf->len = 0; + type = mdoc ? 'd' : (man ? 'a' : 'c'); + buf_appendb(dbuf, &type, 1); + buf_appendb(dbuf, fn, strlen(fn) + 1); + buf_appendb(dbuf, of->sec, strlen(of->sec) + 1); + buf_appendb(dbuf, of->title, strlen(of->title) + 1); + buf_appendb(dbuf, of->arch, strlen(of->arch) + 1); - if (NULL == (stream = fopen(of->file, "r"))) { - if (warnings) - say(of->file, NULL); - return; - } + sv = dbuf->len; - /* Skip to first blank line. */ + /* + * Collect keyword/mask pairs. + * Each pair will become a new btree node. + */ - while (NULL != (line = fgetln(stream, &len))) - if ('\n' == *line) - break; + hash_reset(&hash); + if (mdoc) + pmdoc_node(hash, buf, dbuf, + mdoc_node(mdoc), mdoc_meta(mdoc)); + else if (man) + pman_node(hash, buf, dbuf, man_node(man)); + else + pformatted(hash, buf, dbuf, of, basedir); - /* - * Assume the first line that is not indented - * is the first section header. Skip to it. - */ + /* Test mode, do not access any database. */ - while (NULL != (line = fgetln(stream, &len))) - if ('\n' != *line && ' ' != *line) - break; - - /* - * Read up until the next section into a buffer. - * Strip the leading and trailing newline from each read line, - * appending a trailing space. - * Ignore empty (whitespace-only) lines. - */ + if (NULL == mdb->db || NULL == mdb->idx) + continue; - titlesz = 0; - title = NULL; + /* + * Make sure the file name is always registered + * as an .Nm search key. + */ + buf->len = 0; + buf_append(buf, of->title); + hash_put(hash, buf, TYPE_Nm); - while (NULL != (line = fgetln(stream, &len))) { - if (' ' != *line || '\n' != line[len - 1]) - break; - while (len > 0 && isspace((unsigned char)*line)) { - line++; - len--; - } - if (1 == len) - continue; - title = mandoc_realloc(title, titlesz + len); - memcpy(title + titlesz, line, len); - titlesz += len; - title[titlesz - 1] = ' '; - } + /* + * Reclaim an empty index record, if available. + * Use its record number for all new btree nodes. + */ - /* - * If no page content can be found, or the input line - * is already the next section header, or there is no - * trailing newline, reuse the page title as the page - * description. - */ + if (recs->cur > 0) { + recs->cur--; + rec = recs->stack[(int)recs->cur]; + } else if (recs->last > 0) { + rec = recs->last; + recs->last = 0; + } else + rec++; + vbuf[1] = htobe64(rec); - if (NULL == title || '\0' == *title) { - if (warnings) - say(of->file, "Cannot find NAME section"); - assert(NULL == of->desc); - of->desc = mandoc_strdup(of->name); - putkey(of, of->name, TYPE_Nd); - fclose(stream); - free(title); - return; - } + /* + * Copy from the in-memory hashtable of pending + * keyword/mask pairs into the database. + */ - title = mandoc_realloc(title, titlesz + 1); - title[titlesz] = '\0'; + seq = R_FIRST; + while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { + seq = R_NEXT; + assert(sizeof(uint64_t) == val.size); + memcpy(&mask, val.data, val.size); + vbuf[0] = htobe64(mask); + val.size = sizeof(vbuf); + val.data = &vbuf; + dbt_put(mdb->db, mdb->dbn, &key, &val); + } + if (ch < 0) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } - /* - * Skip to the first dash. - * Use the remaining line as the description (no more than 70 - * bytes). - */ + /* + * Apply to the index. If we haven't had a description + * set, put an empty one in now. + */ - if (NULL != (p = strstr(title, "- "))) { - for (p += 2; ' ' == *p || '\b' == *p; p++) - /* Skip to next word. */ ; - } else { - if (warnings) - say(of->file, "No dash in title line"); - p = title; - } + if (dbuf->len == sv) + buf_appendb(dbuf, "", 1); - plen = strlen(p); + key.data = &rec; + key.size = sizeof(recno_t); - /* Strip backspace-encoding from line. */ + val.data = dbuf->cp; + val.size = dbuf->len; - while (NULL != (line = memchr(p, '\b', plen))) { - len = line - p; - if (0 == len) { - memmove(line, line + 1, plen--); - continue; - } - memmove(line - 1, line + 1, plen - len); - plen -= 2; + if (verb) + printf("%s: Adding to index: %s\n", basedir, fn); + + dbt_put(mdb->idx, mdb->idxn, &key, &val); } - assert(NULL == of->desc); - of->desc = mandoc_strdup(p); - putkey(of, of->desc, TYPE_Nd); - fclose(stream); - free(title); + /* + * Iterate the remembered file titles and check that + * all files can be found by their main title. + */ + + if (warnings) { + seq = R_FIRST; + while (0 == (*files->seq)(files, &key, &val, seq)) { + seq = R_NEXT; + if (val.size) + WARNING((char *)val.data, basedir, + "Probably unreachable, title " + "is %s", (char *)key.data); + } + (*files->close)(files); + } } /* - * Put a type/word pair into the word database for this particular file. + * Scan through all entries in the index file `idx' and prune those + * entries in `ofile'. + * Pruning consists of removing from `db', then invalidating the entry + * in `idx' (zeroing its value size). */ static void -putkey(const struct of *of, const char *value, uint64_t type) +index_prune(const struct of *ofile, struct mdb *mdb, + struct recs *recs, const char *basedir) { + const struct of *of; + const char *fn; + uint64_t vbuf[2]; + unsigned seq, sseq; + DBT key, val; + int ch; - assert(NULL != value); - putkeys(of, value, strlen(value), type); -} + recs->cur = 0; + seq = R_FIRST; + while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) { + seq = R_NEXT; + assert(sizeof(recno_t) == key.size); + memcpy(&recs->last, key.data, key.size); -/* - * Grok all nodes at or below a certain mdoc node into putkey(). - */ -static void -putmdockey(const struct of *of, const struct mdoc_node *n, uint64_t m) -{ + /* Deleted records are zero-sized. Skip them. */ - for ( ; NULL != n; n = n->next) { - if (NULL != n->child) - putmdockey(of, n->child, m); - if (MDOC_TEXT == n->type) - putkey(of, n->string, m); - } -} + if (0 == val.size) + goto cont; -static void -parse_man(struct of *of, const struct man_node *n) -{ - const struct man_node *head, *body; - char *start, *sv, *title; - char byte; - size_t sz, titlesz; + /* + * Make sure we're sane. + * Read past our mdoc/man/cat type to the next string, + * then make sure it's bounded by a NUL. + * Failing any of these, we go into our error handler. + */ - if (NULL == n) - return; + fn = (char *)val.data + 1; + if (NULL == memchr(fn, '\0', val.size - 1)) + break; - /* - * We're only searching for one thing: the first text child in - * the BODY of a NAME section. Since we don't keep track of - * sections in -man, run some hoops to find out whether we're in - * the correct section or not. - */ + /* + * Search for the file in those we care about. + * XXX: build this into a tree. Too slow. + */ - if (MAN_BODY == n->type && MAN_SH == n->tok) { - body = n; - assert(body->parent); - if (NULL != (head = body->parent->head) && - 1 == head->nchild && - NULL != (head = (head->child)) && - MAN_TEXT == head->type && - 0 == strcmp(head->string, "NAME") && - NULL != (body = body->child) && - MAN_TEXT == body->type) { + for (of = ofile->first; of; of = of->next) + if (0 == strcmp(fn, of->fname)) + break; - title = NULL; - titlesz = 0; + if (NULL == of) + continue; - /* - * Suck the entire NAME section into memory. - * Yes, we might run away. - * But too many manuals have big, spread-out - * NAME sections over many lines. - */ + /* + * Search through the keyword database, throwing out all + * references to our file. + */ - for ( ; NULL != body; body = body->next) { - if (MAN_TEXT != body->type) - break; - if (0 == (sz = strlen(body->string))) - continue; - title = mandoc_realloc - (title, titlesz + sz + 1); - memcpy(title + titlesz, body->string, sz); - titlesz += sz + 1; - title[titlesz - 1] = ' '; - } - if (NULL == title) - return; + sseq = R_FIRST; + while (0 == (ch = (*mdb->db->seq)(mdb->db, + &key, &val, sseq))) { + sseq = R_NEXT; + if (sizeof(vbuf) != val.size) + break; - title = mandoc_realloc(title, titlesz + 1); - title[titlesz] = '\0'; + memcpy(vbuf, val.data, val.size); + if (recs->last != betoh64(vbuf[1])) + continue; - /* Skip leading space. */ + if ((ch = (*mdb->db->del)(mdb->db, + &key, R_CURSOR)) < 0) + break; + } - sv = title; - while (isspace((unsigned char)*sv)) - sv++; + if (ch < 0) { + perror(mdb->dbn); + exit((int)MANDOCLEVEL_SYSERR); + } else if (1 != ch) { + fprintf(stderr, "%s: corrupt database\n", + mdb->dbn); + exit((int)MANDOCLEVEL_SYSERR); + } - if (0 == (sz = strlen(sv))) { - free(title); - return; - } + if (verb) + printf("%s: Deleting from index: %s\n", + basedir, fn); - /* Erase trailing space. */ + val.size = 0; + ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR); - start = &sv[sz - 1]; - while (start > sv && isspace((unsigned char)*start)) - *start-- = '\0'; + if (ch < 0) + break; +cont: + if (recs->cur >= recs->size) { + recs->size += MANDOC_SLOP; + recs->stack = mandoc_realloc(recs->stack, + recs->size * sizeof(recno_t)); + } - if (start == sv) { - free(title); - return; - } + recs->stack[(int)recs->cur] = recs->last; + recs->cur++; + } - start = sv; + if (ch < 0) { + perror(mdb->idxn); + exit((int)MANDOCLEVEL_SYSERR); + } else if (1 != ch) { + fprintf(stderr, "%s: corrupt index\n", mdb->idxn); + exit((int)MANDOCLEVEL_SYSERR); + } - /* - * Go through a special heuristic dance here. - * Conventionally, one or more manual names are - * comma-specified prior to a whitespace, then a - * dash, then a description. Try to puzzle out - * the name parts here. - */ + recs->last++; +} - for ( ;; ) { - sz = strcspn(start, " ,"); - if ('\0' == start[sz]) - break; +/* + * Grow the buffer (if necessary) and copy in a binary string. + */ +static void +buf_appendb(struct buf *buf, const void *cp, size_t sz) +{ - byte = start[sz]; - start[sz] = '\0'; + /* Overshoot by MANDOC_BUFSZ. */ - putkey(of, start, TYPE_Nm); + while (buf->len + sz >= buf->size) { + buf->size = buf->len + sz + MANDOC_BUFSZ; + buf->cp = mandoc_realloc(buf->cp, buf->size); + } - if (' ' == byte) { - start += sz + 1; - break; - } + memcpy(buf->cp + (int)buf->len, cp, sz); + buf->len += sz; +} - assert(',' == byte); - start += sz + 1; - while (' ' == *start) - start++; - } +/* + * Append a nil-terminated string to the buffer. + * This can be invoked multiple times. + * The buffer string will be nil-terminated. + * If invoked multiple times, a space is put between strings. + */ +static void +buf_append(struct buf *buf, const char *cp) +{ + size_t sz; - if (sv == start) { - putkey(of, start, TYPE_Nm); - free(title); - return; - } + if (0 == (sz = strlen(cp))) + return; - while (isspace((unsigned char)*start)) - start++; + if (buf->len) + buf->cp[(int)buf->len - 1] = ' '; - if (0 == strncmp(start, "-", 1)) - start += 1; - else if (0 == strncmp(start, "\\-\\-", 4)) - start += 4; - else if (0 == strncmp(start, "\\-", 2)) - start += 2; - else if (0 == strncmp(start, "\\(en", 4)) - start += 4; - else if (0 == strncmp(start, "\\(em", 4)) - start += 4; + buf_appendb(buf, cp, sz + 1); +} - while (' ' == *start) - start++; +/* + * Recursively add all text from a given node. + * This is optimised for general mdoc nodes in this context, which do + * not consist of subexpressions and having a recursive call for n->next + * would be wasteful. + * The "f" variable should be 0 unless called from pmdoc_Nd for the + * description buffer, which does not start at the beginning of the + * buffer. + */ +static void +buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f) +{ - assert(NULL == of->desc); - of->desc = mandoc_strdup(start); - putkey(of, of->desc, TYPE_Nd); - free(title); - return; - } - } + for ( ; n; n = n->next) { + if (n->child) + buf_appendmdoc(buf, n->child, f); - for (n = n->child; n; n = n->next) - parse_man(of, n); + if (MDOC_TEXT == n->type && f) { + f = 0; + buf_appendb(buf, n->string, + strlen(n->string) + 1); + } else if (MDOC_TEXT == n->type) + buf_append(buf, n->string); + + } } static void -parse_mdoc(struct of *of, const struct mdoc_node *n) +hash_reset(DB **db) { + DB *hash; - assert(NULL != n); - for (n = n->child; NULL != n; n = n->next) { - switch (n->type) { - case (MDOC_ELEM): - /* FALLTHROUGH */ - case (MDOC_BLOCK): - /* FALLTHROUGH */ - case (MDOC_HEAD): - /* FALLTHROUGH */ - case (MDOC_BODY): - /* FALLTHROUGH */ - case (MDOC_TAIL): - if (NULL != mdocs[n->tok].fp) - if (0 == (*mdocs[n->tok].fp)(of, n)) - break; - if (mdocs[n->tok].mask) - putmdockey(of, n->child, mdocs[n->tok].mask); - break; - default: - assert(MDOC_ROOT != n->type); - continue; - } - if (NULL != n->child) - parse_mdoc(of, n); + if (NULL != (hash = *db)) + (*hash->close)(hash); + + *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL); + if (NULL == *db) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); } } +/* ARGSUSED */ +static int +pmdoc_head(MDOC_ARGS) +{ + + return(MDOC_HEAD == n->type); +} + +/* ARGSUSED */ static int -parse_mdoc_Fd(struct of *of, const struct mdoc_node *n) +pmdoc_body(MDOC_ARGS) +{ + + return(MDOC_BODY == n->type); +} + +/* ARGSUSED */ +static int +pmdoc_Fd(MDOC_ARGS) { const char *start, *end; size_t sz; - if (SEC_SYNOPSIS != n->sec || - NULL == (n = n->child) || - MDOC_TEXT != n->type) + if (SEC_SYNOPSIS != n->sec) + return(0); + if (NULL == (n = n->child) || MDOC_TEXT != n->type) return(0); /* * Only consider those `Fd' macro fields that begin with an * "inclusion" token (versus, e.g., #define). */ - if (strcmp("#include", n->string)) return(0); @@ -1438,120 +1077,121 @@ parse_mdoc_Fd(struct of *of, const struct mdoc_node *n) if ('>' == *end || '"' == *end) end--; - if (end > start) - putkeys(of, start, end - start + 1, TYPE_In); + assert(end >= start); + + buf_appendb(buf, start, (size_t)(end - start + 1)); + buf_appendb(buf, "", 1); return(1); } +/* ARGSUSED */ static int -parse_mdoc_In(struct of *of, const struct mdoc_node *n) +pmdoc_In(MDOC_ARGS) { - if (NULL != n->child && MDOC_TEXT == n->child->type) + if (NULL == n->child || MDOC_TEXT != n->child->type) return(0); - putkey(of, n->child->string, TYPE_In); + buf_append(buf, n->child->string); return(1); } +/* ARGSUSED */ static int -parse_mdoc_Fn(struct of *of, const struct mdoc_node *n) +pmdoc_Fn(MDOC_ARGS) { + struct mdoc_node *nn; const char *cp; - if (NULL == (n = n->child) || MDOC_TEXT != n->type) + nn = n->child; + + if (NULL == nn || MDOC_TEXT != nn->type) return(0); - /* - * Parse: .Fn "struct type *name" "char *arg". - * First strip away pointer symbol. - * Then store the function name, then type. - * Finally, store the arguments. - */ + /* .Fn "struct type *name" "char *arg" */ + + cp = strrchr(nn->string, ' '); + if (NULL == cp) + cp = nn->string; - if (NULL == (cp = strrchr(n->string, ' '))) - cp = n->string; + /* Strip away pointer symbol. */ while ('*' == *cp) cp++; - putkey(of, cp, TYPE_Fn); + /* Store the function name. */ + + buf_append(buf, cp); + hash_put(hash, buf, TYPE_Fn); + + /* Store the function type. */ + + if (nn->string < cp) { + buf->len = 0; + buf_appendb(buf, nn->string, cp - nn->string); + buf_appendb(buf, "", 1); + hash_put(hash, buf, TYPE_Ft); + } - if (n->string < cp) - putkeys(of, n->string, cp - n->string, TYPE_Ft); + /* Store the arguments. */ - for (n = n->next; NULL != n; n = n->next) - if (MDOC_TEXT == n->type) - putkey(of, n->string, TYPE_Fa); + for (nn = nn->next; nn; nn = nn->next) { + if (MDOC_TEXT != nn->type) + continue; + buf->len = 0; + buf_append(buf, nn->string); + hash_put(hash, buf, TYPE_Fa); + } return(0); } +/* ARGSUSED */ static int -parse_mdoc_St(struct of *of, const struct mdoc_node *n) +pmdoc_St(MDOC_ARGS) { if (NULL == n->child || MDOC_TEXT != n->child->type) return(0); - putkey(of, n->child->string, TYPE_St); + buf_append(buf, n->child->string); return(1); } +/* ARGSUSED */ static int -parse_mdoc_Xr(struct of *of, const struct mdoc_node *n) +pmdoc_Xr(MDOC_ARGS) { - char *cp; if (NULL == (n = n->child)) return(0); - if (NULL == n->next) { - putkey(of, n->string, TYPE_Xr); - return(0); - } + buf_appendb(buf, n->string, strlen(n->string)); - if (-1 == asprintf(&cp, "%s(%s)", n->string, n->next->string)) { - perror(NULL); - exit((int)MANDOCLEVEL_SYSERR); - } - putkey(of, cp, TYPE_Xr); - free(cp); - return(0); + if (NULL != (n = n->next)) { + buf_appendb(buf, ".", 1); + buf_appendb(buf, n->string, strlen(n->string) + 1); + } else + buf_appendb(buf, ".", 2); + + return(1); } +/* ARGSUSED */ static int -parse_mdoc_Nd(struct of *of, const struct mdoc_node *n) +pmdoc_Nd(MDOC_ARGS) { - size_t sz; if (MDOC_BODY != n->type) return(0); - /* - * Special-case the `Nd' because we need to put the description - * into the document table. - */ - - for (n = n->child; NULL != n; n = n->next) { - if (MDOC_TEXT == n->type) { - if (NULL != of->desc) { - sz = strlen(of->desc) + - strlen(n->string) + 2; - of->desc = mandoc_realloc(of->desc, sz); - strlcat(of->desc, " ", sz); - strlcat(of->desc, n->string, sz); - } else - of->desc = mandoc_strdup(n->string); - } - if (NULL != n->child) - parse_mdoc_Nd(of, n); - } + buf_appendmdoc(dbuf, n->child, 1); return(1); } +/* ARGSUSED */ static int -parse_mdoc_Nm(struct of *of, const struct mdoc_node *n) +pmdoc_Nm(MDOC_ARGS) { if (SEC_NAME == n->sec) @@ -1559,519 +1199,711 @@ parse_mdoc_Nm(struct of *of, const struct mdoc_node *n) else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) return(0); + if (NULL == n->child) + buf_append(buf, m->name); + return(1); } +/* ARGSUSED */ static int -parse_mdoc_Sh(struct of *of, const struct mdoc_node *n) +pmdoc_Sh(MDOC_ARGS) { return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type); } -static int -parse_mdoc_head(struct of *of, const struct mdoc_node *n) +static void +hash_put(DB *db, const struct buf *buf, uint64_t mask) { + uint64_t oldmask; + DBT key, val; + int rc; - return(MDOC_HEAD == n->type); + if (buf->len < 2) + return; + + key.data = buf->cp; + key.size = buf->len; + + if ((rc = (*db->get)(db, &key, &val, 0)) < 0) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } else if (0 == rc) { + assert(sizeof(uint64_t) == val.size); + memcpy(&oldmask, val.data, val.size); + mask |= oldmask; + } + + val.data = &mask; + val.size = sizeof(uint64_t); + + if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } } -static int -parse_mdoc_body(struct of *of, const struct mdoc_node *n) +static void +dbt_put(DB *db, const char *dbn, DBT *key, DBT *val) { - return(MDOC_BODY == n->type); + assert(key->size); + assert(val->size); + + if (0 == (*db->put)(db, key, val, 0)) + return; + + perror(dbn); + exit((int)MANDOCLEVEL_SYSERR); + /* NOTREACHED */ } /* - * Add a string to the hash table for the current manual. - * Each string has a bitmask telling which macros it belongs to. - * When we finish the manual, we'll dump the table. + * Call out to per-macro handlers after clearing the persistent database + * key. If the macro sets the database key, flush it to the database. */ static void -putkeys(const struct of *of, const char *cp, size_t sz, uint64_t v) +pmdoc_node(MDOC_ARGS) { - struct str *s; - unsigned int slot; - const char *end; - if (0 == sz) + if (NULL == n) return; - end = cp + sz; - slot = ohash_qlookupi(&strings, cp, &end); - s = ohash_find(&strings, slot); + switch (n->type) { + case (MDOC_HEAD): + /* FALLTHROUGH */ + case (MDOC_BODY): + /* FALLTHROUGH */ + case (MDOC_TAIL): + /* FALLTHROUGH */ + case (MDOC_BLOCK): + /* FALLTHROUGH */ + case (MDOC_ELEM): + buf->len = 0; - if (NULL != s && of == s->of) { - s->mask |= v; - return; - } else if (NULL == s) { - s = mandoc_calloc(sizeof(struct str) + sz + 1, 1); - memcpy(s->key, cp, sz); - ohash_insert(&strings, slot, s); + /* + * Both NULL handlers and handlers returning true + * request using the data. Only skip the element + * when the handler returns false. + */ + + if (NULL != mdocs[n->tok].fp && + 0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m)) + break; + + /* + * For many macros, use the text from all children. + * Set zero flags for macros not needing this. + * In that case, the handler must fill the buffer. + */ + + if (MDOCF_CHILD & mdocs[n->tok].flags) + buf_appendmdoc(buf, n->child, 0); + + /* + * Cover the most common case: + * Automatically stage one string per element. + * Set a zero mask for macros not needing this. + * Additional staging can be done in the handler. + */ + + if (mdocs[n->tok].mask) + hash_put(hash, buf, mdocs[n->tok].mask); + break; + default: + break; } - s->of = of; - s->mask = v; + + pmdoc_node(hash, buf, dbuf, n->child, m); + pmdoc_node(hash, buf, dbuf, n->next, m); } -/* - * Take a Unicode codepoint and produce its UTF-8 encoding. - * This isn't the best way to do this, but it works. - * The magic numbers are from the UTF-8 packaging. - * They're not as scary as they seem: read the UTF-8 spec for details. - */ -static size_t -utf8(unsigned int cp, char out[7]) +static int +pman_node(MAN_ARGS) { - size_t rc; - - rc = 0; - if (cp <= 0x0000007F) { - rc = 1; - out[0] = (char)cp; - } else if (cp <= 0x000007FF) { - rc = 2; - out[0] = (cp >> 6 & 31) | 192; - out[1] = (cp & 63) | 128; - } else if (cp <= 0x0000FFFF) { - rc = 3; - out[0] = (cp >> 12 & 15) | 224; - out[1] = (cp >> 6 & 63) | 128; - out[2] = (cp & 63) | 128; - } else if (cp <= 0x001FFFFF) { - rc = 4; - out[0] = (cp >> 18 & 7) | 240; - out[1] = (cp >> 12 & 63) | 128; - out[2] = (cp >> 6 & 63) | 128; - out[3] = (cp & 63) | 128; - } else if (cp <= 0x03FFFFFF) { - rc = 5; - out[0] = (cp >> 24 & 3) | 248; - out[1] = (cp >> 18 & 63) | 128; - out[2] = (cp >> 12 & 63) | 128; - out[3] = (cp >> 6 & 63) | 128; - out[4] = (cp & 63) | 128; - } else if (cp <= 0x7FFFFFFF) { - rc = 6; - out[0] = (cp >> 30 & 1) | 252; - out[1] = (cp >> 24 & 63) | 128; - out[2] = (cp >> 18 & 63) | 128; - out[3] = (cp >> 12 & 63) | 128; - out[4] = (cp >> 6 & 63) | 128; - out[5] = (cp & 63) | 128; - } else + const struct man_node *head, *body; + char *start, *sv, *title; + size_t sz, titlesz; + + if (NULL == n) return(0); - out[rc] = '\0'; - return(rc); -} + /* + * We're only searching for one thing: the first text child in + * the BODY of a NAME section. Since we don't keep track of + * sections in -man, run some hoops to find out whether we're in + * the correct section or not. + */ -/* - * Store the UTF-8 version of a key, or alias the pointer if the key has - * no UTF-8 transcription marks in it. - */ -static void -utf8key(struct mchars *mc, struct str *key) -{ - size_t sz, bsz, pos; - char utfbuf[7], res[5]; - char *buf; - const char *seq, *cpp, *val; - int len, u; - enum mandoc_esc esc; + if (MAN_BODY == n->type && MAN_SH == n->tok) { + body = n; + assert(body->parent); + if (NULL != (head = body->parent->head) && + 1 == head->nchild && + NULL != (head = (head->child)) && + MAN_TEXT == head->type && + 0 == strcmp(head->string, "NAME") && + NULL != (body = body->child) && + MAN_TEXT == body->type) { - assert(NULL == key->utf8); + title = NULL; + titlesz = 0; + /* + * Suck the entire NAME section into memory. + * Yes, we might run away. + * But too many manuals have big, spread-out + * NAME sections over many lines. + */ + for ( ; NULL != body; body = body->next) { + if (MAN_TEXT != body->type) + break; + if (0 == (sz = strlen(body->string))) + continue; + title = mandoc_realloc + (title, titlesz + sz + 1); + memcpy(title + titlesz, body->string, sz); + titlesz += sz + 1; + title[(int)titlesz - 1] = ' '; + } + if (NULL == title) + return(0); - res[0] = '\\'; - res[1] = '\t'; - res[2] = ASCII_NBRSP; - res[3] = ASCII_HYPH; - res[4] = '\0'; + title = mandoc_realloc(title, titlesz + 1); + title[(int)titlesz] = '\0'; - val = key->key; - bsz = strlen(val); + /* Skip leading space. */ - /* - * Pre-check: if we have no stop-characters, then set the - * pointer as ourselvse and get out of here. - */ - if (strcspn(val, res) == bsz) { - key->utf8 = key->key; - return; - } + sv = title; + while (isspace((unsigned char)*sv)) + sv++; - /* Pre-allocate by the length of the input */ + if (0 == (sz = strlen(sv))) { + free(title); + return(0); + } - buf = mandoc_malloc(++bsz); - pos = 0; + /* Erase trailing space. */ - while ('\0' != *val) { - /* - * Halt on the first escape sequence. - * This also halts on the end of string, in which case - * we just copy, fallthrough, and exit the loop. - */ - if ((sz = strcspn(val, res)) > 0) { - memcpy(&buf[pos], val, sz); - pos += sz; - val += sz; - } + start = &sv[sz - 1]; + while (start > sv && isspace((unsigned char)*start)) + *start-- = '\0'; - if (ASCII_HYPH == *val) { - buf[pos++] = '-'; - val++; - continue; - } else if ('\t' == *val || ASCII_NBRSP == *val) { - buf[pos++] = ' '; - val++; - continue; - } else if ('\\' != *val) - break; + if (start == sv) { + free(title); + return(0); + } - /* Read past the slash. */ + start = sv; - val++; - u = 0; + /* + * Go through a special heuristic dance here. + * This is why -man manuals are great! + * (I'm being sarcastic: my eyes are bleeding.) + * Conventionally, one or more manual names are + * comma-specified prior to a whitespace, then a + * dash, then a description. Try to puzzle out + * the name parts here. + */ - /* - * Parse the escape sequence and see if it's a - * predefined character or special character. - */ - esc = mandoc_escape - ((const char **)&val, &seq, &len); - if (ESCAPE_ERROR == esc) - break; + for ( ;; ) { + sz = strcspn(start, " ,"); + if ('\0' == start[(int)sz]) + break; - if (ESCAPE_SPECIAL != esc) - continue; - if (0 == (u = mchars_spec2cp(mc, seq, len))) - continue; + buf->len = 0; + buf_appendb(buf, start, sz); + buf_appendb(buf, "", 1); - /* - * If we have a Unicode codepoint, try to convert that - * to a UTF-8 byte string. - */ - cpp = utfbuf; - if (0 == (sz = utf8(u, utfbuf))) - continue; + hash_put(hash, buf, TYPE_Nm); + + if (' ' == start[(int)sz]) { + start += (int)sz + 1; + break; + } + + assert(',' == start[(int)sz]); + start += (int)sz + 1; + while (' ' == *start) + start++; + } + + buf->len = 0; - /* Copy the rendered glyph into the stream. */ + if (sv == start) { + buf_append(buf, start); + free(title); + return(1); + } + + while (isspace((unsigned char)*start)) + start++; + + if (0 == strncmp(start, "-", 1)) + start += 1; + else if (0 == strncmp(start, "\\-\\-", 4)) + start += 4; + else if (0 == strncmp(start, "\\-", 2)) + start += 2; + else if (0 == strncmp(start, "\\(en", 4)) + start += 4; + else if (0 == strncmp(start, "\\(em", 4)) + start += 4; - sz = strlen(cpp); - bsz += sz; + while (' ' == *start) + start++; - buf = mandoc_realloc(buf, bsz); + sz = strlen(start) + 1; + buf_appendb(dbuf, start, sz); + buf_appendb(buf, start, sz); - memcpy(&buf[pos], cpp, sz); - pos += sz; + hash_put(hash, buf, TYPE_Nd); + free(title); + } } - buf[pos] = '\0'; - key->utf8 = buf; + for (n = n->child; n; n = n->next) + if (pman_node(hash, buf, dbuf, n)) + return(1); + + return(0); } /* - * Flush the current page's terms (and their bits) into the database. - * Wrap the entire set of additions in a transaction to make sqlite be a - * little faster. - * Also, UTF-8-encode the description at the last possible moment. + * Parse a formatted manual page. + * By necessity, this involves rather crude guesswork. */ static void -dbindex(struct mchars *mc, int form, const struct of *of) +pformatted(DB *hash, struct buf *buf, struct buf *dbuf, + const struct of *of, const char *basedir) { - struct str *key; - const char *desc; - int64_t recno; - size_t i; - unsigned int slot; - - if (verb) - say(of->file, "Adding to index"); + FILE *stream; + char *line, *p, *title; + size_t len, plen, titlesz; - if (nodb) + if (NULL == (stream = fopen(of->fname, "r"))) { + WARNING(of->fname, basedir, "%s", strerror(errno)); return; + } + + /* + * Always use the title derived from the filename up front, + * do not even try to find it in the file. This also makes + * sure we don't end up with an orphan index record, even if + * the file content turns out to be completely unintelligible. + */ + + buf->len = 0; + buf_append(buf, of->title); + hash_put(hash, buf, TYPE_Nm); + + /* Skip to first blank line. */ - desc = ""; - if (NULL != of->desc) { - key = ohash_find(&strings, - ohash_qlookup(&strings, of->desc)); - assert(NULL != key); - if (NULL == key->utf8) - utf8key(mc, key); - desc = key->utf8; + while (NULL != (line = fgetln(stream, &len))) + if ('\n' == *line) + break; + + /* + * Assume the first line that is not indented + * is the first section header. Skip to it. + */ + + while (NULL != (line = fgetln(stream, &len))) + if ('\n' != *line && ' ' != *line) + break; + + /* + * Read up until the next section into a buffer. + * Strip the leading and trailing newline from each read line, + * appending a trailing space. + * Ignore empty (whitespace-only) lines. + */ + + titlesz = 0; + title = NULL; + + while (NULL != (line = fgetln(stream, &len))) { + if (' ' != *line || '\n' != line[(int)len - 1]) + break; + while (len > 0 && isspace((unsigned char)*line)) { + line++; + len--; + } + if (1 == len) + continue; + title = mandoc_realloc(title, titlesz + len); + memcpy(title + titlesz, line, len); + titlesz += len; + title[(int)titlesz - 1] = ' '; } - SQL_EXEC("BEGIN TRANSACTION"); - - i = 1; - SQL_BIND_TEXT(stmts[STMT_INSERT_DOC], i, of->file); - SQL_BIND_TEXT(stmts[STMT_INSERT_DOC], i, of->sec); - SQL_BIND_TEXT(stmts[STMT_INSERT_DOC], i, of->arch); - SQL_BIND_TEXT(stmts[STMT_INSERT_DOC], i, desc); - SQL_BIND_INT(stmts[STMT_INSERT_DOC], i, form); - SQL_STEP(stmts[STMT_INSERT_DOC]); - recno = sqlite3_last_insert_rowid(db); - sqlite3_reset(stmts[STMT_INSERT_DOC]); - - for (key = ohash_first(&strings, &slot); NULL != key; - key = ohash_next(&strings, &slot)) { - assert(key->of == of); - if (NULL == key->utf8) - utf8key(mc, key); - i = 1; - SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, key->mask); - SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->utf8); - SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, recno); - SQL_STEP(stmts[STMT_INSERT_KEY]); - sqlite3_reset(stmts[STMT_INSERT_KEY]); - if (key->utf8 != key->key) - free(key->utf8); - free(key); + /* + * If no page content can be found, or the input line + * is already the next section header, or there is no + * trailing newline, reuse the page title as the page + * description. + */ + + if (NULL == title || '\0' == *title) { + WARNING(of->fname, basedir, + "Cannot find NAME section"); + buf_appendb(dbuf, buf->cp, buf->size); + hash_put(hash, buf, TYPE_Nd); + fclose(stream); + free(title); + return; } - SQL_EXEC("END TRANSACTION"); -} + title = mandoc_realloc(title, titlesz + 1); + title[(int)titlesz] = '\0'; -static void -dbprune(void) -{ - struct of *of; - size_t i; + /* + * Skip to the first dash. + * Use the remaining line as the description (no more than 70 + * bytes). + */ - if (nodb) - return; + if (NULL != (p = strstr(title, "- "))) { + for (p += 2; ' ' == *p || '\b' == *p; p++) + /* Skip to next word. */ ; + } else { + WARNING(of->fname, basedir, + "No dash in title line"); + p = title; + } - for (of = ofs; NULL != of; of = of->next) { - i = 1; - SQL_BIND_TEXT(stmts[STMT_DELETE], i, of->file); - SQL_STEP(stmts[STMT_DELETE]); - sqlite3_reset(stmts[STMT_DELETE]); - if (verb) - say(of->file, "Deleted from index"); + plen = strlen(p); + + /* Strip backspace-encoding from line. */ + + while (NULL != (line = memchr(p, '\b', plen))) { + len = line - p; + if (0 == len) { + memmove(line, line + 1, plen--); + continue; + } + memmove(line - 1, line + 1, plen - len); + plen -= 2; } + + buf_appendb(dbuf, p, plen + 1); + buf->len = 0; + buf_appendb(buf, p, plen + 1); + hash_put(hash, buf, TYPE_Nd); + fclose(stream); + free(title); } -/* - * Close an existing database and its prepared statements. - * If "real" is not set, rename the temporary file into the real one. - */ static void -dbclose(int real) +ofile_argbuild(int argc, char *argv[], + struct of **of, const char *basedir) { - size_t i; + char buf[PATH_MAX]; + const char *sec, *arch, *title; + char *p; + int i, src_form; + struct of *nof; - if (nodb) - return; + for (i = 0; i < argc; i++) { - for (i = 0; i < STMT__MAX; i++) { - sqlite3_finalize(stmts[i]); - stmts[i] = NULL; - } + /* + * Try to infer the manual section, architecture and + * page title from the path, assuming it looks like + * man*[/<arch>]/<title>.<section> or + * cat<section>[/<arch>]/<title>.0 + */ - sqlite3_close(db); - db = NULL; + if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) { + fprintf(stderr, "%s: Path too long\n", argv[i]); + continue; + } + sec = arch = title = ""; + src_form = 0; + p = strrchr(buf, '\0'); + while (p-- > buf) { + if ('\0' == *sec && '.' == *p) { + sec = p + 1; + *p = '\0'; + if ('0' == *sec) + src_form |= MANDOC_FORM; + else if ('1' <= *sec && '9' >= *sec) + src_form |= MANDOC_SRC; + continue; + } + if ('/' != *p) + continue; + if ('\0' == *title) { + title = p + 1; + *p = '\0'; + continue; + } + if (0 == strncmp("man", p + 1, 3)) + src_form |= MANDOC_SRC; + else if (0 == strncmp("cat", p + 1, 3)) + src_form |= MANDOC_FORM; + else + arch = p + 1; + break; + } + if ('\0' == *title) { + WARNING(argv[i], basedir, + "Cannot deduce title from filename"); + title = buf; + } - if (real) - return; + /* + * Build the file structure. + */ - if (-1 == rename(MANDOC_DB "~", MANDOC_DB)) { - exitcode = (int)MANDOCLEVEL_SYSERR; - say(MANDOC_DB, NULL); + nof = mandoc_calloc(1, sizeof(struct of)); + nof->fname = mandoc_strdup(argv[i]); + nof->sec = mandoc_strdup(sec); + nof->arch = mandoc_strdup(arch); + nof->title = mandoc_strdup(title); + nof->src_form = src_form; + + /* + * Add the structure to the list. + */ + + if (NULL == *of) { + *of = nof; + (*of)->first = nof; + } else { + nof->first = (*of)->first; + (*of)->next = nof; + *of = nof; + } } } /* - * This is straightforward stuff. - * Open a database connection to a "temporary" database, then open a set - * of prepared statements we'll use over and over again. - * If "real" is set, we use the existing database; if not, we truncate a - * temporary one. - * Must be matched by dbclose(). + * Recursively build up a list of files to parse. + * We use this instead of ftw() and so on because I don't want global + * variables hanging around. + * This ignores the mandocdb.db and mandocdb.index files, but assumes that + * everything else is a manual. + * Pass in a pointer to a NULL structure for the first invocation. */ -static int -dbopen(int real) +static void +ofile_dirbuild(const char *dir, const char* psec, const char *parch, + int p_src_form, struct of **of, char *basedir) { - const char *file, *sql; - int rc, ofl; - - if (nodb) - return(1); - - ofl = SQLITE_OPEN_READWRITE; - if (0 == real) { - file = MANDOC_DB "~"; - if (-1 == remove(file) && ENOENT != errno) { - exitcode = (int)MANDOCLEVEL_SYSERR; - say(file, NULL); - return(0); - } - ofl |= SQLITE_OPEN_EXCLUSIVE; - } else - file = MANDOC_DB; - - rc = sqlite3_open_v2(file, &db, ofl, NULL); - if (SQLITE_OK == rc) - goto prepare_statements; - if (SQLITE_CANTOPEN != rc) { - exitcode = (int)MANDOCLEVEL_SYSERR; - say(file, NULL); - return(0); + char buf[PATH_MAX]; + size_t sz; + DIR *d; + const char *fn, *sec, *arch; + char *p, *q, *suffix; + struct of *nof; + struct dirent *dp; + int src_form; + + if (NULL == (d = opendir(dir))) { + WARNING("", dir, "%s", strerror(errno)); + return; } - sqlite3_close(db); - db = NULL; + while (NULL != (dp = readdir(d))) { + fn = dp->d_name; - if (SQLITE_OK != (rc = sqlite3_open(file, &db))) { - exitcode = (int)MANDOCLEVEL_SYSERR; - say(file, NULL); - return(0); - } + if ('.' == *fn) + continue; - sql = "CREATE TABLE \"docs\" (\n" - " \"file\" TEXT NOT NULL,\n" - " \"sec\" TEXT NOT NULL,\n" - " \"arch\" TEXT NOT NULL,\n" - " \"desc\" TEXT NOT NULL,\n" - " \"form\" INTEGER NOT NULL,\n" - " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n" - ");\n" - "\n" - "CREATE TABLE \"keys\" (\n" - " \"bits\" INTEGER NOT NULL,\n" - " \"key\" TEXT NOT NULL,\n" - " \"docid\" INTEGER NOT NULL REFERENCES docs(id) " - "ON DELETE CASCADE,\n" - " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n" - ");\n" - "\n" - "CREATE INDEX \"key_index\" ON keys (key);\n"; - - if (SQLITE_OK != sqlite3_exec(db, sql, NULL, NULL, NULL)) { - exitcode = (int)MANDOCLEVEL_SYSERR; - say(file, "%s", sqlite3_errmsg(db)); - return(0); - } + src_form = p_src_form; -prepare_statements: - SQL_EXEC("PRAGMA foreign_keys = ON"); - sql = "DELETE FROM docs where file=?"; - sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE], NULL); - sql = "INSERT INTO docs " - "(file,sec,arch,desc,form) VALUES (?,?,?,?,?)"; - sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_DOC], NULL); - sql = "INSERT INTO keys " - "(bits,key,docid) VALUES (?,?,?)"; - sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_KEY], NULL); - -#ifndef __APPLE__ - /* - * When opening a new database, we can turn off - * synchronous mode for much better performance. - */ + if (DT_DIR == dp->d_type) { + sec = psec; + arch = parch; - if (real) - SQL_EXEC("PRAGMA synchronous = OFF"); -#endif + /* + * By default, only use directories called: + * man<section>/[<arch>/] or + * cat<section>/[<arch>/] + */ - return(1); -} + if ('\0' == *sec) { + if(0 == strncmp("man", fn, 3)) { + src_form |= MANDOC_SRC; + sec = fn + 3; + } else if (0 == strncmp("cat", fn, 3)) { + src_form |= MANDOC_FORM; + sec = fn + 3; + } else { + WARNING(fn, basedir, "Bad section"); + if (use_all) + sec = fn; + else + continue; + } + } else if ('\0' == *arch) { + if (NULL != strchr(fn, '.')) { + WARNING(fn, basedir, "Bad architecture"); + if (0 == use_all) + continue; + } + arch = fn; + } else { + WARNING(fn, basedir, "Excessive subdirectory"); + if (0 == use_all) + continue; + } -static void * -hash_halloc(size_t sz, void *arg) -{ + buf[0] = '\0'; + strlcat(buf, dir, PATH_MAX); + strlcat(buf, "/", PATH_MAX); + strlcat(basedir, "/", PATH_MAX); + strlcat(basedir, fn, PATH_MAX); + sz = strlcat(buf, fn, PATH_MAX); - return(mandoc_calloc(sz, 1)); -} + if (PATH_MAX <= sz) { + WARNING(fn, basedir, "Path too long"); + continue; + } -static void * -hash_alloc(size_t sz, void *arg) -{ + ofile_dirbuild(buf, sec, arch, + src_form, of, basedir); - return(mandoc_malloc(sz)); -} + p = strrchr(basedir, '/'); + *p = '\0'; + continue; + } -static void -hash_free(void *p, size_t sz, void *arg) -{ + if (DT_REG != dp->d_type) { + WARNING(fn, basedir, "Not a regular file"); + continue; + } + if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn)) + continue; + if ('\0' == *psec) { + WARNING(fn, basedir, "File outside section"); + if (0 == use_all) + continue; + } - free(p); -} + /* + * By default, skip files where the file name suffix + * does not agree with the section directory + * they are located in. + */ -static int -set_basedir(const char *targetdir) -{ - static char startdir[PATH_MAX]; - static int fd; + suffix = strrchr(fn, '.'); + if (NULL == suffix) { + WARNING(fn, basedir, "No filename suffix"); + if (0 == use_all) + continue; + } else if ((MANDOC_SRC & src_form && + strcmp(suffix + 1, psec)) || + (MANDOC_FORM & src_form && + strcmp(suffix + 1, "0"))) { + WARNING(fn, basedir, "Wrong filename suffix"); + if (0 == use_all) + continue; + if ('0' == suffix[1]) + src_form |= MANDOC_FORM; + else if ('1' <= suffix[1] && '9' >= suffix[1]) + src_form |= MANDOC_SRC; + } - /* - * Remember where we started by keeping a fd open to the origin - * path component: throughout this utility, we chdir() a lot to - * handle relative paths, and by doing this, we can return to - * the starting point. - */ - if ('\0' == *startdir) { - if (NULL == getcwd(startdir, PATH_MAX)) { - exitcode = (int)MANDOCLEVEL_SYSERR; - if (NULL != targetdir) - say(".", NULL); - return(0); + /* + * Skip formatted manuals if a source version is + * available. Ignore the age: it is very unlikely + * that people install newer formatted base manuals + * when they used to have source manuals before, + * and in ports, old manuals get removed on update. + */ + if (0 == use_all && MANDOC_FORM & src_form && + '\0' != *psec) { + buf[0] = '\0'; + strlcat(buf, dir, PATH_MAX); + p = strrchr(buf, '/'); + if ('\0' != *parch && NULL != p) + for (p--; p > buf; p--) + if ('/' == *p) + break; + if (NULL == p) + p = buf; + else + p++; + if (0 == strncmp("cat", p, 3)) + memcpy(p, "man", 3); + strlcat(buf, "/", PATH_MAX); + sz = strlcat(buf, fn, PATH_MAX); + if (sz >= PATH_MAX) { + WARNING(fn, basedir, "Path too long"); + continue; + } + q = strrchr(buf, '.'); + if (NULL != q && p < q++) { + *q = '\0'; + sz = strlcat(buf, psec, PATH_MAX); + if (sz >= PATH_MAX) { + WARNING(fn, basedir, "Path too long"); + continue; + } + if (0 == access(buf, R_OK)) + continue; + } } - if (-1 == (fd = open(startdir, O_RDONLY, 0))) { - exitcode = (int)MANDOCLEVEL_SYSERR; - say(startdir, NULL); - return(0); + + buf[0] = '\0'; + assert('.' == dir[0]); + if ('/' == dir[1]) { + strlcat(buf, dir + 2, PATH_MAX); + strlcat(buf, "/", PATH_MAX); } - if (NULL == targetdir) - targetdir = startdir; - } else { - if (-1 == fd) - return(0); - if (-1 == fchdir(fd)) { - close(fd); - basedir[0] = '\0'; - exitcode = (int)MANDOCLEVEL_SYSERR; - say(startdir, NULL); - return(0); + sz = strlcat(buf, fn, PATH_MAX); + if (sz >= PATH_MAX) { + WARNING(fn, basedir, "Path too long"); + continue; } - if (NULL == targetdir) { - close(fd); - return(1); + + nof = mandoc_calloc(1, sizeof(struct of)); + nof->fname = mandoc_strdup(buf); + nof->sec = mandoc_strdup(psec); + nof->arch = mandoc_strdup(parch); + nof->src_form = src_form; + + /* + * Remember the file name without the extension, + * to be used as the page title in the database. + */ + + if (NULL != suffix) + *suffix = '\0'; + nof->title = mandoc_strdup(fn); + + /* + * Add the structure to the list. + */ + + if (NULL == *of) { + *of = nof; + (*of)->first = nof; + } else { + nof->first = (*of)->first; + (*of)->next = nof; + *of = nof; } } - if (NULL == realpath(targetdir, basedir)) { - basedir[0] = '\0'; - exitcode = (int)MANDOCLEVEL_BADARG; - say(targetdir, NULL); - return(0); - } else if (-1 == chdir(basedir)) { - exitcode = (int)MANDOCLEVEL_BADARG; - say("", NULL); - return(0); - } - return(1); + + closedir(d); } static void -say(const char *file, const char *format, ...) +ofile_free(struct of *of) { - va_list ap; - - if ('\0' != *basedir) - fprintf(stderr, "%s", basedir); - if ('\0' != *basedir && '\0' != *file) - fputs("//", stderr); - if ('\0' != *file) - fprintf(stderr, "%s", file); - fputs(": ", stderr); - - if (NULL == format) { - perror(NULL); - return; - } + struct of *nof; - va_start(ap, format); - vfprintf(stderr, format, ap); - va_end(ap); + if (NULL != of) + of = of->first; - fputc('\n', stderr); + while (NULL != of) { + nof = of->next; + free(of->fname); + free(of->sec); + free(of->arch); + free(of->title); + free(of); + of = nof; + } } |