summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Schwarze <schwarze@openbsd.org>2011-11-26 11:23:56 +0000
committerIngo Schwarze <schwarze@openbsd.org>2011-11-26 11:23:56 +0000
commitf8e12b2bd2dcaa23ee142da8052f6f788df55c60 (patch)
tree2c4dfc4a2473d0c0a2255337e179ccb5e575e8fe
parent30a2fcbdee4f248acbd484a9e0944f9717ec226c (diff)
downloadmandoc-f8e12b2bd2dcaa23ee142da8052f6f788df55c60.tar.gz
Store page titles in the correct case, and by default, only
put stuff into the database that man(1) will be able to retrieve. However, support an option to use all directories and files. feedback and ok kristaps@
-rw-r--r--mandocdb.813
-rw-r--r--mandocdb.c226
2 files changed, 201 insertions, 38 deletions
diff --git a/mandocdb.8 b/mandocdb.8
index ae01bddb..e7a0bf13 100644
--- a/mandocdb.8
+++ b/mandocdb.8
@@ -22,7 +22,7 @@
.Nd index UNIX manuals
.Sh SYNOPSIS
.Nm
-.Op Fl v
+.Op Fl av
.Op Ar dir...
.Nm
.Op Fl v
@@ -42,8 +42,19 @@ manuals and indexes them in a
and
.Sx Index Database
for fast retrieval.
+.Pp
The arguments are as follows:
.Bl -tag -width Ds
+.It Fl a
+Use all directories and files found below
+.Ar dir ... .
+By default, only files matching
+.Sm off
+.Sy man Ar section Li /
+.Op Ar arch Li /
+.Ar title . section
+.Sm on
+will be used.
.It Fl d Ar dir
Merge (remove and re-add)
.Ar
diff --git a/mandocdb.c b/mandocdb.c
index d442481b..b6b1d9a1 100644
--- a/mandocdb.c
+++ b/mandocdb.c
@@ -1,6 +1,7 @@
/* $Id$ */
/*
* Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -48,6 +49,9 @@
struct of {
char *fname; /* heap-allocated */
+ char *sec;
+ char *arch;
+ char *title;
struct of *next; /* NULL for last one */
struct of *first; /* first in list */
};
@@ -89,13 +93,15 @@ static void hash_reset(DB **);
static void index_merge(const struct of *, struct mparse *,
struct buf *, struct buf *,
DB *, DB *, const char *,
- DB *, const char *, int,
+ DB *, const char *, int, int,
recno_t, const recno_t *, size_t);
static void index_prune(const struct of *, DB *,
const char *, DB *, const char *,
int, recno_t *, recno_t **, size_t *);
-static void ofile_argbuild(char *[], int, int, struct of **);
-static int ofile_dirbuild(const char *, int, struct of **);
+static void ofile_argbuild(char *[], int, int, int,
+ struct of **);
+static int ofile_dirbuild(const char *, const char *,
+ const char *, int, int, struct of **);
static void ofile_free(struct of *);
static int pman_node(MAN_ARGS);
static void pmdoc_node(MDOC_ARGS);
@@ -254,16 +260,17 @@ main(int argc, char *argv[])
char ibuf[MAXPATHLEN], /* index fname */
fbuf[MAXPATHLEN]; /* btree fname */
int verb, /* output verbosity */
+ use_all, /* use all directories and files */
ch, i, flags;
DB *idx, /* index database */
*db, /* keyword database */
*hash; /* temporary keyword hashtable */
BTREEINFO info; /* btree configuration */
- recno_t maxrec; /* supremum of all records */
- recno_t *recs; /* buffer of empty records */
+ recno_t maxrec; /* last record number in the index */
+ recno_t *recs; /* the numbers of all empty records */
size_t sz1, sz2,
- recsz, /* buffer size of recs */
- reccur; /* valid number of recs */
+ recsz, /* number of allocated slots in recs */
+ reccur; /* current number of empty records */
struct buf buf, /* keyword buffer */
dbuf; /* description buffer */
struct of *of; /* list of files for processing */
@@ -279,6 +286,7 @@ main(int argc, char *argv[])
memset(&dirs, 0, sizeof(struct manpaths));
verb = 0;
+ use_all = 0;
of = NULL;
db = idx = NULL;
mp = NULL;
@@ -289,8 +297,11 @@ main(int argc, char *argv[])
op = OP_NEW;
dir = NULL;
- while (-1 != (ch = getopt(argc, argv, "d:u:v")))
+ while (-1 != (ch = getopt(argc, argv, "ad:u:v")))
switch (ch) {
+ case ('a'):
+ use_all = 1;
+ break;
case ('d'):
dir = optarg;
op = OP_UPDATE;
@@ -347,7 +358,7 @@ main(int argc, char *argv[])
if (NULL == db) {
perror(fbuf);
exit((int)MANDOCLEVEL_SYSERR);
- } else if (NULL == db) {
+ } else if (NULL == idx) {
perror(ibuf);
exit((int)MANDOCLEVEL_SYSERR);
}
@@ -357,7 +368,7 @@ main(int argc, char *argv[])
printf("%s: Opened\n", ibuf);
}
- ofile_argbuild(argv, argc, verb, &of);
+ ofile_argbuild(argv, argc, use_all, verb, &of);
if (NULL == of)
goto out;
@@ -368,8 +379,8 @@ main(int argc, char *argv[])
if (OP_UPDATE == op)
index_merge(of, mp, &dbuf, &buf, hash,
- db, fbuf, idx, ibuf, verb,
- maxrec, recs, reccur);
+ db, fbuf, idx, ibuf, use_all,
+ verb, maxrec, recs, reccur);
goto out;
}
@@ -411,7 +422,7 @@ main(int argc, char *argv[])
if (NULL == db) {
perror(fbuf);
exit((int)MANDOCLEVEL_SYSERR);
- } else if (NULL == db) {
+ } else if (NULL == idx) {
perror(ibuf);
exit((int)MANDOCLEVEL_SYSERR);
}
@@ -424,7 +435,8 @@ main(int argc, char *argv[])
ofile_free(of);
of = NULL;
- if ( ! ofile_dirbuild(dirs.paths[i], verb, &of))
+ if ( ! ofile_dirbuild(dirs.paths[i], NULL, NULL,
+ use_all, verb, &of))
exit((int)MANDOCLEVEL_SYSERR);
if (NULL == of)
@@ -433,7 +445,8 @@ main(int argc, char *argv[])
of = of->first;
index_merge(of, mp, &dbuf, &buf, hash, db, fbuf,
- idx, ibuf, verb, maxrec, recs, reccur);
+ idx, ibuf, use_all, verb,
+ maxrec, recs, reccur);
}
out:
@@ -459,7 +472,7 @@ void
index_merge(const struct of *of, struct mparse *mp,
struct buf *dbuf, struct buf *buf,
DB *hash, DB *db, const char *dbf,
- DB *idx, const char *idxf, int verb,
+ DB *idx, const char *idxf, int use_all, int verb,
recno_t maxrec, const recno_t *recs, size_t reccur)
{
recno_t rec;
@@ -495,17 +508,53 @@ index_merge(const struct of *of, struct mparse *mp,
if (NULL == mdoc && NULL == man)
continue;
+ /*
+ * By default, skip a file if the manual section
+ * and architecture given in the file disagree
+ * with the directory where the file is located.
+ */
+
msec = NULL != mdoc ?
mdoc_meta(mdoc)->msec : man_meta(man)->msec;
- mtitle = NULL != mdoc ?
- mdoc_meta(mdoc)->title : man_meta(man)->title;
arch = NULL != mdoc ?
mdoc_meta(mdoc)->arch : NULL;
+ if (0 == use_all) {
+ assert(of->sec);
+ assert(msec);
+ if (strcmp(msec, of->sec))
+ continue;
+
+ if (NULL == arch) {
+ if (NULL != of->arch)
+ continue;
+ } else if (NULL == of->arch ||
+ strcmp(arch, of->arch))
+ continue;
+ }
+
if (NULL == arch)
arch = "";
/*
+ * By default, skip a file if the title given
+ * in the file disagrees with the file name.
+ * If both agree, use the file name as the title,
+ * because the one in the file usually is all caps.
+ */
+
+ mtitle = NULL != mdoc ?
+ mdoc_meta(mdoc)->title : man_meta(man)->title;
+
+ assert(of->title);
+ assert(mtitle);
+
+ if (0 == strcasecmp(mtitle, of->title))
+ mtitle = of->title;
+ else if (0 == use_all)
+ continue;
+
+ /*
* The index record value consists of a nil-terminated
* filename, a nil-terminated manual section, and a
* nil-terminated description. Since the description
@@ -1179,14 +1228,64 @@ pman_node(MAN_ARGS)
}
static void
-ofile_argbuild(char *argv[], int argc, int verb, struct of **of)
+ofile_argbuild(char *argv[], int argc, int use_all, int verb,
+ struct of **of)
{
+ char buf[MAXPATHLEN];
+ char *sec, *arch, *title, *p;
int i;
struct of *nof;
for (i = 0; i < argc; i++) {
+
+ /*
+ * Try to infer the manual section, architecture and
+ * page title from the path, assuming it looks like
+ * man*[/<arch>]/<title>.<section>
+ */
+
+ if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) {
+ fprintf(stderr, "%s: Path too long\n", argv[i]);
+ continue;
+ }
+ sec = arch = title = NULL;
+ p = strrchr(buf, '\0');
+ while (p-- > buf) {
+ if (NULL == sec && '.' == *p) {
+ sec = p + 1;
+ *p = '\0';
+ continue;
+ }
+ if ('/' != *p)
+ continue;
+ if (NULL == title) {
+ title = p + 1;
+ *p = '\0';
+ continue;
+ }
+ if (strncmp("man", p + 1, 3))
+ arch = p + 1;
+ break;
+ }
+ if (NULL == title)
+ title = buf;
+
+ /*
+ * Build the file structure.
+ */
+
nof = mandoc_calloc(1, sizeof(struct of));
- nof->fname = strdup(argv[i]);
+ nof->fname = mandoc_strdup(argv[i]);
+ if (NULL != sec)
+ nof->sec = mandoc_strdup(sec);
+ if (NULL != arch)
+ nof->arch = mandoc_strdup(arch);
+ nof->title = mandoc_strdup(title);
+
+ /*
+ * Add the structure to the list.
+ */
+
if (verb > 2)
printf("%s: Scheduling\n", argv[i]);
if (NULL == *of) {
@@ -1209,12 +1308,14 @@ ofile_argbuild(char *argv[], int argc, int verb, struct of **of)
* Pass in a pointer to a NULL structure for the first invocation.
*/
static int
-ofile_dirbuild(const char *dir, int verb, struct of **of)
+ofile_dirbuild(const char *dir, const char* psec, const char *parch,
+ int use_all, int verb, struct of **of)
{
char buf[MAXPATHLEN];
size_t sz;
DIR *d;
- const char *fn;
+ const char *fn, *sec, *arch;
+ char *suffix;
struct of *nof;
struct dirent *dp;
@@ -1225,10 +1326,30 @@ ofile_dirbuild(const char *dir, int verb, struct of **of)
while (NULL != (dp = readdir(d))) {
fn = dp->d_name;
+
+ if ('.' == *fn)
+ continue;
+
if (DT_DIR == dp->d_type) {
- if (0 == strcmp(".", fn))
- continue;
- if (0 == strcmp("..", fn))
+ sec = psec;
+ arch = parch;
+
+ /*
+ * By default, only use directories called:
+ * man<section>/[<arch>/]
+ */
+
+ if (NULL == sec) {
+ if(0 == strncmp("man", fn, 3))
+ sec = fn + 3;
+ else if (use_all)
+ sec = fn;
+ else
+ continue;
+ } else if (NULL == arch && (use_all ||
+ NULL == strchr(fn, '.')))
+ arch = fn;
+ else if (0 == use_all)
continue;
buf[0] = '\0';
@@ -1236,22 +1357,37 @@ ofile_dirbuild(const char *dir, int verb, struct of **of)
strlcat(buf, "/", MAXPATHLEN);
sz = strlcat(buf, fn, MAXPATHLEN);
- if (sz < MAXPATHLEN) {
- if ( ! ofile_dirbuild(buf, verb, of))
- return(0);
- continue;
- } else if (sz < MAXPATHLEN)
- continue;
+ if (MAXPATHLEN <= sz) {
+ fprintf(stderr, "%s: Path too long\n", dir);
+ return(0);
+ }
+
+ if (verb > 2)
+ printf("%s: Scanning\n", buf);
- fprintf(stderr, "%s: Path too long\n", dir);
- return(0);
+ if ( ! ofile_dirbuild(buf, sec, arch,
+ use_all, verb, of))
+ return(0);
}
- if (DT_REG != dp->d_type)
+ if (DT_REG != dp->d_type ||
+ (NULL == psec && !use_all) ||
+ !strcmp(MANDOC_DB, fn) ||
+ !strcmp(MANDOC_IDX, fn))
continue;
- if (0 == strcmp(MANDOC_DB, fn) ||
- 0 == strcmp(MANDOC_IDX, fn))
- continue;
+ /*
+ * By default, skip files where the file name suffix
+ * does not agree with the section directory
+ * they are located in.
+ */
+
+ suffix = strrchr(fn, '.');
+ if (0 == use_all) {
+ if (NULL == suffix)
+ continue;
+ if (strcmp(suffix + 1, psec))
+ continue;
+ }
buf[0] = '\0';
strlcat(buf, dir, MAXPATHLEN);
@@ -1264,6 +1400,19 @@ ofile_dirbuild(const char *dir, int verb, struct of **of)
nof = mandoc_calloc(1, sizeof(struct of));
nof->fname = mandoc_strdup(buf);
+ if (NULL != psec)
+ nof->sec = mandoc_strdup(psec);
+ if (NULL != parch)
+ nof->arch = mandoc_strdup(parch);
+
+ /*
+ * Remember the file name without the extension,
+ * to be used as the page title in the database.
+ */
+
+ if (NULL != suffix)
+ *suffix = '\0';
+ nof->title = mandoc_strdup(fn);
if (verb > 2)
printf("%s: Scheduling\n", buf);
@@ -1290,6 +1439,9 @@ ofile_free(struct of *of)
while (of) {
nof = of->next;
free(of->fname);
+ free(of->sec);
+ free(of->arch);
+ free(of->title);
free(of);
of = nof;
}