diff options
-rw-r--r-- | Makefile | 22 | ||||
-rw-r--r-- | apropos.1 | 169 | ||||
-rw-r--r-- | apropos.c | 689 |
3 files changed, 879 insertions, 1 deletions
@@ -40,6 +40,8 @@ INSTALL_MAN = $(INSTALL_DATA) all: mandoc preconv demandoc SRCS = Makefile \ + apropos.1 \ + apropos.c \ arch.c \ arch.in \ att.c \ @@ -266,12 +268,22 @@ PRECONV_LNS = preconv.ln $(PRECONV_OBJS) $(PRECONV_LNS): config.h +APROPOS_OBJS = apropos.o +APROPOS_LNS = apropos.ln + +$(APROPOS_OBJS) $(APROPOS_LNS): config.h mandoc.h + DEMANDOC_OBJS = demandoc.o DEMANDOC_LNS = demandoc.ln $(DEMANDOC_OBJS) $(DEMANDOC_LNS): config.h -INDEX_MANS = demandoc.1.html \ +INDEX_MANS = apropos.1.html \ + apropos.1.xhtml \ + apropos.1.ps \ + apropos.1.pdf \ + apropos.1.txt \ + demandoc.1.html \ demandoc.1.xhtml \ demandoc.1.ps \ demandoc.1.pdf \ @@ -347,6 +359,8 @@ clean: rm -f llib-lmandocdb.ln $(MANDOCDB_LNS) rm -f preconv $(PRECONV_OBJS) rm -f llib-lpreconv.ln $(PRECONV_LNS) + rm -f apropos $(APROPOS_OBJS) + rm -f llib-lapropos.ln $(APROPOS_LNS) rm -f demandoc $(DEMANDOC_OBJS) rm -f llib-ldemandoc.ln $(DEMANDOC_LNS) rm -f mandoc $(MANDOC_OBJS) @@ -412,6 +426,12 @@ preconv: $(PRECONV_OBJS) llib-lpreconv.ln: $(PRECONV_LNS) $(LINT) $(LINTFLAGS) -Cpreconv $(PRECONV_LNS) +apropos: $(APROPOS_OBJS) libmandoc.a + $(CC) $(LDFLAGS) -o $@ $(APROPOS_OBJS) libmandoc.a + +llib-lapropos.ln: $(APROPOS_LNS) + $(LINT) $(LINTFLAGS) -Capropos $(APROPOS_LNS) + demandoc: $(DEMANDOC_OBJS) libmandoc.a $(CC) $(LDFLAGS) -o $@ $(DEMANDOC_OBJS) libmandoc.a diff --git a/apropos.1 b/apropos.1 new file mode 100644 index 00000000..e9b81b22 --- /dev/null +++ b/apropos.1 @@ -0,0 +1,169 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt APROPOS 1 +.Os +.Sh NAME +.Nm apropos +.Nd search the manual page database +.Sh SYNOPSIS +.Nm +.Op Fl eIr +.Op Fl a Ar arch +.Op Fl c Ar cat +.Op Fl s Ar sort +.Op Fl t Ar types +.Ar key +.Sh DESCRIPTION +The +.Nm +utility queries the manual page database. +Its arguments are as follows: +.Bl -tag -width Ds +.It Fl a Ar arch +Search only for a particular architecture. +.It Fl c Ar cat +Search only for a category (manual section). +See +.Xr man 1 +for a listing of categories. +.It Fl e +Search only for exact matches (subject to +.Fl I ) . +.It Fl I +Case-insensitive matching. +.It Fl r +Consider +.Ar key +to be a POSIX regular expression (subject to +.Fl I ) . +See +.Xr re_format 7 +for a description of regular expressions. +.It Fl s Ar sort +Sorting type. +Accepts +.Ar cat +to sort by category and then by title or +.Ar title +to sort by title (which is the default). +.It Fl t Ar types +Consider only types of keyword match, where +.Ar types +is a comma-separated list consisting of the following: +.Ar name , +manual names; +.Ar func , +function names; +.Ar utility , +utility names; +.Ar incl , +include files; +.Ar var , +variable names; +.Ar stand , +standards; +.Ar auth , +authors; +.Ar conf , +configuration strings; +.Ar desc , +descriptive text; +.Ar xref , +cross-references; +.Ar path , +file pathnames; +.Ar env , +environment variables; or +.Ar err , +error codes. +Specifying +.Ar all +will search for all types. +.It Ar key +The search key. +.El +.Pp +By default, +.Nm +searches for substring matches of +.Ar key +in manual names and descriptions +and displays results by manual title. 
+Output is formatted as +.Pp +.D1 title(cat) \- description +.Pp +Where +.Qq title +is the manual's title (note multiple manual names may exist for one +title), +.Qq cat +is the category, and +.Qq description +is the manual's short description. +If an architecture is specified for the manual, it is displayed as +.Pp +.D1 title(cat/arch) \- description +.Pp +Resulting manuals may be accessed as +.Pp +.Dl $ man -s cat title +.Pp +If an architecture is specified in the output, use +.Pp +.Dl $ man -s cat -S arch title +.\" .Sh IMPLEMENTATION NOTES +.\" Not used in OpenBSD. +.\" .Sh RETURN VALUES +.\" For sections 2, 3, & 9 only. +.\" .Sh ENVIRONMENT +.\" For sections 1, 6, 7, & 8 only. +.\" .Sh FILES +.Sh EXIT STATUS +.Ex -std +.Sh EXAMPLES +Search for +.Qq optind +as a variable name in the library category: +.Pp +.Dl $ apropos -tvar -c 3 optind +.Pp +Search for all manuals referencing the term +.Qq POSIX +in any letter case: +.Pp +.Dl $ apropos -tall -I posix +.\" .Sh DIAGNOSTICS +.\" For sections 1, 4, 6, 7, & 8 only. +.\" .Sh ERRORS +.\" For sections 2, 3, & 9 only. +.Sh SEE ALSO +.Xr man 1 , +.Xr mandoc 1 , +.Xr re_format 7 +.\" .Sh STANDARDS +.\" .Sh HISTORY +.Sh AUTHORS +The +.Nm +utility was written by +.An Kristaps Dzonsons Aq kristaps@bsd.lv . +.\" .Sh CAVEATS +.\" .Sh BUGS +.\" .Sh SECURITY CONSIDERATIONS +.\" Not used in OpenBSD. diff --git a/apropos.c b/apropos.c new file mode 100644 index 00000000..225555fc --- /dev/null +++ b/apropos.c @@ -0,0 +1,689 @@ +/* $Id$ */ +/* +* Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <limits.h> +#include <regex.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#ifdef __linux__ +# include <db_185.h> +#else +# include <db.h> +#endif + +#include "mandoc.h" + +#define MAXRESULTS 100 + +#define TYPE_NAME 0x01 +#define TYPE_FUNCTION 0x02 +#define TYPE_UTILITY 0x04 +#define TYPE_INCLUDES 0x08 +#define TYPE_VARIABLE 0x10 +#define TYPE_STANDARD 0x20 +#define TYPE_AUTHOR 0x40 +#define TYPE_CONFIG 0x80 +#define TYPE_DESC 0x100 +#define TYPE_XREF 0x200 +#define TYPE_PATH 0x400 +#define TYPE_ENV 0x800 +#define TYPE_ERR 0x1000 + +enum match { + MATCH_SUBSTR = 0, + MATCH_REGEX, + MATCH_EXACT +}; + +enum sort { + SORT_TITLE = 0, + SORT_CAT, + SORT__MAX +}; + +struct opts { + enum sort sort; /* output sorting */ + const char *arch; /* restrict to architecture */ + const char *cat; /* restrict to category */ + int types; /* only types in bitmask */ + int insens; /* case-insensitive match */ + enum match match; /* match type */ +}; + +struct type { + int mask; + const char *name; +}; + +struct rec { + char *file; + char *cat; + char *title; + char *arch; + char *desc; + recno_t rec; +}; + +struct res { + char *arch; /* architecture */ + char *desc; /* free-form description */ + char *keyword; /* matched keyword */ + int types; /* bitmask of field selectors */ + char *cat; /* manual section */ + char *title; /* manual section */ + char *uri; /* formatted uri of file */ + recno_t rec; 
/* unique id of underlying manual */ +}; + +struct state { + DB *db; /* database */ + DB *idx; /* index */ + const char *dbf; /* database name */ + const char *idxf; /* index name */ + void (*err)(const char *); + void (*errx)(const char *, ...); +}; + +static const char * const sorts[SORT__MAX] = { + "cat", /* SORT_CAT */ + "title", /* SORT_TITLE */ +}; + +static const struct type types[] = { + { TYPE_NAME, "name" }, + { TYPE_FUNCTION, "func" }, + { TYPE_UTILITY, "utility" }, + { TYPE_INCLUDES, "incl" }, + { TYPE_VARIABLE, "var" }, + { TYPE_STANDARD, "stand" }, + { TYPE_AUTHOR, "auth" }, + { TYPE_CONFIG, "conf" }, + { TYPE_DESC, "desc" }, + { TYPE_XREF, "xref" }, + { TYPE_PATH, "path" }, + { TYPE_ENV, "env" }, + { TYPE_ERR, "err" }, + { INT_MAX, "all" }, + { 0, NULL } +}; + +static void buf_alloc(char **, size_t *, size_t); +static void buf_dup(struct mchars *, char **, const char *); +static void buf_redup(struct mchars *, char **, + size_t *, const char *); +static void error(const char *, ...); +static int sort_cat(const void *, const void *); +static int sort_title(const void *, const void *); +static void state_destroy(struct state *); +static int state_getrecord(struct state *, recno_t, struct rec *); +static int state_init(struct state *, + const char *, const char *, + void (*err)(const char *), + void (*errx)(const char *, ...)); +static void state_output(const struct res *, int); +static void state_search(struct state *, + const struct opts *, char *); + +static void usage(void); + +static const char *progname; + +int +main(int argc, char *argv[]) +{ + int ch, i; + const char *dbf, *idxf; + struct state state; + char *q, *v; + struct opts opts; + extern int optind; + extern char *optarg; + + memset(&opts, 0, sizeof(struct opts)); + + dbf = "mandoc.db"; + idxf = "mandoc.index"; + q = NULL; + + progname = strrchr(argv[0], '/'); + if (progname == NULL) + progname = argv[0]; + else + ++progname; + + opts.match = MATCH_SUBSTR; + + while (-1 != (ch = 
getopt(argc, argv, "a:c:eIrs:t:"))) + switch (ch) { + case ('a'): + opts.arch = optarg; + break; + case ('c'): + opts.cat = optarg; + break; + case ('e'): + opts.match = MATCH_EXACT; + break; + case ('I'): + opts.insens = 1; + break; + case ('r'): + opts.match = MATCH_REGEX; + break; + case ('s'): + for (i = 0; i < SORT__MAX; i++) { + if (strcmp(optarg, sorts[i])) + continue; + opts.sort = (enum sort)i; + break; + } + + if (i < SORT__MAX) + break; + + error("%s: Bad sort\n", optarg); + return(EXIT_FAILURE); + case ('t'): + while (NULL != (v = strsep(&optarg, ","))) { + if ('\0' == *v) + continue; + for (i = 0; types[i].mask; i++) { + if (strcmp(types[i].name, v)) + continue; + break; + } + if (0 == types[i].mask) + break; + opts.types |= types[i].mask; + } + if (NULL == v) + break; + + error("%s: Bad type\n", v); + return(EXIT_FAILURE); + default: + usage(); + return(EXIT_FAILURE); + } + + argc -= optind; + argv += optind; + + if (0 == argc || '\0' == **argv) { + usage(); + return(EXIT_FAILURE); + } else + q = *argv; + + if (0 == opts.types) + opts.types = TYPE_NAME | TYPE_DESC; + + if ( ! state_init(&state, dbf, idxf, perror, error)) { + state_destroy(&state); + return(EXIT_FAILURE); + } + + state_search(&state, &opts, q); + state_destroy(&state); + + return(EXIT_SUCCESS); +} + +static void +state_search(struct state *p, const struct opts *opts, char *q) +{ + int i, len, ch, rflags, dflag; + struct mchars *mc; + char *buf; + size_t bufsz; + recno_t rec; + uint32_t fl; + DBT key, val; + struct res res[MAXRESULTS]; + regex_t reg; + regex_t *regp; + char filebuf[10]; + struct rec record; + + len = 0; + buf = NULL; + bufsz = 0; + ch = 0; + regp = NULL; + + switch (opts->match) { + case (MATCH_REGEX): + rflags = REG_EXTENDED | REG_NOSUB | + (opts->insens ? 
REG_ICASE : 0); + + if (0 != regcomp(®, q, rflags)) { + error("%s: Bad pattern\n", q); + return; + } + + regp = ® + dflag = R_FIRST; + break; + case (MATCH_EXACT): + key.data = q; + key.size = strlen(q) + 1; + dflag = R_CURSOR; + break; + default: + dflag = R_FIRST; + break; + } + + if (NULL == (mc = mchars_alloc())) { + perror(NULL); + exit(EXIT_FAILURE); + } + + /* + * Iterate over the entire keyword database. + * For each record, we must first translate the key into UTF-8. + * Following that, make sure it's acceptable. + * Lastly, add it to the available records. + */ + + while (len < MAXRESULTS) { + if ((ch = (*p->db->seq)(p->db, &key, &val, dflag))) + break; + + dflag = R_NEXT; + + /* + * Keys must be sized as such: the keyword must be + * non-empty (nil terminator plus one character) and the + * value must be 8 (recno_t---uint32_t---index reference + * and a uint32_t flag field). + */ + + if (key.size < 2 || 8 != val.size) { + error("%s: Corrupt database\n", p->dbf); + exit(EXIT_FAILURE); + } + + buf_redup(mc, &buf, &bufsz, (char *)key.data); + + fl = *(uint32_t *)val.data; + + if ( ! (fl & opts->types)) + continue; + + switch (opts->match) { + case (MATCH_REGEX): + if (regexec(regp, buf, 0, NULL, 0)) + continue; + break; + case (MATCH_EXACT): + if (opts->insens && strcasecmp(buf, q)) + goto send; + if ( ! opts->insens && strcmp(buf, q)) + goto send; + break; + default: + if (opts->insens && NULL == strcasestr(buf, q)) + continue; + if ( ! opts->insens && NULL == strstr(buf, q)) + continue; + break; + } + + /* + * Now look up the file itself in our index. The file's + * indexed by its recno for fast lookups. + */ + + memcpy(&rec, val.data + 4, sizeof(recno_t)); + + if ( ! state_getrecord(p, rec, &record)) + exit(EXIT_FAILURE); + + /* If we're in a different section, skip... */ + + if (opts->cat && strcasecmp(opts->cat, record.cat)) + continue; + if (opts->arch && strcasecmp(opts->arch, record.arch)) + continue; + + /* FIXME: this needs to be changed. Ugh. 
Linear. */ + + for (i = 0; i < len; i++) + if (res[i].rec == record.rec) + break; + + if (i < len) + continue; + + /* + * Now we have our filename, keywords, types, and all + * other necessary information. + * Process it and add it to our list of results. + */ + + filebuf[9] = '\0'; + snprintf(filebuf, 10, "%u", record.rec); + assert('\0' == filebuf[9]); + + res[len].rec = record.rec; + res[len].types = fl; + + buf_dup(mc, &res[len].keyword, buf); + buf_dup(mc, &res[len].uri, filebuf); + buf_dup(mc, &res[len].cat, record.cat); + buf_dup(mc, &res[len].arch, record.arch); + buf_dup(mc, &res[len].title, record.title); + buf_dup(mc, &res[len].desc, record.desc); + len++; + } + +send: + if (ch < 0) { + perror(p->dbf); + exit(EXIT_FAILURE); + } + + switch (opts->sort) { + case (SORT_CAT): + qsort(res, len, sizeof(struct res), sort_cat); + break; + default: + qsort(res, len, sizeof(struct res), sort_title); + break; + } + + state_output(res, len); + + for (len-- ; len >= 0; len--) { + free(res[len].keyword); + free(res[len].title); + free(res[len].cat); + free(res[len].arch); + free(res[len].desc); + free(res[len].uri); + } + + free(buf); + mchars_free(mc); + + if (regp) + regfree(regp); +} + +/* + * Track allocated buffer size for buf_redup(). + */ +static inline void +buf_alloc(char **buf, size_t *bufsz, size_t sz) +{ + + if (sz < *bufsz) + return; + + *bufsz = sz + 1024; + if (NULL == (*buf = realloc(*buf, *bufsz))) { + perror(NULL); + exit(EXIT_FAILURE); + } +} + +/* + * Like buf_redup() but throwing away the buffer size. + */ +static void +buf_dup(struct mchars *mc, char **buf, const char *val) +{ + size_t bufsz; + + bufsz = 0; + *buf = NULL; + buf_redup(mc, buf, &bufsz, val); +} + +/* + * Normalise strings from the index and database. + * These strings are escaped as defined by mandoc_char(7) along with + * other goop in mandoc.h (e.g., soft hyphens). 
+ */ +static void +buf_redup(struct mchars *mc, char **buf, + size_t *bufsz, const char *val) +{ + size_t sz; + const char *seq, *cpp; + int len, pos; + enum mandoc_esc esc; + const char rsv[] = { '\\', ASCII_NBRSP, ASCII_HYPH, '\0' }; + + /* Pre-allocate by the length of the input */ + + buf_alloc(buf, bufsz, strlen(val) + 1); + + pos = 0; + + while ('\0' != *val) { + /* + * Halt on the first escape sequence. + * This also halts on the end of string, in which case + * we just copy, fallthrough, and exit the loop. + */ + if ((sz = strcspn(val, rsv)) > 0) { + memcpy(&(*buf)[pos], val, sz); + pos += (int)sz; + val += (int)sz; + } + + if (ASCII_HYPH == *val) { + (*buf)[pos++] = '-'; + val++; + continue; + } else if (ASCII_NBRSP == *val) { + (*buf)[pos++] = ' '; + val++; + continue; + } else if ('\\' != *val) + break; + + /* Read past the slash. */ + + val++; + + /* + * Parse the escape sequence and see if it's a + * predefined character or special character. + */ + + esc = mandoc_escape(&val, &seq, &len); + if (ESCAPE_ERROR == esc) + break; + + cpp = ESCAPE_SPECIAL == esc ? + mchars_spec2str(mc, seq, len, &sz) : NULL; + + if (NULL == cpp) + continue; + + /* Copy the rendered glyph into the stream. */ + + buf_alloc(buf, bufsz, sz); + + memcpy(&(*buf)[pos], cpp, sz); + pos += (int)sz; + } + + (*buf)[pos] = '\0'; +} + +static void +error(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static void +state_output(const struct res *res, int sz) +{ + int i; + + for (i = 0; i < sz; i++) + printf("%s(%s%s%s) - %s\n", res[i].title, + res[i].cat, + *res[i].arch ? "/" : "", + *res[i].arch ? 
res[i].arch : "", + res[i].desc); +} + +static void +usage(void) +{ + + fprintf(stderr, "usage: %s " + "[-eIr] " + "[-a arch] " + "[-c cat] " + "[-s sort] " + "[-t type[,...]] " + "key\n", progname); +} + +static int +state_init(struct state *p, + const char *dbf, const char *idxf, + void (*err)(const char *), + void (*errx)(const char *, ...)) +{ + BTREEINFO info; + + memset(p, 0, sizeof(struct state)); + memset(&info, 0, sizeof(BTREEINFO)); + + info.flags = R_DUP; + + p->dbf = dbf; + p->idxf = idxf; + p->err = err; + + p->db = dbopen(p->dbf, O_RDONLY, 0, DB_BTREE, &info); + if (NULL == p->db) { + (*err)(p->dbf); + return(0); + } + + p->idx = dbopen(p->idxf, O_RDONLY, 0, DB_RECNO, NULL); + if (NULL == p->idx) { + (*err)(p->idxf); + return(0); + } + + return(1); +} + +static void +state_destroy(struct state *p) +{ + + if (p->db) + (*p->db->close)(p->db); + if (p->idx) + (*p->idx->close)(p->idx); +} + +static int +state_getrecord(struct state *p, recno_t rec, struct rec *rp) +{ + DBT key, val; + size_t sz; + int rc; + + key.data = &rec; + key.size = sizeof(recno_t); + + rc = (*p->idx->get)(p->idx, &key, &val, 0); + if (rc < 0) { + (*p->err)(p->idxf); + return(0); + } else if (rc > 0) { + (*p->errx)("%s: Corrupt index\n", p->idxf); + return(0); + } + + rp->file = (char *)val.data; + if ((sz = strlen(rp->file) + 1) >= val.size) { + (*p->errx)("%s: Corrupt index\n", p->idxf); + return(0); + } + + rp->cat = (char *)val.data + (int)sz; + if ((sz += strlen(rp->cat) + 1) >= val.size) { + (*p->errx)("%s: Corrupt index\n", p->idxf); + return(0); + } + + rp->title = (char *)val.data + (int)sz; + if ((sz += strlen(rp->title) + 1) >= val.size) { + (*p->errx)("%s: Corrupt index\n", p->idxf); + return(0); + } + + rp->arch = (char *)val.data + (int)sz; + if ((sz += strlen(rp->arch) + 1) >= val.size) { + (*p->errx)("%s: Corrupt index\n", p->idxf); + return(0); + } + + rp->desc = (char *)val.data + (int)sz; + rp->rec = rec; + return(1); +} + +static int +sort_title(const void *p1, 
const void *p2) +{ + + return(strcmp(((const struct res *)p1)->title, + ((const struct res *)p2)->title)); +} + +static int +sort_cat(const void *p1, const void *p2) +{ + int rc; + + rc = strcmp(((const struct res *)p1)->cat, + ((const struct res *)p2)->cat); + + return(0 == rc ? sort_title(p1, p2) : rc); +} |