diff options
author | Kristaps Dzonsons <kristaps@bsd.lv> | 2011-11-09 01:24:23 +0000 |
---|---|---|
committer | Kristaps Dzonsons <kristaps@bsd.lv> | 2011-11-09 01:24:23 +0000 |
commit | 6c38eff672364431564091cddb15592a2ca4a2a0 (patch) | |
tree | 5c6a04444298122b3b219585990a64a780d8a11f | |
parent | 7f5351a1e9d07e9a65c22430e56ff4c50c4a9bdf (diff) | |
download | mandoc-6c38eff672364431564091cddb15592a2ca4a2a0.tar.gz |
Split apropos.c into db.c and apropos.h with simpler code (re-written, but
inspired by apropos.c and mandoc-tools' mandoc-cgi.c). This uses UTF-8
right now for its re-writing, but will soon accommodate the regular
suspects (this is a rather simple matter).
I also introduce man.cgi (cgi.c), which is a standalone CGI that replaces
mandoc-tools' mandoc.cgi. Right now it's just a framework.
-rw-r--r-- | Makefile | 44 | ||||
-rw-r--r-- | apropos.c | 560 | ||||
-rw-r--r-- | apropos.h | 68 | ||||
-rw-r--r-- | cgi.c | 303 | ||||
-rw-r--r-- | db.c | 436 |
5 files changed, 860 insertions, 551 deletions
@@ -44,7 +44,8 @@ INSTALL_MAN = $(INSTALL_DATA) # comment out apropos and mandocdb. # #DBLIB = -ldb -DBBIN = apropos mandocdb +DBBIN = apropos mandocdb man.cgi +DBLN = llib-lapropos.ln llib-lmandocdb.ln llib-lman.cgi.ln all: mandoc preconv demandoc $(DBBIN) @@ -277,10 +278,15 @@ PRECONV_LNS = preconv.ln $(PRECONV_OBJS) $(PRECONV_LNS): config.h -APROPOS_OBJS = apropos.o -APROPOS_LNS = apropos.ln +APROPOS_OBJS = apropos.o db.o +APROPOS_LNS = apropos.ln db.ln -$(APROPOS_OBJS) $(APROPOS_LNS): config.h mandoc.h +$(APROPOS_OBJS) $(APROPOS_LNS): config.h mandoc.h apropos.h + +CGI_OBJS = cgi.o db.o +CGI_LNS = cgi.ln db.ln + +$(CGI_OBJS) $(CGI_LNS): config.h mandoc.h apropos.h DEMANDOC_OBJS = demandoc.o DEMANDOC_LNS = demandoc.ln @@ -359,7 +365,7 @@ INDEX_OBJS = $(INDEX_MANS) \ www: index.html -lint: llib-llibmandoc.ln llib-lmandoc.ln llib-lpreconv.ln llib-ldemandoc.ln +lint: llib-lmandoc.ln llib-lpreconv.ln llib-ldemandoc.ln $(DBLN) clean: rm -f libmandoc.a $(LIBMANDOC_OBJS) @@ -370,6 +376,8 @@ clean: rm -f llib-lpreconv.ln $(PRECONV_LNS) rm -f apropos $(APROPOS_OBJS) rm -f llib-lapropos.ln $(APROPOS_LNS) + rm -f man.cgi $(CGI_OBJS) + rm -f llib-lman.cgi.ln $(CGI_LNS) rm -f demandoc $(DEMANDOC_OBJS) rm -f llib-ldemandoc.ln $(DEMANDOC_LNS) rm -f mandoc $(MANDOC_OBJS) @@ -420,32 +428,38 @@ llib-llibmandoc.ln: $(COMPAT_LNS) $(LIBMANDOC_LNS) mandoc: $(MANDOC_OBJS) libmandoc.a $(CC) $(LDFLAGS) -o $@ $(MANDOC_OBJS) libmandoc.a -llib-lmandoc.ln: $(MANDOC_LNS) - $(LINT) $(LINTFLAGS) -Cmandoc $(MANDOC_LNS) +llib-lmandoc.ln: $(MANDOC_LNS) llib-llibmandoc.ln + $(LINT) $(LINTFLAGS) -Cmandoc $(MANDOC_LNS) llib-llibmandoc.ln mandocdb: $(MANDOCDB_OBJS) libmandoc.a $(CC) $(LDFLAGS) -o $@ $(MANDOCDB_OBJS) libmandoc.a $(DBLIB) -llib-lmandocdb.ln: $(MANDOCDB_LNS) - $(LINT) $(LINTFLAGS) -Cmandocdb $(MANDOCDB_LNS) +llib-lmandocdb.ln: $(MANDOCDB_LNS) llib-llibmandoc.ln + $(LINT) $(LINTFLAGS) -Cmandocdb $(MANDOCDB_LNS) llib-llibmandoc.ln preconv: $(PRECONV_OBJS) $(CC) $(LDFLAGS) -o $@ 
$(PRECONV_OBJS) -llib-lpreconv.ln: $(PRECONV_LNS) - $(LINT) $(LINTFLAGS) -Cpreconv $(PRECONV_LNS) +llib-lpreconv.ln: $(PRECONV_LNS) llib-llibmandoc.ln + $(LINT) $(LINTFLAGS) -Cpreconv $(PRECONV_LNS) llib-llibmandoc.ln apropos: $(APROPOS_OBJS) libmandoc.a $(CC) $(LDFLAGS) -o $@ $(APROPOS_OBJS) libmandoc.a $(DBLIB) -llib-lapropos.ln: $(APROPOS_LNS) - $(LINT) $(LINTFLAGS) -Capropos $(APROPOS_LNS) +llib-lapropos.ln: $(APROPOS_LNS) llib-llibmandoc.ln + $(LINT) $(LINTFLAGS) -Capropos $(APROPOS_LNS) llib-llibmandoc.ln + +man.cgi: $(CGI_OBJS) libmandoc.a + $(CC) $(LDFLAGS) -o $@ $(CGI_OBJS) libmandoc.a $(DBLIB) + +llib-lman.cgi.ln: $(CGI_LNS) llib-llibmandoc.ln + $(LINT) $(LINTFLAGS) -Cman.cgi $(CGI_LNS) llib-llibmandoc.ln demandoc: $(DEMANDOC_OBJS) libmandoc.a $(CC) $(LDFLAGS) -o $@ $(DEMANDOC_OBJS) libmandoc.a -llib-ldemandoc.ln: $(DEMANDOC_LNS) - $(LINT) $(LINTFLAGS) -Cdemandoc $(DEMANDOC_LNS) +llib-ldemandoc.ln: $(DEMANDOC_LNS) llib-llibmandoc.ln + $(LINT) $(LINTFLAGS) -Cdemandoc $(DEMANDOC_LNS) llib-llibmandoc.ln mdocml.md5: mdocml.tar.gz md5 mdocml.tar.gz >$@ @@ -1,6 +1,6 @@ /* $Id$ */ /* -* Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -14,117 +14,21 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <sys/types.h> - #include <assert.h> -#include <errno.h> -#include <fcntl.h> #include <getopt.h> #include <limits.h> -#include <regex.h> -#include <stdarg.h> -#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <unistd.h> - -#ifdef __linux__ -# include <db_185.h> -#else -# include <db.h> -#endif +#include "apropos.h" #include "mandoc.h" -#define MAXRESULTS 256 - -/* Bit-fields. See mandocdb.8. */ - -#define TYPE_NAME 0x01 -#define TYPE_FUNCTION 0x02 -#define TYPE_UTILITY 0x04 -#define TYPE_INCLUDES 0x08 -#define TYPE_VARIABLE 0x10 -#define TYPE_STANDARD 0x20 -#define TYPE_AUTHOR 0x40 -#define TYPE_CONFIG 0x80 -#define TYPE_DESC 0x100 -#define TYPE_XREF 0x200 -#define TYPE_PATH 0x400 -#define TYPE_ENV 0x800 -#define TYPE_ERR 0x1000 - -enum match { - MATCH_SUBSTR = 0, - MATCH_REGEX, - MATCH_EXACT -}; - -enum sort { - SORT_TITLE = 0, - SORT_CAT, - SORT__MAX -}; - -struct opts { - enum sort sort; /* output sorting */ - const char *arch; /* restrict to architecture */ - const char *cat; /* restrict to category */ - int types; /* only types in bitmask */ - int insens; /* case-insensitive match */ - enum match match; /* match type */ -}; - struct type { int mask; const char *name; /* command-line type name */ }; -struct rec { - char *file; /* file in file-system */ - char *cat; /* category (3p, 3, etc.) */ - char *title; /* title (FOO, etc.) 
*/ - char *arch; /* arch (or empty string) */ - char *desc; /* description (from Nd) */ - recno_t rec; /* record in index */ -}; - -struct res { - char *arch; /* architecture */ - char *desc; /* free-form description */ - char *keyword; /* matched keyword */ - int types; /* bitmask of field selectors */ - char *cat; /* manual section */ - char *title; /* manual section */ - char *uri; /* formatted uri of file */ - recno_t rec; /* unique id of underlying manual */ - /* - * Maintain a binary tree for checking the uniqueness of `rec' - * when adding elements to the results array. - * Since the results array is dynamic, use offset in the array - * instead of a pointer to the structure. - */ - int lhs; - int rhs; -}; - -struct state { - DB *db; /* database */ - DB *idx; /* index */ - const char *dbf; /* database name */ - const char *idxf; /* index name */ -}; - -static const char * const sorts[SORT__MAX] = { - "cat", /* SORT_CAT */ - "title", /* SORT_TITLE */ -}; - static const struct type types[] = { { TYPE_NAME, "name" }, { TYPE_FUNCTION, "func" }, @@ -143,17 +47,8 @@ static const struct type types[] = { { 0, NULL } }; -static void buf_alloc(char **, size_t *, size_t); -static void buf_dup(struct mchars *, char **, const char *); -static void buf_redup(struct mchars *, char **, - size_t *, const char *); -static int sort_cat(const void *, const void *); -static int sort_title(const void *, const void *); -static int state_getrecord(struct state *, - recno_t, struct rec *); -static void state_output(const struct res *, int); -static int state_search(struct state *, - const struct opts *, char *); +static int cmp(const void *, const void *); +static void list(struct rec *, size_t, void *); static void usage(void); static char *progname; @@ -161,22 +56,15 @@ static char *progname; int main(int argc, char *argv[]) { - BTREEINFO info; - int ch, i, rc; - const char *dbf, *idxf; - struct state state; + int ch, i; char *q, *v; struct opts opts; extern int optind; extern char 
*optarg; memset(&opts, 0, sizeof(struct opts)); - memset(&state, 0, sizeof(struct state)); - dbf = "mandoc.db"; - idxf = "mandoc.index"; q = NULL; - rc = EXIT_FAILURE; progname = strrchr(argv[0], '/'); if (progname == NULL) @@ -184,9 +72,7 @@ main(int argc, char *argv[]) else ++progname; - opts.match = MATCH_SUBSTR; - - while (-1 != (ch = getopt(argc, argv, "a:c:eIrs:t:"))) + while (-1 != (ch = getopt(argc, argv, "a:c:I:t:"))) switch (ch) { case ('a'): opts.arch = optarg; @@ -194,28 +80,9 @@ main(int argc, char *argv[]) case ('c'): opts.cat = optarg; break; - case ('e'): - opts.match = MATCH_EXACT; - break; case ('I'): - opts.insens = 1; - break; - case ('r'): - opts.match = MATCH_REGEX; + opts.flags |= OPTS_INSENS; break; - case ('s'): - for (i = 0; i < SORT__MAX; i++) { - if (strcmp(optarg, sorts[i])) - continue; - opts.sort = (enum sort)i; - break; - } - - if (i < SORT__MAX) - break; - - fprintf(stderr, "%s: Bad sort\n", optarg); - return(EXIT_FAILURE); case ('t'): while (NULL != (v = strsep(&optarg, ","))) { if ('\0' == *v) @@ -244,7 +111,7 @@ main(int argc, char *argv[]) if (0 == argc || '\0' == **argv) { usage(); - goto out; + return(EXIT_SUCCESS); } else q = *argv; @@ -258,345 +125,19 @@ main(int argc, char *argv[]) * The index database is a recno. */ - memset(&info, 0, sizeof(BTREEINFO)); - info.flags = R_DUP; - - state.db = dbopen(dbf, O_RDONLY, 0, DB_BTREE, &info); - if (NULL == state.db) { - perror(dbf); - goto out; - } - - state.idx = dbopen(idxf, O_RDONLY, 0, DB_RECNO, NULL); - if (NULL == state.idx) { - perror(idxf); - goto out; - } - - /* Main search function. */ - - rc = state_search(&state, &opts, q) ? 
- EXIT_SUCCESS : EXIT_FAILURE; -out: - if (state.db) - (*state.db->close)(state.db); - if (state.idx) - (*state.idx->close)(state.idx); - - return(rc); + apropos_search(&opts, q, NULL, list); + return(EXIT_SUCCESS); } -static int -state_search(struct state *p, const struct opts *opts, char *q) -{ - int leaf, root, len, ch, dflag, rc; - struct mchars *mc; - char *buf; - size_t bufsz; - recno_t rec; - uint32_t fl; - DBT key, val; - struct res *res; - regex_t reg; - regex_t *regp; - char filebuf[10]; - struct rec record; - - rc = 0; - root = leaf = -1; - res = NULL; - len = 0; - buf = NULL; - bufsz = 0; - regp = NULL; - - /* - * Configure how we scan through results to see if we match: - * whether by regexp or exact matches. - */ - - switch (opts->match) { - case (MATCH_REGEX): - ch = REG_EXTENDED | REG_NOSUB | - (opts->insens ? REG_ICASE : 0); - - if (0 != regcomp(®, q, ch)) { - fprintf(stderr, "%s: Bad pattern\n", q); - return(0); - } - - regp = ® - dflag = R_FIRST; - break; - case (MATCH_EXACT): - key.data = q; - key.size = strlen(q) + 1; - dflag = R_CURSOR; - break; - default: - dflag = R_FIRST; - break; - } - - mc = mchars_alloc(); - - /* - * Iterate over the entire keyword database. - * For each record, we must first translate the key into UTF-8. - * Following that, make sure it's acceptable. - * Lastly, add it to the available records. - */ - - while (0 == (ch = (*p->db->seq)(p->db, &key, &val, dflag))) { - dflag = R_NEXT; - - /* - * Keys must be sized as such: the keyword must be - * non-empty (nil terminator plus one character) and the - * value must be 8 (recno_t---uint32_t---index reference - * and a uint32_t flag field). - */ - - if (key.size < 2 || 8 != val.size) { - fprintf(stderr, "%s: Bad database\n", p->dbf); - goto out; - } - - buf_redup(mc, &buf, &bufsz, (char *)key.data); - - fl = *(uint32_t *)val.data; - - if ( ! 
(fl & opts->types)) - continue; - - switch (opts->match) { - case (MATCH_REGEX): - if (regexec(regp, buf, 0, NULL, 0)) - continue; - break; - case (MATCH_EXACT): - if (opts->insens && strcasecmp(buf, q)) - goto send; - if ( ! opts->insens && strcmp(buf, q)) - goto send; - break; - default: - if (opts->insens && NULL == strcasestr(buf, q)) - continue; - if ( ! opts->insens && NULL == strstr(buf, q)) - continue; - break; - } - - /* - * Now look up the file itself in our index. The file's - * indexed by its recno for fast lookups. - */ - - memcpy(&rec, val.data + 4, sizeof(recno_t)); - - if ( ! state_getrecord(p, rec, &record)) - goto out; - - /* If we're in a different section, skip... */ - - if (opts->cat && strcasecmp(opts->cat, record.cat)) - continue; - if (opts->arch && strcasecmp(opts->arch, record.arch)) - continue; - - /* - * Do a binary search to dedupe the results tree of the - * same record: we don't print the same file. - */ - - for (leaf = root; leaf >= 0; ) - if (rec > res[leaf].rec && res[leaf].rhs >= 0) - leaf = res[leaf].rhs; - else if (rec < res[leaf].rec && res[leaf].lhs >= 0) - leaf = res[leaf].lhs; - else - break; - - if (leaf >= 0 && res[leaf].rec == rec) - continue; - - res = mandoc_realloc - (res, (len + 1) * sizeof(struct res)); - - /* - * Now we have our filename, keywords, types, and all - * other necessary information. - * Process it and add it to our list of results. 
- */ - - filebuf[9] = '\0'; - snprintf(filebuf, 10, "%u", record.rec); - assert('\0' == filebuf[9]); - - res[len].rec = record.rec; - res[len].types = fl; - res[len].lhs = res[len].rhs = -1; - - buf_dup(mc, &res[len].keyword, buf); - buf_dup(mc, &res[len].uri, filebuf); - buf_dup(mc, &res[len].cat, record.cat); - buf_dup(mc, &res[len].arch, record.arch); - buf_dup(mc, &res[len].title, record.title); - buf_dup(mc, &res[len].desc, record.desc); - - if (leaf >= 0) { - if (record.rec > res[leaf].rec) - res[leaf].rhs = len; - else - res[leaf].lhs = len; - } else - root = len; - - len++; - } - - if (ch < 0) { - perror(p->dbf); - goto out; - } -send: - /* Sort our results. */ - - if (SORT_CAT == opts->sort) - qsort(res, len, sizeof(struct res), sort_cat); - else - qsort(res, len, sizeof(struct res), sort_title); - - state_output(res, len); - rc = 1; -out: - for (len-- ; len >= 0; len--) { - free(res[len].keyword); - free(res[len].title); - free(res[len].cat); - free(res[len].arch); - free(res[len].desc); - free(res[len].uri); - } - - free(res); - free(buf); - mchars_free(mc); - - if (regp) - regfree(regp); - - return(rc); -} - -/* - * Track allocated buffer size for buf_redup(). - */ -static inline void -buf_alloc(char **buf, size_t *bufsz, size_t sz) -{ - - if (sz < *bufsz) - return; - - *bufsz = sz + 1024; - *buf = mandoc_realloc(*buf, *bufsz); -} - -/* - * Like buf_redup() but throwing away the buffer size. - */ +/* ARGSUSED */ static void -buf_dup(struct mchars *mc, char **buf, const char *val) -{ - size_t bufsz; - - bufsz = 0; - *buf = NULL; - buf_redup(mc, buf, &bufsz, val); -} - -/* - * Normalise strings from the index and database. - * These strings are escaped as defined by mandoc_char(7) along with - * other goop in mandoc.h (e.g., soft hyphens). 
- */ -static void -buf_redup(struct mchars *mc, char **buf, - size_t *bufsz, const char *val) -{ - size_t sz; - const char *seq, *cpp; - int len, pos; - enum mandoc_esc esc; - const char rsv[] = { '\\', ASCII_NBRSP, ASCII_HYPH, '\0' }; - - /* Pre-allocate by the length of the input */ - - buf_alloc(buf, bufsz, strlen(val) + 1); - - pos = 0; - - while ('\0' != *val) { - /* - * Halt on the first escape sequence. - * This also halts on the end of string, in which case - * we just copy, fallthrough, and exit the loop. - */ - if ((sz = strcspn(val, rsv)) > 0) { - memcpy(&(*buf)[pos], val, sz); - pos += (int)sz; - val += (int)sz; - } - - if (ASCII_HYPH == *val) { - (*buf)[pos++] = '-'; - val++; - continue; - } else if (ASCII_NBRSP == *val) { - (*buf)[pos++] = ' '; - val++; - continue; - } else if ('\\' != *val) - break; - - /* Read past the slash. */ - - val++; - - /* - * Parse the escape sequence and see if it's a - * predefined character or special character. - */ - - esc = mandoc_escape(&val, &seq, &len); - if (ESCAPE_ERROR == esc) - break; - - cpp = ESCAPE_SPECIAL == esc ? - mchars_spec2str(mc, seq, len, &sz) : NULL; - - if (NULL == cpp) - continue; - - /* Copy the rendered glyph into the stream. */ - - buf_alloc(buf, bufsz, sz); - - memcpy(&(*buf)[pos], cpp, sz); - pos += (int)sz; - } - - (*buf)[pos] = '\0'; -} - -static void -state_output(const struct res *res, int sz) +list(struct rec *res, size_t sz, void *arg) { int i; - for (i = 0; i < sz; i++) + qsort(res, sz, sizeof(struct rec), cmp); + + for (i = 0; i < (int)sz; i++) printf("%s(%s%s%s) - %s\n", res[i].title, res[i].cat, *res[i].arch ? 
"/" : "", @@ -604,75 +145,22 @@ state_output(const struct res *res, int sz) res[i].desc); } +static int +cmp(const void *p1, const void *p2) +{ + + return(strcmp(((const struct rec *)p1)->title, + ((const struct rec *)p2)->title)); +} + static void usage(void) { fprintf(stderr, "usage: %s " - "[-eIr] " + "[-I] " "[-a arch] " "[-c cat] " - "[-s sort] " "[-t type[,...]] " "key\n", progname); } - -static int -state_getrecord(struct state *p, recno_t rec, struct rec *rp) -{ - DBT key, val; - size_t sz; - int rc; - - key.data = &rec; - key.size = sizeof(recno_t); - - rc = (*p->idx->get)(p->idx, &key, &val, 0); - if (rc < 0) { - perror(p->idxf); - return(0); - } else if (rc > 0) - goto err; - - rp->file = (char *)val.data; - if ((sz = strlen(rp->file) + 1) >= val.size) - goto err; - - rp->cat = (char *)val.data + (int)sz; - if ((sz += strlen(rp->cat) + 1) >= val.size) - goto err; - - rp->title = (char *)val.data + (int)sz; - if ((sz += strlen(rp->title) + 1) >= val.size) - goto err; - - rp->arch = (char *)val.data + (int)sz; - if ((sz += strlen(rp->arch) + 1) >= val.size) - goto err; - - rp->desc = (char *)val.data + (int)sz; - rp->rec = rec; - return(1); -err: - fprintf(stderr, "%s: Corrupt index\n", p->idxf); - return(0); -} - -static int -sort_title(const void *p1, const void *p2) -{ - - return(strcmp(((const struct res *)p1)->title, - ((const struct res *)p2)->title)); -} - -static int -sort_cat(const void *p1, const void *p2) -{ - int rc; - - rc = strcmp(((const struct res *)p1)->cat, - ((const struct res *)p2)->cat); - - return(0 == rc ? 
sort_title(p1, p2) : rc); -} diff --git a/apropos.h b/apropos.h new file mode 100644 index 00000000..0dc26e5f --- /dev/null +++ b/apropos.h @@ -0,0 +1,68 @@ +/* $Id$ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef APROPOS_H +#define APROPOS_H + +#define TYPE_NAME 0x01 +#define TYPE_FUNCTION 0x02 +#define TYPE_UTILITY 0x04 +#define TYPE_INCLUDES 0x08 +#define TYPE_VARIABLE 0x10 +#define TYPE_STANDARD 0x20 +#define TYPE_AUTHOR 0x40 +#define TYPE_CONFIG 0x80 +#define TYPE_DESC 0x100 +#define TYPE_XREF 0x200 +#define TYPE_PATH 0x400 +#define TYPE_ENV 0x800 +#define TYPE_ERR 0x1000 + +struct rec { + char *file; /* file in file-system */ + char *cat; /* category (3p, 3, etc.) */ + char *title; /* title (FOO, etc.) */ + char *arch; /* arch (or empty string) */ + char *desc; /* description (from Nd) */ + unsigned int rec; /* record in index */ + /* + * By the time the apropos_search() callback is called, these + * are superfluous. + * Maintain a binary tree for checking the uniqueness of `rec' + * when adding elements to the results array. + * Since the results array is dynamic, use offset in the array + * instead of a pointer to the structure. 
+ */ + int lhs; + int rhs; +}; + +struct opts { + const char *arch; /* restrict to architecture */ + const char *cat; /* restrict to manual section */ + int types; /* only types in bitmask */ + int flags; +#define OPTS_INSENS (0x01) /* case-insensitive match */ +}; + +__BEGIN_DECLS + +void apropos_search(const struct opts *, const char *, + void *, void (*)(struct rec *, size_t, void *)); + +__END_DECLS + +#endif /*!APROPOS_H*/ @@ -0,0 +1,303 @@ +/* $Id$ */ +#include <assert.h> +#include <fcntl.h> +#include <regex.h> +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> + +#include "apropos.h" +#include "mandoc.h" + +/* + * The page a request is trying to make. + */ +enum page { + PAGE_INDEX, + PAGE_SEARCH, + PAGE__MAX +}; + +/* + * Key-value pair. + * Both key and val are on the heap. + */ +struct kval { + char *key; + char *val; +}; + +/* + * The media type, determined by suffix, of the requesting or responding + * context. + */ +enum media { + MEDIA_HTML, + MEDIA__MAX +}; + +/* + * An HTTP request. 
+ */ +struct req { + struct kval *fields; /* query fields */ + size_t fieldsz; + enum media media; + enum page page; +}; + +static void html_printtext(const char *); +static int kval_decode(char *); +static void kval_parse(struct kval **, size_t *, char *); +static void kval_free(struct kval *, size_t); +static void pg_index(const struct req *, char *); +static void pg_search(const struct req *, char *); +static void pg_searchres(struct rec *, size_t, void *); + +static const char * const pages[PAGE__MAX] = { + "index", /* PAGE_INDEX */ + "search", /* PAGE_SEARCH */ +}; + +static const char * const medias[MEDIA__MAX] = { + "html", /* MEDIA_HTML */ +}; + +static void +html_printtext(const char *p) +{ + char c; + + while ('\0' != *p) + switch ((c = *p++)) { + case ('"'): + printf(""e;"); + break; + case ('&'): + printf("&"); + break; + case ('>'): + printf(">"); + break; + case ('<'): + printf("<"); + break; + default: + putchar((unsigned char)c); + break; + } +} + +static void +kval_free(struct kval *p, size_t sz) +{ + int i; + + for (i = 0; i < (int)sz; i++) { + free(p[i].key); + free(p[i].val); + } + free(p); +} + +/* + * Parse out key-value pairs from an HTTP request variable. + * This can be either a cookie or a POST/GET string. + */ +static void +kval_parse(struct kval **kv, size_t *kvsz, char *p) +{ + char *key, *val; + size_t sz, cur; + + cur = 0; + + while (p && '\0' != *p) { + while (' ' == *p) + p++; + + key = p; + val = NULL; + + if (NULL != (p = strchr(p, '='))) { + *p++ = '\0'; + val = p; + + sz = strcspn(p, ";&"); + /* LINTED */ + p += sz; + + if ('\0' != *p) + *p++ = '\0'; + } else { + p = key; + sz = strcspn(p, ";&"); + /* LINTED */ + p += sz; + + if ('\0' != *p) + p++; + continue; + } + + if ('\0' == *key || '\0' == *val) + continue; + + /* Just abort handling. */ + + if ( ! kval_decode(key)) + return; + if ( ! 
kval_decode(val)) + return; + + if (*kvsz + 1 >= cur) { + cur++; + *kv = mandoc_realloc + (*kv, cur * sizeof(struct kval)); + } + + (*kv)[(int)*kvsz].key = mandoc_strdup(key); + (*kv)[(int)*kvsz].val = mandoc_strdup(val); + (*kvsz)++; + } +} + +/* + * In-place HTTP-decode a string. The standard explanation is that this + * turns "%4e+foo" into "n foo" in the regular way. This is done + * in-place over the allocated string. + */ +static int +kval_decode(char *p) +{ + char hex[3]; + int c; + + hex[2] = '\0'; + + for ( ; '\0' != *p; p++) { + if ('%' == *p) { + if ('\0' == (hex[0] = *(p + 1))) + return(0); + if ('\0' == (hex[1] = *(p + 2))) + return(0); + if (1 != sscanf(hex, "%x", &c)) + return(0); + if ('\0' == c) + return(0); + + *p = (char)c; + memmove(p + 1, p + 3, strlen(p + 3) + 1); + } else + *p = '+' == *p ? ' ' : *p; + } + + *p = '\0'; + return(1); +} + + +/* ARGSUSED */ +static void +pg_index(const struct req *req, char *path) +{ + +} + +static void +pg_searchres(struct rec *recs, size_t sz, void *arg) +{ + int i; + const char *pg; + + if (NULL == (pg = getenv("SCRIPT_NAME"))) + pg = ""; + + for (i = 0; i < (int)sz; i++) { + printf("<A HREF=\"%s/show/%u.html\">", + pg, recs[i].rec); + html_printtext(recs[i].title); + putchar('('); + html_printtext(recs[i].cat); + puts(")</A>"); + } +} + +static void +pg_search(const struct req *req, char *path) +{ + int i; + struct opts opt; + + for (i = 0; i < (int)req->fieldsz; i++) + if (0 == strcmp(req->fields[i].key, "key")) + break; + + if (i == (int)req->fieldsz) + return; + + memset(&opt, 0, sizeof(struct opts)); + opt.types = TYPE_NAME | TYPE_DESC; + apropos_search(&opt, req->fields[i].val, NULL, pg_searchres); +} + +int +main(void) +{ + int i; + struct req req; + char *p; + char *path, *subpath, *suffix; + + memset(&req, 0, sizeof(struct req)); + + if (NULL != (p = getenv("QUERY_STRING"))) + kval_parse(&req.fields, &req.fieldsz, p); + + suffix = subpath = path = NULL; + + req.media = MEDIA_HTML; + req.page = 
PAGE__MAX; + + if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path) + req.page = PAGE_INDEX; + if (NULL != path && '/' == *path && '\0' == *++path) + req.page = PAGE_INDEX; + + if (NULL != path && NULL != (suffix = strrchr(path, '.'))) + if (NULL != suffix && NULL == strchr(suffix, '/')) + *suffix++ = '\0'; + + if (NULL != path && NULL != (subpath = strchr(path, '/'))) + *subpath++ = '\0'; + + if (NULL != suffix && '\0' != *suffix) + for (i = 0; i < (int)MEDIA__MAX; i++) + if (0 == strcmp(medias[i], suffix)) { + req.media = (enum media)i; + break; + } + + if (NULL != path && '\0' != *path) + for (i = 0; i < (int)PAGE__MAX; i++) + if (0 == strcmp(pages[i], path)) { + req.page = (enum page)i; + break; + } + + switch (req.page) { + case (PAGE_INDEX): + pg_index(&req, subpath); + break; + case (PAGE_SEARCH): + pg_search(&req, subpath); + break; + default: + /* Blah */ + break; + } + + kval_free(req.fields, req.fieldsz); + return(EXIT_SUCCESS); +} @@ -0,0 +1,436 @@ +/* $Id$ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <assert.h> +#include <fcntl.h> +#include <regex.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> + +#ifdef __linux__ +# include <db_185.h> +#else +# include <db.h> +#endif + +#include "apropos.h" +#include "mandoc.h" + +static DB *btree_open(void); +static int btree_read(const DBT *, const struct mchars *, char **); +static DB *index_open(void); +static int index_read(const DBT *, const DBT *, + const struct mchars *, struct rec *); +static void norm_string(const char *, + const struct mchars *, char **); +static size_t norm_utf8(unsigned int, char[7]); + +/* + * Open the keyword mandoc-db database. + */ +static DB * +btree_open(void) +{ + BTREEINFO info; + DB *db; + + memset(&info, 0, sizeof(BTREEINFO)); + info.flags = R_DUP; + + db = dbopen("mandoc.db", O_RDONLY, 0, DB_BTREE, &info); + if (NULL != db) + return(db); + + return(NULL); +} + +/* + * Read a keyword from the database and normalise it. + * Return 0 if the database is insane, else 1. + */ +static int +btree_read(const DBT *v, const struct mchars *mc, char **buf) +{ + + /* Sanity: are we nil-terminated? */ + + assert(v->size > 0); + if ('\0' != ((char *)v->data)[(int)v->size - 1]) + return(0); + + norm_string((char *)v->data, mc, buf); + return(1); +} + +/* + * Take a Unicode codepoint and produce its UTF-8 encoding. + * This isn't the best way to do this, but it works. + * The magic numbers are from the UTF-8 packaging. + * They're not as scary as they seem: read the UTF-8 spec for details. 
+ */ +static size_t +norm_utf8(unsigned int cp, char out[7]) +{ + size_t rc; + + rc = 0; + + if (cp <= 0x0000007F) { + rc = 1; + out[0] = (char)cp; + } else if (cp <= 0x000007FF) { + rc = 2; + out[0] = (cp >> 6 & 31) | 192; + out[1] = (cp & 63) | 128; + } else if (cp <= 0x0000FFFF) { + rc = 3; + out[0] = (cp >> 12 & 15) | 224; + out[1] = (cp >> 6 & 63) | 128; + out[2] = (cp & 63) | 128; + } else if (cp <= 0x001FFFFF) { + rc = 4; + out[0] = (cp >> 18 & 7) | 240; + out[1] = (cp >> 12 & 63) | 128; + out[2] = (cp >> 6 & 63) | 128; + out[3] = (cp & 63) | 128; + } else if (cp <= 0x03FFFFFF) { + rc = 5; + out[0] = (cp >> 24 & 3) | 248; + out[1] = (cp >> 18 & 63) | 128; + out[2] = (cp >> 12 & 63) | 128; + out[3] = (cp >> 6 & 63) | 128; + out[4] = (cp & 63) | 128; + } else if (cp <= 0x7FFFFFFF) { + rc = 6; + out[0] = (cp >> 30 & 1) | 252; + out[1] = (cp >> 24 & 63) | 128; + out[2] = (cp >> 18 & 63) | 128; + out[3] = (cp >> 12 & 63) | 128; + out[4] = (cp >> 6 & 63) | 128; + out[5] = (cp & 63) | 128; + } else + return(0); + + out[rc] = '\0'; + return(rc); +} + +/* + * Normalise strings from the index and database. + * These strings are escaped as defined by mandoc_char(7) along with + * other goop in mandoc.h (e.g., soft hyphens). + * This function normalises these into a nice UTF-8 string. + * Returns 0 if the database is fucked. + */ +static void +norm_string(const char *val, const struct mchars *mc, char **buf) +{ + size_t sz, bsz; + char utfbuf[7]; + const char *seq, *cpp; + int len, u, pos; + enum mandoc_esc esc; + static const char res[] = { '\\', '\t', + ASCII_NBRSP, ASCII_HYPH, '\0' }; + + /* Pre-allocate by the length of the input */ + + bsz = strlen(val) + 1; + *buf = mandoc_realloc(*buf, bsz); + pos = 0; + + while ('\0' != *val) { + /* + * Halt on the first escape sequence. + * This also halts on the end of string, in which case + * we just copy, fallthrough, and exit the loop. 
+ */ + if ((sz = strcspn(val, res)) > 0) { + memcpy(&(*buf)[pos], val, sz); + pos += (int)sz; + val += (int)sz; + } + + if (ASCII_HYPH == *val) { + (*buf)[pos++] = '-'; + val++; + continue; + } else if ('\t' == *val || ASCII_NBRSP == *val) { + (*buf)[pos++] = ' '; + val++; + continue; + } else if ('\\' != *val) + break; + + /* Read past the slash. */ + + val++; + u = 0; + + /* + * Parse the escape sequence and see if it's a + * predefined character or special character. + */ + + esc = mandoc_escape(&val, &seq, &len); + if (ESCAPE_ERROR == esc) + break; + + /* + * XXX - this just does UTF-8, but we need to know + * beforehand whether we should do text substitution. + */ + + switch (esc) { + case (ESCAPE_SPECIAL): + if (0 != (u = mchars_spec2cp(mc, seq, len))) + break; + /* FALLTHROUGH */ + default: + continue; + } + + /* + * If we have a Unicode codepoint, try to convert that + * to a UTF-8 byte string. + */ + + cpp = utfbuf; + if (0 == (sz = norm_utf8(u, utfbuf))) + continue; + + /* Copy the rendered glyph into the stream. */ + + sz = strlen(cpp); + bsz += sz; + + *buf = mandoc_realloc(*buf, bsz); + + memcpy(&(*buf)[pos], cpp, sz); + pos += (int)sz; + } + + (*buf)[pos] = '\0'; +} + +/* + * Open the filename-index mandoc-db database. + * Returns NULL if opening failed. + */ +static DB * +index_open(void) +{ + DB *db; + + db = dbopen("mandoc.index", O_RDONLY, 0, DB_RECNO, NULL); + if (NULL != db) + return(db); + + return(NULL); +} + +/* + * Safely unpack from an index file record into the structure. + * Returns 1 if an entry was unpacked, 0 if the database is insane. 
+ */ +static int +index_read(const DBT *key, const DBT *val, + const struct mchars *mc, struct rec *rec) +{ + size_t left; + char *np, *cp; + +#define INDEX_BREAD(_dst) \ + do { \ + if (NULL == (np = memchr(cp, '\0', left))) \ + return(0); \ + norm_string(cp, mc, &(_dst)); \ + left -= (np - cp) + 1; \ + cp = np + 1; \ + } while (/* CONSTCOND */ 0) + + left = val->size; + cp = (char *)val->data; + + rec->rec = *(recno_t *)key->data; + + INDEX_BREAD(rec->file); + INDEX_BREAD(rec->cat); + INDEX_BREAD(rec->title); + INDEX_BREAD(rec->arch); + INDEX_BREAD(rec->desc); + return(1); +} + +/* + * Search the mandocdb database for the regular expression "q". + * Filter out by "opts". + * Call "res" with the results, which may be zero. + */ +void +apropos_search(const struct opts *opts, const char *q, void *arg, + void (*res)(struct rec *, size_t, void *)) +{ + int i, len, root, leaf; + regex_t reg; + DBT key, val; + DB *btree, *idx; + struct mchars *mc; + int ch; + char *buf; + recno_t rec; + struct rec *recs; + struct rec srec; + + root = -1; + leaf = -1; + btree = NULL; + idx = NULL; + mc = NULL; + buf = NULL; + recs = NULL; + len = 0; + + memset(&srec, 0, sizeof(struct rec)); + + if (NULL != q && '\0' == *q) + q = NULL; + + ch = REG_EXTENDED | REG_NOSUB | + (OPTS_INSENS & opts->flags ? REG_ICASE : 0); + + /* XXX: error out with bad regexp? */ + + if (NULL == q || regcomp(®, q, ch)) { + (*res)(NULL, 0, arg); + return; + } + + mc = mchars_alloc(); + + /* XXX: return fact that we've errored? */ + + if (NULL == (btree = btree_open())) + goto out; + if (NULL == (idx = index_open())) + goto out; + + while (0 == (ch = (*btree->seq)(btree, &key, &val, R_NEXT))) { + /* + * Low-water mark for key and value. + * The key must have something in it, and the value must + * have the correct tags/recno mix. + */ + if (key.size < 2 || 8 != val.size) + break; + + if ( ! (*(int32_t *)val.data & opts->types)) + continue; + + if ( ! 
btree_read(&key, mc, &buf)) + break; + if (regexec(®, buf, 0, NULL, 0)) + continue; + + memcpy(&rec, val.data + 4, sizeof(recno_t)); + + /* + * O(log n) scan for prior records. Since a record + * number is unbounded, this has decent performance over + * a complex hash function. + */ + + for (leaf = root; leaf >= 0; ) + if (rec > recs[leaf].rec && recs[leaf].rhs >= 0) + leaf = recs[leaf].rhs; + else if (rec < recs[leaf].rec && recs[leaf].lhs >= 0) + leaf = recs[leaf].lhs; + else + break; + + if (leaf >= 0 && recs[leaf].rec == rec) + continue; + + /* + * Now we actually extract the manpage's metadata from + * the index database. + */ + + key.data = &rec; + key.size = sizeof(recno_t); + + if (0 != (*idx->get)(idx, &key, &val, 0)) + break; + + srec.lhs = srec.rhs = -1; + if ( ! index_read(&key, &val, mc, &srec)) + break; + + if (opts->cat && strcasecmp(opts->cat, srec.cat)) + continue; + if (opts->arch && strcasecmp(opts->arch, srec.arch)) + continue; + + recs = mandoc_realloc + (recs, (len + 1) * sizeof(struct rec)); + + memcpy(&recs[len], &srec, sizeof(struct rec)); + + /* Append to our tree. */ + + if (leaf >= 0) { + if (rec > recs[leaf].rec) + recs[leaf].rhs = len; + else + recs[leaf].lhs = len; + } else + root = len; + + memset(&srec, 0, sizeof(struct rec)); + len++; + } + + if (1 == ch) + (*res)(recs, len, arg); + + /* XXX: else? corrupt database error? */ +out: + for (i = 0; i < len; i++) { + free(recs[i].file); + free(recs[i].cat); + free(recs[i].title); + free(recs[i].arch); + free(recs[i].desc); + } + + free(srec.file); + free(srec.cat); + free(srec.title); + free(srec.arch); + free(srec.desc); + + if (mc) + mchars_free(mc); + if (btree) + (*btree->close)(btree); + if (idx) + (*idx->close)(idx); + + free(buf); + free(recs); + regfree(®); +} |