From 4d2a21b04fba0be7d5b7cea8853dfb6bdf5eb1fe Mon Sep 17 00:00:00 2001
From: Kristaps Dzonsons
Date: Fri, 8 Jun 2012 10:36:23 +0000
Subject: Add a new mansearch.h interface, which replaces apropos_db.c

This is a much more minimal interface that stuffs all operations into a
single function.  It uses sqlite3 and ohash.
---
 mansearch.c | 505 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 mansearch.h |  43 ++++++
 2 files changed, 548 insertions(+)
 create mode 100644 mansearch.c
 create mode 100644 mansearch.h

diff --git a/mansearch.c b/mansearch.c
new file mode 100644
index 00000000..d752b917
--- /dev/null
+++ b/mansearch.c
@@ -0,0 +1,505 @@
+/*	$Id$ */
+/*
+ * Copyright (c) 2012 Kristaps Dzonsons
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/param.h>
+
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <ohash.h>
+#include <sqlite3.h>
+
+#include "mandoc.h"
+#include "manpath.h"
+#include "mandocdb.h"
+#include "mansearch.h"
+
+struct	expr {
+	int		 glob; /* is glob? */
+	uint64_t	 bits; /* type-mask */
+	const char	*v; /* search value */
+	struct expr	*next; /* next in sequence */
+};
+
+struct	match {
+	uint64_t	 id; /* identifier in database */
+	char		*file; /* relative filepath of manpage */
+	char		*desc; /* description of manpage */
+	int		 form; /* 0 == catpage */
+};
+
+struct	type {
+	uint64_t	 bits;
+	const char	*name;
+};
+
+/*
+ * Key names accepted by exprterm() and the type-mask bits each selects.
+ * "Fo" and "Vt" are aliases sharing the bits of "Fn" and "Va".
+ * The table ends at the entry with no bits set.
+ */
+static	const struct type types[] = {
+	{ TYPE_An, "An" },
+	{ TYPE_Ar, "Ar" },
+	{ TYPE_At, "At" },
+	{ TYPE_Bsx, "Bsx" },
+	{ TYPE_Bx, "Bx" },
+	{ TYPE_Cd, "Cd" },
+	{ TYPE_Cm, "Cm" },
+	{ TYPE_Dv, "Dv" },
+	{ TYPE_Dx, "Dx" },
+	{ TYPE_Em, "Em" },
+	{ TYPE_Er, "Er" },
+	{ TYPE_Ev, "Ev" },
+	{ TYPE_Fa, "Fa" },
+	{ TYPE_Fl, "Fl" },
+	{ TYPE_Fn, "Fn" },
+	{ TYPE_Fn, "Fo" },
+	{ TYPE_Ft, "Ft" },
+	{ TYPE_Fx, "Fx" },
+	{ TYPE_Ic, "Ic" },
+	{ TYPE_In, "In" },
+	{ TYPE_Lb, "Lb" },
+	{ TYPE_Li, "Li" },
+	{ TYPE_Lk, "Lk" },
+	{ TYPE_Ms, "Ms" },
+	{ TYPE_Mt, "Mt" },
+	{ TYPE_Nd, "Nd" },
+	{ TYPE_Nm, "Nm" },
+	{ TYPE_Nx, "Nx" },
+	{ TYPE_Ox, "Ox" },
+	{ TYPE_Pa, "Pa" },
+	{ TYPE_Rs, "Rs" },
+	{ TYPE_Sh, "Sh" },
+	{ TYPE_Ss, "Ss" },
+	{ TYPE_St, "St" },
+	{ TYPE_Sy, "Sy" },
+	{ TYPE_Tn, "Tn" },
+	{ TYPE_Va, "Va" },
+	{ TYPE_Va, "Vt" },
+	{ TYPE_Xr, "Xr" },
+	{ ~0ULL, "any" },
+	{ 0ULL, NULL }
+};
+
+static	void		*hash_alloc(size_t, void *);
+static	void		 hash_free(void *, size_t, void *);
+static	void		*hash_halloc(size_t, void *);
+static	struct expr	*exprcomp(int, char *[]);
+static	void		 exprfree(struct expr *);
+static	struct expr	*exprterm(char *);
+static	char		*sql_statement(const struct expr *,
+				const char *, const char *);
+
+/*
+ * Search each database found under "paths" for manuals matching the
+ * terms in "argv", optionally limited to an architecture and/or a
+ * section (either may be NULL).
+ * On return, "*res" is NULL or an allocated array of "*sz" entries;
+ * the caller owns the array and every entry's "desc".
+ * Returns 0 if the starting directory could not be re-entered while
+ * walking the paths (results are then discarded), otherwise 1.
+ */
+int
+mansearch(const struct manpaths *paths,
+		const char *arch, const char *sec,
+		int argc, char *argv[],
+		struct manpage **res, size_t *sz)
+{
+	int		 fd, rc, ok;
+	int64_t		 id;
+	char		 buf[MAXPATHLEN];
+	char		*sql;
+	const char	*txt;
+	struct expr	*e, *ep;
+	sqlite3		*db;
+	sqlite3_stmt	*s;
+	struct match	*mp;
+	struct ohash_info info;
+	struct ohash	 htab;
+	unsigned int	 idx;
+	size_t		 i, j, cur, maxres;
+
+	memset(&info, 0, sizeof(struct ohash_info));
+
+	info.halloc = hash_halloc;
+	info.alloc = hash_alloc;
+	info.hfree = hash_free;
+	info.key_offset = offsetof(struct match, id);
+
+	*sz = 0;
+	sql = NULL;
+	*res = NULL;
+	fd = -1;
+	e = NULL;
+	cur = maxres = 0;
+	ok = 1;
+
+	if (0 == argc)
+		goto out;
+	if (NULL == (e = exprcomp(argc, argv)))
+		goto out;
+
+	/*
+	 * Save a descriptor to the current working directory.
+	 * Since pathnames in the "paths" variable might be relative,
+	 * and we'll be chdir()ing into them, we need to keep a handle
+	 * on our current directory from which to start the chdir().
+	 */
+
+	if (NULL == getcwd(buf, MAXPATHLEN)) {
+		perror(NULL);
+		goto out;
+	} else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
+		perror(buf);
+		goto out;
+	}
+
+	sql = sql_statement(e, arch, sec);
+
+	/*
+	 * Loop over the directories (containing databases) for us to
+	 * search.
+	 * Don't let missing/bad databases/directories faze us.
+	 * In each, try to open the resident database and, if it opens,
+	 * scan it for our match expression.
+	 */
+
+	for (i = 0; i < paths->sz; i++) {
+		if (-1 == fchdir(fd)) {
+			/*
+			 * Relative paths can no longer be resolved.
+			 * Drop what was collected and reset "*res" so
+			 * the caller is never handed a freed array.
+			 */
+			perror(buf);
+			for (j = 0; j < cur; j++)
+				free((*res)[j].desc);
+			free(*res);
+			*res = NULL;
+			cur = 0;
+			ok = 0;
+			break;
+		} else if (-1 == chdir(paths->paths[i])) {
+			perror(paths->paths[i]);
+			continue;
+		}
+
+		rc = sqlite3_open_v2
+			(MANDOC_DB, &db, SQLITE_OPEN_READONLY, NULL);
+
+		if (SQLITE_OK != rc) {
+			/* sqlite3 reports through "db", not errno. */
+			fprintf(stderr, "%s/%s: %s\n", paths->paths[i],
+				MANDOC_DB, sqlite3_errmsg(db));
+			sqlite3_close(db);
+			continue;
+		}
+
+		rc = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
+
+		if (SQLITE_OK != rc) {
+			/* Nothing can be queried from this database. */
+			fprintf(stderr, "%s/%s: %s\n", paths->paths[i],
+				MANDOC_DB, sqlite3_errmsg(db));
+			sqlite3_close(db);
+			continue;
+		}
+
+		/* Bind in the order sql_statement() wrote the "?"s. */
+
+		j = 1;
+		if (NULL != arch)
+			sqlite3_bind_text
+				(s, j++, arch, -1, SQLITE_STATIC);
+		if (NULL != sec)
+			sqlite3_bind_text
+				(s, j++, sec, -1, SQLITE_STATIC);
+
+		for (ep = e; NULL != ep; ep = ep->next) {
+			sqlite3_bind_text
+				(s, j++, ep->v, -1, SQLITE_STATIC);
+			sqlite3_bind_int64
+				(s, j++, ep->bits);
+		}
+
+		memset(&htab, 0, sizeof(struct ohash));
+		ohash_init(&htab, 4, &info);
+
+		/*
+		 * Hash each entry on its [unique] document identifier.
+		 * This is a uint64_t.
+		 * Instead of using a hash function, simply convert the
+		 * uint64_t to a uint32_t, the hash value's type.
+		 * This gives good performance and preserves the
+		 * distribution of buckets in the table.
+		 */
+		while (SQLITE_ROW == sqlite3_step(s)) {
+			id = sqlite3_column_int64(s, 0);
+			idx = ohash_lookup_memory
+				(&htab, (char *)&id,
+				 sizeof(uint64_t), (uint32_t)id);
+
+			if (NULL != ohash_find(&htab, idx))
+				continue;
+
+			/* An SQL NULL comes back as a NULL pointer. */
+			txt = (const char *)sqlite3_column_text(s, 3);
+			if (NULL == txt)
+				continue;
+
+			mp = mandoc_calloc(1, sizeof(struct match));
+			mp->id = id;
+			mp->file = mandoc_strdup(txt);
+			txt = (const char *)sqlite3_column_text(s, 4);
+			mp->desc = mandoc_strdup(NULL == txt ? "" : txt);
+			mp->form = sqlite3_column_int(s, 5);
+			ohash_insert(&htab, idx, mp);
+		}
+
+		sqlite3_finalize(s);
+		sqlite3_close(db);
+
+		/*
+		 * Move the matches into the result array.
+		 * The description changes owner; the rest is freed.
+		 */
+		for (mp = ohash_first(&htab, &idx);
+				NULL != mp;
+				mp = ohash_next(&htab, &idx)) {
+			if (cur + 1 > maxres) {
+				maxres += 1024;
+				*res = mandoc_realloc
+					(*res, maxres * sizeof(struct manpage));
+			}
+			strlcpy((*res)[cur].file,
+				paths->paths[i], MAXPATHLEN);
+			strlcat((*res)[cur].file, "/", MAXPATHLEN);
+			strlcat((*res)[cur].file, mp->file, MAXPATHLEN);
+			(*res)[cur].desc = mp->desc;
+			(*res)[cur].form = mp->form;
+			free(mp->file);
+			free(mp);
+			cur++;
+		}
+		ohash_delete(&htab);
+	}
+out:
+	exprfree(e);
+	if (-1 != fd) {
+		/* Result paths may be relative to where we began. */
+		if (-1 == fchdir(fd) && ok)
+			perror(buf);
+		close(fd);
+	}
+	free(sql);
+	*sz = cur;
+	return(ok);
+}
+
+/*
+ * Build the SQL text that is run against every database.
+ * A row is selected when it satisfies any one term of the expression,
+ * within the optional architecture and section restrictions.
+ * No value is pasted into the text: each has a "?" for mansearch() to
+ * bind, in the order arch, sec, then key and bits for each term.
+ * The result is allocated; the caller must free() it.
+ */
+static char *
+sql_statement(const struct expr *e, const char *arch, const char *sec)
+{
+	char		*sql;
+	const char	*glob = "(key GLOB ? AND bits & ?)";
+	const char	*eq = "(key = ? AND bits & ?)";
+	const char	*andarch = "arch = ? AND ";
+	const char	*andsec = "sec = ? AND ";
+	const char	*term, *sep;
+	size_t		 sz;
+
+	sql = mandoc_strdup
+		("SELECT docid,bits,key,file,desc,form,sec,arch "
+		 "FROM keys "
+		 "INNER JOIN docs ON docs.id=keys.docid "
+		 "WHERE ");
+	sz = strlen(sql);
+
+	if (NULL != arch) {
+		sz += strlen(andarch) + 1;
+		sql = mandoc_realloc(sql, sz);
+		strlcat(sql, andarch, sz);
+	}
+	if (NULL != sec) {
+		sz += strlen(andsec) + 1;
+		sql = mandoc_realloc(sql, sz);
+		strlcat(sql, andsec, sz);
+	}
+
+	sz += 2;
+	sql = mandoc_realloc(sql, sz);
+	strlcat(sql, "(", sz);
+
+	/* Size each piece from the strings themselves. */
+
+	for ( ; NULL != e; e = e->next) {
+		term = e->glob ? glob : eq;
+		sep = NULL == e->next ? ");" : " OR ";
+		sz += strlen(term) + strlen(sep) + 1;
+		sql = mandoc_realloc(sql, sz);
+		strlcat(sql, term, sz);
+		strlcat(sql, sep, sz);
+	}
+
+	return(sql);
+}
+
+/*
+ * Compile the search terms in "argv" into a linked list, one node per
+ * term, in the order given.
+ * Returns NULL, releasing any partial list, if a term fails to parse.
+ * The nodes point into the "argv" strings, which must outlive them.
+ */
+static struct expr *
+exprcomp(int argc, char *argv[])
+{
+	int		 i;
+	struct expr	*first, *next, *cur;
+
+	first = cur = NULL;
+
+	for (i = 0; i < argc; i++) {
+		next = exprterm(argv[i]);
+		if (NULL == next) {
+			exprfree(first);
+			return(NULL);
+		}
+		if (NULL != first) {
+			cur->next = next;
+			cur = next;
+		} else
+			cur = first = next;
+	}
+
+	return(first);
+}
+
+/*
+ * Parse one search term of the form "[key[,key...]](=|~)value", or a
+ * bare "value".
+ * "=" requests an exact comparison and "~" a glob.
+ * The term is split in place: "buf" is modified and the node keeps
+ * pointers into it.
+ * Returns NULL for an empty term or an unrecognised key.
+ */
+static struct expr *
+exprterm(char *buf)
+{
+	struct expr	*e;
+	char		*key, *v;
+	size_t		 i;
+
+	if ('\0' == *buf)
+		return(NULL);
+
+	e = mandoc_calloc(1, sizeof(struct expr));
+
+	/*
+	 * If no =~ is specified, search with equality over names and
+	 * descriptions.
+	 * If =~ begins the phrase, use name and description fields.
+	 */
+
+	if (NULL == (v = strpbrk(buf, "=~"))) {
+		e->v = buf;
+		e->bits = TYPE_Nm | TYPE_Nd;
+		return(e);
+	} else if (v == buf)
+		e->bits = TYPE_Nm | TYPE_Nd;
+
+	e->glob = '~' == *v;
+	*v++ = '\0';
+	e->v = v;
+
+	/*
+	 * Parse out all possible fields.
+	 * If the field doesn't resolve, bail.
+	 */
+
+	while (NULL != (key = strsep(&buf, ","))) {
+		if ('\0' == *key)
+			continue;
+		i = 0;
+		while (types[i].bits &&
+			strcasecmp(types[i].name, key))
+			i++;
+		if (0 == types[i].bits) {
+			free(e);
+			return(NULL);
+		}
+		e->bits |= types[i].bits;
+	}
+
+	return(e);
+}
+
+/*
+ * Release every node of an expression list; NULL is allowed.
+ * The search values are not freed, as the nodes do not own them.
+ */
+static void
+exprfree(struct expr *p)
+{
+	struct expr	*pp;
+
+	while (NULL != p) {
+		pp = p->next;
+		free(p);
+		p = pp;
+	}
+}
+
+/*
+ * Allocation callbacks handed to ohash through "struct ohash_info".
+ * The size and callback-data arguments that go unused are ignored.
+ */
+static void *
+hash_halloc(size_t sz, void *arg)
+{
+
+	return(mandoc_calloc(sz, 1));
+}
+
+static void *
+hash_alloc(size_t sz, void *arg)
+{
+
+	return(mandoc_malloc(sz));
+}
+
+static void
+hash_free(void *p, size_t sz, void *arg)
+{
+
+	free(p);
+}
diff --git a/mansearch.h b/mansearch.h
new file mode 100644
index 00000000..b9caf4ae
--- /dev/null
+++ b/mansearch.h
@@ -0,0 +1,43 @@
+/*	$Id$ */
+/*
+ * Copyright (c) 2012 Kristaps Dzonsons
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef MANSEARCH_H
+#define MANSEARCH_H
+
+struct	manpage {
+	char		 file[MAXPATHLEN]; /* prefixed by manpath */
+	char		*desc; /* description of manpage */
+	int		 form; /* 0 == catpage */
+};
+
+__BEGIN_DECLS
+
+/*
+ * Search the databases under "paths"; "arch" and "sec" may be NULL.
+ * The caller owns the "*res" array and each entry's "desc".
+ * Returns 0 if results had to be discarded, otherwise 1.
+ */
+int	mansearch(const struct manpaths *paths, /* manpaths */
+		const char *arch, /* architecture */
+		const char *sec, /* manual section */
+		int argc, /* size of argv */
+		char *argv[],  /* search terms */
+		struct manpage **res,  /* results */
+		size_t *ressz); /* results returned */
+
+__END_DECLS
+
+#endif /*!MANSEARCH_H*/
-- 
cgit