summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Schwarze <schwarze@openbsd.org>2014-04-10 02:46:21 +0000
committerIngo Schwarze <schwarze@openbsd.org>2014-04-10 02:46:21 +0000
commit2f4e06cbd2a887b4a85880dbcb59c36fb93535d2 (patch)
tree0c53eeb19990a0776312f7a538f10d33399f9c24
parent31583f54660e373832d0fc36eaa2005e7e836229 (diff)
downloadmandoc-2f4e06cbd2a887b4a85880dbcb59c36fb93535d2.tar.gz
Next speed optimization step for the new apropos(1).
Split manual names out of the common "keys" table into their own "names" table. This reduces standard apropos(1) search times (i.e. searching for names and descriptions only) by typically about 70% for the full /usr/share/man database. (Yes, that multiplies with the previous optimization step, so both together have reduced search times by a factor of more than six. I'm not done yet, expect more to come.) Even with the minimal databases built with makewhatis(8) -Q, this step still reduces search times by 15-20%. For both cases, database sizes and build times hardly change (+/-2%).
-rw-r--r--mandocdb.c90
-rw-r--r--mansearch.c39
-rw-r--r--mansearch.h88
-rw-r--r--mansearch_const.c15
4 files changed, 144 insertions, 88 deletions
diff --git a/mandocdb.c b/mandocdb.c
index 1e30afbc..edfb39e9 100644
--- a/mandocdb.c
+++ b/mandocdb.c
@@ -126,6 +126,7 @@ enum stmt {
STMT_DELETE_PAGE = 0, /* delete mpage */
STMT_INSERT_PAGE, /* insert mpage */
STMT_INSERT_LINK, /* insert mlink */
+ STMT_INSERT_NAME, /* insert name */
STMT_INSERT_KEY, /* insert parsed key */
STMT__MAX
};
@@ -188,9 +189,11 @@ static enum op op; /* operational mode */
static char basedir[PATH_MAX]; /* current base directory */
static struct ohash mpages; /* table of distinct manual pages */
static struct ohash mlinks; /* table of directory entries */
+static struct ohash names; /* table of all names */
static struct ohash strings; /* table of all strings */
static sqlite3 *db = NULL; /* current database */
static sqlite3_stmt *stmts[STMT__MAX]; /* current statements */
+static uint64_t name_mask;
static const struct mdoc_handler mdocs[MDOC_MAX] = {
{ NULL, 0 }, /* Ap */
@@ -963,7 +966,7 @@ mlink_check(struct mpage *mpage, struct mlink *mlink)
/*
* XXX
- * parse_cat() doesn't set TYPE_Nm and TYPE_NAME yet.
+ * parse_cat() doesn't set NAME_TITLE yet.
*/
if (FORM_CAT == mpage->form)
@@ -974,10 +977,10 @@ mlink_check(struct mpage *mpage, struct mlink *mlink)
* appears as a name in the NAME section.
*/
- slot = ohash_qlookup(&strings, mlink->name);
- str = ohash_find(&strings, slot);
+ slot = ohash_qlookup(&names, mlink->name);
+ str = ohash_find(&names, slot);
assert(NULL != str);
- if ( ! (TYPE_NAME & str->mask))
+ if ( ! (NAME_TITLE & str->mask))
say(mlink->file, "Name missing in NAME section");
}
@@ -1021,6 +1024,8 @@ mpages_merge(struct mchars *mc, struct mparse *mp)
continue;
}
+ name_mask = NAME_MASK;
+ ohash_init(&names, 4, &str_info);
ohash_init(&strings, 6, &str_info);
mparse_reset(mp);
mdoc = NULL;
@@ -1146,12 +1151,12 @@ mpages_merge(struct mchars *mc, struct mparse *mp)
putkey(mpage, mlink->fsec, TYPE_sec);
putkey(mpage, '\0' == *mlink->arch ?
any : mlink->arch, TYPE_arch);
- putkey(mpage, mlink->name, TYPE_Nm);
+ putkey(mpage, mlink->name, NAME_FILE);
}
if (NULL != mdoc) {
if (NULL != (cp = mdoc_meta(mdoc)->name))
- putkey(mpage, cp, TYPE_Nm);
+ putkey(mpage, cp, NAME_HEAD);
assert(NULL == mpage->desc);
parse_mdoc(mpage, mdoc_node(mdoc));
if (NULL == mpage->desc)
@@ -1187,6 +1192,7 @@ nextpage:
}
}
ohash_delete(&strings);
+ ohash_delete(&names);
mpage = ohash_next(&mpages, &pslot);
}
@@ -1204,11 +1210,11 @@ names_check(void)
sqlite3_prepare_v2(db,
"SELECT name, sec, arch, key FROM ("
- "SELECT key, pageid FROM keys "
+ "SELECT name AS key, pageid FROM names "
"WHERE bits & ? AND NOT EXISTS ("
"SELECT pageid FROM mlinks "
- "WHERE mlinks.pageid == keys.pageid "
- "AND mlinks.name == keys.key"
+ "WHERE mlinks.pageid == names.pageid "
+ "AND mlinks.name == names.name"
")"
") JOIN ("
"SELECT * FROM mlinks GROUP BY pageid"
@@ -1216,7 +1222,7 @@ names_check(void)
-1, &stmt, NULL);
i = 1;
- SQL_BIND_INT64(stmt, i, TYPE_NAME);
+ SQL_BIND_INT64(stmt, i, NAME_TITLE);
while (SQLITE_ROW == (irc = sqlite3_step(stmt))) {
name = sqlite3_column_text(stmt, 0);
@@ -1441,7 +1447,7 @@ parse_man(struct mpage *mpage, const struct man_node *n)
('\\' == start[0] && '-' == start[1]))
break;
- putkey(mpage, start, TYPE_NAME | TYPE_Nm);
+ putkey(mpage, start, NAME_TITLE);
if (' ' == byte) {
start += sz + 1;
@@ -1455,7 +1461,7 @@ parse_man(struct mpage *mpage, const struct man_node *n)
}
if (start == title) {
- putkey(mpage, start, TYPE_NAME | TYPE_Nm);
+ putkey(mpage, start, NAME_TITLE);
free(title);
return;
}
@@ -1632,9 +1638,9 @@ parse_mdoc_Nm(struct mpage *mpage, const struct mdoc_node *n)
{
if (SEC_NAME == n->sec)
- putmdockey(mpage, n->child, TYPE_NAME | TYPE_Nm);
+ putmdockey(mpage, n->child, NAME_TITLE);
else if (SEC_SYNOPSIS == n->sec && MDOC_HEAD == n->type)
- putmdockey(mpage, n->child, TYPE_Nm);
+ putmdockey(mpage, n->child, NAME_SYN);
return(0);
}
@@ -1668,28 +1674,35 @@ static void
putkeys(const struct mpage *mpage,
const char *cp, size_t sz, uint64_t v)
{
+ struct ohash *htab;
struct str *s;
const char *end;
- uint64_t mask;
unsigned int slot;
int i;
if (0 == sz)
return;
- if (debug > 1) {
- for (i = 0, mask = 1;
- i < mansearch_keymax;
- i++, mask <<= 1)
- if (mask & v)
- break;
- say(mpage->mlinks->file, "Adding key %s=%*s",
- mansearch_keynames[i], sz, cp);
+ if (TYPE_Nm & v) {
+ htab = &names;
+ v &= name_mask;
+ name_mask &= ~NAME_FIRST;
+ if (debug > 1)
+ say(mpage->mlinks->file,
+ "Adding name %*s", sz, cp);
+ } else {
+ htab = &strings;
+ if (debug > 1)
+ for (i = 0; i < mansearch_keymax; i++)
+ if (1 << i & v)
+ say(mpage->mlinks->file,
+ "Adding key %s=%*s",
+ mansearch_keynames[i], sz, cp);
}
end = cp + sz;
- slot = ohash_qlookupi(&strings, cp, &end);
- s = ohash_find(&strings, slot);
+ slot = ohash_qlookupi(htab, cp, &end);
+ s = ohash_find(htab, slot);
if (NULL != s && mpage == s->mpage) {
s->mask |= v;
@@ -1697,7 +1710,7 @@ putkeys(const struct mpage *mpage,
} else if (NULL == s) {
s = mandoc_calloc(sizeof(struct str) + sz + 1, 1);
memcpy(s->key, cp, sz);
- ohash_insert(&strings, slot, s);
+ ohash_insert(htab, slot, s);
}
s->mpage = mpage;
s->mask = v;
@@ -1945,6 +1958,21 @@ dbadd(struct mpage *mpage, struct mchars *mc)
mlink = mlink->next;
}
+ for (key = ohash_first(&names, &slot); NULL != key;
+ key = ohash_next(&names, &slot)) {
+ assert(key->mpage == mpage);
+ if (NULL == key->rendered)
+ render_key(mc, key);
+ i = 1;
+ SQL_BIND_INT64(stmts[STMT_INSERT_NAME], i, key->mask);
+ SQL_BIND_TEXT(stmts[STMT_INSERT_NAME], i, key->rendered);
+ SQL_BIND_INT64(stmts[STMT_INSERT_NAME], i, mpage->recno);
+ SQL_STEP(stmts[STMT_INSERT_NAME]);
+ sqlite3_reset(stmts[STMT_INSERT_NAME]);
+ if (key->rendered != key->key)
+ free(key->rendered);
+ free(key);
+ }
for (key = ohash_first(&strings, &slot); NULL != key;
key = ohash_next(&strings, &slot)) {
assert(key->mpage == mpage);
@@ -2160,6 +2188,13 @@ create_tables:
"ON DELETE CASCADE\n"
");\n"
"\n"
+ "CREATE TABLE \"names\" (\n"
+ " \"bits\" INTEGER NOT NULL,\n"
+ " \"name\" TEXT NOT NULL,\n"
+ " \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) "
+ "ON DELETE CASCADE\n"
+ ");\n"
+ "\n"
"CREATE TABLE \"keys\" (\n"
" \"bits\" INTEGER NOT NULL,\n"
" \"key\" TEXT NOT NULL,\n"
@@ -2185,6 +2220,9 @@ prepare_statements:
sql = "INSERT INTO mlinks "
"(sec,arch,name,pageid) VALUES (?,?,?,?)";
sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_LINK], NULL);
+ sql = "INSERT INTO names "
+ "(bits,name,pageid) VALUES (?,?,?)";
+ sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_NAME], NULL);
sql = "INSERT INTO keys "
"(bits,key,pageid) VALUES (?,?,?)";
sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_KEY], NULL);
diff --git a/mansearch.c b/mansearch.c
index d3d88ce8..a4348d64 100644
--- a/mansearch.c
+++ b/mansearch.c
@@ -221,7 +221,7 @@ mansearch(const struct mansearch *search,
SQL_BIND_BLOB(db, s, j, ep->regexp);
} else
SQL_BIND_TEXT(db, s, j, ep->substr);
- if (0 == (TYPE_Nd & ep->bits))
+ if (0 == ((TYPE_Nd | TYPE_Nm) & ep->bits))
SQL_BIND_INT64(db, s, j, ep->bits);
}
@@ -504,6 +504,12 @@ sql_statement(const struct expr *e)
? (NULL == e->substr
? "desc REGEXP ?"
: "desc MATCH ?")
+ : TYPE_Nm == e->bits
+ ? (NULL == e->substr
+ ? "id IN (SELECT pageid FROM names "
+ "WHERE name REGEXP ?)"
+ : "id IN (SELECT pageid FROM names "
+ "WHERE name MATCH ?)")
: (NULL == e->substr
? "id IN (SELECT pageid FROM keys "
"WHERE key REGEXP ? AND bits & ?)"
@@ -525,8 +531,9 @@ sql_statement(const struct expr *e)
static struct expr *
exprcomp(const struct mansearch *search, int argc, char *argv[])
{
+ uint64_t mask;
int i, toopen, logic, igncase, toclose;
- struct expr *first, *next, *cur;
+ struct expr *first, *prev, *cur, *next;
first = cur = NULL;
logic = igncase = toclose = 0;
@@ -569,6 +576,7 @@ exprcomp(const struct mansearch *search, int argc, char *argv[])
first = next;
else
cur->next = next;
+ prev = cur = next;
/*
* Searching for descriptions must be split out
@@ -576,18 +584,23 @@ exprcomp(const struct mansearch *search, int argc, char *argv[])
* not in the keys table.
*/
- if (TYPE_Nd & next->bits && ~TYPE_Nd & next->bits) {
- cur = mandoc_calloc(1, sizeof(struct expr));
- memcpy(cur, next, sizeof(struct expr));
- next->open = 1;
- next->bits = TYPE_Nd;
- next->next = cur;
- cur->bits &= ~TYPE_Nd;
+ for (mask = TYPE_Nm; mask <= TYPE_Nd; mask <<= 1) {
+ if (mask & cur->bits && ~mask & cur->bits) {
+ next = mandoc_calloc(1,
+ sizeof(struct expr));
+ memcpy(next, cur, sizeof(struct expr));
+ prev->open = 1;
+ cur->bits = mask;
+ cur->next = next;
+ cur = next;
+ cur->bits &= ~mask;
+ }
+ }
+ prev->and = (1 == logic);
+ prev->open += toopen;
+ if (cur != prev)
cur->close = 1;
- } else
- cur = next;
- next->and = (1 == logic);
- next->open += toopen;
+
toopen = logic = igncase = 0;
}
if (toopen || logic || igncase || toclose)
diff --git a/mansearch.h b/mansearch.h
index 41149072..d5a10ba3 100644
--- a/mansearch.h
+++ b/mansearch.h
@@ -20,47 +20,53 @@
#define MANDOC_DB "mandoc.db"
-#define TYPE_NAME 0x0000000000000001ULL
-#define TYPE_Nm 0x0000000000000002ULL
-#define TYPE_arch 0x0000000000000004ULL
-#define TYPE_sec 0x0000000000000008ULL
-#define TYPE_Xr 0x0000000000000010ULL
-#define TYPE_Ar 0x0000000000000020ULL
-#define TYPE_Fa 0x0000000000000040ULL
-#define TYPE_Fl 0x0000000000000080ULL
-#define TYPE_Dv 0x0000000000000100ULL
-#define TYPE_Fn 0x0000000000000200ULL
-#define TYPE_Ic 0x0000000000000400ULL
-#define TYPE_Pa 0x0000000000000800ULL
-#define TYPE_Cm 0x0000000000001000ULL
-#define TYPE_Li 0x0000000000002000ULL
-#define TYPE_Em 0x0000000000004000ULL
-#define TYPE_Cd 0x0000000000008000ULL
-#define TYPE_Va 0x0000000000010000ULL
-#define TYPE_Ft 0x0000000000020000ULL
-#define TYPE_Tn 0x0000000000040000ULL
-#define TYPE_Er 0x0000000000080000ULL
-#define TYPE_Ev 0x0000000000100000ULL
-#define TYPE_Sy 0x0000000000200000ULL
-#define TYPE_Sh 0x0000000000400000ULL
-#define TYPE_In 0x0000000000800000ULL
-#define TYPE_Ss 0x0000000001000000ULL
-#define TYPE_Ox 0x0000000002000000ULL
-#define TYPE_An 0x0000000004000000ULL
-#define TYPE_Mt 0x0000000008000000ULL
-#define TYPE_St 0x0000000010000000ULL
-#define TYPE_Bx 0x0000000020000000ULL
-#define TYPE_At 0x0000000040000000ULL
-#define TYPE_Nx 0x0000000080000000ULL
-#define TYPE_Fx 0x0000000100000000ULL
-#define TYPE_Lk 0x0000000200000000ULL
-#define TYPE_Ms 0x0000000400000000ULL
-#define TYPE_Bsx 0x0000000800000000ULL
-#define TYPE_Dx 0x0000001000000000ULL
-#define TYPE_Rs 0x0000002000000000ULL
-#define TYPE_Vt 0x0000004000000000ULL
-#define TYPE_Lb 0x0000008000000000ULL
-#define TYPE_Nd 0x0000010000000000ULL
+#define TYPE_arch 0x0000000000000001ULL
+#define TYPE_sec 0x0000000000000002ULL
+#define TYPE_Xr 0x0000000000000004ULL
+#define TYPE_Ar 0x0000000000000008ULL
+#define TYPE_Fa 0x0000000000000010ULL
+#define TYPE_Fl 0x0000000000000020ULL
+#define TYPE_Dv 0x0000000000000040ULL
+#define TYPE_Fn 0x0000000000000080ULL
+#define TYPE_Ic 0x0000000000000100ULL
+#define TYPE_Pa 0x0000000000000200ULL
+#define TYPE_Cm 0x0000000000000400ULL
+#define TYPE_Li 0x0000000000000800ULL
+#define TYPE_Em 0x0000000000001000ULL
+#define TYPE_Cd 0x0000000000002000ULL
+#define TYPE_Va 0x0000000000004000ULL
+#define TYPE_Ft 0x0000000000008000ULL
+#define TYPE_Tn 0x0000000000010000ULL
+#define TYPE_Er 0x0000000000020000ULL
+#define TYPE_Ev 0x0000000000040000ULL
+#define TYPE_Sy 0x0000000000080000ULL
+#define TYPE_Sh 0x0000000000100000ULL
+#define TYPE_In 0x0000000000200000ULL
+#define TYPE_Ss 0x0000000000400000ULL
+#define TYPE_Ox 0x0000000000800000ULL
+#define TYPE_An 0x0000000001000000ULL
+#define TYPE_Mt 0x0000000002000000ULL
+#define TYPE_St 0x0000000004000000ULL
+#define TYPE_Bx 0x0000000008000000ULL
+#define TYPE_At 0x0000000010000000ULL
+#define TYPE_Nx 0x0000000020000000ULL
+#define TYPE_Fx 0x0000000040000000ULL
+#define TYPE_Lk 0x0000000080000000ULL
+#define TYPE_Ms 0x0000000100000000ULL
+#define TYPE_Bsx 0x0000000200000000ULL
+#define TYPE_Dx 0x0000000400000000ULL
+#define TYPE_Rs 0x0000000800000000ULL
+#define TYPE_Vt 0x0000001000000000ULL
+#define TYPE_Lb 0x0000002000000000ULL
+#define TYPE_Nm 0x0000004000000000ULL
+#define TYPE_Nd 0x0000008000000000ULL
+
+#define NAME_SYN 0x0000004000000001ULL
+#define NAME_FILE 0x0000004000000002ULL
+#define NAME_TITLE 0x000000400000000cULL
+#define NAME_FIRST 0x0000004000000008ULL
+#define NAME_HEAD 0x0000004000000010ULL
+#define NAME_MASK 0x000000000000001fULL
__BEGIN_DECLS
diff --git a/mansearch_const.c b/mansearch_const.c
index cfa9b6c3..b6d37fdb 100644
--- a/mansearch_const.c
+++ b/mansearch_const.c
@@ -20,13 +20,12 @@
#include "manpath.h"
#include "mansearch.h"
-const int mansearch_keymax = 41;
+const int mansearch_keymax = 40;
-const char *const mansearch_keynames[41] = {
- "NAME", "Nm", "arch", "sec", "Xr", "Ar", "Fa", "Fl",
- "Dv", "Fn", "Ic", "Pa", "Cm", "Li", "Em", "Cd",
- "Va", "Ft", "Tn", "Er", "Ev", "Sy", "Sh", "In",
- "Ss", "Ox", "An", "Mt", "St", "Bx", "At", "Nx",
- "Fx", "Lk", "Ms", "Bsx", "Dx", "Rs", "Vt", "Lb",
- "Nd"
+const char *const mansearch_keynames[40] = {
+ "arch", "sec", "Xr", "Ar", "Fa", "Fl", "Dv", "Fn",
+ "Ic", "Pa", "Cm", "Li", "Em", "Cd", "Va", "Ft",
+ "Tn", "Er", "Ev", "Sy", "Sh", "In", "Ss", "Ox",
+ "An", "Mt", "St", "Bx", "At", "Nx", "Fx", "Lk",
+ "Ms", "Bsx", "Dx", "Rs", "Vt", "Lb", "Nm", "Nd"
};