diff options
-rw-r--r-- | apropos_db.c | 88 | ||||
-rw-r--r-- | cgi.c | 1 | ||||
-rw-r--r-- | mandocdb.c | 26 | ||||
-rw-r--r-- | mandocdb.h | 55 |
4 files changed, 114 insertions, 56 deletions
diff --git a/apropos_db.c b/apropos_db.c index e0e48f33..6e611514 100644 --- a/apropos_db.c +++ b/apropos_db.c @@ -19,6 +19,7 @@ #include <fcntl.h> #include <regex.h> #include <stdarg.h> +#include <stdint.h> #include <stdlib.h> #include <string.h> @@ -49,7 +50,7 @@ struct rec { struct expr { int regex; /* is regex? */ int index; /* index in match array */ - int mask; /* type-mask */ + uint64_t mask; /* type-mask */ int cs; /* is case-sensitive? */ int and; /* is rhs of logical AND? */ char *v; /* search value */ @@ -59,25 +60,50 @@ struct expr { }; struct type { - int mask; + uint64_t mask; const char *name; }; static const struct type types[] = { - { TYPE_An, "An" }, - { TYPE_Cd, "Cd" }, - { TYPE_Er, "Er" }, - { TYPE_Ev, "Ev" }, - { TYPE_Fn, "Fn" }, - { TYPE_Fn, "Fo" }, - { TYPE_In, "In" }, - { TYPE_Nd, "Nd" }, - { TYPE_Nm, "Nm" }, - { TYPE_Pa, "Pa" }, - { TYPE_St, "St" }, - { TYPE_Va, "Va" }, - { TYPE_Va, "Vt" }, - { TYPE_Xr, "Xr" }, + { TYPE_An, "An" }, + { TYPE_Ar, "Ar" }, + { TYPE_At, "At" }, + { TYPE_Bsx, "Bsx" }, + { TYPE_Bx, "Bx" }, + { TYPE_Cd, "Cd" }, + { TYPE_Cm, "Cm" }, + { TYPE_Dv, "Dv" }, + { TYPE_Dx, "Dx" }, + { TYPE_Em, "Em" }, + { TYPE_Er, "Er" }, + { TYPE_Ev, "Ev" }, + { TYPE_Fa, "Fa" }, + { TYPE_Fl, "Fl" }, + { TYPE_Fn, "Fn" }, + { TYPE_Fn, "Fo" }, + { TYPE_Ft, "Ft" }, + { TYPE_Fx, "Fx" }, + { TYPE_Ic, "Ic" }, + { TYPE_In, "In" }, + { TYPE_Lb, "Lb" }, + { TYPE_Li, "Li" }, + { TYPE_Lk, "Lk" }, + { TYPE_Ms, "Ms" }, + { TYPE_Mt, "Mt" }, + { TYPE_Nd, "Nd" }, + { TYPE_Nm, "Nm" }, + { TYPE_Nx, "Nx" }, + { TYPE_Ox, "Ox" }, + { TYPE_Pa, "Pa" }, + { TYPE_Rs, "Rs" }, + { TYPE_Sh, "Sh" }, + { TYPE_Ss, "Ss" }, + { TYPE_St, "St" }, + { TYPE_Sy, "Sy" }, + { TYPE_Tn, "Tn" }, + { TYPE_Va, "Va" }, + { TYPE_Va, "Vt" }, + { TYPE_Xr, "Xr" }, { INT_MAX, "any" }, { 0, NULL } }; @@ -87,9 +113,9 @@ static int btree_read(const DBT *, const struct mchars *, char **); static int expreval(const struct expr *, int *); static void exprexec(const struct expr *, - const char *, int, struct rec *); + const char *, uint64_t, struct rec *); static int exprmark(const struct expr *, - const char *, int, int *); + const char *, uint64_t, int *); static struct expr *exprexpr(int, char *[], int *, int *, size_t *); static struct expr *exprterm(char *, int); static DB *index_open(void); @@ -130,6 +156,7 @@ btree_read(const DBT *v, const struct mchars *mc, char **buf) /* Sanity: are we nil-terminated? */ assert(v->size > 0); + if ('\0' != ((char *)v->data)[(int)v->size - 1]) return(0); @@ -348,7 +375,8 @@ apropos_search(const struct opts *opts, const struct expr *expr, size_t terms, void *arg, void (*res)(struct res *, size_t, void *)) { - int i, rsz, root, leaf, mask, mlen, rc, ch; + int i, rsz, root, leaf, mlen, rc, ch; + uint64_t mask; DBT key, val; DB *btree, *idx; struct mchars *mc; @@ -357,6 +385,7 @@ apropos_search(const struct opts *opts, const struct expr *expr, struct rec *rs; struct res *ress; struct rec r; + struct db_val *vbuf; rc = 0; root = -1; @@ -378,17 +407,14 @@ apropos_search(const struct opts *opts, const struct expr *expr, goto out; while (0 == (ch = (*btree->seq)(btree, &key, &val, R_NEXT))) { - /* - * Low-water mark for key and value. - * The key must have something in it, and the value must - * have the correct tags/recno mix. - */ - if (key.size < 2 || 8 != val.size) - break; + if (key.size < 2 || sizeof(struct db_val) != val.size) + break; if ( ! btree_read(&key, mc, &buf)) break; - mask = *(int *)val.data; + vbuf = val.data; + rec = vbuf->rec; + mask = vbuf->mask; /* * See if this keyword record matches any of the @@ -397,8 +423,6 @@ apropos_search(const struct opts *opts, const struct expr *expr, if ( ! exprmark(expr, buf, mask, NULL)) continue; - memcpy(&rec, val.data + 4, sizeof(recno_t)); - /* * O(log n) scan for prior records. Since a record * number is unbounded, this has decent performance over @@ -701,7 +725,8 @@ exprfree(struct expr *p) } static int -exprmark(const struct expr *p, const char *cp, int mask, int *ms) +exprmark(const struct expr *p, const char *cp, + uint64_t mask, int *ms) { for ( ; p; p = p->next) { @@ -772,7 +797,8 @@ expreval(const struct expr *p, int *ms) * If this evaluates to true, mark the expression as satisfied. */ static void -exprexec(const struct expr *p, const char *cp, int mask, struct rec *r) +exprexec(const struct expr *p, const char *cp, + uint64_t mask, struct rec *r) { assert(0 == r->matched); @@ -4,6 +4,7 @@ #include <regex.h> #include <stdio.h> #include <stdarg.h> +#include <stdint.h> #include <stdlib.h> #include <string.h> @@ -83,7 +83,7 @@ static void buf_append(struct buf *, const char *); static void buf_appendb(struct buf *, const void *, size_t); static void dbt_put(DB *, const char *, DBT *, DBT *); -static void hash_put(DB *, const struct buf *, int); +static void hash_put(DB *, const struct buf *, uint64_t); static void hash_reset(DB **); static void index_merge(const struct of *, struct mparse *, struct buf *, struct buf *, @@ -450,7 +450,7 @@ index_merge(const struct of *of, struct mparse *mp, const char *fn, *msec, *mtitle, *arch; size_t sv; unsigned seq; - char vbuf[8]; + struct db_val vbuf; for (rec = 0; of; of = of->next) { fn = of->fname; @@ -513,17 +513,15 @@ index_merge(const struct of *of, struct mparse *mp, * Copy from the in-memory hashtable of pending keywords * into the database. */ - - memset(vbuf, 0, sizeof(uint32_t)); - memcpy(vbuf + 4, &rec, sizeof(uint32_t)); + vbuf.rec = rec; seq = R_FIRST; while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { seq = R_NEXT; - memcpy(vbuf, val.data, sizeof(uint32_t)); - val.size = sizeof(vbuf); - val.data = vbuf; + vbuf.mask = *(uint64_t *)val.data; + val.size = sizeof(struct db_val); + val.data = &vbuf; if (verb > 1) printf("%s: Added keyword: %s\n", @@ -568,6 +566,7 @@ index_prune(const struct of *ofile, DB *db, const char *dbf, { const struct of *of; const char *fn; + struct db_val *vbuf; unsigned seq, sseq; DBT key, val; size_t reccur; @@ -600,8 +599,9 @@ index_prune(const struct of *ofile, DB *db, const char *dbf, sseq = R_FIRST; while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) { sseq = R_NEXT; - assert(8 == val.size); - if (*maxrec != *(recno_t *)(val.data + 4)) + assert(sizeof(struct db_val) == val.size); + vbuf = val.data; + if (*maxrec != vbuf->rec) continue; if (verb) printf("%s: Deleted keyword: %s\n", @@ -982,7 +982,7 @@ pmdoc_Nm(MDOC_ARGS) } static void -hash_put(DB *db, const struct buf *buf, int mask) +hash_put(DB *db, const struct buf *buf, uint64_t mask) { DBT key, val; int rc; @@ -997,10 +997,10 @@ hash_put(DB *db, const struct buf *buf, int mask) perror("hash"); exit((int)MANDOCLEVEL_SYSERR); } else if (0 == rc) - mask |= *(int *)val.data; + mask |= *(uint64_t *)val.data; val.data = &mask; - val.size = sizeof(int); + val.size = sizeof(uint64_t); if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { perror("hash"); @@ -17,20 +17,51 @@ #ifndef MANDOCDB_H #define MANDOCDB_H +struct db_val { + uint64_t mask; + uint32_t rec; +}; + #define MANDOC_DB "mandoc.db" #define MANDOC_IDX "mandoc.index" -#define TYPE_An 0x01 -#define TYPE_Cd 0x02 -#define TYPE_Er 0x04 -#define TYPE_Ev 0x08 -#define TYPE_Fn 0x10 -#define TYPE_In 0x20 -#define TYPE_Nd 0x40 -#define TYPE_Nm 0x100 -#define TYPE_Pa 0x200 -#define TYPE_St 0x400 -#define TYPE_Va 0x1000 -#define TYPE_Xr 0x2000 +#define TYPE_An 0x0000000000000001ULL +#define TYPE_Ar 0x0000000000000002ULL +#define TYPE_At 0x0000000000000004ULL +#define TYPE_Bsx 0x0000000000000008ULL +#define TYPE_Bx 0x0000000000000010ULL +#define TYPE_Cd 0x0000000000000020ULL +#define TYPE_Cm 0x0000000000000040ULL +#define TYPE_Dv 0x0000000000000080ULL +#define TYPE_Dx 0x0000000000000100ULL +#define TYPE_Em 0x0000000000000200ULL +#define TYPE_Er 0x0000000000000400ULL +#define TYPE_Ev 0x0000000000000800ULL +#define TYPE_Fa 0x0000000000001000ULL +#define TYPE_Fl 0x0000000000002000ULL +#define TYPE_Fn 0x0000000000004000ULL +#define TYPE_Ft 0x0000000000008000ULL +#define TYPE_Fx 0x0000000000010000ULL +#define TYPE_Ic 0x0000000000020000ULL +#define TYPE_In 0x0000000000040000ULL +#define TYPE_Lb 0x0000000000080000ULL +#define TYPE_Li 0x0000000000100000ULL +#define TYPE_Lk 0x0000000000200000ULL +#define TYPE_Ms 0x0000000000400000ULL +#define TYPE_Mt 0x0000000000800000ULL +#define TYPE_Nd 0x0000000001000000ULL +#define TYPE_Nm 0x0000000002000000ULL +#define TYPE_Nx 0x0000000004000000ULL +#define TYPE_Ox 0x0000000008000000ULL +#define TYPE_Pa 0x0000000010000000ULL +#define TYPE_Rs 0x0000000020000000ULL +#define TYPE_Sh 0x0000000040000000ULL +#define TYPE_Ss 0x0000000080000000ULL +#define TYPE_St 0x0000000100000000ULL +#define TYPE_Sy 0x0000000200000000ULL +#define TYPE_Tn 0x0000000400000000ULL +#define TYPE_Va 0x0000000800000000ULL +#define TYPE_Vt 0x0000001000000000ULL +#define TYPE_Xr 0x0000002000000000ULL #endif /*!MANDOCDB_H */ |