From c81baf94fd62c83c95a70727ad90da88092ccad7 Mon Sep 17 00:00:00 2001 From: Kristaps Dzonsons Date: Sat, 9 Jun 2012 11:00:13 +0000 Subject: Add a compatibility interface for ohash. This includes espie@'s wholesale src/lib/libc/ohash directory from OpenBSD into compat_ohash.c (with a single copyright/license notice at the top) and src/include/ohash.h as compat_ohash.h. The ohash_int.h part of compat_ohash.c has been changed only in that ohash.h points to compat_ohash.h. Added HAVE_OHASH test (test-ohash.c) to Makefile. In mandocdb.c and mansearch.c, check HAVE_OHASH test for inclusion. --- Makefile | 8 +- compat_ohash.c | 337 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ compat_ohash.h | 73 +++++++++++++ mandocdb.c | 9 +- mansearch.c | 4 + test-ohash.c | 19 ++++ 6 files changed, 447 insertions(+), 3 deletions(-) create mode 100644 compat_ohash.c create mode 100644 compat_ohash.h create mode 100644 test-ohash.c diff --git a/Makefile b/Makefile index 96832d5c..10221e4a 100644 --- a/Makefile +++ b/Makefile @@ -147,6 +147,7 @@ SRCS = Makefile \ test-fgetln.c \ test-getsubopt.c \ test-mmap.c \ + test-ohash.c \ test-strlcat.c \ test-strlcpy.c \ test-strptime.c \ @@ -187,6 +188,7 @@ LIBMANDOC_OBJS = $(LIBMAN_OBJS) \ COMPAT_OBJS = compat_fgetln.o \ compat_getsubopt.o \ + compat_ohash.o \ compat_strlcat.o \ compat_strlcpy.o @@ -203,7 +205,7 @@ $(LIBMAN_OBJS): libman.h $(LIBMDOC_OBJS): libmdoc.h $(LIBROFF_OBJS): libroff.h $(LIBMANDOC_OBJS): mandoc.h mdoc.h man.h libmandoc.h config.h -$(COMPAT_OBJS): config.h +$(COMPAT_OBJS): config.h compat_ohash.h MANDOC_HTML_OBJS = eqn_html.o \ html.o \ @@ -396,6 +398,10 @@ config.h: config.h.pre config.h.post rm -f config.log ( cat config.h.pre; \ echo; \ + if $(CC) $(CFLAGS) -Werror -o test-ohash test-ohash.c >> config.log 2>&1; then \ + echo '#define HAVE_OHASH'; \ + rm test-ohash; \ + fi; \ if $(CC) $(CFLAGS) -Werror -o test-fgetln test-fgetln.c >> config.log 2>&1; then \ echo '#define HAVE_FGETLN'; \ rm test-fgetln; \ 
diff --git a/compat_ohash.c b/compat_ohash.c new file mode 100644 index 00000000..802f458d --- /dev/null +++ b/compat_ohash.c @@ -0,0 +1,337 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_OHASH + +int dummy; + +#else + +/* $OpenBSD: ohash_int.h,v 1.3 2006/01/16 15:52:25 espie Exp $ */ + +#include +#include +#include +#include +#include "compat_ohash.h" + +struct _ohash_record { + u_int32_t hv; + const char *p; +}; + +#define DELETED ((const char *)h) +#define NONE (h->size) + +/* Don't bother changing the hash table if the change is small enough. */ +#define MINSIZE (1UL << 4) +#define MINDELETED 4 +/* $OpenBSD: ohash_create_entry.c,v 1.2 2004/06/22 20:00:16 espie Exp $ */ +/* ex:ts=8 sw=4: + */ + +/* Copyright (c) 1999, 2004 Marc Espie + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* This handles the common case of variable length keys, where the + * key is stored at the end of the record. 
+ */ +void * +ohash_create_entry(struct ohash_info *i, const char *start, const char **end) +{ + char *p; + + if (!*end) + *end = start + strlen(start); + p = (i->alloc)(i->key_offset + (*end - start) + 1, i->data); + if (p) { + memcpy(p+i->key_offset, start, *end-start); + p[i->key_offset + (*end - start)] = '\0'; + } + return (void *)p; +} + +/* hash_delete only frees the hash structure. Use hash_first/hash_next + * to free entries as well. */ +void +ohash_delete(struct ohash *h) +{ + (h->info.hfree)(h->t, sizeof(struct _ohash_record) * h->size, + h->info.data); +#ifndef NDEBUG + h->t = NULL; +#endif +} + +static void ohash_resize(struct ohash *); + +static void +ohash_resize(struct ohash *h) +{ + struct _ohash_record *n; + unsigned int ns, j; + unsigned int i, incr; + + if (4 * h->deleted < h->total) + ns = h->size << 1; + else if (3 * h->deleted > 2 * h->total) + ns = h->size >> 1; + else + ns = h->size; + if (ns < MINSIZE) + ns = MINSIZE; +#ifdef STATS_HASH + STAT_HASH_EXPAND++; + STAT_HASH_SIZE += ns - h->size; +#endif + n = (h->info.halloc)(sizeof(struct _ohash_record) * ns, h->info.data); + if (!n) + return; + + for (j = 0; j < h->size; j++) { + if (h->t[j].p != NULL && h->t[j].p != DELETED) { + i = h->t[j].hv % ns; + incr = ((h->t[j].hv % (ns - 2)) & ~1) + 1; + while (n[i].p != NULL) { + i += incr; + if (i >= ns) + i -= ns; + } + n[i].hv = h->t[j].hv; + n[i].p = h->t[j].p; + } + } + (h->info.hfree)(h->t, sizeof(struct _ohash_record) * h->size, + h->info.data); + h->t = n; + h->size = ns; + h->total -= h->deleted; + h->deleted = 0; +} + +void * +ohash_remove(struct ohash *h, unsigned int i) +{ + void *result = (void *)h->t[i].p; + + if (result == NULL || result == DELETED) + return NULL; + +#ifdef STATS_HASH + STAT_HASH_ENTRIES--; +#endif + h->t[i].p = DELETED; + h->deleted++; + if (h->deleted >= MINDELETED && 4 * h->deleted > h->total) + ohash_resize(h); + return result; +} + +void * +ohash_find(struct ohash *h, unsigned int i) +{ + if (h->t[i].p == 
DELETED) + return NULL; + else + return (void *)h->t[i].p; +} + +void * +ohash_insert(struct ohash *h, unsigned int i, void *p) +{ +#ifdef STATS_HASH + STAT_HASH_ENTRIES++; +#endif + if (h->t[i].p == DELETED) { + h->deleted--; + h->t[i].p = p; + } else { + h->t[i].p = p; + /* Arbitrary resize boundary. Tweak if not efficient enough. */ + if (++h->total * 4 > h->size * 3) + ohash_resize(h); + } + return p; +} + +unsigned int +ohash_entries(struct ohash *h) +{ + return h->total - h->deleted; +} + +void * +ohash_first(struct ohash *h, unsigned int *pos) +{ + *pos = 0; + return ohash_next(h, pos); +} + +void * +ohash_next(struct ohash *h, unsigned int *pos) +{ + for (; *pos < h->size; (*pos)++) + if (h->t[*pos].p != DELETED && h->t[*pos].p != NULL) + return (void *)h->t[(*pos)++].p; + return NULL; +} + +void +ohash_init(struct ohash *h, unsigned int size, struct ohash_info *info) +{ + h->size = 1UL << size; + if (h->size < MINSIZE) + h->size = MINSIZE; +#ifdef STATS_HASH + STAT_HASH_CREATION++; + STAT_HASH_SIZE += h->size; +#endif + /* Copy info so that caller may free it. 
*/ + h->info.key_offset = info->key_offset; + h->info.halloc = info->halloc; + h->info.hfree = info->hfree; + h->info.alloc = info->alloc; + h->info.data = info->data; + h->t = (h->info.halloc)(sizeof(struct _ohash_record) * h->size, + h->info.data); + h->total = h->deleted = 0; +} + +uint32_t +ohash_interval(const char *s, const char **e) +{ + uint32_t k; + + if (!*e) + *e = s + strlen(s); + if (s == *e) + k = 0; + else + k = *s++; + while (s != *e) + k = ((k << 2) | (k >> 30)) ^ *s++; + return k; +} + +unsigned int +ohash_lookup_interval(struct ohash *h, const char *start, const char *end, + uint32_t hv) +{ + unsigned int i, incr; + unsigned int empty; + +#ifdef STATS_HASH + STAT_HASH_LOOKUP++; +#endif + empty = NONE; + i = hv % h->size; + incr = ((hv % (h->size-2)) & ~1) + 1; + while (h->t[i].p != NULL) { +#ifdef STATS_HASH + STAT_HASH_LENGTH++; +#endif + if (h->t[i].p == DELETED) { + if (empty == NONE) + empty = i; + } else if (h->t[i].hv == hv && + strncmp(h->t[i].p+h->info.key_offset, start, + end - start) == 0 && + (h->t[i].p+h->info.key_offset)[end-start] == '\0') { + if (empty != NONE) { + h->t[empty].hv = hv; + h->t[empty].p = h->t[i].p; + h->t[i].p = DELETED; + return empty; + } else { +#ifdef STATS_HASH + STAT_HASH_POSITIVE++; +#endif + return i; + } + } + i += incr; + if (i >= h->size) + i -= h->size; + } + + /* Found an empty position. 
*/ + if (empty != NONE) + i = empty; + h->t[i].hv = hv; + return i; +} + +unsigned int +ohash_lookup_memory(struct ohash *h, const char *k, size_t size, uint32_t hv) +{ + unsigned int i, incr; + unsigned int empty; + +#ifdef STATS_HASH + STAT_HASH_LOOKUP++; +#endif + empty = NONE; + i = hv % h->size; + incr = ((hv % (h->size-2)) & ~1) + 1; + while (h->t[i].p != NULL) { +#ifdef STATS_HASH + STAT_HASH_LENGTH++; +#endif + if (h->t[i].p == DELETED) { + if (empty == NONE) + empty = i; + } else if (h->t[i].hv == hv && + memcmp(h->t[i].p+h->info.key_offset, k, size) == 0) { + if (empty != NONE) { + h->t[empty].hv = hv; + h->t[empty].p = h->t[i].p; + h->t[i].p = DELETED; + return empty; + } else { +#ifdef STATS_HASH + STAT_HASH_POSITIVE++; +#endif + } return i; + } + i += incr; + if (i >= h->size) + i -= h->size; + } + + /* Found an empty position. */ + if (empty != NONE) + i = empty; + h->t[i].hv = hv; + return i; +} + +unsigned int +ohash_qlookup(struct ohash *h, const char *s) +{ + const char *e = NULL; + return ohash_qlookupi(h, s, &e); +} + +unsigned int +ohash_qlookupi(struct ohash *h, const char *s, const char **e) +{ + u_int32_t hv; + + hv = ohash_interval(s, e); + return ohash_lookup_interval(h, s, *e, hv); +} + +#endif /*!HAVE_OHASH*/ diff --git a/compat_ohash.h b/compat_ohash.h new file mode 100644 index 00000000..85cf9114 --- /dev/null +++ b/compat_ohash.h @@ -0,0 +1,73 @@ +#ifndef OHASH_H +#define OHASH_H +/* $OpenBSD: ohash.h,v 1.9 2006/01/16 15:52:25 espie Exp $ */ +/* ex:ts=8 sw=4: + */ + +/* Copyright (c) 1999, 2004 Marc Espie + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* Open hashing support. + * Open hashing was chosen because it is much lighter than other hash + * techniques, and more efficient in most cases. + */ + +struct ohash_info { + ptrdiff_t key_offset; + void *data; /* user data */ + void *(*halloc)(size_t, void *); + void (*hfree)(void *, size_t, void *); + void *(*alloc)(size_t, void *); +}; + +struct _ohash_record; + +struct ohash { + struct _ohash_record *t; + struct ohash_info info; + unsigned int size; + unsigned int total; + unsigned int deleted; +}; + +/* For this to be tweakable, we use small primitives, and leave part of the + * logic to the client application. e.g., hashing is left to the client + * application. We also provide a simple table entry lookup that yields + * a hashing table index (opaque) to be used in find/insert/remove. + * The keys are stored at a known position in the client data. 
+ */ +__BEGIN_DECLS +void ohash_init(struct ohash *, unsigned, struct ohash_info *); +void ohash_delete(struct ohash *); + +unsigned int ohash_lookup_interval(struct ohash *, const char *, + const char *, uint32_t); +unsigned int ohash_lookup_memory(struct ohash *, const char *, + size_t, uint32_t) + __attribute__ ((__bounded__(__string__,2,3))); +void *ohash_find(struct ohash *, unsigned int); +void *ohash_remove(struct ohash *, unsigned int); +void *ohash_insert(struct ohash *, unsigned int, void *); +void *ohash_first(struct ohash *, unsigned int *); +void *ohash_next(struct ohash *, unsigned int *); +unsigned int ohash_entries(struct ohash *); + +void *ohash_create_entry(struct ohash_info *, const char *, const char **); +u_int32_t ohash_interval(const char *, const char **); + +unsigned int ohash_qlookupi(struct ohash *, const char *, const char **); +unsigned int ohash_qlookup(struct ohash *, const char *); +__END_DECLS +#endif diff --git a/mandocdb.c b/mandocdb.c index a5a9ee14..8fad980b 100644 --- a/mandocdb.c +++ b/mandocdb.c @@ -34,7 +34,11 @@ #include #include +#ifdef HAVE_OHASH #include +#else +#include "compat_ohash.h" +#endif #include #include "mdoc.h" @@ -327,6 +331,7 @@ main(int argc, char *argv[]) { char cwd[MAXPATHLEN]; int ch, rc, fd, i; + unsigned int index; size_t j, sz; const char *dir; struct str *s; @@ -521,8 +526,8 @@ out: manpath_free(&dirs); mchars_free(mc); mparse_free(mp); - for (s = ohash_first(&strings, &ch); - NULL != s; s = ohash_next(&strings, &ch)) { + for (s = ohash_first(&strings, &index); + NULL != s; s = ohash_next(&strings, &index)) { if (s->utf8 != s->key) free(s->utf8); free(s); diff --git a/mansearch.c b/mansearch.c index 1bdd9d0b..5aa30623 100644 --- a/mansearch.c +++ b/mansearch.c @@ -30,7 +30,11 @@ #include #include +#ifdef HAVE_OHASH #include +#else +#include "compat_ohash.h" +#endif #include #include "mandoc.h" diff --git a/test-ohash.c b/test-ohash.c new file mode 100644 index 00000000..72b3e3dd --- /dev/null +++ 
b/test-ohash.c @@ -0,0 +1,19 @@ +#include <stdint.h> +#include <stddef.h> +#include <stdlib.h> +#include <ohash.h> +/* Configure-time probe: compiles and links only if the system provides ohash. */ +void *xalloc(size_t sz, void *arg) { return(calloc(sz,1)); } +void xfree(void *p, size_t sz, void *arg) { free(p); } + +int +main(void) +{ + struct ohash h; + struct ohash_info i; + i.halloc = i.alloc = xalloc; + i.hfree = xfree; /* the release hook member is hfree and takes (ptr, size, data); plain free() does not match */ + ohash_init(&h, 2, &i); + ohash_delete(&h); + return 0; +} -- cgit