diff options
Diffstat (limited to 'chars.c')
-rw-r--r-- | chars.c | 201 |
1 files changed, 201 insertions, 0 deletions
diff --git a/chars.c b/chars.c new file mode 100644 index 00000000..eaf80689 --- /dev/null +++ b/chars.c @@ -0,0 +1,201 @@ +/* $Id$ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <assert.h> +#include <err.h> +#include <stdlib.h> +#include <string.h> + +#include "chars.h" + +#define ASCII_PRINT_HI 126 +#define ASCII_PRINT_LO 32 + +struct ln { + struct ln *next; + const char *code; + const char *out; + size_t codesz; + size_t outsz; + int type; +#define CHARS_CHAR (1 << 0) +#define CHARS_STRING (1 << 1) +#define CHARS_BOTH (0x03) +}; + +#define LINES_MAX 266 + +#define CHAR(w, x, y, z) \ + { NULL, (w), (y), (x), (z), CHARS_CHAR }, +#define STRING(w, x, y, z) \ + { NULL, (w), (y), (x), (z), CHARS_STRING }, +#define BOTH(w, x, y, z) \ + { NULL, (w), (y), (x), (z), CHARS_BOTH }, + +static struct ln lines[LINES_MAX] = { +#include "chars.in" +}; + +struct tbl { + struct ln **htab; +}; + +static inline int match(const struct ln *, + const char *, size_t, int); +static const char *find(struct tbl *, const char *, + size_t, size_t *, int); + + +void +chars_free(void *arg) +{ + struct tbl *tab; + + tab = (struct tbl *)arg; + + free(tab->htab); + free(tab); +} + + +/* ARGSUSED */ +void * +chars_init(enum chars type) +{ + struct tbl *tab; + struct ln **htab; + struct ln *pp; + int i, hash; + + /* + * Constructs a very basic chaining hashtable. The hash routine + * is simply the integral value of the first character. + * Subsequent entries are chained in the order they're processed + * (they're in-line re-ordered during lookup). + */ + + if (NULL == (tab = malloc(sizeof(struct tbl)))) + err(1, "malloc"); + + htab = calloc(ASCII_PRINT_HI - ASCII_PRINT_LO + 1, + sizeof(struct ln **)); + + if (NULL == htab) + err(1, "malloc"); + + for (i = 0; i < LINES_MAX; i++) { + assert(lines[i].codesz > 0); + assert(lines[i].code); + assert(lines[i].out); + + hash = (int)lines[i].code[0] - ASCII_PRINT_LO; + + if (NULL == (pp = htab[hash])) { + htab[hash] = &lines[i]; + continue; + } + + for ( ; pp->next; pp = pp->next) + /* Scan ahead. */ ; + + pp->next = &lines[i]; + } + + tab->htab = htab; + return(tab); +} + + +const char * +chars_a2ascii(void *arg, const char *p, size_t sz, size_t *rsz) +{ + + return(find((struct tbl *)arg, p, sz, rsz, CHARS_CHAR)); +} + + +const char * +chars_a2res(void *arg, const char *p, size_t sz, size_t *rsz) +{ + + return(find((struct tbl *)arg, p, sz, rsz, CHARS_STRING)); +} + + +static const char * +find(struct tbl *tab, const char *p, size_t sz, size_t *rsz, int type) +{ + struct ln *pp, *prev; + struct ln **htab; + int hash; + + assert(p); + assert(sz > 0); + + if (p[0] < ASCII_PRINT_LO || p[0] > ASCII_PRINT_HI) + return(NULL); + + /* + * Lookup the symbol in the symbol hash. See ascii2htab for the + * hashtable specs. This dynamically re-orders the hash chain + * to optimise for repeat hits. + */ + + hash = (int)p[0] - ASCII_PRINT_LO; + htab = tab->htab; + + if (NULL == (pp = htab[hash])) + return(NULL); + + if (NULL == pp->next) { + if ( ! match(pp, p, sz, type)) + return(NULL); + *rsz = pp->outsz; + return(pp->out); + } + + for (prev = NULL; pp; pp = pp->next) { + if ( ! match(pp, p, sz, type)) { + prev = pp; + continue; + } + + /* Re-order the hash chain. */ + + if (prev) { + prev->next = pp->next; + pp->next = htab[hash]; + htab[hash] = pp; + } + + *rsz = pp->outsz; + return(pp->out); + } + + return(NULL); +} + + +static inline int +match(const struct ln *ln, const char *p, size_t sz, int type) +{ + + if ( ! (ln->type & type)) + return(0); + if (ln->codesz != sz) + return(0); + return(0 == strncmp(ln->code, p, sz)); +} |