summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ingo Schwarze <schwarze@openbsd.org> 2015-10-13 22:59:54 +0000
committer Ingo Schwarze <schwarze@openbsd.org> 2015-10-13 22:59:54 +0000
commit b26f648a71b3259c793410232a0d3bbdd0aa54e3 (patch)
tree 38c696a975659d4498c81dddfc966f0e30bc6326
parent 35c8bbbbc8de3992d8c2d207662d365f4d566865 (diff)
download mandoc-b26f648a71b3259c793410232a0d3bbdd0aa54e3.tar.gz
Major character table cleanup:
* Use ohash(3) rather than a hand-rolled hash table.
* Make the character table static in the chars.c module: There is no need to pass a pointer around, we most certainly never want to use two different character tables concurrently.
* No need to keep the characters in a separate file chars.in; that merely encourages downstream porters to mess with them.
* Sort the characters to agree with the mandoc_chars(7) manual page.
* Specify Unicode codepoints in hex, not decimal (that's the detail that originally triggered this patch).

No functional change, minus 100 LOC, and I don't see a performance change.
-rw-r--r--  Makefile           1
-rw-r--r--  Makefile.depend    2
-rw-r--r--  cgi.c             12
-rw-r--r--  chars.c          479
-rw-r--r--  chars.in         404
-rw-r--r--  demandoc.c         7
-rw-r--r--  html.c             6
-rw-r--r--  html.h             1
-rw-r--r--  libmandoc.h        3
-rw-r--r--  main.c            26
-rw-r--r--  main.h            19
-rw-r--r--  mandoc.3          10
-rw-r--r--  mandoc.h          14
-rw-r--r--  mandoc_headers.3  21
-rw-r--r--  mandocdb.c        12
-rw-r--r--  mchars_alloc.3    27
-rw-r--r--  read.c             6
-rw-r--r--  roff.c             6
-rw-r--r--  term.c            11
-rw-r--r--  term.h             1
-rw-r--r--  term_ascii.c      19
-rw-r--r--  term_ps.c         14
22 files changed, 469 insertions, 632 deletions
diff --git a/Makefile b/Makefile
index a3b275e4..dc94be59 100644
--- a/Makefile
+++ b/Makefile
@@ -117,7 +117,6 @@ DISTFILES = INSTALL \
TODO \
apropos.1 \
cgi.h.example \
- chars.in \
compat_fts.h \
compat_ohash.h \
compat_stringlist.h \
diff --git a/Makefile.depend b/Makefile.depend
index abac604e..85d7d673 100644
--- a/Makefile.depend
+++ b/Makefile.depend
@@ -1,6 +1,6 @@
att.o: att.c config.h roff.h mdoc.h libmdoc.h
cgi.o: cgi.c config.h mandoc_aux.h mandoc.h roff.h main.h manconf.h mansearch.h cgi.h
-chars.o: chars.c config.h mandoc.h mandoc_aux.h libmandoc.h chars.in
+chars.o: chars.c config.h mandoc.h mandoc_aux.h mandoc_ohash.h compat_ohash.h libmandoc.h
compat_err.o: compat_err.c config.h
compat_fgetln.o: compat_fgetln.c config.h
compat_fts.o: compat_fts.c config.h compat_fts.h
diff --git a/cgi.c b/cgi.c
index 75ed46a6..88ae6457 100644
--- a/cgi.c
+++ b/cgi.c
@@ -819,7 +819,6 @@ format(const struct req *req, const char *file)
{
struct manoutput conf;
struct mparse *mp;
- struct mchars *mchars;
struct roff_man *man;
void *vp;
int fd;
@@ -830,9 +829,8 @@ format(const struct req *req, const char *file)
return;
}
- mchars = mchars_alloc();
- mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL,
- mchars, req->q.manpath);
+ mchars_alloc();
+ mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath);
mparse_readfd(mp, fd, file);
close(fd);
@@ -852,11 +850,11 @@ format(const struct req *req, const char *file)
req->q.manpath, file);
pg_error_internal();
mparse_free(mp);
- mchars_free(mchars);
+ mchars_free();
return;
}
- vp = html_alloc(mchars, &conf);
+ vp = html_alloc(&conf);
if (man->macroset == MACROSET_MDOC)
html_mdoc(vp, man);
@@ -865,7 +863,7 @@ format(const struct req *req, const char *file)
html_free(vp);
mparse_free(mp);
- mchars_free(mchars);
+ mchars_free();
free(conf.man);
}
diff --git a/chars.c b/chars.c
index 3b51d226..c4d237c9 100644
--- a/chars.c
+++ b/chars.c
@@ -1,7 +1,7 @@
/* $Id$ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011, 2014 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2011, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -21,89 +21,428 @@
#include <assert.h>
#include <ctype.h>
+#include <stddef.h>
+#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "mandoc.h"
#include "mandoc_aux.h"
+#include "mandoc_ohash.h"
#include "libmandoc.h"
-#define PRINT_HI 126
-#define PRINT_LO 32
-
struct ln {
- struct ln *next;
- const char *code;
+ const char roffcode[16];
const char *ascii;
int unicode;
};
-#define LINES_MAX 332
-
-#define CHAR(in, ch, code) \
- { NULL, (in), (ch), (code) },
-
-#define CHAR_TBL_START static struct ln lines[LINES_MAX] = {
-#define CHAR_TBL_END };
-
-#include "chars.in"
-
-struct mchars {
- struct ln **htab;
+/* Special break control characters. */
+static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' };
+static const char ascii_break[2] = { ASCII_BREAK, '\0' };
+
+static struct ln lines[] = {
+
+ /* Spacing. */
+ { " ", ascii_nbrsp, 0x00a0 },
+ { "~", ascii_nbrsp, 0x00a0 },
+ { "0", " ", 0x2002 },
+ { "|", "", 0 },
+ { "^", "", 0 },
+ { "&", "", 0 },
+ { "%", "", 0 },
+ { ":", ascii_break, 0 },
+ /* XXX The following three do not really belong here. */
+ { "t", "", 0 },
+ { "c", "", 0 },
+ { "}", "", 0 },
+
+ /* Lines. */
+ { "ba", "|", 0x007c },
+ { "br", "|", 0x2502 },
+ { "ul", "_", 0x005f },
+ { "rn", "-", 0x203e },
+ { "bb", "|", 0x00a6 },
+ { "sl", "/", 0x002f },
+ { "rs", "\\", 0x005c },
+
+ /* Text markers. */
+ { "ci", "O", 0x25cb },
+ { "bu", "+\bo", 0x2022 },
+ { "dd", "|\b=", 0x2021 },
+ { "dg", "|\b-", 0x2020 },
+ { "lz", "<>", 0x25ca },
+ { "sq", "[]", 0x25a1 },
+ { "ps", "<par>", 0x00b6 },
+ { "sc", "<sec>", 0x00a7 },
+ { "lh", "<=", 0x261c },
+ { "rh", "=>", 0x261e },
+ { "at", "@", 0x0040 },
+ { "sh", "#", 0x0023 },
+ { "CR", "_|", 0x21b5 },
+ { "OK", "\\/", 0x2713 },
+
+ /* Legal symbols. */
+ { "co", "(C)", 0x00a9 },
+ { "rg", "(R)", 0x00ae },
+ { "tm", "tm", 0x2122 },
+
+ /* Punctuation. */
+ { "em", "--", 0x2014 },
+ { "en", "-", 0x2013 },
+ { "hy", "-", 0x2010 },
+ { "e", "\\", 0x005c },
+ { ".", ".", 0x002e },
+ { "r!", "!", 0x00a1 },
+ { "r?", "?", 0x00bf },
+
+ /* Quotes. */
+ { "Bq", ",,", 0x201e },
+ { "bq", ",", 0x201a },
+ { "lq", "\"", 0x201c },
+ { "rq", "\"", 0x201d },
+ { "Lq", "``", 0x201c },
+ { "Rq", "''", 0x201d },
+ { "oq", "`", 0x2018 },
+ { "cq", "\'", 0x2019 },
+ { "aq", "\'", 0x0027 },
+ { "dq", "\"", 0x0022 },
+ { "Fo", "<<", 0x00ab },
+ { "Fc", ">>", 0x00bb },
+ { "fo", "<", 0x2039 },
+ { "fc", ">", 0x203a },
+
+ /* Brackets. */
+ { "lB", "[", 0x005b },
+ { "rB", "]", 0x005d },
+ { "lC", "{", 0x007b },
+ { "rC", "}", 0x007d },
+ { "la", "<", 0x27e8 },
+ { "ra", ">", 0x27e9 },
+ { "bv", "|", 0x23aa },
+ { "braceex", "|", 0x23aa },
+ { "bracketlefttp", "|", 0x23a1 },
+ { "bracketleftbt", "|", 0x23a3 },
+ { "bracketleftex", "|", 0x23a2 },
+ { "bracketrighttp", "|", 0x23a4 },
+ { "bracketrightbt", "|", 0x23a6 },
+ { "bracketrightex", "|", 0x23a5 },
+ { "lt", ",-", 0x23a7 },
+ { "bracelefttp", ",-", 0x23a7 },
+ { "lk", "{", 0x23a8 },
+ { "braceleftmid", "{", 0x23a8 },
+ { "lb", "`-", 0x23a9 },
+ { "braceleftbt", "`-", 0x23a9 },
+ { "braceleftex", "|", 0x23aa },
+ { "rt", "-.", 0x23ab },
+ { "bracerighttp", "-.", 0x23ab },
+ { "rk", "}", 0x23ac },
+ { "bracerightmid", "}", 0x23ac },
+ { "rb", "-\'", 0x23ad },
+ { "bracerightbt", "-\'", 0x23ad },
+ { "bracerightex", "|", 0x23aa },
+ { "parenlefttp", "/", 0x239b },
+ { "parenleftbt", "\\", 0x239d },
+ { "parenleftex", "|", 0x239c },
+ { "parenrighttp", "\\", 0x239e },
+ { "parenrightbt", "/", 0x23a0 },
+ { "parenrightex", "|", 0x239f },
+
+ /* Arrows and lines. */
+ { "<-", "<-", 0x2190 },
+ { "->", "->", 0x2192 },
+ { "<>", "<->", 0x2194 },
+ { "da", "|\bv", 0x2193 },
+ { "ua", "|\b^", 0x2191 },
+ { "va", "^v", 0x2195 },
+ { "lA", "<=", 0x21d0 },
+ { "rA", "=>", 0x21d2 },
+ { "hA", "<=>", 0x21d4 },
+ { "uA", "=\b^", 0x21d1 },
+ { "dA", "=\bv", 0x21d3 },
+ { "vA", "^=v", 0x21d5 },
+
+ /* Logic. */
+ { "AN", "^", 0x2227 },
+ { "OR", "v", 0x2228 },
+ { "no", "~", 0x00ac },
+ { "tno", "~", 0x00ac },
+ { "te", "3", 0x2203 },
+ { "fa", "-\bV", 0x2200 },
+ { "st", "-)", 0x220b },
+ { "tf", ".:.", 0x2234 },
+ { "3d", ".:.", 0x2234 },
+ { "or", "|", 0x007c },
+
+ /* Mathematicals. */
+ { "pl", "+", 0x002b },
+ { "mi", "-", 0x2212 },
+ { "-", "-", 0x002d },
+ { "-+", "-+", 0x2213 },
+ { "+-", "+-", 0x00b1 },
+ { "t+-", "+-", 0x00b1 },
+ { "pc", ".", 0x00b7 },
+ { "md", ".", 0x22c5 },
+ { "mu", "x", 0x00d7 },
+ { "tmu", "x", 0x00d7 },
+ { "c*", "O\bx", 0x2297 },
+ { "c+", "O\b+", 0x2295 },
+ { "di", "-:-", 0x00f7 },
+ { "tdi", "-:-", 0x00f7 },
+ { "f/", "/", 0x2044 },
+ { "**", "*", 0x2217 },
+ { "<=", "<=", 0x2264 },
+ { ">=", ">=", 0x2265 },
+ { "<<", "<<", 0x226a },
+ { ">>", ">>", 0x226b },
+ { "eq", "=", 0x003d },
+ { "!=", "!=", 0x2260 },
+ { "==", "==", 0x2261 },
+ { "ne", "!==", 0x2262 },
+ { "ap", "~", 0x223c },
+ { "|=", "-~", 0x2243 },
+ { "=~", "=~", 0x2245 },
+ { "~~", "~~", 0x2248 },
+ { "~=", "~=", 0x2248 },
+ { "pt", "oc", 0x221d },
+ { "es", "{}", 0x2205 },
+ { "mo", "E", 0x2208 },
+ { "nm", "!E", 0x2209 },
+ { "sb", "(=", 0x2282 },
+ { "nb", "(!=", 0x2284 },
+ { "sp", "=)", 0x2283 },
+ { "nc", "!=)", 0x2285 },
+ { "ib", "(=\b_", 0x2286 },
+ { "ip", "=\b_)", 0x2287 },
+ { "ca", "(^)", 0x2229 },
+ { "cu", "U", 0x222a },
+ { "/_", "_\b/", 0x2220 },
+ { "pp", "_\b|", 0x22a5 },
+ { "is", "'\b,\bI", 0x222b },
+ { "integral", "'\b,\bI", 0x222b },
+ { "sum", "E", 0x2211 },
+ { "product", "TT", 0x220f },
+ { "coproduct", "U", 0x2210 },
+ { "gr", "V", 0x2207 },
+ { "sr", "\\/", 0x221a },
+ { "sqrt", "\\/", 0x221a },
+ { "lc", "|~", 0x2308 },
+ { "rc", "~|", 0x2309 },
+ { "lf", "|_", 0x230a },
+ { "rf", "_|", 0x230b },
+ { "if", "oo", 0x221e },
+ { "Ah", "N", 0x2135 },
+ { "Im", "I", 0x2111 },
+ { "Re", "R", 0x211c },
+ { "pd", "a", 0x2202 },
+ { "-h", "/h", 0x210f },
+ { "12", "1/2", 0x00bd },
+ { "14", "1/4", 0x00bc },
+ { "34", "3/4", 0x00be },
+
+ /* Ligatures. */
+ { "ff", "ff", 0xfb00 },
+ { "fi", "fi", 0xfb01 },
+ { "fl", "fl", 0xfb02 },
+ { "Fi", "ffi", 0xfb03 },
+ { "Fl", "ffl", 0xfb04 },
+ { "AE", "AE", 0x00c6 },
+ { "ae", "ae", 0x00e6 },
+ { "OE", "OE", 0x0152 },
+ { "oe", "oe", 0x0153 },
+ { "ss", "ss", 0x00df },
+ { "IJ", "IJ", 0x0132 },
+ { "ij", "ij", 0x0133 },
+
+ /* Accents. */
+ { "a\"", "\"", 0x02dd },
+ { "a-", "-", 0x00af },
+ { "a.", ".", 0x02d9 },
+ { "a^", "^", 0x005e },
+ { "aa", "\'", 0x00b4 },
+ { "\'", "\'", 0x00b4 },
+ { "ga", "`", 0x0060 },
+ { "`", "`", 0x0060 },
+ { "ab", "'\b`", 0x02d8 },
+ { "ac", ",", 0x00b8 },
+ { "ad", "\"", 0x00a8 },
+ { "ah", "v", 0x02c7 },
+ { "ao", "o", 0x02da },
+ { "a~", "~", 0x007e },
+ { "ho", ",", 0x02db },
+ { "ha", "^", 0x005e },
+ { "ti", "~", 0x007e },
+
+ /* Accented letters. */
+ { "'A", "'\bA", 0x00c1 },
+ { "'E", "'\bE", 0x00c9 },
+ { "'I", "'\bI", 0x00cd },
+ { "'O", "'\bO", 0x00d3 },
+ { "'U", "'\bU", 0x00da },
+ { "'a", "'\ba", 0x00e1 },
+ { "'e", "'\be", 0x00e9 },
+ { "'i", "'\bi", 0x00ed },
+ { "'o", "'\bo", 0x00f3 },
+ { "'u", "'\bu", 0x00fa },
+ { "`A", "`\bA", 0x00c0 },
+ { "`E", "`\bE", 0x00c8 },
+ { "`I", "`\bI", 0x00cc },
+ { "`O", "`\bO", 0x00d2 },
+ { "`U", "`\bU", 0x00d9 },
+ { "`a", "`\ba", 0x00e0 },
+ { "`e", "`\be", 0x00e8 },
+ { "`i", "`\bi", 0x00ec },
+ { "`o", "`\bo", 0x00f2 },
+ { "`u", "`\bu", 0x00f9 },
+ { "~A", "~\bA", 0x00c3 },
+ { "~N", "~\bN", 0x00d1 },
+ { "~O", "~\bO", 0x00d5 },
+ { "~a", "~\ba", 0x00e3 },
+ { "~n", "~\bn", 0x00f1 },
+ { "~o", "~\bo", 0x00f5 },
+ { ":A", "\"\bA", 0x00c4 },
+ { ":E", "\"\bE", 0x00cb },
+ { ":I", "\"\bI", 0x00cf },
+ { ":O", "\"\bO", 0x00d6 },
+ { ":U", "\"\bU", 0x00dc },
+ { ":a", "\"\ba", 0x00e4 },
+ { ":e", "\"\be", 0x00eb },
+ { ":i", "\"\bi", 0x00ef },
+ { ":o", "\"\bo", 0x00f6 },
+ { ":u", "\"\bu", 0x00fc },
+ { ":y", "\"\by", 0x00ff },
+ { "^A", "^\bA", 0x00c2 },
+ { "^E", "^\bE", 0x00ca },
+ { "^I", "^\bI", 0x00ce },
+ { "^O", "^\bO", 0x00d4 },
+ { "^U", "^\bU", 0x00db },
+ { "^a", "^\ba", 0x00e2 },
+ { "^e", "^\be", 0x00ea },
+ { "^i", "^\bi", 0x00ee },
+ { "^o", "^\bo", 0x00f4 },
+ { "^u", "^\bu", 0x00fb },
+ { ",C", ",\bC", 0x00c7 },
+ { ",c", ",\bc", 0x00e7 },
+ { "/L", "/\bL", 0x0141 },
+ { "/l", "/\bl", 0x0142 },
+ { "/O", "/\bO", 0x00d8 },
+ { "/o", "/\bo", 0x00f8 },
+ { "oA", "o\bA", 0x00c5 },
+ { "oa", "o\ba", 0x00e5 },
+
+ /* Special letters. */
+ { "-D", "-\bD", 0x00d0 },
+ { "Sd", "d", 0x00f0 },
+ { "TP", "Th", 0x00de },
+ { "Tp", "th", 0x00fe },
+ { ".i", "i", 0x0131 },
+ { ".j", "j", 0x0237 },
+
+ /* Currency. */
+ { "Do", "$", 0x0024 },
+ { "ct", "/\bc", 0x00a2 },
+ { "Eu", "EUR", 0x20ac },
+ { "eu", "EUR", 0x20ac },
+ { "Ye", "=\bY", 0x00a5 },
+ { "Po", "GBP", 0x00a3 },
+ { "Cs", "o\bx", 0x00a4 },
+ { "Fn", ",\bf", 0x0192 },
+
+ /* Units. */
+ { "de", "<deg>", 0x00b0 },
+ { "%0", "%o", 0x2030 },
+ { "fm", "\'", 0x2032 },
+ { "sd", "''", 0x2033 },
+ { "mc", ",\bu", 0x00b5 },
+
+ /* Greek characters. */
+ { "*A", "A", 0x0391 },
+ { "*B", "B", 0x0392 },
+ { "*G", "G", 0x0393 },
+ { "*D", "_\b/_\b\\", 0x0394 },
+ { "*E", "E", 0x0395 },
+ { "*Z", "Z", 0x0396 },
+ { "*Y", "H", 0x0397 },
+ { "*H", "-\bO", 0x0398 },
+ { "*I", "I", 0x0399 },
+ { "*K", "K", 0x039a },
+ { "*L", "/\\", 0x039b },
+ { "*M", "M", 0x039c },
+ { "*N", "N", 0x039d },
+ { "*C", "_\bH", 0x039e },
+ { "*O", "O", 0x039f },
+ { "*P", "TT", 0x03a0 },
+ { "*R", "P", 0x03a1 },
+ { "*S", "S", 0x03a3 },
+ { "*T", "T", 0x03a4 },
+ { "*U", "Y", 0x03a5 },
+ { "*F", "I\bO", 0x03a6 },
+ { "*X", "X", 0x03a7 },
+ { "*Q", "I\bY", 0x03a8 },
+ { "*W", "_\bO", 0x03a9 },
+ { "*a", "a", 0x03b1 },
+ { "*b", "B", 0x03b2 },
+ { "*g", "y", 0x03b3 },
+ { "*d", "d", 0x03b4 },
+ { "*e", "e", 0x03b5 },
+ { "*z", ",\bC", 0x03b6 },
+ { "*y", "n", 0x03b7 },
+ { "*h", "-\b0", 0x03b8 },
+ { "*i", "i", 0x03b9 },
+ { "*k", "k", 0x03ba },
+ { "*l", ">\b\\", 0x03bb },
+ { "*m", ",\bu", 0x03bc },
+ { "*n", "v", 0x03bd },
+ { "*c", ",\bE", 0x03be },
+ { "*o", "o", 0x03bf },
+ { "*p", "-\bn", 0x03c0 },
+ { "*r", "p", 0x03c1 },
+ { "*s", "-\bo", 0x03c3 },
+ { "*t", "~\bt", 0x03c4 },
+ { "*u", "u", 0x03c5 },
+ { "*f", "|\bo", 0x03d5 },
+ { "*x", "x", 0x03c7 },
+ { "*q", "|\bu", 0x03c8 },
+ { "*w", "w", 0x03c9 },
+ { "+h", "-\b0", 0x03d1 },
+ { "+f", "|\bo", 0x03c6 },
+ { "+p", "-\bw", 0x03d6 },
+ { "+e", "e", 0x03f5 },
+ { "ts", "s", 0x03c2 },
};
-static const struct ln *find(const struct mchars *,
- const char *, size_t);
+static struct ohash mchars;
void
-mchars_free(struct mchars *arg)
+mchars_free(void)
{
- free(arg->htab);
- free(arg);
+ ohash_delete(&mchars);
}
-struct mchars *
+void
mchars_alloc(void)
{
- struct mchars *tab;
- struct ln **htab;
- struct ln *pp;
- int i, hash;
-
- /*
- * Constructs a very basic chaining hashtable. The hash routine
- * is simply the integral value of the first character.
- * Subsequent entries are chained in the order they're processed.
- */
-
- tab = mandoc_malloc(sizeof(struct mchars));
- htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln *));
-
- for (i = 0; i < LINES_MAX; i++) {
- hash = (int)lines[i].code[0] - PRINT_LO;
-
- if (NULL == (pp = htab[hash])) {
- htab[hash] = &lines[i];
- continue;
- }
-
- for ( ; pp->next; pp = pp->next)
- /* Scan ahead. */ ;
- pp->next = &lines[i];
+ size_t i;
+ unsigned int slot;
+
+ mandoc_ohash_init(&mchars, 9, offsetof(struct ln, roffcode));
+ for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++) {
+ slot = ohash_qlookup(&mchars, lines[i].roffcode);
+ assert(ohash_find(&mchars, slot) == NULL);
+ ohash_insert(&mchars, slot, lines + i);
}
-
- tab->htab = htab;
- return tab;
}
int
-mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz)
+mchars_spec2cp(const char *p, size_t sz)
{
const struct ln *ln;
+ const char *end;
- ln = find(arg, p, sz);
+ end = p + sz;
+ ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end));
return ln != NULL ? ln->unicode : sz == 1 ? (unsigned char)*p : -1;
}
@@ -127,12 +466,13 @@ mchars_num2uc(const char *p, size_t sz)
}
const char *
-mchars_spec2str(const struct mchars *arg,
- const char *p, size_t sz, size_t *rsz)
+mchars_spec2str(const char *p, size_t sz, size_t *rsz)
{
const struct ln *ln;
+ const char *end;
- ln = find(arg, p, sz);
+ end = p + sz;
+ ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end));
if (ln == NULL) {
*rsz = 1;
return sz == 1 ? p : NULL;
@@ -145,31 +485,10 @@ mchars_spec2str(const struct mchars *arg,
const char *
mchars_uc2str(int uc)
{
- int i;
+ size_t i;
- for (i = 0; i < LINES_MAX; i++)
+ for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++)
if (uc == lines[i].unicode)
return lines[i].ascii;
return "<?>";
}
-
-static const struct ln *
-find(const struct mchars *tab, const char *p, size_t sz)
-{
- const struct ln *pp;
- int hash;
-
- assert(p);
-
- if (0 == sz || p[0] < PRINT_LO || p[0] > PRINT_HI)
- return NULL;
-
- hash = (int)p[0] - PRINT_LO;
-
- for (pp = tab->htab[hash]; pp; pp = pp->next)
- if (0 == strncmp(pp->code, p, sz) &&
- '\0' == pp->code[(int)sz])
- return pp;
-
- return NULL;
-}
diff --git a/chars.in b/chars.in
deleted file mode 100644
index f34a098f..00000000
--- a/chars.in
+++ /dev/null
@@ -1,404 +0,0 @@
-/* $Id$ */
-/*
- * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/*
- * The ASCII translation tables.
- *
- * The left-hand side corresponds to the input sequence (\x, \(xx, \*(xx
- * and so on) whose length is listed second element. The right-hand
- * side is what's produced by the front-end, with the fourth element
- * being its length.
- *
- * XXX - C-escape strings!
- * XXX - update LINES_MAX if adding more!
- */
-
-/* Special break control characters. */
-static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' };
-static const char ascii_break[2] = { ASCII_BREAK, '\0' };
-
-CHAR_TBL_START
-
-/* Spacing. */
-CHAR(" ", ascii_nbrsp, 160)
-CHAR("~", ascii_nbrsp, 160)
-CHAR("0", " ", 8194)
-CHAR("|", "", 0)
-CHAR("^", "", 0)
-CHAR("&", "", 0)
-CHAR("%", "", 0)
-CHAR(":", ascii_break, 0)
-/* XXX The following three do not really belong into this file. */
-CHAR("t", "", 0)
-CHAR("c", "", 0)
-CHAR("}", "", 0)
-
-/* Accents. */
-CHAR("a\"", "\"", 733)
-CHAR("a-", "-", 175)
-CHAR("a.", ".", 729)
-CHAR("a^", "^", 94)
-CHAR("\'", "\'", 180)
-CHAR("aa", "\'", 180)
-CHAR("ga", "`", 96)
-CHAR("`", "`", 96)
-CHAR("ab", "'\b`", 728)
-CHAR("ac", ",", 184)
-CHAR("ad", "\"", 168)
-CHAR("ah", "v", 711)
-CHAR("ao", "o", 730)
-CHAR("a~", "~", 126)
-CHAR("ho", ",", 731)
-CHAR("ha", "^", 94)
-CHAR("ti", "~", 126)
-
-/* Quotes. */
-CHAR("Bq", ",,", 8222)
-CHAR("bq", ",", 8218)
-CHAR("lq", "\"", 8220)
-CHAR("rq", "\"", 8221)
-CHAR("Lq", "``", 8220)
-CHAR("Rq", "''", 8221)
-CHAR("oq", "`", 8216)
-CHAR("cq", "\'", 8217)
-CHAR("aq", "\'", 39)
-CHAR("dq", "\"", 34)
-CHAR("Fo", "<<", 171)
-CHAR("Fc", ">>", 187)
-CHAR("fo", "<", 8249)
-CHAR("fc", ">", 8250)
-
-/* Brackets. */
-CHAR("lB", "[", 91)
-CHAR("rB", "]", 93)
-CHAR("lC", "{", 123)
-CHAR("rC", "}", 125)
-CHAR("la", "<", 10216)
-CHAR("ra", ">", 10217)
-CHAR("bv", "|", 9130)
-CHAR("braceex", "|", 9130)
-CHAR("bracketlefttp", "|", 9121)
-CHAR("bracketleftbt", "|", 9123)
-CHAR("bracketleftex", "|", 9122)
-CHAR("bracketrighttp", "|", 9124)
-CHAR("bracketrightbt", "|", 9126)
-CHAR("bracketrightex", "|", 9125)
-CHAR("lt", ",-", 9127)
-CHAR("bracelefttp", ",-", 9127)
-CHAR("lk", "{", 9128)
-CHAR("braceleftmid", "{", 9128)
-CHAR("lb", "`-", 9129)
-CHAR("braceleftbt", "`-", 9129)
-CHAR("braceleftex", "|", 9130)
-CHAR("rt", "-.", 9131)
-CHAR("bracerighttp", "-.", 9131)
-CHAR("rk", "}", 9132)
-CHAR("bracerightmid", "}", 9132)
-CHAR("rb", "-\'", 9133)
-CHAR("bracerightbt", "-\'", 9133)
-CHAR("bracerightex", "|", 9130)
-CHAR("parenlefttp", "/", 9115)
-CHAR("parenleftbt", "\\", 9117)
-CHAR("parenleftex", "|", 9116)
-CHAR("parenrighttp", "\\", 9118)
-CHAR("parenrightbt", "/", 9120)
-CHAR("parenrightex", "|", 9119)
-
-/* Greek characters. */
-CHAR("*A", "A", 913)
-CHAR("*B", "B", 914)
-CHAR("*G", "G", 915)
-CHAR("*D", "_\b/_\b\\", 916)
-CHAR("*E", "E", 917)
-CHAR("*Z", "Z", 918)
-CHAR("*Y", "H", 919)
-CHAR("*H", "-\bO", 920)
-CHAR("*I", "I", 921)
-CHAR("*K", "K", 922)
-CHAR("*L", "/\\", 923)
-CHAR("*M", "M", 924)
-CHAR("*N", "N", 925)
-CHAR("*C", "_\bH", 926)
-CHAR("*O", "O", 927)
-CHAR("*P", "TT", 928)
-CHAR("*R", "P", 929)
-CHAR("*S", "S", 931)
-CHAR("*T", "T", 932)
-CHAR("*U", "Y", 933)
-CHAR("*F", "I\bO", 934)
-CHAR("*X", "X", 935)
-CHAR("*Q", "I\bY", 936)
-CHAR("*W", "_\bO", 937)
-CHAR("*a", "a", 945)
-CHAR("*b", "B", 946)
-CHAR("*g", "y", 947)
-CHAR("*d", "d", 948)
-CHAR("*e", "e", 949)
-CHAR("*z", ",\bC", 950)
-CHAR("*y", "n", 951)
-CHAR("*h", "-\b0", 952)
-CHAR("*i", "i", 953)
-CHAR("*k", "k", 954)
-CHAR("*l", ">\b\\", 955)
-CHAR("*m", ",\bu", 956)
-CHAR("*n", "v", 957)
-CHAR("*c", ",\bE", 958)
-CHAR("*o", "o", 959)
-CHAR("*p", "-\bn", 960)
-CHAR("*r", "p", 961)
-CHAR("*s", "-\bo", 963)
-CHAR("*t", "~\bt", 964)
-CHAR("*u", "u", 965)
-CHAR("*f", "|\bo", 981)
-CHAR("*x", "x", 967)
-CHAR("*q", "|\bu", 968)
-CHAR("*w", "w", 969)
-CHAR("+h", "-\b0", 977)
-CHAR("+f", "|\bo", 966)
-CHAR("+p", "-\bw", 982)
-CHAR("+e", "e", 1013)
-CHAR("ts", "s", 962)
-
-/* Accented letters. */
-CHAR(",C", ",\bC", 199)
-CHAR(",c", ",\bc", 231)
-CHAR("/L", "/\bL", 321)
-CHAR("/O", "/\bO", 216)
-CHAR("/l", "/\bl", 322)
-CHAR("/o", "/\bo", 248)
-CHAR("oA", "o\bA", 197)
-CHAR("oa", "o\ba", 229)
-CHAR(":A", "\"\bA", 196)
-CHAR(":E", "\"\bE", 203)
-CHAR(":I", "\"\bI", 207)
-CHAR(":O", "\"\bO", 214)
-CHAR(":U", "\"\bU", 220)
-CHAR(":a", "\"\ba", 228)
-CHAR(":e", "\"\be", 235)
-CHAR(":i", "\"\bi", 239)
-CHAR(":o", "\"\bo", 246)
-CHAR(":u", "\"\bu", 252)
-CHAR(":y", "\"\by", 255)
-CHAR("'A", "'\bA", 193)
-CHAR("'E", "'\bE", 201)
-CHAR("'I", "'\bI", 205)
-CHAR("'O", "'\bO", 211)
-CHAR("'U", "'\bU", 218)
-CHAR("'a", "'\ba", 225)
-CHAR("'e", "'\be", 233)
-CHAR("'i", "'\bi", 237)
-CHAR("'o", "'\bo", 243)
-CHAR("'u", "'\bu", 250)
-CHAR("^A", "^\bA", 194)
-CHAR("^E", "^\bE", 202)
-CHAR("^I", "^\bI", 206)
-CHAR("^O", "^\bO", 212)
-CHAR("^U", "^\bU", 219)
-CHAR("^a", "^\ba", 226)
-CHAR("^e", "^\be", 234)
-CHAR("^i", "^\bi", 238)
-CHAR("^o", "^\bo", 244)
-CHAR("^u", "^\bu", 251)
-CHAR("`A", "`\bA", 192)
-CHAR("`E", "`\bE", 200)
-CHAR("`I", "`\bI", 204)
-CHAR("`O", "`\bO", 210)
-CHAR("`U", "`\bU", 217)
-CHAR("`a", "`\ba", 224)
-CHAR("`e", "`\be", 232)
-CHAR("`i", "`\bi", 236)
-CHAR("`o", "`\bo", 242)
-CHAR("`u", "`\bu", 249)
-CHAR("~A", "~\bA", 195)
-CHAR("~N", "~\bN", 209)
-CHAR("~O", "~\bO", 213)
-CHAR("~a", "~\ba", 227)
-CHAR("~n", "~\bn", 241)
-CHAR("~o", "~\bo", 245)
-
-/* Arrows and lines. */
-CHAR("<-", "<-", 8592)
-CHAR("->", "->", 8594)
-CHAR("<>", "<->", 8596)
-CHAR("da", "|\bv", 8595)
-CHAR("ua", "|\b^", 8593)
-CHAR("va", "^v", 8597)
-CHAR("lA", "<=", 8656)
-CHAR("rA", "=>", 8658)
-CHAR("hA", "<=>", 8660)
-CHAR("dA", "=\bv", 8659)
-CHAR("uA", "=\b^", 8657)
-CHAR("vA", "^=v", 8661)
-
-/* Logic. */
-CHAR("AN", "^", 8743)
-CHAR("OR", "v", 8744)
-CHAR("no", "~", 172)
-CHAR("tno", "~", 172)
-CHAR("te", "3", 8707)
-CHAR("fa", "-\bV", 8704)
-CHAR("st", "-)", 8715)
-CHAR("tf", ".:.", 8756)
-CHAR("3d", ".:.", 8756)
-CHAR("or", "|", 124)
-
-/* Mathematicals. */
-CHAR("pl", "+", 43)
-CHAR("mi", "-", 8722)
-CHAR("-", "-", 45)
-CHAR("-+", "-+", 8723)
-CHAR("+-", "+-", 177)
-CHAR("t+-", "+-", 177)
-CHAR("pc", ".", 183)
-CHAR("md", ".", 8901)
-CHAR("mu", "x", 215)
-CHAR("tmu", "x", 215)
-CHAR("c*", "O\bx", 8855)
-CHAR("c+", "O\b+", 8853)
-CHAR("di", "-:-", 247)
-CHAR("tdi", "-:-", 247)
-CHAR("f/", "/", 8260)
-CHAR("**", "*", 8727)
-CHAR("<=", "<=", 8804)
-CHAR(">=", ">=", 8805)
-CHAR("<<", "<<", 8810)
-CHAR(">>", ">>", 8811)
-CHAR("eq", "=", 61)
-CHAR("!=", "!=", 8800)
-CHAR("==", "==", 8801)
-CHAR("ne", "!==", 8802)
-CHAR("=~", "=~", 8773)
-CHAR("|=", "-~", 8771)
-CHAR("ap", "~", 8764)
-CHAR("~~", "~~", 8776)
-CHAR("~=", "~=", 8776)
-CHAR("pt", "oc", 8733)
-CHAR("es", "{}", 8709)
-CHAR("mo", "E", 8712)
-CHAR("nm", "!E", 8713)
-CHAR("sb", "(=", 8834)
-CHAR("nb", "(!=", 8836)
-CHAR("sp", "=)", 8835)
-CHAR("nc", "!=)", 8837)
-CHAR("ib", "(=\b_", 8838)
-CHAR("ip", "=\b_)", 8839)
-CHAR("ca", "(^)", 8745)
-CHAR("cu", "U", 8746)
-CHAR("/_", "_\b/", 8736)
-CHAR("pp", "_\b|", 8869)
-CHAR("is", "'\b,\bI", 8747)
-CHAR("integral", "'\b,\bI", 8747)
-CHAR("sum", "E", 8721)
-CHAR("product", "TT", 8719)
-CHAR("coproduct", "U", 8720)
-CHAR("gr", "V", 8711)
-CHAR("sr", "\\/", 8730)
-CHAR("sqrt", "\\/", 8730)
-CHAR("lc", "|~", 8968)
-CHAR("rc", "~|", 8969)
-CHAR("lf", "|_", 8970)
-CHAR("rf", "_|", 8971)
-CHAR("if", "oo", 8734)
-CHAR("Ah", "N", 8501)
-CHAR("Im", "I", 8465)
-CHAR("Re", "R", 8476)
-CHAR("pd", "a", 8706)
-CHAR("-h", "/h", 8463)
-CHAR("12", "1/2", 189)
-CHAR("14", "1/4", 188)
-CHAR("34", "3/4", 190)
-
-/* Ligatures. */
-CHAR("ff", "ff", 64256)
-CHAR("fi", "fi", 64257)
-CHAR("fl", "fl", 64258)
-CHAR("Fi", "ffi", 64259)
-CHAR("Fl", "ffl", 64260)
-CHAR("AE", "AE", 198)
-CHAR("ae", "ae", 230)
-CHAR("OE", "OE", 338)
-CHAR("oe", "oe", 339)
-CHAR("ss", "ss", 223)
-CHAR("IJ", "IJ", 306)
-CHAR("ij", "ij", 307)
-
-/* Special letters. */
-CHAR("-D", "-\bD", 208)
-CHAR("Sd", "d", 240)
-CHAR("TP", "Th", 222)
-CHAR("Tp", "th", 254)
-CHAR(".i", "i", 305)
-CHAR(".j", "j", 567)
-
-/* Currency. */
-CHAR("Do", "$", 36)
-CHAR("ct", "/\bc", 162)
-CHAR("Eu", "EUR", 8364)
-CHAR("eu", "EUR", 8364)
-CHAR("Ye", "=\bY", 165)
-CHAR("Po", "GBP", 163)
-CHAR("Cs", "o\bx", 164)
-CHAR("Fn", ",\bf", 402)
-
-/* Lines. */
-CHAR("ba", "|", 124)
-CHAR("br", "|", 9474)
-CHAR("ul", "_", 95)
-CHAR("rn", "-", 8254)
-CHAR("bb", "|", 166)
-CHAR("sl", "/", 47)
-CHAR("rs", "\\", 92)
-
-/* Text markers. */
-CHAR("ci", "O", 9675)
-CHAR("bu", "+\bo", 8226)
-CHAR("dd", "|\b=", 8225)
-CHAR("dg", "|\b-", 8224)
-CHAR("lz", "<>", 9674)
-CHAR("sq", "[]", 9633)
-CHAR("ps", "<par>", 182)
-CHAR("sc", "<sec>", 167)
-CHAR("lh", "<=", 9756)
-CHAR("rh", "=>", 9758)
-CHAR("at", "@", 64)
-CHAR("sh", "#", 35)
-CHAR("CR", "_|", 8629)
-CHAR("OK", "\\/", 10003)
-
-/* Legal symbols. */
-CHAR("co", "(C)", 169)
-CHAR("rg", "(R)", 174)
-CHAR("tm", "tm", 8482)
-
-/* Punctuation. */
-CHAR(".", ".", 46)
-CHAR("r!", "!", 161)
-CHAR("r?", "?", 191)
-CHAR("em", "--", 8212)
-CHAR("en", "-", 8211)
-CHAR("hy", "-", 8208)
-CHAR("e", "\\", 92)
-
-/* Units. */
-CHAR("de", "<deg>", 176)
-CHAR("%0", "%o", 8240)
-CHAR("fm", "\'", 8242)
-CHAR("sd", "''", 8243)
-CHAR("mc", ",\bu", 181)
-
-CHAR_TBL_END
diff --git a/demandoc.c b/demandoc.c
index 8769fe10..4f88d735 100644
--- a/demandoc.c
+++ b/demandoc.c
@@ -44,7 +44,6 @@ int
main(int argc, char *argv[])
{
struct mparse *mp;
- struct mchars *mchars;
int ch, fd, i, list;
extern int optind;
@@ -79,8 +78,8 @@ main(int argc, char *argv[])
argc -= optind;
argv += optind;
- mchars = mchars_alloc();
- mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, mchars, NULL);
+ mchars_alloc();
+ mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, NULL);
assert(mp);
if (argc < 1)
@@ -96,7 +95,7 @@ main(int argc, char *argv[])
}
mparse_free(mp);
- mchars_free(mchars);
+ mchars_free();
return (int)MANDOCLEVEL_OK;
}
diff --git a/html.c b/html.c
index e5965ed5..d394748f 100644
--- a/html.c
+++ b/html.c
@@ -130,15 +130,13 @@ static void print_attr(struct html *, const char *, const char *);
void *
-html_alloc(const struct mchars *mchars, const struct manoutput *outopts)
+html_alloc(const struct manoutput *outopts)
{
struct html *h;
h = mandoc_calloc(1, sizeof(struct html));
h->tags.head = NULL;
- h->symtab = mchars;
-
h->style = outopts->style;
h->base_man = outopts->man;
h->base_includes = outopts->includes;
@@ -398,7 +396,7 @@ print_encode(struct html *h, const char *p, int norecurse)
continue;
break;
case ESCAPE_SPECIAL:
- c = mchars_spec2cp(h->symtab, seq, len);
+ c = mchars_spec2cp(seq, len);
if (c <= 0)
continue;
break;
diff --git a/html.h b/html.h
index 9c790fe5..1e62d3da 100644
--- a/html.h
+++ b/html.h
@@ -130,7 +130,6 @@ struct html {
struct tagq tags; /* stack of open tags */
struct rofftbl tbl; /* current table */
struct tag *tblt; /* current open table scope */
- const struct mchars *symtab; /* character table */
char *base_man; /* base for manpage href */
char *base_includes; /* base for include href */
char *style; /* style-sheet URI */
diff --git a/libmandoc.h b/libmandoc.h
index 42bcb059..f570a557 100644
--- a/libmandoc.h
+++ b/libmandoc.h
@@ -35,7 +35,6 @@ struct buf {
__BEGIN_DECLS
struct mparse;
-struct mchars;
struct tbl_span;
struct eqn;
struct roff;
@@ -68,7 +67,7 @@ int preconv_encode(struct buf *, size_t *,
struct buf *, size_t *, int *);
void roff_free(struct roff *);
-struct roff *roff_alloc(struct mparse *, const struct mchars *, int);
+struct roff *roff_alloc(struct mparse *, int);
void roff_reset(struct roff *);
void roff_man_free(struct roff_man *);
struct roff_man *roff_man_alloc(struct roff *, struct mparse *,
diff --git a/main.c b/main.c
index cc94b99f..d6d36046 100644
--- a/main.c
+++ b/main.c
@@ -77,7 +77,6 @@ enum outt {
struct curparse {
struct mparse *mp;
- struct mchars *mchars; /* character table */
enum mandoclevel wlevel; /* ignore messages below this */
int wstop; /* stop after a file with a warning */
enum outt outtype; /* which output to use */
@@ -422,9 +421,8 @@ main(int argc, char *argv[])
if (search.argmode == ARG_FILE && ! moptions(&options, auxpaths))
return (int)MANDOCLEVEL_BADARG;
- curp.mchars = mchars_alloc();
- curp.mp = mparse_alloc(options, curp.wlevel, mmsg,
- curp.mchars, defos);
+ mchars_alloc();
+ curp.mp = mparse_alloc(options, curp.wlevel, mmsg, defos);
/*
* Conditionally start up the lookaside buffer before parsing.
@@ -478,7 +476,7 @@ main(int argc, char *argv[])
if (curp.outfree)
(*curp.outfree)(curp.outdata);
mparse_free(curp.mp);
- mchars_free(curp.mchars);
+ mchars_free();
out:
if (search.argmode != ARG_FILE) {
@@ -662,33 +660,27 @@ parse(struct curparse *curp, int fd, const char *file)
if ( ! (curp->outman && curp->outmdoc)) {
switch (curp->outtype) {
case OUTT_HTML:
- curp->outdata = html_alloc(curp->mchars,
- curp->outopts);
+ curp->outdata = html_alloc(curp->outopts);
curp->outfree = html_free;
break;
case OUTT_UTF8:
- curp->outdata = utf8_alloc(curp->mchars,
- curp->outopts);
+ curp->outdata = utf8_alloc(curp->outopts);
curp->outfree = ascii_free;
break;
case OUTT_LOCALE:
- curp->outdata = locale_alloc(curp->mchars,
- curp->outopts);
+ curp->outdata = locale_alloc(curp->outopts);
curp->outfree = ascii_free;
break;
case OUTT_ASCII:
- curp->outdata = ascii_alloc(curp->mchars,
- curp->outopts);
+ curp->outdata = ascii_alloc(curp->outopts);
curp->outfree = ascii_free;
break;
case OUTT_PDF:
- curp->outdata = pdf_alloc(curp->mchars,
- curp->outopts);
+ curp->outdata = pdf_alloc(curp->outopts);
curp->outfree = pspdf_free;
break;
case OUTT_PS:
- curp->outdata = ps_alloc(curp->mchars,
- curp->outopts);
+ curp->outdata = ps_alloc(curp->outopts);
curp->outfree = pspdf_free;
break;
default:
diff --git a/main.h b/main.h
index a513e764..0a2dd851 100644
--- a/main.h
+++ b/main.h
@@ -20,7 +20,6 @@
__BEGIN_DECLS
-struct mchars;
struct roff_man;
struct manoutput;
@@ -31,8 +30,7 @@ struct manoutput;
* terminal output routines with different character settings.
*/
-void *html_alloc(const struct mchars *,
- const struct manoutput *);
+void *html_alloc(const struct manoutput *);
void html_mdoc(void *, const struct roff_man *);
void html_man(void *, const struct roff_man *);
void html_free(void *);
@@ -43,19 +41,14 @@ void tree_man(void *, const struct roff_man *);
void man_mdoc(void *, const struct roff_man *);
void man_man(void *, const struct roff_man *);
-void *locale_alloc(const struct mchars *,
- const struct manoutput *);
-void *utf8_alloc(const struct mchars *,
- const struct manoutput *);
-void *ascii_alloc(const struct mchars *,
- const struct manoutput *);
+void *locale_alloc(const struct manoutput *);
+void *utf8_alloc(const struct manoutput *);
+void *ascii_alloc(const struct manoutput *);
void ascii_free(void *);
void ascii_sepline(void *);
-void *pdf_alloc(const struct mchars *,
- const struct manoutput *);
-void *ps_alloc(const struct mchars *,
- const struct manoutput *);
+void *pdf_alloc(const struct manoutput *);
+void *ps_alloc(const struct manoutput *);
void pspdf_free(void *);
void terminal_mdoc(void *, const struct roff_man *);
diff --git a/mandoc.3 b/mandoc.3
index cd4fe78b..01a72bc6 100644
--- a/mandoc.3
+++ b/mandoc.3
@@ -50,7 +50,6 @@
.Fa "int options"
.Fa "enum mandoclevel wlevel"
.Fa "mandocmsg mmsg"
-.Fa "const struct mchars *mchars"
.Fa "char *defos"
.Fc
.Ft void
@@ -210,12 +209,6 @@ An error or warning message during parsing.
A classification of an
.Vt "enum mandocerr"
as regards system operation.
-.It Vt "struct mchars"
-An opaque pointer to a a character table.
-Created with
-.Xr mchars_alloc 3
-and freed with
-.Xr mchars_free 3 .
.It Vt "struct mparse"
An opaque pointer to a running parse sequence.
Created with
@@ -340,9 +333,6 @@ A callback function to handle errors and warnings.
See
.Pa main.c
for an example.
-.It Ar mchars
-An opaque pointer to a a character table obtained from
-.Xr mchars_alloc 3 .
.It Ar defos
A default string for the
.Xr mdoc 7
diff --git a/mandoc.h b/mandoc.h
index 98f7da95..63e68707 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -410,21 +410,17 @@ typedef void (*mandocmsg)(enum mandocerr, enum mandoclevel,
__BEGIN_DECLS
struct mparse;
-struct mchars;
struct roff_man;
enum mandoc_esc mandoc_escape(const char **, const char **, int *);
-struct mchars *mchars_alloc(void);
-void mchars_free(struct mchars *);
+void mchars_alloc(void);
+void mchars_free(void);
int mchars_num2char(const char *, size_t);
const char *mchars_uc2str(int);
int mchars_num2uc(const char *, size_t);
-int mchars_spec2cp(const struct mchars *,
- const char *, size_t);
-const char *mchars_spec2str(const struct mchars *,
- const char *, size_t, size_t *);
-struct mparse *mparse_alloc(int, enum mandoclevel, mandocmsg,
- const struct mchars *, const char *);
+int mchars_spec2cp(const char *, size_t);
+const char *mchars_spec2str(const char *, size_t, size_t *);
+struct mparse *mparse_alloc(int, enum mandoclevel, mandocmsg, const char *);
void mparse_free(struct mparse *);
void mparse_keep(struct mparse *);
enum mandoclevel mparse_open(struct mparse *, int *, const char *);
diff --git a/mandoc_headers.3 b/mandoc_headers.3
index 85cf1894..6c30e164 100644
--- a/mandoc_headers.3
+++ b/mandoc_headers.3
@@ -98,14 +98,10 @@ and the functions
described in
.Xr mandoc 3 .
.Pp
-Uses the opaque types
+Uses the opaque type
.Vt struct mparse
from
.Pa read.c
-and
-.Vt struct mchars
-from
-.Pa chars.c
for function prototypes.
Uses the types
.Vt struct mdoc
@@ -411,11 +407,7 @@ Provides
.Vt struct termp ,
and many terminal formatting functions.
.Pp
-Uses the opaque types
-.Vt struct mchars
-from
-.Pa chars.c
-and
+Uses the opaque type
.Vt struct termp_ps
from
.Pa term_ps.c .
@@ -460,11 +452,6 @@ Provides
.Vt struct html ,
and many HTML formatting functions.
.Pp
-Uses the opaque type
-.Vt struct mchars
-from
-.Pa chars.c .
-.Pp
When this header is included, the same file should not include
.Pa term.h
or
@@ -472,10 +459,6 @@ or
.It Qq Pa main.h
Provides the top level steering functions for all formatters.
.Pp
-Uses the opaque type
-.Vt struct mchars
-from
-.Pa chars.c .
Uses the types
.Vt struct mdoc
from
diff --git a/mandocdb.c b/mandocdb.c
index de5417d6..d8ca0885 100644
--- a/mandocdb.c
+++ b/mandocdb.c
@@ -195,7 +195,6 @@ static int write_utf8; /* write UTF-8 output; else ASCII */
static int exitcode; /* to be returned by main */
static enum op op; /* operational mode */
static char basedir[PATH_MAX]; /* current base directory */
-static struct mchars *mchars; /* table of named characters */
static struct ohash mpages; /* table of distinct manual pages */
static struct ohash mlinks; /* table of directory entries */
static struct ohash names; /* table of all names */
@@ -419,9 +418,8 @@ mandocdb(int argc, char *argv[])
}
exitcode = (int)MANDOCLEVEL_OK;
- mchars = mchars_alloc();
- mp = mparse_alloc(mparse_options, MANDOCLEVEL_BADARG, NULL,
- mchars, NULL);
+ mchars_alloc();
+ mp = mparse_alloc(mparse_options, MANDOCLEVEL_BADARG, NULL, NULL);
mandoc_ohash_init(&mpages, 6, offsetof(struct mpage, inodev));
mandoc_ohash_init(&mlinks, 6, offsetof(struct mlink, file));
@@ -522,7 +520,7 @@ mandocdb(int argc, char *argv[])
out:
manconf_free(&conf);
mparse_free(mp);
- mchars_free(mchars);
+ mchars_free();
mpages_free();
ohash_delete(&mpages);
ohash_delete(&mlinks);
@@ -1928,7 +1926,7 @@ render_string(char **public, size_t *psz)
*/
if (write_utf8) {
- unicode = mchars_spec2cp(mchars, seq, seqlen);
+ unicode = mchars_spec2cp(seq, seqlen);
if (unicode <= 0)
continue;
addsz = utf8(unicode, utfbuf);
@@ -1936,7 +1934,7 @@ render_string(char **public, size_t *psz)
continue;
addcp = utfbuf;
} else {
- addcp = mchars_spec2str(mchars, seq, seqlen, &addsz);
+ addcp = mchars_spec2str(seq, seqlen, &addsz);
if (addcp == NULL)
continue;
if (*addcp == ASCII_NBRSP) {
diff --git a/mchars_alloc.3 b/mchars_alloc.3
index 44442e4a..cbdf081e 100644
--- a/mchars_alloc.3
+++ b/mchars_alloc.3
@@ -25,17 +25,13 @@
.Nm mchars_spec2cp ,
.Nm mchars_spec2str
.Nd character table for mandoc
-.Sh LIBRARY
-.Lb libmandoc
.Sh SYNOPSIS
.In sys/types.h
.In mandoc.h
-.Ft "struct mchars *"
-.Fn mchars_alloc "void"
.Ft void
-.Fo mchars_free
-.Fa "struct mchars *table"
-.Fc
+.Fn mchars_alloc void
+.Ft void
+.Fn mchars_free void
.Ft char
.Fo mchars_num2char
.Fa "const char *decimal"
@@ -48,13 +44,11 @@
.Fc
.Ft int
.Fo mchars_spec2cp
-.Fa "const struct mchars *table"
.Fa "const char *name"
.Fa "size_t sz"
.Fc
.Ft "const char *"
.Fo mchars_spec2str
-.Fa "const struct mchars *table"
.Fa "const char *name"
.Fa "size_t sz"
.Fa "size_t *rsz"
@@ -135,9 +129,9 @@ escape sequences.
.Pp
The function
.Fn mchars_alloc
-allocates an opaque
-.Vt "struct mchars *"
-table object for subsequent use by the following two lookup functions.
+initializes a static
+.Vt "struct ohash"
+object for subsequent use by the following two lookup functions.
When no longer needed, this object can be destroyed with
.Fn mchars_free .
.Pp
@@ -149,9 +143,7 @@ special character
.Fa name
consisting of
.Fa sz
-characters in the
-.Fa table
-and returns the corresponding Unicode codepoint.
+characters and returns the corresponding Unicode codepoint.
If the
.Ar name
is not recognized, \-1 is returned.
@@ -175,9 +167,7 @@ special character
.Fa name
consisting of
.Fa sz
-characters in the
-.Fa table
-and returns an ASCII string representation.
+characters and returns an ASCII string representation.
The length of the representation is returned in
.Fa rsz .
In many cases, the meaning of such ASCII representations
@@ -215,6 +205,7 @@ These funtions are implemented in the file
.Sh SEE ALSO
.Xr mandoc 1 ,
.Xr mandoc_escape 3 ,
+.Xr ohash_init 3 ,
.Xr mandoc_char 7 ,
.Xr roff 7
.Sh HISTORY
diff --git a/read.c b/read.c
index 48c10600..980e0e43 100644
--- a/read.c
+++ b/read.c
@@ -50,7 +50,6 @@
struct mparse {
struct roff_man *man; /* man parser */
struct roff *roff; /* roff parser (!NULL) */
- const struct mchars *mchars; /* character table */
char *sodest; /* filename pointed to by .so */
const char *file; /* filename of current input file */
struct buf *primary; /* buffer currently being parsed */
@@ -815,7 +814,7 @@ mparse_open(struct mparse *curp, int *fd, const char *file)
struct mparse *
mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
- const struct mchars *mchars, const char *defos)
+ const char *defos)
{
struct mparse *curp;
@@ -826,8 +825,7 @@ mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
curp->mmsg = mmsg;
curp->defos = defos;
- curp->mchars = mchars;
- curp->roff = roff_alloc(curp, curp->mchars, options);
+ curp->roff = roff_alloc(curp, options);
curp->man = roff_man_alloc( curp->roff, curp, curp->defos,
curp->options & MPARSE_QUICK ? 1 : 0);
if (curp->options & MPARSE_MDOC) {
diff --git a/roff.c b/roff.c
index 28ebf80e..2dd51432 100644
--- a/roff.c
+++ b/roff.c
@@ -316,7 +316,6 @@ struct roffreg {
struct roff {
struct mparse *parse; /* parse point */
- const struct mchars *mchars; /* character table */
struct roffnode *last; /* leaf of stack */
int *rstack; /* stack of inverted `ie' values */
struct roffreg *regtab; /* number registers */
@@ -901,13 +900,12 @@ roff_free(struct roff *r)
}
struct roff *
-roff_alloc(struct mparse *parse, const struct mchars *mchars, int options)
+roff_alloc(struct mparse *parse, int options)
{
struct roff *r;
r = mandoc_calloc(1, sizeof(struct roff));
r->parse = parse;
- r->mchars = mchars;
r->options = options;
r->format = options & (MPARSE_MDOC | MPARSE_MAN);
r->rstackpos = -1;
@@ -1344,7 +1342,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
esc = mandoc_escape(&cp, &stnam, &inaml);
if (esc == ESCAPE_ERROR ||
(esc == ESCAPE_SPECIAL &&
- mchars_spec2cp(r->mchars, stnam, inaml) < 0))
+ mchars_spec2cp(stnam, inaml) < 0))
mandoc_vmsg(MANDOCERR_ESC_BAD,
r->parse, ln, (int)(stesc - buf->buf),
"%.*s", (int)(cp - stesc), stesc);
diff --git a/term.c b/term.c
index 8ca894c9..68cc1b4c 100644
--- a/term.c
+++ b/term.c
@@ -454,12 +454,11 @@ term_word(struct termp *p, const char *word)
break;
case ESCAPE_SPECIAL:
if (p->enc == TERMENC_ASCII) {
- cp = mchars_spec2str(p->symtab,
- seq, sz, &ssz);
+ cp = mchars_spec2str(seq, sz, &ssz);
if (cp != NULL)
encode(p, cp, ssz);
} else {
- uc = mchars_spec2cp(p->symtab, seq, sz);
+ uc = mchars_spec2cp(seq, sz);
if (uc > 0)
encode1(p, uc);
}
@@ -700,13 +699,11 @@ term_strlen(const struct termp *p, const char *cp)
break;
case ESCAPE_SPECIAL:
if (p->enc == TERMENC_ASCII) {
- rhs = mchars_spec2str(p->symtab,
- seq, ssz, &rsz);
+ rhs = mchars_spec2str(seq, ssz, &rsz);
if (rhs != NULL)
break;
} else {
- uc = mchars_spec2cp(p->symtab,
- seq, ssz);
+ uc = mchars_spec2cp(seq, ssz);
if (uc > 0)
sz += cond_width(p, uc, &skip);
}
diff --git a/term.h b/term.h
index e3183bd1..503fcc55 100644
--- a/term.h
+++ b/term.h
@@ -86,7 +86,6 @@ struct termp {
#define TERMP_NONEWLINE (1 << 15) /* No line break in nofill mode. */
int *buf; /* Output buffer. */
enum termenc enc; /* Type of encoding. */
- const struct mchars *symtab; /* Character table. */
enum termfont fontl; /* Last font set. */
enum termfont *fontq; /* Symmetric fonts. */
int fontsz; /* Allocated size of font stack */
diff --git a/term_ascii.c b/term_ascii.c
index b3aac5ea..1d587c63 100644
--- a/term_ascii.c
+++ b/term_ascii.c
@@ -38,8 +38,7 @@
#include "manconf.h"
#include "main.h"
-static struct termp *ascii_init(enum termenc, const struct mchars *,
- const struct manoutput *);
+static struct termp *ascii_init(enum termenc, const struct manoutput *);
static int ascii_hspan(const struct termp *,
const struct roffsu *);
static size_t ascii_width(const struct termp *, int);
@@ -59,8 +58,7 @@ static size_t locale_width(const struct termp *, int);
static struct termp *
-ascii_init(enum termenc enc, const struct mchars *mchars,
- const struct manoutput *outopts)
+ascii_init(enum termenc enc, const struct manoutput *outopts)
{
#if HAVE_WCHAR
char *v;
@@ -69,7 +67,6 @@ ascii_init(enum termenc enc, const struct mchars *mchars,
p = mandoc_calloc(1, sizeof(struct termp));
- p->symtab = mchars;
p->line = 1;
p->tabwidth = 5;
p->defrmargin = p->lastrmargin = 78;
@@ -119,24 +116,24 @@ ascii_init(enum termenc enc, const struct mchars *mchars,
}
void *
-ascii_alloc(const struct mchars *mchars, const struct manoutput *outopts)
+ascii_alloc(const struct manoutput *outopts)
{
- return ascii_init(TERMENC_ASCII, mchars, outopts);
+ return ascii_init(TERMENC_ASCII, outopts);
}
void *
-utf8_alloc(const struct mchars *mchars, const struct manoutput *outopts)
+utf8_alloc(const struct manoutput *outopts)
{
- return ascii_init(TERMENC_UTF8, mchars, outopts);
+ return ascii_init(TERMENC_UTF8, outopts);
}
void *
-locale_alloc(const struct mchars *mchars, const struct manoutput *outopts)
+locale_alloc(const struct manoutput *outopts)
{
- return ascii_init(TERMENC_LOCALE, mchars, outopts);
+ return ascii_init(TERMENC_LOCALE, outopts);
}
static void
diff --git a/term_ps.c b/term_ps.c
index aa9b4b59..d1f56f70 100644
--- a/term_ps.c
+++ b/term_ps.c
@@ -108,8 +108,7 @@ static void ps_printf(struct termp *, const char *, ...);
static void ps_putchar(struct termp *, char);
static void ps_setfont(struct termp *, enum termfont);
static void ps_setwidth(struct termp *, int, int);
-static struct termp *pspdf_alloc(const struct mchars *,
- const struct manoutput *);
+static struct termp *pspdf_alloc(const struct manoutput *);
static void pdf_obj(struct termp *, size_t);
/*
@@ -510,29 +509,29 @@ static const struct font fonts[TERMFONT__MAX] = {
};
void *
-pdf_alloc(const struct mchars *mchars, const struct manoutput *outopts)
+pdf_alloc(const struct manoutput *outopts)
{
struct termp *p;
- if (NULL != (p = pspdf_alloc(mchars, outopts)))
+ if (NULL != (p = pspdf_alloc(outopts)))
p->type = TERMTYPE_PDF;
return p;
}
void *
-ps_alloc(const struct mchars *mchars, const struct manoutput *outopts)
+ps_alloc(const struct manoutput *outopts)
{
struct termp *p;
- if (NULL != (p = pspdf_alloc(mchars, outopts)))
+ if (NULL != (p = pspdf_alloc(outopts)))
p->type = TERMTYPE_PS;
return p;
}
static struct termp *
-pspdf_alloc(const struct mchars *mchars, const struct manoutput *outopts)
+pspdf_alloc(const struct manoutput *outopts)
{
struct termp *p;
unsigned int pagex, pagey;
@@ -540,7 +539,6 @@ pspdf_alloc(const struct mchars *mchars, const struct manoutput *outopts)
const char *pp;
p = mandoc_calloc(1, sizeof(struct termp));
- p->symtab = mchars;
p->enc = TERMENC_ASCII;
p->fontq = mandoc_reallocarray(NULL,
(p->fontsz = 8), sizeof(enum termfont));