diff options
author | Ingo Schwarze <schwarze@openbsd.org> | 2015-10-13 22:59:54 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@openbsd.org> | 2015-10-13 22:59:54 +0000 |
commit | b26f648a71b3259c793410232a0d3bbdd0aa54e3 (patch) | |
tree | 38c696a975659d4498c81dddfc966f0e30bc6326 | |
parent | 35c8bbbbc8de3992d8c2d207662d365f4d566865 (diff) | |
download | mandoc-b26f648a71b3259c793410232a0d3bbdd0aa54e3.tar.gz |
Major character table cleanup:
* Use ohash(3) rather than a hand-rolled hash table.
* Make the character table static in the chars.c module:
There is no need to pass a pointer around, we most certainly
never want to use two different character tables concurrently.
* No need to keep the characters in a separate file chars.in;
that merely encourages downstream porters to mess with them.
* Sort the characters to agree with the mandoc_chars(7) manual page.
* Specify Unicode codepoints in hex, not decimal (that's the detail
that originally triggered this patch).
No functional change, minus 100 LOC, and I don't see a performance change.
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | Makefile.depend | 2 | ||||
-rw-r--r-- | cgi.c | 12 | ||||
-rw-r--r-- | chars.c | 479 | ||||
-rw-r--r-- | chars.in | 404 | ||||
-rw-r--r-- | demandoc.c | 7 | ||||
-rw-r--r-- | html.c | 6 | ||||
-rw-r--r-- | html.h | 1 | ||||
-rw-r--r-- | libmandoc.h | 3 | ||||
-rw-r--r-- | main.c | 26 | ||||
-rw-r--r-- | main.h | 19 | ||||
-rw-r--r-- | mandoc.3 | 10 | ||||
-rw-r--r-- | mandoc.h | 14 | ||||
-rw-r--r-- | mandoc_headers.3 | 21 | ||||
-rw-r--r-- | mandocdb.c | 12 | ||||
-rw-r--r-- | mchars_alloc.3 | 27 | ||||
-rw-r--r-- | read.c | 6 | ||||
-rw-r--r-- | roff.c | 6 | ||||
-rw-r--r-- | term.c | 11 | ||||
-rw-r--r-- | term.h | 1 | ||||
-rw-r--r-- | term_ascii.c | 19 | ||||
-rw-r--r-- | term_ps.c | 14 |
22 files changed, 469 insertions, 632 deletions
@@ -117,7 +117,6 @@ DISTFILES = INSTALL \ TODO \ apropos.1 \ cgi.h.example \ - chars.in \ compat_fts.h \ compat_ohash.h \ compat_stringlist.h \ diff --git a/Makefile.depend b/Makefile.depend index abac604e..85d7d673 100644 --- a/Makefile.depend +++ b/Makefile.depend @@ -1,6 +1,6 @@ att.o: att.c config.h roff.h mdoc.h libmdoc.h cgi.o: cgi.c config.h mandoc_aux.h mandoc.h roff.h main.h manconf.h mansearch.h cgi.h -chars.o: chars.c config.h mandoc.h mandoc_aux.h libmandoc.h chars.in +chars.o: chars.c config.h mandoc.h mandoc_aux.h mandoc_ohash.h compat_ohash.h libmandoc.h compat_err.o: compat_err.c config.h compat_fgetln.o: compat_fgetln.c config.h compat_fts.o: compat_fts.c config.h compat_fts.h @@ -819,7 +819,6 @@ format(const struct req *req, const char *file) { struct manoutput conf; struct mparse *mp; - struct mchars *mchars; struct roff_man *man; void *vp; int fd; @@ -830,9 +829,8 @@ format(const struct req *req, const char *file) return; } - mchars = mchars_alloc(); - mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, - mchars, req->q.manpath); + mchars_alloc(); + mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath); mparse_readfd(mp, fd, file); close(fd); @@ -852,11 +850,11 @@ format(const struct req *req, const char *file) req->q.manpath, file); pg_error_internal(); mparse_free(mp); - mchars_free(mchars); + mchars_free(); return; } - vp = html_alloc(mchars, &conf); + vp = html_alloc(&conf); if (man->macroset == MACROSET_MDOC) html_mdoc(vp, man); @@ -865,7 +863,7 @@ format(const struct req *req, const char *file) html_free(vp); mparse_free(mp); - mchars_free(mchars); + mchars_free(); free(conf.man); } @@ -1,7 +1,7 @@ /* $Id$ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2011, 2014 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is 
hereby granted, provided that the above @@ -21,89 +21,428 @@ #include <assert.h> #include <ctype.h> +#include <stddef.h> +#include <stdint.h> #include <stdlib.h> #include <string.h> #include "mandoc.h" #include "mandoc_aux.h" +#include "mandoc_ohash.h" #include "libmandoc.h" -#define PRINT_HI 126 -#define PRINT_LO 32 - struct ln { - struct ln *next; - const char *code; + const char roffcode[16]; const char *ascii; int unicode; }; -#define LINES_MAX 332 - -#define CHAR(in, ch, code) \ - { NULL, (in), (ch), (code) }, - -#define CHAR_TBL_START static struct ln lines[LINES_MAX] = { -#define CHAR_TBL_END }; - -#include "chars.in" - -struct mchars { - struct ln **htab; +/* Special break control characters. */ +static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' }; +static const char ascii_break[2] = { ASCII_BREAK, '\0' }; + +static struct ln lines[] = { + + /* Spacing. */ + { " ", ascii_nbrsp, 0x00a0 }, + { "~", ascii_nbrsp, 0x00a0 }, + { "0", " ", 0x2002 }, + { "|", "", 0 }, + { "^", "", 0 }, + { "&", "", 0 }, + { "%", "", 0 }, + { ":", ascii_break, 0 }, + /* XXX The following three do not really belong here. */ + { "t", "", 0 }, + { "c", "", 0 }, + { "}", "", 0 }, + + /* Lines. */ + { "ba", "|", 0x007c }, + { "br", "|", 0x2502 }, + { "ul", "_", 0x005f }, + { "rn", "-", 0x203e }, + { "bb", "|", 0x00a6 }, + { "sl", "/", 0x002f }, + { "rs", "\\", 0x005c }, + + /* Text markers. */ + { "ci", "O", 0x25cb }, + { "bu", "+\bo", 0x2022 }, + { "dd", "|\b=", 0x2021 }, + { "dg", "|\b-", 0x2020 }, + { "lz", "<>", 0x25ca }, + { "sq", "[]", 0x25a1 }, + { "ps", "<par>", 0x00b6 }, + { "sc", "<sec>", 0x00a7 }, + { "lh", "<=", 0x261c }, + { "rh", "=>", 0x261e }, + { "at", "@", 0x0040 }, + { "sh", "#", 0x0023 }, + { "CR", "_|", 0x21b5 }, + { "OK", "\\/", 0x2713 }, + + /* Legal symbols. */ + { "co", "(C)", 0x00a9 }, + { "rg", "(R)", 0x00ae }, + { "tm", "tm", 0x2122 }, + + /* Punctuation. 
*/ + { "em", "--", 0x2014 }, + { "en", "-", 0x2013 }, + { "hy", "-", 0x2010 }, + { "e", "\\", 0x005c }, + { ".", ".", 0x002e }, + { "r!", "!", 0x00a1 }, + { "r?", "?", 0x00bf }, + + /* Quotes. */ + { "Bq", ",,", 0x201e }, + { "bq", ",", 0x201a }, + { "lq", "\"", 0x201c }, + { "rq", "\"", 0x201d }, + { "Lq", "``", 0x201c }, + { "Rq", "''", 0x201d }, + { "oq", "`", 0x2018 }, + { "cq", "\'", 0x2019 }, + { "aq", "\'", 0x0027 }, + { "dq", "\"", 0x0022 }, + { "Fo", "<<", 0x00ab }, + { "Fc", ">>", 0x00bb }, + { "fo", "<", 0x2039 }, + { "fc", ">", 0x203a }, + + /* Brackets. */ + { "lB", "[", 0x005b }, + { "rB", "]", 0x005d }, + { "lC", "{", 0x007b }, + { "rC", "}", 0x007d }, + { "la", "<", 0x27e8 }, + { "ra", ">", 0x27e9 }, + { "bv", "|", 0x23aa }, + { "braceex", "|", 0x23aa }, + { "bracketlefttp", "|", 0x23a1 }, + { "bracketleftbt", "|", 0x23a3 }, + { "bracketleftex", "|", 0x23a2 }, + { "bracketrighttp", "|", 0x23a4 }, + { "bracketrightbt", "|", 0x23a6 }, + { "bracketrightex", "|", 0x23a5 }, + { "lt", ",-", 0x23a7 }, + { "bracelefttp", ",-", 0x23a7 }, + { "lk", "{", 0x23a8 }, + { "braceleftmid", "{", 0x23a8 }, + { "lb", "`-", 0x23a9 }, + { "braceleftbt", "`-", 0x23a9 }, + { "braceleftex", "|", 0x23aa }, + { "rt", "-.", 0x23ab }, + { "bracerighttp", "-.", 0x23ab }, + { "rk", "}", 0x23ac }, + { "bracerightmid", "}", 0x23ac }, + { "rb", "-\'", 0x23ad }, + { "bracerightbt", "-\'", 0x23ad }, + { "bracerightex", "|", 0x23aa }, + { "parenlefttp", "/", 0x239b }, + { "parenleftbt", "\\", 0x239d }, + { "parenleftex", "|", 0x239c }, + { "parenrighttp", "\\", 0x239e }, + { "parenrightbt", "/", 0x23a0 }, + { "parenrightex", "|", 0x239f }, + + /* Arrows and lines. 
*/ + { "<-", "<-", 0x2190 }, + { "->", "->", 0x2192 }, + { "<>", "<->", 0x2194 }, + { "da", "|\bv", 0x2193 }, + { "ua", "|\b^", 0x2191 }, + { "va", "^v", 0x2195 }, + { "lA", "<=", 0x21d0 }, + { "rA", "=>", 0x21d2 }, + { "hA", "<=>", 0x21d4 }, + { "uA", "=\b^", 0x21d1 }, + { "dA", "=\bv", 0x21d3 }, + { "vA", "^=v", 0x21d5 }, + + /* Logic. */ + { "AN", "^", 0x2227 }, + { "OR", "v", 0x2228 }, + { "no", "~", 0x00ac }, + { "tno", "~", 0x00ac }, + { "te", "3", 0x2203 }, + { "fa", "-\bV", 0x2200 }, + { "st", "-)", 0x220b }, + { "tf", ".:.", 0x2234 }, + { "3d", ".:.", 0x2234 }, + { "or", "|", 0x007c }, + + /* Mathematicals. */ + { "pl", "+", 0x002b }, + { "mi", "-", 0x2212 }, + { "-", "-", 0x002d }, + { "-+", "-+", 0x2213 }, + { "+-", "+-", 0x00b1 }, + { "t+-", "+-", 0x00b1 }, + { "pc", ".", 0x00b7 }, + { "md", ".", 0x22c5 }, + { "mu", "x", 0x00d7 }, + { "tmu", "x", 0x00d7 }, + { "c*", "O\bx", 0x2297 }, + { "c+", "O\b+", 0x2295 }, + { "di", "-:-", 0x00f7 }, + { "tdi", "-:-", 0x00f7 }, + { "f/", "/", 0x2044 }, + { "**", "*", 0x2217 }, + { "<=", "<=", 0x2264 }, + { ">=", ">=", 0x2265 }, + { "<<", "<<", 0x226a }, + { ">>", ">>", 0x226b }, + { "eq", "=", 0x003d }, + { "!=", "!=", 0x2260 }, + { "==", "==", 0x2261 }, + { "ne", "!==", 0x2262 }, + { "ap", "~", 0x223c }, + { "|=", "-~", 0x2243 }, + { "=~", "=~", 0x2245 }, + { "~~", "~~", 0x2248 }, + { "~=", "~=", 0x2248 }, + { "pt", "oc", 0x221d }, + { "es", "{}", 0x2205 }, + { "mo", "E", 0x2208 }, + { "nm", "!E", 0x2209 }, + { "sb", "(=", 0x2282 }, + { "nb", "(!=", 0x2284 }, + { "sp", "=)", 0x2283 }, + { "nc", "!=)", 0x2285 }, + { "ib", "(=\b_", 0x2286 }, + { "ip", "=\b_)", 0x2287 }, + { "ca", "(^)", 0x2229 }, + { "cu", "U", 0x222a }, + { "/_", "_\b/", 0x2220 }, + { "pp", "_\b|", 0x22a5 }, + { "is", "'\b,\bI", 0x222b }, + { "integral", "'\b,\bI", 0x222b }, + { "sum", "E", 0x2211 }, + { "product", "TT", 0x220f }, + { "coproduct", "U", 0x2210 }, + { "gr", "V", 0x2207 }, + { "sr", "\\/", 0x221a }, + { "sqrt", "\\/", 0x221a }, + { 
"lc", "|~", 0x2308 }, + { "rc", "~|", 0x2309 }, + { "lf", "|_", 0x230a }, + { "rf", "_|", 0x230b }, + { "if", "oo", 0x221e }, + { "Ah", "N", 0x2135 }, + { "Im", "I", 0x2111 }, + { "Re", "R", 0x211c }, + { "pd", "a", 0x2202 }, + { "-h", "/h", 0x210f }, + { "12", "1/2", 0x00bd }, + { "14", "1/4", 0x00bc }, + { "34", "3/4", 0x00be }, + + /* Ligatures. */ + { "ff", "ff", 0xfb00 }, + { "fi", "fi", 0xfb01 }, + { "fl", "fl", 0xfb02 }, + { "Fi", "ffi", 0xfb03 }, + { "Fl", "ffl", 0xfb04 }, + { "AE", "AE", 0x00c6 }, + { "ae", "ae", 0x00e6 }, + { "OE", "OE", 0x0152 }, + { "oe", "oe", 0x0153 }, + { "ss", "ss", 0x00df }, + { "IJ", "IJ", 0x0132 }, + { "ij", "ij", 0x0133 }, + + /* Accents. */ + { "a\"", "\"", 0x02dd }, + { "a-", "-", 0x00af }, + { "a.", ".", 0x02d9 }, + { "a^", "^", 0x005e }, + { "aa", "\'", 0x00b4 }, + { "\'", "\'", 0x00b4 }, + { "ga", "`", 0x0060 }, + { "`", "`", 0x0060 }, + { "ab", "'\b`", 0x02d8 }, + { "ac", ",", 0x00b8 }, + { "ad", "\"", 0x00a8 }, + { "ah", "v", 0x02c7 }, + { "ao", "o", 0x02da }, + { "a~", "~", 0x007e }, + { "ho", ",", 0x02db }, + { "ha", "^", 0x005e }, + { "ti", "~", 0x007e }, + + /* Accented letters. 
*/ + { "'A", "'\bA", 0x00c1 }, + { "'E", "'\bE", 0x00c9 }, + { "'I", "'\bI", 0x00cd }, + { "'O", "'\bO", 0x00d3 }, + { "'U", "'\bU", 0x00da }, + { "'a", "'\ba", 0x00e1 }, + { "'e", "'\be", 0x00e9 }, + { "'i", "'\bi", 0x00ed }, + { "'o", "'\bo", 0x00f3 }, + { "'u", "'\bu", 0x00fa }, + { "`A", "`\bA", 0x00c0 }, + { "`E", "`\bE", 0x00c8 }, + { "`I", "`\bI", 0x00cc }, + { "`O", "`\bO", 0x00d2 }, + { "`U", "`\bU", 0x00d9 }, + { "`a", "`\ba", 0x00e0 }, + { "`e", "`\be", 0x00e8 }, + { "`i", "`\bi", 0x00ec }, + { "`o", "`\bo", 0x00f2 }, + { "`u", "`\bu", 0x00f9 }, + { "~A", "~\bA", 0x00c3 }, + { "~N", "~\bN", 0x00d1 }, + { "~O", "~\bO", 0x00d5 }, + { "~a", "~\ba", 0x00e3 }, + { "~n", "~\bn", 0x00f1 }, + { "~o", "~\bo", 0x00f5 }, + { ":A", "\"\bA", 0x00c4 }, + { ":E", "\"\bE", 0x00cb }, + { ":I", "\"\bI", 0x00cf }, + { ":O", "\"\bO", 0x00d6 }, + { ":U", "\"\bU", 0x00dc }, + { ":a", "\"\ba", 0x00e4 }, + { ":e", "\"\be", 0x00eb }, + { ":i", "\"\bi", 0x00ef }, + { ":o", "\"\bo", 0x00f6 }, + { ":u", "\"\bu", 0x00fc }, + { ":y", "\"\by", 0x00ff }, + { "^A", "^\bA", 0x00c2 }, + { "^E", "^\bE", 0x00ca }, + { "^I", "^\bI", 0x00ce }, + { "^O", "^\bO", 0x00d4 }, + { "^U", "^\bU", 0x00db }, + { "^a", "^\ba", 0x00e2 }, + { "^e", "^\be", 0x00ea }, + { "^i", "^\bi", 0x00ee }, + { "^o", "^\bo", 0x00f4 }, + { "^u", "^\bu", 0x00fb }, + { ",C", ",\bC", 0x00c7 }, + { ",c", ",\bc", 0x00e7 }, + { "/L", "/\bL", 0x0141 }, + { "/l", "/\bl", 0x0142 }, + { "/O", "/\bO", 0x00d8 }, + { "/o", "/\bo", 0x00f8 }, + { "oA", "o\bA", 0x00c5 }, + { "oa", "o\ba", 0x00e5 }, + + /* Special letters. */ + { "-D", "-\bD", 0x00d0 }, + { "Sd", "d", 0x00f0 }, + { "TP", "Th", 0x00de }, + { "Tp", "th", 0x00fe }, + { ".i", "i", 0x0131 }, + { ".j", "j", 0x0237 }, + + /* Currency. */ + { "Do", "$", 0x0024 }, + { "ct", "/\bc", 0x00a2 }, + { "Eu", "EUR", 0x20ac }, + { "eu", "EUR", 0x20ac }, + { "Ye", "=\bY", 0x00a5 }, + { "Po", "GBP", 0x00a3 }, + { "Cs", "o\bx", 0x00a4 }, + { "Fn", ",\bf", 0x0192 }, + + /* Units. 
*/ + { "de", "<deg>", 0x00b0 }, + { "%0", "%o", 0x2030 }, + { "fm", "\'", 0x2032 }, + { "sd", "''", 0x2033 }, + { "mc", ",\bu", 0x00b5 }, + + /* Greek characters. */ + { "*A", "A", 0x0391 }, + { "*B", "B", 0x0392 }, + { "*G", "G", 0x0393 }, + { "*D", "_\b/_\b\\", 0x0394 }, + { "*E", "E", 0x0395 }, + { "*Z", "Z", 0x0396 }, + { "*Y", "H", 0x0397 }, + { "*H", "-\bO", 0x0398 }, + { "*I", "I", 0x0399 }, + { "*K", "K", 0x039a }, + { "*L", "/\\", 0x039b }, + { "*M", "M", 0x039c }, + { "*N", "N", 0x039d }, + { "*C", "_\bH", 0x039e }, + { "*O", "O", 0x039f }, + { "*P", "TT", 0x03a0 }, + { "*R", "P", 0x03a1 }, + { "*S", "S", 0x03a3 }, + { "*T", "T", 0x03a4 }, + { "*U", "Y", 0x03a5 }, + { "*F", "I\bO", 0x03a6 }, + { "*X", "X", 0x03a7 }, + { "*Q", "I\bY", 0x03a8 }, + { "*W", "_\bO", 0x03a9 }, + { "*a", "a", 0x03b1 }, + { "*b", "B", 0x03b2 }, + { "*g", "y", 0x03b3 }, + { "*d", "d", 0x03b4 }, + { "*e", "e", 0x03b5 }, + { "*z", ",\bC", 0x03b6 }, + { "*y", "n", 0x03b7 }, + { "*h", "-\b0", 0x03b8 }, + { "*i", "i", 0x03b9 }, + { "*k", "k", 0x03ba }, + { "*l", ">\b\\", 0x03bb }, + { "*m", ",\bu", 0x03bc }, + { "*n", "v", 0x03bd }, + { "*c", ",\bE", 0x03be }, + { "*o", "o", 0x03bf }, + { "*p", "-\bn", 0x03c0 }, + { "*r", "p", 0x03c1 }, + { "*s", "-\bo", 0x03c3 }, + { "*t", "~\bt", 0x03c4 }, + { "*u", "u", 0x03c5 }, + { "*f", "|\bo", 0x03d5 }, + { "*x", "x", 0x03c7 }, + { "*q", "|\bu", 0x03c8 }, + { "*w", "w", 0x03c9 }, + { "+h", "-\b0", 0x03d1 }, + { "+f", "|\bo", 0x03c6 }, + { "+p", "-\bw", 0x03d6 }, + { "+e", "e", 0x03f5 }, + { "ts", "s", 0x03c2 }, }; -static const struct ln *find(const struct mchars *, - const char *, size_t); +static struct ohash mchars; void -mchars_free(struct mchars *arg) +mchars_free(void) { - free(arg->htab); - free(arg); + ohash_delete(&mchars); } -struct mchars * +void mchars_alloc(void) { - struct mchars *tab; - struct ln **htab; - struct ln *pp; - int i, hash; - - /* - * Constructs a very basic chaining hashtable. 
The hash routine - * is simply the integral value of the first character. - * Subsequent entries are chained in the order they're processed. - */ - - tab = mandoc_malloc(sizeof(struct mchars)); - htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln *)); - - for (i = 0; i < LINES_MAX; i++) { - hash = (int)lines[i].code[0] - PRINT_LO; - - if (NULL == (pp = htab[hash])) { - htab[hash] = &lines[i]; - continue; - } - - for ( ; pp->next; pp = pp->next) - /* Scan ahead. */ ; - pp->next = &lines[i]; + size_t i; + unsigned int slot; + + mandoc_ohash_init(&mchars, 9, offsetof(struct ln, roffcode)); + for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++) { + slot = ohash_qlookup(&mchars, lines[i].roffcode); + assert(ohash_find(&mchars, slot) == NULL); + ohash_insert(&mchars, slot, lines + i); } - - tab->htab = htab; - return tab; } int -mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz) +mchars_spec2cp(const char *p, size_t sz) { const struct ln *ln; + const char *end; - ln = find(arg, p, sz); + end = p + sz; + ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end)); return ln != NULL ? ln->unicode : sz == 1 ? (unsigned char)*p : -1; } @@ -127,12 +466,13 @@ mchars_num2uc(const char *p, size_t sz) } const char * -mchars_spec2str(const struct mchars *arg, - const char *p, size_t sz, size_t *rsz) +mchars_spec2str(const char *p, size_t sz, size_t *rsz) { const struct ln *ln; + const char *end; - ln = find(arg, p, sz); + end = p + sz; + ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end)); if (ln == NULL) { *rsz = 1; return sz == 1 ? 
p : NULL; @@ -145,31 +485,10 @@ mchars_spec2str(const struct mchars *arg, const char * mchars_uc2str(int uc) { - int i; + size_t i; - for (i = 0; i < LINES_MAX; i++) + for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++) if (uc == lines[i].unicode) return lines[i].ascii; return "<?>"; } - -static const struct ln * -find(const struct mchars *tab, const char *p, size_t sz) -{ - const struct ln *pp; - int hash; - - assert(p); - - if (0 == sz || p[0] < PRINT_LO || p[0] > PRINT_HI) - return NULL; - - hash = (int)p[0] - PRINT_LO; - - for (pp = tab->htab[hash]; pp; pp = pp->next) - if (0 == strncmp(pp->code, p, sz) && - '\0' == pp->code[(int)sz]) - return pp; - - return NULL; -} diff --git a/chars.in b/chars.in deleted file mode 100644 index f34a098f..00000000 --- a/chars.in +++ /dev/null @@ -1,404 +0,0 @@ -/* $Id$ */ -/* - * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -/* - * The ASCII translation tables. - * - * The left-hand side corresponds to the input sequence (\x, \(xx, \*(xx - * and so on) whose length is listed second element. The right-hand - * side is what's produced by the front-end, with the fourth element - * being its length. 
- * - * XXX - C-escape strings! - * XXX - update LINES_MAX if adding more! - */ - -/* Special break control characters. */ -static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' }; -static const char ascii_break[2] = { ASCII_BREAK, '\0' }; - -CHAR_TBL_START - -/* Spacing. */ -CHAR(" ", ascii_nbrsp, 160) -CHAR("~", ascii_nbrsp, 160) -CHAR("0", " ", 8194) -CHAR("|", "", 0) -CHAR("^", "", 0) -CHAR("&", "", 0) -CHAR("%", "", 0) -CHAR(":", ascii_break, 0) -/* XXX The following three do not really belong into this file. */ -CHAR("t", "", 0) -CHAR("c", "", 0) -CHAR("}", "", 0) - -/* Accents. */ -CHAR("a\"", "\"", 733) -CHAR("a-", "-", 175) -CHAR("a.", ".", 729) -CHAR("a^", "^", 94) -CHAR("\'", "\'", 180) -CHAR("aa", "\'", 180) -CHAR("ga", "`", 96) -CHAR("`", "`", 96) -CHAR("ab", "'\b`", 728) -CHAR("ac", ",", 184) -CHAR("ad", "\"", 168) -CHAR("ah", "v", 711) -CHAR("ao", "o", 730) -CHAR("a~", "~", 126) -CHAR("ho", ",", 731) -CHAR("ha", "^", 94) -CHAR("ti", "~", 126) - -/* Quotes. */ -CHAR("Bq", ",,", 8222) -CHAR("bq", ",", 8218) -CHAR("lq", "\"", 8220) -CHAR("rq", "\"", 8221) -CHAR("Lq", "``", 8220) -CHAR("Rq", "''", 8221) -CHAR("oq", "`", 8216) -CHAR("cq", "\'", 8217) -CHAR("aq", "\'", 39) -CHAR("dq", "\"", 34) -CHAR("Fo", "<<", 171) -CHAR("Fc", ">>", 187) -CHAR("fo", "<", 8249) -CHAR("fc", ">", 8250) - -/* Brackets. 
*/ -CHAR("lB", "[", 91) -CHAR("rB", "]", 93) -CHAR("lC", "{", 123) -CHAR("rC", "}", 125) -CHAR("la", "<", 10216) -CHAR("ra", ">", 10217) -CHAR("bv", "|", 9130) -CHAR("braceex", "|", 9130) -CHAR("bracketlefttp", "|", 9121) -CHAR("bracketleftbt", "|", 9123) -CHAR("bracketleftex", "|", 9122) -CHAR("bracketrighttp", "|", 9124) -CHAR("bracketrightbt", "|", 9126) -CHAR("bracketrightex", "|", 9125) -CHAR("lt", ",-", 9127) -CHAR("bracelefttp", ",-", 9127) -CHAR("lk", "{", 9128) -CHAR("braceleftmid", "{", 9128) -CHAR("lb", "`-", 9129) -CHAR("braceleftbt", "`-", 9129) -CHAR("braceleftex", "|", 9130) -CHAR("rt", "-.", 9131) -CHAR("bracerighttp", "-.", 9131) -CHAR("rk", "}", 9132) -CHAR("bracerightmid", "}", 9132) -CHAR("rb", "-\'", 9133) -CHAR("bracerightbt", "-\'", 9133) -CHAR("bracerightex", "|", 9130) -CHAR("parenlefttp", "/", 9115) -CHAR("parenleftbt", "\\", 9117) -CHAR("parenleftex", "|", 9116) -CHAR("parenrighttp", "\\", 9118) -CHAR("parenrightbt", "/", 9120) -CHAR("parenrightex", "|", 9119) - -/* Greek characters. 
*/ -CHAR("*A", "A", 913) -CHAR("*B", "B", 914) -CHAR("*G", "G", 915) -CHAR("*D", "_\b/_\b\\", 916) -CHAR("*E", "E", 917) -CHAR("*Z", "Z", 918) -CHAR("*Y", "H", 919) -CHAR("*H", "-\bO", 920) -CHAR("*I", "I", 921) -CHAR("*K", "K", 922) -CHAR("*L", "/\\", 923) -CHAR("*M", "M", 924) -CHAR("*N", "N", 925) -CHAR("*C", "_\bH", 926) -CHAR("*O", "O", 927) -CHAR("*P", "TT", 928) -CHAR("*R", "P", 929) -CHAR("*S", "S", 931) -CHAR("*T", "T", 932) -CHAR("*U", "Y", 933) -CHAR("*F", "I\bO", 934) -CHAR("*X", "X", 935) -CHAR("*Q", "I\bY", 936) -CHAR("*W", "_\bO", 937) -CHAR("*a", "a", 945) -CHAR("*b", "B", 946) -CHAR("*g", "y", 947) -CHAR("*d", "d", 948) -CHAR("*e", "e", 949) -CHAR("*z", ",\bC", 950) -CHAR("*y", "n", 951) -CHAR("*h", "-\b0", 952) -CHAR("*i", "i", 953) -CHAR("*k", "k", 954) -CHAR("*l", ">\b\\", 955) -CHAR("*m", ",\bu", 956) -CHAR("*n", "v", 957) -CHAR("*c", ",\bE", 958) -CHAR("*o", "o", 959) -CHAR("*p", "-\bn", 960) -CHAR("*r", "p", 961) -CHAR("*s", "-\bo", 963) -CHAR("*t", "~\bt", 964) -CHAR("*u", "u", 965) -CHAR("*f", "|\bo", 981) -CHAR("*x", "x", 967) -CHAR("*q", "|\bu", 968) -CHAR("*w", "w", 969) -CHAR("+h", "-\b0", 977) -CHAR("+f", "|\bo", 966) -CHAR("+p", "-\bw", 982) -CHAR("+e", "e", 1013) -CHAR("ts", "s", 962) - -/* Accented letters. 
*/ -CHAR(",C", ",\bC", 199) -CHAR(",c", ",\bc", 231) -CHAR("/L", "/\bL", 321) -CHAR("/O", "/\bO", 216) -CHAR("/l", "/\bl", 322) -CHAR("/o", "/\bo", 248) -CHAR("oA", "o\bA", 197) -CHAR("oa", "o\ba", 229) -CHAR(":A", "\"\bA", 196) -CHAR(":E", "\"\bE", 203) -CHAR(":I", "\"\bI", 207) -CHAR(":O", "\"\bO", 214) -CHAR(":U", "\"\bU", 220) -CHAR(":a", "\"\ba", 228) -CHAR(":e", "\"\be", 235) -CHAR(":i", "\"\bi", 239) -CHAR(":o", "\"\bo", 246) -CHAR(":u", "\"\bu", 252) -CHAR(":y", "\"\by", 255) -CHAR("'A", "'\bA", 193) -CHAR("'E", "'\bE", 201) -CHAR("'I", "'\bI", 205) -CHAR("'O", "'\bO", 211) -CHAR("'U", "'\bU", 218) -CHAR("'a", "'\ba", 225) -CHAR("'e", "'\be", 233) -CHAR("'i", "'\bi", 237) -CHAR("'o", "'\bo", 243) -CHAR("'u", "'\bu", 250) -CHAR("^A", "^\bA", 194) -CHAR("^E", "^\bE", 202) -CHAR("^I", "^\bI", 206) -CHAR("^O", "^\bO", 212) -CHAR("^U", "^\bU", 219) -CHAR("^a", "^\ba", 226) -CHAR("^e", "^\be", 234) -CHAR("^i", "^\bi", 238) -CHAR("^o", "^\bo", 244) -CHAR("^u", "^\bu", 251) -CHAR("`A", "`\bA", 192) -CHAR("`E", "`\bE", 200) -CHAR("`I", "`\bI", 204) -CHAR("`O", "`\bO", 210) -CHAR("`U", "`\bU", 217) -CHAR("`a", "`\ba", 224) -CHAR("`e", "`\be", 232) -CHAR("`i", "`\bi", 236) -CHAR("`o", "`\bo", 242) -CHAR("`u", "`\bu", 249) -CHAR("~A", "~\bA", 195) -CHAR("~N", "~\bN", 209) -CHAR("~O", "~\bO", 213) -CHAR("~a", "~\ba", 227) -CHAR("~n", "~\bn", 241) -CHAR("~o", "~\bo", 245) - -/* Arrows and lines. */ -CHAR("<-", "<-", 8592) -CHAR("->", "->", 8594) -CHAR("<>", "<->", 8596) -CHAR("da", "|\bv", 8595) -CHAR("ua", "|\b^", 8593) -CHAR("va", "^v", 8597) -CHAR("lA", "<=", 8656) -CHAR("rA", "=>", 8658) -CHAR("hA", "<=>", 8660) -CHAR("dA", "=\bv", 8659) -CHAR("uA", "=\b^", 8657) -CHAR("vA", "^=v", 8661) - -/* Logic. */ -CHAR("AN", "^", 8743) -CHAR("OR", "v", 8744) -CHAR("no", "~", 172) -CHAR("tno", "~", 172) -CHAR("te", "3", 8707) -CHAR("fa", "-\bV", 8704) -CHAR("st", "-)", 8715) -CHAR("tf", ".:.", 8756) -CHAR("3d", ".:.", 8756) -CHAR("or", "|", 124) - -/* Mathematicals. 
*/ -CHAR("pl", "+", 43) -CHAR("mi", "-", 8722) -CHAR("-", "-", 45) -CHAR("-+", "-+", 8723) -CHAR("+-", "+-", 177) -CHAR("t+-", "+-", 177) -CHAR("pc", ".", 183) -CHAR("md", ".", 8901) -CHAR("mu", "x", 215) -CHAR("tmu", "x", 215) -CHAR("c*", "O\bx", 8855) -CHAR("c+", "O\b+", 8853) -CHAR("di", "-:-", 247) -CHAR("tdi", "-:-", 247) -CHAR("f/", "/", 8260) -CHAR("**", "*", 8727) -CHAR("<=", "<=", 8804) -CHAR(">=", ">=", 8805) -CHAR("<<", "<<", 8810) -CHAR(">>", ">>", 8811) -CHAR("eq", "=", 61) -CHAR("!=", "!=", 8800) -CHAR("==", "==", 8801) -CHAR("ne", "!==", 8802) -CHAR("=~", "=~", 8773) -CHAR("|=", "-~", 8771) -CHAR("ap", "~", 8764) -CHAR("~~", "~~", 8776) -CHAR("~=", "~=", 8776) -CHAR("pt", "oc", 8733) -CHAR("es", "{}", 8709) -CHAR("mo", "E", 8712) -CHAR("nm", "!E", 8713) -CHAR("sb", "(=", 8834) -CHAR("nb", "(!=", 8836) -CHAR("sp", "=)", 8835) -CHAR("nc", "!=)", 8837) -CHAR("ib", "(=\b_", 8838) -CHAR("ip", "=\b_)", 8839) -CHAR("ca", "(^)", 8745) -CHAR("cu", "U", 8746) -CHAR("/_", "_\b/", 8736) -CHAR("pp", "_\b|", 8869) -CHAR("is", "'\b,\bI", 8747) -CHAR("integral", "'\b,\bI", 8747) -CHAR("sum", "E", 8721) -CHAR("product", "TT", 8719) -CHAR("coproduct", "U", 8720) -CHAR("gr", "V", 8711) -CHAR("sr", "\\/", 8730) -CHAR("sqrt", "\\/", 8730) -CHAR("lc", "|~", 8968) -CHAR("rc", "~|", 8969) -CHAR("lf", "|_", 8970) -CHAR("rf", "_|", 8971) -CHAR("if", "oo", 8734) -CHAR("Ah", "N", 8501) -CHAR("Im", "I", 8465) -CHAR("Re", "R", 8476) -CHAR("pd", "a", 8706) -CHAR("-h", "/h", 8463) -CHAR("12", "1/2", 189) -CHAR("14", "1/4", 188) -CHAR("34", "3/4", 190) - -/* Ligatures. */ -CHAR("ff", "ff", 64256) -CHAR("fi", "fi", 64257) -CHAR("fl", "fl", 64258) -CHAR("Fi", "ffi", 64259) -CHAR("Fl", "ffl", 64260) -CHAR("AE", "AE", 198) -CHAR("ae", "ae", 230) -CHAR("OE", "OE", 338) -CHAR("oe", "oe", 339) -CHAR("ss", "ss", 223) -CHAR("IJ", "IJ", 306) -CHAR("ij", "ij", 307) - -/* Special letters. 
*/ -CHAR("-D", "-\bD", 208) -CHAR("Sd", "d", 240) -CHAR("TP", "Th", 222) -CHAR("Tp", "th", 254) -CHAR(".i", "i", 305) -CHAR(".j", "j", 567) - -/* Currency. */ -CHAR("Do", "$", 36) -CHAR("ct", "/\bc", 162) -CHAR("Eu", "EUR", 8364) -CHAR("eu", "EUR", 8364) -CHAR("Ye", "=\bY", 165) -CHAR("Po", "GBP", 163) -CHAR("Cs", "o\bx", 164) -CHAR("Fn", ",\bf", 402) - -/* Lines. */ -CHAR("ba", "|", 124) -CHAR("br", "|", 9474) -CHAR("ul", "_", 95) -CHAR("rn", "-", 8254) -CHAR("bb", "|", 166) -CHAR("sl", "/", 47) -CHAR("rs", "\\", 92) - -/* Text markers. */ -CHAR("ci", "O", 9675) -CHAR("bu", "+\bo", 8226) -CHAR("dd", "|\b=", 8225) -CHAR("dg", "|\b-", 8224) -CHAR("lz", "<>", 9674) -CHAR("sq", "[]", 9633) -CHAR("ps", "<par>", 182) -CHAR("sc", "<sec>", 167) -CHAR("lh", "<=", 9756) -CHAR("rh", "=>", 9758) -CHAR("at", "@", 64) -CHAR("sh", "#", 35) -CHAR("CR", "_|", 8629) -CHAR("OK", "\\/", 10003) - -/* Legal symbols. */ -CHAR("co", "(C)", 169) -CHAR("rg", "(R)", 174) -CHAR("tm", "tm", 8482) - -/* Punctuation. */ -CHAR(".", ".", 46) -CHAR("r!", "!", 161) -CHAR("r?", "?", 191) -CHAR("em", "--", 8212) -CHAR("en", "-", 8211) -CHAR("hy", "-", 8208) -CHAR("e", "\\", 92) - -/* Units. 
*/ -CHAR("de", "<deg>", 176) -CHAR("%0", "%o", 8240) -CHAR("fm", "\'", 8242) -CHAR("sd", "''", 8243) -CHAR("mc", ",\bu", 181) - -CHAR_TBL_END @@ -44,7 +44,6 @@ int main(int argc, char *argv[]) { struct mparse *mp; - struct mchars *mchars; int ch, fd, i, list; extern int optind; @@ -79,8 +78,8 @@ main(int argc, char *argv[]) argc -= optind; argv += optind; - mchars = mchars_alloc(); - mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, mchars, NULL); + mchars_alloc(); + mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, NULL); assert(mp); if (argc < 1) @@ -96,7 +95,7 @@ main(int argc, char *argv[]) } mparse_free(mp); - mchars_free(mchars); + mchars_free(); return (int)MANDOCLEVEL_OK; } @@ -130,15 +130,13 @@ static void print_attr(struct html *, const char *, const char *); void * -html_alloc(const struct mchars *mchars, const struct manoutput *outopts) +html_alloc(const struct manoutput *outopts) { struct html *h; h = mandoc_calloc(1, sizeof(struct html)); h->tags.head = NULL; - h->symtab = mchars; - h->style = outopts->style; h->base_man = outopts->man; h->base_includes = outopts->includes; @@ -398,7 +396,7 @@ print_encode(struct html *h, const char *p, int norecurse) continue; break; case ESCAPE_SPECIAL: - c = mchars_spec2cp(h->symtab, seq, len); + c = mchars_spec2cp(seq, len); if (c <= 0) continue; break; @@ -130,7 +130,6 @@ struct html { struct tagq tags; /* stack of open tags */ struct rofftbl tbl; /* current table */ struct tag *tblt; /* current open table scope */ - const struct mchars *symtab; /* character table */ char *base_man; /* base for manpage href */ char *base_includes; /* base for include href */ char *style; /* style-sheet URI */ diff --git a/libmandoc.h b/libmandoc.h index 42bcb059..f570a557 100644 --- a/libmandoc.h +++ b/libmandoc.h @@ -35,7 +35,6 @@ struct buf { __BEGIN_DECLS struct mparse; -struct mchars; struct tbl_span; struct eqn; struct roff; @@ -68,7 +67,7 @@ int preconv_encode(struct buf *, size_t *, struct buf *, size_t *, int 
*); void roff_free(struct roff *); -struct roff *roff_alloc(struct mparse *, const struct mchars *, int); +struct roff *roff_alloc(struct mparse *, int); void roff_reset(struct roff *); void roff_man_free(struct roff_man *); struct roff_man *roff_man_alloc(struct roff *, struct mparse *, @@ -77,7 +77,6 @@ enum outt { struct curparse { struct mparse *mp; - struct mchars *mchars; /* character table */ enum mandoclevel wlevel; /* ignore messages below this */ int wstop; /* stop after a file with a warning */ enum outt outtype; /* which output to use */ @@ -422,9 +421,8 @@ main(int argc, char *argv[]) if (search.argmode == ARG_FILE && ! moptions(&options, auxpaths)) return (int)MANDOCLEVEL_BADARG; - curp.mchars = mchars_alloc(); - curp.mp = mparse_alloc(options, curp.wlevel, mmsg, - curp.mchars, defos); + mchars_alloc(); + curp.mp = mparse_alloc(options, curp.wlevel, mmsg, defos); /* * Conditionally start up the lookaside buffer before parsing. @@ -478,7 +476,7 @@ main(int argc, char *argv[]) if (curp.outfree) (*curp.outfree)(curp.outdata); mparse_free(curp.mp); - mchars_free(curp.mchars); + mchars_free(); out: if (search.argmode != ARG_FILE) { @@ -662,33 +660,27 @@ parse(struct curparse *curp, int fd, const char *file) if ( ! 
(curp->outman && curp->outmdoc)) { switch (curp->outtype) { case OUTT_HTML: - curp->outdata = html_alloc(curp->mchars, - curp->outopts); + curp->outdata = html_alloc(curp->outopts); curp->outfree = html_free; break; case OUTT_UTF8: - curp->outdata = utf8_alloc(curp->mchars, - curp->outopts); + curp->outdata = utf8_alloc(curp->outopts); curp->outfree = ascii_free; break; case OUTT_LOCALE: - curp->outdata = locale_alloc(curp->mchars, - curp->outopts); + curp->outdata = locale_alloc(curp->outopts); curp->outfree = ascii_free; break; case OUTT_ASCII: - curp->outdata = ascii_alloc(curp->mchars, - curp->outopts); + curp->outdata = ascii_alloc(curp->outopts); curp->outfree = ascii_free; break; case OUTT_PDF: - curp->outdata = pdf_alloc(curp->mchars, - curp->outopts); + curp->outdata = pdf_alloc(curp->outopts); curp->outfree = pspdf_free; break; case OUTT_PS: - curp->outdata = ps_alloc(curp->mchars, - curp->outopts); + curp->outdata = ps_alloc(curp->outopts); curp->outfree = pspdf_free; break; default: @@ -20,7 +20,6 @@ __BEGIN_DECLS -struct mchars; struct roff_man; struct manoutput; @@ -31,8 +30,7 @@ struct manoutput; * terminal output routines with different character settings. 
*/ -void *html_alloc(const struct mchars *, - const struct manoutput *); +void *html_alloc(const struct manoutput *); void html_mdoc(void *, const struct roff_man *); void html_man(void *, const struct roff_man *); void html_free(void *); @@ -43,19 +41,14 @@ void tree_man(void *, const struct roff_man *); void man_mdoc(void *, const struct roff_man *); void man_man(void *, const struct roff_man *); -void *locale_alloc(const struct mchars *, - const struct manoutput *); -void *utf8_alloc(const struct mchars *, - const struct manoutput *); -void *ascii_alloc(const struct mchars *, - const struct manoutput *); +void *locale_alloc(const struct manoutput *); +void *utf8_alloc(const struct manoutput *); +void *ascii_alloc(const struct manoutput *); void ascii_free(void *); void ascii_sepline(void *); -void *pdf_alloc(const struct mchars *, - const struct manoutput *); -void *ps_alloc(const struct mchars *, - const struct manoutput *); +void *pdf_alloc(const struct manoutput *); +void *ps_alloc(const struct manoutput *); void pspdf_free(void *); void terminal_mdoc(void *, const struct roff_man *); @@ -50,7 +50,6 @@ .Fa "int options" .Fa "enum mandoclevel wlevel" .Fa "mandocmsg mmsg" -.Fa "const struct mchars *mchars" .Fa "char *defos" .Fc .Ft void @@ -210,12 +209,6 @@ An error or warning message during parsing. A classification of an .Vt "enum mandocerr" as regards system operation. -.It Vt "struct mchars" -An opaque pointer to a a character table. -Created with -.Xr mchars_alloc 3 -and freed with -.Xr mchars_free 3 . .It Vt "struct mparse" An opaque pointer to a running parse sequence. Created with @@ -340,9 +333,6 @@ A callback function to handle errors and warnings. See .Pa main.c for an example. -.It Ar mchars -An opaque pointer to a a character table obtained from -.Xr mchars_alloc 3 . 
.It Ar defos A default string for the .Xr mdoc 7 @@ -410,21 +410,17 @@ typedef void (*mandocmsg)(enum mandocerr, enum mandoclevel, __BEGIN_DECLS struct mparse; -struct mchars; struct roff_man; enum mandoc_esc mandoc_escape(const char **, const char **, int *); -struct mchars *mchars_alloc(void); -void mchars_free(struct mchars *); +void mchars_alloc(void); +void mchars_free(void); int mchars_num2char(const char *, size_t); const char *mchars_uc2str(int); int mchars_num2uc(const char *, size_t); -int mchars_spec2cp(const struct mchars *, - const char *, size_t); -const char *mchars_spec2str(const struct mchars *, - const char *, size_t, size_t *); -struct mparse *mparse_alloc(int, enum mandoclevel, mandocmsg, - const struct mchars *, const char *); +int mchars_spec2cp(const char *, size_t); +const char *mchars_spec2str(const char *, size_t, size_t *); +struct mparse *mparse_alloc(int, enum mandoclevel, mandocmsg, const char *); void mparse_free(struct mparse *); void mparse_keep(struct mparse *); enum mandoclevel mparse_open(struct mparse *, int *, const char *); diff --git a/mandoc_headers.3 b/mandoc_headers.3 index 85cf1894..6c30e164 100644 --- a/mandoc_headers.3 +++ b/mandoc_headers.3 @@ -98,14 +98,10 @@ and the functions described in .Xr mandoc 3 . .Pp -Uses the opaque types +Uses the opaque type .Vt struct mparse from .Pa read.c -and -.Vt struct mchars -from -.Pa chars.c for function prototypes. Uses the types .Vt struct mdoc @@ -411,11 +407,7 @@ Provides .Vt struct termp , and many terminal formatting functions. .Pp -Uses the opaque types -.Vt struct mchars -from -.Pa chars.c -and +Uses the opaque type .Vt struct termp_ps from .Pa term_ps.c . @@ -460,11 +452,6 @@ Provides .Vt struct html , and many HTML formatting functions. .Pp -Uses the opaque type -.Vt struct mchars -from -.Pa chars.c . 
-.Pp When this header is included, the same file should not include .Pa term.h or @@ -472,10 +459,6 @@ or .It Qq Pa main.h Provides the top level steering functions for all formatters. .Pp -Uses the opaque type -.Vt struct mchars -from -.Pa chars.c . Uses the types .Vt struct mdoc from @@ -195,7 +195,6 @@ static int write_utf8; /* write UTF-8 output; else ASCII */ static int exitcode; /* to be returned by main */ static enum op op; /* operational mode */ static char basedir[PATH_MAX]; /* current base directory */ -static struct mchars *mchars; /* table of named characters */ static struct ohash mpages; /* table of distinct manual pages */ static struct ohash mlinks; /* table of directory entries */ static struct ohash names; /* table of all names */ @@ -419,9 +418,8 @@ mandocdb(int argc, char *argv[]) } exitcode = (int)MANDOCLEVEL_OK; - mchars = mchars_alloc(); - mp = mparse_alloc(mparse_options, MANDOCLEVEL_BADARG, NULL, - mchars, NULL); + mchars_alloc(); + mp = mparse_alloc(mparse_options, MANDOCLEVEL_BADARG, NULL, NULL); mandoc_ohash_init(&mpages, 6, offsetof(struct mpage, inodev)); mandoc_ohash_init(&mlinks, 6, offsetof(struct mlink, file)); @@ -522,7 +520,7 @@ mandocdb(int argc, char *argv[]) out: manconf_free(&conf); mparse_free(mp); - mchars_free(mchars); + mchars_free(); mpages_free(); ohash_delete(&mpages); ohash_delete(&mlinks); @@ -1928,7 +1926,7 @@ render_string(char **public, size_t *psz) */ if (write_utf8) { - unicode = mchars_spec2cp(mchars, seq, seqlen); + unicode = mchars_spec2cp(seq, seqlen); if (unicode <= 0) continue; addsz = utf8(unicode, utfbuf); @@ -1936,7 +1934,7 @@ render_string(char **public, size_t *psz) continue; addcp = utfbuf; } else { - addcp = mchars_spec2str(mchars, seq, seqlen, &addsz); + addcp = mchars_spec2str(seq, seqlen, &addsz); if (addcp == NULL) continue; if (*addcp == ASCII_NBRSP) { diff --git a/mchars_alloc.3 b/mchars_alloc.3 index 44442e4a..cbdf081e 100644 --- a/mchars_alloc.3 +++ b/mchars_alloc.3 @@ -25,17 +25,13 @@ .Nm 
mchars_spec2cp , .Nm mchars_spec2str .Nd character table for mandoc -.Sh LIBRARY -.Lb libmandoc .Sh SYNOPSIS .In sys/types.h .In mandoc.h -.Ft "struct mchars *" -.Fn mchars_alloc "void" .Ft void -.Fo mchars_free -.Fa "struct mchars *table" -.Fc +.Fn mchars_alloc void +.Ft void +.Fn mchars_free void .Ft char .Fo mchars_num2char .Fa "const char *decimal" @@ -48,13 +44,11 @@ .Fc .Ft int .Fo mchars_spec2cp -.Fa "const struct mchars *table" .Fa "const char *name" .Fa "size_t sz" .Fc .Ft "const char *" .Fo mchars_spec2str -.Fa "const struct mchars *table" .Fa "const char *name" .Fa "size_t sz" .Fa "size_t *rsz" @@ -135,9 +129,9 @@ escape sequences. .Pp The function .Fn mchars_alloc -allocates an opaque -.Vt "struct mchars *" -table object for subsequent use by the following two lookup functions. +initializes a static +.Vt "struct ohash" +object for subsequent use by the following two lookup functions. When no longer needed, this object can be destroyed with .Fn mchars_free . .Pp @@ -149,9 +143,7 @@ special character .Fa name consisting of .Fa sz -characters in the -.Fa table -and returns the corresponding Unicode codepoint. +characters and returns the corresponding Unicode codepoint. If the .Ar name is not recognized, \-1 is returned. @@ -175,9 +167,7 @@ special character .Fa name consisting of .Fa sz -characters in the -.Fa table -and returns an ASCII string representation. +characters and returns an ASCII string representation. The length of the representation is returned in .Fa rsz . 
In many cases, the meaning of such ASCII representations @@ -215,6 +205,7 @@ These funtions are implemented in the file .Sh SEE ALSO .Xr mandoc 1 , .Xr mandoc_escape 3 , +.Xr ohash_init 3 , .Xr mandoc_char 7 , .Xr roff 7 .Sh HISTORY @@ -50,7 +50,6 @@ struct mparse { struct roff_man *man; /* man parser */ struct roff *roff; /* roff parser (!NULL) */ - const struct mchars *mchars; /* character table */ char *sodest; /* filename pointed to by .so */ const char *file; /* filename of current input file */ struct buf *primary; /* buffer currently being parsed */ @@ -815,7 +814,7 @@ mparse_open(struct mparse *curp, int *fd, const char *file) struct mparse * mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg, - const struct mchars *mchars, const char *defos) + const char *defos) { struct mparse *curp; @@ -826,8 +825,7 @@ mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg, curp->mmsg = mmsg; curp->defos = defos; - curp->mchars = mchars; - curp->roff = roff_alloc(curp, curp->mchars, options); + curp->roff = roff_alloc(curp, options); curp->man = roff_man_alloc( curp->roff, curp, curp->defos, curp->options & MPARSE_QUICK ? 
1 : 0); if (curp->options & MPARSE_MDOC) { @@ -316,7 +316,6 @@ struct roffreg { struct roff { struct mparse *parse; /* parse point */ - const struct mchars *mchars; /* character table */ struct roffnode *last; /* leaf of stack */ int *rstack; /* stack of inverted `ie' values */ struct roffreg *regtab; /* number registers */ @@ -901,13 +900,12 @@ roff_free(struct roff *r) } struct roff * -roff_alloc(struct mparse *parse, const struct mchars *mchars, int options) +roff_alloc(struct mparse *parse, int options) { struct roff *r; r = mandoc_calloc(1, sizeof(struct roff)); r->parse = parse; - r->mchars = mchars; r->options = options; r->format = options & (MPARSE_MDOC | MPARSE_MAN); r->rstackpos = -1; @@ -1344,7 +1342,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos) esc = mandoc_escape(&cp, &stnam, &inaml); if (esc == ESCAPE_ERROR || (esc == ESCAPE_SPECIAL && - mchars_spec2cp(r->mchars, stnam, inaml) < 0)) + mchars_spec2cp(stnam, inaml) < 0)) mandoc_vmsg(MANDOCERR_ESC_BAD, r->parse, ln, (int)(stesc - buf->buf), "%.*s", (int)(cp - stesc), stesc); @@ -454,12 +454,11 @@ term_word(struct termp *p, const char *word) break; case ESCAPE_SPECIAL: if (p->enc == TERMENC_ASCII) { - cp = mchars_spec2str(p->symtab, - seq, sz, &ssz); + cp = mchars_spec2str(seq, sz, &ssz); if (cp != NULL) encode(p, cp, ssz); } else { - uc = mchars_spec2cp(p->symtab, seq, sz); + uc = mchars_spec2cp(seq, sz); if (uc > 0) encode1(p, uc); } @@ -700,13 +699,11 @@ term_strlen(const struct termp *p, const char *cp) break; case ESCAPE_SPECIAL: if (p->enc == TERMENC_ASCII) { - rhs = mchars_spec2str(p->symtab, - seq, ssz, &rsz); + rhs = mchars_spec2str(seq, ssz, &rsz); if (rhs != NULL) break; } else { - uc = mchars_spec2cp(p->symtab, - seq, ssz); + uc = mchars_spec2cp(seq, ssz); if (uc > 0) sz += cond_width(p, uc, &skip); } @@ -86,7 +86,6 @@ struct termp { #define TERMP_NONEWLINE (1 << 15) /* No line break in nofill mode. */ int *buf; /* Output buffer. */ enum termenc enc; /* Type of encoding. 
*/ - const struct mchars *symtab; /* Character table. */ enum termfont fontl; /* Last font set. */ enum termfont *fontq; /* Symmetric fonts. */ int fontsz; /* Allocated size of font stack */ diff --git a/term_ascii.c b/term_ascii.c index b3aac5ea..1d587c63 100644 --- a/term_ascii.c +++ b/term_ascii.c @@ -38,8 +38,7 @@ #include "manconf.h" #include "main.h" -static struct termp *ascii_init(enum termenc, const struct mchars *, - const struct manoutput *); +static struct termp *ascii_init(enum termenc, const struct manoutput *); static int ascii_hspan(const struct termp *, const struct roffsu *); static size_t ascii_width(const struct termp *, int); @@ -59,8 +58,7 @@ static size_t locale_width(const struct termp *, int); static struct termp * -ascii_init(enum termenc enc, const struct mchars *mchars, - const struct manoutput *outopts) +ascii_init(enum termenc enc, const struct manoutput *outopts) { #if HAVE_WCHAR char *v; @@ -69,7 +67,6 @@ ascii_init(enum termenc enc, const struct mchars *mchars, p = mandoc_calloc(1, sizeof(struct termp)); - p->symtab = mchars; p->line = 1; p->tabwidth = 5; p->defrmargin = p->lastrmargin = 78; @@ -119,24 +116,24 @@ ascii_init(enum termenc enc, const struct mchars *mchars, } void * -ascii_alloc(const struct mchars *mchars, const struct manoutput *outopts) +ascii_alloc(const struct manoutput *outopts) { - return ascii_init(TERMENC_ASCII, mchars, outopts); + return ascii_init(TERMENC_ASCII, outopts); } void * -utf8_alloc(const struct mchars *mchars, const struct manoutput *outopts) +utf8_alloc(const struct manoutput *outopts) { - return ascii_init(TERMENC_UTF8, mchars, outopts); + return ascii_init(TERMENC_UTF8, outopts); } void * -locale_alloc(const struct mchars *mchars, const struct manoutput *outopts) +locale_alloc(const struct manoutput *outopts) { - return ascii_init(TERMENC_LOCALE, mchars, outopts); + return ascii_init(TERMENC_LOCALE, outopts); } static void @@ -108,8 +108,7 @@ static void ps_printf(struct termp *, const char *, 
...); static void ps_putchar(struct termp *, char); static void ps_setfont(struct termp *, enum termfont); static void ps_setwidth(struct termp *, int, int); -static struct termp *pspdf_alloc(const struct mchars *, - const struct manoutput *); +static struct termp *pspdf_alloc(const struct manoutput *); static void pdf_obj(struct termp *, size_t); /* @@ -510,29 +509,29 @@ static const struct font fonts[TERMFONT__MAX] = { }; void * -pdf_alloc(const struct mchars *mchars, const struct manoutput *outopts) +pdf_alloc(const struct manoutput *outopts) { struct termp *p; - if (NULL != (p = pspdf_alloc(mchars, outopts))) + if (NULL != (p = pspdf_alloc(outopts))) p->type = TERMTYPE_PDF; return p; } void * -ps_alloc(const struct mchars *mchars, const struct manoutput *outopts) +ps_alloc(const struct manoutput *outopts) { struct termp *p; - if (NULL != (p = pspdf_alloc(mchars, outopts))) + if (NULL != (p = pspdf_alloc(outopts))) p->type = TERMTYPE_PS; return p; } static struct termp * -pspdf_alloc(const struct mchars *mchars, const struct manoutput *outopts) +pspdf_alloc(const struct manoutput *outopts) { struct termp *p; unsigned int pagex, pagey; @@ -540,7 +539,6 @@ pspdf_alloc(const struct mchars *mchars, const struct manoutput *outopts) const char *pp; p = mandoc_calloc(1, sizeof(struct termp)); - p->symtab = mchars; p->enc = TERMENC_ASCII; p->fontq = mandoc_reallocarray(NULL, (p->fontsz = 8), sizeof(enum termfont)); |