diff options
author | Kristaps Dzonsons <kristaps@bsd.lv> | 2008-12-04 19:31:57 +0000 |
---|---|---|
committer | Kristaps Dzonsons <kristaps@bsd.lv> | 2008-12-04 19:31:57 +0000 |
commit | 50ecaba08336212e4dc1802f15d7d3f4ffad186a (patch) | |
tree | bfc5e27855965ac17df779fa730772c1a696f7a3 | |
parent | fb40e7adf35e6b21b0e64e170b71e1a4d2cf5351 (diff) | |
download | mandoc-50ecaba08336212e4dc1802f15d7d3f4ffad186a.tar.gz |
Moved charset recognition into the filter.
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | index.7 | 4 | ||||
-rw-r--r-- | ml.c | 139 | ||||
-rw-r--r-- | mlg.c | 138 | ||||
-rw-r--r-- | private.h | 7 | ||||
-rw-r--r-- | roff.c | 14 | ||||
-rw-r--r-- | tokens.c | 113 |
7 files changed, 202 insertions, 215 deletions
@@ -27,7 +27,7 @@ FAIL = test.0 test.1 test.2 test.3 test.4 test.5 test.6 \ SUCCEED = test.7 test.8 test.9 test.10 test.11 test.12 test.13 \ test.14 test.16 test.17 test.18 test.19 test.21 test.23 \ test.25 test.28 test.29 test.31 test.32 test.33 test.34 \ - test.35 test.37 test.38 test.39 + test.35 test.38 test.39 all: mdocml @@ -47,9 +47,9 @@ respectively), correctly-ordered document prelude, .It sane argument values (such as those for -.Sq \& Dt +.Sq \&.Dt or -.Sq \& Sm ) , +.Sq \&.Sm ) , .It and so on. .El @@ -16,6 +16,7 @@ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR * PERFORMANCE OF THIS SOFTWARE. */ +#include <assert.h> #include <stdlib.h> #include <string.h> @@ -33,13 +34,147 @@ int ml_nputstring(struct md_mbuf *p, const char *buf, size_t sz, size_t *pos) { - int i; + int i, v; const char *seq; size_t ssz; for (i = 0; i < (int)sz; i++) { switch (buf[i]) { + /* Escaped value. */ + case ('\\'): + if (-1 == (v = rofftok_scan(buf, &i))) { + /* TODO: error. */ + return(0); + } + + switch (v) { + case (ROFFTok_Sp_A): + seq = "\\a"; + ssz = 2; + break; + case (ROFFTok_Sp_B): + seq = "\\b"; + ssz = 2; + break; + case (ROFFTok_Sp_F): + seq = "\\f"; + ssz = 2; + break; + case (ROFFTok_Sp_N): + seq = "\\n"; + ssz = 2; + break; + case (ROFFTok_Sp_R): + seq = "\\r"; + ssz = 2; + break; + case (ROFFTok_Sp_T): + seq = "\\t"; + ssz = 2; + break; + case (ROFFTok_Sp_V): + seq = "\\v"; + ssz = 2; + break; + case (ROFFTok_Sp_0): + seq = "\\0"; + ssz = 2; + break; + case (ROFFTok_Space): + seq = "&nbsp;"; + ssz = 6; + break; + case (ROFFTok_Hyphen): + seq = "&#8208;"; + ssz = 7; + break; + case (ROFFTok_Em): + seq = "&#8212;"; + ssz = 7; + break; + case (ROFFTok_En): + seq = "&#8211;"; + ssz = 7; + break; + case (ROFFTok_Ge): + seq = "&#8805;"; + ssz = 7; + break; + case (ROFFTok_Le): + seq = "&#8804;"; + ssz = 7; + break; + case (ROFFTok_Rquote): + seq = "&#8221;"; + ssz = 7; + break; + case (ROFFTok_Lquote): + seq = "&#8220;"; + ssz = 7; + break; + case (ROFFTok_Uparrow): + seq = "&#8593;"; + ssz = 7; + 
break; + case (ROFFTok_Acute): + seq = "&#180;"; + ssz = 6; + break; + case (ROFFTok_Grave): + seq = "&#96;"; + ssz = 5; + break; + case (ROFFTok_Pi): + seq = "&#960;"; + ssz = 6; + break; + case (ROFFTok_Ne): + seq = "&#8800;"; + ssz = 7; + break; + case (ROFFTok_Lt): + seq = "&lt;"; + ssz = 4; + break; + case (ROFFTok_Gt): + seq = "&gt;"; + ssz = 4; + break; + case (ROFFTok_Plusmin): + seq = "&#177;"; + ssz = 6; + break; + case (ROFFTok_Infty): + seq = "&#8734;"; + ssz = 7; + break; + case (ROFFTok_Bar): + seq = "&#124;"; + ssz = 6; + break; + case (ROFFTok_Nan): + seq = "Nan"; + ssz = 3; + break; + case (ROFFTok_Quote): + seq = "&quot;"; + ssz = 6; + break; + case (ROFFTok_Slash): + seq = "\\"; + ssz = 1; + break; + case (ROFFTok_Null): + seq = ""; + ssz = 0; + break; + default: + /* TODO: print error. */ + return(-1); + } + break; + /* Ampersand ml-escape. */ case ('&'): seq = "&amp;"; @@ -70,7 +205,7 @@ ml_nputstring(struct md_mbuf *p, break; } - if ( ! ml_nputs(p, seq, ssz, pos)) + if (ssz > 0 && ! ml_nputs(p, seq, ssz, pos)) return(-1); } return(1); @@ -70,7 +70,6 @@ static int mlg_rofftail(void *); static int mlg_roffin(void *, int, int *, char **); static int mlg_roffdata(void *, int, const char *, char *); -static int mlg_rofftoken(void *, int, int); static int mlg_roffout(void *, int); static int mlg_roffblkin(void *, int, int *, char **); static int mlg_roffblkout(void *, int); @@ -357,7 +356,6 @@ mlg_alloc(const struct md_args *args, cb.roffspecial = mlg_roffspecial; cb.roffmsg = mlg_roffmsg; cb.roffdata = mlg_roffdata; - cb.rofftoken = mlg_rofftoken; if (NULL == (p = calloc(1, sizeof(struct md_mlg)))) err(1, "calloc"); @@ -552,142 +550,6 @@ mlg_roffmsg(void *arg, enum roffmsg lvl, static int -mlg_rofftoken(void *arg, int space, int value) -{ - struct md_mlg *p; - const char *seq; - size_t sz, res; - - assert(arg); - p = (struct md_mlg *)arg; - - switch (value) { - case (ROFFTok_Sp_A): - seq = "\\a"; - sz = 2; - break; - case (ROFFTok_Sp_B): - seq = "\\b"; - sz = 2; - break; - case (ROFFTok_Sp_F): - 
seq = "\\f"; - sz = 2; - break; - case (ROFFTok_Sp_N): - seq = "\\n"; - sz = 2; - break; - case (ROFFTok_Sp_R): - seq = "\\r"; - sz = 2; - break; - case (ROFFTok_Sp_T): - seq = "\\t"; - sz = 2; - break; - case (ROFFTok_Sp_V): - seq = "\\v"; - sz = 2; - break; - case (ROFFTok_Space): - seq = "&nbsp;"; - sz = 6; - break; - case (ROFFTok_Hyphen): - seq = "&#8208;"; - sz = 7; - break; - case (ROFFTok_Em): - seq = "&#8212;"; - sz = 7; - break; - case (ROFFTok_En): - seq = "&#8211;"; - sz = 7; - break; - case (ROFFTok_Ge): - seq = "&#8805;"; - sz = 7; - break; - case (ROFFTok_Le): - seq = "&#8804;"; - sz = 7; - break; - case (ROFFTok_Rquote): - seq = "&#8221;"; - sz = 7; - break; - case (ROFFTok_Lquote): - seq = "&#8220;"; - sz = 7; - break; - case (ROFFTok_Uparrow): - seq = "&#8593;"; - sz = 7; - break; - case (ROFFTok_Acute): - seq = "&#180;"; - sz = 6; - break; - case (ROFFTok_Grave): - seq = "&#96;"; - sz = 5; - break; - case (ROFFTok_Pi): - seq = "&#960;"; - sz = 6; - break; - case (ROFFTok_Ne): - seq = "&#8800;"; - sz = 7; - break; - case (ROFFTok_Lt): - seq = "&lt;"; - sz = 4; - break; - case (ROFFTok_Gt): - seq = "&gt;"; - sz = 4; - break; - case (ROFFTok_Plusmin): - seq = "&#177;"; - sz = 6; - break; - case (ROFFTok_Infty): - seq = "&#8734;"; - sz = 7; - break; - case (ROFFTok_Bar): - seq = "&#124;"; - sz = 6; - break; - case (ROFFTok_Nan): - seq = "Nan"; - sz = 3; - break; - case (ROFFTok_Quote): - seq = "&quot;"; - sz = 6; - break; - default: - /* TODO: print error. */ - return(0); - } - - if (space && ! ml_nputs(p->mbuf, " ", 1, &res)) - return(0); - p->pos += res; - - if ( ! 
ml_nputs(p->mbuf, seq, sz, &res)) - return(0); - p->pos += res; - - return(1); -} - - -static int mlg_roffdata(void *arg, int space, const char *start, char *buf) { struct md_mlg *p; @@ -65,7 +65,9 @@ struct md_mbuf { #define ROFFTok_Bar 25 #define ROFFTok_Nan 26 #define ROFFTok_Quote 27 -#define ROFFTok_MAX 28 +#define ROFFTok_Sp_0 28 +#define ROFFTok_Slash 29 +#define ROFFTok_MAX 30 #define ROFF___ 0 #define ROFF_Dd 1 @@ -251,7 +253,6 @@ struct roffcb { const char *, const char *, const char *); int (*rofftail)(void *); int (*roffdata)(void *, int, const char *, char *); - int (*rofftoken)(void *, int, int); int (*roffin)(void *, int, int *, char **); int (*roffout)(void *, int); int (*roffblkin)(void *, int, int *, char **); @@ -290,7 +291,7 @@ struct rofftree *roff_alloc(const struct roffcb *, void *); int roff_engine(struct rofftree *, char *); int roff_free(struct rofftree *, int); -int rofftok_scan(const char *); +int rofftok_scan(const char *, int *); __END_DECLS @@ -969,23 +969,9 @@ roffparseopts(struct rofftree *tree, int tok, static int roffdata(struct rofftree *tree, int space, char *buf) { - int tok; if (0 == *buf) return(1); - - if (-1 == (tok = rofftok_scan(buf))) { - roff_err(tree, buf, "invalid character sequence"); - return(0); - } else if (ROFFTok_MAX != tok) { - if (ROFFTok_Null == tok) { /* FIXME */ - buf += 2; - return(roffdata(tree, space, buf)); - } - return((*tree->cb.rofftoken) - (tree->arg, space != 0, tok)); - } - return((*tree->cb.roffdata)(tree->arg, space != 0, tree->cur, buf)); } @@ -24,55 +24,58 @@ #include "private.h" -static int rofftok_dashes(const char *); -static int rofftok_special(const char *); -static int rofftok_predef(const char *); -static int rofftok_defined(const char *); +static int rofftok_dashes(const char *, int *); +static int rofftok_special(const char *, int *); +static int rofftok_predef(const char *, int *); +static int rofftok_defined(const char *, int *); static int -rofftok_defined(const char *buf) 
+rofftok_defined(const char *buf, int *i) { - if (0 == *buf) - return(-1); - if (0 == *(buf + 1)) + const char *p; + + if (0 == buf[*i]) return(-1); - if (0 != *(buf + 2)) + if (0 == buf[*i + 1]) return(-1); - if (0 == strcmp(buf, ">=")) + (*i)++; + p = &buf[(*i)++]; + + if (0 == memcmp(p, ">=", 2)) return(ROFFTok_Ge); - else if (0 == strcmp(buf, "<=")) + else if (0 == memcmp(p, "<=", 2)) return(ROFFTok_Le); - else if (0 == strcmp(buf, "Rq")) + else if (0 == memcmp(p, "Rq", 2)) return(ROFFTok_Rquote); - else if (0 == strcmp(buf, "Lq")) + else if (0 == memcmp(p, "Lq", 2)) return(ROFFTok_Lquote); - else if (0 == strcmp(buf, "ua")) + else if (0 == memcmp(p, "ua", 2)) return(ROFFTok_Uparrow); - else if (0 == strcmp(buf, "aa")) + else if (0 == memcmp(p, "aa", 2)) return(ROFFTok_Acute); - else if (0 == strcmp(buf, "ga")) + else if (0 == memcmp(p, "ga", 2)) return(ROFFTok_Grave); - else if (0 == strcmp(buf, "Pi")) + else if (0 == memcmp(p, "Pi", 2)) return(ROFFTok_Pi); - else if (0 == strcmp(buf, "Ne")) + else if (0 == memcmp(p, "Ne", 2)) return(ROFFTok_Ne); - else if (0 == strcmp(buf, "Le")) + else if (0 == memcmp(p, "Le", 2)) return(ROFFTok_Le); - else if (0 == strcmp(buf, "Ge")) + else if (0 == memcmp(p, "Ge", 2)) return(ROFFTok_Ge); - else if (0 == strcmp(buf, "Lt")) + else if (0 == memcmp(p, "Lt", 2)) return(ROFFTok_Lt); - else if (0 == strcmp(buf, "Gt")) + else if (0 == memcmp(p, "Gt", 2)) return(ROFFTok_Gt); - else if (0 == strcmp(buf, "Pm")) + else if (0 == memcmp(p, "Pm", 2)) return(ROFFTok_Plusmin); - else if (0 == strcmp(buf, "If")) + else if (0 == memcmp(p, "If", 2)) return(ROFFTok_Infty); - else if (0 == strcmp(buf, "Na")) + else if (0 == memcmp(p, "Na", 2)) return(ROFFTok_Nan); - else if (0 == strcmp(buf, "Ba")) + else if (0 == memcmp(p, "Ba", 2)) return(ROFFTok_Bar); return(-1); @@ -80,15 +83,14 @@ rofftok_defined(const char *buf) static int -rofftok_predef(const char *buf) +rofftok_predef(const char *buf, int *i) { - if (0 == *buf) + if (0 == buf[*i]) 
return(-1); + if ('(' == buf[*i]) + return(rofftok_defined(buf, i)); - if ('(' == *buf) - return(rofftok_defined(++buf)); - - switch (*buf) { + switch (buf[*i]) { case ('q'): return(ROFFTok_Quote); default: @@ -100,20 +102,17 @@ rofftok_predef(const char *buf) static int -rofftok_dashes(const char *buf) +rofftok_dashes(const char *buf, int *i) { - if (0 == *buf) + if (0 == buf[*i]) return(-1); - else if (*buf++ != 'e') + else if (buf[(*i)++] != 'e') return(-1); - - if (0 == *buf) - return(-1); - else if (0 != *(buf + 1)) + if (0 == buf[*i]) return(-1); - switch (*buf) { + switch (buf[*i]) { case ('m'): return(ROFFTok_Em); case ('n'): @@ -126,15 +125,13 @@ rofftok_dashes(const char *buf) static int -rofftok_special(const char *buf) +rofftok_special(const char *buf, int *i) { - if (0 == *buf) - return(-1); - else if (0 != *(buf + 1)) - return(-1); + if (0 == buf[*i]) + return(ROFFTok_Slash); - switch (*buf) { + switch (buf[*i]) { case ('a'): return(ROFFTok_Sp_A); case ('b'): @@ -149,6 +146,8 @@ rofftok_special(const char *buf) return(ROFFTok_Sp_T); case ('v'): return(ROFFTok_Sp_V); + case ('0'): + return(ROFFTok_Sp_0); default: break; } @@ -157,19 +156,22 @@ rofftok_special(const char *buf) int -rofftok_scan(const char *buf) +rofftok_scan(const char *buf, int *i) { assert(*buf); - if ('\\' != *buf++) - return(ROFFTok_MAX); + assert(buf[*i] == '\\'); + + (*i)++; - for ( ; *buf; buf++) { - switch (*buf) { + for ( ; buf[*i]; (*i)++) { + switch (buf[*i]) { case ('e'): - return(rofftok_special(++buf)); + (*i)++; + return(rofftok_special(buf, i)); case ('('): - return(rofftok_dashes(++buf)); + (*i)++; + return(rofftok_dashes(buf, i)); case (' '): return(ROFFTok_Space); case ('&'): @@ -177,9 +179,10 @@ rofftok_scan(const char *buf) case ('-'): return(ROFFTok_Hyphen); case ('*'): - return(rofftok_predef(++buf)); + (*i)++; + return(rofftok_predef(buf, i)); case ('\\'): - return(ROFFTok_MAX); + return(ROFFTok_Slash); default: break; } |