diff options
-rw-r--r-- | Makefile | 18 | ||||
-rw-r--r-- | html.c | 2 | ||||
-rw-r--r-- | index.7 | 65 | ||||
-rw-r--r-- | mdocml.css | 120 | ||||
-rw-r--r-- | ml.c | 41 | ||||
-rw-r--r-- | ml.h | 2 | ||||
-rw-r--r-- | mlg.c | 237 | ||||
-rw-r--r-- | private.h | 38 | ||||
-rw-r--r-- | roff.c | 119 | ||||
-rw-r--r-- | tokens.c | 184 |
10 files changed, 645 insertions, 181 deletions
@@ -2,15 +2,15 @@ CFLAGS += -W -Wall -Wno-unused-parameter -g -DDEBUG LINTFLAGS += -c -e -f -u -LNS = mdocml.ln html.ln xml.ln libmdocml.ln roff.ln ml.ln mlg.ln compat.ln +LNS = mdocml.ln html.ln xml.ln libmdocml.ln roff.ln ml.ln mlg.ln compat.ln tokens.ln LLNS = llib-lmdocml.ln LIBS = libmdocml.a -OBJS = mdocml.o html.o xml.o libmdocml.o roff.o ml.o mlg.o compat.o +OBJS = mdocml.o html.o xml.o libmdocml.o roff.o ml.o mlg.o compat.o tokens.o -SRCS = mdocml.c html.c xml.c libmdocml.c roff.c ml.c mlg.c compat.c +SRCS = mdocml.c html.c xml.c libmdocml.c roff.c ml.c mlg.c compat.c tokens.c HEADS = libmdocml.h private.h @@ -22,12 +22,12 @@ INSTALL = Makefile $(HEADS) $(SRCS) $(MANS) FAIL = test.0 test.1 test.2 test.3 test.4 test.5 test.6 \ test.15 test.20 test.22 test.24 test.26 test.27 test.30 \ - test.36 + test.36 test.37 test.40 SUCCEED = test.7 test.8 test.9 test.10 test.11 test.12 test.13 \ test.14 test.16 test.17 test.18 test.19 test.21 test.23 \ test.25 test.28 test.29 test.31 test.32 test.33 test.34 \ - test.35 + test.35 test.37 test.38 test.39 all: mdocml @@ -52,15 +52,15 @@ mdocml.tgz: $(INSTALL) ( cd .dist/ && tar zcf ../mdocml.tgz mdocml/ ) rm -rf .dist/ -llib-lmdocml.ln: mdocml.ln libmdocml.ln html.ln xml.ln roff.ln ml.ln mlg.ln compat.ln - $(LINT) $(LINTFLAGS) -Cmdocml mdocml.ln libmdocml.ln html.ln xml.ln roff.ln ml.ln mlg.ln compat.ln +llib-lmdocml.ln: mdocml.ln libmdocml.ln html.ln xml.ln roff.ln ml.ln mlg.ln compat.ln tokens.ln + $(LINT) $(LINTFLAGS) -Cmdocml mdocml.ln libmdocml.ln html.ln xml.ln roff.ln ml.ln mlg.ln compat.ln tokens.ln mdocml.ln: mdocml.c libmdocml.h mdocml.o: mdocml.c libmdocml.h -libmdocml.a: libmdocml.o html.o xml.o roff.o ml.o mlg.o compat.o - $(AR) rs $@ libmdocml.o html.o xml.o roff.o ml.o mlg.o compat.o +libmdocml.a: libmdocml.o html.o xml.o roff.o ml.o mlg.o compat.o tokens.o + $(AR) rs $@ libmdocml.o html.o xml.o roff.o ml.o mlg.o compat.o tokens.o xml.ln: xml.c private.h libmdocml.h ml.h @@ -163,7 +163,7 @@ html_begin(struct md_mbuf *mbuf, const struct md_args *args, assert(args->params.html.css); if (HTML_CSS_EMBED & args->params.html.flags) { - if ( ! ml_puts(mbuf, " <style><!--\n", &res)) + if ( ! ml_puts(mbuf, " <style type=\"text/css\"><!--\n", &res)) return(0); if ( ! html_loadcss(mbuf, args->params.html.css)) return(0); diff --git a/index.7 b/index.7 new file mode 100644 index 00000000..ff3f0d59 --- /dev/null +++ b/index.7 @@ -0,0 +1,65 @@ +.\" +.Dd $Mdocdate$ +.Dt index 7 +.Os LOCAL +.\" +.Sh NAME +.Nm mdocml +.Nd compile manpage source into mark-up language +.\" +.Sh DESCRIPTION +The +.Nm +utility compiles +.Xr mdoc 7 +macros, such as those inheriting from +.Xr mdoc.samples 7 , +into XML or HTML documents. Unlike other similar utilities such as +.Xr rman 1 +and +.Xr man2html 1 , +.Nm +acts directly on source documents, validating its input and producing a +variety of outputs. +.Ss Features +In order to operate properly, +.Nm +fully validates its input. This includes, but is not limited to, the +following checks: +.Pp +.Bl -enum -compact +.It +special characters (such as +.Sq \en +and +.Sq \et ) , +.It +macro scope (such as +.Sq \&.Sh +macros clobbering a pending +.Sq \&.Bl +scope), +.It +predefined characters (such as \\*(>= and \\*q), +.It +correctly-ordered document prelude, +.El +.\" +.Sh ENVIRONMENT +The +.Nm +utility has been tested under Linux and OpenBSD. +.\" +.Sh EXAMPLES +This page was produced as follows: +.Pp +.D1 % mdocml -fhtml -o index.html index.7 +.\" +.Sh SEE ALSO +TODO. +.\" +.Sh AUTHORS +The +.Nm +utility was written by +.An Em Kristaps Dzonsons Aq kristaps@kth.se . @@ -1,92 +1,28 @@ - body - { - margin: 0px; - font-family: Tahoma, sans-serif; - font-size: small; - } - - div.mdoc - { - width: 600px; - } - - div.block-Sh - { - margin-bottom: 20px; - } - - div.head-Sh - { - font-weight: bold; - font-size: larger; - } - - div.head-Ss - { - font-weight: bold; - margin-top: 10px; - text-align: justify; - } - - div.body-Sh - { - margin-left: 20px; - margin-top: 10px; - text-align: justify; - } - - span.inline-Nd:before - { - content: ' - '; - } - - span.inline-Fl:before - { - content: '-'; - } - - span.inline-Fl - { - font-weight: bolder; - } - - span.inline-Ar - { - text-decoration: underline; - } - - span.inline-Pa - { - text-decoration: underline; - } - - span.inline-Op:before - { - content: '['; - } - - span.inline-Op:after - { - content: ']'; - } - - div.block-Bl - { - margin-top: 10px; - margin-left: 20px; - } - - div.inline-Pp - { - margin-bottom: 10px; - } - - span.inline-D1 - { - margin-left: 20px; - } - - span.inline-Qq:before { content: '``'; } - span.inline-Qq:after { content: '\'\''; } - span.inline-Sq:before { content: '`'; } - span.inline-Sq:after { content: '\''; } + body { margin: 10px; + font-family: Tahoma, sans-serif; + font-size: small; } + div.mdoc { width: 600px; } + div.block-Sh { margin-bottom: 20px; } + div.head-Sh { font-weight: bold; + font-size: larger; } + div.head-Ss { font-weight: bold; + margin-top: 10px; + text-align: justify; } + div.body-Sh { margin-left: 20px; + margin-top: 10px; + text-align: justify; } + span.inline-Nd:before { content: ' \2014 '; } + span.inline-Fl:before { content: '-'; } + span.inline-Fl { font-weight: bolder; } + span.inline-Ar { text-decoration: underline; } + span.inline-Pa { text-decoration: underline; } + span.inline-Op:before { content: '['; } + span.inline-Op:after { content: ']'; } + div.block-Bl { margin-top: 10px; + margin-left: 20px; } + div.inline-Pp { margin-bottom: 10px; } + span.inline-D1 { margin-left: 20px; } + span.inline-Qq:before { content: '\201c'; } + span.inline-Qq:after { content: '\201d'; } + span.inline-Sq:before { content: '\2018'; } + span.inline-Sq:after { content: '\2019'; } @@ -34,30 +34,44 @@ ml_nputstring(struct md_mbuf *p, const char *buf, size_t sz, size_t *pos) { int i; + const char *seq; + size_t ssz; for (i = 0; i < (int)sz; i++) { switch (buf[i]) { + + /* Ampersand ml-escape. */ case ('&'): - if ( ! ml_nputs(p, "&", 5, pos)) - return(0); + seq = "&"; + ssz = 5; break; + + /* Quotation ml-escape. */ case ('"'): - if ( ! ml_nputs(p, """, 6, pos)) - return(0); + seq = """; + ssz = 6; break; + + /* Lt ml-escape. */ case ('<'): - if ( ! ml_nputs(p, "<", 4, pos)) - return(0); + seq = "<"; + ssz = 4; break; + + /* Gt ml-escape. */ case ('>'): - if ( ! ml_nputs(p, ">", 4, pos)) - return(0); + seq = ">"; + ssz = 4; break; + default: - if ( ! ml_nputs(p, &buf[i], 1, pos)) - return(0); + seq = &buf[i]; + ssz = 1; break; } + + if ( ! ml_nputs(p, seq, ssz, pos)) + return(-1); } return(1); } @@ -67,6 +81,9 @@ int ml_nputs(struct md_mbuf *p, const char *buf, size_t sz, size_t *pos) { + if (0 == sz) + return(1); + if ( ! md_buf_puts(p, buf, sz)) return(0); @@ -80,7 +97,9 @@ ml_puts(struct md_mbuf *p, const char *buf, size_t *pos) { size_t sz; - sz = strlen(buf); + if (0 == (sz = strlen(buf))) + return(1); + if ( ! md_buf_puts(p, buf, sz)) return(0); *pos += sz; @@ -29,6 +29,7 @@ enum md_ns { MD_NS_DEFAULT, }; + typedef int (*ml_begin)(struct md_mbuf *, const struct md_args *, const struct tm *, const char *, const char *, const char *, const char *); @@ -40,7 +41,6 @@ typedef ssize_t (*ml_begintag)(struct md_mbuf *, const struct md_args *, enum md_ns, int, const int *, const char **); - __BEGIN_DECLS int ml_nputstring(struct md_mbuf *, @@ -68,15 +68,19 @@ static int mlg_roffhead(void *, const struct tm *, const char *, const char *); static int mlg_rofftail(void *); static int mlg_roffin(void *, int, int *, char **); -static int mlg_roffdata(void *, int, char *); +static int mlg_roffdata(void *, int, + const char *, char *); +static int mlg_rofftoken(void *, int, int); static int mlg_roffout(void *, int); static int mlg_roffblkin(void *, int, int *, char **); static int mlg_roffblkout(void *, int); static int mlg_roffspecial(void *, int, int *, char **, char **); -static int mlg_roffblkheadin(void *, int, int *, char **); +static int mlg_roffblkheadin(void *, int, + int *, char **); static int mlg_roffblkheadout(void *, int); -static int mlg_roffblkbodyin(void *, int, int *, char **); +static int mlg_roffblkbodyin(void *, int, + int *, char **); static int mlg_roffblkbodyout(void *, int); static int mlg_beginblk(struct md_mlg *, enum md_ns, int, @@ -88,7 +92,14 @@ static int mlg_endtag(struct md_mlg *, enum md_ns, int); static int mlg_indent(struct md_mlg *); static int mlg_newline(struct md_mlg *); static void mlg_mode(struct md_mlg *, enum md_tok); -static int mlg_data(struct md_mlg *, int, char *); +static int mlg_data(struct md_mlg *, int, + const char *, char *); +static void mlg_err(struct md_mlg *, const char *, + const char *, char *); +static void mlg_warn(struct md_mlg *, const char *, + const char *, char *); +static void mlg_msg(struct md_mlg *, enum roffmsg, + const char *, const char *, char *); #ifdef __linux__ extern size_t strlcat(char *, const char *, size_t); @@ -221,10 +232,11 @@ mlg_mode(struct md_mlg *p, enum md_tok ns) static int -mlg_data(struct md_mlg *p, int space, char *buf) +mlg_data(struct md_mlg *p, int space, const char *start, char *buf) { size_t sz; char *bufp; + int c; assert(p->mbuf); assert(0 != p->indent); @@ -252,9 +264,19 @@ mlg_data(struct md_mlg *p, int space, char *buf) if (0 == p->pos) { if ( ! mlg_indent(p)) return(0); - if ( ! ml_nputstring(p->mbuf, bufp, - sz, &p->pos)) + + c = ml_nputstring(p->mbuf, bufp, sz, &p->pos); + if (0 == c) { + mlg_err(p, start, bufp, "invalid " + "character sequence"); + return(0); + } else if (c > 1) { + mlg_warn(p, start, bufp, "bogus " + "character sequence"); + return(0); + } else if (-1 == c) return(0); + if (p->indent * INDENT + sz >= COLUMNS) if ( ! mlg_newline(p)) return(0); @@ -273,7 +295,16 @@ mlg_data(struct md_mlg *p, int space, char *buf) return(0); } - if ( ! ml_nputstring(p->mbuf, bufp, sz, &p->pos)) + c = ml_nputstring(p->mbuf, bufp, sz, &p->pos); + if (0 == c) { + mlg_err(p, start, bufp, "invalid " + "character sequence"); + return(0); + } else if (c > 1) { + mlg_warn(p, start, bufp, "bogus " + "character sequence"); + return(0); + } else if (-1 == c) return(0); if ( ! (ML_OVERRIDE_ALL & p->flags)) @@ -326,6 +357,7 @@ mlg_alloc(const struct md_args *args, cb.roffspecial = mlg_roffspecial; cb.roffmsg = mlg_roffmsg; cb.roffdata = mlg_roffdata; + cb.rofftoken = mlg_rofftoken; if (NULL == (p = calloc(1, sizeof(struct md_mlg)))) err(1, "calloc"); @@ -514,12 +546,182 @@ static void mlg_roffmsg(void *arg, enum roffmsg lvl, const char *buf, const char *pos, char *msg) { - char *level; + + mlg_msg((struct md_mlg *)arg, lvl, buf, pos, msg); +} + + +static int +mlg_rofftoken(void *arg, int space, int value) +{ + struct md_mlg *p; + const char *seq; + size_t sz, res; + + assert(arg); + p = (struct md_mlg *)arg; + + switch (value) { + case (ROFFTok_Sp_A): + seq = "\\a"; + sz = 2; + break; + case (ROFFTok_Sp_B): + seq = "\\b"; + sz = 2; + break; + case (ROFFTok_Sp_F): + seq = "\\f"; + sz = 2; + break; + case (ROFFTok_Sp_N): + seq = "\\n"; + sz = 2; + break; + case (ROFFTok_Sp_R): + seq = "\\r"; + sz = 2; + break; + case (ROFFTok_Sp_T): + seq = "\\t"; + sz = 2; + break; + case (ROFFTok_Sp_V): + seq = "\\v"; + sz = 2; + break; + case (ROFFTok_Space): + seq = " "; + sz = 6; + break; + case (ROFFTok_Null): + seq = ""; + sz = 0; + break; + case (ROFFTok_Hyphen): + seq = "‐"; + sz = 7; + break; + case (ROFFTok_Em): + seq = "—"; + sz = 7; + break; + case (ROFFTok_En): + seq = "–"; + sz = 7; + break; + case (ROFFTok_Ge): + seq = "≥"; + sz = 7; + break; + case (ROFFTok_Le): + seq = "≤"; + sz = 7; + break; + case (ROFFTok_Rquote): + seq = "”"; + sz = 7; + break; + case (ROFFTok_Lquote): + seq = "“"; + sz = 7; + break; + case (ROFFTok_Uparrow): + seq = "↑"; + sz = 7; + break; + case (ROFFTok_Acute): + seq = "´"; + sz = 6; + break; + case (ROFFTok_Grave): + seq = "`"; + sz = 5; + break; + case (ROFFTok_Pi): + seq = "π"; + sz = 6; + break; + case (ROFFTok_Ne): + seq = "≠"; + sz = 7; + break; + case (ROFFTok_Lt): + seq = "<"; + sz = 4; + break; + case (ROFFTok_Gt): + seq = ">"; + sz = 4; + break; + case (ROFFTok_Plusmin): + seq = "±"; + sz = 6; + break; + case (ROFFTok_Infty): + seq = "∞"; + sz = 7; + break; + case (ROFFTok_Bar): + seq = "|"; + sz = 6; + break; + case (ROFFTok_Nan): + seq = "Nan"; + sz = 3; + break; + } + + if (space && ! ml_nputs(p->mbuf, " ", 1, &res)) + return(0); + p->pos += res; + + if (0 != sz && ! ml_nputs(p->mbuf, seq, sz, &res)) + return(0); + p->pos += res; + + return(1); +} + + +static int +mlg_roffdata(void *arg, int space, const char *start, char *buf) +{ struct md_mlg *p; assert(arg); p = (struct md_mlg *)arg; + if ( ! mlg_data(p, space, start, buf)) + return(0); + + mlg_mode(p, MD_TEXT); + return(1); +} + + +static void +mlg_err(struct md_mlg *p, const char *buf, const char *pos, char *msg) +{ + + mlg_msg(p, ROFF_ERROR, buf, pos, msg); +} + + +static void +mlg_warn(struct md_mlg *p, const char *buf, const char *pos, char *msg) +{ + + mlg_msg(p, ROFF_WARN, buf, pos, msg); +} + + +static void +mlg_msg(struct md_mlg *p, enum roffmsg lvl, + const char *buf, const char *pos, char *msg) +{ + char *level; + switch (lvl) { case (ROFF_WARN): if ( ! (MD_WARN_ALL & p->args->warnings)) @@ -542,20 +744,3 @@ mlg_roffmsg(void *arg, enum roffmsg lvl, p->rbuf->name, level, msg); } - - -static int -mlg_roffdata(void *arg, int space, char *buf) -{ - struct md_mlg *p; - - assert(arg); - p = (struct md_mlg *)arg; - - if ( ! mlg_data(p, space, buf)) - return(0); - - mlg_mode(p, MD_TEXT); - return(1); -} - @@ -19,6 +19,8 @@ #ifndef PRIVATE_H #define PRIVATE_H +#include <time.h> + struct md_rbuf { int fd; /* Open descriptor. */ char *name; /* Name of file. */ @@ -35,6 +37,35 @@ struct md_mbuf { size_t pos; /* Position in buffer. */ }; +#define ROFFTok_Sp_A 0 +#define ROFFTok_Sp_B 1 +#define ROFFTok_Sp_F 2 +#define ROFFTok_Sp_N 3 +#define ROFFTok_Sp_R 4 +#define ROFFTok_Sp_T 5 +#define ROFFTok_Sp_V 6 +#define ROFFTok_Space 7 +#define ROFFTok_Null 8 +#define ROFFTok_Hyphen 9 +#define ROFFTok_Em 10 +#define ROFFTok_En 11 +#define ROFFTok_Ge 12 +#define ROFFTok_Le 13 +#define ROFFTok_Rquote 14 +#define ROFFTok_Lquote 15 +#define ROFFTok_Uparrow 16 +#define ROFFTok_Acute 17 +#define ROFFTok_Grave 18 +#define ROFFTok_Pi 19 +#define ROFFTok_Ne 20 +#define ROFFTok_Lt 21 +#define ROFFTok_Gt 22 +#define ROFFTok_Plusmin 23 +#define ROFFTok_Infty 24 +#define ROFFTok_Bar 25 +#define ROFFTok_Nan 26 +#define ROFFTok_MAX 27 + #define ROFF___ 0 #define ROFF_Dd 1 #define ROFF_Dt 2 @@ -205,6 +236,8 @@ struct md_mbuf { #define ROFF_Words 59 #define ROFF_ARGMAX 60 +#define ROFF_MAXLINEARG 32 + extern const char *const *toknames; extern const char *const *tokargnames; @@ -216,7 +249,8 @@ struct roffcb { int (*roffhead)(void *, const struct tm *, const char *, const char *, const char *, const char *); int (*rofftail)(void *); - int (*roffdata)(void *, int, char *); + int (*roffdata)(void *, int, const char *, char *); + int (*rofftoken)(void *, int, int); int (*roffin)(void *, int, int *, char **); int (*roffout)(void *, int); int (*roffblkin)(void *, int, int *, char **); @@ -255,6 +289,8 @@ struct rofftree *roff_alloc(const struct roffcb *, void *); int roff_engine(struct rofftree *, char *); int roff_free(struct rofftree *, int); +int rofftok_scan(const char *); + __END_DECLS #endif /*!PRIVATE_H*/ @@ -42,8 +42,6 @@ /* TODO: macros with a set number of arguments? */ /* TODO: validate Dt macro arguments. */ -#define ROFF_MAXARG 32 - enum roffd { ROFF_ENTER = 0, ROFF_EXIT @@ -133,7 +131,8 @@ static int roffparseopts(struct rofftree *, int, char ***, int *, char **); static int roffcall(struct rofftree *, int, char **); static int roffparse(struct rofftree *, char *); -static int textparse(const struct rofftree *, char *); +static int textparse(struct rofftree *, char *); +static int roffdata(struct rofftree *, int, char *); #ifdef __linux__ extern size_t strlcat(char *, const char *, size_t); @@ -272,8 +271,8 @@ static const struct rofftok tokens[ROFF_MAX] = { { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Sy */ { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Tn */ { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Ux */ - { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Xc */ - { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Xo */ + { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Xc */ + { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Xo */ { roff_layout, NULL, NULL, roffchild_Fo, 0, ROFF_LAYOUT, 0 }, /* Fo */ { roff_noop, NULL, roffparent_Fc, NULL, ROFF_Fo, ROFF_LAYOUT, 0 }, /* Fc */ { roff_layout, NULL, NULL, NULL, 0, ROFF_LAYOUT, 0 }, /* Oo */ @@ -450,14 +449,43 @@ roff_engine(struct rofftree *tree, char *buf) static int -textparse(const struct rofftree *tree, char *buf) +textparse(struct rofftree *tree, char *buf) { + char *bufp; + + /* TODO: literal parsing. */ if ( ! (ROFF_BODY & tree->state)) { roff_err(tree, buf, "data not in body"); return(0); } - return((*tree->cb.roffdata)(tree->arg, 1, buf)); + + /* LINTED */ + while (*buf) { + while (*buf && isspace(*buf)) + buf++; + + if (0 == *buf) + break; + + bufp = buf++; + + while (*buf && ! isspace(*buf)) + buf++; + + if (0 != *buf) { + *buf++ = 0; + if ( ! roffdata(tree, 1, bufp)) + return(0); + continue; + } + + if ( ! roffdata(tree, 1, bufp)) + return(0); + break; + } + + return(1); } @@ -474,7 +502,7 @@ roffargs(const struct rofftree *tree, p = buf; /* LINTED */ - for (i = 0; *buf && i < ROFF_MAXARG; i++) { + for (i = 0; *buf && i < ROFF_MAXLINEARG; i++) { if ('\"' == *buf) { argv[i] = ++buf; while (*buf && '\"' != *buf) @@ -499,7 +527,7 @@ roffargs(const struct rofftree *tree, } assert(i > 0); - if (ROFF_MAXARG == i && *buf) { + if (ROFF_MAXLINEARG == i && *buf) { roff_err(tree, p, "too many arguments for `%s'", toknames [tok]); return(0); @@ -530,7 +558,7 @@ roffparse(struct rofftree *tree, char *buf) { int tok, t; struct roffnode *n; - char *argv[ROFF_MAXARG]; + char *argv[ROFF_MAXLINEARG]; char **argvp; if (0 != *buf && 0 != *(buf + 1) && 0 != *(buf + 2)) @@ -907,7 +935,7 @@ roffpurgepunct(struct rofftree *tree, char **argv) /* LINTED */ while (argv[i]) - if ( ! (*tree->cb.roffdata)(tree->arg, 0, argv[i++])) + if ( ! roffdata(tree, 0, argv[i++])) return(0); return(1); } @@ -938,6 +966,23 @@ roffparseopts(struct rofftree *tree, int tok, } +static int +roffdata(struct rofftree *tree, int space, char *buf) +{ + int tok; + + if (-1 == (tok = rofftok_scan(buf))) { + roff_err(tree, buf, "invalid character sequence"); + return(0); + } else if (ROFFTok_MAX != tok) + return((*tree->cb.rofftoken) + (tree->arg, space != 0, tok)); + + return((*tree->cb.roffdata)(tree->arg, + space != 0, tree->cur, buf)); +} + + /* ARGSUSED */ static int roff_Dd(ROFFCALL_ARGS) @@ -1108,11 +1153,9 @@ roff_Sm(ROFFCALL_ARGS) tok, argcp, argvp, morep)) return(0); - while (*argv) { - if ((*tree->cb.roffdata)(tree->arg, 1, *argv++)) - continue; - return(0); - } + while (*argv) + if ( ! roffdata(tree, 1, *argv++)) + return(0); return(1); } @@ -1143,18 +1186,19 @@ roff_Ns(ROFFCALL_ARGS) } if ( ! roffispunct(*argv)) { - if ((*tree->cb.roffdata)(tree->arg, 1, *argv++)) - continue; - return(0); + if ( ! roffdata(tree, 1, *argv++)) + return(0); + continue; } + for (j = 0; argv[j]; j++) if ( ! roffispunct(argv[j])) break; if (argv[j]) { - if ((*tree->cb.roffdata)(tree->arg, 0, *argv++)) - continue; - return(0); + if ( ! roffdata(tree, 0, *argv++)) + return(0); + continue; } break; @@ -1221,8 +1265,8 @@ roff_Os(ROFFCALL_ARGS) static int roff_layout(ROFFCALL_ARGS) { - int i, c, argcp[ROFF_MAXARG]; - char *argvp[ROFF_MAXARG]; + int i, c, argcp[ROFF_MAXLINEARG]; + char *argvp[ROFF_MAXLINEARG]; if (ROFF_PRELUDE & tree->state) { roff_err(tree, *argv, "bad `%s' in prelude", @@ -1266,11 +1310,10 @@ roff_layout(ROFFCALL_ARGS) if ( ! (ROFF_PARSED & tokens[tok].flags)) { i = 0; - while (*argv) { - if ( ! (*tree->cb.roffdata)(tree->arg, i, *argv++)) + while (*argv) + if ( ! roffdata(tree, i++, *argv++)) return(0); - i = 1; - } + if ( ! (*tree->cb.roffblkheadout)(tree->arg, tok)) return(0); return((*tree->cb.roffblkbodyin) @@ -1286,10 +1329,8 @@ roff_layout(ROFFCALL_ARGS) while (*argv) { if (ROFF_MAX == (c = rofffindcallable(*argv))) { assert(tree->arg); - if ( ! (*tree->cb.roffdata) - (tree->arg, i, *argv++)) + if ( ! roffdata(tree, i++, *argv++)) return(0); - i = 1; continue; } if ( ! roffcall(tree, c, argv)) @@ -1329,8 +1370,8 @@ roff_layout(ROFFCALL_ARGS) static int roff_text(ROFFCALL_ARGS) { - int i, j, first, c, argcp[ROFF_MAXARG]; - char *argvp[ROFF_MAXARG]; + int i, j, first, c, argcp[ROFF_MAXLINEARG]; + char *argvp[ROFF_MAXLINEARG]; if (ROFF_PRELUDE & tree->state) { roff_err(tree, *argv, "`%s' disallowed in prelude", @@ -1350,11 +1391,10 @@ roff_text(ROFFCALL_ARGS) if ( ! (ROFF_PARSED & tokens[tok].flags)) { i = 0; - while (*argv) { - if ( ! (*tree->cb.roffdata)(tree->arg, i, *argv++)) + while (*argv) + if ( ! roffdata(tree, i++, *argv++)) return(0); - i = 1; - } + return((*tree->cb.roffout)(tree->arg, tok)); } @@ -1383,9 +1423,8 @@ roff_text(ROFFCALL_ARGS) } if ( ! roffispunct(*argv)) { - if ( ! (*tree->cb.roffdata)(tree->arg, i, *argv++)) + if ( ! roffdata(tree, i++, *argv++)) return(0); - i = 1; continue; } @@ -1395,7 +1434,7 @@ roff_text(ROFFCALL_ARGS) break; if (argv[j]) { - if ( ! (*tree->cb.roffdata)(tree->arg, 0, *argv++)) + if ( ! roffdata(tree, 0, *argv++)) return(0); continue; } diff --git a/tokens.c b/tokens.c new file mode 100644 index 00000000..07cdb671 --- /dev/null +++ b/tokens.c @@ -0,0 +1,184 @@ +/* $Id$ */ +/* + * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the + * above copyright notice and this permission notice appear in all + * copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL + * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE + * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL + * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR + * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include "libmdocml.h" +#include "private.h" + + +static int rofftok_dashes(const char *); +static int rofftok_special(const char *); +static int rofftok_predef(const char *); +static int rofftok_defined(const char *); + + +static int +rofftok_defined(const char *buf) +{ + if (0 == *buf) + return(-1); + if (0 == *(buf + 1)) + return(-1); + if (0 != *(buf + 2)) + return(-1); + + if (0 == strcmp(buf, ">=")) + return(ROFFTok_Ge); + else if (0 == strcmp(buf, "<=")) + return(ROFFTok_Le); + else if (0 == strcmp(buf, "Rq")) + return(ROFFTok_Rquote); + else if (0 == strcmp(buf, "Lq")) + return(ROFFTok_Lquote); + else if (0 == strcmp(buf, "ua")) + return(ROFFTok_Uparrow); + else if (0 == strcmp(buf, "aa")) + return(ROFFTok_Acute); + else if (0 == strcmp(buf, "ga")) + return(ROFFTok_Grave); + else if (0 == strcmp(buf, "Pi")) + return(ROFFTok_Pi); + else if (0 == strcmp(buf, "Ne")) + return(ROFFTok_Ne); + else if (0 == strcmp(buf, "Le")) + return(ROFFTok_Le); + else if (0 == strcmp(buf, "Ge")) + return(ROFFTok_Ge); + else if (0 == strcmp(buf, "Lt")) + return(ROFFTok_Lt); + else if (0 == strcmp(buf, "Gt")) + return(ROFFTok_Gt); + else if (0 == strcmp(buf, "Pm")) + return(ROFFTok_Plusmin); + else if (0 == strcmp(buf, "If")) + return(ROFFTok_Infty); + else if (0 == strcmp(buf, "Na")) + return(ROFFTok_Nan); + else if (0 == strcmp(buf, "Ba")) + return(ROFFTok_Bar); + + return(-1); +} + + +static int +rofftok_predef(const char *buf) +{ + if (0 == *buf) + return(-1); + + if ('(' == *buf) + return(rofftok_defined(++buf)); + + /* TODO */ + + return(-1); +} + + +static int +rofftok_dashes(const char *buf) +{ + + if (0 == *buf) + return(-1); + else if (*buf++ != 'e') + return(-1); + + if (0 == *buf) + return(-1); + else if (0 != *(buf + 1)) + return(-1); + + switch (*buf) { + case ('m'): + return(ROFFTok_Em); + case ('n'): + return(ROFFTok_En); + default: + break; + } + return(-1); +} + + +static int +rofftok_special(const char *buf) +{ + + if (0 == *buf) + return(-1); + else if (0 != *(buf + 1)) + return(-1); + + switch (*buf) { + case ('a'): + return(ROFFTok_Sp_A); + case ('b'): + return(ROFFTok_Sp_B); + case ('f'): + return(ROFFTok_Sp_F); + case ('n'): + return(ROFFTok_Sp_N); + case ('r'): + return(ROFFTok_Sp_R); + case ('t'): + return(ROFFTok_Sp_T); + case ('v'): + return(ROFFTok_Sp_V); + default: + break; + } + return(-1); +} + + +int +rofftok_scan(const char *buf) +{ + + assert(*buf); + if ('\\' != *buf++) + return(ROFFTok_MAX); + + for ( ; *buf; buf++) { + switch (*buf) { + case ('e'): + return(rofftok_special(++buf)); + case ('('): + return(rofftok_dashes(++buf)); + case (' '): + return(ROFFTok_Space); + case ('&'): + return(ROFFTok_Null); + case ('-'): + return(ROFFTok_Hyphen); + case ('*'): + return(rofftok_predef(++buf)); + default: + break; + } + } + + return(-1); +} + + |