summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKristaps Dzonsons <kristaps@bsd.lv>2008-12-04 16:19:52 +0000
committerKristaps Dzonsons <kristaps@bsd.lv>2008-12-04 16:19:52 +0000
commit7ea64ffd96cef95695a6c74dbf90f76d73fa961d (patch)
tree361049b7cd2f7f22d1b50a1dbfd403c7c0cf1182
parentbbaed73e9a14101e88d84225a902613116c536e9 (diff)
downloadmandoc-7ea64ffd96cef95695a6c74dbf90f76d73fa961d.tar.gz
Character-encoding tests.
-rw-r--r--Makefile18
-rw-r--r--html.c2
-rw-r--r--index.765
-rw-r--r--mdocml.css120
-rw-r--r--ml.c41
-rw-r--r--ml.h2
-rw-r--r--mlg.c237
-rw-r--r--private.h38
-rw-r--r--roff.c119
-rw-r--r--tokens.c184
10 files changed, 645 insertions, 181 deletions
diff --git a/Makefile b/Makefile
index 163debe4..cd9100fc 100644
--- a/Makefile
+++ b/Makefile
@@ -2,15 +2,15 @@ CFLAGS += -W -Wall -Wno-unused-parameter -g -DDEBUG
LINTFLAGS += -c -e -f -u
-LNS = mdocml.ln html.ln xml.ln libmdocml.ln roff.ln ml.ln mlg.ln compat.ln
+LNS = mdocml.ln html.ln xml.ln libmdocml.ln roff.ln ml.ln mlg.ln compat.ln tokens.ln
LLNS = llib-lmdocml.ln
LIBS = libmdocml.a
-OBJS = mdocml.o html.o xml.o libmdocml.o roff.o ml.o mlg.o compat.o
+OBJS = mdocml.o html.o xml.o libmdocml.o roff.o ml.o mlg.o compat.o tokens.o
-SRCS = mdocml.c html.c xml.c libmdocml.c roff.c ml.c mlg.c compat.c
+SRCS = mdocml.c html.c xml.c libmdocml.c roff.c ml.c mlg.c compat.c tokens.c
HEADS = libmdocml.h private.h
@@ -22,12 +22,12 @@ INSTALL = Makefile $(HEADS) $(SRCS) $(MANS)
FAIL = test.0 test.1 test.2 test.3 test.4 test.5 test.6 \
test.15 test.20 test.22 test.24 test.26 test.27 test.30 \
- test.36
+ test.36 test.37 test.40
SUCCEED = test.7 test.8 test.9 test.10 test.11 test.12 test.13 \
test.14 test.16 test.17 test.18 test.19 test.21 test.23 \
test.25 test.28 test.29 test.31 test.32 test.33 test.34 \
- test.35
+ test.35 test.37 test.38 test.39
all: mdocml
@@ -52,15 +52,15 @@ mdocml.tgz: $(INSTALL)
( cd .dist/ && tar zcf ../mdocml.tgz mdocml/ )
rm -rf .dist/
-llib-lmdocml.ln: mdocml.ln libmdocml.ln html.ln xml.ln roff.ln ml.ln mlg.ln compat.ln
- $(LINT) $(LINTFLAGS) -Cmdocml mdocml.ln libmdocml.ln html.ln xml.ln roff.ln ml.ln mlg.ln compat.ln
+llib-lmdocml.ln: mdocml.ln libmdocml.ln html.ln xml.ln roff.ln ml.ln mlg.ln compat.ln tokens.ln
+ $(LINT) $(LINTFLAGS) -Cmdocml mdocml.ln libmdocml.ln html.ln xml.ln roff.ln ml.ln mlg.ln compat.ln tokens.ln
mdocml.ln: mdocml.c libmdocml.h
mdocml.o: mdocml.c libmdocml.h
-libmdocml.a: libmdocml.o html.o xml.o roff.o ml.o mlg.o compat.o
- $(AR) rs $@ libmdocml.o html.o xml.o roff.o ml.o mlg.o compat.o
+libmdocml.a: libmdocml.o html.o xml.o roff.o ml.o mlg.o compat.o tokens.o
+ $(AR) rs $@ libmdocml.o html.o xml.o roff.o ml.o mlg.o compat.o tokens.o
xml.ln: xml.c private.h libmdocml.h ml.h
diff --git a/html.c b/html.c
index 6e968789..de733ab6 100644
--- a/html.c
+++ b/html.c
@@ -163,7 +163,7 @@ html_begin(struct md_mbuf *mbuf, const struct md_args *args,
assert(args->params.html.css);
if (HTML_CSS_EMBED & args->params.html.flags) {
- if ( ! ml_puts(mbuf, " <style><!--\n", &res))
+ if ( ! ml_puts(mbuf, " <style type=\"text/css\"><!--\n", &res))
return(0);
if ( ! html_loadcss(mbuf, args->params.html.css))
return(0);
diff --git a/index.7 b/index.7
new file mode 100644
index 00000000..ff3f0d59
--- /dev/null
+++ b/index.7
@@ -0,0 +1,65 @@
+.\"
+.Dd $Mdocdate$
+.Dt index 7
+.Os LOCAL
+.\"
+.Sh NAME
+.Nm mdocml
+.Nd compile manpage source into mark-up language
+.\"
+.Sh DESCRIPTION
+The
+.Nm
+utility compiles
+.Xr mdoc 7
+macros, such as those inheriting from
+.Xr mdoc.samples 7 ,
+into XML or HTML documents. Unlike other similar utilities such as
+.Xr rman 1
+and
+.Xr man2html 1 ,
+.Nm
+acts directly on source documents, validating its input and producing a
+variety of outputs.
+.Ss Features
+In order to operate properly,
+.Nm
+fully validates its input. This includes, but is not limited to, the
+following checks:
+.Pp
+.Bl -enum -compact
+.It
+special characters (such as
+.Sq \en
+and
+.Sq \et ) ,
+.It
+macro scope (such as
+.Sq \&.Sh
+macros clobbering a pending
+.Sq \&.Bl
+scope),
+.It
+predefined characters (such as \\*(>= and \\*q),
+.It
+correctly-ordered document prelude.
+.El
+.\"
+.Sh ENVIRONMENT
+The
+.Nm
+utility has been tested under Linux and OpenBSD.
+.\"
+.Sh EXAMPLES
+This page was produced as follows:
+.Pp
+.D1 % mdocml -fhtml -o index.html index.7
+.\"
+.Sh SEE ALSO
+TODO.
+.\"
+.Sh AUTHORS
+The
+.Nm
+utility was written by
+.An Em Kristaps Dzonsons Aq kristaps@kth.se .
diff --git a/mdocml.css b/mdocml.css
index 3acf0d0e..f59f009a 100644
--- a/mdocml.css
+++ b/mdocml.css
@@ -1,92 +1,28 @@
- body
- {
- margin: 0px;
- font-family: Tahoma, sans-serif;
- font-size: small;
- }
-
- div.mdoc
- {
- width: 600px;
- }
-
- div.block-Sh
- {
- margin-bottom: 20px;
- }
-
- div.head-Sh
- {
- font-weight: bold;
- font-size: larger;
- }
-
- div.head-Ss
- {
- font-weight: bold;
- margin-top: 10px;
- text-align: justify;
- }
-
- div.body-Sh
- {
- margin-left: 20px;
- margin-top: 10px;
- text-align: justify;
- }
-
- span.inline-Nd:before
- {
- content: ' - ';
- }
-
- span.inline-Fl:before
- {
- content: '-';
- }
-
- span.inline-Fl
- {
- font-weight: bolder;
- }
-
- span.inline-Ar
- {
- text-decoration: underline;
- }
-
- span.inline-Pa
- {
- text-decoration: underline;
- }
-
- span.inline-Op:before
- {
- content: '[';
- }
-
- span.inline-Op:after
- {
- content: ']';
- }
-
- div.block-Bl
- {
- margin-top: 10px;
- margin-left: 20px;
- }
-
- div.inline-Pp
- {
- margin-bottom: 10px;
- }
-
- span.inline-D1
- {
- margin-left: 20px;
- }
-
- span.inline-Qq:before { content: '``'; }
- span.inline-Qq:after { content: '\'\''; }
- span.inline-Sq:before { content: '`'; }
- span.inline-Sq:after { content: '\''; }
+ body { margin: 10px;
+ font-family: Tahoma, sans-serif;
+ font-size: small; }
+ div.mdoc { width: 600px; }
+ div.block-Sh { margin-bottom: 20px; }
+ div.head-Sh { font-weight: bold;
+ font-size: larger; }
+ div.head-Ss { font-weight: bold;
+ margin-top: 10px;
+ text-align: justify; }
+ div.body-Sh { margin-left: 20px;
+ margin-top: 10px;
+ text-align: justify; }
+ span.inline-Nd:before { content: ' \2014 '; }
+ span.inline-Fl:before { content: '-'; }
+ span.inline-Fl { font-weight: bolder; }
+ span.inline-Ar { text-decoration: underline; }
+ span.inline-Pa { text-decoration: underline; }
+ span.inline-Op:before { content: '['; }
+ span.inline-Op:after { content: ']'; }
+ div.block-Bl { margin-top: 10px;
+ margin-left: 20px; }
+ div.inline-Pp { margin-bottom: 10px; }
+ span.inline-D1 { margin-left: 20px; }
+ span.inline-Qq:before { content: '\201c'; }
+ span.inline-Qq:after { content: '\201d'; }
+ span.inline-Sq:before { content: '\2018'; }
+ span.inline-Sq:after { content: '\2019'; }
diff --git a/ml.c b/ml.c
index 04980b8f..e174ccfe 100644
--- a/ml.c
+++ b/ml.c
@@ -34,30 +34,44 @@ ml_nputstring(struct md_mbuf *p,
const char *buf, size_t sz, size_t *pos)
{
int i;
+ const char *seq;
+ size_t ssz;
for (i = 0; i < (int)sz; i++) {
switch (buf[i]) {
+
+ /* Ampersand ml-escape. */
case ('&'):
- if ( ! ml_nputs(p, "&amp;", 5, pos))
- return(0);
+ seq = "&amp;";
+ ssz = 5;
break;
+
+ /* Quotation ml-escape. */
case ('"'):
- if ( ! ml_nputs(p, "&quot;", 6, pos))
- return(0);
+ seq = "&quot;";
+ ssz = 6;
break;
+
+ /* Lt ml-escape. */
case ('<'):
- if ( ! ml_nputs(p, "&lt;", 4, pos))
- return(0);
+ seq = "&lt;";
+ ssz = 4;
break;
+
+ /* Gt ml-escape. */
case ('>'):
- if ( ! ml_nputs(p, "&gt;", 4, pos))
- return(0);
+ seq = "&gt;";
+ ssz = 4;
break;
+
default:
- if ( ! ml_nputs(p, &buf[i], 1, pos))
- return(0);
+ seq = &buf[i];
+ ssz = 1;
break;
}
+
+ if ( ! ml_nputs(p, seq, ssz, pos))
+ return(-1);
}
return(1);
}
@@ -67,6 +81,9 @@ int
ml_nputs(struct md_mbuf *p, const char *buf, size_t sz, size_t *pos)
{
+ if (0 == sz)
+ return(1);
+
if ( ! md_buf_puts(p, buf, sz))
return(0);
@@ -80,7 +97,9 @@ ml_puts(struct md_mbuf *p, const char *buf, size_t *pos)
{
size_t sz;
- sz = strlen(buf);
+ if (0 == (sz = strlen(buf)))
+ return(1);
+
if ( ! md_buf_puts(p, buf, sz))
return(0);
*pos += sz;
diff --git a/ml.h b/ml.h
index c3d25d52..b2306cce 100644
--- a/ml.h
+++ b/ml.h
@@ -29,6 +29,7 @@ enum md_ns {
MD_NS_DEFAULT,
};
+
typedef int (*ml_begin)(struct md_mbuf *, const struct md_args *,
const struct tm *, const char *, const char *,
const char *, const char *);
@@ -40,7 +41,6 @@ typedef ssize_t (*ml_begintag)(struct md_mbuf *,
const struct md_args *, enum md_ns, int,
const int *, const char **);
-
__BEGIN_DECLS
int ml_nputstring(struct md_mbuf *,
diff --git a/mlg.c b/mlg.c
index cad6e9bb..ac413e3a 100644
--- a/mlg.c
+++ b/mlg.c
@@ -68,15 +68,19 @@ static int mlg_roffhead(void *, const struct tm *,
const char *, const char *);
static int mlg_rofftail(void *);
static int mlg_roffin(void *, int, int *, char **);
-static int mlg_roffdata(void *, int, char *);
+static int mlg_roffdata(void *, int,
+ const char *, char *);
+static int mlg_rofftoken(void *, int, int);
static int mlg_roffout(void *, int);
static int mlg_roffblkin(void *, int, int *, char **);
static int mlg_roffblkout(void *, int);
static int mlg_roffspecial(void *, int, int *,
char **, char **);
-static int mlg_roffblkheadin(void *, int, int *, char **);
+static int mlg_roffblkheadin(void *, int,
+ int *, char **);
static int mlg_roffblkheadout(void *, int);
-static int mlg_roffblkbodyin(void *, int, int *, char **);
+static int mlg_roffblkbodyin(void *, int,
+ int *, char **);
static int mlg_roffblkbodyout(void *, int);
static int mlg_beginblk(struct md_mlg *, enum md_ns, int,
@@ -88,7 +92,14 @@ static int mlg_endtag(struct md_mlg *, enum md_ns, int);
static int mlg_indent(struct md_mlg *);
static int mlg_newline(struct md_mlg *);
static void mlg_mode(struct md_mlg *, enum md_tok);
-static int mlg_data(struct md_mlg *, int, char *);
+static int mlg_data(struct md_mlg *, int,
+ const char *, char *);
+static void mlg_err(struct md_mlg *, const char *,
+ const char *, char *);
+static void mlg_warn(struct md_mlg *, const char *,
+ const char *, char *);
+static void mlg_msg(struct md_mlg *, enum roffmsg,
+ const char *, const char *, char *);
#ifdef __linux__
extern size_t strlcat(char *, const char *, size_t);
@@ -221,10 +232,11 @@ mlg_mode(struct md_mlg *p, enum md_tok ns)
static int
-mlg_data(struct md_mlg *p, int space, char *buf)
+mlg_data(struct md_mlg *p, int space, const char *start, char *buf)
{
size_t sz;
char *bufp;
+ int c;
assert(p->mbuf);
assert(0 != p->indent);
@@ -252,9 +264,19 @@ mlg_data(struct md_mlg *p, int space, char *buf)
if (0 == p->pos) {
if ( ! mlg_indent(p))
return(0);
- if ( ! ml_nputstring(p->mbuf, bufp,
- sz, &p->pos))
+
+ c = ml_nputstring(p->mbuf, bufp, sz, &p->pos);
+ if (0 == c) {
+ mlg_err(p, start, bufp, "invalid "
+ "character sequence");
+ return(0);
+ } else if (c > 1) {
+ mlg_warn(p, start, bufp, "bogus "
+ "character sequence");
+ return(0);
+ } else if (-1 == c)
return(0);
+
if (p->indent * INDENT + sz >= COLUMNS)
if ( ! mlg_newline(p))
return(0);
@@ -273,7 +295,16 @@ mlg_data(struct md_mlg *p, int space, char *buf)
return(0);
}
- if ( ! ml_nputstring(p->mbuf, bufp, sz, &p->pos))
+ c = ml_nputstring(p->mbuf, bufp, sz, &p->pos);
+ if (0 == c) {
+ mlg_err(p, start, bufp, "invalid "
+ "character sequence");
+ return(0);
+ } else if (c > 1) {
+ mlg_warn(p, start, bufp, "bogus "
+ "character sequence");
+ return(0);
+ } else if (-1 == c)
return(0);
if ( ! (ML_OVERRIDE_ALL & p->flags))
@@ -326,6 +357,7 @@ mlg_alloc(const struct md_args *args,
cb.roffspecial = mlg_roffspecial;
cb.roffmsg = mlg_roffmsg;
cb.roffdata = mlg_roffdata;
+ cb.rofftoken = mlg_rofftoken;
if (NULL == (p = calloc(1, sizeof(struct md_mlg))))
err(1, "calloc");
@@ -514,12 +546,182 @@ static void
mlg_roffmsg(void *arg, enum roffmsg lvl,
const char *buf, const char *pos, char *msg)
{
- char *level;
+
+ mlg_msg((struct md_mlg *)arg, lvl, buf, pos, msg);
+}
+
+
+/*
+ * Token callback: write the mark-up sequence for the escape token
+ * `value' (one of the ROFFTok_* constants) to the output buffer,
+ * preceded by a single space when `space' is non-zero.
+ * Returns 1 on success, 0 if `value' is not a known token or if a
+ * write fails.
+ */
+static int
+mlg_rofftoken(void *arg, int space, int value)
+{
+	struct md_mlg	*p;
+	const char	*seq;
+	size_t		 sz, res;
+
+	assert(arg);
+	p = (struct md_mlg *)arg;
+
+	switch (value) {
+	case (ROFFTok_Sp_A):
+		seq = "\\a";
+		sz = 2;
+		break;
+	case (ROFFTok_Sp_B):
+		seq = "\\b";
+		sz = 2;
+		break;
+	case (ROFFTok_Sp_F):
+		seq = "\\f";
+		sz = 2;
+		break;
+	case (ROFFTok_Sp_N):
+		seq = "\\n";
+		sz = 2;
+		break;
+	case (ROFFTok_Sp_R):
+		seq = "\\r";
+		sz = 2;
+		break;
+	case (ROFFTok_Sp_T):
+		seq = "\\t";
+		sz = 2;
+		break;
+	case (ROFFTok_Sp_V):
+		seq = "\\v";
+		sz = 2;
+		break;
+	case (ROFFTok_Space):
+		seq = "&nbsp;";
+		sz = 6;
+		break;
+	case (ROFFTok_Null):
+		/* Zero-width token: nothing is emitted. */
+		seq = "";
+		sz = 0;
+		break;
+	case (ROFFTok_Hyphen):
+		seq = "&#8208;";
+		sz = 7;
+		break;
+	case (ROFFTok_Em):
+		seq = "&#8212;";
+		sz = 7;
+		break;
+	case (ROFFTok_En):
+		seq = "&#8211;";
+		sz = 7;
+		break;
+	case (ROFFTok_Ge):
+		seq = "&#8805;";
+		sz = 7;
+		break;
+	case (ROFFTok_Le):
+		seq = "&#8804;";
+		sz = 7;
+		break;
+	case (ROFFTok_Rquote):
+		seq = "&#8221;";
+		sz = 7;
+		break;
+	case (ROFFTok_Lquote):
+		seq = "&#8220;";
+		sz = 7;
+		break;
+	case (ROFFTok_Uparrow):
+		seq = "&#8593;";
+		sz = 7;
+		break;
+	case (ROFFTok_Acute):
+		seq = "&#180;";
+		sz = 6;
+		break;
+	case (ROFFTok_Grave):
+		seq = "&#96;";
+		sz = 5;
+		break;
+	case (ROFFTok_Pi):
+		seq = "&#960;";
+		sz = 6;
+		break;
+	case (ROFFTok_Ne):
+		seq = "&#8800;";
+		sz = 7;
+		break;
+	case (ROFFTok_Lt):
+		seq = "&lt;";
+		sz = 4;
+		break;
+	case (ROFFTok_Gt):
+		seq = "&gt;";
+		sz = 4;
+		break;
+	case (ROFFTok_Plusmin):
+		seq = "&#177;";
+		sz = 6;
+		break;
+	case (ROFFTok_Infty):
+		seq = "&#8734;";
+		sz = 7;
+		break;
+	case (ROFFTok_Bar):
+		seq = "&#124;";
+		sz = 6;
+		break;
+	case (ROFFTok_Nan):
+		seq = "Nan";
+		sz = 3;
+		break;
+	default:
+		/* Unknown token: fail rather than use unset seq/sz. */
+		return(0);
+	}
+
+	/*
+	 * Clear the counter before each write so p->pos advances only
+	 * by what that write reported; a skipped write contributes zero
+	 * instead of an indeterminate value.
+	 */
+	res = 0;
+	if (space && ! ml_nputs(p->mbuf, " ", 1, &res))
+		return(0);
+	p->pos += res;
+
+	res = 0;
+	if (0 != sz && ! ml_nputs(p->mbuf, seq, sz, &res))
+		return(0);
+	p->pos += res;
+
+	return(1);
+}
+
+
+/*
+ * Data callback: pass a word of text to mlg_data() and, if that
+ * succeeds, switch the generator into MD_TEXT mode.
+ * Returns 0 when mlg_data() fails, 1 otherwise.
+ */
+static int
+mlg_roffdata(void *arg, int space, const char *start, char *buf)
+{
struct md_mlg *p;
assert(arg);
p = (struct md_mlg *)arg;
+ if ( ! mlg_data(p, space, start, buf))
+ return(0);
+
+ mlg_mode(p, MD_TEXT);
+ return(1);
+}
+
+
+/*
+ * Report a message at error level; `buf', `pos' and `msg' are handed
+ * to mlg_msg() unchanged.
+ */
+static void
+mlg_err(struct md_mlg *p, const char *buf, const char *pos, char *msg)
+{
+	const enum roffmsg	 lvl = ROFF_ERROR;
+
+	mlg_msg(p, lvl, buf, pos, msg);
+}
+
+
+/*
+ * Report a message at warning level; `buf', `pos' and `msg' are handed
+ * to mlg_msg() unchanged.
+ */
+static void
+mlg_warn(struct md_mlg *p, const char *buf, const char *pos, char *msg)
+{
+	const enum roffmsg	 lvl = ROFF_WARN;
+
+	mlg_msg(p, lvl, buf, pos, msg);
+}
+
+
+static void
+mlg_msg(struct md_mlg *p, enum roffmsg lvl,
+ const char *buf, const char *pos, char *msg)
+{
+ char *level;
+
switch (lvl) {
case (ROFF_WARN):
if ( ! (MD_WARN_ALL & p->args->warnings))
@@ -542,20 +744,3 @@ mlg_roffmsg(void *arg, enum roffmsg lvl,
p->rbuf->name, level, msg);
}
-
-
-static int
-mlg_roffdata(void *arg, int space, char *buf)
-{
- struct md_mlg *p;
-
- assert(arg);
- p = (struct md_mlg *)arg;
-
- if ( ! mlg_data(p, space, buf))
- return(0);
-
- mlg_mode(p, MD_TEXT);
- return(1);
-}
-
diff --git a/private.h b/private.h
index ff7dba05..e5a55431 100644
--- a/private.h
+++ b/private.h
@@ -19,6 +19,8 @@
#ifndef PRIVATE_H
#define PRIVATE_H
+#include <time.h>
+
struct md_rbuf {
int fd; /* Open descriptor. */
char *name; /* Name of file. */
@@ -35,6 +37,35 @@ struct md_mbuf {
size_t pos; /* Position in buffer. */
};
+#define ROFFTok_Sp_A 0
+#define ROFFTok_Sp_B 1
+#define ROFFTok_Sp_F 2
+#define ROFFTok_Sp_N 3
+#define ROFFTok_Sp_R 4
+#define ROFFTok_Sp_T 5
+#define ROFFTok_Sp_V 6
+#define ROFFTok_Space 7
+#define ROFFTok_Null 8
+#define ROFFTok_Hyphen 9
+#define ROFFTok_Em 10
+#define ROFFTok_En 11
+#define ROFFTok_Ge 12
+#define ROFFTok_Le 13
+#define ROFFTok_Rquote 14
+#define ROFFTok_Lquote 15
+#define ROFFTok_Uparrow 16
+#define ROFFTok_Acute 17
+#define ROFFTok_Grave 18
+#define ROFFTok_Pi 19
+#define ROFFTok_Ne 20
+#define ROFFTok_Lt 21
+#define ROFFTok_Gt 22
+#define ROFFTok_Plusmin 23
+#define ROFFTok_Infty 24
+#define ROFFTok_Bar 25
+#define ROFFTok_Nan 26
+#define ROFFTok_MAX 27
+
#define ROFF___ 0
#define ROFF_Dd 1
#define ROFF_Dt 2
@@ -205,6 +236,8 @@ struct md_mbuf {
#define ROFF_Words 59
#define ROFF_ARGMAX 60
+#define ROFF_MAXLINEARG 32
+
extern const char *const *toknames;
extern const char *const *tokargnames;
@@ -216,7 +249,8 @@ struct roffcb {
int (*roffhead)(void *, const struct tm *, const char *,
const char *, const char *, const char *);
int (*rofftail)(void *);
- int (*roffdata)(void *, int, char *);
+ int (*roffdata)(void *, int, const char *, char *);
+ int (*rofftoken)(void *, int, int);
int (*roffin)(void *, int, int *, char **);
int (*roffout)(void *, int);
int (*roffblkin)(void *, int, int *, char **);
@@ -255,6 +289,8 @@ struct rofftree *roff_alloc(const struct roffcb *, void *);
int roff_engine(struct rofftree *, char *);
int roff_free(struct rofftree *, int);
+int rofftok_scan(const char *);
+
__END_DECLS
#endif /*!PRIVATE_H*/
diff --git a/roff.c b/roff.c
index 23369a6b..b30add22 100644
--- a/roff.c
+++ b/roff.c
@@ -42,8 +42,6 @@
/* TODO: macros with a set number of arguments? */
/* TODO: validate Dt macro arguments. */
-#define ROFF_MAXARG 32
-
enum roffd {
ROFF_ENTER = 0,
ROFF_EXIT
@@ -133,7 +131,8 @@ static int roffparseopts(struct rofftree *, int,
char ***, int *, char **);
static int roffcall(struct rofftree *, int, char **);
static int roffparse(struct rofftree *, char *);
-static int textparse(const struct rofftree *, char *);
+static int textparse(struct rofftree *, char *);
+static int roffdata(struct rofftree *, int, char *);
#ifdef __linux__
extern size_t strlcat(char *, const char *, size_t);
@@ -272,8 +271,8 @@ static const struct rofftok tokens[ROFF_MAX] = {
{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Sy */
{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Tn */
{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Ux */
- { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Xc */
- { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Xo */
+ { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Xc */
+ { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Xo */
{ roff_layout, NULL, NULL, roffchild_Fo, 0, ROFF_LAYOUT, 0 }, /* Fo */
{ roff_noop, NULL, roffparent_Fc, NULL, ROFF_Fo, ROFF_LAYOUT, 0 }, /* Fc */
{ roff_layout, NULL, NULL, NULL, 0, ROFF_LAYOUT, 0 }, /* Oo */
@@ -450,14 +449,43 @@ roff_engine(struct rofftree *tree, char *buf)
+/*
+ * Parse a line of free-form text: split it in place into
+ * whitespace-delimited words and pass each one to roffdata().
+ * Fails (returns 0) if text occurs outside the document body or if
+ * roffdata() rejects a word; returns 1 otherwise.
+ */
static int
-textparse(const struct rofftree *tree, char *buf)
+textparse(struct rofftree *tree, char *buf)
{
+	char	*bufp;
+
+	/* TODO: literal parsing. */
if ( ! (ROFF_BODY & tree->state)) {
roff_err(tree, buf, "data not in body");
return(0);
}
- return((*tree->cb.roffdata)(tree->arg, 1, buf));
+
+	/*
+	 * The casts matter: isspace() is only defined for values
+	 * representable as unsigned char (or EOF), and plain char may
+	 * be negative for bytes with the high bit set.
+	 */
+	/* LINTED */
+	for (;;) {
+		while (*buf && isspace((unsigned char)*buf))
+			buf++;
+
+		if (0 == *buf)
+			break;
+
+		/* Mark the start of the word and find its end. */
+		bufp = buf++;
+
+		while (*buf && ! isspace((unsigned char)*buf))
+			buf++;
+
+		/* Terminate the word unless it ends the line. */
+		if (0 != *buf)
+			*buf++ = 0;
+
+		if ( ! roffdata(tree, 1, bufp))
+			return(0);
+	}
+
+	return(1);
}
@@ -474,7 +502,7 @@ roffargs(const struct rofftree *tree,
p = buf;
/* LINTED */
- for (i = 0; *buf && i < ROFF_MAXARG; i++) {
+ for (i = 0; *buf && i < ROFF_MAXLINEARG; i++) {
if ('\"' == *buf) {
argv[i] = ++buf;
while (*buf && '\"' != *buf)
@@ -499,7 +527,7 @@ roffargs(const struct rofftree *tree,
}
assert(i > 0);
- if (ROFF_MAXARG == i && *buf) {
+ if (ROFF_MAXLINEARG == i && *buf) {
roff_err(tree, p, "too many arguments for `%s'", toknames
[tok]);
return(0);
@@ -530,7 +558,7 @@ roffparse(struct rofftree *tree, char *buf)
{
int tok, t;
struct roffnode *n;
- char *argv[ROFF_MAXARG];
+ char *argv[ROFF_MAXLINEARG];
char **argvp;
if (0 != *buf && 0 != *(buf + 1) && 0 != *(buf + 2))
@@ -907,7 +935,7 @@ roffpurgepunct(struct rofftree *tree, char **argv)
/* LINTED */
while (argv[i])
- if ( ! (*tree->cb.roffdata)(tree->arg, 0, argv[i++]))
+ if ( ! roffdata(tree, 0, argv[i++]))
return(0);
return(1);
}
@@ -938,6 +966,23 @@ roffparseopts(struct rofftree *tree, int tok,
}
+/*
+ * Dispatch one word of text.  The word is first classified by
+ * rofftok_scan(): an invalid escape is reported and fails, a
+ * recognised escape goes to the rofftoken callback, and anything else
+ * goes to the roffdata callback as plain data.  `space' is normalised
+ * to 0 or 1 before being passed on.
+ */
+static int
+roffdata(struct rofftree *tree, int space, char *buf)
+{
+	int	 tok, sp;
+
+	sp = (space != 0);
+	tok = rofftok_scan(buf);
+
+	switch (tok) {
+	case (-1):
+		roff_err(tree, buf, "invalid character sequence");
+		return(0);
+	case (ROFFTok_MAX):
+		/* Not an escape sequence: ordinary data. */
+		return((*tree->cb.roffdata)(tree->arg,
+		    sp, tree->cur, buf));
+	default:
+		break;
+	}
+
+	return((*tree->cb.rofftoken)(tree->arg, sp, tok));
+}
+
+
/* ARGSUSED */
static int
roff_Dd(ROFFCALL_ARGS)
@@ -1108,11 +1153,9 @@ roff_Sm(ROFFCALL_ARGS)
tok, argcp, argvp, morep))
return(0);
- while (*argv) {
- if ((*tree->cb.roffdata)(tree->arg, 1, *argv++))
- continue;
- return(0);
- }
+ while (*argv)
+ if ( ! roffdata(tree, 1, *argv++))
+ return(0);
return(1);
}
@@ -1143,18 +1186,19 @@ roff_Ns(ROFFCALL_ARGS)
}
if ( ! roffispunct(*argv)) {
- if ((*tree->cb.roffdata)(tree->arg, 1, *argv++))
- continue;
- return(0);
+ if ( ! roffdata(tree, 1, *argv++))
+ return(0);
+ continue;
}
+
for (j = 0; argv[j]; j++)
if ( ! roffispunct(argv[j]))
break;
if (argv[j]) {
- if ((*tree->cb.roffdata)(tree->arg, 0, *argv++))
- continue;
- return(0);
+ if ( ! roffdata(tree, 0, *argv++))
+ return(0);
+ continue;
}
break;
@@ -1221,8 +1265,8 @@ roff_Os(ROFFCALL_ARGS)
static int
roff_layout(ROFFCALL_ARGS)
{
- int i, c, argcp[ROFF_MAXARG];
- char *argvp[ROFF_MAXARG];
+ int i, c, argcp[ROFF_MAXLINEARG];
+ char *argvp[ROFF_MAXLINEARG];
if (ROFF_PRELUDE & tree->state) {
roff_err(tree, *argv, "bad `%s' in prelude",
@@ -1266,11 +1310,10 @@ roff_layout(ROFFCALL_ARGS)
if ( ! (ROFF_PARSED & tokens[tok].flags)) {
i = 0;
- while (*argv) {
- if ( ! (*tree->cb.roffdata)(tree->arg, i, *argv++))
+ while (*argv)
+ if ( ! roffdata(tree, i++, *argv++))
return(0);
- i = 1;
- }
+
if ( ! (*tree->cb.roffblkheadout)(tree->arg, tok))
return(0);
return((*tree->cb.roffblkbodyin)
@@ -1286,10 +1329,8 @@ roff_layout(ROFFCALL_ARGS)
while (*argv) {
if (ROFF_MAX == (c = rofffindcallable(*argv))) {
assert(tree->arg);
- if ( ! (*tree->cb.roffdata)
- (tree->arg, i, *argv++))
+ if ( ! roffdata(tree, i++, *argv++))
return(0);
- i = 1;
continue;
}
if ( ! roffcall(tree, c, argv))
@@ -1329,8 +1370,8 @@ roff_layout(ROFFCALL_ARGS)
static int
roff_text(ROFFCALL_ARGS)
{
- int i, j, first, c, argcp[ROFF_MAXARG];
- char *argvp[ROFF_MAXARG];
+ int i, j, first, c, argcp[ROFF_MAXLINEARG];
+ char *argvp[ROFF_MAXLINEARG];
if (ROFF_PRELUDE & tree->state) {
roff_err(tree, *argv, "`%s' disallowed in prelude",
@@ -1350,11 +1391,10 @@ roff_text(ROFFCALL_ARGS)
if ( ! (ROFF_PARSED & tokens[tok].flags)) {
i = 0;
- while (*argv) {
- if ( ! (*tree->cb.roffdata)(tree->arg, i, *argv++))
+ while (*argv)
+ if ( ! roffdata(tree, i++, *argv++))
return(0);
- i = 1;
- }
+
return((*tree->cb.roffout)(tree->arg, tok));
}
@@ -1383,9 +1423,8 @@ roff_text(ROFFCALL_ARGS)
}
if ( ! roffispunct(*argv)) {
- if ( ! (*tree->cb.roffdata)(tree->arg, i, *argv++))
+ if ( ! roffdata(tree, i++, *argv++))
return(0);
- i = 1;
continue;
}
@@ -1395,7 +1434,7 @@ roff_text(ROFFCALL_ARGS)
break;
if (argv[j]) {
- if ( ! (*tree->cb.roffdata)(tree->arg, 0, *argv++))
+ if ( ! roffdata(tree, 0, *argv++))
return(0);
continue;
}
diff --git a/tokens.c b/tokens.c
new file mode 100644
index 00000000..07cdb671
--- /dev/null
+++ b/tokens.c
@@ -0,0 +1,184 @@
+/* $Id$ */
+/*
+ * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the
+ * above copyright notice and this permission notice appear in all
+ * copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
+ * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libmdocml.h"
+#include "private.h"
+
+
+static int rofftok_dashes(const char *);
+static int rofftok_special(const char *);
+static int rofftok_predef(const char *);
+static int rofftok_defined(const char *);
+
+
+/*
+ * Look up the two-character name of a predefined string and return
+ * the matching ROFFTok_* value.  Returns -1 if `buf' is not exactly
+ * two characters long or is not a known name.
+ */
+static int
+rofftok_defined(const char *buf)
+{
+	static const struct {
+		const char	*name;
+		int		 tok;
+	} names[] = {
+		{ ">=", ROFFTok_Ge },
+		{ "<=", ROFFTok_Le },
+		{ "Rq", ROFFTok_Rquote },
+		{ "Lq", ROFFTok_Lquote },
+		{ "ua", ROFFTok_Uparrow },
+		{ "aa", ROFFTok_Acute },
+		{ "ga", ROFFTok_Grave },
+		{ "Pi", ROFFTok_Pi },
+		{ "Ne", ROFFTok_Ne },
+		{ "Le", ROFFTok_Le },
+		{ "Ge", ROFFTok_Ge },
+		{ "Lt", ROFFTok_Lt },
+		{ "Gt", ROFFTok_Gt },
+		{ "Pm", ROFFTok_Plusmin },
+		{ "If", ROFFTok_Infty },
+		{ "Na", ROFFTok_Nan },
+		{ "Ba", ROFFTok_Bar }
+	};
+	size_t		 i;
+
+	/* Every known name is exactly two characters long. */
+	if (2 != strlen(buf))
+		return(-1);
+
+	for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
+		if (0 == strcmp(buf, names[i].name))
+			return(names[i].tok);
+
+	return(-1);
+}
+
+
+/*
+ * Resolve the text following a `\*' escape.  Only the parenthesised
+ * form is handled: the text after `(' is looked up with
+ * rofftok_defined().  Anything else, including an empty string,
+ * yields -1.
+ */
+static int
+rofftok_predef(const char *buf)
+{
+
+	/* TODO: forms other than `(xx'. */
+	if ('(' != *buf)
+		return(-1);
+
+	return(rofftok_defined(buf + 1));
+}
+
+
+/*
+ * Resolve the text following a `\(' escape: exactly "em" gives
+ * ROFFTok_Em, exactly "en" gives ROFFTok_En, anything else gives -1.
+ */
+static int
+rofftok_dashes(const char *buf)
+{
+
+	if (0 == strcmp(buf, "em"))
+		return(ROFFTok_Em);
+	if (0 == strcmp(buf, "en"))
+		return(ROFFTok_En);
+
+	return(-1);
+}
+
+
+/*
+ * Resolve the text following a `\e' escape.  `buf' must be exactly
+ * one of the characters a, b, f, n, r, t or v, which map to the
+ * corresponding ROFFTok_Sp_* value; anything else gives -1.
+ */
+static int
+rofftok_special(const char *buf)
+{
+	static const char	 keys[] = "abfnrtv";
+	static const int	 toks[] = {
+		ROFFTok_Sp_A, ROFFTok_Sp_B, ROFFTok_Sp_F, ROFFTok_Sp_N,
+		ROFFTok_Sp_R, ROFFTok_Sp_T, ROFFTok_Sp_V
+	};
+	const char		*cp;
+
+	/* Exactly one character is required. */
+	if (0 == buf[0] || 0 != buf[1])
+		return(-1);
+
+	/* buf[0] is non-zero here, so the terminator cannot match. */
+	if (NULL == (cp = strchr(keys, buf[0])))
+		return(-1);
+
+	return(toks[cp - keys]);
+}
+
+
+/*
+ * Classify a non-empty word.  Returns ROFFTok_MAX if the word does
+ * not begin with a backslash (ordinary data), the ROFFTok_* value of
+ * the escape sequence if one is recognised, or -1 if the word begins
+ * with a backslash but is not a recognised sequence.
+ */
+int
+rofftok_scan(const char *buf)
+{
+
+	assert(*buf);
+	if ('\\' != *buf++)
+		return(ROFFTok_MAX);
+
+	/*
+	 * Only the character directly after the backslash selects the
+	 * escape.  Scanning further would let a later `-', `&', etc.
+	 * in the word be mistaken for the escape character.
+	 */
+	switch (*buf) {
+	case ('e'):
+		return(rofftok_special(buf + 1));
+	case ('('):
+		return(rofftok_dashes(buf + 1));
+	case (' '):
+		return(ROFFTok_Space);
+	case ('&'):
+		return(ROFFTok_Null);
+	case ('-'):
+		return(ROFFTok_Hyphen);
+	case ('*'):
+		return(rofftok_predef(buf + 1));
+	default:
+		break;
+	}
+
+	return(-1);
+}
+
+