summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Kristaps Dzonsons <kristaps@bsd.lv> 2008-12-04 19:31:57 +0000
committer: Kristaps Dzonsons <kristaps@bsd.lv> 2008-12-04 19:31:57 +0000
commit: 50ecaba08336212e4dc1802f15d7d3f4ffad186a (patch)
tree: bfc5e27855965ac17df779fa730772c1a696f7a3
parent: fb40e7adf35e6b21b0e64e170b71e1a4d2cf5351 (diff)
download: mandoc-50ecaba08336212e4dc1802f15d7d3f4ffad186a.tar.gz
Moved charset recognition into the filter.
-rw-r--r-- Makefile 2
-rw-r--r-- index.7 4
-rw-r--r-- ml.c 139
-rw-r--r-- mlg.c 138
-rw-r--r-- private.h 7
-rw-r--r-- roff.c 14
-rw-r--r-- tokens.c 113
7 files changed, 202 insertions, 215 deletions
diff --git a/Makefile b/Makefile
index cd9100fc..db7297b7 100644
--- a/Makefile
+++ b/Makefile
@@ -27,7 +27,7 @@ FAIL = test.0 test.1 test.2 test.3 test.4 test.5 test.6 \
SUCCEED = test.7 test.8 test.9 test.10 test.11 test.12 test.13 \
test.14 test.16 test.17 test.18 test.19 test.21 test.23 \
test.25 test.28 test.29 test.31 test.32 test.33 test.34 \
- test.35 test.37 test.38 test.39
+ test.35 test.38 test.39
all: mdocml
diff --git a/index.7 b/index.7
index b88246bf..71f77d2f 100644
--- a/index.7
+++ b/index.7
@@ -47,9 +47,9 @@ respectively),
correctly-ordered document prelude,
.It
sane argument values (such as those for
-.Sq \& Dt
+.Sq \&.Dt
or
-.Sq \& Sm ) ,
+.Sq \&.Sm ) ,
.It
and so on.
.El
diff --git a/ml.c b/ml.c
index e174ccfe..ff047d79 100644
--- a/ml.c
+++ b/ml.c
@@ -16,6 +16,7 @@
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
+#include <assert.h>
#include <stdlib.h>
#include <string.h>
@@ -33,13 +34,147 @@ int
ml_nputstring(struct md_mbuf *p,
const char *buf, size_t sz, size_t *pos)
{
- int i;
+ int i, v;
const char *seq;
size_t ssz;
for (i = 0; i < (int)sz; i++) {
switch (buf[i]) {
+ /* Escaped value. */
+ case ('\\'):
+ if (-1 == (v = rofftok_scan(buf, &i))) {
+ /* TODO: error. */
+ return(0);
+ }
+
+ switch (v) {
+ case (ROFFTok_Sp_A):
+ seq = "\\a";
+ ssz = 2;
+ break;
+ case (ROFFTok_Sp_B):
+ seq = "\\b";
+ ssz = 2;
+ break;
+ case (ROFFTok_Sp_F):
+ seq = "\\f";
+ ssz = 2;
+ break;
+ case (ROFFTok_Sp_N):
+ seq = "\\n";
+ ssz = 2;
+ break;
+ case (ROFFTok_Sp_R):
+ seq = "\\r";
+ ssz = 2;
+ break;
+ case (ROFFTok_Sp_T):
+ seq = "\\t";
+ ssz = 2;
+ break;
+ case (ROFFTok_Sp_V):
+ seq = "\\v";
+ ssz = 2;
+ break;
+ case (ROFFTok_Sp_0):
+ seq = "\\0";
+ ssz = 2;
+ break;
+ case (ROFFTok_Space):
+ seq = "&nbsp;";
+ ssz = 6;
+ break;
+ case (ROFFTok_Hyphen):
+ seq = "&#8208;";
+ ssz = 7;
+ break;
+ case (ROFFTok_Em):
+ seq = "&#8212;";
+ ssz = 7;
+ break;
+ case (ROFFTok_En):
+ seq = "&#8211;";
+ ssz = 7;
+ break;
+ case (ROFFTok_Ge):
+ seq = "&#8805;";
+ ssz = 7;
+ break;
+ case (ROFFTok_Le):
+ seq = "&#8804;";
+ ssz = 7;
+ break;
+ case (ROFFTok_Rquote):
+ seq = "&#8221;";
+ ssz = 7;
+ break;
+ case (ROFFTok_Lquote):
+ seq = "&#8220;";
+ ssz = 7;
+ break;
+ case (ROFFTok_Uparrow):
+ seq = "&#8593;";
+ ssz = 7;
+ break;
+ case (ROFFTok_Acute):
+ seq = "&#180;";
+ ssz = 6;
+ break;
+ case (ROFFTok_Grave):
+ seq = "&#96;";
+ ssz = 5;
+ break;
+ case (ROFFTok_Pi):
+ seq = "&#960;";
+ ssz = 6;
+ break;
+ case (ROFFTok_Ne):
+ seq = "&#8800;";
+ ssz = 7;
+ break;
+ case (ROFFTok_Lt):
+ seq = "&lt;";
+ ssz = 4;
+ break;
+ case (ROFFTok_Gt):
+ seq = "&gt;";
+ ssz = 4;
+ break;
+ case (ROFFTok_Plusmin):
+ seq = "&#177;";
+ ssz = 6;
+ break;
+ case (ROFFTok_Infty):
+ seq = "&#8734;";
+ ssz = 7;
+ break;
+ case (ROFFTok_Bar):
+ seq = "&#124;";
+ ssz = 6;
+ break;
+ case (ROFFTok_Nan):
+ seq = "Nan";
+ ssz = 3;
+ break;
+ case (ROFFTok_Quote):
+ seq = "&quot;";
+ ssz = 6;
+ break;
+ case (ROFFTok_Slash):
+ seq = "\\";
+ ssz = 1;
+ break;
+ case (ROFFTok_Null):
+ seq = "";
+ ssz = 0;
+ break;
+ default:
+ /* TODO: print error. */
+ return(-1);
+ }
+ break;
+
/* Ampersand ml-escape. */
case ('&'):
seq = "&amp;";
@@ -70,7 +205,7 @@ ml_nputstring(struct md_mbuf *p,
break;
}
- if ( ! ml_nputs(p, seq, ssz, pos))
+ if (ssz > 0 && ! ml_nputs(p, seq, ssz, pos))
return(-1);
}
return(1);
diff --git a/mlg.c b/mlg.c
index 52bbb822..53dbfa6e 100644
--- a/mlg.c
+++ b/mlg.c
@@ -70,7 +70,6 @@ static int mlg_rofftail(void *);
static int mlg_roffin(void *, int, int *, char **);
static int mlg_roffdata(void *, int,
const char *, char *);
-static int mlg_rofftoken(void *, int, int);
static int mlg_roffout(void *, int);
static int mlg_roffblkin(void *, int, int *, char **);
static int mlg_roffblkout(void *, int);
@@ -357,7 +356,6 @@ mlg_alloc(const struct md_args *args,
cb.roffspecial = mlg_roffspecial;
cb.roffmsg = mlg_roffmsg;
cb.roffdata = mlg_roffdata;
- cb.rofftoken = mlg_rofftoken;
if (NULL == (p = calloc(1, sizeof(struct md_mlg))))
err(1, "calloc");
@@ -552,142 +550,6 @@ mlg_roffmsg(void *arg, enum roffmsg lvl,
static int
-mlg_rofftoken(void *arg, int space, int value)
-{
- struct md_mlg *p;
- const char *seq;
- size_t sz, res;
-
- assert(arg);
- p = (struct md_mlg *)arg;
-
- switch (value) {
- case (ROFFTok_Sp_A):
- seq = "\\a";
- sz = 2;
- break;
- case (ROFFTok_Sp_B):
- seq = "\\b";
- sz = 2;
- break;
- case (ROFFTok_Sp_F):
- seq = "\\f";
- sz = 2;
- break;
- case (ROFFTok_Sp_N):
- seq = "\\n";
- sz = 2;
- break;
- case (ROFFTok_Sp_R):
- seq = "\\r";
- sz = 2;
- break;
- case (ROFFTok_Sp_T):
- seq = "\\t";
- sz = 2;
- break;
- case (ROFFTok_Sp_V):
- seq = "\\v";
- sz = 2;
- break;
- case (ROFFTok_Space):
- seq = "&nbsp;";
- sz = 6;
- break;
- case (ROFFTok_Hyphen):
- seq = "&#8208;";
- sz = 7;
- break;
- case (ROFFTok_Em):
- seq = "&#8212;";
- sz = 7;
- break;
- case (ROFFTok_En):
- seq = "&#8211;";
- sz = 7;
- break;
- case (ROFFTok_Ge):
- seq = "&#8805;";
- sz = 7;
- break;
- case (ROFFTok_Le):
- seq = "&#8804;";
- sz = 7;
- break;
- case (ROFFTok_Rquote):
- seq = "&#8221;";
- sz = 7;
- break;
- case (ROFFTok_Lquote):
- seq = "&#8220;";
- sz = 7;
- break;
- case (ROFFTok_Uparrow):
- seq = "&#8593;";
- sz = 7;
- break;
- case (ROFFTok_Acute):
- seq = "&#180;";
- sz = 6;
- break;
- case (ROFFTok_Grave):
- seq = "&#96;";
- sz = 5;
- break;
- case (ROFFTok_Pi):
- seq = "&#960;";
- sz = 6;
- break;
- case (ROFFTok_Ne):
- seq = "&#8800;";
- sz = 7;
- break;
- case (ROFFTok_Lt):
- seq = "&lt;";
- sz = 4;
- break;
- case (ROFFTok_Gt):
- seq = "&gt;";
- sz = 4;
- break;
- case (ROFFTok_Plusmin):
- seq = "&#177;";
- sz = 6;
- break;
- case (ROFFTok_Infty):
- seq = "&#8734;";
- sz = 7;
- break;
- case (ROFFTok_Bar):
- seq = "&#124;";
- sz = 6;
- break;
- case (ROFFTok_Nan):
- seq = "Nan";
- sz = 3;
- break;
- case (ROFFTok_Quote):
- seq = "&quot;";
- sz = 6;
- break;
- default:
- /* TODO: print error. */
- return(0);
- }
-
- if (space && ! ml_nputs(p->mbuf, " ", 1, &res))
- return(0);
- p->pos += res;
-
- if ( ! ml_nputs(p->mbuf, seq, sz, &res))
- return(0);
- p->pos += res;
-
- return(1);
-}
-
-
-static int
mlg_roffdata(void *arg, int space, const char *start, char *buf)
{
struct md_mlg *p;
diff --git a/private.h b/private.h
index b60686a8..9c7f242f 100644
--- a/private.h
+++ b/private.h
@@ -65,7 +65,9 @@ struct md_mbuf {
#define ROFFTok_Bar 25
#define ROFFTok_Nan 26
#define ROFFTok_Quote 27
-#define ROFFTok_MAX 28
+#define ROFFTok_Sp_0 28
+#define ROFFTok_Slash 29
+#define ROFFTok_MAX 30
#define ROFF___ 0
#define ROFF_Dd 1
@@ -251,7 +253,6 @@ struct roffcb {
const char *, const char *, const char *);
int (*rofftail)(void *);
int (*roffdata)(void *, int, const char *, char *);
- int (*rofftoken)(void *, int, int);
int (*roffin)(void *, int, int *, char **);
int (*roffout)(void *, int);
int (*roffblkin)(void *, int, int *, char **);
@@ -290,7 +291,7 @@ struct rofftree *roff_alloc(const struct roffcb *, void *);
int roff_engine(struct rofftree *, char *);
int roff_free(struct rofftree *, int);
-int rofftok_scan(const char *);
+int rofftok_scan(const char *, int *);
__END_DECLS
diff --git a/roff.c b/roff.c
index 08d10ffb..fbfd6d8f 100644
--- a/roff.c
+++ b/roff.c
@@ -969,23 +969,9 @@ roffparseopts(struct rofftree *tree, int tok,
static int
roffdata(struct rofftree *tree, int space, char *buf)
{
- int tok;
if (0 == *buf)
return(1);
-
- if (-1 == (tok = rofftok_scan(buf))) {
- roff_err(tree, buf, "invalid character sequence");
- return(0);
- } else if (ROFFTok_MAX != tok) {
- if (ROFFTok_Null == tok) { /* FIXME */
- buf += 2;
- return(roffdata(tree, space, buf));
- }
- return((*tree->cb.rofftoken)
- (tree->arg, space != 0, tok));
- }
-
return((*tree->cb.roffdata)(tree->arg,
space != 0, tree->cur, buf));
}
diff --git a/tokens.c b/tokens.c
index 10d35153..0ea4e113 100644
--- a/tokens.c
+++ b/tokens.c
@@ -24,55 +24,58 @@
#include "private.h"
-static int rofftok_dashes(const char *);
-static int rofftok_special(const char *);
-static int rofftok_predef(const char *);
-static int rofftok_defined(const char *);
+static int rofftok_dashes(const char *, int *);
+static int rofftok_special(const char *, int *);
+static int rofftok_predef(const char *, int *);
+static int rofftok_defined(const char *, int *);
static int
-rofftok_defined(const char *buf)
+rofftok_defined(const char *buf, int *i)
{
- if (0 == *buf)
- return(-1);
- if (0 == *(buf + 1))
+ const char *p;
+
+ if (0 == buf[*i])
return(-1);
- if (0 != *(buf + 2))
+ if (0 == buf[*i + 1])
return(-1);
- if (0 == strcmp(buf, ">="))
+ (*i)++;
+ p = &buf[(*i)++];
+
+ if (0 == memcmp(p, ">=", 2))
return(ROFFTok_Ge);
- else if (0 == strcmp(buf, "<="))
+ else if (0 == memcmp(p, "<=", 2))
return(ROFFTok_Le);
- else if (0 == strcmp(buf, "Rq"))
+ else if (0 == memcmp(p, "Rq", 2))
return(ROFFTok_Rquote);
- else if (0 == strcmp(buf, "Lq"))
+ else if (0 == memcmp(p, "Lq", 2))
return(ROFFTok_Lquote);
- else if (0 == strcmp(buf, "ua"))
+ else if (0 == memcmp(p, "ua", 2))
return(ROFFTok_Uparrow);
- else if (0 == strcmp(buf, "aa"))
+ else if (0 == memcmp(p, "aa", 2))
return(ROFFTok_Acute);
- else if (0 == strcmp(buf, "ga"))
+ else if (0 == memcmp(p, "ga", 2))
return(ROFFTok_Grave);
- else if (0 == strcmp(buf, "Pi"))
+ else if (0 == memcmp(p, "Pi", 2))
return(ROFFTok_Pi);
- else if (0 == strcmp(buf, "Ne"))
+ else if (0 == memcmp(p, "Ne", 2))
return(ROFFTok_Ne);
- else if (0 == strcmp(buf, "Le"))
+ else if (0 == memcmp(p, "Le", 2))
return(ROFFTok_Le);
- else if (0 == strcmp(buf, "Ge"))
+ else if (0 == memcmp(p, "Ge", 2))
return(ROFFTok_Ge);
- else if (0 == strcmp(buf, "Lt"))
+ else if (0 == memcmp(p, "Lt", 2))
return(ROFFTok_Lt);
- else if (0 == strcmp(buf, "Gt"))
+ else if (0 == memcmp(p, "Gt", 2))
return(ROFFTok_Gt);
- else if (0 == strcmp(buf, "Pm"))
+ else if (0 == memcmp(p, "Pm", 2))
return(ROFFTok_Plusmin);
- else if (0 == strcmp(buf, "If"))
+ else if (0 == memcmp(p, "If", 2))
return(ROFFTok_Infty);
- else if (0 == strcmp(buf, "Na"))
+ else if (0 == memcmp(p, "Na", 2))
return(ROFFTok_Nan);
- else if (0 == strcmp(buf, "Ba"))
+ else if (0 == memcmp(p, "Ba", 2))
return(ROFFTok_Bar);
return(-1);
@@ -80,15 +83,14 @@ rofftok_defined(const char *buf)
static int
-rofftok_predef(const char *buf)
+rofftok_predef(const char *buf, int *i)
{
- if (0 == *buf)
+ if (0 == buf[*i])
return(-1);
+ if ('(' == buf[*i])
+ return(rofftok_defined(buf, i));
- if ('(' == *buf)
- return(rofftok_defined(++buf));
-
- switch (*buf) {
+ switch (buf[*i]) {
case ('q'):
return(ROFFTok_Quote);
default:
@@ -100,20 +102,17 @@ rofftok_predef(const char *buf)
static int
-rofftok_dashes(const char *buf)
+rofftok_dashes(const char *buf, int *i)
{
- if (0 == *buf)
+ if (0 == buf[*i])
return(-1);
- else if (*buf++ != 'e')
+ else if (buf[(*i)++] != 'e')
return(-1);
-
- if (0 == *buf)
- return(-1);
- else if (0 != *(buf + 1))
+ if (0 == buf[*i])
return(-1);
- switch (*buf) {
+ switch (buf[*i]) {
case ('m'):
return(ROFFTok_Em);
case ('n'):
@@ -126,15 +125,13 @@ rofftok_dashes(const char *buf)
static int
-rofftok_special(const char *buf)
+rofftok_special(const char *buf, int *i)
{
- if (0 == *buf)
- return(-1);
- else if (0 != *(buf + 1))
- return(-1);
+ if (0 == buf[*i])
+ return(ROFFTok_Slash);
- switch (*buf) {
+ switch (buf[*i]) {
case ('a'):
return(ROFFTok_Sp_A);
case ('b'):
@@ -149,6 +146,8 @@ rofftok_special(const char *buf)
return(ROFFTok_Sp_T);
case ('v'):
return(ROFFTok_Sp_V);
+ case ('0'):
+ return(ROFFTok_Sp_0);
default:
break;
}
@@ -157,19 +156,22 @@ rofftok_special(const char *buf)
int
-rofftok_scan(const char *buf)
+rofftok_scan(const char *buf, int *i)
{
assert(*buf);
- if ('\\' != *buf++)
- return(ROFFTok_MAX);
+ assert(buf[*i] == '\\');
+
+ (*i)++;
- for ( ; *buf; buf++) {
- switch (*buf) {
+ for ( ; buf[*i]; (*i)++) {
+ switch (buf[*i]) {
case ('e'):
- return(rofftok_special(++buf));
+ (*i)++;
+ return(rofftok_special(buf, i));
case ('('):
- return(rofftok_dashes(++buf));
+ (*i)++;
+ return(rofftok_dashes(buf, i));
case (' '):
return(ROFFTok_Space);
case ('&'):
@@ -177,9 +179,10 @@ rofftok_scan(const char *buf)
case ('-'):
return(ROFFTok_Hyphen);
case ('*'):
- return(rofftok_predef(++buf));
+ (*i)++;
+ return(rofftok_predef(buf, i));
case ('\\'):
- return(ROFFTok_MAX);
+ return(ROFFTok_Slash);
default:
break;
}