summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--README.addescape17
-rw-r--r--mdocterm.140
-rw-r--r--mdocterm.c198
-rw-r--r--term.h9
4 files changed, 148 insertions, 116 deletions
diff --git a/README.addescape b/README.addescape
new file mode 100644
index 00000000..93231406
--- /dev/null
+++ b/README.addescape
@@ -0,0 +1,17 @@
+$Id$
+
+This documents adding a new character escape to mdocterm(1). Character
+escapes are only syntax-validated in the back-end.
+
+Character escapes may take the form \*x, \*(xx, \x, \(xx, \[n] and so
+on. All of these are recognised according to the byte length of the
+escape name.
+
+(1) If the escape is NOT recognised in enum tsym in term.h, add it.
+
+(2) Modify/create static struct termenc termencN, where N is the number
+of characters in the encoding. This is in mdocterm.c.
+
+(3) Possibly modify nescape() to recognise a new termencN.
+
+Everything else is automatic.
diff --git a/mdocterm.1 b/mdocterm.1
index ea0f0f48..81dd619d 100644
--- a/mdocterm.1
+++ b/mdocterm.1
@@ -93,11 +93,11 @@ is
.Ss Character Escapes
This section documents the character-escapes accepted by
.Xr mdocterm 1 .
-Note that the \\x, \\(xx and \\[n] forms are described here; the \\*(xx
-and \\*x forms described in
+Note that the \\x, \\(xx and \\[n] forms are described here; the \\*(xx,
+\\*[n] and \\*x forms described in
.Xr mdoc.samples 7
-are deprecated, but still correctly rendered. For all two-character
-sequences, \\(xx is equivalent to the n-character \\[xx].
+are deprecated, but still rendered. All one- and two-character
+sequences may be used in the n-character sequence \\[n].
.Pp
Note that the
.Em Output
@@ -106,6 +106,22 @@ column will render differently whether executed with
or another output filter.
.\" PARAGRAPH
.Pp
+Grammatical:
+.Pp
+.Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
+.It Em Output
+.Em Input (Name)
+.It \(em
+\\(em (em-dash)
+.It \(en
+\\(en (en-dash)
+.It \-
+\\- (hyphen)
+.It \\
+\\\\ (back-slash)
+.El
+.\" PARAGRAPH
+.Pp
Enclosures:
.Pp
.Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
@@ -118,9 +134,9 @@ Enclosures:
.It \(lq
\\(lq (left double-quote)
.It \(rq
-\\(rq (right double-quote)
+\\(rq, \\' (right double-quote)
.It \(oq
-\\(lq (left single-quote)
+\\(oq, \\` (left single-quote)
.It \(aq
\\(aq (right single-quote, apostrophe)
.El
@@ -161,13 +177,11 @@ Mathematical:
\\(na (NaN)*
.It \(+-
\\(+- (plus-minus)
+.It \(**
+\\(** (asterisk)
.El
.\" PARAGRAPH
.Pp
-*This is a deviation from the standard, as NaN is usually rendered as
-\\*(Na, which is a deprecated form. We introduce \\(na, which follows
-the more general syntax.
-.Pp
Diacritics:
.Pp
.Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
@@ -189,7 +203,13 @@ Special symbols:
\\(bu (bullet)
.It \(ba
\\(ba (bar)
+.It \(co
+\\(co (copyright)
.El
+.Pp
+*This is a deviation from the standard, as NaN is usually rendered as
+\\*(Na, which is a deprecated form. We introduce \\(na, which follows
+the more general syntax.
.\" SECTION
.Sh EXAMPLES
To display this manual page:
diff --git a/mdocterm.c b/mdocterm.c
index f9c89230..e34ab4f2 100644
--- a/mdocterm.c
+++ b/mdocterm.c
@@ -32,6 +32,11 @@
#include "mmain.h"
#include "term.h"
+struct termenc { /* Maps one escape-sequence name to a terminal symbol. */
+ const char *enc; /* Escape name as matched by nescape(); NULL marks the end of a table. */
+ int sym; /* Symbol value (a TERMSYM_* constant in the tables) passed to symbola(). */
+};
+
static void body(struct termp *,
struct termpair *,
const struct mdoc_meta *,
@@ -57,6 +62,67 @@ extern size_t strlcat(char *, const char *, size_t);
extern size_t strlcpy(char *, const char *, size_t);
#endif
+static struct termenc termenc1[] = { /* One-byte escape names; the { NULL, 0 } entry ends the table. */
+ { "\\", TERMSYM_SLASH },
+ { "\'", TERMSYM_RSQUOTE },
+ { "`", TERMSYM_LSQUOTE },
+ { "-", TERMSYM_HYPHEN },
+ { " ", TERMSYM_SPACE },
+ { ".", TERMSYM_PERIOD },
+ { "&", TERMSYM_BREAK }, /* termsym_ansi renders this as the empty string */
+ { "e", TERMSYM_SLASH }, /* same symbol as the back-slash entry above */
+ { "q", TERMSYM_DQUOTE },
+ { NULL, 0 }
+};
+
+static struct termenc termenc2[] = { /* Two-byte escape names; the { NULL, 0 } entry ends the table. */
+ { "rB", TERMSYM_RBRACK },
+ { "lB", TERMSYM_LBRACK },
+ { "Lq", TERMSYM_LDQUOTE },
+ { "lq", TERMSYM_LDQUOTE },
+ { "Rq", TERMSYM_RDQUOTE },
+ { "rq", TERMSYM_RDQUOTE },
+ { "oq", TERMSYM_LSQUOTE },
+ { "aq", TERMSYM_RSQUOTE },
+ /* Arrows. */
+ { "<-", TERMSYM_LARROW },
+ { "->", TERMSYM_RARROW },
+ { "ua", TERMSYM_UARROW },
+ { "da", TERMSYM_DARROW },
+ /* Special symbols. */
+ { "bu", TERMSYM_BULLET },
+ { "Ba", TERMSYM_BAR },
+ { "ba", TERMSYM_BAR },
+ { "co", TERMSYM_COPY },
+ { "Am", TERMSYM_AMP },
+ /* Mathematical. */
+ { "Le", TERMSYM_LE },
+ { "<=", TERMSYM_LE },
+ { "Ge", TERMSYM_GE },
+ { ">=", TERMSYM_GE }, /* must be ">=" (mirrors "<="); "=>" would drop the \(>= escape */
+ { "==", TERMSYM_EQ },
+ { "Ne", TERMSYM_NEQ },
+ { "!=", TERMSYM_NEQ },
+ { "Pm", TERMSYM_PLUSMINUS },
+ { "+-", TERMSYM_PLUSMINUS },
+ { "If", TERMSYM_INF2 },
+ { "if", TERMSYM_INF },
+ { "Na", TERMSYM_NAN },
+ { "na", TERMSYM_NAN },
+ { "**", TERMSYM_ASTERISK },
+ { "Gt", TERMSYM_GT },
+ { "Lt", TERMSYM_LT },
+ /* Diacritics. */
+ { "aa", TERMSYM_ACUTE },
+ { "ga", TERMSYM_GRAVE },
+ /* Dashes. */
+ { "en", TERMSYM_EN },
+ { "em", TERMSYM_EM },
+
+ { "Pi", TERMSYM_PI },
+ { NULL, 0 }
+};
+
static struct termsym termsym_ansi[] = {
{ "]", 1 }, /* TERMSYM_RBRACK */
{ "[", 1 }, /* TERMSYM_LBRACK */
@@ -85,8 +151,16 @@ static struct termsym termsym_ansi[] = {
{ "NaN", 3 }, /* TERMSYM_NAN */
{ "|", 1 }, /* TERMSYM_BAR */
{ "o", 1 }, /* TERMSYM_BULLET */
- { "&", 1 }, /* TERMSYM_AND */
- { "|", 1 }, /* TERMSYM_OR */
+ { "&", 1 }, /* TERMSYM_AMP */
+ { "--", 2 }, /* TERMSYM_EM */
+ { "-", 1 }, /* TERMSYM_EN */
+ { "(C)", 3 }, /* TERMSYM_COPY */
+ { "*", 1 }, /* TERMSYM_ASTERISK */
+ { "\\", 1 }, /* TERMSYM_SLASH */
+ { "-", 1 }, /* TERMSYM_HYPHEN */
+ { " ", 1 }, /* TERMSYM_SPACE */
+ { ".", 1 }, /* TERMSYM_PERIOD */
+ { "", 0 }, /* TERMSYM_BREAK */
};
static const char ansi_clear[] = { 27, '[', '0', 'm' };
@@ -614,118 +688,27 @@ header(struct termp *p, const struct mdoc_meta *meta)
static void
nescape(struct termp *p, const char *word, size_t len)
{
+ struct termenc *enc; /* nescape: emit the symbol for the len-byte escape name at word, or warn if it is unknown; enc walks the table chosen by len */
switch (len) {
case (1):
- switch (word[0]) {
- case ('\\'):
- /* FALLTHROUGH */
- case ('\''):
- /* FALLTHROUGH */
- case ('`'):
- /* FALLTHROUGH */
- case ('-'):
- /* FALLTHROUGH */
- case (' '):
- /* FALLTHROUGH */
- case ('.'):
- chara(p, word[0]); /* FIXME */
- break;
- case ('&'):
- break;
- case ('e'):
- chara(p, '\\'); /* FIXME */
- break;
- case ('q'):
- symbola(p, TERMSYM_DQUOTE);
- break;
- default:
- warnx("escape sequence not supported: %c",
- word[0]);
- break;
- }
+ enc = termenc1;
break;
-
case (2):
- if ('r' == word[0] && 'B' == word[1])
- symbola(p, TERMSYM_RBRACK);
- else if ('l' == word[0] && 'B' == word[1])
- symbola(p, TERMSYM_LBRACK);
- else if ('l' == word[0] && 'q' == word[1])
- symbola(p, TERMSYM_LDQUOTE);
- else if ('r' == word[0] && 'q' == word[1])
- symbola(p, TERMSYM_RDQUOTE);
- else if ('o' == word[0] && 'q' == word[1])
- symbola(p, TERMSYM_LSQUOTE);
- else if ('a' == word[0] && 'q' == word[1])
- symbola(p, TERMSYM_RSQUOTE);
- else if ('<' == word[0] && '-' == word[1])
- symbola(p, TERMSYM_LARROW);
- else if ('-' == word[0] && '>' == word[1])
- symbola(p, TERMSYM_RARROW);
- else if ('b' == word[0] && 'u' == word[1])
- symbola(p, TERMSYM_BULLET);
- else if ('<' == word[0] && '=' == word[1])
- symbola(p, TERMSYM_LE);
- else if ('>' == word[0] && '=' == word[1])
- symbola(p, TERMSYM_GE);
- else if ('=' == word[0] && '=' == word[1])
- symbola(p, TERMSYM_EQ);
- else if ('+' == word[0] && '-' == word[1])
- symbola(p, TERMSYM_PLUSMINUS);
- else if ('u' == word[0] && 'a' == word[1])
- symbola(p, TERMSYM_UARROW);
- else if ('d' == word[0] && 'a' == word[1])
- symbola(p, TERMSYM_DARROW);
- else if ('a' == word[0] && 'a' == word[1])
- symbola(p, TERMSYM_ACUTE);
- else if ('g' == word[0] && 'a' == word[1])
- symbola(p, TERMSYM_GRAVE);
- else if ('!' == word[0] && '=' == word[1])
- symbola(p, TERMSYM_NEQ);
- else if ('i' == word[0] && 'f' == word[1])
- symbola(p, TERMSYM_INF);
- else if ('n' == word[0] && 'a' == word[1])
- symbola(p, TERMSYM_NAN);
- else if ('b' == word[0] && 'a' == word[1])
- symbola(p, TERMSYM_BAR);
-
- /* Deprecated forms. */
- else if ('A' == word[0] && 'm' == word[1])
- symbola(p, TERMSYM_AMP);
- else if ('B' == word[0] && 'a' == word[1])
- symbola(p, TERMSYM_BAR);
- else if ('I' == word[0] && 'f' == word[1])
- symbola(p, TERMSYM_INF2);
- else if ('G' == word[0] && 'e' == word[1])
- symbola(p, TERMSYM_GE);
- else if ('G' == word[0] && 't' == word[1])
- symbola(p, TERMSYM_GT);
- else if ('L' == word[0] && 'e' == word[1])
- symbola(p, TERMSYM_LE);
- else if ('L' == word[0] && 'q' == word[1])
- symbola(p, TERMSYM_LDQUOTE);
- else if ('L' == word[0] && 't' == word[1])
- symbola(p, TERMSYM_LT);
- else if ('N' == word[0] && 'a' == word[1])
- symbola(p, TERMSYM_NAN);
- else if ('N' == word[0] && 'e' == word[1])
- symbola(p, TERMSYM_NEQ);
- else if ('P' == word[0] && 'i' == word[1])
- symbola(p, TERMSYM_PI);
- else if ('P' == word[0] && 'm' == word[1])
- symbola(p, TERMSYM_PLUSMINUS);
- else if ('R' == word[0] && 'q' == word[1])
- symbola(p, TERMSYM_RDQUOTE);
- else
- warnx("escape sequence not supported: %c%c",
- word[0], word[1]);
+ enc = termenc2;
break;
-
default:
- warnx("escape sequence not supported");
- break;
+ warnx("unsupported %zu-byte escape sequence", len); /* only 1- and 2-byte names have a lookup table */
+ return;
}
+
+ for ( ; enc->enc; enc++) /* stops at the { NULL, 0 } sentinel entry */
+ if (0 == memcmp(enc->enc, word, len)) { /* compares exactly len bytes; the selected table holds names of that length */
+ symbola(p, enc->sym);
+ return;
+ }
+
+ warnx("unsupported %zu-byte escape sequence", len); /* length had a table, but no entry matched */
}
@@ -856,6 +839,9 @@ stringa(struct termp *p, const char *c, size_t sz)
{
size_t s;
+ if (0 == sz)
+ return;
+
s = sz > p->maxcols * 2 ? sz : p->maxcols * 2;
assert(c);
diff --git a/term.h b/term.h
index bc59b161..10623b92 100644
--- a/term.h
+++ b/term.h
@@ -54,6 +54,15 @@ enum tsym {
TERMSYM_BAR = 25,
TERMSYM_BULLET = 26,
TERMSYM_AMP = 27,
+ TERMSYM_EM = 28,
+ TERMSYM_EN = 29,
+ TERMSYM_COPY = 30,
+ TERMSYM_ASTERISK = 31,
+ TERMSYM_SLASH = 32,
+ TERMSYM_HYPHEN = 33,
+ TERMSYM_SPACE = 34,
+ TERMSYM_PERIOD = 35,
+ TERMSYM_BREAK = 36
};