From b036682395105453d12e07ce3d4e3b1918671a32 Mon Sep 17 00:00:00 2001
From: Kristaps Dzonsons <kristaps@bsd.lv>
Date: Fri, 27 Feb 2009 09:14:02 +0000
Subject: Character-encoding stuff documented, expanded.

---
 mdoc.3     |   5 +++
 mdocterm.1 | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 mdocterm.c |  70 +++++++++++++++++++++++++-------------
 validate.c |   2 ++
 4 files changed, 164 insertions(+), 24 deletions(-)

diff --git a/mdoc.3 b/mdoc.3
index 68a6618a..ae24eb70 100644
--- a/mdoc.3
+++ b/mdoc.3
@@ -200,6 +200,7 @@ an asterisk and non-open-parenthesis
 .Sq \&*
 for single-character sequences; or one of a small set of standalone
 single characters for other escapes.
+.\" PARAGRAPH
 .Pp
 Examples:
 .Pp
@@ -230,6 +231,10 @@ prints
 .Dq \*q
 .Pq double-quote
 .El
+.\" PARAGRAPH
+.Pp
+All escaped sequences are syntax-checked, but it's up to the front-end
+system to correctly render them to the output device.
 .\" SUBSECTION
 .Ss Abstract Syntax Tree
 The 
diff --git a/mdocterm.1 b/mdocterm.1
index 7ca80b43..92265669 100644
--- a/mdocterm.1
+++ b/mdocterm.1
@@ -89,6 +89,117 @@ reads from stdin and prints terminal-encoded output to stdout.
 .Nm
 is
 .Ud
+.\" SUB-SECTION
+.Ss Character Escapes
+This section documents the character-escapes accepted by
+.Xr mdocterm 1 .
+Note that the \\x, \\(xx and \\[n] forms are described here; the \\*(xx
+and \\*x forms described in
+.Xr mdoc.samples 7
+are deprecated, but still correctly rendered.  For all two-character
+sequences, \\(xx is equivalent to the n-character \\[xx].
+.Pp
+Note that the
+.Em Output
+column will render differently depending on whether it is formatted with
+.Xr mdocterm 1 
+or another output filter.
+.\" PARAGRAPH
+.Pp
+Enclosures:
+.Pp
+.Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
+.It Em Output
+.Em Input (Name)
+.El 
+.Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
+.It \(rB
+\\(rB (right bracket)
+.It \(lB
+\\(lB (left bracket)
+.It \(lq
+\\(lq (left double-quote)
+.It \(rq
+\\(rq (right double-quote)
+.It \(oq
+\\(oq (left single-quote)
+.It \(aq
+\\(aq (right single-quote, apostrophe)
+.El
+.\" PARAGRAPH
+.Pp
+Indicatives:
+.Pp
+.Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
+.It Em Output
+.Em Input (Name)
+.El 
+.Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
+.It \(<-
+\\(<- (left arrow)
+.It \(->
+\\(-> (right arrow)
+.It \(ua
+\\(ua (up arrow)
+.It \(da
+\\(da (down arrow)
+.El
+.\" PARAGRAPH
+.Pp
+Mathematical:
+.Pp
+.Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
+.It Em Output
+.Em Input (Name)
+.El 
+.Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
+.It \(<=
+\\(<= (less-than-equal)
+.It \(>=
+\\(>= (greater-than-equal)
+.It \(==
+\\(== (equal)
+.It \(!=
+\\(!= (not equal)
+.It \(if
+\\(if (infinity)
+.It \(na
+\\(na (NaN)*
+.It \(+-
+\\(+- (plus-minus)
+.El
+.\" PARAGRAPH
+.Pp
+*This is a deviation from the standard, as NaN is usually rendered as
+\\*(Na, which is a deprecated form.  We introduce \\(na, which follows
+the more general syntax.
+.Pp
+Diacritics:
+.Pp
+.Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
+.It Em Output
+.Em Input (Name)
+.El 
+.Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
+.It \(ga
+\\(ga (accent grave)
+.It \(aa
+\\(aa (accent acute)
+.El
+.\" PARAGRAPH
+.Pp
+Special symbols:
+.Pp
+.Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
+.It Em Output
+.Em Input (Name)
+.El 
+.Bl -tag -width "OutputXXXX" -offset "XXXX" -compact
+.It \(bu
+\\(bu (bullet)
+.It \(ba
+\\(ba (bar)
+.El 
 .\" SECTION
 .Sh EXAMPLES
 To display this manual page:
diff --git a/mdocterm.c b/mdocterm.c
index ccc316b2..4a8ee2ee 100644
--- a/mdocterm.c
+++ b/mdocterm.c
@@ -36,6 +36,7 @@
 #define	TERMSYM_LARROW		"<-"
 #define	TERMSYM_RARROW		"->"
 #define	TERMSYM_UARROW		"^"
+#define	TERMSYM_DARROW		"v"
 #define	TERMSYM_LSQUOTE		"`"
 #define	TERMSYM_RSQUOTE		"\'"
 #define	TERMSYM_SQUOTE		"\'"
@@ -52,7 +53,8 @@
 #define	TERMSYM_GRAVE		"`"
 #define	TERMSYM_PI		"pi"
 #define	TERMSYM_PLUSMINUS	"+="
-#define	TERMSYM_INFINITY	"infinity"
+#define	TERMSYM_INF		"oo"
+#define	TERMSYM_INF2		"infinity"
 #define	TERMSYM_NAN		"NaN"
 #define	TERMSYM_BAR		"|"
 #define	TERMSYM_BULLET		"o"
@@ -338,50 +340,70 @@ nescape(struct termp *p, const char *word, size_t len)
 			stringa(p, TERMSYM_RBRACK);
 		else if ('l' == word[0] && 'B' == word[1])
 			stringa(p, TERMSYM_LBRACK);
+		else if ('l' == word[0] && 'q' == word[1])
+			stringa(p, TERMSYM_LDQUOTE);
+		else if ('r' == word[0] && 'q' == word[1])
+			stringa(p, TERMSYM_RDQUOTE);
+		else if ('o' == word[0] && 'q' == word[1])
+			stringa(p, TERMSYM_LSQUOTE);
+		else if ('a' == word[0] && 'q' == word[1])
+			stringa(p, TERMSYM_RSQUOTE);
 		else if ('<' == word[0] && '-' == word[1])
 			stringa(p, TERMSYM_LARROW);
 		else if ('-' == word[0] && '>' == word[1])
 			stringa(p, TERMSYM_RARROW);
-		else if ('l' == word[0] && 'q' == word[1])
-			stringa(p, TERMSYM_DQUOTE);
-		else if ('r' == word[0] && 'q' == word[1])
-			stringa(p, TERMSYM_DQUOTE);
 		else if ('b' == word[0] && 'u' == word[1])
 			stringa(p, TERMSYM_BULLET);
-		else if ('L' == word[0] && 'e' == word[1])
-			stringa(p, TERMSYM_LE);
 		else if ('<' == word[0] && '=' == word[1])
 			stringa(p, TERMSYM_LE);
-		else if ('G' == word[0] && 'e' == word[1])
-			stringa(p, TERMSYM_GE);
 		else if ('>' == word[0] && '=' == word[1])
 			stringa(p, TERMSYM_GE);
-		else if ('R' == word[0] && 'q' == word[1])
-			stringa(p, TERMSYM_RDQUOTE);
-		else if ('L' == word[0] && 'q' == word[1])
-			stringa(p, TERMSYM_LDQUOTE);
+		else if ('=' == word[0] && '=' == word[1])
+			stringa(p, TERMSYM_EQ);
+		else if ('+' == word[0] && '-' == word[1])
+			stringa(p, TERMSYM_PLUSMINUS);
 		else if ('u' == word[0] && 'a' == word[1])
 			stringa(p, TERMSYM_UARROW);
+		else if ('d' == word[0] && 'a' == word[1])
+			stringa(p, TERMSYM_DARROW);
 		else if ('a' == word[0] && 'a' == word[1])
 			stringa(p, TERMSYM_ACUTE);
 		else if ('g' == word[0] && 'a' == word[1])
 			stringa(p, TERMSYM_GRAVE);
-		else if ('P' == word[0] && 'i' == word[1])
-			stringa(p, TERMSYM_PI);
-		else if ('N' == word[0] && 'e' == word[1])
+		else if ('!' == word[0] && '=' == word[1])
 			stringa(p, TERMSYM_NEQ);
-		else if ('L' == word[0] && 't' == word[1])
-			stringa(p, TERMSYM_LT);
+		else if ('i' == word[0] && 'f' == word[1])
+			stringa(p, TERMSYM_INF);
+		else if ('n' == word[0] && 'a' == word[1])
+			stringa(p, TERMSYM_NAN);
+		else if ('b' == word[0] && 'a' == word[1])
+			stringa(p, TERMSYM_BAR);
+
+		/* Deprecated forms. */
+		else if ('B' == word[0] && 'a' == word[1])
+			stringa(p, TERMSYM_BAR);
+		else if ('I' == word[0] && 'f' == word[1])
+			stringa(p, TERMSYM_INF2);
+		else if ('G' == word[0] && 'e' == word[1])
+			stringa(p, TERMSYM_GE);
 		else if ('G' == word[0] && 't' == word[1])
 			stringa(p, TERMSYM_GT);
-		else if ('P' == word[0] && 'm' == word[1])
-			stringa(p, TERMSYM_PLUSMINUS);
-		else if ('I' == word[0] && 'f' == word[1])
-			stringa(p, TERMSYM_INFINITY);
+		else if ('L' == word[0] && 'e' == word[1])
+			stringa(p, TERMSYM_LE);
+		else if ('L' == word[0] && 'q' == word[1])
+			stringa(p, TERMSYM_LDQUOTE);
+		else if ('L' == word[0] && 't' == word[1])
+			stringa(p, TERMSYM_LT);
 		else if ('N' == word[0] && 'a' == word[1])
 			stringa(p, TERMSYM_NAN);
-		else if ('B' == word[0] && 'a' == word[1])
-			stringa(p, TERMSYM_BAR);
+		else if ('N' == word[0] && 'e' == word[1])
+			stringa(p, TERMSYM_NEQ);
+		else if ('P' == word[0] && 'i' == word[1])
+			stringa(p, TERMSYM_PI);
+		else if ('P' == word[0] && 'm' == word[1])
+			stringa(p, TERMSYM_PLUSMINUS);
+		else if ('R' == word[0] && 'q' == word[1])
+			stringa(p, TERMSYM_RDQUOTE);
 		break;
 	default:
 		break;
diff --git a/validate.c b/validate.c
index 5584186d..bafe9ce3 100644
--- a/validate.c
+++ b/validate.c
@@ -480,6 +480,8 @@ check_text(struct mdoc *mdoc, size_t line, size_t pos, const char *p)
 {
 	size_t		 c;
 
+	/* XXX - indicate deprecated escapes \*(xx and \*x. */
+
 	for ( ; *p; p++) {
 		if ( ! isprint((int)*p) && '\t' != *p)
 			return(mdoc_perr(mdoc, line, pos,
-- 
cgit