summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Ingo Schwarze <schwarze@openbsd.org> 2018-08-10 22:12:44 +0000
committer: Ingo Schwarze <schwarze@openbsd.org> 2018-08-10 22:12:44 +0000
commit: c3329497cf03380460e875bcbd65f8966ac55ce4 (patch)
tree: 0062a092ace2b3b4a42f755e24c6d2c36b3655c7
parent: e1cba00943f4950e66e450c47d4a66b21b313834 (diff)
download: mandoc-c3329497cf03380460e875bcbd65f8966ac55ce4.tar.gz
handle the non-portable GNU-style \[charNN], \[charNNN] character
escape sequences, used for example in the groff_char(7) manual page
-rw-r--r-- TODO 7
-rw-r--r-- mandoc.c 24
-rw-r--r-- mandoc_char.7 10
3 files changed, 31 insertions, 10 deletions
diff --git a/TODO b/TODO
index 4d0c2190..c986d3c2 100644
--- a/TODO
+++ b/TODO
@@ -40,9 +40,10 @@ are mere guesses, and some may be wrong.
- \*(.T prints the device being used,
see groff_char(7) for an example
-
-- \[charNN], \[charNNN] prints a single-byte codepoint
- see groff_char(7) for examples
+ This is slightly hard because -Tlocale only decides to use ascii or
+ utf8 when initializing the formatter, so the information is not
+ yet available to the preprocessor at the parsing stage.
+ loc ** exist ** algo * size * imp *
- .ad (adjust margins)
.ad l -- adjust left margin only (flush left)
diff --git a/mandoc.c b/mandoc.c
index fd0ad7e7..7e785312 100644
--- a/mandoc.c
+++ b/mandoc.c
@@ -41,7 +41,7 @@ enum mandoc_esc
mandoc_escape(const char **end, const char **start, int *sz)
{
const char *local_start;
- int local_sz;
+ int local_sz, c, i;
char term;
enum mandoc_esc gly;
@@ -330,8 +330,26 @@ mandoc_escape(const char **end, const char **start, int *sz)
}
break;
case ESCAPE_SPECIAL:
- if (1 == *sz && 'c' == **start)
- gly = ESCAPE_NOSPACE;
+ if (**start == 'c') {
+ if (*sz == 1) {
+ gly = ESCAPE_NOSPACE;
+ break;
+ }
+ if (*sz < 6 || *sz > 7 ||
+ strncmp(*start, "char", 4) != 0 ||
+ (int)strspn(*start + 4, "0123456789") + 4 < *sz)
+ break;
+ c = 0;
+ for (i = 4; i < *sz; i++)
+ c = 10 * c + ((*start)[i] - '0');
+ if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff)
+ break;
+ *start += 4;
+ *sz -= 4;
+ gly = ESCAPE_NUMBERED;
+ break;
+ }
+
/*
* Unicode escapes are defined in groff as \[u0000]
* to \[u10FFFF], where the contained value must be
diff --git a/mandoc_char.7 b/mandoc_char.7
index c919f783..84100951 100644
--- a/mandoc_char.7
+++ b/mandoc_char.7
@@ -761,14 +761,16 @@ For backward compatibility with existing manuals,
.Xr mandoc 1
also supports the
.Pp
-.Dl \eN\(aq Ns Ar number Ns \(aq
+.Dl \eN\(aq Ns Ar number Ns \(aq and \e[ Ns Cm char Ns Ar number ]
.Pp
-escape sequence, inserting the character
+escape sequences, inserting the character
.Ar number
from the current character set into the output.
Of course, this is inherently non-portable and is already marked
-as deprecated in the Heirloom roff manual.
-For example, do not use \eN\(aq34\(aq, use \e(dq, or even the plain
+as deprecated in the Heirloom roff manual;
+on top of that, the second form is a GNU extension.
+For example, do not use \eN\(aq34\(aq or \e[char34], use \e(dq,
+or even the plain
.Sq \(dq
character where possible.
.Sh COMPATIBILITY