From c953aa714570186eae41b5407803bc0826f109bf Mon Sep 17 00:00:00 2001 From: Kristaps Dzonsons Date: Tue, 3 Mar 2009 21:07:01 +0000 Subject: Character-escape addition simplified (see README.addescape, also added). --- README.addescape | 17 +++++ mdocterm.1 | 40 ++++++++--- mdocterm.c | 198 ++++++++++++++++++++++++++----------------------------- term.h | 9 +++ 4 files changed, 148 insertions(+), 116 deletions(-) create mode 100644 README.addescape diff --git a/README.addescape b/README.addescape new file mode 100644 index 00000000..93231406 --- /dev/null +++ b/README.addescape @@ -0,0 +1,17 @@ +$Id$ + +This documents adding a new character escape to mdocterm(1). Character +escapes are only syntax-validated in the back-end. + +Character escapes may be in the form of \*x, \*(xx, \x, \(xx, \[n] and so +on. All of these are recognised according to their byte +length. + +(1) If the escape is NOT recognised in enum tsym in term.h, add it. + +(2) Modify/create static struct termenc termencN, where N is the number +of characters in the encoding. This is in mdocterm.c. + +(3) Possibly modify nescape() to recognise a new termencN. + +Everything else is automatic. diff --git a/mdocterm.1 b/mdocterm.1 index ea0f0f48..81dd619d 100644 --- a/mdocterm.1 +++ b/mdocterm.1 @@ -93,11 +93,11 @@ is .Ss Character Escapes This section documents the character-escapes accepted by .Xr mdocterm 1 . -Note that the \\x, \\(xx and \\[n] forms are described here; the \\*(xx -and \\*x forms described in +Note that the \\x, \\(xx and \\[n] forms are described here; the \\*(xx, +\\*[n] and \\*x forms described in .Xr mdoc.samples 7 -are deprecated, but still correctly rendered. For all two-character -sequences, \\(xx is equivalent to the n-character \\[xx]. +are deprecated, but still rendered. All one- and two-character +sequences may be used in the n-character sequence \\[n]. 
.Pp Note that the .Em Output @@ -106,6 +106,22 @@ column will render differently whether executed with or another output filter. .\" PARAGRAPH .Pp +Grammatic: +.Pp +.Bl -tag -width "OutputXXXX" -offset "XXXX" -compact +.It Em Output +.Em Input (Name) +.It \(em +\\(em (em-dash) +.It \(en +\\(en (en-dash) +.It \- +\\- (hyphen) +.It \\ +\\ (back-slash) +.El +.\" PARAGRAPH +.Pp Enclosures: .Pp .Bl -tag -width "OutputXXXX" -offset "XXXX" -compact @@ -118,9 +134,9 @@ Enclosures: .It \(lq \\(lq (left double-quote) .It \(rq -\\(rq (right double-quote) +\\(rq, \\' (right double-quote) .It \(oq -\\(lq (left single-quote) +\\(oq, \\` (left single-quote) .It \(aq \\(aq (right single-quote, apostrophe) .El @@ -161,13 +177,11 @@ Mathematical: \\(na (NaN)* .It \(+- \\(+- (plus-minus) +.It \(** +\\(** (asterisk) .El .\" PARAGRAPH .Pp -*This is a deviation from the standard, as NaN is usually rendered as -\\*(Na, which is a deprecated form. We introduce \\(na, which follows -the more general syntax. -.Pp Diacritics: .Pp .Bl -tag -width "OutputXXXX" -offset "XXXX" -compact @@ -189,7 +203,13 @@ Special symbols: \\(bu (bullet) .It \(ba \\(ba (bar) +.It \(co +\\(co (copyright) .El +.Pp +*This is a deviation from the standard, as NaN is usually rendered as +\\*(Na, which is a deprecated form. We introduce \\(na, which follows +the more general syntax. 
.\" SECTION .Sh EXAMPLES To display this manual page: diff --git a/mdocterm.c b/mdocterm.c index f9c89230..e34ab4f2 100644 --- a/mdocterm.c +++ b/mdocterm.c @@ -32,6 +32,11 @@ #include "mmain.h" #include "term.h" +struct termenc { + const char *enc; + int sym; +}; + static void body(struct termp *, struct termpair *, const struct mdoc_meta *, @@ -57,6 +62,67 @@ extern size_t strlcat(char *, const char *, size_t); extern size_t strlcpy(char *, const char *, size_t); #endif +static struct termenc termenc1[] = { + { "\\", TERMSYM_SLASH }, + { "\'", TERMSYM_RSQUOTE }, + { "`", TERMSYM_LSQUOTE }, + { "-", TERMSYM_HYPHEN }, + { " ", TERMSYM_SPACE }, + { ".", TERMSYM_PERIOD }, + { "&", TERMSYM_BREAK }, + { "e", TERMSYM_SLASH }, + { "q", TERMSYM_DQUOTE }, + { NULL, 0 } +}; + +static struct termenc termenc2[] = { + { "rB", TERMSYM_RBRACK }, + { "lB", TERMSYM_LBRACK }, + { "Lq", TERMSYM_LDQUOTE }, + { "lq", TERMSYM_LDQUOTE }, + { "Rq", TERMSYM_RDQUOTE }, + { "rq", TERMSYM_RDQUOTE }, + { "oq", TERMSYM_LSQUOTE }, + { "aq", TERMSYM_RSQUOTE }, + + { "<-", TERMSYM_LARROW }, + { "->", TERMSYM_RARROW }, + { "ua", TERMSYM_UARROW }, + { "da", TERMSYM_DARROW }, + + { "bu", TERMSYM_BULLET }, + { "Ba", TERMSYM_BAR }, + { "ba", TERMSYM_BAR }, + { "co", TERMSYM_COPY }, + { "Am", TERMSYM_AMP }, + + { "Le", TERMSYM_LE }, + { "<=", TERMSYM_LE }, + { "Ge", TERMSYM_GE }, + { "=>", TERMSYM_GE }, + { "==", TERMSYM_EQ }, + { "Ne", TERMSYM_NEQ }, + { "!=", TERMSYM_NEQ }, + { "Pm", TERMSYM_PLUSMINUS }, + { "+-", TERMSYM_PLUSMINUS }, + { "If", TERMSYM_INF2 }, + { "if", TERMSYM_INF }, + { "Na", TERMSYM_NAN }, + { "na", TERMSYM_NAN }, + { "**", TERMSYM_ASTERISK }, + { "Gt", TERMSYM_GT }, + { "Lt", TERMSYM_LT }, + + { "aa", TERMSYM_ACUTE }, + { "ga", TERMSYM_GRAVE }, + + { "en", TERMSYM_EN }, + { "em", TERMSYM_EM }, + + { "Pi", TERMSYM_PI }, + { NULL, 0 } +}; + static struct termsym termsym_ansi[] = { { "]", 1 }, /* TERMSYM_RBRACK */ { "[", 1 }, /* TERMSYM_LBRACK */ @@ -85,8 +151,16 @@ static struct 
termsym termsym_ansi[] = { { "NaN", 3 }, /* TERMSYM_NAN */ { "|", 1 }, /* TERMSYM_BAR */ { "o", 1 }, /* TERMSYM_BULLET */ - { "&", 1 }, /* TERMSYM_AND */ - { "|", 1 }, /* TERMSYM_OR */ + { "&", 1 }, /* TERMSYM_AMP */ + { "--", 2 }, /* TERMSYM_EM */ + { "-", 1 }, /* TERMSYM_EN */ + { "(C)", 3 }, /* TERMSYM_COPY */ + { "*", 1 }, /* TERMSYM_ASTERISK */ + { "\\", 1 }, /* TERMSYM_SLASH */ + { "-", 1 }, /* TERMSYM_HYPHEN */ + { " ", 1 }, /* TERMSYM_SPACE */ + { ".", 1 }, /* TERMSYM_PERIOD */ + { "", 0 }, /* TERMSYM_BREAK */ }; static const char ansi_clear[] = { 27, '[', '0', 'm' }; @@ -614,118 +688,27 @@ header(struct termp *p, const struct mdoc_meta *meta) static void nescape(struct termp *p, const char *word, size_t len) { + struct termenc *enc; switch (len) { case (1): - switch (word[0]) { - case ('\\'): - /* FALLTHROUGH */ - case ('\''): - /* FALLTHROUGH */ - case ('`'): - /* FALLTHROUGH */ - case ('-'): - /* FALLTHROUGH */ - case (' '): - /* FALLTHROUGH */ - case ('.'): - chara(p, word[0]); /* FIXME */ - break; - case ('&'): - break; - case ('e'): - chara(p, '\\'); /* FIXME */ - break; - case ('q'): - symbola(p, TERMSYM_DQUOTE); - break; - default: - warnx("escape sequence not supported: %c", - word[0]); - break; - } + enc = termenc1; break; - case (2): - if ('r' == word[0] && 'B' == word[1]) - symbola(p, TERMSYM_RBRACK); - else if ('l' == word[0] && 'B' == word[1]) - symbola(p, TERMSYM_LBRACK); - else if ('l' == word[0] && 'q' == word[1]) - symbola(p, TERMSYM_LDQUOTE); - else if ('r' == word[0] && 'q' == word[1]) - symbola(p, TERMSYM_RDQUOTE); - else if ('o' == word[0] && 'q' == word[1]) - symbola(p, TERMSYM_LSQUOTE); - else if ('a' == word[0] && 'q' == word[1]) - symbola(p, TERMSYM_RSQUOTE); - else if ('<' == word[0] && '-' == word[1]) - symbola(p, TERMSYM_LARROW); - else if ('-' == word[0] && '>' == word[1]) - symbola(p, TERMSYM_RARROW); - else if ('b' == word[0] && 'u' == word[1]) - symbola(p, TERMSYM_BULLET); - else if ('<' == word[0] && '=' == word[1]) - 
symbola(p, TERMSYM_LE); - else if ('>' == word[0] && '=' == word[1]) - symbola(p, TERMSYM_GE); - else if ('=' == word[0] && '=' == word[1]) - symbola(p, TERMSYM_EQ); - else if ('+' == word[0] && '-' == word[1]) - symbola(p, TERMSYM_PLUSMINUS); - else if ('u' == word[0] && 'a' == word[1]) - symbola(p, TERMSYM_UARROW); - else if ('d' == word[0] && 'a' == word[1]) - symbola(p, TERMSYM_DARROW); - else if ('a' == word[0] && 'a' == word[1]) - symbola(p, TERMSYM_ACUTE); - else if ('g' == word[0] && 'a' == word[1]) - symbola(p, TERMSYM_GRAVE); - else if ('!' == word[0] && '=' == word[1]) - symbola(p, TERMSYM_NEQ); - else if ('i' == word[0] && 'f' == word[1]) - symbola(p, TERMSYM_INF); - else if ('n' == word[0] && 'a' == word[1]) - symbola(p, TERMSYM_NAN); - else if ('b' == word[0] && 'a' == word[1]) - symbola(p, TERMSYM_BAR); - - /* Deprecated forms. */ - else if ('A' == word[0] && 'm' == word[1]) - symbola(p, TERMSYM_AMP); - else if ('B' == word[0] && 'a' == word[1]) - symbola(p, TERMSYM_BAR); - else if ('I' == word[0] && 'f' == word[1]) - symbola(p, TERMSYM_INF2); - else if ('G' == word[0] && 'e' == word[1]) - symbola(p, TERMSYM_GE); - else if ('G' == word[0] && 't' == word[1]) - symbola(p, TERMSYM_GT); - else if ('L' == word[0] && 'e' == word[1]) - symbola(p, TERMSYM_LE); - else if ('L' == word[0] && 'q' == word[1]) - symbola(p, TERMSYM_LDQUOTE); - else if ('L' == word[0] && 't' == word[1]) - symbola(p, TERMSYM_LT); - else if ('N' == word[0] && 'a' == word[1]) - symbola(p, TERMSYM_NAN); - else if ('N' == word[0] && 'e' == word[1]) - symbola(p, TERMSYM_NEQ); - else if ('P' == word[0] && 'i' == word[1]) - symbola(p, TERMSYM_PI); - else if ('P' == word[0] && 'm' == word[1]) - symbola(p, TERMSYM_PLUSMINUS); - else if ('R' == word[0] && 'q' == word[1]) - symbola(p, TERMSYM_RDQUOTE); - else - warnx("escape sequence not supported: %c%c", - word[0], word[1]); + enc = termenc2; break; - default: - warnx("escape sequence not supported"); - break; + warnx("unsupported %zu-byte 
escape sequence", len); + return; } + + for ( ; enc->enc; enc++) + if (0 == memcmp(enc->enc, word, len)) { + symbola(p, enc->sym); + return; + } + + warnx("unsupported %zu-byte escape sequence", len); } @@ -856,6 +839,9 @@ stringa(struct termp *p, const char *c, size_t sz) { size_t s; + if (0 == sz) + return; + s = sz > p->maxcols * 2 ? sz : p->maxcols * 2; assert(c); diff --git a/term.h b/term.h index bc59b161..10623b92 100644 --- a/term.h +++ b/term.h @@ -54,6 +54,15 @@ enum tsym { TERMSYM_BAR = 25, TERMSYM_BULLET = 26, TERMSYM_AMP = 27, + TERMSYM_EM = 28, + TERMSYM_EN = 29, + TERMSYM_COPY = 30, + TERMSYM_ASTERISK = 31, + TERMSYM_SLASH = 32, + TERMSYM_HYPHEN = 33, + TERMSYM_SPACE = 34, + TERMSYM_PERIOD = 35, + TERMSYM_BREAK = 36 }; -- cgit