diff options
author | Ingo Schwarze <schwarze@openbsd.org> | 2012-05-31 22:29:13 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@openbsd.org> | 2012-05-31 22:29:13 +0000 |
commit | 151c919ed8cc3ed826972bf9834c86d322c8ea2a (patch) | |
tree | 88cd0e892637ff83ebf37f93c1e552f8572604bb | |
parent | a72385d7db78c4b91054f655c790402b70f79571 (diff) | |
download | mandoc-151c919ed8cc3ed826972bf9834c86d322c8ea2a.tar.gz |
Implement the roff \z escape sequence, which is intended to output the
next character without advancing the cursor position.  Since that
character will usually be overwritten anyway, simply skip it.
With this change, the pod2man(1) preamble user-defined string \*:,
intended to render as a diaeresis or umlaut diacritic above the
preceding character, is rendered in a slightly less ugly way,
though still not correctly. It was rendered as "z.." and is now
rendered as ".".
Given that the definition of \*: uses elaborate manual \h positioning,
there is little chance for mandoc(1) to ever render it correctly,
but at least we can refrain from printing out a spurious "z", and
we can make the \z do something semi-reasonable for easier cases.
"just commit" kristaps@
-rw-r--r-- | html.c | 81 | ||||
-rw-r--r-- | html.h | 1 | ||||
-rw-r--r-- | mandoc.c | 12 | ||||
-rw-r--r-- | mandoc.h | 3 | ||||
-rw-r--r-- | term.c | 66 | ||||
-rw-r--r-- | term.h | 1 |
6 files changed, 125 insertions, 39 deletions
@@ -1,7 +1,7 @@ /* $Id$ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -262,8 +262,8 @@ print_metaf(struct html *h, enum mandoc_esc deco) int html_strlen(const char *cp) { - int ssz, sz; - const char *seq, *p; + size_t rsz; + int skip, sz; /* * Account for escaped sequences within string length @@ -274,10 +274,21 @@ html_strlen(const char *cp) */ sz = 0; - while (NULL != (p = strchr(cp, '\\'))) { - sz += (int)(p - cp); - ++cp; - switch (mandoc_escape(&cp, &seq, &ssz)) { + skip = 0; + while (1) { + rsz = strcspn(cp, "\\"); + if (rsz) { + cp += rsz; + if (skip) { + skip = 0; + rsz--; + } + sz += rsz; + } + if ('\0' == *cp) + break; + cp++; + switch (mandoc_escape(&cp, NULL, NULL)) { case (ESCAPE_ERROR): return(sz); case (ESCAPE_UNICODE): @@ -285,15 +296,19 @@ html_strlen(const char *cp) case (ESCAPE_NUMBERED): /* FALLTHROUGH */ case (ESCAPE_SPECIAL): - sz++; + if (skip) + skip = 0; + else + sz++; + break; + case (ESCAPE_SKIPCHAR): + skip = 1; break; default: break; } } - - assert(sz >= 0); - return(sz + strlen(cp)); + return(sz); } static int @@ -308,6 +323,12 @@ print_encode(struct html *h, const char *p, int norecurse) nospace = 0; while ('\0' != *p) { + if (HTML_SKIPCHAR & h->flags && '\\' != *p) { + h->flags &= ~HTML_SKIPCHAR; + p++; + continue; + } + sz = strcspn(p, rejs); fwrite(p, 1, sz, stdout); @@ -338,6 +359,31 @@ print_encode(struct html *h, const char *p, int norecurse) break; switch (esc) { + case (ESCAPE_FONT): + /* FALLTHROUGH */ + case (ESCAPE_FONTPREV): + /* FALLTHROUGH */ + case (ESCAPE_FONTBOLD): + /* FALLTHROUGH */ + case (ESCAPE_FONTITALIC): + /* FALLTHROUGH */ + case (ESCAPE_FONTROMAN): + if (0 == norecurse) + print_metaf(h, esc); + 
continue; + case (ESCAPE_SKIPCHAR): + h->flags |= HTML_SKIPCHAR; + continue; + default: + break; + } + + if (h->flags & HTML_SKIPCHAR) { + h->flags &= ~HTML_SKIPCHAR; + continue; + } + + switch (esc) { case (ESCAPE_UNICODE): /* Skip passed "u" header. */ c = mchars_num2uc(seq + 1, len - 1); @@ -356,19 +402,6 @@ print_encode(struct html *h, const char *p, int norecurse) else if (-1 == c && 1 == len) putchar((int)*seq); break; - case (ESCAPE_FONT): - /* FALLTHROUGH */ - case (ESCAPE_FONTPREV): - /* FALLTHROUGH */ - case (ESCAPE_FONTBOLD): - /* FALLTHROUGH */ - case (ESCAPE_FONTITALIC): - /* FALLTHROUGH */ - case (ESCAPE_FONTROMAN): - if (norecurse) - break; - print_metaf(h, esc); - break; case (ESCAPE_NOSPACE): if ('\0' == *p) nospace = 1; @@ -117,6 +117,7 @@ struct html { #define HTML_PREKEEP (1 << 3) #define HTML_NONOSPACE (1 << 4) /* never add spaces */ #define HTML_LITERAL (1 << 5) /* literal (e.g., <PRE>) context */ +#define HTML_SKIPCHAR (1 << 6) /* skip the next character */ struct tagq tags; /* stack of open tags */ struct rofftbl tbl; /* current table */ struct tag *tblt; /* current open table scope */ @@ -1,7 +1,7 @@ /* $Id$ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -143,6 +143,16 @@ mandoc_escape(const char **end, const char **start, int *sz) break; /* + * The \z escape is supposed to output the following + * character without advancing the cursor position. + * Since we are mostly dealing with terminal mode, + * let us just skip the next character. + */ + case ('z'): + (*end)++; + return(ESCAPE_SKIPCHAR); + + /* * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where * 'X' is the trigger. These have opaque sub-strings. 
*/ @@ -379,7 +379,8 @@ enum mandoc_esc { ESCAPE_FONTPREV, /* previous font mode */ ESCAPE_NUMBERED, /* a numbered glyph */ ESCAPE_UNICODE, /* a unicode codepoint */ - ESCAPE_NOSPACE /* suppress space if the last on a line */ + ESCAPE_NOSPACE, /* suppress space if the last on a line */ + ESCAPE_SKIPCHAR /* skip the next character */ }; typedef void (*mandocmsg)(enum mandocerr, enum mandoclevel, @@ -33,6 +33,7 @@ #include "term.h" #include "main.h" +static size_t cond_width(const struct termp *, int, int *); static void adjbuf(struct termp *p, int); static void bufferc(struct termp *, char); static void encode(struct termp *, const char *, size_t); @@ -419,12 +420,17 @@ term_word(struct termp *p, const char *word) p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM); while ('\0' != *word) { - if ((ssz = strcspn(word, "\\")) > 0) + if ('\\' != *word) { + if (TERMP_SKIPCHAR & p->flags) { + p->flags &= ~TERMP_SKIPCHAR; + word++; + continue; + } + ssz = strcspn(word, "\\"); encode(p, word, ssz); - - word += (int)ssz; - if ('\\' != *word) + word += (int)ssz; continue; + } word++; esc = mandoc_escape(&word, &seq, &sz); @@ -480,9 +486,14 @@ term_word(struct termp *p, const char *word) term_fontlast(p); break; case (ESCAPE_NOSPACE): - if ('\0' == *word) + if (TERMP_SKIPCHAR & p->flags) + p->flags &= ~TERMP_SKIPCHAR; + else if ('\0' == *word) p->flags |= TERMP_NOSPACE; break; + case (ESCAPE_SKIPCHAR): + p->flags |= TERMP_SKIPCHAR; + break; default: break; } @@ -522,6 +533,11 @@ encode1(struct termp *p, int c) { enum termfont f; + if (TERMP_SKIPCHAR & p->flags) { + p->flags &= ~TERMP_SKIPCHAR; + return; + } + if (p->col + 4 >= p->maxcols) adjbuf(p, p->col + 4); @@ -545,6 +561,11 @@ encode(struct termp *p, const char *word, size_t sz) enum termfont f; int i, len; + if (TERMP_SKIPCHAR & p->flags) { + p->flags &= ~TERMP_SKIPCHAR; + return; + } + /* LINTED */ len = sz; @@ -593,12 +614,22 @@ term_len(const struct termp *p, size_t sz) return((*p->width)(p, ' ') * sz); } +static size_t 
+cond_width(const struct termp *p, int c, int *skip) +{ + + if (*skip) { + (*skip) = 0; + return(0); + } else + return((*p->width)(p, c)); +} size_t term_strlen(const struct termp *p, const char *cp) { size_t sz, rsz, i; - int ssz, c; + int ssz, skip, c; const char *seq, *rhs; enum mandoc_esc esc; static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' }; @@ -610,10 +641,11 @@ term_strlen(const struct termp *p, const char *cp) */ sz = 0; + skip = 0; while ('\0' != *cp) { rsz = strcspn(cp, rej); for (i = 0; i < rsz; i++) - sz += (*p->width)(p, *cp++); + sz += cond_width(p, *cp++, &skip); c = 0; switch (*cp) { @@ -630,14 +662,14 @@ term_strlen(const struct termp *p, const char *cp) (seq + 1, ssz - 1); if ('\0' == c) break; - sz += (*p->width)(p, c); + sz += cond_width(p, c, &skip); continue; case (ESCAPE_SPECIAL): c = mchars_spec2cp (p->symtab, seq, ssz); if (c <= 0) break; - sz += (*p->width)(p, c); + sz += cond_width(p, c, &skip); continue; default: break; @@ -647,12 +679,12 @@ term_strlen(const struct termp *p, const char *cp) switch (esc) { case (ESCAPE_UNICODE): - sz += (*p->width)(p, '?'); + sz += cond_width(p, '?', &skip); break; case (ESCAPE_NUMBERED): c = mchars_num2char(seq, ssz); if ('\0' != c) - sz += (*p->width)(p, c); + sz += cond_width(p, c, &skip); break; case (ESCAPE_SPECIAL): rhs = mchars_spec2str @@ -664,6 +696,9 @@ term_strlen(const struct termp *p, const char *cp) rhs = seq; rsz = ssz; break; + case (ESCAPE_SKIPCHAR): + skip = 1; + break; default: break; } @@ -671,15 +706,20 @@ term_strlen(const struct termp *p, const char *cp) if (NULL == rhs) break; + if (skip) { + skip = 0; + break; + } + for (i = 0; i < rsz; i++) sz += (*p->width)(p, *rhs++); break; case (ASCII_NBRSP): - sz += (*p->width)(p, ' '); + sz += cond_width(p, ' ', &skip); cp++; break; case (ASCII_HYPH): - sz += (*p->width)(p, '-'); + sz += cond_width(p, '-', &skip); cp++; break; default: @@ -78,6 +78,7 @@ struct termp { #define TERMP_ANPREC (1 << 13) /* See termp_an_pre(). 
*/ #define TERMP_KEEP (1 << 14) /* Keep words together. */ #define TERMP_PREKEEP (1 << 15) /* ...starting with the next one. */ +#define TERMP_SKIPCHAR (1 << 16) /* Skip the next character. */ int *buf; /* Output buffer. */ enum termenc enc; /* Type of encoding. */ struct mchars *symtab; /* Encoded-symbol table. */ |