diff options
author | Kristaps Dzonsons <kristaps@bsd.lv> | 2009-02-24 13:46:54 +0000 |
---|---|---|
committer | Kristaps Dzonsons <kristaps@bsd.lv> | 2009-02-24 13:46:54 +0000 |
commit | 38fefdf005d8237391f4aabfb145c4a96d930b8b (patch) | |
tree | 3140d1ac71c9d4d0d8ed643bc9937302d4cf9379 | |
parent | c8bf126a0b893c247fa4ec7b271fa51308fe257f (diff) | |
download | mandoc-38fefdf005d8237391f4aabfb145c4a96d930b8b.tar.gz |
Character-encoding checked for all text (arguments/values).
-rw-r--r-- | argv.c | 15 | ||||
-rw-r--r-- | mdoc.3 | 19 | ||||
-rw-r--r-- | strings.c | 8 | ||||
-rw-r--r-- | term.c | 2 | ||||
-rw-r--r-- | validate.c | 6 |
5 files changed, 35 insertions, 15 deletions
```diff
--- a/argv.c
+++ b/argv.c
@@ -267,7 +267,7 @@ static int
 args(struct mdoc *mdoc, int line,
 	int *pos, char *buf, int fl, char **v)
 {
-	int i, c;
+	int i;
 	char *p, *pp;
 
 	assert(*pos > 0);
@@ -290,14 +290,11 @@ args(struct mdoc *mdoc, int line,
 	 */
 
 	if ((fl & ARGS_DELIM) && mdoc_iscdelim(buf[*pos])) {
-		for (i = *pos; (c = buf[i]); ) {
-			if ( ! mdoc_iscdelim(c))
+		for (i = *pos; buf[i]; ) {
+			if ( ! mdoc_iscdelim(buf[i]))
 				break;
 			i++;
-			if (0 == buf[i] || ! isspace(c))
-				break;
-			i++;
-			while (buf[i] && isspace(c))
+			while (buf[i] && isspace((int)buf[i]))
 				i++;
 		}
 		if (0 == buf[i]) {
@@ -404,8 +401,8 @@ args(struct mdoc *mdoc, int line,
 		/* Do non-tabsep look-ahead here. */
 
 		if ( ! (ARGS_TABSEP & fl))
-			while ((c = buf[*pos])) {
-				if (isspace(c))
+			while (buf[*pos]) {
+				if (isspace((int)buf[*pos]))
 					if ('\\' != buf[*pos - 1])
 						break;
 				(*pos)++;
--- a/mdoc.3
+++ b/mdoc.3
@@ -89,7 +89,9 @@ This section further defines the
 .Sx Functions
 and
 .Sx Variables
-available to programmers. The last sub-section,
+available to programmers. Following that,
+.Sx Character Encoding
+describes input format. Lastly,
 .Sx Abstract Syntax Tree ,
 documents the output tree.
 .\" SUBSECTION
@@ -174,6 +176,21 @@ An array of string-ified token names.
 An array of string-ified token argument names.
 .El
 .\" SUBSECTION
+.Ss Character Encoding
+The
+.Xr mdoc 3
+library accepts only printable ASCII characters as defined by
+.Xr isprint 3 .
+Non-ASCII character sequences are escaped with an escape character
+.Sq \\
+and followed by either an open-parenthesis
+.Sq \&(
+for two-character sequences; an open-bracket
+.Sq \&[
+for n-character sequences (terminated at a close-bracket
+.Sq \&] ) ;
+or one of a small set of single characters for other escapes.
+.\" SUBSECTION
 .Ss Abstract Syntax Tree
 The
 .Nm
--- a/strings.c
+++ b/strings.c
@@ -55,14 +55,16 @@ mdoc_isescape(const char *p)
 		/* FALLTHROUGH */
 	case (' '):
 		/* FALLTHROUGH */
+	case ('&'):
+		/* FALLTHROUGH */
 	case ('.'):
 		/* FALLTHROUGH */
 	case ('e'):
 		return(2);
 	case ('('):
-		if (0 == *++p)
+		if (0 == *++p || ! isgraph(*p))
 			return(0);
-		if (0 == *++p)
+		if (0 == *++p || ! isgraph(*p))
 			return(0);
 		return(4);
 	case ('['):
@@ -72,7 +74,7 @@ mdoc_isescape(const char *p)
 		}
 
 		for (c = 3, p++; *p && ']' != *p; p++, c++)
-			if (isspace(*p))
+			if ( ! isgraph(*p))
 				break;
 
 		return(*p == ']' ? c : 0);
--- a/term.c
+++ b/term.c
@@ -1054,7 +1054,7 @@ termp_sq_pre(DECL_ARGS)
 
 	if (MDOC_BODY != node->type)
 		return(1);
-	word(p, "`");
+	word(p, "\'");
 	p->flags |= TERMP_NOSPACE;
 	return(1);
 }
--- a/validate.c
+++ b/validate.c
@@ -17,6 +17,7 @@
  * PERFORMANCE OF THIS SOFTWARE.
  */
 #include <assert.h>
+#include <ctype.h>
 #include <stdlib.h>
 
 #include "private.h"
@@ -396,6 +397,9 @@ check_text(struct mdoc *mdoc, size_t line, size_t pos, const char *p)
 	size_t c;
 
 	for ( ; *p; p++) {
+		if ( ! isprint(*p) && '\t' != *p)
+			return(mdoc_perr(mdoc, line, pos,
+					"invalid characters"));
 		if ('\\' != *p)
 			continue;
 		if ((c = mdoc_isescape(p))) {
@@ -403,7 +407,7 @@ check_text(struct mdoc *mdoc, size_t line, size_t pos, const char *p)
 			continue;
 		}
 		return(mdoc_perr(mdoc, line, pos,
-				"invalid escape sequence"));
+					"invalid escape sequence"));
 	}
 
 	return(1);
```