diff options
author | Ingo Schwarze <schwarze@openbsd.org> | 2014-08-05 05:48:56 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@openbsd.org> | 2014-08-05 05:48:56 +0000 |
commit | e1fc6d68ebbf78198e7f6e20c2323077fb5eba82 (patch) | |
tree | 2f8311a10562aea02ba07ed1dd54afeb8a88ae67 | |
parent | 8d0d88274b986b8bc6586ad36830b9148ab1cec6 (diff) | |
download | mandoc-e1fc6d68ebbf78198e7f6e20c2323077fb5eba82.tar.gz |
Sync library documentation with reality.
Split mandoc_escape(3), mandoc_malloc(3), and mchars_alloc(3)
out of mandoc(3), adding lots of new information.
-rw-r--r-- | Makefile | 9 | ||||
-rw-r--r-- | mandoc.3 | 270 | ||||
-rw-r--r-- | mandoc_escape.3 | 362 | ||||
-rw-r--r-- | mandoc_malloc.3 | 197 | ||||
-rw-r--r-- | mchars_alloc.3 | 224 |
5 files changed, 882 insertions, 180 deletions
@@ -223,10 +223,13 @@ DISTFILES = LICENSE \ mandoc.h \ mandoc_aux.h \ mandoc_char.7 \ + mandoc_escape.3 \ mandoc_html.3 \ + mandoc_malloc.3 \ manpath.h \ mansearch.3 \ mansearch.h \ + mchars_alloc.3 \ mdoc.7 \ mdoc.h \ msec.in \ @@ -327,8 +330,11 @@ WWW_MANS = apropos.1.html \ mandoc.1.html \ preconv.1.html \ mandoc.3.html \ + mandoc_escape.3.html \ mandoc_html.3.html \ + mandoc_malloc.3.html \ mansearch.3.html \ + mchars_alloc.3.html \ tbl.3.html \ mandoc.db.5.html \ eqn.7.html \ @@ -393,7 +399,8 @@ base-install: base-build $(INSTALL_LIB) man.h mandoc.h mandoc_aux.h mdoc.h \ $(DESTDIR)$(INCLUDEDIR) $(INSTALL_MAN) mandoc.1 preconv.1 demandoc.1 $(DESTDIR)$(MANDIR)/man1 - $(INSTALL_MAN) mandoc.3 tbl.3 $(DESTDIR)$(MANDIR)/man3 + $(INSTALL_MAN) mandoc.3 mandoc_escape.3 mandoc_malloc.3 \ + mchars_alloc.3 tbl.3 $(DESTDIR)$(MANDIR)/man3 $(INSTALL_MAN) man.7 mdoc.7 roff.7 eqn.7 tbl.7 mandoc_char.7 \ $(DESTDIR)$(MANDIR)/man7 $(INSTALL_DATA) example.style.css $(DESTDIR)$(EXAMPLEDIR) @@ -20,21 +20,11 @@ .Os .Sh NAME .Nm mandoc , -.Nm mandoc_calloc , -.Nm mandoc_escape , -.Nm mandoc_malloc , -.Nm mandoc_realloc , -.Nm mandoc_strdup , -.Nm mandoc_strndup , +.Nm man_deroff , .Nm man_meta , .Nm man_mparse , .Nm man_node , -.Nm mchars_alloc , -.Nm mchars_free , -.Nm mchars_num2char , -.Nm mchars_num2uc , -.Nm mchars_spec2cp , -.Nm mchars_spec2str , +.Nm mdoc_deroff , .Nm mdoc_meta , .Nm mdoc_node , .Nm mparse_alloc , @@ -50,57 +40,17 @@ .Sh LIBRARY .Lb libmandoc .Sh SYNOPSIS +.In sys/types.h .In mandoc.h .Fd "#define ASCII_NBRSP" .Fd "#define ASCII_HYPH" .Fd "#define ASCII_BREAK" -.Ft "void *" -.Fo mandoc_calloc -.Fa "size_t nmemb" -.Fa "size_t size" -.Fc -.Ft "enum mandoc_esc" -.Fo mandoc_escape -.Fa "const char **end" -.Fa "const char **start" -.Fa "int *sz" -.Fc -.Ft "void *" -.Fn mandoc_malloc "size_t size" -.Ft "struct mchars *" -.Fo mandoc_realloc -.Fa "void *ptr" -.Fa "size_t size" -.Fc -.Ft "char *" -.Fn mandoc_strdup -.Fn mchars_alloc "void" -.Ft void -.Fn mchars_free 
"struct mchars *p" -.Ft char -.Fn mchars_num2char "const char *cp" "size_t sz" -.Ft int -.Fn mchars_num2uc "const char *cp" "size_t sz" -.Ft "const char *" -.Fo mchars_spec2str -.Fa "const struct mchars *p" -.Fa "const char *cp" -.Fa "size_t sz" -.Fa "size_t *rsz" -.Fc -.Ft int -.Fo mchars_spec2cp -.Fa "const struct mchars *p" -.Fa "const char *cp" -.Fa "size_t sz" -.Fc -.Ft void +.Ft struct mparse * .Fo mparse_alloc -.Fa "enum mparset inttype" +.Fa "int options" .Fa "enum mandoclevel wlevel" .Fa "mandocmsg mmsg" .Fa "char *defos" -.Fa "int quick" .Fc .Ft void .Fo (*mandocmsg) @@ -138,6 +88,7 @@ .Fa "struct mparse *parse" .Fa "struct mdoc **mdoc" .Fa "struct man **man" +.Fa "char **sodest" .Fc .Ft "const char *" .Fo mparse_strerror @@ -147,8 +98,14 @@ .Fo mparse_strlevel .Fa "enum mandoclevel" .Fc +.In sys/types.h .In mandoc.h .In mdoc.h +.Ft void +.Fo mdoc_deroff +.Fa "char **dest" +.Fa "const struct mdoc_node *node" +.Fc .Ft "const struct mdoc_meta *" .Fo mdoc_meta .Fa "const struct mdoc *mdoc" @@ -159,8 +116,14 @@ .Fc .Vt extern const char * const * mdoc_argnames; .Vt extern const char * const * mdoc_macronames; +.In sys/types.h .In mandoc.h .In man.h +.Ft void +.Fo man_deroff +.Fa "char **dest" +.Fa "const struct man_node *node" +.Fc .Ft "const struct man_meta *" .Fo man_meta .Fa "const struct man *man" @@ -215,37 +178,22 @@ or invoke .Fn mparse_reset and parse new files. .El -.Pp -The -.Nm -library also contains routines for translating character strings into glyphs -.Pq see Fn mchars_alloc -and parsing escape sequences from strings -.Pq see Fn mandoc_escape . .Sh REFERENCE This section documents the functions, types, and variables available via -.In mandoc.h . +.In mandoc.h , +with the exception of those documented in +.Xr mandoc_escape 3 +and +.Xr mchars_alloc 3 . .Ss Types .Bl -ohang -.It Vt "enum mandoc_esc" -An escape sequence classification. .It Vt "enum mandocerr" A fatal error, error, or warning message during parsing. 
.It Vt "enum mandoclevel" A classification of an .Vt "enum mandocerr" as regards system operation. -.It Vt "struct mchars" -An opaque pointer to an object allowing for translation between -character strings and glyphs. -See -.Fn mchars_alloc . -.It Vt "enum mparset" -The type of parser when reading input. -This should usually be -.Dv MPARSE_AUTO -for auto-detection. .It Vt "struct mparse" An opaque pointer to a running parse sequence. Created with @@ -261,38 +209,20 @@ messages emitted by the parser. .El .Ss Functions .Bl -ohang -.It Fn mandoc_escape -Scan an escape sequence, i.e., a character string beginning with -.Sq \e . -Pass a pointer to the character after the -.Sq \e -as -.Va end ; -it will be set to the supremum of the parsed escape sequence unless -returning -.Dv ESCAPE_ERROR , -in which case the string is bogus and should be -thrown away. -If not -.Dv ESCAPE_ERROR -or -.Dv ESCAPE_IGNORE , -.Va start -is set to the first relevant character of the substring (font, glyph, -whatever) of length -.Va sz . -Both -.Va start -and -.Va sz -may be -.Dv NULL . -Declared in -.In mandoc.h , -implemented in -.Pa mandoc.c . +.It Fn man_deroff +Obtain a text-only representation of a +.Vt struct man_node , +including text contained in its child nodes. +To be used on children of the pointer returned from +.Fn man_node . +When it is no longer needed, the pointer returned from +.Fn man_deroff +can be passed to +.Xr free 3 . .It Fn man_meta -Obtain the meta-data of a successful parse. +Obtain the meta-data of a successful +.Xr man 7 +parse. This may only be used on a pointer returned by .Fn mparse_result . Declared in @@ -306,67 +236,29 @@ Declared in implemented in .Pa man.c . .It Fn man_node -Obtain the root node of a successful parse. +Obtain the root node of a successful +.Xr man 7 +parse. This may only be used on a pointer returned by .Fn mparse_result . Declared in .In man.h , implemented in .Pa man.c . 
-.It Fn mchars_alloc -Allocate an -.Vt "struct mchars *" -object for translating special characters into glyphs. -See -.Xr mandoc_char 7 -for an overview of special characters. -The object must be freed with -.Fn mchars_free . -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . -.It Fn mchars_free -Free an object created with -.Fn mchars_alloc . -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . -.It Fn mchars_num2char -Convert a character index (e.g., the \eN\(aq\(aq escape) into a -printable ASCII character. -Returns \e0 (the nil character) if the input sequence is malformed. -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . -.It Fn mchars_num2uc -Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into -a Unicode codepoint. -Returns \e0 (the nil character) if the input sequence is malformed. -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . -.It Fn mchars_spec2cp -Convert a special character into a valid Unicode codepoint. -Returns \-1 on failure or a non-zero Unicode codepoint on success. -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . -.It Fn mchars_spec2str -Convert a special character into an ASCII string. -Returns -.Dv NULL -on failure. -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . +.It Fn mdoc_deroff +Obtain a text-only representation of a +.Vt struct mdoc_node , +including text contained in its child nodes. +To be used on children of the pointer returned from +.Fn mdoc_node . +When it is no longer needed, the pointer returned from +.Fn mdoc_deroff +can be passed to +.Xr free 3 . .It Fn mdoc_meta -Obtain the meta-data of a successful parse. +Obtain the meta-data of a successful +.Xr mdoc 7 +parse. This may only be used on a pointer returned by .Fn mparse_result . Declared in @@ -374,7 +266,9 @@ Declared in implemented in .Pa mdoc.c . .It Fn mdoc_node -Obtain the root node of a successful parse. +Obtain the root node of a successful +.Xr mdoc 7 +parse.
This may only be used on a pointer returned by .Fn mparse_result . Declared in @@ -385,15 +279,33 @@ implemented in Allocate a parser. The arguments have the following effect: .Bl -tag -offset 5n -width inttype -.It Ar inttype -When set to +.It Ar options +When the .Dv MPARSE_MDOC or -.Dv MPARSE_MAN , -only that parser will be used. -With -.Dv MPARSE_AUTO , -the document type will be automatically detected. +.Dv MPARSE_MAN +bit is set, only that parser is used. +Otherwise, the document type is automatically detected. +.Pp +When the +.Dv MPARSE_SO +bit is set, +.Xr roff 7 +.Ic \&so +file inclusion requests are always honoured. +Otherwise, if the request is the only content in an input file, +only the file name is remembered, to be returned in the +.Fa sodest +argument of +.Fn mparse_result . +.Pp +When the +.Dv MPARSE_QUICK +bit is set, parsing is aborted after the NAME section. +This is for example useful in +.Xr makewhatis 8 +.Fl Q +to quickly build minimal databases. .It Ar wlevel Can be set to .Dv MANDOCLEVEL_FATAL , @@ -414,9 +326,6 @@ macro, overriding the .Dv OSNAME preprocessor definition and the results of .Xr uname 3 . -.It Ar quick -When set, parsing is aborted after the NAME section. -This is for example useful to quickly build minimal databases. .El .Pp The same parser may be used for multiple files so long as @@ -486,7 +395,7 @@ i.e., those where .Fn mparse_readfd returned less than MANDOCLEVEL_FATAL .Pc -should invoke this function, in which case one of the two pointers will +should invoke this function, in which case one of the three pointers will be filled in. Declared in .In mandoc.h , @@ -540,6 +449,8 @@ The following non-printing characters may be embedded in text strings: A non-breaking space character. .It Dv ASCII_HYPH A soft hyphen. +.It Dv ASCII_BREAK +A breakable zero-width space. .El .Pp Escape characters are also passed verbatim into text strings. 
@@ -547,11 +458,9 @@ An escape character is a sequence of characters beginning with the backslash .Pq Sq \e . To construct human-readable text, these should be intercepted with -.Fn mandoc_escape -and converted with one of -.Fn mchars_num2char , -.Fn mchars_spec2str , -and so on. +.Xr mandoc_escape 3 +and converted with one of the functions described in +.Xr mchars_alloc 3 . .Ss Man Abstract Syntax Tree This AST is governed by the ontological rules dictated in .Xr man 7 @@ -596,7 +505,7 @@ where capitalised non-terminals represent nodes. .El .Pp The only elements capable of nesting other elements are those with -next-lint scope as documented in +next-line scope as documented in .Xr man 7 . .Ss Mdoc Abstract Syntax Tree This AST is governed by the ontological @@ -732,10 +641,13 @@ front-ends to .Xr mandoc 1 are unable to render them in any meaningful way. Furthermore, behaviour when encountering badly-nested blocks is not -consistent across troff implementations, especially when using multiple +consistent across troff implementations, especially when using multiple levels of badly-nested blocks. .Sh SEE ALSO .Xr mandoc 1 , +.Xr mandoc_escape 3 , +.Xr mandoc_malloc 3 , +.Xr mchars_alloc 3 , .Xr eqn 7 , .Xr man 7 , .Xr mandoc_char 7 , diff --git a/mandoc_escape.3 b/mandoc_escape.3 new file mode 100644 index 00000000..e76601a3 --- /dev/null +++ b/mandoc_escape.3 @@ -0,0 +1,362 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt MANDOC_ESCAPE 3 +.Os +.Sh NAME +.Nm mandoc_escape +.Nd parse roff escape sequences +.Sh LIBRARY +.Lb libmandoc +.Sh SYNOPSIS +.In sys/types.h +.In mandoc.h +.Ft "enum mandoc_esc" +.Fo mandoc_escape +.Fa "const char **end" +.Fa "const char **start" +.Fa "int *sz" +.Fc +.Sh DESCRIPTION +This function scans a +.Xr roff 7 +escape sequence. +.Pp +An escape sequence consists of +.Bl -dash -compact -width 2n +.It +an initial backslash character +.Pq Sq \e , +.It +a single ASCII character called the escape sequence identifier, +.It +and, with only a few exceptions, an argument. +.El +.Pp +Arguments can be given in the following forms; some escape sequence +identifiers only accept some of these forms as specified below. +The first three forms are called the standard forms. +.Bl -tag -width 2n +.It \&In brackets: Ic \&[ Ns Ar argument Ns Ic \&] +The argument starts after the initial +.Sq \&[ , +ends before the final +.Sq \&] , +and the escape sequence ends with the final +.Sq \&] . +.It Two-character argument short form: Ic \&( Ns Ar ar +This form can only be used for arguments +consisting of exactly two characters. +It has the same effect as +.Ic \&[ Ns Ar ar Ns Ic \&] . +.It One-character argument short form: Ar a +This form can only be used for arguments +consisting of exactly one character. +It has the same effect as +.Ic \&[ Ns Ar a Ns Ic \&] . +.It Delimited form: Ar C Ns Ar argument Ns Ar C +The argument starts after the initial delimiter character +.Ar C , +ends before the next occurrence of the delimiter character +.Ar C , +and the escape sequence ends with that second +.Ar C . 
+Some escape sequences allow arbitrary characters +.Ar C +as quoting characters, some restrict the range of characters +that can be used as quoting characters. +.El +.Pp +Upon function entry, +.Fa end +is expected to point to the escape sequence identifier. +The values passed in as +.Fa start +and +.Fa sz +are ignored and overwritten. +.Pp +By design, this function cannot handle those +.Xr roff 7 +escape sequences that require in-place expansion, in particular +user-defined strings +.Ic \e* , +number registers +.Ic \en , +width measurements +.Ic \ew , +and numerical expression control +.Ic \eB . +These are handled by +.Fn roff_res , +a private preprocessor function called from +.Fn roff_parseln , +see the file +.Pa roff.c . +.Pp +The function +.Fn mandoc_escape +is used +.Bl -dash -compact -width 2n +.It +recursively by itself, because some escape sequence arguments can +in turn contain other escape sequences, +.It +for error detection internally by the +.Xr roff 7 +parser part of the +.Lb libmandoc , +see the file +.Pa roff.c , +.It +above all externally by the +.Xr mandoc 1 +formatting modules, in particular +.Fl Tascii +and +.Fl Thtml , +for formatting purposes, see the files +.Pa term.c +and +.Pa html.c , +.It +and rarely externally by high-level utilities using the mandoc library, +for example +.Xr makewhatis 8 , +to purge escape sequences from text. +.El +.Sh RETURN VALUES +Upon function return, the pointer +.Fa end +is set to the character after the end of the escape sequence, +such that the calling higher-level parser can easily continue. +.Pp +For escape sequences taking an argument, the pointer +.Fa start +is set to the beginning of the argument and +.Fa sz +is set to the length of the argument. +For escape sequences not taking an argument, +.Fa start +is set to the character after the end of the sequence and +.Fa sz +is set to 0. +Both +.Fa start +and +.Fa sz +may be +.Dv NULL ; +in that case, the argument and the length are not returned.
+.Pp +For sequences taking an argument, the function +.Fn mandoc_escape +returns one of the following values: +.Bl -tag -width 2n +.It Dv ESCAPE_FONT +The escape sequence +.Ic \ef +taking an argument in standard form: +.Ic \ef[ , \ef( , \ef Ns Ar a . +Two-character arguments starting with the character +.Sq C +are reduced to one-character arguments by skipping the +.Sq C . +More specific values are returned for the most commonly used arguments: +.Bl -column "argument" "ESCAPE_FONTITALIC" +.It argument Ta return value +.It Cm R No or Cm 1 Ta Dv ESCAPE_FONTROMAN +.It Cm I No or Cm 2 Ta Dv ESCAPE_FONTITALIC +.It Cm B No or Cm 3 Ta Dv ESCAPE_FONTBOLD +.It Cm P Ta Dv ESCAPE_FONTPREV +.It Cm BI Ta Dv ESCAPE_FONTBI +.El +.It Dv ESCAPE_SPECIAL +The escape sequence +.Ic \eC +taking an argument delimited with the single quote character +and, as a special exception, the escape sequences +.Em not +having an identifier, that is, those where the argument, in standard +form, directly follows the initial backslash: +.Ic \eC' , \e[ , \e( , \e Ns Ar a . +Note that the one-character argument short form can only be used for +argument characters that do not clash with escape sequence identifiers. +.Pp +If the argument consists of more than one character +and starts with the character +.Sq u , +.Dv ESCAPE_UNICODE +is returned as described below. +If the argument is just the single character +.Sq u , +.Dv ESCAPE_ERROR +is returned. +.Pp +The +.Dv ESCAPE_SPECIAL +special character escape sequences can be rendered using the functions +.Fn mchars_spec2cp +and +.Fn mchars_spec2str +described in the +.Xr mchars_alloc 3 +manual. +.It Dv ESCAPE_UNICODE +Escape sequences of the same format as described above under +.Dv ESCAPE_SPECIAL , +but with an argument starting with the character +.Sq u : +.Ic \eC'u , \e[u . +As a special exception, +.Fa start +is set to the character after the +.Sq u , +and the +.Fa sz +return value does not include the +.Sq u +either. 
+.Pp +Such Unicode character escape sequences can be rendered using the function +.Fn mchars_num2uc +described in the +.Xr mchars_alloc 3 +manual. +.It Dv ESCAPE_NUMBERED +The escape sequence +.Ic \eN +followed by a delimited argument. +The delimiter character is arbitrary except that digits cannot be used. +If a digit is encountered instead of the opening delimiter, that +digit is considered to be the argument and the end of the sequence, and +.Dv ESCAPE_IGNORE +is returned. +.Pp +Such ASCII character escape sequences can be rendered using the function +.Fn mchars_num2char +described in the +.Xr mchars_alloc 3 +manual. +.It Dv ESCAPE_IGNORE +.Bl -bullet -width 2n +.It +The escape sequence +.Ic \es +followed by an argument in standard form or by an argument delimited +by the single quote character: +.Ic \es' , \es[ , \es( , \es Ns Ar a . +As a special exception, an optional +.Sq + +or +.Sq \- +character is allowed after the +.Sq s +for all forms. +.It +The escape sequences +.Ic \eF , +.Ic \eg , +.Ic \ek , +.Ic \eM , +.Ic \em , +.Ic \en , +.Ic \eV , +and +.Ic \eY +followed by an argument in standard form. +.It +The escape sequences +.Ic \eA , +.Ic \eb , +.Ic \eD , +.Ic \eo , +.Ic \eR , +.Ic \eX , +and +.Ic \eZ +followed by an argument delimited by an arbitrary character. +.It +The escape sequences +.Ic \eH , +.Ic \eh , +.Ic \eL , +.Ic \el , +.Ic \eS , +.Ic \ev , +and +.Ic \ex +followed by an argument delimited by a character that cannot occur +in numerical expressions. +However, if any character that can occur in numerical expressions +is found instead of a delimiter, the sequence is considered to end +with that character, and +.Dv ESCAPE_ERROR +is returned. +.El +.It Dv ESCAPE_ERROR +Escape sequences taking an argument but not matching any of the above patterns. +In particular, that happens if the end of the logical input line +is reached before the end of the argument. 
+.El +.Pp +For sequences that do not take an argument, the function +.Fn mandoc_escape +returns one of the following values: +.Bl -tag -width 2n +.It Dv ESCAPE_SKIPCHAR +The escape sequence +.Qq \ez . +.It Dv ESCAPE_NOSPACE +The escape sequence +.Qq \ec . +.It Dv ESCAPE_IGNORE +The escape sequences +.Qq \ed +and +.Qq \eu . +.El +.Sh FILES +This function is implemented in +.Pa mandoc.c . +.Sh SEE ALSO +.Xr mchars_alloc 3 , +.Xr mandoc_char 7 , +.Xr roff 7 +.Sh HISTORY +This function has been available since mandoc 1.11.2. +.Sh AUTHORS +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +.An Ingo Schwarze Aq Mt schwarze@openbsd.org +.Sh BUGS +The function doesn't cleanly distinguish between sequences that are +valid and supported, valid and ignored, valid and unsupported, +syntactically invalid, or undefined. +For sequences that are ignored or unsupported, it doesn't tell +whether that deficiency is likely to cause major formatting problems +and/or loss of document content. +The function is already rather complicated and still parses some +sequences incorrectly. +. +.ig +For these sequences, the list given below specifies a starting string +and either the length of the argument or an ending character. +The argument starts after the starting string. +In the former case, the sequence ends with the end of the argument. +In the latter case, the argument ends before the ending character, +and the sequence ends with the ending character. +.. diff --git a/mandoc_malloc.3 b/mandoc_malloc.3 new file mode 100644 index 00000000..190432f7 --- /dev/null +++ b/mandoc_malloc.3 @@ -0,0 +1,197 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. 
+.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt MANDOC_MALLOC 3 +.Os +.Sh NAME +.Nm mandoc_malloc , +.Nm mandoc_realloc , +.Nm mandoc_reallocarray , +.Nm mandoc_calloc , +.Nm mandoc_strdup , +.Nm mandoc_strndup , +.Nm mandoc_asprintf +.Nd memory allocation function wrappers used in the mandoc library +.Sh LIBRARY +.Lb libmandoc +.Sh SYNOPSIS +.In sys/types.h +.In mandoc_aux.h +.Ft "void *" +.Fo mandoc_malloc +.Fa "size_t size" +.Fc +.Ft "void *" +.Fo mandoc_realloc +.Fa "void *ptr" +.Fa "size_t size" +.Fc +.Ft "void *" +.Fo mandoc_reallocarray +.Fa "void *ptr" +.Fa "size_t nmemb" +.Fa "size_t size" +.Fc +.Ft "void *" +.Fo mandoc_calloc +.Fa "size_t nmemb" +.Fa "size_t size" +.Fc +.Ft "char *" +.Fo mandoc_strdup +.Fa "const char *s" +.Fc +.Ft "char *" +.Fo mandoc_strndup +.Fa "const char *s" +.Fa "size_t maxlen" +.Fc +.Ft int +.Fo mandoc_asprintf +.Fa "char **ret" +.Fa "const char *format" +.Fa "..." +.Fc +.Sh DESCRIPTION +These functions call the +.Lb libc +functions of the same names, passing through their return values when +successful. +In case of failure, they do not return, but instead call +.Xr perror 3 +and +.Xr exit 3 . +They can be used both internally by any code in the +.Lb libmandoc +and externally by programs using that library, for example +.Xr mandoc 1 , +.Xr apropos 1 , +and +.Xr makewhatis 8 . +.Pp +The function +.Fn mandoc_malloc +allocates one new object, leaving the memory uninitialized. 
+The functions +.Fn mandoc_realloc +and +.Fn mandoc_reallocarray +change the size of an existing object or array, possibly moving it. +When shrinking the size, existing data is truncated; when growing, +the additional memory is not initialized. +The function +.Fn mandoc_calloc +allocates a new array, initializing it to zero. +.Pp +The argument +.Fa size +is the size of each object. +The argument +.Fa nmemb +is the new number of objects in the array. +The argument +.Fa ptr +is a pointer to the existing object or array to be resized; if it is +.Dv NULL , +a new object or array is allocated. +.Pp +The functions +.Fn mandoc_strdup +and +.Fn mandoc_strndup +copy a string into newly allocated memory. +For +.Fn mandoc_strdup , +the string pointed to by +.Fa s +needs to be NUL-terminated. +For +.Fn mandoc_strndup , +at most +.Fa maxlen +bytes are copied. +The function +.Fn mandoc_asprintf +writes output formatted according to +.Fa format +into newly allocated memory and returns a pointer to the result in +.Fa ret . +For all three string functions, the result is always NUL-terminated. +.Pp +When the objects and strings are no longer needed, +the pointers returned by these functions can be passed to +.Xr free 3 . +.Sh RETURN VALUES +The function +.Fn mandoc_asprintf +always returns the number of characters written, excluding the +final NUL byte. +It never returns -1. +.Pp +The other functions always return a valid pointer; they never return +.Dv NULL . +.Sh FILES +These functions are implemented in +.Pa mandoc_aux.c . +.Sh SEE ALSO +.Xr asprintf 3 , +.Xr exit 3 , +.Xr malloc 3 , +.Xr perror 3 , +.Xr strdup 3 +.Sh STANDARDS +The functions +.Fn malloc , +.Fn realloc , +and +.Fn calloc +are required by +.St -ansiC . +The functions +.Fn strdup +and +.Fn strndup +are required by +.St -p1003.1-2008 . +The function +.Fn asprintf +is a widespread extension that first appeared in the GNU C library. 
+.Pp +The function +.Fn reallocarray +is an extension that first appeared in +.Ox 5.6 . +If it is not provided by the operating system, the mandoc build system +uses a bundled portable implementation. +.Sh HISTORY +The functions +.Fn mandoc_malloc , +.Fn mandoc_realloc , +.Fn mandoc_calloc , +and +.Fn mandoc_strdup +have been available since mandoc 1.9.12, +.Fn mandoc_strndup +since 1.11.5, +and +.Fn mandoc_asprintf +and +.Fn mandoc_reallocarray +since 1.12.4 and 1.13.0. +.Sh AUTHORS +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +.An Ingo Schwarze Aq Mt schwarze@openbsd.org diff --git a/mchars_alloc.3 b/mchars_alloc.3 new file mode 100644 index 00000000..34c9fa3f --- /dev/null +++ b/mchars_alloc.3 @@ -0,0 +1,224 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate$ +.Dt MCHARS_ALLOC 3 +.Os +.Sh NAME +.Nm mchars_alloc , +.Nm mchars_free , +.Nm mchars_num2char , +.Nm mchars_num2uc , +.Nm mchars_spec2cp , +.Nm mchars_spec2str +.Nd character table for mandoc +.Sh LIBRARY +.Lb libmandoc +.Sh SYNOPSIS +.In sys/types.h +.In mandoc.h +.Ft "struct mchars *" +.Fn mchars_alloc "void" +.Ft void +.Fo mchars_free +.Fa "struct mchars *table" +.Fc +.Ft char +.Fo mchars_num2char +.Fa "const char *decimal" +.Fa "size_t sz" +.Fc +.Ft int +.Fo mchars_num2uc +.Fa "const char *hexadecimal" +.Fa "size_t sz" +.Fc +.Ft int +.Fo mchars_spec2cp +.Fa "const struct mchars *table" +.Fa "const char *name" +.Fa "size_t sz" +.Fc +.Ft "const char *" +.Fo mchars_spec2str +.Fa "const struct mchars *table" +.Fa "const char *name" +.Fa "size_t sz" +.Fa "size_t *rsz" +.Fc +.Sh DESCRIPTION +These functions translate Unicode character numbers and +.Xr roff 7 +character names into glyphs. +See +.Xr mandoc_char 7 +for a list of +.Xr roff 7 +special characters. +These functions are intended for external use by programs formatting +.Xr mdoc 7 +and +.Xr man 7 +pages for output, for example the +.Xr mandoc 1 +output formatter modules and +.Xr makewhatis 8 . +The +.Fa decimal , +.Fa hexadecimal , +.Fa name , +and +.Fa sz +input arguments are usually obtained from the +.Xr mandoc_escape 3 +parser function. +.Pp +The function +.Fn mchars_num2char +converts a +.Fa decimal +string representation of a character number consisting of +.Fa sz +digits into a printable ASCII character. +If the input string is non-numeric or does not represent a printable +ASCII character, the NUL character +.Pq Sq \e0 +is returned. +For example, the +.Xr mandoc 1 +.Fl Tascii , +.Fl Tutf8 , +and +.Fl Thtml +output modules use this function to render +.Xr roff 7 +.Ic \eN +escape sequences. +.Pp +The function +.Fn mchars_num2uc +converts a +.Fa hexadecimal +string representation of a Unicode codepoint consisting of +.Fa sz +digits into an integer representation.
+If the input string is non-numeric or represents an ASCII character, +the NUL character +.Pq Sq \e0 +is returned. +For example, the +.Xr mandoc 1 +.Fl Tutf8 +and +.Fl Thtml +output modules use this function to render +.Xr roff 7 +.Ic \e[u Ns Ar XXXX Ns Ic \&] +and +.Ic \eC\(aqu Ns Ar XXXX Ns Ic \(aq +escape sequences. +.Pp +The function +.Fn mchars_alloc +allocates an opaque +.Vt "struct mchars *" +table object for subsequent use by the following two lookup functions. +When no longer needed, this object can be destroyed with +.Fn mchars_free . +.Pp +The function +.Fn mchars_spec2cp +looks up a +.Xr roff 7 +special character +.Fa name +consisting of +.Fa sz +characters in the +.Fa table +and returns the corresponding Unicode codepoint. +If the +.Ar name +is not recognized, \-1 is returned. +For example, the +.Xr mandoc 1 +.Fl Tutf8 +and +.Fl Thtml +output modules use this function to render +.Xr roff 7 +.Ic \e[ Ns Ar name Ns Ic \&] +and +.Ic \eC\(aq Ns Ar name Ns Ic \(aq +escape sequences. +.Pp +The function +.Fn mchars_spec2str +looks up a +.Xr roff 7 +special character +.Fa name +consisting of +.Fa sz +characters in the +.Fa table +and returns an ASCII string representation. +The length of the representation is returned in +.Fa rsz . +In many cases, the meaning of such ASCII representations +is not quite obvious, so using +.Xr roff 7 +special characters in documents intended for ASCII rendering +is usually a bad idea. +If the +.Ar name +is not recognized, +.Dv NULL +is returned. +For example, +.Xr makewhatis 8 +and the +.Xr mandoc 1 +.Fl Tascii +output module use this function to render +.Xr roff 7 +.Ic \e[ Ns Ar name Ns Ic \&] +and +.Ic \eC\(aq Ns Ar name Ns Ic \(aq +escape sequences. +.Sh FILES +These functions are implemented in the file +.Pa chars.c .
+.Sh SEE ALSO +.Xr mandoc 1 , +.Xr mandoc_escape 3 , +.Xr mandoc_char 7 , +.Xr roff 7 +.Sh HISTORY +These functions and their predecessors have been available since the +following mandoc versions: +.Bl -column "mchars_num2char()" "1.11.3" "chars_num2char()" "1.10.10" +.It Sy function Ta since Ta Sy predecessor Ta since +.It Fn mchars_alloc Ta 1.11.3 Ta Fn ascii2htab Ta 1.5.3 +.It Fn mchars_free Ta 1.11.2 Ta Fn asciifree Ta 1.6.0 +.It Fn mchars_num2char Ta 1.11.2 Ta Fn chars_num2char Ta 1.10.10 +.It Fn mchars_num2uc Ta 1.11.3 Ta \(em Ta \(em +.It Fn mchars_spec2cp Ta 1.11.2 Ta Fn chars_spec2cp Ta 1.10.5 +.It Fn mchars_spec2str Ta 1.11.2 Ta Fn a2ascii Ta 1.5.3 +.El +.Sh AUTHORS +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +.An Ingo Schwarze Aq Mt schwarze@openbsd.org |