summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--man.3279
-rw-r--r--man.77
-rw-r--r--man.c2
-rw-r--r--man.h3
-rw-r--r--man_action.c1
-rw-r--r--man_macro.c1
-rw-r--r--man_term.c1
-rw-r--r--man_validate.c1
-rw-r--r--mandoc_char.7469
-rw-r--r--mdoc.315
-rw-r--r--mdoc.7422
-rw-r--r--mdoc_macro.c65
12 files changed, 817 insertions, 449 deletions
diff --git a/man.3 b/man.3
new file mode 100644
index 00000000..8df78583
--- /dev/null
+++ b/man.3
@@ -0,0 +1,279 @@
+.\" $Id$
+.\"
+.\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the
+.\" above copyright notice and this permission notice appear in all
+.\" copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+.\" WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+.\" WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
+.\" AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+.\" DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+.\" PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+.\" TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+.\" PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate$
+.Dt man 3
+.Os
+.\" SECTION
+.Sh NAME
+.Nm man_alloc ,
+.Nm man_parseln ,
+.Nm man_endparse ,
+.Nm man_node ,
+.Nm man_meta ,
+.Nm man_free ,
+.Nm man_reset
+.Nd man macro compiler library
+.\" SECTION
+.Sh SYNOPSIS
+.Fd #include <man.h>
+.Vt extern const char * const * man_macronames;
+.Ft "struct man *"
+.Fn man_alloc "void *data" "int pflags" "const struct man_cb *cb"
+.Ft void
+.Fn man_reset "struct man *man"
+.Ft void
+.Fn man_free "struct man *man"
+.Ft int
+.Fn man_parseln "struct man *man" "int line" "char *buf"
+.Ft "const struct man_node *"
+.Fn man_node "struct man *man"
+.Ft "const struct man_meta *"
+.Fn man_meta "struct man *man"
+.Ft int
+.Fn man_endparse "struct man *man"
+.\" SECTION
+.Sh DESCRIPTION
+The
+.Nm man
+library parses lines of
+.Xr man 7
+input (and
+.Em only
+man) into an abstract syntax tree (AST).
+.\" PARAGRAPH
+.Pp
+In general, applications initiate a parsing sequence with
+.Fn man_alloc ,
+parse each line in a document with
+.Fn man_parseln ,
+close the parsing session with
+.Fn man_endparse ,
+operate over the syntax tree returned by
+.Fn man_node
+and
+.Fn man_meta ,
+then free all allocated memory with
+.Fn man_free .
+The
+.Fn man_reset
+function may be used in order to reset the parser for another input
+sequence. See the
+.Sx EXAMPLES
+section for a full example.
+.\" PARAGRAPH
+.Pp
+This section further defines the
+.Sx Types ,
+.Sx Functions
+and
+.Sx Variables
+available to programmers. Following that, the
+.Sx Abstract Syntax Tree
+section documents the output tree.
+.\" SUBSECTION
+.Ss Types
+Both functions (see
+.Sx Functions )
+and variables (see
+.Sx Variables )
+may use the following types:
+.Bl -ohang -offset "XXXX"
+.\" LIST-ITEM
+.It Vt struct man
+An opaque type defined in
+.Pa man.c .
+Its values are only used privately within the library.
+.\" LIST-ITEM
+.It Vt struct man_cb
+A set of message callbacks defined in
+.Pa man.h .
+.\" LIST-ITEM
+.It Vt struct man_node
+A parsed node. Defined in
+.Pa man.h .
+See
+.Sx Abstract Syntax Tree
+for details.
+.El
+.\" SUBSECTION
+.Ss Functions
+Function descriptions follow:
+.Bl -ohang -offset "XXXX"
+.\" LIST-ITEM
+.It Fn man_alloc
+Allocates a parsing structure. The
+.Fa data
+pointer is passed to callbacks in
+.Fa cb ,
+which are documented further in the header file.
+The
+.Fa pflags
+arguments are defined in
+.Pa man.h .
+Returns NULL on failure. If non-NULL, the pointer must be freed with
+.Fn man_free .
+.\" LIST-ITEM
+.It Fn man_reset
+Reset the parser for another parse routine. After its use,
+.Fn man_parseln
+behaves as if invoked for the first time.
+.\" LIST-ITEM
+.It Fn man_free
+Free all resources of a parser. The pointer is no longer valid after
+invocation.
+.\" LIST-ITEM
+.It Fn man_parseln
+Parse a NUL-terminated line of input. This line should not contain the
+trailing newline. Returns 0 on failure, 1 on success. The input buffer
+.Fa buf
+is modified by this function.
+.\" LIST-ITEM
+.It Fn man_endparse
+Signals that the parse is complete. Note that if
+.Fn man_endparse
+is called subsequent to
+.Fn man_node ,
+the resulting tree is incomplete. Returns 0 on failure, 1 on success.
+.\" LIST-ITEM
+.It Fn man_node
+Returns the first node of the parse. Note that if
+.Fn man_parseln
+or
+.Fn man_endparse
+return 0, the tree will be incomplete.
+.It Fn man_meta
+Returns the document's parsed meta-data. If this information has not
+yet been supplied or
+.Fn man_parseln
+or
+.Fn man_endparse
+return 0, the data will be incomplete.
+.El
+.\" SUBSECTION
+.Ss Variables
+The following variables are also defined:
+.Bl -ohang -offset "XXXX"
+.\" LIST-ITEM
+.It Va man_macronames
+An array of string-ified token names.
+.El
+.\" SUBSECTION
+.Ss Abstract Syntax Tree
+The
+.Nm
+functions produce an abstract syntax tree (AST) describing input in a
+regular form. It may be reviewed at any time with
+.Fn man_node ;
+however, if called before
+.Fn man_endparse ,
+or after
+.Fn man_endparse
+or
+.Fn man_parseln
+fail, it may be incomplete.
+.\" PARAGRAPH
+.Pp
+This AST is governed by the ontological
+rules dictated in
+.Xr man 7
+and derives its terminology accordingly.
+.\" PARAGRAPH
+.Pp
+The AST is composed of
+.Vt struct man_node
+nodes with element, root and text types as declared
+by the
+.Va type
+field. Each node also provides its parse point (the
+.Va line ,
+.Va sec ,
+and
+.Va pos
+fields), its position in the tree (the
+.Va parent ,
+.Va child ,
+.Va next
+and
+.Va prev
+fields) and some type-specific data.
+.\" PARAGRAPH
+.Pp
+The tree itself is arranged according to the following normal form,
+where capitalised non-terminals represent nodes.
+.Pp
+.Bl -tag -width "ELEMENTXX" -compact -offset "XXXX"
+.\" LIST-ITEM
+.It ROOT
+\(<- mnode+
+.It mnode
+\(<- ELEMENT | TEXT
+.It ELEMENT
+\(<- ELEMENT | TEXT*
+.It TEXT
+\(<- [[:alpha:]]*
+.El
+.\" PARAGRAPH
+.Pp
+The only elements capable of nesting other elements are those with
+next-line scope as documented in
+.Xr man 7 .
+.\" SECTION
+.Sh EXAMPLES
+The following example reads lines from stdin and parses them, operating
+on the finished parse tree with
+.Fn parsed .
+Note that, if the last line of the file isn't newline-terminated, this
+will truncate the file's last character (see
+.Xr fgetln 3 ) .
+Further, this example does not error-check nor free memory upon failure.
+.Bd -literal -offset "XXXX"
+struct man *man;
+const struct man_node *node;
+char *buf;
+size_t len;
+int line;
+
+line = 1;
+man = man_alloc(NULL, 0, NULL);
+
+while ((buf = fgetln(fp, &len))) {
+ buf[len - 1] = '\\0';
+ if ( ! man_parseln(man, line, buf))
+ errx(1, "man_parseln");
+ line++;
+}
+
+if ( ! man_endparse(man))
+ errx(1, "man_endparse");
+if (NULL == (node = man_node(man)))
+ errx(1, "man_node");
+
+parsed(man, node);
+man_free(man);
+.Ed
+.\" SECTION
+.Sh SEE ALSO
+.Xr mandoc 1 ,
+.Xr man 7
+.\" SECTION
+.Sh AUTHORS
+The
+.Nm
+library was written by
+.An Kristaps Dzonsons Aq kristaps@openbsd.org .
diff --git a/man.7 b/man.7
index 3c522ff4..76f26614 100644
--- a/man.7
+++ b/man.7
@@ -88,10 +88,6 @@ or a single one-character sequence.
Characters may alternatively be escaped by a slash-asterisk,
.Sq \e* ,
with the same combinations as described above. This form is deprecated.
-.Pp
-The
-.Xr mdoc 7
-contains a table of all available escapes.
.\" SECTION
.Sh STRUCTURE
Macros are one to three three characters in length and begin with a
@@ -182,7 +178,8 @@ macros and corresponding number of arguments.
.El
.\" SECTION
.Sh SEE ALSO
-.Xr mandoc 1
+.Xr mandoc 1 ,
+.Xr mandoc_char 7
.\" SECTION
.Sh AUTHORS
The
diff --git a/man.c b/man.c
index 57b79900..49db57fa 100644
--- a/man.c
+++ b/man.c
@@ -32,7 +32,7 @@ const char *const __man_macronames[MAN_MAX] = {
"IP", "HP", "SM", "SB",
"BI", "IB", "BR", "RB",
"R", "B", "I", "IR",
- "RI"
+ "RI", "br"
};
const char * const *man_macronames = __man_macronames;
diff --git a/man.h b/man.h
index 993bd385..881e6bb4 100644
--- a/man.h
+++ b/man.h
@@ -42,7 +42,8 @@
#define MAN_I 18
#define MAN_IR 19
#define MAN_RI 20
-#define MAN_MAX 21
+#define MAN_br 21
+#define MAN_MAX 22
enum man_type {
MAN_TEXT,
diff --git a/man_action.c b/man_action.c
index 631a1d66..912d2bcc 100644
--- a/man_action.c
+++ b/man_action.c
@@ -61,6 +61,7 @@ const struct actions man_actions[MAN_MAX] = {
{ NULL }, /* I */
{ NULL }, /* IR */
{ NULL }, /* RI */
+ { NULL }, /* br */
};
diff --git a/man_macro.c b/man_macro.c
index 2b389b14..b3e9cc05 100644
--- a/man_macro.c
+++ b/man_macro.c
@@ -52,6 +52,7 @@ static int man_flags[MAN_MAX] = {
FL_NLINE, /* I */
FL_NLINE, /* IR */
FL_NLINE, /* RI */
+ 0, /* br */
};
int
diff --git a/man_term.c b/man_term.c
index 2c207ef7..7d949470 100644
--- a/man_term.c
+++ b/man_term.c
@@ -80,6 +80,7 @@ static const struct termact termacts[MAN_MAX] = {
{ pre_I, post_I }, /* I */
{ pre_IR, NULL }, /* IR */
{ pre_RI, NULL }, /* RI */
+ { pre_PP, NULL }, /* br */
};
static void print_head(struct termp *,
diff --git a/man_validate.c b/man_validate.c
index fb7dd295..b7fd95b8 100644
--- a/man_validate.c
+++ b/man_validate.c
@@ -71,6 +71,7 @@ static const struct man_valid man_valids[MAN_MAX] = {
{ NULL }, /* I */
{ NULL }, /* IR */
{ NULL }, /* RI */
+ { posts_eq0 }, /* br */
};
diff --git a/mandoc_char.7 b/mandoc_char.7
new file mode 100644
index 00000000..e65dc9c6
--- /dev/null
+++ b/mandoc_char.7
@@ -0,0 +1,469 @@
+.\" $Id$
+.\"
+.\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the
+.\" above copyright notice and this permission notice appear in all
+.\" copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+.\" WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+.\" WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
+.\" AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+.\" DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+.\" PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+.\" TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+.\" PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate$
+.Dt mandoc_char 7
+.Os
+.\" SECTION
+.Sh NAME
+.Nm mandoc_char
+.Nd mandoc special characters
+.\" SECTION
+.Sh DESCRIPTION
+This documents the special characters accepted by
+.Xr mandoc 1
+to format
+.Xr mdoc 7
+and
+.Xr man 7
+documents. Specific output devices of
+.Xr mandoc 1 ,
+dictated by the
+.Fl T Ns Ar output
+argument, will properly render these sequences.
+.Pp
+Both
+.Xr mdoc 7
+and
+.Xr man 7
+encode special characters with slightly different semantics; consult the
+respective manuals for these escapes.
+.Pp
+Grammatic:
+.Bl -tag -width 12n -offset "XXXX" -compact
+.It \e(em
+.Pq em-dash
+.It \e(en
+.Pq en-dash
+.It \e-
+.Pq hyphen
+.It \e\e
+.Pq back-slash
+.It \e'
+.Pq apostrophe
+.It \e`
+.Pq back-tick
+.It \e
+.Pq space
+.It \e.
+.Pq period
+.It \e(r!
+.Pq upside-down exclamation
+.It \e(r?
+.Pq upside-down question
+.El
+.\" PARAGRAPH
+.Pp
+Enclosures:
+.Bl -tag -width 12n -offset "XXXX" -compact
+.It \e(lh
+.Pq left hand
+.It \e(rh
+.Pq right hand
+.It \e(Fo
+.Pq left guillemet
+.It \e(Fc
+.Pq right guillemet
+.It \e(fo
+.Pq left guilsing
+.It \e(fc
+.Pq right guilsing
+.It \e(rC
+.Pq right brace
+.It \e(lC
+.Pq left brace
+.It \e(ra
+.Pq right angle
+.It \e(la
+.Pq left angle
+.It \e(rB
+.Pq right bracket
+.It \e(lB
+.Pq left bracket
+.It \eq
+.Pq double-quote
+.It \e(lq
+.Pq left double-quote
+.It \e(Lq
+.Pq left double-quote, deprecated
+.It \e(rq
+.Pq right double-quote
+.It \e(Rq
+.Pq right double-quote, deprecated
+.It \e(oq
+.Pq left single-quote
+.It \e(aq
+.Pq right single-quote
+.It \e(Bq
+.Pq right low double-quote
+.It \e(bq
+.Pq right low single-quote
+.El
+.\" PARAGRAPH
+.Pp
+Indicatives:
+.Bl -tag -width 12n -offset "XXXX" -compact
+.It \e(<-
+.Pq left arrow
+.It \e(->
+.Pq right arrow
+.It \e(ua
+.Pq up arrow
+.It \e(da
+.Pq down arrow
+.It \e(<>
+.Pq left-right arrow
+.It \e(lA
+.Pq left double-arrow
+.It \e(rA
+.Pq right double-arrow
+.It \e(uA
+.Pq up double-arrow
+.It \e(dA
+.Pq down double-arrow
+.It \e(hA
+.Pq left-right double-arrow
+.El
+.\" PARAGRAPH
+.Pp
+Mathematical:
+.Bl -tag -width 12n -offset "XXXX" -compact
+.It \e(es
+.Pq empty set
+.It \e(ca
+.Pq intersection
+.It \e(cu
+.Pq union
+.It \e(gr
+.Pq gradient
+.It \e(pd
+.Pq partial differential
+.It \e(ap
+.Pq similarity
+.It \e(=)
+.Pq proper superset
+.It \e((=
+.Pq proper subset
+.It \e(eq
+.Pq equals
+.It \e(di
+.Pq division
+.It \e(mu
+.Pq multiplication
+.It \e(pl
+.Pq addition
+.It \e(nm
+.Pq not element
+.It \e(mo
+.Pq element
+.It \e(Im
+.Pq imaginary
+.It \e(Re
+.Pq real
+.It \e(Ah
+.Pq aleph
+.It \e(te
+.Pq existential quantifier
+.It \e(fa
+.Pq universal quantifier
+.It \e(AN
+.Pq logical AND
+.It \e(OR
+.Pq logical OR
+.It \e(no
+.Pq logical NOT
+.It \e(st
+.Pq such that
+.It \e(tf
+.Pq therefore
+.It \e(~~
+.Pq approximate
+.It \e(~=
+.Pq approximately equals
+.It \e(=~
+.Pq congruent
+.It \e(Gt
+.Pq greater-than, deprecated
+.It \e(Lt
+.Pq less-than, deprecated
+.It \e(<=
+.Pq less-than-equal
+.It \e(Le
+.Pq less-than-equal, deprecated
+.It \e(>=
+.Pq greater-than-equal
+.It \e(Ge
+.Pq greater-than-equal, deprecated
+.It \e(==
+.Pq equal
+.It \e(!=
+.Pq not equal
+.It \e(Ne
+.Pq not equal, deprecated
+.It \e(if
+.Pq infinity
+.It \e(If
+.Pq infinity, deprecated
+.It \e(na
+.Pq NaN , an extension
+.It \e(Na
+.Pq NaN, deprecated
+.It \e(+-
+.Pq plus-minus
+.It \e(Pm
+.Pq plus-minus, deprecated
+.It \e(**
+.Pq asterisk
+.El
+.\" PARAGRAPH
+.Pp
+Ligatures:
+.Bl -tag -width 12n -offset "XXXX" -compact
+.It \e(ss
+.Pq German eszett
+.It \e(AE
+.Pq upper-case AE
+.It \e(ae
+.Pq lower-case AE
+.It \e(OE
+.Pq upper-case OE
+.It \e(oe
+.Pq lower-case OE
+.It \e(ff
+.Pq ff ligature
+.It \e(fi
+.Pq fi ligature
+.It \e(fl
+.Pq fl ligature
+.It \e(Fi
+.Pq ffi ligature
+.It \e(Fl
+.Pq ffl ligature
+.El
+.\" PARAGRAPH
+.Pp
+Diacritics and letters:
+.Bl -tag -width 12n -offset "XXXX" -compact
+.It \e(ga
+.Pq grave accent
+.It \e(aa
+.Pq acute accent
+.It \e(a"
+.Pq umlaut accent
+.It \e(ad
+.Pq dieresis accent
+.It \e(a~
+.Pq tilde accent
+.It \e(a^
+.Pq circumflex accent
+.It \e(ac
+.Pq cedilla accent
+.It \e(ad
+.Pq dieresis accent
+.It \e(ah
+.Pq caron accent
+.It \e(ao
+.Pq ring accent
+.It \e(ho
+.Pq hook accent
+.It \e(ab
+.Pq breve accent
+.It \e(a-
+.Pq macron accent
+.It \e(-D
+.Pq upper-case eth
+.It \e(Sd
+.Pq lower-case eth
+.It \e(TP
+.Pq upper-case thorn
+.It \e(Tp
+.Pq lower-case thorn
+.It \e('A
+.Pq upper-case acute A
+.It \e('E
+.Pq upper-case acute E
+.It \e('I
+.Pq upper-case acute I
+.It \e('O
+.Pq upper-case acute O
+.It \e('U
+.Pq upper-case acute U
+.It \e('a
+.Pq lower-case acute a
+.It \e('e
+.Pq lower-case acute e
+.It \e('i
+.Pq lower-case acute i
+.It \e('o
+.Pq lower-case acute o
+.It \e('u
+.Pq lower-case acute u
+.It \e(`A
+.Pq upper-case grave A
+.It \e(`E
+.Pq upper-case grave E
+.It \e(`I
+.Pq upper-case grave I
+.It \e(`O
+.Pq upper-case grave O
+.It \e(`U
+.Pq upper-case grave U
+.It \e(`a
+.Pq lower-case grave a
+.It \e(`e
+.Pq lower-case grave e
+.It \e(`i
+.Pq lower-case grave i
+.It \e(`o
+.Pq lower-case grave o
+.It \e(`u
+.Pq lower-case grave u
+.It \e(~A
+.Pq upper-case tilde A
+.It \e(~N
+.Pq upper-case tilde N
+.It \e(~O
+.Pq upper-case tilde O
+.It \e(~a
+.Pq lower-case tilde a
+.It \e(~n
+.Pq lower-case tilde n
+.It \e(~o
+.Pq lower-case tilde o
+.It \e(:A
+.Pq upper-case dieresis A
+.It \e(:E
+.Pq upper-case dieresis E
+.It \e(:I
+.Pq upper-case dieresis I
+.It \e(:O
+.Pq upper-case dieresis O
+.It \e(:U
+.Pq upper-case dieresis U
+.It \e(:a
+.Pq lower-case dieresis a
+.It \e(:e
+.Pq lower-case dieresis e
+.It \e(:i
+.Pq lower-case dieresis i
+.It \e(:o
+.Pq lower-case dieresis o
+.It \e(:u
+.Pq lower-case dieresis u
+.It \e(:y
+.Pq lower-case dieresis y
+.It \e(^A
+.Pq upper-case circumflex A
+.It \e(^E
+.Pq upper-case circumflex E
+.It \e(^I
+.Pq upper-case circumflex I
+.It \e(^O
+.Pq upper-case circumflex O
+.It \e(^U
+.Pq upper-case circumflex U
+.It \e(^a
+.Pq lower-case circumflex a
+.It \e(^e
+.Pq lower-case circumflex e
+.It \e(^i
+.Pq lower-case circumflex i
+.It \e(^o
+.Pq lower-case circumflex o
+.It \e(^u
+.Pq lower-case circumflex u
+.It \e(,C
+.Pq upper-case cedilla C
+.It \e(,c
+.Pq lower-case cedilla c
+.It \e(/L
+.Pq upper-case stroke L
+.It \e(/l
+.Pq lower-case stroke l
+.It \e(/O
+.Pq upper-case stroke O
+.It \e(/o
+.Pq lower-case stroke o
+.It \e(oA
+.Pq upper-case ring A
+.It \e(oa
+.Pq lower-case ring a
+.El
+.\" PARAGRAPH
+.Pp
+Monetary:
+.Bl -tag -width 12n -offset "XXXX" -compact
+.It \e(Cs
+.Pq Scandinavian
+.It \e(Do
+.Pq dollar
+.It \e(Po
+.Pq pound
+.It \e(Ye
+.Pq yen
+.It \e(Fn
+.Pq florin
+.It \e(ct
+.Pq cent
+.El
+.\" PARAGRAPH
+.Pp
+Special symbols:
+.Bl -tag -width 12n -offset "XXXX" -compact
+.It \e0
+.Pq white-space
+.It \e(de
+.Pq degree
+.It \e(ps
+.Pq paragraph
+.It \e(sc
+.Pq section
+.It \e(dg
+.Pq dagger
+.It \e(dd
+.Pq double dagger
+.It \e(ci
+.Pq circle
+.It \e(ba
+.Pq bar
+.It \e(bb
+.Pq broken bar
+.It \e(Ba
+.Pq bar, deprecated
+.It \e(co
+.Pq copyright
+.It \e(rg
+.Pq registered
+.It \e(tm
+.Pq trademarked
+.It \e&
+.Pq non-breaking space
+.It \ee
+.Pq escape
+.It \e(Am
+.Pq ampersand, deprecated
+.El
+.\" SECTION
+.Sh SEE ALSO
+.Xr mandoc 1
+.\" SECTION
+.Sh AUTHORS
+The
+.Nm
+manual page was written by
+.An Kristaps Dzonsons Aq kristaps@openbsd.org .
diff --git a/mdoc.3 b/mdoc.3
index 27b5dca1..6333159d 100644
--- a/mdoc.3
+++ b/mdoc.3
@@ -56,14 +56,7 @@ library parses lines of
.Xr mdoc 7
input (and
.Em only
-mdoc) into an abstract syntax tree that generalises the semantic
-annotation of its input. Common front-ends for
-.Nm
-are
-.Xr mdocterm 1 ,
-.Xr mdoclint 1
-and
-.Xr mdoctree 1 .
+mdoc) into an abstract syntax tree (AST).
.\" PARAGRAPH
.Pp
In general, applications initiate a parsing sequence with
@@ -227,9 +220,7 @@ fields), its position in the tree (the
.Va next
and
.Va prev
-fields) and type-specific data (the
-.Va data
-field).
+fields) and some type-specific data.
.\" PARAGRAPH
.Pp
The tree itself is arranged according to the following normal form,
@@ -281,7 +272,7 @@ size_t len;
int line;
line = 1;
-mdoc = mdoc_alloc(NULL, NULL);
+mdoc = mdoc_alloc(NULL, 0, NULL);
while ((buf = fgetln(fp, &len))) {
buf[len - 1] = '\\0';
diff --git a/mdoc.7 b/mdoc.7
index 9b76e4d5..b0bef155 100644
--- a/mdoc.7
+++ b/mdoc.7
@@ -122,423 +122,6 @@ or a single one-character sequence.
Characters may alternatively be escaped by a slash-asterisk,
.Sq \e* ,
with the same combinations as described above. This form is deprecated.
-.Pp
-The following is a table of all available escapes.
-.Pp
-Grammatic:
-.Bl -tag -width 12n -offset "XXXX" -compact
-.It \e(em
-.Pq em-dash
-.It \e(en
-.Pq en-dash
-.It \e-
-.Pq hyphen
-.It \e\e
-.Pq back-slash
-.It \e'
-.Pq apostrophe
-.It \e`
-.Pq back-tick
-.It \e
-.Pq space
-.It \e.
-.Pq period
-.It \e(r!
-.Pq upside-down exclamation
-.It \e(r?
-.Pq upside-down question
-.El
-.\" PARAGRAPH
-.Pp
-Enclosures:
-.Bl -tag -width 12n -offset "XXXX" -compact
-.It \e(lh
-.Pq left hand
-.It \e(rh
-.Pq right hand
-.It \e(Fo
-.Pq left guillemet
-.It \e(Fc
-.Pq right guillemet
-.It \e(fo
-.Pq left guilsing
-.It \e(fc
-.Pq right guilsing
-.It \e(rC
-.Pq right brace
-.It \e(lC
-.Pq left brace
-.It \e(ra
-.Pq right angle
-.It \e(la
-.Pq left angle
-.It \e(rB
-.Pq right bracket
-.It \e(lB
-.Pq left bracket
-.It \eq
-.Pq double-quote
-.It \e(lq
-.Pq left double-quote
-.It \e(Lq
-.Pq left double-quote, deprecated
-.It \e(rq
-.Pq right double-quote
-.It \e(Rq
-.Pq right double-quote, deprecated
-.It \e(oq
-.Pq left single-quote
-.It \e(aq
-.Pq right single-quote
-.It \e(Bq
-.Pq right low double-quote
-.It \e(bq
-.Pq right low single-quote
-.El
-.\" PARAGRAPH
-.Pp
-Indicatives:
-.Bl -tag -width 12n -offset "XXXX" -compact
-.It \e(<-
-.Pq left arrow
-.It \e(->
-.Pq right arrow
-.It \e(ua
-.Pq up arrow
-.It \e(da
-.Pq down arrow
-.It \e(<>
-.Pq left-right arrow
-.It \e(lA
-.Pq left double-arrow
-.It \e(rA
-.Pq right double-arrow
-.It \e(uA
-.Pq up double-arrow
-.It \e(dA
-.Pq down double-arrow
-.It \e(hA
-.Pq left-right double-arrow
-.El
-.\" PARAGRAPH
-.Pp
-Mathematical:
-.Bl -tag -width 12n -offset "XXXX" -compact
-.It \e(es
-.Pq empty set
-.It \e(ca
-.Pq intersection
-.It \e(cu
-.Pq union
-.It \e(gr
-.Pq gradient
-.It \e(pd
-.Pq partial differential
-.It \e(ap
-.Pq similarity
-.It \e(=)
-.Pq proper superset
-.It \e((=
-.Pq proper subset
-.It \e(eq
-.Pq equals
-.It \e(di
-.Pq division
-.It \e(mu
-.Pq multiplication
-.It \e(pl
-.Pq addition
-.It \e(nm
-.Pq not element
-.It \e(mo
-.Pq element
-.It \e(Im
-.Pq imaginary
-.It \e(Re
-.Pq real
-.It \e(Ah
-.Pq aleph
-.It \e(te
-.Pq existential quantifier
-.It \e(fa
-.Pq universal quantifier
-.It \e(AN
-.Pq logical AND
-.It \e(OR
-.Pq logical OR
-.It \e(no
-.Pq logical NOT
-.It \e(st
-.Pq such that
-.It \e(tf
-.Pq therefore
-.It \e(~~
-.Pq approximate
-.It \e(~=
-.Pq approximately equals
-.It \e(=~
-.Pq congruent
-.It \e(Gt
-.Pq greater-than, deprecated
-.It \e(Lt
-.Pq less-than, deprecated
-.It \e(<=
-.Pq less-than-equal
-.It \e(Le
-.Pq less-than-equal, deprecated
-.It \e(>=
-.Pq greater-than-equal
-.It \e(Ge
-.Pq greater-than-equal
-.It \e(==
-.Pq equal
-.It \e(!=
-.Pq not equal
-.It \e(Ne
-.Pq not equal, deprecated
-.It \e(if
-.Pq infinity
-.It \e(If
-.Pq infinity, deprecated
-.It \e(na
-.Pq NaN , an extension
-.It \e(Na
-.Pq NaN, deprecated
-.It \e(+-
-.Pq plus-minus
-.It \e(Pm
-.Pq plus-minus, deprecated
-.It \e(**
-.Pq asterisk
-.El
-.\" PARAGRAPH
-.Pp
-Ligatures:
-.Bl -tag -width 12n -offset "XXXX" -compact
-.It \e(ss
-.Pq German eszett
-.It \e(AE
-.Pq upper-case AE
-.It \e(ae
-.Pq lower-case AE
-.It \e(OE
-.Pq upper-case OE
-.It \e(oe
-.Pq lower-case OE
-.It \e(ff
-.Pq ff ligature
-.It \e(fi
-.Pq fi ligature
-.It \e(fl
-.Pq fl ligature
-.It \e(Fi
-.Pq ffi ligature
-.It \e(Fl
-.Pq ffl ligature
-.El
-.\" PARAGRAPH
-.Pp
-Diacritics and letters:
-.Bl -tag -width 12n -offset "XXXX" -compact
-.It \e(ga
-.Pq grave accent
-.It \e(aa
-.Pq accute accent
-.It \e(a"
-.Pq umlaut accent
-.It \e(ad
-.Pq dieresis accent
-.It \e(a~
-.Pq tilde accent
-.It \e(a^
-.Pq circumflex accent
-.It \e(ac
-.Pq cedilla accent
-.It \e(ad
-.Pq dieresis accent
-.It \e(ah
-.Pq caron accent
-.It \e(ao
-.Pq ring accent
-.It \e(ho
-.Pq hook accent
-.It \e(ab
-.Pq breve accent
-.It \e(a-
-.Pq macron accent
-.It \e(-D
-.Pq upper-case eth
-.It \e(Sd
-.Pq lower-case eth
-.It \e(TP
-.Pq upper-case thorn
-.It \e(Tp
-.Pq lower-case thorn
-.It \e('A
-.Pq upper-case acute A
-.It \e('E
-.Pq upper-case acute E
-.It \e('I
-.Pq upper-case acute I
-.It \e('O
-.Pq upper-case acute O
-.It \e('U
-.Pq upper-case acute U
-.It \e('a
-.Pq lower-case acute a
-.It \e('e
-.Pq lower-case acute e
-.It \e('i
-.Pq lower-case acute i
-.It \e('o
-.Pq lower-case acute o
-.It \e('u
-.Pq lower-case acute u
-.It \e(`A
-.Pq upper-case grave A
-.It \e(`E
-.Pq upper-case grave E
-.It \e(`I
-.Pq upper-case grave I
-.It \e(`O
-.Pq upper-case grave O
-.It \e(`U
-.Pq upper-case grave U
-.It \e(`a
-.Pq lower-case grave a
-.It \e(`e
-.Pq lower-case grave e
-.It \e(`i
-.Pq lower-case grave i
-.It \e(`o
-.Pq lower-case grave o
-.It \e(`u
-.Pq lower-case grave u
-.It \e(~A
-.Pq upper-case tilde A
-.It \e(~N
-.Pq upper-case tilde N
-.It \e(~O
-.Pq upper-case tilde O
-.It \e(~a
-.Pq lower-case tilde a
-.It \e(~n
-.Pq lower-case tilde n
-.It \e(~o
-.Pq lower-case tilde o
-.It \e(:A
-.Pq upper-case dieresis A
-.It \e(:E
-.Pq upper-case dieresis E
-.It \e(:I
-.Pq upper-case dieresis I
-.It \e(:O
-.Pq upper-case dieresis O
-.It \e(:U
-.Pq upper-case dieresis U
-.It \e(:a
-.Pq lower-case dieresis a
-.It \e(:e
-.Pq lower-case dieresis e
-.It \e(:i
-.Pq lower-case dieresis i
-.It \e(:o
-.Pq lower-case dieresis o
-.It \e(:u
-.Pq lower-case dieresis u
-.It \e(:y
-.Pq lower-case dieresis y
-.It \e(^A
-.Pq upper-case circumflex A
-.It \e(^E
-.Pq upper-case circumflex E
-.It \e(^I
-.Pq upper-case circumflex I
-.It \e(^O
-.Pq upper-case circumflex O
-.It \e(^U
-.Pq upper-case circumflex U
-.It \e(^a
-.Pq lower-case circumflex a
-.It \e(^e
-.Pq lower-case circumflex e
-.It \e(^i
-.Pq lower-case circumflex i
-.It \e(^o
-.Pq lower-case circumflex o
-.It \e(^u
-.Pq lower-case circumflex u
-.It \e(,C
-.Pq upper-case cedilla C
-.It \e(,c
-.Pq lower-case cedilla c
-.It \e(/L
-.Pq upper-case stroke L
-.It \e(/l
-.Pq lower-case stroke l
-.It \e(/O
-.Pq upper-case stroke O
-.It \e(/o
-.Pq lower-case stroke o
-.It \e(oA
-.Pq upper-case ring A
-.It \e(oa
-.Pq lower-case ring a
-.El
-.\" PARAGRAPH
-.Pp
-Monetary:
-.Bl -tag -width 12n -offset "XXXX" -compact
-.It \e(Cs
-.Pq Scandinavian
-.It \e(Do
-.Pq dollar
-.It \e(Po
-.Pq pound
-.It \e(Ye
-.Pq yen
-.It \e(Fn
-.Pq florin
-.It \e(ct
-.Pq cent
-.El
-.\" PARAGRAPH
-.Pp
-Special symbols:
-.Bl -tag -width 12n -offset "XXXX" -compact
-.It \e0
-.Pq white-space
-.It \e(de
-.Pq degree
-.It \e(ps
-.Pq paragraph
-.It \e(sc
-.Pq section
-.It \e(dg
-.Pq dagger
-.It \e(dd
-.Pq double dagger
-.It \e(ci
-.Pq circle
-.It \e(ba
-.Pq bar
-.It \e(bb
-.Pq broken bar
-.It \e(Ba
-.Pq bar, deprecated
-.It \e(co
-.Pq copyright
-.It \e(rg
-.Pq registered
-.It \e(tm
-.Pq trademarked
-.It \e&
-.Pq non-breaking space
-.It \ee
-.Pq escape
-.It \e(Am
-.Pq ampersand, deprecated
-.El
.\" SECTION
.Sh STRUCTURE
Macros are classified in an ontology described by their scope rules.
@@ -853,7 +436,7 @@ then the macro accepts an arbitrary number of arguments.
.It \&.Bsx Ta Yes Ta Yes Ta n
.It \&.Bx Ta Yes Ta Yes Ta n
.It \&.Db Ta \&No Ta \&No Ta 1
-.It \&.Em Ta Yes Ta Yes Ta n
+.It \&.Em Ta Yes Ta Yes Ta >0
.It \&.Fx Ta Yes Ta Yes Ta n
.It \&.Ms Ta \&No Ta Yes Ta >0
.It \&.No Ta Yes Ta Yes Ta 0
@@ -943,7 +526,8 @@ is callable.
.El
.\" SECTION
.Sh SEE ALSO
-.Xr mandoc 1
+.Xr mandoc 1 ,
+.Xr mandoc_char 7
.\" SECTION
.Sh AUTHORS
The
diff --git a/mdoc_macro.c b/mdoc_macro.c
index be1a9d1b..cf6915d9 100644
--- a/mdoc_macro.c
+++ b/mdoc_macro.c
@@ -33,6 +33,7 @@
/* FIXME: .Fl, .Ar, .Cd handling of `|'. */
enum mwarn {
+ WIGNE,
WIMPBRK,
WMACPARM,
WOBS
@@ -237,6 +238,9 @@ pwarn(struct mdoc *mdoc, int line, int pos, enum mwarn type)
p = NULL;
switch (type) {
+ case (WIGNE):
+ p = "ignoring empty element";
+ break;
case (WIMPBRK):
p = "crufty end-of-line scope violation";
break;
@@ -800,10 +804,31 @@ blk_exp_close(MACRO_PROT_ARGS)
static int
in_line(MACRO_PROT_ARGS)
{
- int la, lastpunct, c, w, cnt, d, call;
+ int la, lastpunct, c, w, cnt, d, nc;
struct mdoc_arg *arg;
char *p;
+ /*
+ * Whether we allow ignored elements (those without content,
+ * usually because of reserved words) to squeak by.
+ */
+ switch (tok) {
+ case (MDOC_Lp):
+ /* FALLTHROUGH */
+ case (MDOC_Pp):
+ /* FALLTHROUGH */
+ case (MDOC_Nm):
+ /* FALLTHROUGH */
+ case (MDOC_Fl):
+ /* FALLTHROUGH */
+ case (MDOC_Ar):
+ nc = 1;
+ break;
+ default:
+ nc = 0;
+ break;
+ }
+
for (la = ppos, arg = NULL;; ) {
la = *pos;
c = mdoc_argv(mdoc, line, tok, &arg, pos, buf);
@@ -821,7 +846,7 @@ in_line(MACRO_PROT_ARGS)
return(0);
}
- for (call = cnt = 0, lastpunct = 1;; ) {
+ for (cnt = 0, lastpunct = 1;; ) {
la = *pos;
w = mdoc_args(mdoc, line, pos, buf, tok, &p);
@@ -837,19 +862,24 @@ in_line(MACRO_PROT_ARGS)
c = ARGS_QWORD == w ? MDOC_MAX :
lookup(mdoc, line, la, tok, p);
- /* MDOC_MAX (not a macro) or -1 (error). */
+ /*
+ * In this case, we've located a submacro and must
+ * execute it. Close out scope, if open. If no
+ * elements have been generated, either create one (nc)
+ * or raise a warning.
+ */
if (MDOC_MAX != c && -1 != c) {
if (0 == lastpunct && ! rew_elem(mdoc, tok))
return(0);
- if (0 == cnt) {
+ if (nc && 0 == cnt) {
if ( ! mdoc_elem_alloc(mdoc, line, ppos,
tok, arg))
return(0);
- if ( ! rew_elem(mdoc, tok))
- return(0);
mdoc->next = MDOC_NEXT_SIBLING;
- }
+ } else if ( ! nc && 0 == cnt)
+ if ( ! pwarn(mdoc, line, ppos, WIGNE))
+ return(0);
c = mdoc_macro(mdoc, c, line, la, pos, buf);
if (0 == c)
return(0);
@@ -859,7 +889,11 @@ in_line(MACRO_PROT_ARGS)
} else if (-1 == c)
return(0);
- /* Non-quote-enclosed punctuation. */
+ /*
+ * Non-quote-enclosed punctuation. Set up our scope, if
+ * a word; rewind the scope, if a delimiter; then append
+ * the word.
+ */
d = mdoc_isdelim(p);
@@ -884,13 +918,22 @@ in_line(MACRO_PROT_ARGS)
if (0 == lastpunct && ! rew_elem(mdoc, tok))
return(0);
- if (0 == cnt) {
+
+ /*
+ * If no elements have been collected and we're allowed to have
+ * empties (nc), open a scope and close it out. Otherwise,
+ * raise a warning.
+ *
+ */
+ if (nc && 0 == cnt) {
c = mdoc_elem_alloc(mdoc, line, ppos, tok, arg);
if (0 == c)
return(0);
- if ( ! rew_elem(mdoc, tok))
+ mdoc->next = MDOC_NEXT_SIBLING;
+ } else if ( ! nc && 0 == cnt)
+ if ( ! pwarn(mdoc, line, ppos, WIGNE))
return(0);
- }
+
if (ppos > 1)
return(1);
return(append_delims(mdoc, line, pos, buf));