version 1.1, 2009/01/16 14:04:26 |
version 1.52, 2011/01/01 12:18:37 |
|
|
.\" |
.\" $Id$ |
|
.\" |
|
.\" Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> |
|
.\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org> |
|
.\" |
|
.\" Permission to use, copy, modify, and distribute this software for any |
|
.\" purpose with or without fee is hereby granted, provided that the above |
|
.\" copyright notice and this permission notice appear in all copies. |
|
.\" |
|
.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
|
.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
|
.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
|
.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
|
.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
|
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
|
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
|
.\" |
.Dd $Mdocdate$ |
.Dd $Mdocdate$ |
.Dt mdoc 3 |
.Dt MDOC 3 |
.Os |
.Os |
.\" |
|
.Sh NAME |
.Sh NAME |
|
.Nm mdoc , |
.Nm mdoc_alloc , |
.Nm mdoc_alloc , |
.Nm mdoc_parseln , |
|
.Nm mdoc_endparse , |
.Nm mdoc_endparse , |
.Nm mdoc_result , |
.Nm mdoc_free , |
.Nm mdoc_free |
.Nm mdoc_meta , |
.Nd mdoc macro compiler |
.Nm mdoc_node , |
.\" |
.Nm mdoc_parseln , |
|
.Nm mdoc_reset |
|
.Nd mdoc macro compiler library |
.Sh SYNOPSIS |
.Sh SYNOPSIS |
|
.In mandoc.h |
.In mdoc.h |
.In mdoc.h |
|
.Vt extern const char * const * mdoc_macronames; |
|
.Vt extern const char * const * mdoc_argnames; |
|
.Ft int |
|
.Fo mdoc_addspan |
|
.Fa "struct mdoc *mdoc" |
|
.Fa "const struct tbl_span *span" |
|
.Fc |
.Ft "struct mdoc *" |
.Ft "struct mdoc *" |
.Fn mdoc_alloc "void *data" "const struct mdoc_cb *cb" |
.Fo mdoc_alloc |
.Ft void |
.Fa "struct regset *regs" |
.Fn mdoc_free "struct mdoc *" |
.Fa "void *data" |
|
.Fa "mandocmsg msgs" |
|
.Fc |
.Ft int |
.Ft int |
.Fn mdoc_parseln "struct mdoc *" "int" "char *buf" |
.Fn mdoc_endparse "struct mdoc *mdoc" |
|
.Ft void |
|
.Fn mdoc_free "struct mdoc *mdoc" |
|
.Ft "const struct mdoc_meta *" |
|
.Fn mdoc_meta "const struct mdoc *mdoc" |
.Ft "const struct mdoc_node *" |
.Ft "const struct mdoc_node *" |
.Fn mdoc_result "struct mdoc *" |
.Fn mdoc_node "const struct mdoc *mdoc" |
.Ft int |
.Ft int |
.Fn mdoc_endparse "struct mdoc *" |
.Fo mdoc_parseln |
.\" |
.Fa "struct mdoc *mdoc" |
|
.Fa "int line" |
|
.Fa "char *buf" |
|
.Fc |
|
.Ft int |
|
.Fn mdoc_reset "struct mdoc *mdoc" |
.Sh DESCRIPTION |
.Sh DESCRIPTION |
The |
The |
.Nm mdoc |
.Nm mdoc |
library parses lines of mdoc-macro text into an abstract syntax tree. |
library parses lines of |
|
.Xr mdoc 7 |
|
input |
|
into an abstract syntax tree (AST). |
|
.Pp |
In general, applications initiate a parsing sequence with |
In general, applications initiate a parsing sequence with |
.Fn mdoc_alloc , |
.Fn mdoc_alloc , |
parse each line in a document with |
parse each line in a document with |
.Fn mdoc_parseln , |
.Fn mdoc_parseln , |
close the parsing session with |
close the parsing session with |
.Fn mdoc_endparse , |
.Fn mdoc_endparse , |
operate over the syntax tree returned by |
operate over the syntax tree returned by |
.Fn mdoc_result , |
.Fn mdoc_node |
|
and |
|
.Fn mdoc_meta , |
then free all allocated memory with |
then free all allocated memory with |
.Fn mdoc_free . |
.Fn mdoc_free . |
See the |
The |
.Sx EXAMPLES |
.Fn mdoc_reset |
section for a full example. |
function may be used in order to reset the parser for another input |
.\" The following requests should be uncommented and used where appropriate. |
sequence. |
.\" This next request is for sections 2, 3, and 9 function return values only. |
.Ss Types |
.\" .Sh RETURN VALUES |
.Bl -ohang |
.\" .Sh EXAMPLES |
.It Vt struct mdoc |
.\" The next request is for sections 2, 3, and 9 error and signal handling only. |
An opaque type. |
.\" .Sh ERRORS |
Its values are only used privately within the library. |
.\" .Sh SEE ALSO |
.It Vt struct mdoc_node |
.\" .Xr foobar 1 |
A parsed node. |
.\" .Sh STANDARDS |
See |
.\" .Sh HISTORY |
.Sx Abstract Syntax Tree |
.\" .Sh AUTHORS |
for details. |
.\" .Sh CAVEATS |
.El |
.\" .Sh BUGS |
.Ss Functions |
|
.Bl -ohang |
|
.It Fn mdoc_addspan |
|
Add a table span to the parsing stream. |
|
Returns 0 on failure, 1 on success. |
|
.It Fn mdoc_alloc |
|
Allocates a parsing structure. |
|
The |
|
.Fa data |
|
pointer is passed to |
|
.Fa msgs . |
|
Returns NULL on failure. |
|
If non-NULL, the pointer must be freed with |
|
.Fn mdoc_free . |
|
.It Fn mdoc_reset |
|
Reset the parser for another parse routine. |
|
After its use, |
|
.Fn mdoc_parseln |
|
behaves as if invoked for the first time. |
|
If it returns 0, memory could not be allocated. |
|
.It Fn mdoc_free |
|
Free all resources of a parser. |
|
The pointer is no longer valid after invocation. |
|
.It Fn mdoc_parseln |
|
Parse a nil-terminated line of input. |
|
This line should not contain the trailing newline. |
|
Returns 0 on failure, 1 on success. |
|
The input buffer |
|
.Fa buf |
|
is modified by this function. |
|
.It Fn mdoc_endparse |
|
Signals that the parse is complete. |
|
Note that if |
|
.Fn mdoc_endparse |
|
is called subsequent to |
|
.Fn mdoc_node , |
|
the resulting tree is incomplete. |
|
Returns 0 on failure, 1 on success. |
|
.It Fn mdoc_node |
|
Returns the first node of the parse. |
|
Note that if |
|
.Fn mdoc_parseln |
|
or |
|
.Fn mdoc_endparse |
|
return 0, the tree will be incomplete. |
|
.It Fn mdoc_meta |
|
Returns the document's parsed meta-data. |
|
If this information has not yet been supplied or |
|
.Fn mdoc_parseln |
|
or |
|
.Fn mdoc_endparse |
|
return 0, the data will be incomplete. |
|
.El |
|
.Ss Variables |
|
.Bl -ohang |
|
.It Va mdoc_macronames |
|
An array of string-ified token names. |
|
.It Va mdoc_argnames |
|
An array of string-ified token argument names. |
|
.El |
|
.Ss Abstract Syntax Tree |
|
The |
|
.Nm |
|
functions produce an abstract syntax tree (AST) describing input in a |
|
regular form. |
|
It may be reviewed at any time with |
|
.Fn mdoc_nodes ; |
|
however, if called before |
|
.Fn mdoc_endparse , |
|
or after |
|
.Fn mdoc_endparse |
|
or |
|
.Fn mdoc_parseln |
|
fail, it may be incomplete. |
|
.Pp |
|
This AST is governed by the ontological |
|
rules dictated in |
|
.Xr mdoc 7 |
|
and derives its terminology accordingly. |
|
.Qq In-line |
|
elements described in |
|
.Xr mdoc 7 |
|
are described simply as |
|
.Qq elements . |
|
.Pp |
|
The AST is composed of |
|
.Vt struct mdoc_node |
|
nodes with block, head, body, element, root and text types as declared |
|
by the |
|
.Va type |
|
field. |
|
Each node also provides its parse point (the |
|
.Va line , |
|
.Va sec , |
|
and |
|
.Va pos |
|
fields), its position in the tree (the |
|
.Va parent , |
|
.Va child , |
|
.Va nchild , |
|
.Va next |
|
and |
|
.Va prev |
|
fields) and some type-specific data, in particular, for nodes generated |
|
from macros, the generating macro in the |
|
.Va tok |
|
field. |
|
.Pp |
|
The tree itself is arranged according to the following normal form, |
|
where capitalised non-terminals represent nodes. |
|
.Pp |
|
.Bl -tag -width "ELEMENTXX" -compact |
|
.It ROOT |
|
\(<- mnode+ |
|
.It mnode |
|
\(<- BLOCK | ELEMENT | TEXT |
|
.It BLOCK |
|
\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]] |
|
.It ELEMENT |
|
\(<- TEXT* |
|
.It HEAD |
|
\(<- mnode* |
|
.It BODY |
|
\(<- mnode* [ENDBODY mnode*] |
|
.It TAIL |
|
\(<- mnode* |
|
.It TEXT |
|
\(<- [[:printable:],0x1e]* |
|
.El |
|
.Pp |
|
Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of |
|
the BLOCK production: these refer to punctuation marks. |
|
Furthermore, although a TEXT node will generally have a non-zero-length |
|
string, in the specific case of |
|
.Sq \&.Bd \-literal , |
|
an empty line will produce a zero-length string. |
|
Multiple body parts are only found in invocations of |
|
.Sq \&Bl \-column , |
|
where a new body introduces a new phrase. |
|
.Ss Badly-nested Blocks |
|
The ENDBODY node is available to end the formatting associated |
|
with a given block before the physical end of that block. |
|
It has a non-null |
|
.Va end |
|
field, is of the BODY |
|
.Va type , |
|
has the same |
|
.Va tok |
|
as the BLOCK it is ending, and has a |
|
.Va pending |
|
field pointing to that BLOCK's BODY node. |
|
It is an indirect child of that BODY node |
|
and has no children of its own. |
|
.Pp |
|
An ENDBODY node is generated when a block ends while one of its child |
|
blocks is still open, like in the following example: |
|
.Bd -literal -offset indent |
|
\&.Ao ao |
|
\&.Bo bo ac |
|
\&.Ac bc |
|
\&.Bc end |
|
.Ed |
|
.Pp |
|
This example results in the following block structure: |
|
.Bd -literal -offset indent |
|
BLOCK Ao |
|
HEAD Ao |
|
BODY Ao |
|
TEXT ao |
|
BLOCK Bo, pending -> Ao |
|
HEAD Bo |
|
BODY Bo |
|
TEXT bo |
|
TEXT ac |
|
ENDBODY Ao, pending -> Ao |
|
TEXT bc |
|
TEXT end |
|
.Ed |
|
.Pp |
|
Here, the formatting of the |
|
.Sq \&Ao |
|
block extends from TEXT ao to TEXT ac, |
|
while the formatting of the |
|
.Sq \&Bo |
|
block extends from TEXT bo to TEXT bc. |
|
It renders as follows in |
|
.Fl T Ns Cm ascii |
|
mode: |
|
.Pp |
|
.Dl <ao [bo ac> bc] end |
|
.Pp |
|
Support for badly-nested blocks is only provided for backward |
|
compatibility with some older |
|
.Xr mdoc 7 |
|
implementations. |
|
Using badly-nested blocks is |
|
.Em strongly discouraged : |
|
the |
|
.Fl T Ns Cm html |
|
and |
|
.Fl T Ns Cm xhtml |
|
front-ends are unable to render them in any meaningful way. |
|
Furthermore, behaviour when encountering badly-nested blocks is not |
|
consistent across troff implementations, especially when using multiple |
|
levels of badly-nested blocks. |
|
.Sh EXAMPLES |
|
The following example reads lines from stdin and parses them, operating |
|
on the finished parse tree with |
|
.Fn parsed . |
|
This example does not error-check nor free memory upon failure. |
|
.Bd -literal -offset indent |
|
struct regset regs; |
|
struct mdoc *mdoc; |
|
const struct mdoc_node *node; |
|
char *buf; |
|
size_t len; |
|
int line; |
|
|
|
bzero(®s, sizeof(struct regset)); |
|
line = 1; |
|
mdoc = mdoc_alloc(®s, NULL, NULL); |
|
buf = NULL; |
|
alloc_len = 0; |
|
|
|
while ((len = getline(&buf, &alloc_len, stdin)) >= 0) { |
|
if (len && buflen[len - 1] = '\en') |
|
buf[len - 1] = '\e0'; |
|
if ( ! mdoc_parseln(mdoc, line, buf)) |
|
errx(1, "mdoc_parseln"); |
|
line++; |
|
} |
|
|
|
if ( ! mdoc_endparse(mdoc)) |
|
errx(1, "mdoc_endparse"); |
|
if (NULL == (node = mdoc_node(mdoc))) |
|
errx(1, "mdoc_node"); |
|
|
|
parsed(mdoc, node); |
|
mdoc_free(mdoc); |
|
.Ed |
|
.Pp |
|
To compile this, execute |
|
.Pp |
|
.Dl % cc main.c libmdoc.a libmandoc.a |
|
.Pp |
|
where |
|
.Pa main.c |
|
is the example file. |
|
.Sh SEE ALSO |
|
.Xr mandoc 1 , |
|
.Xr mdoc 7 |
|
.Sh AUTHORS |
|
The |
|
.Nm |
|
library was written by |
|
.An Kristaps Dzonsons Aq kristaps@bsd.lv . |