=================================================================== RCS file: /cvs/mandoc/mandoc.3,v retrieving revision 1.2 retrieving revision 1.30 diff -u -p -r1.2 -r1.30 --- mandoc/mandoc.3 2011/03/28 21:49:42 1.2 +++ mandoc/mandoc.3 2015/01/15 02:29:26 1.30 @@ -1,4 +1,4 @@ -.\" $Id: mandoc.3,v 1.2 2011/03/28 21:49:42 kristaps Exp $ +.\" $Id: mandoc.3,v 1.30 2015/01/15 02:29:26 schwarze Exp $ .\" .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons .\" Copyright (c) 2010 Ingo Schwarze @@ -15,53 +15,72 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: March 28 2011 $ +.Dd $Mdocdate: January 15 2015 $ .Dt MANDOC 3 .Os .Sh NAME .Nm mandoc , +.Nm man_deroff , .Nm man_meta , +.Nm man_mparse , .Nm man_node , +.Nm mdoc_deroff , .Nm mdoc_meta , .Nm mdoc_node , .Nm mparse_alloc , .Nm mparse_free , +.Nm mparse_getkeep , +.Nm mparse_keep , +.Nm mparse_open , .Nm mparse_readfd , .Nm mparse_reset , .Nm mparse_result , .Nm mparse_strerror , .Nm mparse_strlevel +.Nm mparse_wait , .Nd mandoc macro compiler library +.Sh LIBRARY +.Lb libmandoc .Sh SYNOPSIS -.In man.h -.In mdoc.h +.In sys/types.h .In mandoc.h -.Ft "const struct man_meta *" -.Fo man_meta -.Fa "const struct man *man" +.Fd "#define ASCII_NBRSP" +.Fd "#define ASCII_HYPH" +.Fd "#define ASCII_BREAK" +.Ft struct mparse * +.Fo mparse_alloc +.Fa "int options" +.Fa "enum mandoclevel wlevel" +.Fa "mandocmsg mmsg" +.Fa "const struct mchars *mchars" +.Fa "char *defos" .Fc -.Ft "const struct man_node *" -.Fo man_node -.Fa "const struct man *man" +.Ft void +.Fo (*mandocmsg) +.Fa "enum mandocerr errtype" +.Fa "enum mandoclevel level" +.Fa "const char *file" +.Fa "int line" +.Fa "int col" +.Fa "const char *msg" .Fc -.Ft "const struct mdoc_meta *" -.Fo mdoc_meta -.Fa "const struct mdoc *mdoc" +.Ft void +.Fo mparse_free +.Fa "struct mparse *parse" .Fc -.Ft "const struct mdoc_node *" -.Fo mdoc_node -.Fa "const struct 
mdoc *mdoc" +.Ft const char * +.Fo mparse_getkeep +.Fa "const struct mparse *parse" .Fc .Ft void -.Fo mparse_alloc -.Fa "enum mparset type" -.Fa "enum mandoclevel wlevel" -.Fa "mandocmsg msg" -.Fa "void *msgarg" +.Fo mparse_keep +.Fa "struct mparse *parse" .Fc -.Ft void -.Fo mparse_free +.Ft "enum mandoclevel" +.Fo mparse_open .Fa "struct mparse *parse" +.Fa "int *fd" +.Fa "const char *fname" .Fc .Ft "enum mandoclevel" .Fo mparse_readfd @@ -78,6 +97,7 @@ .Fa "struct mparse *parse" .Fa "struct mdoc **mdoc" .Fa "struct man **man" +.Fa "char **sodest" .Fc .Ft "const char *" .Fo mparse_strerror @@ -87,9 +107,49 @@ .Fo mparse_strlevel .Fa "enum mandoclevel" .Fc -.Vt extern const char * const * man_macronames; +.Ft "enum mandoclevel" +.Fo mparse_wait +.Fa "struct mparse *parse" +.Fc +.In sys/types.h +.In mandoc.h +.In mdoc.h +.Ft void +.Fo mdoc_deroff +.Fa "char **dest" +.Fa "const struct mdoc_node *node" +.Fc +.Ft "const struct mdoc_meta *" +.Fo mdoc_meta +.Fa "const struct mdoc *mdoc" +.Fc +.Ft "const struct mdoc_node *" +.Fo mdoc_node +.Fa "const struct mdoc *mdoc" +.Fc .Vt extern const char * const * mdoc_argnames; .Vt extern const char * const * mdoc_macronames; +.In sys/types.h +.In mandoc.h +.In man.h +.Ft void +.Fo man_deroff +.Fa "char **dest" +.Fa "const struct man_node *node" +.Fc +.Ft "const struct man_meta *" +.Fo man_meta +.Fa "const struct man *man" +.Fc +.Ft "const struct mparse *" +.Fo man_mparse +.Fa "const struct man *man" +.Fc +.Ft "const struct man_node *" +.Fo man_node +.Fa "const struct man *man" +.Fc +.Vt extern const char * const * man_macronames; .Sh DESCRIPTION The .Nm mandoc @@ -112,9 +172,13 @@ The following describes a general parse sequence: .Bl -enum .It initiate a parsing sequence with +.Xr mchars_alloc 3 +and .Fn mparse_alloc ; .It -parse files or file descriptors with +parse files with +.Fn mparse_open +and .Fn mparse_readfd ; .It retrieve a parsed syntax tree, if the parse was successful, with @@ -126,17 +190,351 @@ or .Fn man_node ; 
.It free all allocated memory with -.Fn mparse_free , +.Fn mparse_free +and +.Xr mchars_free 3 , or invoke .Fn mparse_reset and parse new files. .El +.Sh REFERENCE +This section documents the functions, types, and variables available +via +.In mandoc.h , +with the exception of those documented in +.Xr mandoc_escape 3 +and +.Xr mchars_alloc 3 . +.Ss Types +.Bl -ohang +.It Vt "enum mandocerr" +A fatal error, error, or warning message during parsing. +.It Vt "enum mandoclevel" +A classification of an +.Vt "enum mandocerr" +as regards system operation. +.It Vt "struct mchars" +An opaque pointer to a character table. +Created with +.Xr mchars_alloc 3 +and freed with +.Xr mchars_free 3 . +.It Vt "struct mparse" +An opaque pointer to a running parse sequence. +Created with +.Fn mparse_alloc +and freed with +.Fn mparse_free . +This may be used across parsed input if +.Fn mparse_reset +is called between parses. +.It Vt "mandocmsg" +A prototype for a function to handle fatal error, error, and warning +messages emitted by the parser. +.El +.Ss Functions +.Bl -ohang +.It Fn man_deroff +Obtain a text-only representation of a +.Vt struct man_node , +including text contained in its child nodes. +To be used on children of the pointer returned from +.Fn man_node . +When it is no longer needed, the pointer returned from +.Fn man_deroff +can be passed to +.Xr free 3 . +.It Fn man_meta +Obtain the meta-data of a successful +.Xr man 7 +parse. +This may only be used on a pointer returned by +.Fn mparse_result . +Declared in +.In man.h , +implemented in +.Pa man.c . +.It Fn man_mparse +Get the parser used for the current output. +Declared in +.In man.h , +implemented in +.Pa man.c . +.It Fn man_node +Obtain the root node of a successful +.Xr man 7 +parse. +This may only be used on a pointer returned by +.Fn mparse_result . +Declared in +.In man.h , +implemented in +.Pa man.c .
+.It Fn mdoc_deroff +Obtain a text-only representation of a +.Vt struct mdoc_node , +including text contained in its child nodes. +To be used on children of the pointer returned from +.Fn mdoc_node . +When it is no longer needed, the pointer returned from +.Fn mdoc_deroff +can be passed to +.Xr free 3 . +.It Fn mdoc_meta +Obtain the meta-data of a successful +.Xr mdoc 7 +parse. +This may only be used on a pointer returned by +.Fn mparse_result . +Declared in +.In mdoc.h , +implemented in +.Pa mdoc.c . +.It Fn mdoc_node +Obtain the root node of a successful +.Xr mdoc 7 +parse. +This may only be used on a pointer returned by +.Fn mparse_result . +Declared in +.In mdoc.h , +implemented in +.Pa mdoc.c . +.It Fn mparse_alloc +Allocate a parser. +The arguments have the following effect: +.Bl -tag -offset 5n -width inttype +.It Ar options +When the +.Dv MPARSE_MDOC +or +.Dv MPARSE_MAN +bit is set, only that parser is used. +Otherwise, the document type is automatically detected. +.Pp +When the +.Dv MPARSE_SO +bit is set, +.Xr roff 7 +.Ic \&so +file inclusion requests are always honoured. +Otherwise, if the request is the only content in an input file, +only the file name is remembered, to be returned in the +.Fa sodest +argument of +.Fn mparse_result . +.Pp +When the +.Dv MPARSE_QUICK +bit is set, parsing is aborted after the NAME section. +This is for example useful in +.Xr makewhatis 8 +.Fl Q +to quickly build minimal databases. +.It Ar wlevel +Can be set to +.Dv MANDOCLEVEL_FATAL , +.Dv MANDOCLEVEL_ERROR , +or +.Dv MANDOCLEVEL_WARNING . +Messages below the selected level will be suppressed. +.It Ar mmsg +A callback function to handle errors and warnings. +See +.Pa main.c +for an example. +.It Ar mchars +An opaque pointer to a character table obtained from +.Xr mchars_alloc 3 . +.It Ar defos +A default string for the +.Xr mdoc 7 +.Sq \&Os +macro, overriding the +.Dv OSNAME +preprocessor definition and the results of +.Xr uname 3 .
+.El +.Pp +The same parser may be used for multiple files so long as +.Fn mparse_reset +is called between parses. +.Fn mparse_free +must be called to free the memory allocated by this function. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_free +Free all memory allocated by +.Fn mparse_alloc . +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_getkeep +Acquire the keep buffer. +Must follow a call of +.Fn mparse_keep . +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_keep +Instruct the parser to retain a copy of its parsed input. +This can be acquired with subsequent +.Fn mparse_getkeep +calls. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_open +If the +.Fa fname +ends in +.Pa .gz , +open with +.Xr gunzip 1 ; +otherwise, with +.Xr open 2 . +If +.Xr open 2 +fails, append +.Pa .gz +and try with +.Xr gunzip 1 . +Return a file descriptor open for reading in +.Fa fd , +or -1 on failure. +It can be passed to +.Fn mparse_readfd +or used directly. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_readfd +Parse a file descriptor opened with +.Xr open 2 +or +.Fn mparse_open . +Pass the associated filename in +.Va fname . +Calls +.Fn mparse_wait +before returning. +This function may be called multiple times with different parameters; however, +.Fn mparse_reset +should be invoked between parses. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_reset +Reset a parser so that +.Fn mparse_readfd +may be used again. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_result +Obtain the result of a parse. +Only successful parses +.Po +i.e., those where +.Fn mparse_readfd +returned less than MANDOCLEVEL_FATAL +.Pc +should invoke this function, in which case one of the three pointers will +be filled in. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . 
+.It Fn mparse_strerror +Return a statically-allocated string representation of an error code. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_strlevel +Return a statically-allocated string representation of a level code. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_wait +Bury a +.Xr gunzip 1 +child process that was spawned with +.Fn mparse_open . +To be called after the parse sequence is complete. +Not needed after +.Fn mparse_readfd , +but does no harm in that case, either. +Returns +.Dv MANDOCLEVEL_OK +on success and +.Dv MANDOCLEVEL_SYSERR +on failure, that is, when +.Xr wait 2 +fails, or when +.Xr gunzip 1 +died from a signal or exited with non-zero status. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.El +.Ss Variables +.Bl -ohang +.It Va man_macronames +The string representation of a man macro as indexed by +.Vt "enum mant" . +.It Va mdoc_argnames +The string representation of a mdoc macro argument as indexed by +.Vt "enum mdocargt" . +.It Va mdoc_macronames +The string representation of a mdoc macro as indexed by +.Vt "enum mdoct" . +.El .Sh IMPLEMENTATION NOTES This section consists of structural documentation for .Xr mdoc 7 and .Xr man 7 -syntax trees. +syntax trees and strings. +.Ss Man and Mdoc Strings +Strings may be extracted from mdoc and man meta-data, or from text +nodes (MDOC_TEXT and MAN_TEXT, respectively). +These strings have special non-printing formatting cues embedded in the +text itself, as well as +.Xr roff 7 +escapes preserved from input. +Implementing systems will need to handle both situations to produce +human-readable text. +In general, strings may be assumed to consist of 7-bit ASCII characters. +.Pp +The following non-printing characters may be embedded in text strings: +.Bl -tag -width Ds +.It Dv ASCII_NBRSP +A non-breaking space character. +.It Dv ASCII_HYPH +A soft hyphen. +.It Dv ASCII_BREAK +A breakable zero-width space. 
+.El +.Pp +Escape characters are also passed verbatim into text strings. +An escape character is a sequence of characters beginning with the +backslash +.Pq Sq \e . +To construct human-readable text, these should be intercepted with +.Xr mandoc_escape 3 +and converted with one of the functions described in +.Xr mchars_alloc 3 . .Ss Man Abstract Syntax Tree This AST is governed by the ontological rules dictated in .Xr man 7 @@ -177,11 +575,11 @@ where capitalised non-terminals represent nodes. .It ELEMENT \(<- ELEMENT | TEXT* .It TEXT -\(<- [[:alpha:]]* +\(<- [[:ascii:]]* .El .Pp The only elements capable of nesting other elements are those with -next-lint scope as documented in +next-line scope as documented in .Xr man 7 . .Ss Mdoc Abstract Syntax Tree This AST is governed by the ontological @@ -236,7 +634,7 @@ where capitalised non-terminals represent nodes. .It TAIL \(<- mnode* .It TEXT -\(<- [[:printable:],0x1e]* +\(<- [[:ascii:]]* .El .Pp Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of @@ -251,7 +649,7 @@ where a new body introduces a new phrase. .Pp The .Xr mdoc 7 -syntax tree accomodates for broken block structures as well. +syntax tree accommodates for broken block structures as well. The ENDBODY node is available to end the formatting associated with a given block before the physical end of that block. It has a non-null @@ -317,12 +715,16 @@ front-ends to .Xr mandoc 1 are unable to render them in any meaningful way. Furthermore, behaviour when encountering badly-nested blocks is not -consistent across troff implementations, especially when using multiple +consistent across troff implementations, especially when using multiple levels of badly-nested blocks. .Sh SEE ALSO .Xr mandoc 1 , +.Xr mandoc_escape 3 , +.Xr mandoc_malloc 3 , +.Xr mchars_alloc 3 , .Xr eqn 7 , .Xr man 7 , +.Xr mandoc_char 7 , .Xr mdoc 7 , .Xr roff 7 , .Xr tbl 7 @@ -330,4 +732,4 @@ levels of badly-nested blocks.
The .Nm library was written by -.An Kristaps Dzonsons Aq kristaps@bsd.lv . +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .