=================================================================== RCS file: /cvs/mandoc/mandoc.3,v retrieving revision 1.19 retrieving revision 1.29 diff -u -p -r1.19 -r1.29 --- mandoc/mandoc.3 2013/07/13 19:41:16 1.19 +++ mandoc/mandoc.3 2014/11/26 23:42:14 1.29 @@ -1,4 +1,4 @@ -.\" $Id: mandoc.3,v 1.19 2013/07/13 19:41:16 schwarze Exp $ +.\" $Id: mandoc.3,v 1.29 2014/11/26 23:42:14 schwarze Exp $ .\" .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons .\" Copyright (c) 2010 Ingo Schwarze @@ -15,99 +15,60 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: July 13 2013 $ +.Dd $Mdocdate: November 26 2014 $ .Dt MANDOC 3 .Os .Sh NAME .Nm mandoc , -.Nm mandoc_escape , +.Nm man_deroff , .Nm man_meta , .Nm man_mparse , .Nm man_node , -.Nm mchars_alloc , -.Nm mchars_free , -.Nm mchars_num2char , -.Nm mchars_num2uc , -.Nm mchars_spec2cp , -.Nm mchars_spec2str , +.Nm mdoc_deroff , .Nm mdoc_meta , .Nm mdoc_node , .Nm mparse_alloc , .Nm mparse_free , .Nm mparse_getkeep , .Nm mparse_keep , +.Nm mparse_open , .Nm mparse_readfd , .Nm mparse_reset , .Nm mparse_result , .Nm mparse_strerror , .Nm mparse_strlevel +.Nm mparse_wait , .Nd mandoc macro compiler library .Sh LIBRARY -.Lb mandoc +.Lb libmandoc .Sh SYNOPSIS -.In man.h -.In mdoc.h +.In sys/types.h .In mandoc.h -.Ft "enum mandoc_esc" -.Fo mandoc_escape -.Fa "const char **end" -.Fa "const char **start" -.Fa "int *sz" -.Fc -.Ft "const struct man_meta *" -.Fo man_meta -.Fa "const struct man *man" -.Fc -.Ft "const struct mparse *" -.Fo man_mparse -.Fa "const struct man *man" -.Fc -.Ft "const struct man_node *" -.Fo man_node -.Fa "const struct man *man" -.Fc -.Ft "struct mchars *" -.Fn mchars_alloc -.Ft void -.Fn mchars_free "struct mchars *p" -.Ft char -.Fn mchars_num2char "const char *cp" "size_t sz" -.Ft int -.Fn mchars_num2uc "const char *cp" "size_t sz" -.Ft "const char *" -.Fo mchars_spec2str -.Fa "const 
struct mchars *p" -.Fa "const char *cp" -.Fa "size_t sz" -.Fa "size_t *rsz" -.Fc -.Ft int -.Fo mchars_spec2cp -.Fa "const struct mchars *p" -.Fa "const char *cp" -.Fa "size_t sz" -.Ft "const char *" -.Fc -.Ft "const struct mdoc_meta *" -.Fo mdoc_meta -.Fa "const struct mdoc *mdoc" -.Fc -.Ft "const struct mdoc_node *" -.Fo mdoc_node -.Fa "const struct mdoc *mdoc" -.Fc -.Ft void +.Fd "#define ASCII_NBRSP" +.Fd "#define ASCII_HYPH" +.Fd "#define ASCII_BREAK" +.Ft struct mparse * .Fo mparse_alloc -.Fa "enum mparset type" +.Fa "int options" .Fa "enum mandoclevel wlevel" -.Fa "mandocmsg msg" -.Fa "void *msgarg" +.Fa "mandocmsg mmsg" +.Fa "const struct mchars *mchars" +.Fa "char *defos" .Fc .Ft void +.Fo (*mandocmsg) +.Fa "enum mandocerr errtype" +.Fa "enum mandoclevel level" +.Fa "const char *file" +.Fa "int line" +.Fa "int col" +.Fa "const char *msg" +.Fc +.Ft void .Fo mparse_free .Fa "struct mparse *parse" .Fc -.Ft void +.Ft const char * .Fo mparse_getkeep .Fa "const struct mparse *parse" .Fc @@ -116,6 +77,12 @@ .Fa "struct mparse *parse" .Fc .Ft "enum mandoclevel" +.Fo mparse_open +.Fa "struct mparse *parse" +.Fa "int *fd" +.Fa "const char *fname" +.Fc +.Ft "enum mandoclevel" .Fo mparse_readfd .Fa "struct mparse *parse" .Fa "int fd" @@ -130,6 +97,7 @@ .Fa "struct mparse *parse" .Fa "struct mdoc **mdoc" .Fa "struct man **man" +.Fa "char **sodest" .Fc .Ft "const char *" .Fo mparse_strerror @@ -139,11 +107,49 @@ .Fo mparse_strlevel .Fa "enum mandoclevel" .Fc -.Vt extern const char * const * man_macronames; +.Ft "enum mandoclevel" +.Fo mparse_wait +.Fa "struct mparse *parse" +.Fc +.In sys/types.h +.In mandoc.h +.In mdoc.h +.Ft void +.Fo mdoc_deroff +.Fa "char **dest" +.Fa "const struct mdoc_node *node" +.Fc +.Ft "const struct mdoc_meta *" +.Fo mdoc_meta +.Fa "const struct mdoc *mdoc" +.Fc +.Ft "const struct mdoc_node *" +.Fo mdoc_node +.Fa "const struct mdoc *mdoc" +.Fc .Vt extern const char * const * mdoc_argnames; .Vt extern const char * const * mdoc_macronames; -.Fd 
"#define ASCII_NBRSP" -.Fd "#define ASCII_HYPH" +.In sys/types.h +.In mandoc.h +.In man.h +.Ft void +.Fo man_deroff +.Fa "char **dest" +.Fa "const struct man_node *node" +.Fc +.Ft "const struct man_meta *" +.Fo man_meta +.Fa "const struct man *man" +.Fc +.Ft "const struct mparse *" +.Fo man_mparse +.Fa "const struct man *man" +.Fc +.Ft "const struct man_node *" +.Fo man_node +.Fa "const struct man *man" +.Fc +.Vt extern const char * const * man_macronames; .Sh DESCRIPTION The .Nm mandoc @@ -166,6 +172,8 @@ The following describes a general parse sequence: .Bl -enum .It initiate a parsing sequence with +.Xr mchars_alloc 3 +and .Fn mparse_alloc ; .It parse files or file descriptors with @@ -180,42 +188,35 @@ or .Fn man_node ; .It free all allocated memory with -.Fn mparse_free , +.Fn mparse_free +and +.Xr mchars_free 3 , or invoke .Fn mparse_reset and parse new files. .El -.Pp -The -.Nm -library also contains routines for translating character strings into glyphs -.Pq see Fn mchars_alloc -and parsing escape sequences from strings -.Pq see Fn mandoc_escape . .Sh REFERENCE This section documents the functions, types, and variables available via -.In mandoc.h . +.In mandoc.h , +with the exception of those documented in +.Xr mandoc_escape 3 +and +.Xr mchars_alloc 3 . .Ss Types .Bl -ohang -.It Vt "enum mandoc_esc" -An escape sequence classification. .It Vt "enum mandocerr" A fatal error, error, or warning message during parsing. .It Vt "enum mandoclevel" A classification of an -.Vt "enum mandoclevel" +.Vt "enum mandocerr" as regards system operation. .It Vt "struct mchars" -An opaque pointer to an object allowing for translation between -character strings and glyphs. -See -.Fn mchars_alloc . -.It Vt "enum mparset" -The type of parser when reading input. -This should usually be -.Dv MPARSE_AUTO -for auto-detection. +An opaque pointer to a a character table. +Created with +.Xr mchars_alloc 3 +and freed with +.Xr mchars_free 3 . 
.It Vt "struct mparse" An opaque pointer to a running parse sequence. Created with @@ -231,38 +232,20 @@ messages emitted by the parser. .El .Ss Functions .Bl -ohang -.It Fn mandoc_escape -Scan an escape sequence, i.e., a character string beginning with -.Sq \e . -Pass a pointer to the character after the -.Sq \e -as -.Va end ; -it will be set to the supremum of the parsed escape sequence unless -returning -.Dv ESCAPE_ERROR , -in which case the string is bogus and should be -thrown away. -If not -.Dv ESCAPE_ERROR -or -.Dv ESCAPE_IGNORE , -.Va start -is set to the first relevant character of the substring (font, glyph, -whatever) of length -.Va sz . -Both -.Va start -and -.Va sz -may be -.Dv NULL . -Declared in -.In mandoc.h , -implemented in -.Pa mandoc.c . +.It Fn man_deroff +Obtain a text-only representation of a +.Vt struct man_node , +including text contained in its child nodes. +To be used on children of the pointer returned from +.Fn man_node . +When it is no longer needed, the pointer returned from +.Fn man_deroff +can be passed to +.Xr free 3 . .It Fn man_meta -Obtain the meta-data of a successful parse. +Obtain the meta-data of a successful +.Xr man 7 +parse. This may only be used on a pointer returned by .Fn mparse_result . Declared in @@ -276,67 +259,29 @@ Declared in implemented in .Pa man.c . .It Fn man_node -Obtain the root node of a successful parse. +Obtain the root node of a successful +.Xr man 7 +parse. This may only be used on a pointer returned by .Fn mparse_result . Declared in .In man.h , implemented in .Pa man.c . -.It Fn mchars_alloc -Allocate an -.Vt "struct mchars *" -object for translating special characters into glyphs. -See -.Xr mandoc_char 7 -for an overview of special characters. -The object must be freed with -.Fn mchars_free . -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . -.It Fn mchars_free -Free an object created with -.Fn mchars_alloc . -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . 
-.It Fn mchars_num2char -Convert a character index (e.g., the \eN\(aq\(aq escape) into a -printable ASCII character. -Returns \e0 (the nil character) if the input sequence is malformed. -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . -.It Fn mchars_num2uc -Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into -a Unicode codepoint. -Returns \e0 (the nil character) if the input sequence is malformed. -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . -.It Fn mchars_spec2cp -Convert a special character into a valid Unicode codepoint. -Returns \-1 on failure or a non-zero Unicode codepoint on success. -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . -.It Fn mchars_spec2str -Convert a special character into an ASCII string. -Returns -.Dv NULL -on failure. -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . +.It Fn mdoc_deroff +Obtain a text-only representation of a +.Vt struct mdoc_node , +including text contained in its child nodes. +To be used on children of the pointer returned from +.Fn mdoc_node . +When it is no longer needed, the pointer returned from +.Fn mdoc_deroff +can be passed to +.Xr free 3 . .It Fn mdoc_meta -Obtain the meta-data of a successful parse. +Obtain the meta-data of a successful +.Xr mdoc 7 +parse. This may only be used on a pointer returned by .Fn mparse_result . Declared in @@ -344,7 +289,9 @@ Declared in implemented in .Pa mdoc.c . .It Fn mdoc_node -Obtain the root node of a successful parse. +Obtain the root node of a successful +.Xr mdoc 7 +parse. This may only be used on a pointer returned by .Fn mparse_result . Declared in @@ -353,6 +300,60 @@ implemented in .Pa mdoc.c . .It Fn mparse_alloc Allocate a parser. +The arguments have the following effect: +.Bl -tag -offset 5n -width inttype +.It Ar options +When the +.Dv MPARSE_MDOC +or +.Dv MPARSE_MAN +bit is set, only that parser is used. +Otherwise, the document type is automatically detected. 
+.Pp +When the +.Dv MPARSE_SO +bit is set, +.Xr roff 7 +.Ic \&so +file inclusion requests are always honoured. +Otherwise, if the request is the only content in an input file, +only the file name is remembered, to be returned in the +.Fa sodest +argument of +.Fn mparse_result . +.Pp +When the +.Dv MPARSE_QUICK +bit is set, parsing is aborted after the NAME section. +This is for example useful in +.Xr makewhatis 8 +.Fl Q +to quickly build minimal databases. +.It Ar wlevel +Can be set to +.Dv MANDOCLEVEL_FATAL , +.Dv MANDOCLEVEL_ERROR , +or +.Dv MANDOCLEVEL_WARNING . +Messages below the selected level will be suppressed. +.It Ar mmsg +A callback function to handle errors and warnings. +See +.Pa main.c +for an example. +.It Ar mchars +An opaque pointer to a character table obtained from +.Xr mchars_alloc 3 . +.It Ar defos +A default string for the +.Xr mdoc 7 +.Sq \&Os +macro, overriding the +.Dv OSNAME +preprocessor definition and the results of +.Xr uname 3 . +.El +.Pp The same parser may be used for multiple files so long as .Fn mparse_reset is called between parses. @@ -386,18 +387,47 @@ Declared in .In mandoc.h , implemented in .Pa read.c . +.It Fn mparse_open +If the +.Fa fname +ends in +.Pa .gz , +open with +.Xr gunzip 1 ; +otherwise, with +.Xr open 2 . +If +.Xr open 2 +fails, append +.Pa .gz +and try with +.Xr gunzip 1 . +Return a file descriptor open for reading in +.Fa fd , +or -1 on failure. +It can be passed to +.Fn mparse_readfd +or used directly. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . .It Fn mparse_readfd Parse a file or file descriptor. If .Va fd -is -1, +is -1, open .Va fname -is opened for reading. +with +.Fn mparse_open . Otherwise, .Va fname is assumed to be the name associated with .Va fd . -This may be called multiple times with different parameters; however, +Calls +.Fn mparse_wait +before returning. 
+This function may be called multiple times with different parameters; however, .Fn mparse_reset should be invoked between parses. Declared in @@ -420,7 +450,7 @@ i.e., those where .Fn mparse_readfd returned less than MANDOCLEVEL_FATAL .Pc -should invoke this function, in which case one of the two pointers will +should invoke this function, in which case one of the three pointers will be filled in. Declared in .In mandoc.h , @@ -438,6 +468,28 @@ Declared in .In mandoc.h , implemented in .Pa read.c . +.It Fn mparse_wait +Bury a +.Xr gunzip 1 +child process that was spawned with +.Fn mparse_open . +To be called after the parse sequence is complete. +Not needed after +.Fn mparse_readfd , +but does no harm in that case, either. +Returns +.Dv MANDOCLEVEL_OK +on success and +.Dv MANDOCLEVEL_SYSERR +on failure, that is, when +.Xr wait 2 +fails, or when +.Xr gunzip 1 +died from a signal or exited with non-zero status. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . .El .Ss Variables .Bl -ohang @@ -474,6 +526,8 @@ The following non-printing characters may be embedded A non-breaking space character. .It Dv ASCII_HYPH A soft hyphen. +.It Dv ASCII_BREAK +A breakable zero-width space. .El .Pp Escape characters are also passed verbatim into text strings. @@ -481,11 +535,9 @@ An escape character is a sequence of characters beginn backslash .Pq Sq \e . To construct human-readable text, these should be intercepted with -.Fn mandoc_escape -and converted with one of -.Fn mchars_num2char , -.Fn mchars_spec2str , -and so on. +.Xr mandoc_escape 3 +and converted with one of the functions described in +.Xr mchars_alloc 3 . .Ss Man Abstract Syntax Tree This AST is governed by the ontological rules dictated in .Xr man 7 @@ -530,7 +582,7 @@ where capitalised non-terminals represent nodes. .El .Pp The only elements capable of nesting other elements are those with -next-lint scope as documented in +next-line scope as documented in .Xr man 7 . 
.Ss Mdoc Abstract Syntax Tree This AST is governed by the ontological @@ -666,10 +718,13 @@ front-ends to .Xr mandoc 1 are unable to render them in any meaningful way. Furthermore, behaviour when encountering badly-nested blocks is not -consistent across troff implementations, especially when using multiple +consistent across troff implementations, especially when using multiple levels of badly-nested blocks. .Sh SEE ALSO .Xr mandoc 1 , +.Xr mandoc_escape 3 , +.Xr mandoc_malloc 3 , +.Xr mchars_alloc 3 , .Xr eqn 7 , .Xr man 7 , .Xr mandoc_char 7 ,