=================================================================== RCS file: /cvs/mandoc/mandoc.3,v retrieving revision 1.5 retrieving revision 1.24 diff -u -p -r1.5 -r1.24 --- mandoc/mandoc.3 2011/04/30 10:18:24 1.5 +++ mandoc/mandoc.3 2014/03/23 11:25:26 1.24 @@ -1,4 +1,4 @@ -.\" $Id: mandoc.3,v 1.5 2011/04/30 10:18:24 kristaps Exp $ +.\" $Id: mandoc.3,v 1.24 2014/03/23 11:25:26 schwarze Exp $ .\" .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons .\" Copyright (c) 2010 Ingo Schwarze @@ -15,61 +15,114 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: April 30 2011 $ +.Dd $Mdocdate: March 23 2014 $ .Dt MANDOC 3 .Os .Sh NAME .Nm mandoc , +.Nm mandoc_calloc , .Nm mandoc_escape , +.Nm mandoc_malloc , +.Nm mandoc_realloc , +.Nm mandoc_strdup , +.Nm mandoc_strndup , .Nm man_meta , +.Nm man_mparse , .Nm man_node , +.Nm mchars_alloc , +.Nm mchars_free , +.Nm mchars_num2char , +.Nm mchars_num2uc , +.Nm mchars_spec2cp , +.Nm mchars_spec2str , .Nm mdoc_meta , .Nm mdoc_node , .Nm mparse_alloc , .Nm mparse_free , +.Nm mparse_getkeep , +.Nm mparse_keep , .Nm mparse_readfd , .Nm mparse_reset , .Nm mparse_result , .Nm mparse_strerror , .Nm mparse_strlevel .Nd mandoc macro compiler library +.Sh LIBRARY +.Lb libmandoc .Sh SYNOPSIS -.In man.h -.In mdoc.h .In mandoc.h +.Fd "#define ASCII_NBRSP" +.Fd "#define ASCII_HYPH" +.Fd "#define ASCII_BREAK" +.Ft "void *" +.Fo mandoc_calloc +.Fa "size_t nmemb" +.Fa "size_t size" +.Fc .Ft "enum mandoc_esc" .Fo mandoc_escape -.Fa "const char **in" -.Fa "const char **seq" -.Fa "int *len" +.Fa "const char **end" +.Fa "const char **start" +.Fa "int *sz" .Fc -.Ft "const struct man_meta *" -.Fo man_meta -.Fa "const struct man *man" +.Ft "void *" +.Fn mandoc_malloc "size_t size" +.Ft "struct mchars *" +.Fo mandoc_realloc +.Fa "void *ptr" +.Fa "size_t size" .Fc -.Ft "const struct man_node *" -.Fo man_node -.Fa "const struct man *man" 
+.Ft "char *" +.Fn mandoc_strdup +.Fn mchars_alloc "void" +.Ft void +.Fn mchars_free "struct mchars *p" +.Ft char +.Fn mchars_num2char "const char *cp" "size_t sz" +.Ft int +.Fn mchars_num2uc "const char *cp" "size_t sz" +.Ft "const char *" +.Fo mchars_spec2str +.Fa "const struct mchars *p" +.Fa "const char *cp" +.Fa "size_t sz" +.Fa "size_t *rsz" .Fc -.Ft "const struct mdoc_meta *" -.Fo mdoc_meta -.Fa "const struct mdoc *mdoc" +.Ft int +.Fo mchars_spec2cp +.Fa "const struct mchars *p" +.Fa "const char *cp" +.Fa "size_t sz" .Fc -.Ft "const struct mdoc_node *" -.Fo mdoc_node -.Fa "const struct mdoc *mdoc" -.Fc .Ft void .Fo mparse_alloc -.Fa "enum mparset type" +.Fa "enum mparset inttype" .Fa "enum mandoclevel wlevel" -.Fa "mandocmsg msg" -.Fa "void *msgarg" +.Fa "mandocmsg mmsg" +.Fa "char *defos" +.Fa "int quick" .Fc .Ft void +.Fo (*mandocmsg) +.Fa "enum mandocerr errtype" +.Fa "enum mandoclevel level" +.Fa "const char *file" +.Fa "int line" +.Fa "int col" +.Fa "const char *msg" +.Fc +.Ft void .Fo mparse_free .Fa "struct mparse *parse" .Fc +.Ft const char * +.Fo mparse_getkeep +.Fa "const struct mparse *parse" +.Fc +.Ft void +.Fo mparse_keep +.Fa "struct mparse *parse" +.Fc .Ft "enum mandoclevel" .Fo mparse_readfd .Fa "struct mparse *parse" @@ -94,11 +147,33 @@ .Fo mparse_strlevel .Fa "enum mandoclevel" .Fc -.Vt extern const char * const * man_macronames; +.In mandoc.h +.In mdoc.h +.Ft "const struct mdoc_meta *" +.Fo mdoc_meta +.Fa "const struct mdoc *mdoc" +.Fc +.Ft "const struct mdoc_node *" +.Fo mdoc_node +.Fa "const struct mdoc *mdoc" +.Fc .Vt extern const char * const * mdoc_argnames; .Vt extern const char * const * mdoc_macronames; -.Fd "#define ASCII_NBRSP" -.Fd "#define ASCII_HYPH" +.In mandoc.h +.In man.h +.Ft "const struct man_meta *" +.Fo man_meta +.Fa "const struct man *man" +.Fc +.Ft "const struct mparse *" +.Fo man_mparse +.Fa "const struct man *man" +.Fc +.Ft "const struct man_node *" +.Fo man_node +.Fa "const struct man *man" +.Fc +.Vt extern const 
char * const * man_macronames; .Sh DESCRIPTION The .Nm mandoc @@ -140,6 +215,13 @@ or invoke .Fn mparse_reset and parse new files. .El +.Pp +The +.Nm +library also contains routines for translating character strings into glyphs +.Pq see Fn mchars_alloc +and parsing escape sequences from strings +.Pq see Fn mandoc_escape . .Sh REFERENCE This section documents the functions, types, and variables available via @@ -147,23 +229,54 @@ via .Ss Types .Bl -ohang .It Vt "enum mandoc_esc" +An escape sequence classification. .It Vt "enum mandocerr" +A fatal error, error, or warning message during parsing. .It Vt "enum mandoclevel" +A classification of an +.Vt "enum mandocerr" +as regards system operation. +.It Vt "struct mchars" +An opaque pointer to an object allowing for translation between +character strings and glyphs. +See +.Fn mchars_alloc . .It Vt "enum mparset" +The type of parser when reading input. +This should usually be +.Dv MPARSE_AUTO +for auto-detection. .It Vt "struct mparse" +An opaque pointer to a running parse sequence. +Created with +.Fn mparse_alloc +and freed with +.Fn mparse_free . +This may be used across parsed input if +.Fn mparse_reset +is called between parses. .It Vt "mandocmsg" +A prototype for a function to handle fatal error, error, and warning +messages emitted by the parser. .El .Ss Functions .Bl -ohang .It Fn mandoc_escape Scan an escape sequence, i.e., a character string beginning with .Sq \e . -Pass a pointer to this string as +Pass a pointer to the character after the +.Sq \e +as .Va end ; it will be set to the supremum of the parsed escape sequence unless -returning ESCAPE_ERROR, in which case the string is bogus and should be +returning +.Dv ESCAPE_ERROR , +in which case the string is bogus and should be thrown away. 
-If not ESCAPE_ERROR or ESCAPE_IGNORE, +If not +.Dv ESCAPE_ERROR +or +.Dv ESCAPE_IGNORE , .Va start is set to the first relevant character of the substring (font, glyph, whatever) of length @@ -172,33 +285,173 @@ Both .Va start and .Va sz -may be NULL. +may be +.Dv NULL . +Declared in +.In mandoc.h , +implemented in +.Pa mandoc.c . .It Fn man_meta Obtain the meta-data of a successful parse. This may only be used on a pointer returned by .Fn mparse_result . +Declared in +.In man.h , +implemented in +.Pa man.c . +.It Fn man_mparse +Get the parser used for the current output. +Declared in +.In man.h , +implemented in +.Pa man.c . .It Fn man_node Obtain the root node of a successful parse. This may only be used on a pointer returned by .Fn mparse_result . +Declared in +.In man.h , +implemented in +.Pa man.c . +.It Fn mchars_alloc +Allocate a +.Vt "struct mchars *" +object for translating special characters into glyphs. +See +.Xr mandoc_char 7 +for an overview of special characters. +The object must be freed with +.Fn mchars_free . +Declared in +.In mandoc.h , +implemented in +.Pa chars.c . +.It Fn mchars_free +Free an object created with +.Fn mchars_alloc . +Declared in +.In mandoc.h , +implemented in +.Pa chars.c . +.It Fn mchars_num2char +Convert a character index (e.g., the \eN\(aq\(aq escape) into a +printable ASCII character. +Returns \e0 (the nil character) if the input sequence is malformed. +Declared in +.In mandoc.h , +implemented in +.Pa chars.c . +.It Fn mchars_num2uc +Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into +a Unicode codepoint. +Returns \e0 (the nil character) if the input sequence is malformed. +Declared in +.In mandoc.h , +implemented in +.Pa chars.c . +.It Fn mchars_spec2cp +Convert a special character into a valid Unicode codepoint. +Returns \-1 on failure or a non-zero Unicode codepoint on success. +Declared in +.In mandoc.h , +implemented in +.Pa chars.c . 
+.It Fn mchars_spec2str +Convert a special character into an ASCII string. +Returns +.Dv NULL +on failure. +Declared in +.In mandoc.h , +implemented in +.Pa chars.c . .It Fn mdoc_meta Obtain the meta-data of a successful parse. This may only be used on a pointer returned by .Fn mparse_result . +Declared in +.In mdoc.h , +implemented in +.Pa mdoc.c . .It Fn mdoc_node Obtain the root node of a successful parse. This may only be used on a pointer returned by .Fn mparse_result . +Declared in +.In mdoc.h , +implemented in +.Pa mdoc.c . .It Fn mparse_alloc Allocate a parser. +The arguments have the following effect: +.Bl -tag -offset 5n -width inttype +.It Ar inttype +When set to +.Dv MPARSE_MDOC +or +.Dv MPARSE_MAN , +only that parser will be used. +With +.Dv MPARSE_AUTO , +the document type will be automatically detected. +.It Ar wlevel +Can be set to +.Dv MANDOCLEVEL_FATAL , +.Dv MANDOCLEVEL_ERROR , +or +.Dv MANDOCLEVEL_WARNING . +Messages below the selected level will be suppressed. +.It Ar mmsg +A callback function to handle errors and warnings. +See +.Pa main.c +for an example. +.It Ar defos +A default string for the +.Xr mdoc 7 +.Sq \&Os +macro, overriding the +.Dv OSNAME +preprocessor definition and the results of +.Xr uname 3 . +.It Ar quick +When set, parsing is aborted after the NAME section. +This is for example useful to quickly build minimal databases. +.El +.Pp The same parser may be used for multiple files so long as .Fn mparse_reset is called between parses. .Fn mparse_free must be called to free the memory allocated by this function. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . .It Fn mparse_free Free all memory allocated by .Fn mparse_alloc . +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_getkeep +Acquire the keep buffer. +Must follow a call of +.Fn mparse_keep . +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_keep +Instruct the parser to retain a copy of its parsed input. 
+This can be acquired with subsequent +.Fn mparse_getkeep +calls. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . .It Fn mparse_readfd Parse a file or file descriptor. If @@ -213,10 +466,18 @@ is assumed to be the name associated with This may be called multiple times with different parameters; however, .Fn mparse_reset should be invoked between parses. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . .It Fn mparse_reset Reset a parser so that .Fn mparse_readfd may be used again. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . .It Fn mparse_result Obtain the result of a parse. Only successful parses @@ -227,10 +488,22 @@ returned less than MANDOCLEVEL_FATAL .Pc should invoke this function, in which case one of the two pointers will be filled in. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . .It Fn mparse_strerror Return a statically-allocated string representation of an error code. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . .It Fn mparse_strlevel Return a statically-allocated string representation of a level code. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . .El .Ss Variables .Bl -ohang @@ -249,7 +522,36 @@ This section consists of structural documentation for .Xr mdoc 7 and .Xr man 7 -syntax trees. +syntax trees and strings. +.Ss Man and Mdoc Strings +Strings may be extracted from mdoc and man meta-data, or from text +nodes (MDOC_TEXT and MAN_TEXT, respectively). +These strings have special non-printing formatting cues embedded in the +text itself, as well as +.Xr roff 7 +escapes preserved from input. +Implementing systems will need to handle both situations to produce +human-readable text. +In general, strings may be assumed to consist of 7-bit ASCII characters. +.Pp +The following non-printing characters may be embedded in text strings: +.Bl -tag -width Ds +.It Dv ASCII_NBRSP +A non-breaking space character. +.It Dv ASCII_HYPH +A soft hyphen. 
+.El +.Pp +Escape sequences are also passed verbatim into text strings. +An escape sequence is a sequence of characters beginning with the +backslash +.Pq Sq \e . +To construct human-readable text, these should be intercepted with +.Fn mandoc_escape +and converted with one of +.Fn mchars_num2char , +.Fn mchars_spec2str , +and so on. .Ss Man Abstract Syntax Tree This AST is governed by the ontological rules dictated in .Xr man 7 @@ -290,7 +592,7 @@ where capitalised non-terminals represent nodes. .It ELEMENT \(<- ELEMENT | TEXT* .It TEXT -\(<- [[:alpha:]]* +\(<- [[:ascii:]]* .El .Pp The only elements capable of nesting other elements are those with @@ -349,7 +651,7 @@ where capitalised non-terminals represent nodes. .It TAIL \(<- mnode* .It TEXT -\(<- [[:printable:],0x1e]* +\(<- [[:ascii:]]* .El .Pp Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of @@ -436,6 +738,7 @@ levels of badly-nested blocks. .Xr mandoc 1 , .Xr eqn 7 , .Xr man 7 , +.Xr mandoc_char 7 , .Xr mdoc 7 , .Xr roff 7 , .Xr tbl 7 @@ -443,4 +746,4 @@ levels of badly-nested blocks. The .Nm library was written by -.An Kristaps Dzonsons Aq kristaps@bsd.lv . +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .