[BACK]Return to mandoc.3 CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

Annotation of mandoc/mandoc.3, Revision 1.13

1.13    ! kristaps    1: .\"    $Id: mandoc.3,v 1.12 2011/07/11 08:43:27 kristaps Exp $
1.1       kristaps    2: .\"
                      3: .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
                      4: .\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
                      5: .\"
                      6: .\" Permission to use, copy, modify, and distribute this software for any
                      7: .\" purpose with or without fee is hereby granted, provided that the above
                      8: .\" copyright notice and this permission notice appear in all copies.
                      9: .\"
                     10: .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     11: .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     12: .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     13: .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14: .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15: .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16: .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     17: .\"
1.13    ! kristaps   18: .Dd $Mdocdate: July 11 2011 $
1.1       kristaps   19: .Dt MANDOC 3
                     20: .Os
                     21: .Sh NAME
                     22: .Nm mandoc ,
1.3       kristaps   23: .Nm mandoc_escape ,
1.1       kristaps   24: .Nm man_meta ,
                     25: .Nm man_node ,
1.6       kristaps   26: .Nm mchars_alloc ,
                     27: .Nm mchars_free ,
                     28: .Nm mchars_num2char ,
1.7       kristaps   29: .Nm mchars_num2uc ,
1.6       kristaps   30: .Nm mchars_spec2cp ,
                     31: .Nm mchars_spec2str ,
1.1       kristaps   32: .Nm mdoc_meta ,
                     33: .Nm mdoc_node ,
                     34: .Nm mparse_alloc ,
                     35: .Nm mparse_free ,
                     36: .Nm mparse_readfd ,
                     37: .Nm mparse_reset ,
1.2       kristaps   38: .Nm mparse_result ,
                     39: .Nm mparse_strerror ,
                     40: .Nm mparse_strlevel
1.1       kristaps   41: .Nd mandoc macro compiler library
1.8       kristaps   42: .Sh LIBRARY
                     43: .Lb mandoc
1.1       kristaps   44: .Sh SYNOPSIS
                     45: .In man.h
                     46: .In mdoc.h
                     47: .In mandoc.h
1.3       kristaps   48: .Ft "enum mandoc_esc"
                     49: .Fo mandoc_escape
                     50: .Fa "const char **end"
                     51: .Fa "const char **start"
                     52: .Fa "int *sz"
                     53: .Fc
1.1       kristaps   54: .Ft "const struct man_meta *"
                     55: .Fo man_meta
                     56: .Fa "const struct man *man"
                     57: .Fc
                     58: .Ft "const struct man_node *"
                     59: .Fo man_node
                     60: .Fa "const struct man *man"
                     61: .Fc
1.6       kristaps   62: .Ft "struct mchars *"
                     63: .Fn mchars_alloc
                     64: .Ft void
                     65: .Fn mchars_free "struct mchars *p"
                     66: .Ft char
                     67: .Fn mchars_num2char "const char *cp" "size_t sz"
1.7       kristaps   68: .Ft int
                     69: .Fn mchars_num2uc "const char *cp" "size_t sz"
1.6       kristaps   70: .Ft "const char *"
                     71: .Fo mchars_spec2str
                     72: .Fa "struct mchars *p"
                     73: .Fa "const char *cp"
                     74: .Fa "size_t sz"
                     75: .Fa "size_t *rsz"
                     76: .Fc
                     77: .Ft int
                     78: .Fo mchars_spec2cp
                     79: .Fa "struct mchars *p"
                     80: .Fa "const char *cp"
                     81: .Fa "size_t sz"
                     83: .Fc
1.1       kristaps   84: .Ft "const struct mdoc_meta *"
                     85: .Fo mdoc_meta
                     86: .Fa "const struct mdoc *mdoc"
                     87: .Fc
                     88: .Ft "const struct mdoc_node *"
                     89: .Fo mdoc_node
                     90: .Fa "const struct mdoc *mdoc"
                     91: .Fc
                     92: .Ft "struct mparse *"
                     93: .Fo mparse_alloc
                     94: .Fa "enum mparset type"
                     95: .Fa "enum mandoclevel wlevel"
                     96: .Fa "mandocmsg msg"
                     97: .Fa "void *msgarg"
                     98: .Fc
                     99: .Ft void
                    100: .Fo mparse_free
                    101: .Fa "struct mparse *parse"
                    102: .Fc
                    103: .Ft "enum mandoclevel"
                    104: .Fo mparse_readfd
                    105: .Fa "struct mparse *parse"
                    106: .Fa "int fd"
                    107: .Fa "const char *fname"
                    108: .Fc
                    109: .Ft void
                    110: .Fo mparse_reset
                    111: .Fa "struct mparse *parse"
                    112: .Fc
                    113: .Ft void
                    114: .Fo mparse_result
                    115: .Fa "struct mparse *parse"
                    116: .Fa "struct mdoc **mdoc"
                    117: .Fa "struct man **man"
1.2       kristaps  118: .Fc
                    119: .Ft "const char *"
                    120: .Fo mparse_strerror
                    121: .Fa "enum mandocerr"
                    122: .Fc
                    123: .Ft "const char *"
                    124: .Fo mparse_strlevel
                    125: .Fa "enum mandoclevel"
1.1       kristaps  126: .Fc
                    127: .Vt extern const char * const * man_macronames;
                    128: .Vt extern const char * const * mdoc_argnames;
                    129: .Vt extern const char * const * mdoc_macronames;
1.4       kristaps  130: .Fd "#define ASCII_NBRSP"
                    131: .Fd "#define ASCII_HYPH"
1.1       kristaps  132: .Sh DESCRIPTION
                    133: The
                    134: .Nm mandoc
                    135: library parses a
                    136: .Ux
                    137: manual into an abstract syntax tree (AST).
                    138: .Ux
                    139: manuals are composed of
                    140: .Xr mdoc 7
                    141: or
                    142: .Xr man 7 ,
                    143: and may be mixed with
                    144: .Xr roff 7 ,
                    145: .Xr tbl 7 ,
                    146: and
                    147: .Xr eqn 7
                    148: invocations.
                    149: .Pp
                    150: The following describes a general parse sequence:
                    151: .Bl -enum
                    152: .It
                    153: initiate a parsing sequence with
                    154: .Fn mparse_alloc ;
                    155: .It
                    156: parse files or file descriptors with
                    157: .Fn mparse_readfd ;
                    158: .It
                    159: retrieve a parsed syntax tree, if the parse was successful, with
                    160: .Fn mparse_result ;
                    161: .It
                    162: iterate over parse nodes with
                    163: .Fn mdoc_node
                    164: or
                    165: .Fn man_node ;
                    166: .It
                    167: free all allocated memory with
                    168: .Fn mparse_free ,
                    169: or invoke
                    170: .Fn mparse_reset
                    171: and parse new files.
1.3       kristaps  172: .El
1.6       kristaps  173: .Pp
                    174: The
                    175: .Nm
                    176: library also contains routines for translating character strings into glyphs
                    177: .Pq see Fn mchars_alloc
                    178: and parsing escape sequences from strings
                    179: .Pq see Fn mandoc_escape .
1.3       kristaps  180: .Sh REFERENCE
                    181: This section documents the functions, types, and variables available
                    182: via
                    183: .In mandoc.h .
                    184: .Ss Types
                    185: .Bl -ohang
                    186: .It Vt "enum mandoc_esc"
1.11      kristaps  187: An escape sequence classification.
1.3       kristaps  188: .It Vt "enum mandocerr"
1.11      kristaps  189: A fatal error, error, or warning message during parsing.
1.3       kristaps  190: .It Vt "enum mandoclevel"
1.11      kristaps  191: A classification of an
                    192: .Vt "enum mandocerr"
                    193: as regards system operation.
1.6       kristaps  194: .It Vt "struct mchars"
                    195: An opaque pointer to an object allowing for translation between
                    196: character strings and glyphs.
                    197: See
                    198: .Fn mchars_alloc .
1.3       kristaps  199: .It Vt "enum mparset"
1.11      kristaps  200: The type of parser when reading input.
                    201: This should usually be
1.12      kristaps  202: .Dv MPARSE_AUTO
1.11      kristaps  203: for auto-detection.
1.3       kristaps  204: .It Vt "struct mparse"
1.11      kristaps  205: An opaque pointer to a running parse sequence.
                    206: Created with
                    207: .Fn mparse_alloc
                    208: and freed with
                    209: .Fn mparse_free .
                    210: This may be used across parsed input if
                    211: .Fn mparse_reset
                    212: is called between parses.
1.3       kristaps  213: .It Vt "mandocmsg"
1.11      kristaps  214: A prototype for a function to handle fatal error, error, and warning
                    215: messages emitted by the parser.
1.3       kristaps  216: .El
                    217: .Ss Functions
                    218: .Bl -ohang
                    219: .It Fn mandoc_escape
1.4       kristaps  220: Scan an escape sequence, i.e., a character string beginning with
                    221: .Sq \e .
                    222: Pass a pointer to this string as
                    223: .Va end ;
                    224: it will be set to the supremum of the parsed escape sequence unless
1.12      kristaps  225: returning
                    226: .Dv ESCAPE_ERROR ,
                    227: in which case the string is bogus and should be
1.4       kristaps  228: thrown away.
1.12      kristaps  229: If not
                    230: .Dv ESCAPE_ERROR
                    231: or
                    232: .Dv ESCAPE_IGNORE ,
1.4       kristaps  233: .Va start
                    234: is set to the first relevant character of the substring (font, glyph,
                    235: whatever) of length
                    236: .Va sz .
                    237: Both
                    238: .Va start
                    239: and
                    240: .Va sz
1.12      kristaps  241: may be
                    242: .Dv NULL .
1.3       kristaps  243: .It Fn man_meta
1.4       kristaps  244: Obtain the meta-data of a successful parse.
                    245: This may only be used on a pointer returned by
                    246: .Fn mparse_result .
1.3       kristaps  247: .It Fn man_node
1.4       kristaps  248: Obtain the root node of a successful parse.
                    249: This may only be used on a pointer returned by
                    250: .Fn mparse_result .
1.6       kristaps  251: .It Fn mchars_alloc
                    252: Allocate an
                    253: .Vt "struct mchars *"
                    254: object for translating special characters into glyphs.
                    255: See
                    256: .Xr mandoc_char 7
                    257: for an overview of special characters.
                    258: The object must be freed with
                    259: .Fn mchars_free .
                    260: .It Fn mchars_free
                    261: Free an object created with
                    262: .Fn mchars_alloc .
                    263: .It Fn mchars_num2char
1.7       kristaps  264: Convert a character index (e.g., the \eN\(aq\(aq escape) into a
                    265: printable ASCII character.
                    266: Returns \e0 (the nil character) if the input sequence is malformed.
                    267: .It Fn mchars_num2uc
                    268: Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into
                    269: a Unicode codepoint.
1.6       kristaps  270: Returns \e0 (the nil character) if the input sequence is malformed.
                    271: .It Fn mchars_spec2cp
                    272: Convert a special character into a valid Unicode codepoint.
1.10      kristaps  273: Returns \-1 on failure or a non-zero Unicode codepoint on success.
1.6       kristaps  274: .It Fn mchars_spec2str
                    275: Convert a special character into an ASCII string.
1.12      kristaps  276: Returns
                    277: .Dv NULL
                    278: on failure.
1.3       kristaps  279: .It Fn mdoc_meta
1.4       kristaps  280: Obtain the meta-data of a successful parse.
                    281: This may only be used on a pointer returned by
                    282: .Fn mparse_result .
1.3       kristaps  283: .It Fn mdoc_node
1.4       kristaps  284: Obtain the root node of a successful parse.
                    285: This may only be used on a pointer returned by
                    286: .Fn mparse_result .
1.3       kristaps  287: .It Fn mparse_alloc
1.4       kristaps  288: Allocate a parser.
                    289: The same parser may be used for multiple files so long as
                    290: .Fn mparse_reset
                    291: is called between parses.
                    292: .Fn mparse_free
                    293: must be called to free the memory allocated by this function.
1.3       kristaps  294: .It Fn mparse_free
1.4       kristaps  295: Free all memory allocated by
                    296: .Fn mparse_alloc .
1.3       kristaps  297: .It Fn mparse_readfd
1.4       kristaps  298: Parse a file or file descriptor.
                    299: If
                    300: .Va fd
                    301: is \-1,
                    302: .Va fname
                    303: is opened for reading.
                    304: Otherwise,
                    305: .Va fname
                    306: is assumed to be the name associated with
                    307: .Va fd .
                    308: This may be called multiple times with different parameters; however,
                    309: .Fn mparse_reset
                    310: should be invoked between parses.
1.3       kristaps  311: .It Fn mparse_reset
1.4       kristaps  312: Reset a parser so that
                    313: .Fn mparse_readfd
                    314: may be used again.
1.3       kristaps  315: .It Fn mparse_result
1.4       kristaps  316: Obtain the result of a parse.
                    317: Only successful parses
                    318: .Po
                    319: i.e., those where
                    320: .Fn mparse_readfd
                    321: returned less than MANDOCLEVEL_FATAL
                    322: .Pc
                    323: should invoke this function, in which case one of the two pointers will
                    324: be filled in.
1.3       kristaps  325: .It Fn mparse_strerror
1.4       kristaps  326: Return a statically-allocated string representation of an error code.
1.3       kristaps  327: .It Fn mparse_strlevel
1.4       kristaps  328: Return a statically-allocated string representation of a level code.
1.3       kristaps  329: .El
                    330: .Ss Variables
                    331: .Bl -ohang
                    332: .It Va man_macronames
1.4       kristaps  333: The string representation of a man macro as indexed by
                    334: .Vt "enum mant" .
1.3       kristaps  335: .It Va mdoc_argnames
1.4       kristaps  336: The string representation of a mdoc macro argument as indexed by
                    337: .Vt "enum mdocargt" .
1.3       kristaps  338: .It Va mdoc_macronames
1.4       kristaps  339: The string representation of a mdoc macro as indexed by
                    340: .Vt "enum mdoct" .
1.1       kristaps  341: .El
                    342: .Sh IMPLEMENTATION NOTES
                    343: This section consists of structural documentation for
                    344: .Xr mdoc 7
                    345: and
                    346: .Xr man 7
1.11      kristaps  347: syntax trees and strings.
                    348: .Ss Man and Mdoc Strings
                    349: Strings may be extracted from mdoc and man meta-data, or from text
                    350: nodes (MDOC_TEXT and MAN_TEXT, respectively).
                    351: These strings have special non-printing formatting cues embedded in the
                    352: text itself, as well as
                    353: .Xr roff 7
                    354: escapes preserved from input.
                    355: Implementing systems will need to handle both situations to produce
                    356: human-readable text.
                    357: In general, strings may be assumed to consist of 7-bit ASCII characters.
                    358: .Pp
                    359: The following non-printing characters may be embedded in text strings:
                    360: .Bl -tag -width Ds
                    361: .It Dv ASCII_NBRSP
                    362: A non-breaking space character.
                    363: .It Dv ASCII_HYPH
                    364: A soft hyphen.
                    365: .El
                    366: .Pp
                    367: Escape characters are also passed verbatim into text strings.
                    368: An escape character is a sequence of characters beginning with the
                    369: backslash
                    370: .Pq Sq \e .
                    371: To construct human-readable text, these should be intercepted with
                    372: .Fn mandoc_escape
                    373: and converted with one of
                    374: .Fn mchars_num2char ,
                    375: .Fn mchars_spec2str ,
                    376: and so on.
1.1       kristaps  377: .Ss Man Abstract Syntax Tree
                    378: This AST is governed by the ontological rules dictated in
                    379: .Xr man 7
                    380: and derives its terminology accordingly.
                    381: .Pp
                    382: The AST is composed of
                    383: .Vt struct man_node
                    384: nodes with element, root and text types as declared by the
                    385: .Va type
                    386: field.
                    387: Each node also provides its parse point (the
                    388: .Va line ,
                    389: .Va sec ,
                    390: and
                    391: .Va pos
                    392: fields), its position in the tree (the
                    393: .Va parent ,
                    394: .Va child ,
                    395: .Va next
                    396: and
                    397: .Va prev
                    398: fields) and some type-specific data.
                    399: .Pp
                    400: The tree itself is arranged according to the following normal form,
                    401: where capitalised non-terminals represent nodes.
                    402: .Pp
                    403: .Bl -tag -width "ELEMENTXX" -compact
                    404: .It ROOT
                    405: \(<- mnode+
                    406: .It mnode
                    407: \(<- ELEMENT | TEXT | BLOCK
                    408: .It BLOCK
                    409: \(<- HEAD BODY
                    410: .It HEAD
                    411: \(<- mnode*
                    412: .It BODY
                    413: \(<- mnode*
                    414: .It ELEMENT
                    415: \(<- ELEMENT | TEXT*
                    416: .It TEXT
1.11      kristaps  417: \(<- [[:ascii:]]*
1.1       kristaps  418: .El
                    419: .Pp
                    420: The only elements capable of nesting other elements are those with
                    421: next-line scope as documented in
                    422: .Xr man 7 .
                    423: .Ss Mdoc Abstract Syntax Tree
                    424: This AST is governed by the ontological
                    425: rules dictated in
                    426: .Xr mdoc 7
                    427: and derives its terminology accordingly.
                    428: .Qq In-line
                    429: elements described in
                    430: .Xr mdoc 7
                    431: are described simply as
                    432: .Qq elements .
                    433: .Pp
                    434: The AST is composed of
                    435: .Vt struct mdoc_node
                    436: nodes with block, head, body, element, root and text types as declared
                    437: by the
                    438: .Va type
                    439: field.
                    440: Each node also provides its parse point (the
                    441: .Va line ,
                    442: .Va sec ,
                    443: and
                    444: .Va pos
                    445: fields), its position in the tree (the
                    446: .Va parent ,
                    447: .Va child ,
                    448: .Va nchild ,
                    449: .Va next
                    450: and
                    451: .Va prev
                    452: fields) and some type-specific data, in particular, for nodes generated
                    453: from macros, the generating macro in the
                    454: .Va tok
                    455: field.
                    456: .Pp
                    457: The tree itself is arranged according to the following normal form,
                    458: where capitalised non-terminals represent nodes.
                    459: .Pp
                    460: .Bl -tag -width "ELEMENTXX" -compact
                    461: .It ROOT
                    462: \(<- mnode+
                    463: .It mnode
                    464: \(<- BLOCK | ELEMENT | TEXT
                    465: .It BLOCK
                    466: \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
                    467: .It ELEMENT
                    468: \(<- TEXT*
                    469: .It HEAD
                    470: \(<- mnode*
                    471: .It BODY
                    472: \(<- mnode* [ENDBODY mnode*]
                    473: .It TAIL
                    474: \(<- mnode*
                    475: .It TEXT
1.11      kristaps  476: \(<- [[:ascii:]]*
1.1       kristaps  477: .El
                    478: .Pp
                    479: Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
                    480: the BLOCK production: these refer to punctuation marks.
                    481: Furthermore, although a TEXT node will generally have a non-zero-length
                    482: string, in the specific case of
                    483: .Sq \&.Bd \-literal ,
                    484: an empty line will produce a zero-length string.
                    485: Multiple body parts are only found in invocations of
                    486: .Sq \&Bl \-column ,
                    487: where a new body introduces a new phrase.
                    488: .Pp
                    489: The
                    490: .Xr mdoc 7
1.5       kristaps  491: syntax tree accommodates broken block structures as well.
1.1       kristaps  492: The ENDBODY node is available to end the formatting associated
                    493: with a given block before the physical end of that block.
                    494: It has a non-null
                    495: .Va end
                    496: field, is of the BODY
                    497: .Va type ,
                    498: has the same
                    499: .Va tok
                    500: as the BLOCK it is ending, and has a
                    501: .Va pending
                    502: field pointing to that BLOCK's BODY node.
                    503: It is an indirect child of that BODY node
                    504: and has no children of its own.
                    505: .Pp
                    506: An ENDBODY node is generated when a block ends while one of its child
                    507: blocks is still open, like in the following example:
                    508: .Bd -literal -offset indent
                    509: \&.Ao ao
                    510: \&.Bo bo ac
                    511: \&.Ac bc
                    512: \&.Bc end
                    513: .Ed
                    514: .Pp
                    515: This example results in the following block structure:
                    516: .Bd -literal -offset indent
                    517: BLOCK Ao
                    518:     HEAD Ao
                    519:     BODY Ao
                    520:         TEXT ao
                    521:         BLOCK Bo, pending -> Ao
                    522:             HEAD Bo
                    523:             BODY Bo
                    524:                 TEXT bo
                    525:                 TEXT ac
                    526:                 ENDBODY Ao, pending -> Ao
                    527:                 TEXT bc
                    528: TEXT end
                    529: .Ed
                    530: .Pp
                    531: Here, the formatting of the
                    532: .Sq \&Ao
                    533: block extends from TEXT ao to TEXT ac,
                    534: while the formatting of the
                    535: .Sq \&Bo
                    536: block extends from TEXT bo to TEXT bc.
                    537: It renders as follows in
                    538: .Fl T Ns Cm ascii
                    539: mode:
                    540: .Pp
                    541: .Dl <ao [bo ac> bc] end
                    542: .Pp
                    543: Support for badly-nested blocks is only provided for backward
                    544: compatibility with some older
                    545: .Xr mdoc 7
                    546: implementations.
                    547: Using badly-nested blocks is
                    548: .Em strongly discouraged ;
                    549: for example, the
                    550: .Fl T Ns Cm html
                    551: and
                    552: .Fl T Ns Cm xhtml
                    553: front-ends to
                    554: .Xr mandoc 1
                    555: are unable to render them in any meaningful way.
                    556: Furthermore, behaviour when encountering badly-nested blocks is not
                    557: consistent across troff implementations, especially when using multiple
                    558: levels of badly-nested blocks.
                    559: .Sh SEE ALSO
                    560: .Xr mandoc 1 ,
                    561: .Xr eqn 7 ,
                    562: .Xr man 7 ,
1.6       kristaps  563: .Xr mandoc_char 7 ,
1.1       kristaps  564: .Xr mdoc 7 ,
                    565: .Xr roff 7 ,
                    566: .Xr tbl 7
                    567: .Sh AUTHORS
                    568: The
                    569: .Nm
                    570: library was written by
1.13    ! kristaps  571: .An Kristaps Dzonsons ,
        !           572: .Mt kristaps@bsd.lv .

CVSweb