[BACK]Return to mandoc.3 CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

Annotation of mandoc/mandoc.3, Revision 1.24

1.24    ! schwarze    1: .\"    $Id: mandoc.3,v 1.23 2014/01/05 20:26:36 schwarze Exp $
1.1       kristaps    2: .\"
                      3: .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
                      4: .\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
                      5: .\"
                      6: .\" Permission to use, copy, modify, and distribute this software for any
                      7: .\" purpose with or without fee is hereby granted, provided that the above
                      8: .\" copyright notice and this permission notice appear in all copies.
                      9: .\"
                     10: .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     11: .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     12: .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     13: .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14: .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15: .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16: .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     17: .\"
1.24    ! schwarze   18: .Dd $Mdocdate: January 5 2014 $
1.1       kristaps   19: .Dt MANDOC 3
                     20: .Os
                     21: .Sh NAME
                     22: .Nm mandoc ,
1.24    ! schwarze   23: .Nm mandoc_calloc ,
1.3       kristaps   24: .Nm mandoc_escape ,
1.24    ! schwarze   25: .Nm mandoc_malloc ,
        !            26: .Nm mandoc_realloc ,
        !            27: .Nm mandoc_strdup ,
        !            28: .Nm mandoc_strndup ,
1.1       kristaps   29: .Nm man_meta ,
1.14      kristaps   30: .Nm man_mparse ,
1.1       kristaps   31: .Nm man_node ,
1.6       kristaps   32: .Nm mchars_alloc ,
                     33: .Nm mchars_free ,
                     34: .Nm mchars_num2char ,
1.7       kristaps   35: .Nm mchars_num2uc ,
1.6       kristaps   36: .Nm mchars_spec2cp ,
                     37: .Nm mchars_spec2str ,
1.1       kristaps   38: .Nm mdoc_meta ,
                     39: .Nm mdoc_node ,
                     40: .Nm mparse_alloc ,
                     41: .Nm mparse_free ,
1.14      kristaps   42: .Nm mparse_getkeep ,
                     43: .Nm mparse_keep ,
1.1       kristaps   44: .Nm mparse_readfd ,
                     45: .Nm mparse_reset ,
1.2       kristaps   46: .Nm mparse_result ,
                     47: .Nm mparse_strerror ,
                     48: .Nm mparse_strlevel
1.1       kristaps   49: .Nd mandoc macro compiler library
1.8       kristaps   50: .Sh LIBRARY
1.22      schwarze   51: .Lb libmandoc
1.1       kristaps   52: .Sh SYNOPSIS
                     53: .In mandoc.h
1.24    ! schwarze   54: .Fd "#define ASCII_NBRSP"
        !            55: .Fd "#define ASCII_HYPH"
        !            56: .Fd "#define ASCII_BREAK"
        !            57: .Ft "void *"
        !            58: .Fo mandoc_calloc
        !            59: .Fa "size_t nmemb"
        !            60: .Fa "size_t size"
        !            61: .Fc
1.3       kristaps   62: .Ft "enum mandoc_esc"
                     63: .Fo mandoc_escape
1.23      schwarze   64: .Fa "const char **end"
                     65: .Fa "const char **start"
1.15      kristaps   66: .Fa "int *sz"
1.3       kristaps   67: .Fc
1.24    ! schwarze   68: .Ft "void *"
        !            69: .Fn mandoc_malloc "size_t size"
        !            70: .Ft "void *"
        !            71: .Fo mandoc_realloc
        !            72: .Fa "void *ptr"
        !            73: .Fa "size_t size"
1.1       kristaps   74: .Fc
1.24    ! schwarze   75: .Ft "char *"
        !            76: .Fn mandoc_strdup "const char *ptr"
        !              : .Ft "struct mchars *"
1.20      schwarze   77: .Fn mchars_alloc "void"
1.6       kristaps   78: .Ft void
                     79: .Fn mchars_free "struct mchars *p"
                     80: .Ft char
                     81: .Fn mchars_num2char "const char *cp" "size_t sz"
1.7       kristaps   82: .Ft int
                     83: .Fn mchars_num2uc "const char *cp" "size_t sz"
1.6       kristaps   84: .Ft "const char *"
                     85: .Fo mchars_spec2str
1.16      kristaps   86: .Fa "const struct mchars *p"
1.6       kristaps   87: .Fa "const char *cp"
                     88: .Fa "size_t sz"
                     89: .Fa "size_t *rsz"
                     90: .Fc
                     91: .Ft int
                     92: .Fo mchars_spec2cp
1.16      kristaps   93: .Fa "const struct mchars *p"
1.6       kristaps   94: .Fa "const char *cp"
                     95: .Fa "size_t sz"
                     96: .Fc
1.1       kristaps   97: .Ft "struct mparse *"
                     98: .Fo mparse_alloc
1.23      schwarze   99: .Fa "enum mparset inttype"
1.1       kristaps  100: .Fa "enum mandoclevel wlevel"
1.23      schwarze  101: .Fa "mandocmsg mmsg"
                    102: .Fa "char *defos"
                    103: .Fa "int quick"
1.1       kristaps  104: .Fc
                    105: .Ft void
1.24    ! schwarze  106: .Fo (*mandocmsg)
        !           107: .Fa "enum mandocerr errtype"
        !           108: .Fa "enum mandoclevel level"
        !           109: .Fa "const char *file"
        !           110: .Fa "int line"
        !           111: .Fa "int col"
        !           112: .Fa "const char *msg"
        !           113: .Fc
        !           114: .Ft void
1.1       kristaps  115: .Fo mparse_free
                    116: .Fa "struct mparse *parse"
                    117: .Fc
1.23      schwarze  118: .Ft "const char *"
1.14      kristaps  119: .Fo mparse_getkeep
                    120: .Fa "const struct mparse *parse"
                    121: .Fc
                    122: .Ft void
                    123: .Fo mparse_keep
                    124: .Fa "struct mparse *parse"
                    125: .Fc
1.1       kristaps  126: .Ft "enum mandoclevel"
                    127: .Fo mparse_readfd
                    128: .Fa "struct mparse *parse"
                    129: .Fa "int fd"
                    130: .Fa "const char *fname"
                    131: .Fc
                    132: .Ft void
                    133: .Fo mparse_reset
                    134: .Fa "struct mparse *parse"
                    135: .Fc
                    136: .Ft void
                    137: .Fo mparse_result
                    138: .Fa "struct mparse *parse"
                    139: .Fa "struct mdoc **mdoc"
                    140: .Fa "struct man **man"
1.2       kristaps  141: .Fc
                    142: .Ft "const char *"
                    143: .Fo mparse_strerror
                    144: .Fa "enum mandocerr"
                    145: .Fc
                    146: .Ft "const char *"
                    147: .Fo mparse_strlevel
                    148: .Fa "enum mandoclevel"
1.1       kristaps  149: .Fc
1.24    ! schwarze  150: .In mandoc.h
        !           151: .In mdoc.h
        !           152: .Ft "const struct mdoc_meta *"
        !           153: .Fo mdoc_meta
        !           154: .Fa "const struct mdoc *mdoc"
        !           155: .Fc
        !           156: .Ft "const struct mdoc_node *"
        !           157: .Fo mdoc_node
        !           158: .Fa "const struct mdoc *mdoc"
        !           159: .Fc
1.1       kristaps  160: .Vt extern const char * const * mdoc_argnames;
                    161: .Vt extern const char * const * mdoc_macronames;
1.24    ! schwarze  162: .In mandoc.h
        !           163: .In man.h
        !           164: .Ft "const struct man_meta *"
        !           165: .Fo man_meta
        !           166: .Fa "const struct man *man"
        !           167: .Fc
        !           168: .Ft "const struct mparse *"
        !           169: .Fo man_mparse
        !           170: .Fa "const struct man *man"
        !           171: .Fc
        !           172: .Ft "const struct man_node *"
        !           173: .Fo man_node
        !           174: .Fa "const struct man *man"
        !           175: .Fc
        !           176: .Vt extern const char * const * man_macronames;
1.1       kristaps  177: .Sh DESCRIPTION
                    178: The
                    179: .Nm mandoc
                    180: library parses a
                    181: .Ux
                    182: manual into an abstract syntax tree (AST).
                    183: .Ux
                    184: manuals are composed of
                    185: .Xr mdoc 7
                    186: or
                    187: .Xr man 7 ,
                    188: and may be mixed with
                    189: .Xr roff 7 ,
                    190: .Xr tbl 7 ,
                    191: and
                    192: .Xr eqn 7
                    193: invocations.
                    194: .Pp
                    195: The following describes a general parse sequence:
                    196: .Bl -enum
                    197: .It
                    198: initiate a parsing sequence with
                    199: .Fn mparse_alloc ;
                    200: .It
                    201: parse files or file descriptors with
                    202: .Fn mparse_readfd ;
                    203: .It
                    204: retrieve a parsed syntax tree, if the parse was successful, with
                    205: .Fn mparse_result ;
                    206: .It
                    207: iterate over parse nodes with
                    208: .Fn mdoc_node
                    209: or
                    210: .Fn man_node ;
                    211: .It
                    212: free all allocated memory with
                    213: .Fn mparse_free ,
                    214: or invoke
                    215: .Fn mparse_reset
                    216: and parse new files.
1.3       kristaps  217: .El
1.6       kristaps  218: .Pp
                    219: The
                    220: .Nm
                    221: library also contains routines for translating character strings into glyphs
                    222: .Pq see Fn mchars_alloc
                    223: and parsing escape sequences from strings
                    224: .Pq see Fn mandoc_escape .
1.3       kristaps  225: .Sh REFERENCE
                    226: This section documents the functions, types, and variables available
                    227: via
                    228: .In mandoc.h .
                    229: .Ss Types
                    230: .Bl -ohang
                    231: .It Vt "enum mandoc_esc"
1.11      kristaps  232: An escape sequence classification.
1.3       kristaps  233: .It Vt "enum mandocerr"
1.11      kristaps  234: A fatal error, error, or warning message during parsing.
1.3       kristaps  235: .It Vt "enum mandoclevel"
1.11      kristaps  236: A classification of an
1.23      schwarze  237: .Vt "enum mandocerr"
1.11      kristaps  238: as regards system operation.
1.6       kristaps  239: .It Vt "struct mchars"
                    240: An opaque pointer to an object allowing for translation between
                    241: character strings and glyphs.
                    242: See
                    243: .Fn mchars_alloc .
1.3       kristaps  244: .It Vt "enum mparset"
1.11      kristaps  245: The type of parser when reading input.
                    246: This should usually be
1.12      kristaps  247: .Dv MPARSE_AUTO
1.11      kristaps  248: for auto-detection.
1.3       kristaps  249: .It Vt "struct mparse"
1.11      kristaps  250: An opaque pointer to a running parse sequence.
                    251: Created with
                    252: .Fn mparse_alloc
                    253: and freed with
                    254: .Fn mparse_free .
                    255: This may be used across parsed input if
                    256: .Fn mparse_reset
                    257: is called between parses.
1.3       kristaps  258: .It Vt "mandocmsg"
1.11      kristaps  259: A prototype for a function to handle fatal error, error, and warning
                    260: messages emitted by the parser.
1.3       kristaps  261: .El
                    262: .Ss Functions
                    263: .Bl -ohang
                    264: .It Fn mandoc_escape
1.4       kristaps  265: Scan an escape sequence, i.e., a character string beginning with
                    266: .Sq \e .
1.17      joerg     267: Pass a pointer to the character after the
                    268: .Sq \e
                    269: as
1.4       kristaps  270: .Va end ;
                    271: it will be set to the supremum of the parsed escape sequence unless
1.12      kristaps  272: returning
                    273: .Dv ESCAPE_ERROR ,
                    274: in which case the string is bogus and should be
1.4       kristaps  275: thrown away.
1.12      kristaps  276: If not
                    277: .Dv ESCAPE_ERROR
                    278: or
                    279: .Dv ESCAPE_IGNORE ,
1.4       kristaps  280: .Va start
                    281: is set to the first relevant character of the substring (font, glyph,
                    282: whatever) of length
                    283: .Va sz .
                    284: Both
                    285: .Va start
                    286: and
                    287: .Va sz
1.12      kristaps  288: may be
                    289: .Dv NULL .
1.18      schwarze  290: Declared in
                    291: .In mandoc.h ,
                    292: implemented in
                    293: .Pa mandoc.c .
1.3       kristaps  294: .It Fn man_meta
1.4       kristaps  295: Obtain the meta-data of a successful parse.
                    296: This may only be used on a pointer returned by
                    297: .Fn mparse_result .
1.18      schwarze  298: Declared in
                    299: .In man.h ,
                    300: implemented in
                    301: .Pa man.c .
1.14      kristaps  302: .It Fn man_mparse
                    303: Get the parser used for the current output.
1.18      schwarze  304: Declared in
                    305: .In man.h ,
                    306: implemented in
                    307: .Pa man.c .
1.3       kristaps  308: .It Fn man_node
1.4       kristaps  309: Obtain the root node of a successful parse.
                    310: This may only be used on a pointer returned by
                    311: .Fn mparse_result .
1.18      schwarze  312: Declared in
                    313: .In man.h ,
                    314: implemented in
                    315: .Pa man.c .
1.6       kristaps  316: .It Fn mchars_alloc
                    317: Allocate a
                    318: .Vt "struct mchars *"
                    319: object for translating special characters into glyphs.
                    320: See
                    321: .Xr mandoc_char 7
                    322: for an overview of special characters.
                    323: The object must be freed with
                    324: .Fn mchars_free .
1.18      schwarze  325: Declared in
                    326: .In mandoc.h ,
                    327: implemented in
                    328: .Pa chars.c .
1.6       kristaps  329: .It Fn mchars_free
                    330: Free an object created with
                    331: .Fn mchars_alloc .
1.18      schwarze  332: Declared in
                    333: .In mandoc.h ,
                    334: implemented in
                    335: .Pa chars.c .
1.6       kristaps  336: .It Fn mchars_num2char
1.7       kristaps  337: Convert a character index (e.g., the \eN\(aq\(aq escape) into a
                    338: printable ASCII character.
                    339: Returns \e0 (the nil character) if the input sequence is malformed.
1.18      schwarze  340: Declared in
                    341: .In mandoc.h ,
                    342: implemented in
                    343: .Pa chars.c .
1.7       kristaps  344: .It Fn mchars_num2uc
                    345: Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into
                    346: a Unicode codepoint.
1.6       kristaps  347: Returns \e0 (the nil character) if the input sequence is malformed.
1.18      schwarze  348: Declared in
                    349: .In mandoc.h ,
                    350: implemented in
                    351: .Pa chars.c .
1.6       kristaps  352: .It Fn mchars_spec2cp
                    353: Convert a special character into a valid Unicode codepoint.
1.10      kristaps  354: Returns \-1 on failure or a non-zero Unicode codepoint on success.
1.18      schwarze  355: Declared in
                    356: .In mandoc.h ,
                    357: implemented in
                    358: .Pa chars.c .
1.6       kristaps  359: .It Fn mchars_spec2str
                    360: Convert a special character into an ASCII string.
1.12      kristaps  361: Returns
                    362: .Dv NULL
                    363: on failure.
1.18      schwarze  364: Declared in
                    365: .In mandoc.h ,
                    366: implemented in
                    367: .Pa chars.c .
1.3       kristaps  368: .It Fn mdoc_meta
1.4       kristaps  369: Obtain the meta-data of a successful parse.
                    370: This may only be used on a pointer returned by
                    371: .Fn mparse_result .
1.18      schwarze  372: Declared in
                    373: .In mdoc.h ,
                    374: implemented in
                    375: .Pa mdoc.c .
1.3       kristaps  376: .It Fn mdoc_node
1.4       kristaps  377: Obtain the root node of a successful parse.
                    378: This may only be used on a pointer returned by
                    379: .Fn mparse_result .
1.18      schwarze  380: Declared in
                    381: .In mdoc.h ,
                    382: implemented in
                    383: .Pa mdoc.c .
1.3       kristaps  384: .It Fn mparse_alloc
1.4       kristaps  385: Allocate a parser.
1.23      schwarze  386: The arguments have the following effect:
                    387: .Bl -tag -offset 5n -width inttype
                    388: .It Ar inttype
                    389: When set to
                    390: .Dv MPARSE_MDOC
                    391: or
                    392: .Dv MPARSE_MAN ,
                    393: only that parser will be used.
                    394: With
                    395: .Dv MPARSE_AUTO ,
                    396: the document type will be automatically detected.
                    397: .It Ar wlevel
                    398: Can be set to
                    399: .Dv MANDOCLEVEL_FATAL ,
                    400: .Dv MANDOCLEVEL_ERROR ,
                    401: or
                    402: .Dv MANDOCLEVEL_WARNING .
                    403: Messages below the selected level will be suppressed.
                    404: .It Ar mmsg
                    405: A callback function to handle errors and warnings.
                    406: See
                    407: .Pa main.c
                    408: for an example.
                    409: .It Ar defos
                    410: A default string for the
                    411: .Xr mdoc 7
                    412: .Sq \&Os
                    413: macro, overriding the
                    414: .Dv OSNAME
                    415: preprocessor definition and the results of
                    416: .Xr uname 3 .
                    417: .It Ar quick
                    418: When set, parsing is aborted after the NAME section.
                    419: This is for example useful to quickly build minimal databases.
                    420: .El
                    421: .Pp
1.4       kristaps  422: The same parser may be used for multiple files so long as
                    423: .Fn mparse_reset
                    424: is called between parses.
                    425: .Fn mparse_free
                    426: must be called to free the memory allocated by this function.
1.18      schwarze  427: Declared in
                    428: .In mandoc.h ,
                    429: implemented in
                    430: .Pa read.c .
1.3       kristaps  431: .It Fn mparse_free
1.4       kristaps  432: Free all memory allocated by
                    433: .Fn mparse_alloc .
1.18      schwarze  434: Declared in
                    435: .In mandoc.h ,
                    436: implemented in
                    437: .Pa read.c .
1.14      kristaps  438: .It Fn mparse_getkeep
                    439: Acquire the keep buffer.
                    440: Must follow a call of
                    441: .Fn mparse_keep .
1.18      schwarze  442: Declared in
                    443: .In mandoc.h ,
                    444: implemented in
                    445: .Pa read.c .
1.14      kristaps  446: .It Fn mparse_keep
                    447: Instruct the parser to retain a copy of its parsed input.
                    448: This can be acquired with subsequent
                    449: .Fn mparse_getkeep
                    450: calls.
1.18      schwarze  451: Declared in
                    452: .In mandoc.h ,
                    453: implemented in
                    454: .Pa read.c .
1.3       kristaps  455: .It Fn mparse_readfd
1.4       kristaps  456: Parse a file or file descriptor.
                    457: If
                    458: .Va fd
                    459: is \-1,
                    460: .Va fname
                    461: is opened for reading.
                    462: Otherwise,
                    463: .Va fname
                    464: is assumed to be the name associated with
                    465: .Va fd .
                    466: This may be called multiple times with different parameters; however,
                    467: .Fn mparse_reset
                    468: should be invoked between parses.
1.18      schwarze  469: Declared in
                    470: .In mandoc.h ,
                    471: implemented in
                    472: .Pa read.c .
1.3       kristaps  473: .It Fn mparse_reset
1.4       kristaps  474: Reset a parser so that
                    475: .Fn mparse_readfd
                    476: may be used again.
1.18      schwarze  477: Declared in
                    478: .In mandoc.h ,
                    479: implemented in
                    480: .Pa read.c .
1.3       kristaps  481: .It Fn mparse_result
1.4       kristaps  482: Obtain the result of a parse.
                    483: Only successful parses
                    484: .Po
                    485: i.e., those where
                    486: .Fn mparse_readfd
                    487: returned less than MANDOCLEVEL_FATAL
                    488: .Pc
                    489: should invoke this function, in which case one of the two pointers will
                    490: be filled in.
1.18      schwarze  491: Declared in
                    492: .In mandoc.h ,
                    493: implemented in
                    494: .Pa read.c .
1.3       kristaps  495: .It Fn mparse_strerror
1.4       kristaps  496: Return a statically-allocated string representation of an error code.
1.18      schwarze  497: Declared in
                    498: .In mandoc.h ,
                    499: implemented in
                    500: .Pa read.c .
1.3       kristaps  501: .It Fn mparse_strlevel
1.4       kristaps  502: Return a statically-allocated string representation of a level code.
1.18      schwarze  503: Declared in
                    504: .In mandoc.h ,
                    505: implemented in
                    506: .Pa read.c .
1.3       kristaps  507: .El
                    508: .Ss Variables
                    509: .Bl -ohang
                    510: .It Va man_macronames
1.4       kristaps  511: The string representation of a man macro as indexed by
                    512: .Vt "enum mant" .
1.3       kristaps  513: .It Va mdoc_argnames
1.4       kristaps  514: The string representation of an mdoc macro argument as indexed by
                    515: .Vt "enum mdocargt" .
1.3       kristaps  516: .It Va mdoc_macronames
1.4       kristaps  517: The string representation of an mdoc macro as indexed by
                    518: .Vt "enum mdoct" .
1.1       kristaps  519: .El
                    520: .Sh IMPLEMENTATION NOTES
                    521: This section consists of structural documentation for
                    522: .Xr mdoc 7
                    523: and
                    524: .Xr man 7
1.11      kristaps  525: syntax trees and strings.
                    526: .Ss Man and Mdoc Strings
                    527: Strings may be extracted from mdoc and man meta-data, or from text
                    528: nodes (MDOC_TEXT and MAN_TEXT, respectively).
                    529: These strings have special non-printing formatting cues embedded in the
                    530: text itself, as well as
                    531: .Xr roff 7
                    532: escapes preserved from input.
                    533: Implementing systems will need to handle both situations to produce
                    534: human-readable text.
                    535: In general, strings may be assumed to consist of 7-bit ASCII characters.
                    536: .Pp
                    537: The following non-printing characters may be embedded in text strings:
                    538: .Bl -tag -width Ds
                    539: .It Dv ASCII_NBRSP
                    540: A non-breaking space character.
                    541: .It Dv ASCII_HYPH
                    542: A soft hyphen.
                    543: .El
                    544: .Pp
                    545: Escape sequences are also passed verbatim into text strings.
                    546: An escape sequence is a sequence of characters beginning with the
                    547: backslash
                    548: .Pq Sq \e .
                    549: To construct human-readable text, these should be intercepted with
                    550: .Fn mandoc_escape
                    551: and converted with one of
                    552: .Fn mchars_num2char ,
                    553: .Fn mchars_spec2str ,
                    554: and so on.
1.1       kristaps  555: .Ss Man Abstract Syntax Tree
                    556: This AST is governed by the ontological rules dictated in
                    557: .Xr man 7
                    558: and derives its terminology accordingly.
                    559: .Pp
                    560: The AST is composed of
                    561: .Vt struct man_node
                    562: nodes with element, root and text types as declared by the
                    563: .Va type
                    564: field.
                    565: Each node also provides its parse point (the
                    566: .Va line ,
                    567: .Va sec ,
                    568: and
                    569: .Va pos
                    570: fields), its position in the tree (the
                    571: .Va parent ,
                    572: .Va child ,
                    573: .Va next
                    574: and
                    575: .Va prev
                    576: fields) and some type-specific data.
                    577: .Pp
                    578: The tree itself is arranged according to the following normal form,
                    579: where capitalised non-terminals represent nodes.
                    580: .Pp
                    581: .Bl -tag -width "ELEMENTXX" -compact
                    582: .It ROOT
                    583: \(<- mnode+
                    584: .It mnode
                    585: \(<- ELEMENT | TEXT | BLOCK
                    586: .It BLOCK
                    587: \(<- HEAD BODY
                    588: .It HEAD
                    589: \(<- mnode*
                    590: .It BODY
                    591: \(<- mnode*
                    592: .It ELEMENT
                    593: \(<- ELEMENT | TEXT*
                    594: .It TEXT
1.11      kristaps  595: \(<- [[:ascii:]]*
1.1       kristaps  596: .El
                    597: .Pp
                    598: The only elements capable of nesting other elements are those with
                    599: next-line scope as documented in
                    600: .Xr man 7 .
                    601: .Ss Mdoc Abstract Syntax Tree
                    602: This AST is governed by the ontological
                    603: rules dictated in
                    604: .Xr mdoc 7
                    605: and derives its terminology accordingly.
                    606: .Qq In-line
                    607: elements described in
                    608: .Xr mdoc 7
                    609: are described simply as
                    610: .Qq elements .
                    611: .Pp
                    612: The AST is composed of
                    613: .Vt struct mdoc_node
                    614: nodes with block, head, body, element, root and text types as declared
                    615: by the
                    616: .Va type
                    617: field.
                    618: Each node also provides its parse point (the
                    619: .Va line ,
                    620: .Va sec ,
                    621: and
                    622: .Va pos
                    623: fields), its position in the tree (the
                    624: .Va parent ,
                    625: .Va child ,
                    626: .Va nchild ,
                    627: .Va next
                    628: and
                    629: .Va prev
                    630: fields) and some type-specific data, in particular, for nodes generated
                    631: from macros, the generating macro in the
                    632: .Va tok
                    633: field.
                    634: .Pp
                    635: The tree itself is arranged according to the following normal form,
                    636: where capitalised non-terminals represent nodes.
                    637: .Pp
                    638: .Bl -tag -width "ELEMENTXX" -compact
                    639: .It ROOT
                    640: \(<- mnode+
                    641: .It mnode
                    642: \(<- BLOCK | ELEMENT | TEXT
                    643: .It BLOCK
                    644: \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
                    645: .It ELEMENT
                    646: \(<- TEXT*
                    647: .It HEAD
                    648: \(<- mnode*
                    649: .It BODY
                    650: \(<- mnode* [ENDBODY mnode*]
                    651: .It TAIL
                    652: \(<- mnode*
                    653: .It TEXT
1.11      kristaps  654: \(<- [[:ascii:]]*
1.1       kristaps  655: .El
                    656: .Pp
                    657: Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
                    658: the BLOCK production: these refer to punctuation marks.
                    659: Furthermore, although a TEXT node will generally have a non-zero-length
                    660: string, in the specific case of
                    661: .Sq \&.Bd \-literal ,
                    662: an empty line will produce a zero-length string.
                    663: Multiple body parts are only found in invocations of
                    664: .Sq \&Bl \-column ,
                    665: where a new body introduces a new phrase.
                    666: .Pp
                    667: The
                    668: .Xr mdoc 7
1.5       kristaps  669: syntax tree accommodates broken block structures as well.
1.1       kristaps  670: The ENDBODY node is available to end the formatting associated
                    671: with a given block before the physical end of that block.
                    672: It has a non-null
                    673: .Va end
                    674: field, is of the BODY
                    675: .Va type ,
                    676: has the same
                    677: .Va tok
                    678: as the BLOCK it is ending, and has a
                    679: .Va pending
                    680: field pointing to that BLOCK's BODY node.
                    681: It is an indirect child of that BODY node
                    682: and has no children of its own.
                    683: .Pp
                    684: An ENDBODY node is generated when a block ends while one of its child
                    685: blocks is still open, as in the following example:
                    686: .Bd -literal -offset indent
                    687: \&.Ao ao
                    688: \&.Bo bo ac
                    689: \&.Ac bc
                    690: \&.Bc end
                    691: .Ed
                    692: .Pp
                    693: This example results in the following block structure:
                    694: .Bd -literal -offset indent
                    695: BLOCK Ao
                    696:     HEAD Ao
                    697:     BODY Ao
                    698:         TEXT ao
                    699:         BLOCK Bo, pending -> Ao
                    700:             HEAD Bo
                    701:             BODY Bo
                    702:                 TEXT bo
                    703:                 TEXT ac
                    704:                 ENDBODY Ao, pending -> Ao
                    705:                 TEXT bc
                    706: TEXT end
                    707: .Ed
                    708: .Pp
                    709: Here, the formatting of the
                    710: .Sq \&Ao
                    711: block extends from TEXT ao to TEXT ac,
                    712: while the formatting of the
                    713: .Sq \&Bo
                    714: block extends from TEXT bo to TEXT bc.
                    715: It renders as follows in
                    716: .Fl T Ns Cm ascii
                    717: mode:
                    718: .Pp
                    719: .Dl <ao [bo ac> bc] end
                    720: .Pp
                    721: Support for badly-nested blocks is only provided for backward
                    722: compatibility with some older
                    723: .Xr mdoc 7
                    724: implementations.
                    725: Using badly-nested blocks is
                    726: .Em strongly discouraged ;
                    727: for example, the
                    728: .Fl T Ns Cm html
                    729: and
                    730: .Fl T Ns Cm xhtml
                    731: front-ends to
                    732: .Xr mandoc 1
                    733: are unable to render them in any meaningful way.
                    734: Furthermore, behaviour when encountering badly-nested blocks is not
                    735: consistent across troff implementations, especially when using multiple
                    736: levels of badly-nested blocks.
                    737: .Sh SEE ALSO
                    738: .Xr mandoc 1 ,
                    739: .Xr eqn 7 ,
                    740: .Xr man 7 ,
1.6       kristaps  741: .Xr mandoc_char 7 ,
1.1       kristaps  742: .Xr mdoc 7 ,
                    743: .Xr roff 7 ,
                    744: .Xr tbl 7
                    745: .Sh AUTHORS
                    746: The
                    747: .Nm
                    748: library was written by
1.19      schwarze  749: .An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .

CVSweb