[BACK]Return to mandoc.3 CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

Annotation of mandoc/mandoc.3, Revision 1.45

1.45    ! schwarze    1: .\" $Id: mandoc.3,v 1.44 2018/12/30 00:49:55 schwarze Exp $
1.1       kristaps    2: .\"
                      3: .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.38      schwarze    4: .\" Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
1.1       kristaps    5: .\"
                      6: .\" Permission to use, copy, modify, and distribute this software for any
                      7: .\" purpose with or without fee is hereby granted, provided that the above
                      8: .\" copyright notice and this permission notice appear in all copies.
                      9: .\"
                     10: .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     11: .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     12: .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     13: .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14: .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15: .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16: .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     17: .\"
1.45    ! schwarze   18: .Dd $Mdocdate: December 30 2018 $
1.1       kristaps   19: .Dt MANDOC 3
                     20: .Os
                     21: .Sh NAME
                     22: .Nm mandoc ,
1.37      schwarze   23: .Nm deroff ,
1.1       kristaps   24: .Nm mparse_alloc ,
1.42      schwarze   25: .Nm mparse_copy ,
1.1       kristaps   26: .Nm mparse_free ,
1.26      schwarze   27: .Nm mparse_open ,
1.1       kristaps   28: .Nm mparse_readfd ,
                     29: .Nm mparse_reset ,
1.43      schwarze   30: .Nm mparse_result
1.1       kristaps   31: .Nd mandoc macro compiler library
                     32: .Sh SYNOPSIS
1.25      schwarze   33: .In sys/types.h
1.43      schwarze   34: .In stdio.h
1.1       kristaps   35: .In mandoc.h
1.45    ! schwarze   36: .In roff.h
        !            37: .In mandoc_parse.h
1.31      schwarze   38: .Pp
1.24      schwarze   39: .Fd "#define ASCII_NBRSP"
                     40: .Fd "#define ASCII_HYPH"
                     41: .Fd "#define ASCII_BREAK"
1.25      schwarze   42: .Ft struct mparse *
1.1       kristaps   43: .Fo mparse_alloc
1.25      schwarze   44: .Fa "int options"
1.40      schwarze   45: .Fa "enum mandoc_os os_e"
                     46: .Fa "char *os_s"
1.1       kristaps   47: .Fc
                     48: .Ft void
                     49: .Fo mparse_free
                     50: .Fa "struct mparse *parse"
                     51: .Fc
1.42      schwarze   52: .Ft void
                     53: .Fo mparse_copy
1.14      kristaps   54: .Fa "const struct mparse *parse"
                     55: .Fc
1.35      schwarze   56: .Ft int
1.26      schwarze   57: .Fo mparse_open
                     58: .Fa "struct mparse *parse"
                     59: .Fa "const char *fname"
                     60: .Fc
1.43      schwarze   61: .Ft void
1.1       kristaps   62: .Fo mparse_readfd
                     63: .Fa "struct mparse *parse"
                     64: .Fa "int fd"
                     65: .Fa "const char *fname"
                     66: .Fc
                     67: .Ft void
                     68: .Fo mparse_reset
                     69: .Fa "struct mparse *parse"
                     70: .Fc
1.44      schwarze   71: .Ft struct roff_meta *
1.1       kristaps   72: .Fo mparse_result
                     73: .Fa "struct mparse *parse"
1.2       kristaps   74: .Fc
1.37      schwarze   75: .In roff.h
                     76: .Ft void
                     77: .Fo deroff
                     78: .Fa "char **dest"
                     79: .Fa "const struct roff_node *node"
                     80: .Fc
1.25      schwarze   81: .In sys/types.h
1.24      schwarze   82: .In mandoc.h
                     83: .In mdoc.h
1.37      schwarze   84: .Vt extern const char * const * mdoc_argnames;
                     85: .Vt extern const char * const * mdoc_macronames;
1.25      schwarze   86: .In sys/types.h
1.24      schwarze   87: .In mandoc.h
                     88: .In man.h
1.37      schwarze   89: .Vt extern const char * const * man_macronames;
1.1       kristaps   90: .Sh DESCRIPTION
                     91: The
                     92: .Nm mandoc
                     93: library parses a
                     94: .Ux
                     95: manual into an abstract syntax tree (AST).
                     96: .Ux
                     97: manuals are composed of
                     98: .Xr mdoc 7
                     99: or
                    100: .Xr man 7 ,
                    101: and may be mixed with
                    102: .Xr roff 7 ,
                    103: .Xr tbl 7 ,
                    104: and
                    105: .Xr eqn 7
                    106: invocations.
                    107: .Pp
                    108: The following describes a general parse sequence:
                    109: .Bl -enum
                    110: .It
                    111: initiate a parsing sequence with
1.27      schwarze  112: .Xr mchars_alloc 3
                    113: and
1.1       kristaps  114: .Fn mparse_alloc ;
                    115: .It
1.31      schwarze  116: open a file with
                    117: .Xr open 2
                    118: or
                    119: .Fn mparse_open ;
                    120: .It
                    121: parse it with
1.1       kristaps  122: .Fn mparse_readfd ;
                    123: .It
1.34      schwarze  124: close it with
                    125: .Xr close 2 ;
                    126: .It
1.31      schwarze  127: retrieve the syntax tree with
1.1       kristaps  128: .Fn mparse_result ;
                    129: .It
1.38      schwarze  130: if information about the validity of the input is needed, fetch it with
                    131: .Fn mparse_updaterc ;
                    132: .It
1.37      schwarze  133: iterate over parse nodes starting from the
                    134: .Fa first
                    135: member of the returned
1.44      schwarze  136: .Vt struct roff_meta ;
1.1       kristaps  137: .It
                    138: free all allocated memory with
1.27      schwarze  139: .Fn mparse_free
                    140: and
                    141: .Xr mchars_free 3 ,
1.1       kristaps  142: or invoke
                    143: .Fn mparse_reset
1.37      schwarze  144: and go back to step 2 to parse new files.
1.3       kristaps  145: .El
                    146: .Sh REFERENCE
                    147: This section documents the functions, types, and variables available
                    148: via
1.25      schwarze  149: .In mandoc.h ,
                    150: with the exception of those documented in
                    151: .Xr mandoc_escape 3
                    152: and
                    153: .Xr mchars_alloc 3 .
1.3       kristaps  154: .Ss Types
                    155: .Bl -ohang
                    156: .It Vt "enum mandocerr"
1.31      schwarze  157: An error or warning message during parsing.
1.3       kristaps  158: .It Vt "enum mandoclevel"
1.11      kristaps  159: A classification of an
1.23      schwarze  160: .Vt "enum mandocerr"
1.11      kristaps  161: as regards system operation.
1.37      schwarze  162: See the DIAGNOSTICS section in
                    163: .Xr mandoc 1
                    164: regarding the meanings of the levels.
1.3       kristaps  165: .It Vt "struct mparse"
1.11      kristaps  166: An opaque pointer to a running parse sequence.
                    167: Created with
                    168: .Fn mparse_alloc
                    169: and freed with
                    170: .Fn mparse_free .
                    171: This may be used across parsed input if
                    172: .Fn mparse_reset
                    173: is called between parses.
1.3       kristaps  174: .El
                    175: .Ss Functions
                    176: .Bl -ohang
1.37      schwarze  177: .It Fn deroff
1.25      schwarze  178: Obtain a text-only representation of a
1.37      schwarze  179: .Vt struct roff_node ,
1.25      schwarze  180: including text contained in its child nodes.
1.37      schwarze  181: To be used on children of the
                    182: .Fa first
                    183: member of
1.44      schwarze  184: .Vt struct roff_meta .
1.25      schwarze  185: When it is no longer needed, the pointer returned from
1.37      schwarze  186: .Fn deroff
1.25      schwarze  187: can be passed to
                    188: .Xr free 3 .
1.3       kristaps  189: .It Fn mparse_alloc
1.4       kristaps  190: Allocate a parser.
1.23      schwarze  191: The arguments have the following effect:
                    192: .Bl -tag -offset 5n -width inttype
1.25      schwarze  193: .It Ar options
                    194: When the
1.23      schwarze  195: .Dv MPARSE_MDOC
                    196: or
1.25      schwarze  197: .Dv MPARSE_MAN
                    198: bit is set, only that parser is used.
                    199: Otherwise, the document type is automatically detected.
                    200: .Pp
                    201: When the
                    202: .Dv MPARSE_SO
                    203: bit is set,
                    204: .Xr roff 7
                    205: .Ic \&so
                    206: file inclusion requests are always honoured.
                    207: Otherwise, if the request is the only content in an input file,
                    208: only the file name is remembered, to be returned in the
                    209: .Fa sodest
1.44      schwarze  210: field of
                    211: .Vt struct roff_meta .
1.25      schwarze  212: .Pp
                    213: When the
                    214: .Dv MPARSE_QUICK
                    215: bit is set, parsing is aborted after the NAME section.
                    216: This is for example useful in
                    217: .Xr makewhatis 8
                    218: .Fl Q
                    219: to quickly build minimal databases.
1.44      schwarze  220: .Pp
                    221: When the
                    222: .Dv MPARSE_VALIDATE
                    223: bit is set,
                    224: .Fn mparse_result
                    225: runs the validation functions before returning the syntax tree.
                    226: This is almost always required, except in certain debugging scenarios,
                    227: for example to dump unvalidated syntax trees.
1.40      schwarze  228: .It Ar os_e
                    229: Operating system to check base system conventions for.
                    230: If
                    231: .Dv MANDOC_OS_OTHER ,
                    232: the system is automatically detected from
                    233: .Ic \&Os ,
                    234: .Fl Ios ,
                    235: or
                    236: .Xr uname 3 .
                    237: .It Ar os_s
1.23      schwarze  238: A default string for the
                    239: .Xr mdoc 7
1.40      schwarze  240: .Ic \&Os
1.23      schwarze  241: macro, overriding the
                    242: .Dv OSNAME
                    243: preprocessor definition and the results of
                    244: .Xr uname 3 .
1.37      schwarze  245: Passing
                    246: .Dv NULL
                    247: sets no default.
1.23      schwarze  248: .El
                    249: .Pp
1.4       kristaps  250: The same parser may be used for multiple files so long as
                    251: .Fn mparse_reset
                    252: is called between parses.
                    253: .Fn mparse_free
                    254: must be called to free the memory allocated by this function.
1.18      schwarze  255: Declared in
                    256: .In mandoc.h ,
                    257: implemented in
                    258: .Pa read.c .
1.3       kristaps  259: .It Fn mparse_free
1.4       kristaps  260: Free all memory allocated by
                    261: .Fn mparse_alloc .
1.18      schwarze  262: Declared in
                    263: .In mandoc.h ,
                    264: implemented in
                    265: .Pa read.c .
1.42      schwarze  266: .It Fn mparse_copy
                    267: Dump a copy of the input to the standard output; used for
                    268: .Fl man T Ns Cm man .
1.18      schwarze  269: Declared in
                    270: .In mandoc.h ,
                    271: implemented in
                    272: .Pa read.c .
1.26      schwarze  273: .It Fn mparse_open
1.32      schwarze  274: Open the file for reading.
                    275: If that fails and
1.26      schwarze  276: .Fa fname
1.32      schwarze  277: does not already end in
                    278: .Ql .gz ,
                    279: try again after appending
                    280: .Ql .gz .
                    281: Save the information whether the file is zipped or not.
1.35      schwarze  282: Return a file descriptor open for reading or -1 on failure.
1.26      schwarze  283: It can be passed to
                    284: .Fn mparse_readfd
                    285: or used directly.
                    286: Declared in
                    287: .In mandoc.h ,
                    288: implemented in
                    289: .Pa read.c .
1.3       kristaps  290: .It Fn mparse_readfd
1.30      schwarze  291: Parse a file descriptor opened with
                    292: .Xr open 2
                    293: or
1.29      schwarze  294: .Fn mparse_open .
1.30      schwarze  295: Pass the associated filename in
                    296: .Va fname .
1.29      schwarze  297: This function may be called multiple times with different parameters; however,
1.34      schwarze  298: .Xr close 2
                    299: and
1.4       kristaps  300: .Fn mparse_reset
                    301: should be invoked between parses.
1.18      schwarze  302: Declared in
                    303: .In mandoc.h ,
                    304: implemented in
                    305: .Pa read.c .
1.3       kristaps  306: .It Fn mparse_reset
1.4       kristaps  307: Reset a parser so that
                    308: .Fn mparse_readfd
                    309: may be used again.
1.18      schwarze  310: Declared in
                    311: .In mandoc.h ,
                    312: implemented in
                    313: .Pa read.c .
1.3       kristaps  314: .It Fn mparse_result
1.4       kristaps  315: Obtain the result of a parse.
1.18      schwarze  316: Declared in
                    317: .In mandoc.h ,
                    318: implemented in
                    319: .Pa read.c .
1.3       kristaps  320: .El
                    321: .Ss Variables
                    322: .Bl -ohang
                    323: .It Va man_macronames
1.37      schwarze  324: The string representation of a
                    325: .Xr man 7
                    326: macro as indexed by
1.4       kristaps  327: .Vt "enum mant" .
1.3       kristaps  328: .It Va mdoc_argnames
1.37      schwarze  329: The string representation of an
                    330: .Xr mdoc 7
                    331: macro argument as indexed by
1.4       kristaps  332: .Vt "enum mdocargt" .
1.3       kristaps  333: .It Va mdoc_macronames
1.37      schwarze  334: The string representation of an
                    335: .Xr mdoc 7
                    336: macro as indexed by
1.4       kristaps  337: .Vt "enum mdoct" .
1.1       kristaps  338: .El
                    339: .Sh IMPLEMENTATION NOTES
                    340: This section consists of structural documentation for
                    341: .Xr mdoc 7
                    342: and
                    343: .Xr man 7
1.11      kristaps  344: syntax trees and strings.
                    345: .Ss Man and Mdoc Strings
                    346: Strings may be extracted from mdoc and man meta-data, or from text
                    347: nodes (MDOC_TEXT and MAN_TEXT, respectively).
                    348: These strings have special non-printing formatting cues embedded in the
                    349: text itself, as well as
                    350: .Xr roff 7
                    351: escapes preserved from input.
                    352: Implementing systems will need to handle both situations to produce
                    353: human-readable text.
                    354: In general, strings may be assumed to consist of 7-bit ASCII characters.
                    355: .Pp
                    356: The following non-printing characters may be embedded in text strings:
                    357: .Bl -tag -width Ds
                    358: .It Dv ASCII_NBRSP
                    359: A non-breaking space character.
                    360: .It Dv ASCII_HYPH
                    361: A soft hyphen.
1.25      schwarze  362: .It Dv ASCII_BREAK
                    363: A breakable zero-width space.
1.11      kristaps  364: .El
                    365: .Pp
                    366: Escape characters are also passed verbatim into text strings.
                    367: An escape character is a sequence of characters beginning with the
                    368: backslash
                    369: .Pq Sq \e .
                    370: To construct human-readable text, these should be intercepted with
1.25      schwarze  371: .Xr mandoc_escape 3
                    372: and converted with one of the functions described in
                    373: .Xr mchars_alloc 3 .
1.1       kristaps  374: .Ss Man Abstract Syntax Tree
                    375: This AST is governed by the ontological rules dictated in
                    376: .Xr man 7
                    377: and derives its terminology accordingly.
                    378: .Pp
                    379: The AST is composed of
1.37      schwarze  380: .Vt struct roff_node
1.1       kristaps  381: nodes with element, root and text types as declared by the
                    382: .Va type
                    383: field.
                    384: Each node also provides its parse point (the
                    385: .Va line ,
1.37      schwarze  386: .Va pos ,
1.1       kristaps  387: and
1.37      schwarze  388: .Va sec
1.1       kristaps  389: fields), its position in the tree (the
                    390: .Va parent ,
                    391: .Va child ,
                    392: .Va next
                    393: and
                    394: .Va prev
                    395: fields) and some type-specific data.
                    396: .Pp
                    397: The tree itself is arranged according to the following normal form,
                    398: where capitalised non-terminals represent nodes.
                    399: .Pp
                    400: .Bl -tag -width "ELEMENTXX" -compact
                    401: .It ROOT
                    402: \(<- mnode+
                    403: .It mnode
                    404: \(<- ELEMENT | TEXT | BLOCK
                    405: .It BLOCK
                    406: \(<- HEAD BODY
                    407: .It HEAD
                    408: \(<- mnode*
                    409: .It BODY
                    410: \(<- mnode*
                    411: .It ELEMENT
                    412: \(<- ELEMENT | TEXT*
                    413: .It TEXT
1.11      kristaps  414: \(<- [[:ascii:]]*
1.1       kristaps  415: .El
                    416: .Pp
                    417: The only elements capable of nesting other elements are those with
1.25      schwarze  418: next-line scope as documented in
1.1       kristaps  419: .Xr man 7 .
                    420: .Ss Mdoc Abstract Syntax Tree
                    421: This AST is governed by the ontological
                    422: rules dictated in
                    423: .Xr mdoc 7
                    424: and derives its terminology accordingly.
                    425: .Qq In-line
                    426: elements described in
                    427: .Xr mdoc 7
                    428: are described simply as
                    429: .Qq elements .
                    430: .Pp
                    431: The AST is composed of
1.37      schwarze  432: .Vt struct roff_node
1.1       kristaps  433: nodes with block, head, body, element, root and text types as declared
                    434: by the
                    435: .Va type
                    436: field.
                    437: Each node also provides its parse point (the
                    438: .Va line ,
1.37      schwarze  439: .Va pos ,
1.1       kristaps  440: and
1.37      schwarze  441: .Va sec
1.1       kristaps  442: fields), its position in the tree (the
                    443: .Va parent ,
                    444: .Va child ,
1.36      schwarze  445: .Va last ,
1.1       kristaps  446: .Va next
                    447: and
                    448: .Va prev
                    449: fields) and some type-specific data, in particular, for nodes generated
                    450: from macros, the generating macro in the
                    451: .Va tok
                    452: field.
                    453: .Pp
                    454: The tree itself is arranged according to the following normal form,
                    455: where capitalised non-terminals represent nodes.
                    456: .Pp
                    457: .Bl -tag -width "ELEMENTXX" -compact
                    458: .It ROOT
                    459: \(<- mnode+
                    460: .It mnode
                    461: \(<- BLOCK | ELEMENT | TEXT
                    462: .It BLOCK
                    463: \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
                    464: .It ELEMENT
                    465: \(<- TEXT*
                    466: .It HEAD
                    467: \(<- mnode*
                    468: .It BODY
                    469: \(<- mnode* [ENDBODY mnode*]
                    470: .It TAIL
                    471: \(<- mnode*
                    472: .It TEXT
1.11      kristaps  473: \(<- [[:ascii:]]*
1.1       kristaps  474: .El
                    475: .Pp
                    476: Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
                    477: the BLOCK production: these refer to punctuation marks.
                    478: Furthermore, although a TEXT node will generally have a non-zero-length
                    479: string, in the specific case of
                    480: .Sq \&.Bd \-literal ,
                    481: an empty line will produce a zero-length string.
                    482: Multiple body parts are only found in invocations of
                    483: .Sq \&Bl \-column ,
                    484: where a new body introduces a new phrase.
                    485: .Pp
                    486: The
                    487: .Xr mdoc 7
1.5       kristaps  488: syntax tree accommodates for broken block structures as well.
1.1       kristaps  489: The ENDBODY node is available to end the formatting associated
                    490: with a given block before the physical end of that block.
                    491: It has a non-null
                    492: .Va end
                    493: field, is of the BODY
                    494: .Va type ,
                    495: has the same
                    496: .Va tok
                    497: as the BLOCK it is ending, and has a
                    498: .Va pending
                    499: field pointing to that BLOCK's BODY node.
                    500: It is an indirect child of that BODY node
                    501: and has no children of its own.
                    502: .Pp
                    503: An ENDBODY node is generated when a block ends while one of its child
                    504: blocks is still open, like in the following example:
                    505: .Bd -literal -offset indent
                    506: \&.Ao ao
                    507: \&.Bo bo ac
                    508: \&.Ac bc
                    509: \&.Bc end
                    510: .Ed
                    511: .Pp
                    512: This example results in the following block structure:
                    513: .Bd -literal -offset indent
                    514: BLOCK Ao
                    515:     HEAD Ao
                    516:     BODY Ao
                    517:         TEXT ao
                    518:         BLOCK Bo, pending -> Ao
                    519:             HEAD Bo
                    520:             BODY Bo
                    521:                 TEXT bo
                    522:                 TEXT ac
                    523:                 ENDBODY Ao, pending -> Ao
                    524:                 TEXT bc
                    525: TEXT end
                    526: .Ed
                    527: .Pp
                    528: Here, the formatting of the
1.40      schwarze  529: .Ic \&Ao
1.1       kristaps  530: block extends from TEXT ao to TEXT ac,
                    531: while the formatting of the
1.40      schwarze  532: .Ic \&Bo
1.1       kristaps  533: block extends from TEXT bo to TEXT bc.
                    534: It renders as follows in
                    535: .Fl T Ns Cm ascii
                    536: mode:
                    537: .Pp
                    538: .Dl <ao [bo ac> bc] end
                    539: .Pp
                    540: Support for badly-nested blocks is only provided for backward
                    541: compatibility with some older
                    542: .Xr mdoc 7
                    543: implementations.
                    544: Using badly-nested blocks is
                    545: .Em strongly discouraged ;
                    546: for example, the
                    547: .Fl T Ns Cm html
1.39      schwarze  548: front-end to
1.1       kristaps  549: .Xr mandoc 1
1.39      schwarze  550: is unable to render them in any meaningful way.
1.1       kristaps  551: Furthermore, behaviour when encountering badly-nested blocks is not
1.25      schwarze  552: consistent across troff implementations, especially when using multiple
1.1       kristaps  553: levels of badly-nested blocks.
                    554: .Sh SEE ALSO
                    555: .Xr mandoc 1 ,
1.37      schwarze  556: .Xr man.cgi 3 ,
1.25      schwarze  557: .Xr mandoc_escape 3 ,
1.37      schwarze  558: .Xr mandoc_headers 3 ,
1.25      schwarze  559: .Xr mandoc_malloc 3 ,
1.37      schwarze  560: .Xr mansearch 3 ,
1.25      schwarze  561: .Xr mchars_alloc 3 ,
1.37      schwarze  562: .Xr tbl 3 ,
1.1       kristaps  563: .Xr eqn 7 ,
                    564: .Xr man 7 ,
1.6       kristaps  565: .Xr mandoc_char 7 ,
1.1       kristaps  566: .Xr mdoc 7 ,
                    567: .Xr roff 7 ,
                    568: .Xr tbl 7
                    569: .Sh AUTHORS
1.37      schwarze  570: .An -nosplit
1.1       kristaps  571: The
                    572: .Nm
                    573: library was written by
1.37      schwarze  574: .An Kristaps Dzonsons Aq Mt kristaps@bsd.lv
                    575: and is maintained by
                    576: .An Ingo Schwarze Aq Mt schwarze@openbsd.org .

CVSweb