[BACK]Return to mandoc.3 CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

Annotation of mandoc/mandoc.3, Revision 1.44

1.44    ! schwarze    1: .\"    $Id: mandoc.3,v 1.43 2018/12/14 01:18:25 schwarze Exp $
1.1       kristaps    2: .\"
                      3: .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.38      schwarze    4: .\" Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
1.1       kristaps    5: .\"
                      6: .\" Permission to use, copy, modify, and distribute this software for any
                      7: .\" purpose with or without fee is hereby granted, provided that the above
                      8: .\" copyright notice and this permission notice appear in all copies.
                      9: .\"
                     10: .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     11: .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     12: .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     13: .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14: .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15: .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16: .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     17: .\"
1.44    ! schwarze   18: .Dd $Mdocdate: December 14 2018 $
1.1       kristaps   19: .Dt MANDOC 3
                     20: .Os
                     21: .Sh NAME
                     22: .Nm mandoc ,
1.37      schwarze   23: .Nm deroff ,
1.1       kristaps   24: .Nm mparse_alloc ,
1.42      schwarze   25: .Nm mparse_copy ,
1.1       kristaps   26: .Nm mparse_free ,
1.26      schwarze   27: .Nm mparse_open ,
1.1       kristaps   28: .Nm mparse_readfd ,
                     29: .Nm mparse_reset ,
1.43      schwarze   30: .Nm mparse_result
1.1       kristaps   31: .Nd mandoc macro compiler library
                     32: .Sh SYNOPSIS
1.25      schwarze   33: .In sys/types.h
1.43      schwarze   34: .In stdio.h
1.1       kristaps   35: .In mandoc.h
1.31      schwarze   36: .Pp
1.24      schwarze   37: .Fd "#define ASCII_NBRSP"
                     38: .Fd "#define ASCII_HYPH"
                     39: .Fd "#define ASCII_BREAK"
1.25      schwarze   40: .Ft struct mparse *
1.1       kristaps   41: .Fo mparse_alloc
1.25      schwarze   42: .Fa "int options"
1.40      schwarze   43: .Fa "enum mandoc_os os_e"
                     44: .Fa "char *os_s"
1.1       kristaps   45: .Fc
                     46: .Ft void
                     47: .Fo mparse_free
                     48: .Fa "struct mparse *parse"
                     49: .Fc
1.42      schwarze   50: .Ft void
                     51: .Fo mparse_copy
1.14      kristaps   52: .Fa "const struct mparse *parse"
                     53: .Fc
1.35      schwarze   54: .Ft int
1.26      schwarze   55: .Fo mparse_open
                     56: .Fa "struct mparse *parse"
                     57: .Fa "const char *fname"
                     58: .Fc
1.43      schwarze   59: .Ft void
1.1       kristaps   60: .Fo mparse_readfd
                     61: .Fa "struct mparse *parse"
                     62: .Fa "int fd"
                     63: .Fa "const char *fname"
                     64: .Fc
                     65: .Ft void
                     66: .Fo mparse_reset
                     67: .Fa "struct mparse *parse"
                     68: .Fc
1.44    ! schwarze   69: .Ft struct roff_meta *
1.1       kristaps   70: .Fo mparse_result
                     71: .Fa "struct mparse *parse"
1.2       kristaps   72: .Fc
1.37      schwarze   73: .In roff.h
                     74: .Ft void
                     75: .Fo deroff
                     76: .Fa "char **dest"
                     77: .Fa "const struct roff_node *node"
                     78: .Fc
1.25      schwarze   79: .In sys/types.h
1.24      schwarze   80: .In mandoc.h
                     81: .In mdoc.h
1.37      schwarze   82: .Vt extern const char * const * mdoc_argnames;
                     83: .Vt extern const char * const * mdoc_macronames;
1.25      schwarze   84: .In sys/types.h
1.24      schwarze   85: .In mandoc.h
                     86: .In man.h
1.37      schwarze   87: .Vt extern const char * const * man_macronames;
1.1       kristaps   88: .Sh DESCRIPTION
                     89: The
                     90: .Nm mandoc
                     91: library parses a
                     92: .Ux
                     93: manual into an abstract syntax tree (AST).
                     94: .Ux
                     95: manuals are composed of
                     96: .Xr mdoc 7
                     97: or
                     98: .Xr man 7 ,
                     99: and may be mixed with
                    100: .Xr roff 7 ,
                    101: .Xr tbl 7 ,
                    102: and
                    103: .Xr eqn 7
                    104: invocations.
                    105: .Pp
                    106: The following describes a general parse sequence:
                    107: .Bl -enum
                    108: .It
                    109: initiate a parsing sequence with
1.27      schwarze  110: .Xr mchars_alloc 3
                    111: and
1.1       kristaps  112: .Fn mparse_alloc ;
                    113: .It
1.31      schwarze  114: open a file with
                    115: .Xr open 2
                    116: or
                    117: .Fn mparse_open ;
                    118: .It
                    119: parse it with
1.1       kristaps  120: .Fn mparse_readfd ;
                    121: .It
1.34      schwarze  122: close it with
                    123: .Xr close 2 ;
                    124: .It
1.31      schwarze  125: retrieve the syntax tree with
1.1       kristaps  126: .Fn mparse_result ;
                    127: .It
1.38      schwarze  128: if information about the validity of the input is needed, fetch it with
                    129: .Fn mparse_updaterc ;
                    130: .It
1.37      schwarze  131: iterate over parse nodes starting from the
                    132: .Fa first
                    133: member of the returned
1.44    ! schwarze  134: .Vt struct roff_meta ;
1.1       kristaps  135: .It
                    136: free all allocated memory with
1.27      schwarze  137: .Fn mparse_free
                    138: and
                    139: .Xr mchars_free 3 ,
1.1       kristaps  140: or invoke
                    141: .Fn mparse_reset
1.37      schwarze  142: and go back to step 2 to parse new files.
1.3       kristaps  143: .El
                    144: .Sh REFERENCE
                    145: This section documents the functions, types, and variables available
                    146: via
1.25      schwarze  147: .In mandoc.h ,
                    148: with the exception of those documented in
                    149: .Xr mandoc_escape 3
                    150: and
                    151: .Xr mchars_alloc 3 .
1.3       kristaps  152: .Ss Types
                    153: .Bl -ohang
                    154: .It Vt "enum mandocerr"
1.31      schwarze  155: An error or warning message during parsing.
1.3       kristaps  156: .It Vt "enum mandoclevel"
1.11      kristaps  157: A classification of an
1.23      schwarze  158: .Vt "enum mandocerr"
1.11      kristaps  159: as regards system operation.
1.37      schwarze  160: See the DIAGNOSTICS section in
                    161: .Xr mandoc 1
                    162: regarding the meanings of the levels.
1.3       kristaps  163: .It Vt "struct mparse"
1.11      kristaps  164: An opaque pointer to a running parse sequence.
                    165: Created with
                    166: .Fn mparse_alloc
                    167: and freed with
                    168: .Fn mparse_free .
                    169: This may be used across parsed input if
                    170: .Fn mparse_reset
                    171: is called between parses.
1.3       kristaps  172: .El
                    173: .Ss Functions
                    174: .Bl -ohang
1.37      schwarze  175: .It Fn deroff
1.25      schwarze  176: Obtain a text-only representation of a
1.37      schwarze  177: .Vt struct roff_node ,
1.25      schwarze  178: including text contained in its child nodes.
1.37      schwarze  179: To be used on children of the
                    180: .Fa first
                    181: member of
1.44    ! schwarze  182: .Vt struct roff_meta .
1.25      schwarze  183: When it is no longer needed, the pointer returned from
1.37      schwarze  184: .Fn deroff
1.25      schwarze  185: can be passed to
                    186: .Xr free 3 .
1.3       kristaps  187: .It Fn mparse_alloc
1.4       kristaps  188: Allocate a parser.
1.23      schwarze  189: The arguments have the following effect:
                    190: .Bl -tag -offset 5n -width inttype
1.25      schwarze  191: .It Ar options
                    192: When the
1.23      schwarze  193: .Dv MPARSE_MDOC
                    194: or
1.25      schwarze  195: .Dv MPARSE_MAN
                    196: bit is set, only that parser is used.
                    197: Otherwise, the document type is automatically detected.
                    198: .Pp
                    199: When the
                    200: .Dv MPARSE_SO
                    201: bit is set,
                    202: .Xr roff 7
                    203: .Ic \&so
                    204: file inclusion requests are always honoured.
                    205: Otherwise, if the request is the only content in an input file,
                    206: only the file name is remembered, to be returned in the
                    207: .Fa sodest
1.44    ! schwarze  208: field of
        !           209: .Vt struct roff_meta .
1.25      schwarze  210: .Pp
                    211: When the
                    212: .Dv MPARSE_QUICK
                    213: bit is set, parsing is aborted after the NAME section.
                    214: This is for example useful in
                    215: .Xr makewhatis 8
                    216: .Fl Q
                    217: to quickly build minimal databases.
1.44    ! schwarze  218: .Pp
        !           219: When the
        !           220: .Dv MPARSE_VALIDATE
        !           221: bit is set,
        !           222: .Fn mparse_result
        !           223: runs the validation functions before returning the syntax tree.
        !           224: This is almost always required, except in certain debugging scenarios,
        !           225: for example to dump unvalidated syntax trees.
1.40      schwarze  226: .It Ar os_e
                    227: Operating system to check base system conventions for.
                    228: If
                    229: .Dv MANDOC_OS_OTHER ,
                    230: the system is automatically detected from
                    231: .Ic \&Os ,
                    232: .Fl Ios ,
                    233: or
                    234: .Xr uname 3 .
                    235: .It Ar os_s
1.23      schwarze  236: A default string for the
                    237: .Xr mdoc 7
1.40      schwarze  238: .Ic \&Os
1.23      schwarze  239: macro, overriding the
                    240: .Dv OSNAME
                    241: preprocessor definition and the results of
                    242: .Xr uname 3 .
1.37      schwarze  243: Passing
                    244: .Dv NULL
                    245: sets no default.
1.23      schwarze  246: .El
                    247: .Pp
1.4       kristaps  248: The same parser may be used for multiple files so long as
                    249: .Fn mparse_reset
                    250: is called between parses.
                    251: .Fn mparse_free
                    252: must be called to free the memory allocated by this function.
1.18      schwarze  253: Declared in
                    254: .In mandoc.h ,
                    255: implemented in
                    256: .Pa read.c .
1.3       kristaps  257: .It Fn mparse_free
1.4       kristaps  258: Free all memory allocated by
                    259: .Fn mparse_alloc .
1.18      schwarze  260: Declared in
                    261: .In mandoc.h ,
                    262: implemented in
                    263: .Pa read.c .
1.42      schwarze  264: .It Fn mparse_copy
                    265: Dump a copy of the input to the standard output; used for
                    266: .Fl man T Ns Cm man .
1.18      schwarze  267: Declared in
                    268: .In mandoc.h ,
                    269: implemented in
                    270: .Pa read.c .
1.26      schwarze  271: .It Fn mparse_open
1.32      schwarze  272: Open the file for reading.
                    273: If that fails and
1.26      schwarze  274: .Fa fname
1.32      schwarze  275: does not already end in
                    276: .Ql .gz ,
                    277: try again after appending
                    278: .Ql .gz .
                    279: Save the information whether the file is zipped or not.
1.35      schwarze  280: Return a file descriptor open for reading or -1 on failure.
1.26      schwarze  281: It can be passed to
                    282: .Fn mparse_readfd
                    283: or used directly.
                    284: Declared in
                    285: .In mandoc.h ,
                    286: implemented in
                    287: .Pa read.c .
1.3       kristaps  288: .It Fn mparse_readfd
1.30      schwarze  289: Parse a file descriptor opened with
                    290: .Xr open 2
                    291: or
1.29      schwarze  292: .Fn mparse_open .
1.30      schwarze  293: Pass the associated filename in
                    294: .Va fname .
1.29      schwarze  295: This function may be called multiple times with different parameters; however,
1.34      schwarze  296: .Xr close 2
                    297: and
1.4       kristaps  298: .Fn mparse_reset
                    299: should be invoked between parses.
1.18      schwarze  300: Declared in
                    301: .In mandoc.h ,
                    302: implemented in
                    303: .Pa read.c .
1.3       kristaps  304: .It Fn mparse_reset
1.4       kristaps  305: Reset a parser so that
                    306: .Fn mparse_readfd
                    307: may be used again.
1.18      schwarze  308: Declared in
                    309: .In mandoc.h ,
                    310: implemented in
                    311: .Pa read.c .
1.3       kristaps  312: .It Fn mparse_result
1.4       kristaps  313: Obtain the result of a parse.
1.18      schwarze  314: Declared in
                    315: .In mandoc.h ,
                    316: implemented in
                    317: .Pa read.c .
1.3       kristaps  318: .El
                    319: .Ss Variables
                    320: .Bl -ohang
                    321: .It Va man_macronames
1.37      schwarze  322: The string representation of a
                    323: .Xr man 7
                    324: macro as indexed by
1.4       kristaps  325: .Vt "enum mant" .
1.3       kristaps  326: .It Va mdoc_argnames
1.37      schwarze  327: The string representation of an
                    328: .Xr mdoc 7
                    329: macro argument as indexed by
1.4       kristaps  330: .Vt "enum mdocargt" .
1.3       kristaps  331: .It Va mdoc_macronames
1.37      schwarze  332: The string representation of an
                    333: .Xr mdoc 7
                    334: macro as indexed by
1.4       kristaps  335: .Vt "enum mdoct" .
1.1       kristaps  336: .El
                    337: .Sh IMPLEMENTATION NOTES
                    338: This section consists of structural documentation for
                    339: .Xr mdoc 7
                    340: and
                    341: .Xr man 7
1.11      kristaps  342: syntax trees and strings.
                    343: .Ss Man and Mdoc Strings
                    344: Strings may be extracted from mdoc and man meta-data, or from text
                    345: nodes (MDOC_TEXT and MAN_TEXT, respectively).
                    346: These strings have special non-printing formatting cues embedded in the
                    347: text itself, as well as
                    348: .Xr roff 7
                    349: escapes preserved from input.
                    350: Implementing systems will need to handle both situations to produce
                    351: human-readable text.
                    352: In general, strings may be assumed to consist of 7-bit ASCII characters.
                    353: .Pp
                    354: The following non-printing characters may be embedded in text strings:
                    355: .Bl -tag -width Ds
                    356: .It Dv ASCII_NBRSP
                    357: A non-breaking space character.
                    358: .It Dv ASCII_HYPH
                    359: A soft hyphen.
1.25      schwarze  360: .It Dv ASCII_BREAK
                    361: A breakable zero-width space.
1.11      kristaps  362: .El
                    363: .Pp
                    364: Escape characters are also passed verbatim into text strings.
                    365: An escape character is a sequence of characters beginning with the
                    366: backslash
                    367: .Pq Sq \e .
                    368: To construct human-readable text, these should be intercepted with
1.25      schwarze  369: .Xr mandoc_escape 3
                    370: and converted with one the functions described in
                    371: .Xr mchars_alloc 3 .
1.1       kristaps  372: .Ss Man Abstract Syntax Tree
                    373: This AST is governed by the ontological rules dictated in
                    374: .Xr man 7
                    375: and derives its terminology accordingly.
                    376: .Pp
                    377: The AST is composed of
1.37      schwarze  378: .Vt struct roff_node
1.1       kristaps  379: nodes with element, root and text types as declared by the
                    380: .Va type
                    381: field.
                    382: Each node also provides its parse point (the
                    383: .Va line ,
1.37      schwarze  384: .Va pos ,
1.1       kristaps  385: and
1.37      schwarze  386: .Va sec
1.1       kristaps  387: fields), its position in the tree (the
                    388: .Va parent ,
                    389: .Va child ,
                    390: .Va next
                    391: and
                    392: .Va prev
                    393: fields) and some type-specific data.
                    394: .Pp
                    395: The tree itself is arranged according to the following normal form,
                    396: where capitalised non-terminals represent nodes.
                    397: .Pp
                    398: .Bl -tag -width "ELEMENTXX" -compact
                    399: .It ROOT
                    400: \(<- mnode+
                    401: .It mnode
                    402: \(<- ELEMENT | TEXT | BLOCK
                    403: .It BLOCK
                    404: \(<- HEAD BODY
                    405: .It HEAD
                    406: \(<- mnode*
                    407: .It BODY
                    408: \(<- mnode*
                    409: .It ELEMENT
                    410: \(<- ELEMENT | TEXT*
                    411: .It TEXT
1.11      kristaps  412: \(<- [[:ascii:]]*
1.1       kristaps  413: .El
                    414: .Pp
                    415: The only elements capable of nesting other elements are those with
1.25      schwarze  416: next-line scope as documented in
1.1       kristaps  417: .Xr man 7 .
                    418: .Ss Mdoc Abstract Syntax Tree
                    419: This AST is governed by the ontological
                    420: rules dictated in
                    421: .Xr mdoc 7
                    422: and derives its terminology accordingly.
                    423: .Qq In-line
                    424: elements described in
                    425: .Xr mdoc 7
                    426: are described simply as
                    427: .Qq elements .
                    428: .Pp
                    429: The AST is composed of
1.37      schwarze  430: .Vt struct roff_node
1.1       kristaps  431: nodes with block, head, body, element, root and text types as declared
                    432: by the
                    433: .Va type
                    434: field.
                    435: Each node also provides its parse point (the
                    436: .Va line ,
1.37      schwarze  437: .Va pos ,
1.1       kristaps  438: and
1.37      schwarze  439: .Va sec
1.1       kristaps  440: fields), its position in the tree (the
                    441: .Va parent ,
                    442: .Va child ,
1.36      schwarze  443: .Va last ,
1.1       kristaps  444: .Va next
                    445: and
                    446: .Va prev
                    447: fields) and some type-specific data, in particular, for nodes generated
                    448: from macros, the generating macro in the
                    449: .Va tok
                    450: field.
                    451: .Pp
                    452: The tree itself is arranged according to the following normal form,
                    453: where capitalised non-terminals represent nodes.
                    454: .Pp
                    455: .Bl -tag -width "ELEMENTXX" -compact
                    456: .It ROOT
                    457: \(<- mnode+
                    458: .It mnode
                    459: \(<- BLOCK | ELEMENT | TEXT
                    460: .It BLOCK
                    461: \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
                    462: .It ELEMENT
                    463: \(<- TEXT*
                    464: .It HEAD
                    465: \(<- mnode*
                    466: .It BODY
                    467: \(<- mnode* [ENDBODY mnode*]
                    468: .It TAIL
                    469: \(<- mnode*
                    470: .It TEXT
1.11      kristaps  471: \(<- [[:ascii:]]*
1.1       kristaps  472: .El
                    473: .Pp
                    474: Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
                    475: the BLOCK production: these refer to punctuation marks.
                    476: Furthermore, although a TEXT node will generally have a non-zero-length
                    477: string, in the specific case of
                    478: .Sq \&.Bd \-literal ,
                    479: an empty line will produce a zero-length string.
                    480: Multiple body parts are only found in invocations of
                    481: .Sq \&Bl \-column ,
                    482: where a new body introduces a new phrase.
                    483: .Pp
                    484: The
                    485: .Xr mdoc 7
1.5       kristaps  486: syntax tree accommodates for broken block structures as well.
1.1       kristaps  487: The ENDBODY node is available to end the formatting associated
                    488: with a given block before the physical end of that block.
                    489: It has a non-null
                    490: .Va end
                    491: field, is of the BODY
                    492: .Va type ,
                    493: has the same
                    494: .Va tok
                    495: as the BLOCK it is ending, and has a
                    496: .Va pending
                    497: field pointing to that BLOCK's BODY node.
                    498: It is an indirect child of that BODY node
                    499: and has no children of its own.
                    500: .Pp
                    501: An ENDBODY node is generated when a block ends while one of its child
                    502: blocks is still open, like in the following example:
                    503: .Bd -literal -offset indent
                    504: \&.Ao ao
                    505: \&.Bo bo ac
                    506: \&.Ac bc
                    507: \&.Bc end
                    508: .Ed
                    509: .Pp
                    510: This example results in the following block structure:
                    511: .Bd -literal -offset indent
                    512: BLOCK Ao
                    513:     HEAD Ao
                    514:     BODY Ao
                    515:         TEXT ao
                    516:         BLOCK Bo, pending -> Ao
                    517:             HEAD Bo
                    518:             BODY Bo
                    519:                 TEXT bo
                    520:                 TEXT ac
                    521:                 ENDBODY Ao, pending -> Ao
                    522:                 TEXT bc
                    523: TEXT end
                    524: .Ed
                    525: .Pp
                    526: Here, the formatting of the
1.40      schwarze  527: .Ic \&Ao
1.1       kristaps  528: block extends from TEXT ao to TEXT ac,
                    529: while the formatting of the
1.40      schwarze  530: .Ic \&Bo
1.1       kristaps  531: block extends from TEXT bo to TEXT bc.
                    532: It renders as follows in
                    533: .Fl T Ns Cm ascii
                    534: mode:
                    535: .Pp
                    536: .Dl <ao [bo ac> bc] end
                    537: .Pp
                    538: Support for badly-nested blocks is only provided for backward
                    539: compatibility with some older
                    540: .Xr mdoc 7
                    541: implementations.
                    542: Using badly-nested blocks is
                    543: .Em strongly discouraged ;
                    544: for example, the
                    545: .Fl T Ns Cm html
1.39      schwarze  546: front-end to
1.1       kristaps  547: .Xr mandoc 1
1.39      schwarze  548: is unable to render them in any meaningful way.
1.1       kristaps  549: Furthermore, behaviour when encountering badly-nested blocks is not
1.25      schwarze  550: consistent across troff implementations, especially when using multiple
1.1       kristaps  551: levels of badly-nested blocks.
                    552: .Sh SEE ALSO
                    553: .Xr mandoc 1 ,
1.37      schwarze  554: .Xr man.cgi 3 ,
1.25      schwarze  555: .Xr mandoc_escape 3 ,
1.37      schwarze  556: .Xr mandoc_headers 3 ,
1.25      schwarze  557: .Xr mandoc_malloc 3 ,
1.37      schwarze  558: .Xr mansearch 3 ,
1.25      schwarze  559: .Xr mchars_alloc 3 ,
1.37      schwarze  560: .Xr tbl 3 ,
1.1       kristaps  561: .Xr eqn 7 ,
                    562: .Xr man 7 ,
1.6       kristaps  563: .Xr mandoc_char 7 ,
1.1       kristaps  564: .Xr mdoc 7 ,
                    565: .Xr roff 7 ,
                    566: .Xr tbl 7
                    567: .Sh AUTHORS
1.37      schwarze  568: .An -nosplit
1.1       kristaps  569: The
                    570: .Nm
                    571: library was written by
1.37      schwarze  572: .An Kristaps Dzonsons Aq Mt kristaps@bsd.lv
                    573: and is maintained by
                    574: .An Ingo Schwarze Aq Mt schwarze@openbsd.org .

CVSweb