[BACK]Return to mandoc_escape.3 CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

Annotation of mandoc/mandoc_escape.3, Revision 1.6

1.6     ! schwarze    1: .\" $Id: mandoc_escape.3,v 1.5 2023/10/23 10:56:55 schwarze Exp $
1.1       schwarze    2: .\"
                      3: .\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org>
                      4: .\"
                      5: .\" Permission to use, copy, modify, and distribute this software for any
                      6: .\" purpose with or without fee is hereby granted, provided that the above
                      7: .\" copyright notice and this permission notice appear in all copies.
                      8: .\"
                      9: .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     10: .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11: .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     12: .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13: .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14: .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15: .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     16: .\"
1.6     ! schwarze   17: .Dd $Mdocdate: October 23 2023 $
1.1       schwarze   18: .Dt MANDOC_ESCAPE 3
                     19: .Os
                     20: .Sh NAME
                     21: .Nm mandoc_escape
                     22: .Nd parse roff escape sequences
                     23: .Sh SYNOPSIS
                     24: .In sys/types.h
                     25: .In mandoc.h
                     26: .Ft "enum mandoc_esc"
                     27: .Fo mandoc_escape
                     28: .Fa "const char **end"
                     29: .Fa "const char **start"
                     30: .Fa "int *sz"
                     31: .Fc
                     32: .Sh DESCRIPTION
                     33: This function scans a
                     34: .Xr roff 7
                     35: escape sequence.
                     36: .Pp
                     37: An escape sequence consists of
                     38: .Bl -dash -compact -width 2n
                     39: .It
                     40: an initial backslash character
                     41: .Pq Sq \e ,
                     42: .It
                     43: a single ASCII character called the escape sequence identifier,
                     44: .It
                     45: and, with only a few exceptions, an argument.
                     46: .El
                     47: .Pp
                     48: Arguments can be given in the following forms; some escape sequence
                     49: identifiers only accept some of these forms as specified below.
                     50: The first three forms are called the standard forms.
                     51: .Bl -tag -width 2n
                     52: .It \&In brackets: Ic \&[ Ns Ar argument Ns Ic \&]
                     53: The argument starts after the initial
                     54: .Sq \&[ ,
                     55: ends before the final
                     56: .Sq \&] ,
                     57: and the escape sequence ends with the final
                     58: .Sq \&] .
                     59: .It Two-character argument short form: Ic \&( Ns Ar ar
                     60: This form can only be used for arguments
                     61: consisting of exactly two characters.
                     62: It has the same effect as
                     63: .Ic \&[ Ns Ar ar Ns Ic \&] .
                     64: .It One-character argument short form: Ar a
                     65: This form can only be used for arguments
                     66: consisting of exactly one character.
                     67: It has the same effect as
                     68: .Ic \&[ Ns Ar a Ns Ic \&] .
                     69: .It Delimited form: Ar C Ns Ar argument Ns Ar C
                     70: The argument starts after the initial delimiter character
                     71: .Ar C ,
                     72: ends before the next occurrence of the delimiter character
                     73: .Ar C ,
                     74: and the escape sequence ends with that second
                     75: .Ar C .
                     76: Some escape sequences allow arbitrary characters
                     77: .Ar C
                     78: as quoting characters, some restrict the range of characters
                     79: that can be used as quoting characters.
                     80: .El
                     81: .Pp
                     82: Upon function entry,
1.6     ! schwarze   83: .Pf * Fa end
1.1       schwarze   84: is expected to point to the escape sequence identifier.
                     85: The values passed in as
1.6     ! schwarze   86: .Pf * Fa start
1.1       schwarze   87: and
1.6     ! schwarze   88: .Pf * Fa sz
1.1       schwarze   89: are ignored and overwritten.
                     90: .Pp
                     91: By design, this function cannot handle those
                     92: .Xr roff 7
                     93: escape sequences that require in-place expansion, in particular
                     94: user-defined strings
                     95: .Ic \e* ,
                     96: number registers
                     97: .Ic \en ,
                     98: width measurements
                     99: .Ic \ew ,
                    100: and numerical expression control
                    101: .Ic \eB .
                    102: These are handled by
1.5       schwarze  103: .Fn roff_expand ,
1.1       schwarze  104: a private preprocessor function called from
1.6     ! schwarze  105: .Fn roff_parseln
        !           106: and
        !           107: .Fn roff_getarg ,
1.1       schwarze  108: see the file
                    109: .Pa roff.c .
                    110: .Pp
                    111: The function
                    112: .Fn mandoc_escape
                    113: is used
                    114: .Bl -dash -compact -width 2n
                    115: .It
                    116: recursively by itself, because some escape sequence arguments can
                    117: in turn contain other escape sequences,
                    118: .It
1.6     ! schwarze  119: for parsing and error detection internally by the
1.1       schwarze  120: .Xr roff 7
                    121: parser part of the
1.3       schwarze  122: .Xr mandoc 3
                    123: library, see the file
1.1       schwarze  124: .Pa roff.c ,
                    125: .It
1.6     ! schwarze  126: occasionally by high-level parser and validation modules when they
        !           127: need to skip escape sequences while scanning the input, see the files
        !           128: .Pa mdoc.c ,
        !           129: .Pa man.c ,
        !           130: .Pa man_validate.c ,
        !           131: .Pa eqn.c ,
        !           132: and
        !           133: .Pa tbl_data.c ,
        !           134: .It
1.1       schwarze  135: above all externally by the
1.4       schwarze  136: .Xr mandoc 1
1.1       schwarze  137: formatting modules, in particular
                    138: .Fl Tascii
                    139: and
                    140: .Fl Thtml ,
                    141: for formatting purposes, see the files
                    142: .Pa term.c
                    143: and
                    144: .Pa html.c ,
                    145: .It
                    146: and rarely externally by high-level utilities using the mandoc library,
                    147: for example
                    148: .Xr makewhatis 8 ,
                    149: to purge escape sequences from text.
                    150: .El
                    151: .Sh RETURN VALUES
                    152: Upon function return, the pointer
1.6     ! schwarze  153: .Pf * Fa end
1.1       schwarze  154: is set to the character after the end of the escape sequence,
                    155: such that the calling higher-level parser can easily continue.
                    156: .Pp
                    157: For escape sequences taking an argument, the pointer
1.6     ! schwarze  158: .Pf * Fa start
1.1       schwarze  159: is set to the beginning of the argument and
1.6     ! schwarze  160: .Pf * Fa sz
1.1       schwarze  161: is set to the length of the argument.
                    162: For escape sequences not taking an argument,
1.6     ! schwarze  163: .Pf * Fa start
1.1       schwarze  164: is set to the character after the end of the sequence and
1.6     ! schwarze  165: .Pf * Fa sz
1.1       schwarze  166: is set to 0.
                    167: Both
                    168: .Fa start
                    169: and
                    170: .Fa sz
                    171: may be
                    172: .Dv NULL ;
                    173: in that case, the argument and the length are not returned.
                    174: .Pp
                    175: For sequences taking an argument, the function
                    176: .Fn mandoc_escape
                    177: returns one of the following values:
                    178: .Bl -tag -width 2n
1.6     ! schwarze  179: .It Dv ESCAPE_DEVICE
        !           180: The escape sequence
        !           181: .Ic \e*(.T
        !           182: or
        !           183: .Ic \e*[.T] .
1.1       schwarze  184: .It Dv ESCAPE_FONT
                    185: The escape sequence
                    186: .Ic \ef
                    187: taking an argument in standard form:
                    188: .Ic \ef[ , \ef( , \ef Ns Ar a .
                    189: Two-character arguments starting with the character
                    190: .Sq C
                    191: are reduced to one-character arguments by skipping the
                    192: .Sq C .
                    193: More specific values are returned for the most commonly used arguments:
                    194: .Bl -column "argument" "ESCAPE_FONTITALIC"
                    195: .It argument Ta return value
                    196: .It Cm R No or Cm 1 Ta Dv ESCAPE_FONTROMAN
                    197: .It Cm I No or Cm 2 Ta Dv ESCAPE_FONTITALIC
                    198: .It Cm B No or Cm 3 Ta Dv ESCAPE_FONTBOLD
                    199: .It Cm P Ta Dv ESCAPE_FONTPREV
                    200: .It Cm BI Ta Dv ESCAPE_FONTBI
                    201: .El
1.6     ! schwarze  202: .It Dv ESCAPE_HLINE
        !           203: The escape sequence
        !           204: .Ic \eh
        !           205: followed by an argument delimited by an arbitrary character.
        !           206: .It Dv ESCAPE_HORIZ
        !           207: The escape sequence
        !           208: .Ic \el
        !           209: followed by an argument delimited by an arbitrary character.
        !           210: .It Dv ESCAPE_NUMBERED
        !           211: The escape sequence
        !           212: .Ic \eN
        !           213: followed by a delimited argument.
        !           214: The delimiter character is arbitrary except that digits cannot be used.
        !           215: If a digit is encountered instead of the opening delimiter, that
        !           216: digit is considered to be the argument and the end of the sequence, and
        !           217: .Dv ESCAPE_IGNORE
        !           218: is returned.
        !           219: .Pp
        !           220: Such ASCII character escape sequences can be rendered using the function
        !           221: .Fn mchars_num2char
        !           222: described in the
        !           223: .Xr mchars_alloc 3
        !           224: manual.
        !           225: .It Dv ESCAPE_OVERSTRIKE
        !           226: The escape sequence
        !           227: .Ic \eo
        !           228: followed by an argument delimited by an arbitrary character.
1.1       schwarze  229: .It Dv ESCAPE_SPECIAL
                    230: The escape sequence
                    231: .Ic \eC
                    232: taking an argument delimited with the single quote character
                    233: and, as a special exception, the escape sequences
                    234: .Em not
                    235: having an identifier, that is, those where the argument, in standard
                    236: form, directly follows the initial backslash:
                    237: .Ic \eC' , \e[ , \e( , \e Ns Ar a .
                    238: Note that the one-character argument short form can only be used for
                    239: argument characters that do not clash with escape sequence identifiers.
                    240: .Pp
1.2       schwarze  241: If the argument matches one of the forms described below under
                    242: .Dv ESCAPE_UNICODE ,
                    243: that value is returned instead.
1.1       schwarze  244: .Pp
                    245: The
                    246: .Dv ESCAPE_SPECIAL
                    247: special character escape sequences can be rendered using the functions
                    248: .Fn mchars_spec2cp
                    249: and
                    250: .Fn mchars_spec2str
                    251: described in the
                    252: .Xr mchars_alloc 3
                    253: manual.
                    254: .It Dv ESCAPE_UNICODE
                    255: Escape sequences of the same format as described above under
                    256: .Dv ESCAPE_SPECIAL ,
1.2       schwarze  257: but with an argument of the forms
                    258: .Ic u Ns Ar XXXX ,
                    259: .Ic u Ns Ar YXXXX ,
                    260: or
                    261: .Ic u10 Ns Ar XXXX
                    262: where
                    263: .Ar X
                    264: and
                    265: .Ar Y
                    266: are hexadecimal digits and
                    267: .Ar Y
                    268: is not zero:
1.1       schwarze  269: .Ic \eC'u , \e[u .
                    270: As a special exception,
1.6     ! schwarze  271: .Pf * Fa start
1.1       schwarze  272: is set to the character after the
1.2       schwarze  273: .Ic u ,
1.1       schwarze  274: and the
1.6     ! schwarze  275: .Pf * Fa sz
1.1       schwarze  276: return value does not include the
1.2       schwarze  277: .Ic u
1.1       schwarze  278: either.
                    279: .Pp
                    280: Such Unicode character escape sequences can be rendered using the function
                    281: .Fn mchars_num2uc
                    282: described in the
                    283: .Xr mchars_alloc 3
                    284: manual.
                    285: .It Dv ESCAPE_IGNORE
1.6     ! schwarze  286: Many escape sequences that
        !           287: .Xr mandoc 1
        !           288: intends to ignore, in particular:
1.1       schwarze  289: .Bl -bullet -width 2n
                    290: .It
                    291: The escape sequence
                    292: .Ic \es
                    293: followed by an argument in standard form or by an argument delimited
                    294: by the single quote character:
                    295: .Ic \es' , \es[ , \es( , \es Ns Ar a .
                    296: As a special exception, an optional
                    297: .Sq +
                    298: or
                    299: .Sq \-
                    300: character is allowed after the
                    301: .Sq s
                    302: for all forms.
                    303: .It
                    304: The escape sequences
                    305: .Ic \eF ,
                    306: .Ic \ek ,
                    307: .Ic \eM ,
                    308: .Ic \em ,
1.6     ! schwarze  309: .Ic \eO ,
1.1       schwarze  310: and
                    311: .Ic \eY
                    312: followed by an argument in standard form.
                    313: .It
                    314: The escape sequences
                    315: .Ic \eb ,
                    316: .Ic \eD ,
                    317: .Ic \eR ,
                    318: .Ic \eX ,
                    319: and
                    320: .Ic \eZ
                    321: followed by an argument delimited by an arbitrary character.
                    322: .It
                    323: The escape sequences
                    324: .Ic \eH ,
                    325: .Ic \eL ,
                    326: .Ic \eS ,
                    327: .Ic \ev ,
                    328: and
                    329: .Ic \ex
                    330: followed by an argument delimited by a character that cannot occur
                    331: in numerical expressions.
                    332: However, if any character that can occur in numerical expressions
                    333: is found instead of a delimiter, the sequence is considered to end
                    334: with that character, and
                    335: .Dv ESCAPE_ERROR
                    336: is returned.
1.6     ! schwarze  337: .It
        !           338: The escape sequence
        !           339: .Ic \eO
        !           340: with a single-digit argument in the range from 1 to 4 inclusive.
1.1       schwarze  341: .El
1.6     ! schwarze  342: .It Dv ESCAPE_UNSUPP
        !           343: An escape sequence that
        !           344: .Xr mandoc 1
        !           345: can parse, but for which formatting is unsupported, in particular
        !           346: .Qq \eO0
        !           347: and
        !           348: .Qq \eO5 .
1.1       schwarze  349: .It Dv ESCAPE_ERROR
1.6     ! schwarze  350: Escape sequences taking an argument
        !           351: where the actual argument contains a syntax error.
1.1       schwarze  352: In particular, that happens if the end of the logical input line
                    353: is reached before the end of the argument.
                    354: .El
                    355: .Pp
                    356: For sequences that do not take an argument, the function
                    357: .Fn mandoc_escape
                    358: returns one of the following values:
                    359: .Bl -tag -width 2n
1.6     ! schwarze  360: .It Dv ESCAPE_BREAK
1.1       schwarze  361: The escape sequence
1.6     ! schwarze  362: .Qq \ep .
        !           363: .It Dv ESCAPE_IGNORE
        !           364: Many escape sequences including
        !           365: .Qq \e% ,
        !           366: .Qq \e& ,
        !           367: .Qq \e| ,
        !           368: .Qq \ed ,
        !           369: and
        !           370: .Qq \eu .
1.1       schwarze  371: .It Dv ESCAPE_NOSPACE
                    372: The escape sequence
                    373: .Qq \ec .
1.6     ! schwarze  374: .It Dv ESCAPE_SKIPCHAR
        !           375: The escape sequence
        !           376: .Qq \ez .
        !           377: .It Dv ESCAPE_UNSUPP
1.1       schwarze  378: The escape sequences
1.6     ! schwarze  379: .Qq \e! ,
        !           380: .Qq \e? ,
        !           381: and
        !           382: .Qq \er .
        !           383: .It Dv ESCAPE_UNDEF
        !           384: Many escape sequences that other
        !           385: .Xr roff 7
        !           386: implementations do not define either, for example
        !           387: .Qq \eG ,
        !           388: .Qq \eI ,
        !           389: .Qq \ei ,
        !           390: .Qq \eJ ,
        !           391: .Qq \ej ,
        !           392: .Qq \eK ,
        !           393: .Qq \eP ,
        !           394: .Qq \eT ,
        !           395: .Qq \eU ,
        !           396: .Qq \eW ,
1.1       schwarze  397: and
1.6     ! schwarze  398: .Qq \ey .
1.1       schwarze  399: .El
                    400: .Sh FILES
                    401: This function is implemented in
                    402: .Pa mandoc.c .
                    403: .Sh SEE ALSO
                    404: .Xr mchars_alloc 3 ,
                    405: .Xr mandoc_char 7 ,
                    406: .Xr roff 7
                    407: .Sh HISTORY
                    408: This function has been available since mandoc 1.11.2.
                    409: .Sh AUTHORS
                    410: .An Kristaps Dzonsons Aq Mt kristaps@bsd.lv
                    411: .An Ingo Schwarze Aq Mt schwarze@openbsd.org

CVSweb