[BACK]Return to mdoc.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

Annotation of mandoc/mdoc.c, Revision 1.275

1.275   ! schwarze    1: /* $Id: mdoc.c,v 1.274 2018/12/31 07:46:07 schwarze Exp $ */
1.1       kristaps    2: /*
1.275   ! schwarze    3:  * Copyright (c) 2010, 2012-2018, 2020 Ingo Schwarze <schwarze@openbsd.org>
1.182     schwarze    4:  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.1       kristaps    5:  *
                      6:  * Permission to use, copy, modify, and distribute this software for any
1.75      kristaps    7:  * purpose with or without fee is hereby granted, provided that the above
                      8:  * copyright notice and this permission notice appear in all copies.
1.1       kristaps    9:  *
1.241     schwarze   10:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.75      kristaps   11:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.241     schwarze   12:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.75      kristaps   13:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.275   ! schwarze   17:  *
        !            18:  * Top level and utility functions of the mdoc(7) parser for mandoc(1).
1.1       kristaps   19:  */
1.114     kristaps   20: #include "config.h"
                     21:
1.106     kristaps   22: #include <sys/types.h>
                     23:
1.1       kristaps   24: #include <assert.h>
1.211     schwarze   25: #include <ctype.h>
1.1       kristaps   26: #include <stdarg.h>
1.73      kristaps   27: #include <stdio.h>
1.1       kristaps   28: #include <stdlib.h>
                     29: #include <string.h>
1.120     kristaps   30: #include <time.h>
1.1       kristaps   31:
1.239     schwarze   32: #include "mandoc_aux.h"
                     33: #include "mandoc.h"
                     34: #include "roff.h"
1.187     kristaps   35: #include "mdoc.h"
1.239     schwarze   36: #include "libmandoc.h"
1.247     schwarze   37: #include "roff_int.h"
1.70      kristaps   38: #include "libmdoc.h"
1.1       kristaps   39:
1.213     schwarze   40: const  char *const __mdoc_argnames[MDOC_ARG_MAX] = {
1.1       kristaps   41:        "split",                "nosplit",              "ragged",
1.213     schwarze   42:        "unfilled",             "literal",              "file",
                     43:        "offset",               "bullet",               "dash",
                     44:        "hyphen",               "item",                 "enum",
                     45:        "tag",                  "diag",                 "hang",
                     46:        "ohang",                "inset",                "column",
                     47:        "width",                "compact",              "std",
1.52      kristaps   48:        "filled",               "words",                "emphasis",
1.108     kristaps   49:        "symbolic",             "nested",               "centered"
1.262     schwarze   50: };
1.1       kristaps   51: const  char * const *mdoc_argnames = __mdoc_argnames;
                     52:
1.242     schwarze   53: static int               mdoc_ptext(struct roff_man *, int, char *, int);
                     54: static int               mdoc_pmacro(struct roff_man *, int, char *, int);
1.88      kristaps   55:
1.181     kristaps   56:
1.50      kristaps   57: /*
1.53      kristaps   58:  * Main parse routine.  Parses a single line -- really just hands off to
1.123     kristaps   59:  * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
1.50      kristaps   60:  */
1.20      kristaps   61: int
1.242     schwarze   62: mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs)
1.1       kristaps   63: {
                     64:
1.239     schwarze   65:        if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line)
1.228     schwarze   66:                mdoc->flags |= MDOC_NEWLINE;
1.153     schwarze   67:
                     68:        /*
                     69:         * Let the roff nS register switch SYNOPSIS mode early,
                     70:         * such that the parser knows at all times
                     71:         * whether this mode is on or off.
                     72:         * Note that this mode is also switched by the Sh macro.
                     73:         */
1.204     schwarze   74:        if (roff_getreg(mdoc->roff, "nS"))
                     75:                mdoc->flags |= MDOC_SYNOPSIS;
                     76:        else
                     77:                mdoc->flags &= ~MDOC_SYNOPSIS;
1.153     schwarze   78:
1.253     schwarze   79:        return roff_getcontrol(mdoc->roff, buf, &offs) ?
1.213     schwarze   80:            mdoc_pmacro(mdoc, ln, buf, offs) :
1.253     schwarze   81:            mdoc_ptext(mdoc, ln, buf, offs);
1.1       kristaps   82: }
                     83:
1.232     schwarze   84: void
1.262     schwarze   85: mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, enum roff_tok tok)
1.17      kristaps   86: {
1.240     schwarze   87:        struct roff_node *p;
1.17      kristaps   88:
1.247     schwarze   89:        p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok);
                     90:        roff_node_append(mdoc, p);
1.242     schwarze   91:        mdoc->next = ROFF_NEXT_CHILD;
1.152     schwarze   92: }
                     93:
1.240     schwarze   94: struct roff_node *
1.262     schwarze   95: mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos,
                     96:     enum roff_tok tok, struct roff_node *body)
1.152     schwarze   97: {
1.240     schwarze   98:        struct roff_node *p;
1.152     schwarze   99:
1.258     schwarze  100:        body->flags |= NODE_ENDED;
                    101:        body->parent->flags |= NODE_ENDED;
1.247     schwarze  102:        p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok);
1.237     schwarze  103:        p->body = body;
1.202     schwarze  104:        p->norm = body->norm;
1.260     schwarze  105:        p->end = ENDBODY_SPACE;
1.247     schwarze  106:        roff_node_append(mdoc, p);
1.242     schwarze  107:        mdoc->next = ROFF_NEXT_SIBLING;
1.253     schwarze  108:        return p;
1.1       kristaps  109: }
                    110:
1.240     schwarze  111: struct roff_node *
1.242     schwarze  112: mdoc_block_alloc(struct roff_man *mdoc, int line, int pos,
1.262     schwarze  113:     enum roff_tok tok, struct mdoc_arg *args)
1.1       kristaps  114: {
1.240     schwarze  115:        struct roff_node *p;
1.1       kristaps  116:
1.247     schwarze  117:        p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok);
1.77      kristaps  118:        p->args = args;
                    119:        if (p->args)
1.53      kristaps  120:                (args->refcnt)++;
1.172     kristaps  121:
                    122:        switch (tok) {
1.213     schwarze  123:        case MDOC_Bd:
                    124:        case MDOC_Bf:
                    125:        case MDOC_Bl:
1.217     schwarze  126:        case MDOC_En:
1.213     schwarze  127:        case MDOC_Rs:
1.172     kristaps  128:                p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
                    129:                break;
                    130:        default:
                    131:                break;
                    132:        }
1.247     schwarze  133:        roff_node_append(mdoc, p);
1.242     schwarze  134:        mdoc->next = ROFF_NEXT_CHILD;
1.253     schwarze  135:        return p;
1.1       kristaps  136: }
                    137:
1.231     schwarze  138: void
1.242     schwarze  139: mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos,
1.262     schwarze  140:      enum roff_tok tok, struct mdoc_arg *args)
1.1       kristaps  141: {
1.240     schwarze  142:        struct roff_node *p;
1.1       kristaps  143:
1.247     schwarze  144:        p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok);
1.77      kristaps  145:        p->args = args;
                    146:        if (p->args)
1.53      kristaps  147:                (args->refcnt)++;
1.172     kristaps  148:
                    149:        switch (tok) {
1.213     schwarze  150:        case MDOC_An:
1.172     kristaps  151:                p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
                    152:                break;
                    153:        default:
                    154:                break;
                    155:        }
1.247     schwarze  156:        roff_node_append(mdoc, p);
1.242     schwarze  157:        mdoc->next = ROFF_NEXT_CHILD;
1.1       kristaps  158: }
                    159:
1.53      kristaps  160: /*
                    161:  * Parse free-form text, that is, a line that does not begin with the
                    162:  * control character.
                    163:  */
                    164: static int
1.242     schwarze  165: mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs)
1.1       kristaps  166: {
1.240     schwarze  167:        struct roff_node *n;
1.267     schwarze  168:        const char       *cp, *sp;
1.142     kristaps  169:        char             *c, *ws, *end;
                    170:
1.203     schwarze  171:        n = mdoc->last;
1.142     kristaps  172:
                    173:        /*
1.257     schwarze  174:         * If a column list contains plain text, assume an implicit item
                    175:         * macro.  This can happen one or more times at the beginning
                    176:         * of such a list, intermixed with non-It mdoc macros and with
                    177:         * nodes generated on the roff level, for example by tbl.
1.142     kristaps  178:         */
                    179:
1.257     schwarze  180:        if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
                    181:             n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
                    182:            (n->parent != NULL && n->parent->tok == MDOC_Bl &&
                    183:             n->parent->norm->Bl.type == LIST_column)) {
1.203     schwarze  184:                mdoc->flags |= MDOC_FREECOL;
1.269     schwarze  185:                (*mdoc_macro(MDOC_It)->fp)(mdoc, MDOC_It,
                    186:                    line, offs, &offs, buf);
1.253     schwarze  187:                return 1;
1.142     kristaps  188:        }
1.124     kristaps  189:
1.137     schwarze  190:        /*
                    191:         * Search for the beginning of unescaped trailing whitespace (ws)
                    192:         * and for the first character not to be output (end).
                    193:         */
1.139     kristaps  194:
                    195:        /* FIXME: replace with strcspn(). */
1.137     schwarze  196:        ws = NULL;
                    197:        for (c = end = buf + offs; *c; c++) {
                    198:                switch (*c) {
                    199:                case ' ':
                    200:                        if (NULL == ws)
                    201:                                ws = c;
                    202:                        continue;
                    203:                case '\t':
                    204:                        /*
                    205:                         * Always warn about trailing tabs,
                    206:                         * even outside literal context,
                    207:                         * where they should be put on the next line.
                    208:                         */
                    209:                        if (NULL == ws)
                    210:                                ws = c;
                    211:                        /*
                    212:                         * Strip trailing tabs in literal context only;
                    213:                         * outside, they affect the next line.
                    214:                         */
1.274     schwarze  215:                        if (mdoc->flags & ROFF_NOFILL)
1.137     schwarze  216:                                continue;
                    217:                        break;
                    218:                case '\\':
                    219:                        /* Skip the escaped character, too, if any. */
                    220:                        if (c[1])
                    221:                                c++;
                    222:                        /* FALLTHROUGH */
                    223:                default:
                    224:                        ws = NULL;
                    225:                        break;
                    226:                }
                    227:                end = c + 1;
                    228:        }
                    229:        *end = '\0';
1.91      kristaps  230:
1.137     schwarze  231:        if (ws)
1.271     schwarze  232:                mandoc_msg(MANDOCERR_SPACE_EOL, line, (int)(ws - buf), NULL);
1.115     kristaps  233:
1.267     schwarze  234:        /*
                    235:         * Blank lines are allowed in no-fill mode
                    236:         * and cancel preceding \c,
                    237:         * but add a single vertical space elsewhere.
                    238:         */
                    239:
1.274     schwarze  240:        if (buf[offs] == '\0' && (mdoc->flags & ROFF_NOFILL) == 0) {
1.267     schwarze  241:                switch (mdoc->last->type) {
                    242:                case ROFFT_TEXT:
                    243:                        sp = mdoc->last->string;
                    244:                        cp = end = strchr(sp, '\0') - 2;
                    245:                        if (cp < sp || cp[0] != '\\' || cp[1] != 'c')
                    246:                                break;
                    247:                        while (cp > sp && cp[-1] == '\\')
                    248:                                cp--;
                    249:                        if ((end - cp) % 2)
                    250:                                break;
                    251:                        *end = '\0';
                    252:                        return 1;
                    253:                default:
                    254:                        break;
                    255:                }
1.271     schwarze  256:                mandoc_msg(MANDOCERR_FI_BLANK, line, (int)(c - buf), NULL);
1.264     schwarze  257:                roff_elem_alloc(mdoc, line, offs, ROFF_sp);
1.258     schwarze  258:                mdoc->last->flags |= NODE_VALID | NODE_ENDED;
1.242     schwarze  259:                mdoc->next = ROFF_NEXT_SIBLING;
1.253     schwarze  260:                return 1;
1.119     kristaps  261:        }
1.68      kristaps  262:
1.249     schwarze  263:        roff_word_alloc(mdoc, line, offs, buf+offs);
1.91      kristaps  264:
1.274     schwarze  265:        if (mdoc->flags & ROFF_NOFILL)
1.253     schwarze  266:                return 1;
1.128     kristaps  267:
                    268:        /*
                    269:         * End-of-sentence check.  If the last character is an unescaped
                    270:         * EOS character, then flag the node as being the end of a
                    271:         * sentence.  The front-end will know how to interpret this.
                    272:         */
1.132     kristaps  273:
1.137     schwarze  274:        assert(buf < end);
                    275:
1.207     schwarze  276:        if (mandoc_eos(buf+offs, (size_t)(end-buf-offs)))
1.258     schwarze  277:                mdoc->last->flags |= NODE_EOS;
1.259     schwarze  278:
                    279:        for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) {
                    280:                if (c - buf < offs + 2)
                    281:                        continue;
1.265     schwarze  282:                if (end - c < 3)
1.259     schwarze  283:                        break;
1.266     schwarze  284:                if (c[1] != ' ' ||
1.268     schwarze  285:                    isalnum((unsigned char)c[-2]) == 0 ||
                    286:                    isalnum((unsigned char)c[-1]) == 0 ||
1.266     schwarze  287:                    (c[-2] == 'n' && c[-1] == 'c') ||
                    288:                    (c[-2] == 'v' && c[-1] == 's'))
                    289:                        continue;
                    290:                c += 2;
                    291:                if (*c == ' ')
                    292:                        c++;
                    293:                if (*c == ' ')
                    294:                        c++;
                    295:                if (isupper((unsigned char)(*c)))
1.271     schwarze  296:                        mandoc_msg(MANDOCERR_EOS, line, (int)(c - buf), NULL);
1.259     schwarze  297:        }
                    298:
1.253     schwarze  299:        return 1;
1.1       kristaps  300: }
                    301:
1.53      kristaps  302: /*
                    303:  * Parse a macro line, that is, a line beginning with the control
                    304:  * character.
                    305:  */
1.155     kristaps  306: static int
1.242     schwarze  307: mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs)
1.1       kristaps  308: {
1.240     schwarze  309:        struct roff_node *n;
1.229     schwarze  310:        const char       *cp;
1.263     schwarze  311:        size_t            sz;
1.262     schwarze  312:        enum roff_tok     tok;
1.263     schwarze  313:        int               sv;
                    314:
                    315:        /* Determine the line macro. */
1.63      kristaps  316:
1.188     kristaps  317:        sv = offs;
1.263     schwarze  318:        tok = TOKEN_NONE;
                    319:        for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++)
                    320:                offs++;
                    321:        if (sz == 2 || sz == 3)
                    322:                tok = roffhash_find(mdoc->mdocmac, buf + sv, sz);
1.248     schwarze  323:        if (tok == TOKEN_NONE) {
1.271     schwarze  324:                mandoc_msg(MANDOCERR_MACRO, ln, sv, "%s", buf + sv - 1);
1.253     schwarze  325:                return 1;
1.53      kristaps  326:        }
1.1       kristaps  327:
1.229     schwarze  328:        /* Skip a leading escape sequence or tab. */
1.160     kristaps  329:
1.229     schwarze  330:        switch (buf[offs]) {
                    331:        case '\\':
                    332:                cp = buf + offs + 1;
                    333:                mandoc_escape(&cp, NULL, NULL);
                    334:                offs = cp - buf;
                    335:                break;
                    336:        case '\t':
1.188     kristaps  337:                offs++;
1.229     schwarze  338:                break;
                    339:        default:
                    340:                break;
                    341:        }
1.160     kristaps  342:
                    343:        /* Jump to the next non-whitespace word. */
1.1       kristaps  344:
1.261     schwarze  345:        while (buf[offs] == ' ')
1.188     kristaps  346:                offs++;
1.1       kristaps  347:
1.213     schwarze  348:        /*
1.125     kristaps  349:         * Trailing whitespace.  Note that tabs are allowed to be passed
                    350:         * into the parser as "text", so we only warn about spaces here.
                    351:         */
1.115     kristaps  352:
1.188     kristaps  353:        if ('\0' == buf[offs] && ' ' == buf[offs - 1])
1.271     schwarze  354:                mandoc_msg(MANDOCERR_SPACE_EOL, ln, offs - 1, NULL);
1.115     kristaps  355:
1.144     kristaps  356:        /*
1.275   ! schwarze  357:         * If an initial or transparent macro or a list invocation,
        !           358:         * divert directly into macro processing.
1.144     kristaps  359:         */
                    360:
1.257     schwarze  361:        n = mdoc->last;
1.275   ! schwarze  362:        if (n == NULL || tok == MDOC_It || tok == MDOC_El ||
        !           363:            roff_tok_transparent(tok)) {
1.269     schwarze  364:                (*mdoc_macro(tok)->fp)(mdoc, tok, ln, sv, &offs, buf);
1.253     schwarze  365:                return 1;
1.232     schwarze  366:        }
1.144     kristaps  367:
                    368:        /*
1.257     schwarze  369:         * If a column list contains a non-It macro, assume an implicit
                    370:         * item macro.  This can happen one or more times at the
                    371:         * beginning of such a list, intermixed with text lines and
                    372:         * with nodes generated on the roff level, for example by tbl.
1.98      kristaps  373:         */
1.144     kristaps  374:
1.257     schwarze  375:        if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
                    376:             n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
                    377:            (n->parent != NULL && n->parent->tok == MDOC_Bl &&
                    378:             n->parent->norm->Bl.type == LIST_column)) {
1.203     schwarze  379:                mdoc->flags |= MDOC_FREECOL;
1.269     schwarze  380:                (*mdoc_macro(MDOC_It)->fp)(mdoc, MDOC_It, ln, sv, &sv, buf);
1.253     schwarze  381:                return 1;
1.144     kristaps  382:        }
                    383:
                    384:        /* Normal processing of a macro. */
                    385:
1.269     schwarze  386:        (*mdoc_macro(tok)->fp)(mdoc, tok, ln, sv, &offs, buf);
1.208     schwarze  387:
                    388:        /* In quick mode (for mandocdb), abort after the NAME section. */
                    389:
                    390:        if (mdoc->quick && MDOC_Sh == tok &&
                    391:            SEC_NAME != mdoc->last->sec)
1.253     schwarze  392:                return 2;
1.1       kristaps  393:
1.253     schwarze  394:        return 1;
1.1       kristaps  395: }
1.100     kristaps  396:
1.186     kristaps  397: enum mdelim
                    398: mdoc_isdelim(const char *p)
                    399: {
                    400:
                    401:        if ('\0' == p[0])
1.253     schwarze  402:                return DELIM_NONE;
1.186     kristaps  403:
                    404:        if ('\0' == p[1])
                    405:                switch (p[0]) {
1.213     schwarze  406:                case '(':
                    407:                case '[':
1.253     schwarze  408:                        return DELIM_OPEN;
1.213     schwarze  409:                case '|':
1.253     schwarze  410:                        return DELIM_MIDDLE;
1.213     schwarze  411:                case '.':
                    412:                case ',':
                    413:                case ';':
                    414:                case ':':
                    415:                case '?':
                    416:                case '!':
                    417:                case ')':
                    418:                case ']':
1.253     schwarze  419:                        return DELIM_CLOSE;
1.186     kristaps  420:                default:
1.253     schwarze  421:                        return DELIM_NONE;
1.186     kristaps  422:                }
                    423:
                    424:        if ('\\' != p[0])
1.253     schwarze  425:                return DELIM_NONE;
1.100     kristaps  426:
1.186     kristaps  427:        if (0 == strcmp(p + 1, "."))
1.253     schwarze  428:                return DELIM_CLOSE;
1.200     schwarze  429:        if (0 == strcmp(p + 1, "fR|\\fP"))
1.253     schwarze  430:                return DELIM_MIDDLE;
1.186     kristaps  431:
1.253     schwarze  432:        return DELIM_NONE;
1.186     kristaps  433: }

CVSweb