[BACK]Return to mdoc.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

Annotation of mandoc/mdoc.c, Revision 1.262

1.262   ! schwarze    1: /*     $Id: mdoc.c,v 1.261 2017/03/03 13:55:31 schwarze Exp $ */
1.1       kristaps    2: /*
1.182     schwarze    3:  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.259     schwarze    4:  * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org>
1.1       kristaps    5:  *
                      6:  * Permission to use, copy, modify, and distribute this software for any
1.75      kristaps    7:  * purpose with or without fee is hereby granted, provided that the above
                      8:  * copyright notice and this permission notice appear in all copies.
1.1       kristaps    9:  *
1.241     schwarze   10:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.75      kristaps   11:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.241     schwarze   12:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.75      kristaps   13:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1       kristaps   17:  */
1.114     kristaps   18: #include "config.h"
                     19:
1.106     kristaps   20: #include <sys/types.h>
                     21:
1.1       kristaps   22: #include <assert.h>
1.211     schwarze   23: #include <ctype.h>
1.1       kristaps   24: #include <stdarg.h>
1.73      kristaps   25: #include <stdio.h>
1.1       kristaps   26: #include <stdlib.h>
                     27: #include <string.h>
1.120     kristaps   28: #include <time.h>
1.1       kristaps   29:
1.239     schwarze   30: #include "mandoc_aux.h"
                     31: #include "mandoc.h"
                     32: #include "roff.h"
1.187     kristaps   33: #include "mdoc.h"
1.239     schwarze   34: #include "libmandoc.h"
1.247     schwarze   35: #include "roff_int.h"
1.70      kristaps   36: #include "libmdoc.h"
1.1       kristaps   37:
1.213     schwarze   38: const  char *const __mdoc_argnames[MDOC_ARG_MAX] = {
1.1       kristaps   39:        "split",                "nosplit",              "ragged",
1.213     schwarze   40:        "unfilled",             "literal",              "file",
                     41:        "offset",               "bullet",               "dash",
                     42:        "hyphen",               "item",                 "enum",
                     43:        "tag",                  "diag",                 "hang",
                     44:        "ohang",                "inset",                "column",
                     45:        "width",                "compact",              "std",
1.52      kristaps   46:        "filled",               "words",                "emphasis",
1.108     kristaps   47:        "symbolic",             "nested",               "centered"
1.262   ! schwarze   48: };
1.1       kristaps   49: const  char * const *mdoc_argnames = __mdoc_argnames;
                     50:
1.242     schwarze   51: static int               mdoc_ptext(struct roff_man *, int, char *, int);
                     52: static int               mdoc_pmacro(struct roff_man *, int, char *, int);
1.88      kristaps   53:
1.181     kristaps   54:
1.50      kristaps   55: /*
1.53      kristaps   56:  * Main parse routine.  Parses a single line -- really just hands off to
1.123     kristaps   57:  * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
1.50      kristaps   58:  */
1.20      kristaps   59: int
1.242     schwarze   60: mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs)
1.1       kristaps   61: {
                     62:
1.239     schwarze   63:        if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line)
1.228     schwarze   64:                mdoc->flags |= MDOC_NEWLINE;
1.153     schwarze   65:
                     66:        /*
                     67:         * Let the roff nS register switch SYNOPSIS mode early,
                     68:         * such that the parser knows at all times
                     69:         * whether this mode is on or off.
                     70:         * Note that this mode is also switched by the Sh macro.
                     71:         */
1.204     schwarze   72:        if (roff_getreg(mdoc->roff, "nS"))
                     73:                mdoc->flags |= MDOC_SYNOPSIS;
                     74:        else
                     75:                mdoc->flags &= ~MDOC_SYNOPSIS;
1.153     schwarze   76:
1.253     schwarze   77:        return roff_getcontrol(mdoc->roff, buf, &offs) ?
1.213     schwarze   78:            mdoc_pmacro(mdoc, ln, buf, offs) :
1.253     schwarze   79:            mdoc_ptext(mdoc, ln, buf, offs);
1.1       kristaps   80: }
                     81:
1.232     schwarze   82: void
1.148     kristaps   83: mdoc_macro(MACRO_PROT_ARGS)
1.88      kristaps   84: {
1.262   ! schwarze   85:        assert(tok >= MDOC_Dd && tok < MDOC_MAX);
1.232     schwarze   86:        (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf);
1.73      kristaps   87: }
                     88:
1.231     schwarze   89: void
1.262   ! schwarze   90: mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, enum roff_tok tok)
1.17      kristaps   91: {
1.240     schwarze   92:        struct roff_node *p;
1.17      kristaps   93:
1.247     schwarze   94:        p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok);
                     95:        roff_node_append(mdoc, p);
1.242     schwarze   96:        mdoc->next = ROFF_NEXT_CHILD;
1.152     schwarze   97: }
                     98:
1.240     schwarze   99: struct roff_node *
1.262   ! schwarze  100: mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos,
        !           101:     enum roff_tok tok, struct roff_node *body)
1.152     schwarze  102: {
1.240     schwarze  103:        struct roff_node *p;
1.152     schwarze  104:
1.258     schwarze  105:        body->flags |= NODE_ENDED;
                    106:        body->parent->flags |= NODE_ENDED;
1.247     schwarze  107:        p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok);
1.237     schwarze  108:        p->body = body;
1.202     schwarze  109:        p->norm = body->norm;
1.260     schwarze  110:        p->end = ENDBODY_SPACE;
1.247     schwarze  111:        roff_node_append(mdoc, p);
1.242     schwarze  112:        mdoc->next = ROFF_NEXT_SIBLING;
1.253     schwarze  113:        return p;
1.1       kristaps  114: }
                    115:
1.240     schwarze  116: struct roff_node *
1.242     schwarze  117: mdoc_block_alloc(struct roff_man *mdoc, int line, int pos,
1.262   ! schwarze  118:     enum roff_tok tok, struct mdoc_arg *args)
1.1       kristaps  119: {
1.240     schwarze  120:        struct roff_node *p;
1.1       kristaps  121:
1.247     schwarze  122:        p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok);
1.77      kristaps  123:        p->args = args;
                    124:        if (p->args)
1.53      kristaps  125:                (args->refcnt)++;
1.172     kristaps  126:
                    127:        switch (tok) {
1.213     schwarze  128:        case MDOC_Bd:
                    129:        case MDOC_Bf:
                    130:        case MDOC_Bl:
1.217     schwarze  131:        case MDOC_En:
1.213     schwarze  132:        case MDOC_Rs:
1.172     kristaps  133:                p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
                    134:                break;
                    135:        default:
                    136:                break;
                    137:        }
1.247     schwarze  138:        roff_node_append(mdoc, p);
1.242     schwarze  139:        mdoc->next = ROFF_NEXT_CHILD;
1.253     schwarze  140:        return p;
1.1       kristaps  141: }
                    142:
1.231     schwarze  143: void
1.242     schwarze  144: mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos,
1.262   ! schwarze  145:      enum roff_tok tok, struct mdoc_arg *args)
1.1       kristaps  146: {
1.240     schwarze  147:        struct roff_node *p;
1.1       kristaps  148:
1.247     schwarze  149:        p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok);
1.77      kristaps  150:        p->args = args;
                    151:        if (p->args)
1.53      kristaps  152:                (args->refcnt)++;
1.172     kristaps  153:
                    154:        switch (tok) {
1.213     schwarze  155:        case MDOC_An:
1.172     kristaps  156:                p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
                    157:                break;
                    158:        default:
                    159:                break;
                    160:        }
1.247     schwarze  161:        roff_node_append(mdoc, p);
1.242     schwarze  162:        mdoc->next = ROFF_NEXT_CHILD;
1.175     kristaps  163: }
1.1       kristaps  164:
1.231     schwarze  165: void
1.242     schwarze  166: mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p)
1.201     schwarze  167: {
                    168:
1.247     schwarze  169:        roff_node_unlink(mdoc, p);
1.256     schwarze  170:        p->prev = p->next = NULL;
1.247     schwarze  171:        roff_node_append(mdoc, p);
1.1       kristaps  172: }
                    173:
1.53      kristaps  174: /*
                    175:  * Parse free-form text, that is, a line that does not begin with the
                    176:  * control character.
                    177:  */
                    178: static int
1.242     schwarze  179: mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs)
1.1       kristaps  180: {
1.240     schwarze  181:        struct roff_node *n;
1.142     kristaps  182:        char             *c, *ws, *end;
                    183:
1.203     schwarze  184:        n = mdoc->last;
1.142     kristaps  185:
                    186:        /*
1.257     schwarze  187:         * If a column list contains plain text, assume an implicit item
                    188:         * macro.  This can happen one or more times at the beginning
                    189:         * of such a list, intermixed with non-It mdoc macros and with
                    190:         * nodes generated on the roff level, for example by tbl.
1.142     kristaps  191:         */
                    192:
1.257     schwarze  193:        if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
                    194:             n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
                    195:            (n->parent != NULL && n->parent->tok == MDOC_Bl &&
                    196:             n->parent->norm->Bl.type == LIST_column)) {
1.203     schwarze  197:                mdoc->flags |= MDOC_FREECOL;
1.232     schwarze  198:                mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf);
1.253     schwarze  199:                return 1;
1.142     kristaps  200:        }
1.124     kristaps  201:
1.137     schwarze  202:        /*
                    203:         * Search for the beginning of unescaped trailing whitespace (ws)
                    204:         * and for the first character not to be output (end).
                    205:         */
1.139     kristaps  206:
                    207:        /* FIXME: replace with strcspn(). */
1.137     schwarze  208:        ws = NULL;
                    209:        for (c = end = buf + offs; *c; c++) {
                    210:                switch (*c) {
                    211:                case ' ':
                    212:                        if (NULL == ws)
                    213:                                ws = c;
                    214:                        continue;
                    215:                case '\t':
                    216:                        /*
                    217:                         * Always warn about trailing tabs,
                    218:                         * even outside literal context,
                    219:                         * where they should be put on the next line.
                    220:                         */
                    221:                        if (NULL == ws)
                    222:                                ws = c;
                    223:                        /*
                    224:                         * Strip trailing tabs in literal context only;
                    225:                         * outside, they affect the next line.
                    226:                         */
1.203     schwarze  227:                        if (MDOC_LITERAL & mdoc->flags)
1.137     schwarze  228:                                continue;
                    229:                        break;
                    230:                case '\\':
                    231:                        /* Skip the escaped character, too, if any. */
                    232:                        if (c[1])
                    233:                                c++;
                    234:                        /* FALLTHROUGH */
                    235:                default:
                    236:                        ws = NULL;
                    237:                        break;
                    238:                }
                    239:                end = c + 1;
                    240:        }
                    241:        *end = '\0';
1.91      kristaps  242:
1.137     schwarze  243:        if (ws)
1.218     schwarze  244:                mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
                    245:                    line, (int)(ws-buf), NULL);
1.115     kristaps  246:
1.231     schwarze  247:        if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) {
1.218     schwarze  248:                mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse,
                    249:                    line, (int)(c - buf), NULL);
1.124     kristaps  250:
1.119     kristaps  251:                /*
1.165     schwarze  252:                 * Insert a `sp' in the case of a blank line.  Technically,
1.124     kristaps  253:                 * blank lines aren't allowed, but enough manuals assume this
                    254:                 * behaviour that we want to work around it.
1.119     kristaps  255:                 */
1.250     schwarze  256:                roff_elem_alloc(mdoc, line, offs, MDOC_sp);
1.258     schwarze  257:                mdoc->last->flags |= NODE_VALID | NODE_ENDED;
1.242     schwarze  258:                mdoc->next = ROFF_NEXT_SIBLING;
1.253     schwarze  259:                return 1;
1.119     kristaps  260:        }
1.68      kristaps  261:
1.249     schwarze  262:        roff_word_alloc(mdoc, line, offs, buf+offs);
1.91      kristaps  263:
1.231     schwarze  264:        if (mdoc->flags & MDOC_LITERAL)
1.253     schwarze  265:                return 1;
1.128     kristaps  266:
                    267:        /*
                    268:         * End-of-sentence check.  If the last character is an unescaped
                    269:         * EOS character, then flag the node as being the end of a
                    270:         * sentence.  The front-end will know how to interpret this.
                    271:         */
1.132     kristaps  272:
1.137     schwarze  273:        assert(buf < end);
                    274:
1.207     schwarze  275:        if (mandoc_eos(buf+offs, (size_t)(end-buf-offs)))
1.258     schwarze  276:                mdoc->last->flags |= NODE_EOS;
1.259     schwarze  277:
                    278:        for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) {
                    279:                if (c - buf < offs + 2)
                    280:                        continue;
                    281:                if (end - c < 4)
                    282:                        break;
                    283:                if (isalpha((unsigned char)c[-2]) &&
                    284:                    isalpha((unsigned char)c[-1]) &&
                    285:                    c[1] == ' ' &&
                    286:                    isupper((unsigned char)(c[2] == ' ' ? c[3] : c[2])) &&
                    287:                    (c[-2] != 'n' || c[-1] != 'c') &&
                    288:                    (c[-2] != 'v' || c[-1] != 's'))
                    289:                        mandoc_msg(MANDOCERR_EOS, mdoc->parse,
                    290:                            line, (int)(c - buf), NULL);
                    291:        }
                    292:
1.253     schwarze  293:        return 1;
1.1       kristaps  294: }
                    295:
1.53      kristaps  296: /*
                    297:  * Parse a macro line, that is, a line beginning with the control
                    298:  * character.
                    299:  */
1.155     kristaps  300: static int
1.242     schwarze  301: mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs)
1.1       kristaps  302: {
1.240     schwarze  303:        struct roff_node *n;
1.229     schwarze  304:        const char       *cp;
1.262   ! schwarze  305:        enum roff_tok     tok;
1.188     kristaps  306:        int               i, sv;
1.144     kristaps  307:        char              mac[5];
1.63      kristaps  308:
1.188     kristaps  309:        sv = offs;
1.130     kristaps  310:
1.213     schwarze  311:        /*
1.162     schwarze  312:         * Copy the first word into a nil-terminated buffer.
1.229     schwarze  313:         * Stop when a space, tab, escape, or eoln is encountered.
1.160     kristaps  314:         */
1.1       kristaps  315:
1.188     kristaps  316:        i = 0;
1.229     schwarze  317:        while (i < 4 && strchr(" \t\\", buf[offs]) == NULL)
1.188     kristaps  318:                mac[i++] = buf[offs++];
                    319:
                    320:        mac[i] = '\0';
                    321:
1.248     schwarze  322:        tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : TOKEN_NONE;
1.1       kristaps  323:
1.248     schwarze  324:        if (tok == TOKEN_NONE) {
1.222     schwarze  325:                mandoc_msg(MANDOCERR_MACRO, mdoc->parse,
                    326:                    ln, sv, buf + sv - 1);
1.253     schwarze  327:                return 1;
1.53      kristaps  328:        }
1.1       kristaps  329:
1.229     schwarze  330:        /* Skip a leading escape sequence or tab. */
1.160     kristaps  331:
1.229     schwarze  332:        switch (buf[offs]) {
                    333:        case '\\':
                    334:                cp = buf + offs + 1;
                    335:                mandoc_escape(&cp, NULL, NULL);
                    336:                offs = cp - buf;
                    337:                break;
                    338:        case '\t':
1.188     kristaps  339:                offs++;
1.229     schwarze  340:                break;
                    341:        default:
                    342:                break;
                    343:        }
1.160     kristaps  344:
                    345:        /* Jump to the next non-whitespace word. */
1.1       kristaps  346:
1.261     schwarze  347:        while (buf[offs] == ' ')
1.188     kristaps  348:                offs++;
1.1       kristaps  349:
1.213     schwarze  350:        /*
1.125     kristaps  351:         * Trailing whitespace.  Note that tabs are allowed to be passed
                    352:         * into the parser as "text", so we only warn about spaces here.
                    353:         */
1.115     kristaps  354:
1.188     kristaps  355:        if ('\0' == buf[offs] && ' ' == buf[offs - 1])
1.218     schwarze  356:                mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
                    357:                    ln, offs - 1, NULL);
1.115     kristaps  358:
1.144     kristaps  359:        /*
                    360:         * If an initial macro or a list invocation, divert directly
                    361:         * into macro processing.
                    362:         */
                    363:
1.257     schwarze  364:        n = mdoc->last;
                    365:        if (n == NULL || tok == MDOC_It || tok == MDOC_El) {
1.232     schwarze  366:                mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
1.253     schwarze  367:                return 1;
1.232     schwarze  368:        }
1.144     kristaps  369:
                    370:        /*
1.257     schwarze  371:         * If a column list contains a non-It macro, assume an implicit
                    372:         * item macro.  This can happen one or more times at the
                    373:         * beginning of such a list, intermixed with text lines and
                    374:         * with nodes generated on the roff level, for example by tbl.
1.98      kristaps  375:         */
1.144     kristaps  376:
1.257     schwarze  377:        if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
                    378:             n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
                    379:            (n->parent != NULL && n->parent->tok == MDOC_Bl &&
                    380:             n->parent->norm->Bl.type == LIST_column)) {
1.203     schwarze  381:                mdoc->flags |= MDOC_FREECOL;
1.232     schwarze  382:                mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf);
1.253     schwarze  383:                return 1;
1.144     kristaps  384:        }
                    385:
                    386:        /* Normal processing of a macro. */
                    387:
1.232     schwarze  388:        mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
1.208     schwarze  389:
                    390:        /* In quick mode (for mandocdb), abort after the NAME section. */
                    391:
                    392:        if (mdoc->quick && MDOC_Sh == tok &&
                    393:            SEC_NAME != mdoc->last->sec)
1.253     schwarze  394:                return 2;
1.1       kristaps  395:
1.253     schwarze  396:        return 1;
1.1       kristaps  397: }
1.100     kristaps  398:
1.186     kristaps  399: enum mdelim
                    400: mdoc_isdelim(const char *p)
                    401: {
                    402:
                    403:        if ('\0' == p[0])
1.253     schwarze  404:                return DELIM_NONE;
1.186     kristaps  405:
                    406:        if ('\0' == p[1])
                    407:                switch (p[0]) {
1.213     schwarze  408:                case '(':
                    409:                case '[':
1.253     schwarze  410:                        return DELIM_OPEN;
1.213     schwarze  411:                case '|':
1.253     schwarze  412:                        return DELIM_MIDDLE;
1.213     schwarze  413:                case '.':
                    414:                case ',':
                    415:                case ';':
                    416:                case ':':
                    417:                case '?':
                    418:                case '!':
                    419:                case ')':
                    420:                case ']':
1.253     schwarze  421:                        return DELIM_CLOSE;
1.186     kristaps  422:                default:
1.253     schwarze  423:                        return DELIM_NONE;
1.186     kristaps  424:                }
                    425:
                    426:        if ('\\' != p[0])
1.253     schwarze  427:                return DELIM_NONE;
1.100     kristaps  428:
1.186     kristaps  429:        if (0 == strcmp(p + 1, "."))
1.253     schwarze  430:                return DELIM_CLOSE;
1.200     schwarze  431:        if (0 == strcmp(p + 1, "fR|\\fP"))
1.253     schwarze  432:                return DELIM_MIDDLE;
1.186     kristaps  433:
1.253     schwarze  434:        return DELIM_NONE;
1.255     schwarze  435: }
                    436:
                    437: void
                    438: mdoc_validate(struct roff_man *mdoc)
                    439: {
                    440:
                    441:        mdoc->last = mdoc->first;
                    442:        mdoc_node_validate(mdoc);
                    443:        mdoc_state_reset(mdoc);
1.186     kristaps  444: }

CVSweb