[BACK]Return to roff.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

Annotation of mandoc/roff.c, Revision 1.78

1.78    ! kristaps    1: /*     $Id: roff.c,v 1.77 2010/05/16 19:08:11 kristaps Exp $ */
1.1       kristaps    2: /*
1.67      kristaps    3:  * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
1.1       kristaps    4:  *
                      5:  * Permission to use, copy, modify, and distribute this software for any
1.66      kristaps    6:  * purpose with or without fee is hereby granted, provided that the above
                      7:  * copyright notice and this permission notice appear in all copies.
1.1       kristaps    8:  *
1.66      kristaps    9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1       kristaps   16:  */
1.66      kristaps   17: #ifdef HAVE_CONFIG_H
                     18: #include "config.h"
                     19: #endif
1.30      kristaps   20:
1.67      kristaps   21: #include <assert.h>
1.1       kristaps   22: #include <stdlib.h>
1.67      kristaps   23: #include <string.h>
1.75      kristaps   24: #include <stdio.h>
1.1       kristaps   25:
1.67      kristaps   26: #include "mandoc.h"
1.43      kristaps   27: #include "roff.h"
1.33      kristaps   28:
/* Non-zero when c is one of the two characters that open a control line. */
#define	ROFF_CTL(c) \
	((c) == '.' || (c) == '\'')

/*
 * Scope-tracing hook.  Change the guard below to 1 to have each scope
 * transition printed on stderr together with the name and position of
 * the innermost open node.  When disabled it expands to an empty loop,
 * so a call followed by a semicolon is still a complete statement.
 */
#if	0
#define	ROFF_MDEBUG(p, str) \
	fprintf(stderr, "%s: %s (%d:%d)\n", (str), \
		roffs[(p)->last->tok].name, \
		(p)->last->line, (p)->last->col)
#else
#define	ROFF_MDEBUG(p, str) while (/* CONSTCOND */ 0)
#endif
                     39:
/*
 * Identifiers of the roff requests known to this file.  The values are
 * used as indices into roffs[], so the enabled entries must stay in the
 * same order as that table.
 */
enum	rofft {
	ROFF_if,	/* "if" */
	ROFF_ig,	/* "ig" */
	ROFF_cblock,	/* "." */
	ROFF_ccond,	/* "\}" */
#if 0
	/* Compiled out for now. */
	ROFF_am,
	ROFF_ami,
	ROFF_de,
	ROFF_dei,
	ROFF_close,
#endif
	ROFF_MAX	/* count of enabled tokens; doubles as "not found" */
};
                     54:
                     55: struct roff {
                     56:        struct roffnode *last; /* leaf of stack */
                     57:        mandocmsg        msg; /* err/warn/fatal messages */
                     58:        void            *data; /* privdata for messages */
                     59: };
                     60:
                     61: struct roffnode {
                     62:        enum rofft       tok; /* type of node */
                     63:        struct roffnode *parent; /* up one in stack */
1.74      kristaps   64:        char            *end; /* end-token: custom */
1.67      kristaps   65:        int              line; /* parse line */
                     66:        int              col; /* parse col */
1.75      kristaps   67:        int              endspan;
1.67      kristaps   68: };
                     69:
                     70: #define        ROFF_ARGS        struct roff *r, /* parse ctx */ \
1.72      kristaps   71:                         enum rofft tok, /* tok of macro */ \
1.67      kristaps   72:                         char **bufp, /* input buffer */ \
                     73:                         size_t *szp, /* size of input buffer */ \
                     74:                         int ln, /* parse line */ \
1.75      kristaps   75:                         int ppos, /* original pos in buffer */ \
                     76:                         int pos, /* current pos in buffer */ \
1.74      kristaps   77:                         int *offs /* reset offset of buffer data */
1.67      kristaps   78:
                     79: typedef        enum rofferr (*roffproc)(ROFF_ARGS);
                     80:
                     81: struct roffmac {
                     82:        const char      *name; /* macro name */
1.75      kristaps   83:        roffproc         proc;
1.78    ! kristaps   84:        roffproc         text;
1.67      kristaps   85: };
                     86:
1.75      kristaps   87: static enum rofferr     roff_if(ROFF_ARGS);
1.78    ! kristaps   88: static enum rofferr     roff_if_text(ROFF_ARGS);
1.76      kristaps   89: static enum rofferr     roff_ig(ROFF_ARGS);
1.78    ! kristaps   90: static enum rofferr     roff_ig_text(ROFF_ARGS);
1.76      kristaps   91: static enum rofferr     roff_cblock(ROFF_ARGS);
1.75      kristaps   92: static enum rofferr     roff_ccond(ROFF_ARGS);
1.67      kristaps   93:
                     94: const  struct roffmac   roffs[ROFF_MAX] = {
1.78    ! kristaps   95:        { "if", roff_if, roff_if_text },
        !            96:        { "ig", roff_ig, roff_ig_text },
        !            97:        { ".", roff_cblock, NULL },
        !            98:        { "\\}", roff_ccond, NULL },
1.67      kristaps   99: };
                    100:
                    101: static void             roff_free1(struct roff *);
                    102: static enum rofft       roff_hash_find(const char *);
1.76      kristaps  103: static void             roffnode_cleanscope(struct roff *);
1.67      kristaps  104: static int              roffnode_push(struct roff *,
                    105:                                enum rofft, int, int);
                    106: static void             roffnode_pop(struct roff *);
                    107: static enum rofft       roff_parse(const char *, int *);
                    108:
                    109:
                    110: /*
                    111:  * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
                    112:  * the nil-terminated string name could be found.
                    113:  */
                    114: static enum rofft
                    115: roff_hash_find(const char *p)
                    116: {
                    117:        int              i;
                    118:
                    119:        /* FIXME: make this be fast and efficient. */
                    120:
                    121:        for (i = 0; i < (int)ROFF_MAX; i++)
                    122:                if (0 == strcmp(roffs[i].name, p))
                    123:                        return((enum rofft)i);
                    124:
                    125:        return(ROFF_MAX);
                    126: }
                    127:
                    128:
                    129: /*
                    130:  * Pop the current node off of the stack of roff instructions currently
                    131:  * pending.
                    132:  */
                    133: static void
                    134: roffnode_pop(struct roff *r)
                    135: {
                    136:        struct roffnode *p;
                    137:
1.75      kristaps  138:        assert(r->last);
                    139:        p = r->last;
                    140:        r->last = r->last->parent;
1.74      kristaps  141:        if (p->end)
                    142:                free(p->end);
1.67      kristaps  143:        free(p);
                    144: }
                    145:
                    146:
                    147: /*
                    148:  * Push a roff node onto the instruction stack.  This must later be
                    149:  * removed with roffnode_pop().
                    150:  */
                    151: static int
                    152: roffnode_push(struct roff *r, enum rofft tok, int line, int col)
                    153: {
                    154:        struct roffnode *p;
                    155:
                    156:        if (NULL == (p = calloc(1, sizeof(struct roffnode)))) {
                    157:                (*r->msg)(MANDOCERR_MEM, r->data, line, col, NULL);
                    158:                return(0);
                    159:        }
                    160:
                    161:        p->tok = tok;
                    162:        p->parent = r->last;
                    163:        p->line = line;
                    164:        p->col = col;
                    165:
                    166:        r->last = p;
                    167:        return(1);
                    168: }
                    169:
                    170:
                    171: static void
                    172: roff_free1(struct roff *r)
                    173: {
                    174:
                    175:        while (r->last)
                    176:                roffnode_pop(r);
                    177: }
                    178:
                    179:
                    180: void
                    181: roff_reset(struct roff *r)
                    182: {
                    183:
                    184:        roff_free1(r);
                    185: }
                    186:
                    187:
                    188: void
                    189: roff_free(struct roff *r)
                    190: {
                    191:
                    192:        roff_free1(r);
                    193:        free(r);
                    194: }
                    195:
                    196:
                    197: struct roff *
                    198: roff_alloc(const mandocmsg msg, void *data)
                    199: {
                    200:        struct roff     *r;
                    201:
                    202:        if (NULL == (r = calloc(1, sizeof(struct roff)))) {
                    203:                (*msg)(MANDOCERR_MEM, data, 0, 0, NULL);
                    204:                return(0);
                    205:        }
                    206:
                    207:        r->msg = msg;
                    208:        r->data = data;
                    209:        return(r);
                    210: }
                    211:
                    212:
                    213: enum rofferr
1.74      kristaps  214: roff_parseln(struct roff *r, int ln,
                    215:                char **bufp, size_t *szp, int pos, int *offs)
1.67      kristaps  216: {
                    217:        enum rofft       t;
1.78    ! kristaps  218:        int              ppos, i, j, wtf;
1.74      kristaps  219:
1.75      kristaps  220:        if (r->last && ! ROFF_CTL((*bufp)[pos])) {
1.78    ! kristaps  221:                /*
        !           222:                 * If a scope is open and we're not a macro, pass it
        !           223:                 * through our text detector and continue as quickly as
        !           224:                 * possible.
        !           225:                 */
        !           226:                t = r->last->tok;
        !           227:                assert(roffs[t].text);
        !           228:                return((*roffs[t].text)
        !           229:                                (r, t, bufp, szp, ln, pos, pos, offs));
1.75      kristaps  230:        } else if ( ! ROFF_CTL((*bufp)[pos]))
1.78    ! kristaps  231:                /*
        !           232:                 * Don't do anything if we're free-form text.
        !           233:                 */
1.67      kristaps  234:                return(ROFF_CONT);
                    235:
1.78    ! kristaps  236:        /* A macro-ish line with a possibly-open macro context. */
        !           237:
        !           238:        wtf = 0;
        !           239:
        !           240:        if (r->last && r->last->end) {
        !           241:                /*
        !           242:                 * We have a scope open that has a custom end-macro
        !           243:                 * handler.  Try to match it against the input.
        !           244:                 */
        !           245:                i = pos + 1;
        !           246:                while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
        !           247:                        i++;
        !           248:
        !           249:                for (j = 0; r->last->end[j]; j++, i++)
        !           250:                        if ((*bufp)[i] != r->last->end[j])
        !           251:                                break;
        !           252:
        !           253:                if ('\0' == r->last->end[j] &&
        !           254:                                ('\0' == (*bufp)[i] ||
        !           255:                                 ' ' == (*bufp)[i] ||
        !           256:                                 '\t' == (*bufp)[i])) {
        !           257:                        roffnode_pop(r);
        !           258:                        roffnode_cleanscope(r);
        !           259:                        wtf = 1;
        !           260:                }
        !           261:        }
1.67      kristaps  262:
1.75      kristaps  263:        ppos = pos;
1.76      kristaps  264:        if (ROFF_MAX == (t = roff_parse(*bufp, &pos))) {
1.78    ! kristaps  265:                /*
        !           266:                 * This is some of groff's stranger behaviours.  If we
        !           267:                 * encountered a custom end-scope tag and that tag also
        !           268:                 * happens to be a "real" macro, then we need to try
        !           269:                 * interpreting it again as a real macro.  If it's not,
        !           270:                 * then return ignore.  Else continue.
        !           271:                 */
        !           272:                if (wtf)
1.76      kristaps  273:                        return(ROFF_IGN);
1.78    ! kristaps  274:                else if (NULL == r->last)
        !           275:                        return(ROFF_CONT);
        !           276:
        !           277:                /* FIXME: this assumes that we ignore!? */
        !           278:                return(ROFF_IGN);
1.76      kristaps  279:        }
1.67      kristaps  280:
1.75      kristaps  281:        assert(roffs[t].proc);
1.78    ! kristaps  282:        return((*roffs[t].proc)
        !           283:                        (r, t, bufp, szp, ln, ppos, pos, offs));
1.74      kristaps  284: }
                    285:
                    286:
                    287: int
                    288: roff_endparse(struct roff *r)
                    289: {
                    290:
                    291:        if (NULL == r->last)
                    292:                return(1);
                    293:        return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line,
                    294:                                r->last->col, NULL));
1.67      kristaps  295: }
                    296:
                    297:
                    298: /*
                    299:  * Parse a roff node's type from the input buffer.  This must be in the
                    300:  * form of ".foo xxx" in the usual way.
                    301:  */
                    302: static enum rofft
                    303: roff_parse(const char *buf, int *pos)
                    304: {
                    305:        int              j;
                    306:        char             mac[5];
                    307:        enum rofft       t;
                    308:
1.75      kristaps  309:        assert(ROFF_CTL(buf[*pos]));
                    310:        (*pos)++;
1.67      kristaps  311:
                    312:        while (buf[*pos] && (' ' == buf[*pos] || '\t' == buf[*pos]))
                    313:                (*pos)++;
                    314:
                    315:        if ('\0' == buf[*pos])
                    316:                return(ROFF_MAX);
                    317:
                    318:        for (j = 0; j < 4; j++, (*pos)++)
                    319:                if ('\0' == (mac[j] = buf[*pos]))
                    320:                        break;
                    321:                else if (' ' == buf[*pos])
                    322:                        break;
                    323:
                    324:        if (j == 4 || j < 1)
                    325:                return(ROFF_MAX);
                    326:
                    327:        mac[j] = '\0';
                    328:
                    329:        if (ROFF_MAX == (t = roff_hash_find(mac)))
                    330:                return(t);
                    331:
                    332:        while (buf[*pos] && ' ' == buf[*pos])
                    333:                (*pos)++;
                    334:
                    335:        return(t);
                    336: }
                    337:
                    338:
                    339: /* ARGSUSED */
                    340: static enum rofferr
1.76      kristaps  341: roff_cblock(ROFF_ARGS)
1.67      kristaps  342: {
                    343:
1.76      kristaps  344:        if (NULL == r->last) {
                    345:                if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
                    346:                        return(ROFF_ERR);
                    347:                return(ROFF_IGN);
                    348:        }
1.67      kristaps  349:
1.76      kristaps  350:        if (ROFF_ig != r->last->tok) {
                    351:                if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
                    352:                        return(ROFF_ERR);
1.67      kristaps  353:                return(ROFF_IGN);
1.76      kristaps  354:        }
1.67      kristaps  355:
1.76      kristaps  356:        if ((*bufp)[pos])
                    357:                if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
                    358:                        return(ROFF_ERR);
1.71      kristaps  359:
1.76      kristaps  360:        ROFF_MDEBUG(r, "closing ignore block");
1.71      kristaps  361:        roffnode_pop(r);
1.76      kristaps  362:        roffnode_cleanscope(r);
                    363:        return(ROFF_IGN);
1.71      kristaps  364:
1.67      kristaps  365: }
                    366:
                    367:
1.76      kristaps  368: static void
                    369: roffnode_cleanscope(struct roff *r)
1.67      kristaps  370: {
                    371:
1.76      kristaps  372:        while (r->last) {
                    373:                if (--r->last->endspan < 0)
                    374:                        break;
                    375:                ROFF_MDEBUG(r, "closing implicit scope");
                    376:                roffnode_pop(r);
                    377:        }
1.67      kristaps  378: }
                    379:
                    380:
1.75      kristaps  381: /* ARGSUSED */
1.74      kristaps  382: static enum rofferr
1.75      kristaps  383: roff_ccond(ROFF_ARGS)
1.74      kristaps  384: {
                    385:
1.76      kristaps  386:        if (NULL == r->last) {
                    387:                if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
                    388:                        return(ROFF_ERR);
                    389:                return(ROFF_IGN);
                    390:        }
                    391:
                    392:        if (ROFF_if != r->last->tok) {
1.75      kristaps  393:                if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
                    394:                        return(ROFF_ERR);
                    395:                return(ROFF_IGN);
                    396:        }
                    397:
1.76      kristaps  398:        if (r->last->endspan > -1) {
                    399:                if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
                    400:                        return(ROFF_ERR);
                    401:                return(ROFF_IGN);
                    402:        }
                    403:
                    404:        if ((*bufp)[pos])
                    405:                if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
                    406:                        return(ROFF_ERR);
                    407:
1.75      kristaps  408:        ROFF_MDEBUG(r, "closing explicit scope");
                    409:        roffnode_pop(r);
1.76      kristaps  410:        roffnode_cleanscope(r);
                    411:        return(ROFF_IGN);
                    412: }
                    413:
1.75      kristaps  414:
1.76      kristaps  415: /* ARGSUSED */
                    416: static enum rofferr
                    417: roff_ig(ROFF_ARGS)
                    418: {
1.78    ! kristaps  419:        int             sv;
        !           420:        size_t          sz;
1.76      kristaps  421:
                    422:        if ( ! roffnode_push(r, tok, ln, ppos))
                    423:                return(ROFF_ERR);
                    424:
1.78    ! kristaps  425:        if ('\0' == (*bufp)[pos]) {
        !           426:                ROFF_MDEBUG(r, "opening ignore block");
        !           427:                return(ROFF_IGN);
        !           428:        }
        !           429:
        !           430:        sv = pos;
        !           431:        while ((*bufp)[pos] && ' ' != (*bufp)[pos] &&
        !           432:                        '\t' != (*bufp)[pos])
        !           433:                pos++;
        !           434:
        !           435:        /*
        !           436:         * Note: groff does NOT like escape characters in the input.
        !           437:         * Instead of detecting this, we're just going to let it fly and
        !           438:         * to hell with it.
        !           439:         */
        !           440:
        !           441:        assert(pos > sv);
        !           442:        sz = (size_t)(pos - sv);
        !           443:
        !           444:        r->last->end = malloc(sz + 1);
        !           445:
        !           446:        if (NULL == r->last->end) {
        !           447:                (*r->msg)(MANDOCERR_MEM, r->data, ln, pos, NULL);
        !           448:                return(ROFF_ERR);
        !           449:        }
        !           450:
        !           451:        memcpy(r->last->end, *bufp + sv, sz);
        !           452:        r->last->end[(int)sz] = '\0';
        !           453:
        !           454:        ROFF_MDEBUG(r, "opening explicit ignore block");
1.74      kristaps  455:
1.77      kristaps  456:        if ((*bufp)[pos])
                    457:                if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
                    458:                        return(ROFF_ERR);
1.74      kristaps  459:
1.78    ! kristaps  460:        return(ROFF_IGN);
        !           461: }
        !           462:
        !           463:
        !           464: /* ARGSUSED */
        !           465: static enum rofferr
        !           466: roff_ig_text(ROFF_ARGS)
        !           467: {
        !           468:
        !           469:        return(ROFF_IGN);
        !           470: }
        !           471:
        !           472:
        !           473: /* ARGSUSED */
        !           474: static enum rofferr
        !           475: roff_if_text(ROFF_ARGS)
        !           476: {
        !           477:        char            *ep, *st;
        !           478:
        !           479:        st = &(*bufp)[pos];
        !           480:        if (NULL == (ep = strstr(st, "\\}"))) {
        !           481:                roffnode_cleanscope(r);
        !           482:                return(ROFF_IGN);
        !           483:        }
        !           484:
        !           485:        if (ep > st && '\\' != *(ep - 1)) {
        !           486:                ROFF_MDEBUG(r, "closing explicit scope (in-line)");
        !           487:                roffnode_pop(r);
        !           488:        }
        !           489:
        !           490:        roffnode_cleanscope(r);
1.74      kristaps  491:        return(ROFF_IGN);
                    492: }
                    493:
                    494:
1.75      kristaps  495: /* ARGSUSED */
1.74      kristaps  496: static enum rofferr
1.75      kristaps  497: roff_if(ROFF_ARGS)
1.74      kristaps  498: {
1.77      kristaps  499:        int              sv;
1.74      kristaps  500:
                    501:        /*
                    502:         * Read ahead past the conditional.
                    503:         * FIXME: this does not work, as conditionals don't end on
                    504:         * whitespace, but are parsed according to a formal grammar.
                    505:         * It's good enough for now, however.
                    506:         */
                    507:
1.75      kristaps  508:        while ((*bufp)[pos] && ' ' != (*bufp)[pos])
                    509:                pos++;
1.77      kristaps  510:
                    511:        sv = pos;
1.75      kristaps  512:        while (' ' == (*bufp)[pos])
                    513:                pos++;
1.74      kristaps  514:
1.77      kristaps  515:        /*
                    516:         * Roff is weird.  If we have just white-space after the
                    517:         * conditional, it's considered the BODY and we exit without
                    518:         * really doing anything.  Warn about this.  It's probably
                    519:         * wrong.
                    520:         */
                    521:
                    522:        if ('\0' == (*bufp)[pos] && sv != pos) {
                    523:                if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
                    524:                        return(ROFF_ERR);
                    525:                return(ROFF_IGN);
                    526:        }
                    527:
                    528:        if ( ! roffnode_push(r, tok, ln, ppos))
                    529:                return(ROFF_ERR);
                    530:
1.74      kristaps  531:        /* Don't evaluate: just assume NO. */
                    532:
1.75      kristaps  533:        r->last->endspan = 1;
                    534:
                    535:        if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
                    536:                ROFF_MDEBUG(r, "opening explicit scope");
                    537:                r->last->endspan = -1;
                    538:                pos += 2;
                    539:        } else
                    540:                ROFF_MDEBUG(r, "opening implicit scope");
1.74      kristaps  541:
1.77      kristaps  542:        /*
                    543:         * If there are no arguments on the line, the next-line scope is
                    544:         * assumed.
                    545:         */
                    546:
1.75      kristaps  547:        if ('\0' == (*bufp)[pos])
                    548:                return(ROFF_IGN);
1.77      kristaps  549:
                    550:        /* Otherwise re-run the roff parser after recalculating. */
1.74      kristaps  551:
1.75      kristaps  552:        *offs = pos;
                    553:        return(ROFF_RERUN);
1.74      kristaps  554: }

CVSweb