pod2mdoc/pod2mdoc.c - annotate

Return to pod2mdoc.c CVS log
Up to [cvsweb.bsd.lv] / pod2mdoc
Annotation of pod2mdoc/pod2mdoc.c, Revision 1.31

1.31    ! schwarze    1: /*     $Id: pod2mdoc.c,v 1.30 2014/07/15 19:00:48 schwarze Exp $ */
1.1       schwarze    2: /*
                      3:  * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
                      4:  *
                      5:  * Permission to use, copy, modify, and distribute this software for any
                      6:  * purpose with or without fee is hereby granted, provided that the above
                      7:  * copyright notice and this permission notice appear in all copies.
                      8:  *
                      9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
                     10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
                     12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     16:  */
                     17: #include <sys/stat.h>
                     18: #include <sys/time.h>
                     19:
                     20: #include <assert.h>
                     21: #include <ctype.h>
                     22: #include <fcntl.h>
                     23: #include <getopt.h>
                     24: #include <stdio.h>
                     25: #include <stdlib.h>
                     26: #include <string.h>
                     27: #include <unistd.h>
                     28:
1.10      kristaps   29: /*
1.19      kristaps   30:  * In what section can we find Perl module manuals?
                     31:  * Sometimes (Mac OS X) it's 3pm, sometimes (OpenBSD, etc.) 3p.
                     32:  * XXX IF YOU CHANGE THIS, CHANGE POD2MDOC.1 AS WELL.
1.10      kristaps   33:  */
                     34: #define        PERL_SECTION    "3p"
                     35:
1.1       schwarze   36: struct args {
                     37:        const char      *title; /* override "Dt" title */
                     38:        const char      *date; /* override "Dd" date */
                     39:        const char      *section; /* override "Dt" section */
                     40: };
                     41:
1.4       schwarze   42: enum   list {
                     43:        LIST_BULLET = 0,
                     44:        LIST_ENUM,
                     45:        LIST_TAG,
                     46:        LIST__MAX
                     47: };
                     48:
1.11      kristaps   49: enum   sect {
                     50:        SECT_NONE = 0,
                     51:        SECT_NAME, /* NAME section */
                     52:        SECT_SYNOPSIS, /* SYNOPSIS section */
                     53: };
                     54:
1.1       schwarze   55: struct state {
1.31    ! schwarze   56:        const char      *fname; /* file being parsed */
1.1       schwarze   57:        int              parsing; /* after =cut or before command */
                     58:        int              paused; /* in =begin and before =end */
1.11      kristaps   59:        enum sect        sect; /* which section are we in? */
1.4       schwarze   60: #define        LIST_STACKSZ     128
                     61:        enum list        lstack[LIST_STACKSZ]; /* open lists */
                     62:        size_t           lpos; /* where in list stack */
1.31    ! schwarze   63:        int              haspar; /* in paragraph: do we need Pp? */
        !            64:        int              hasnl; /* in text: just started a new line (set by outbuf_newln, cleared by outbuf_flush) */
        !            65:        char            *outbuf; /* text buffered for output */
        !            66:        size_t           outbufsz; /* allocated size of outbuf */
        !            67:        size_t           outbuflen; /* current length of outbuf, not counting the NUL */
1.1       schwarze   68: };
                     69:
                     70: enum   fmt {
                     71:        FMT_ITALIC,
                     72:        FMT_BOLD,
                     73:        FMT_CODE,
                     74:        FMT_LINK,
                     75:        FMT_ESCAPE,
                     76:        FMT_FILE,
                     77:        FMT_NBSP,
                     78:        FMT_INDEX,
                     79:        FMT_NULL,
                     80:        FMT__MAX
                     81: };
                     82:
                     83: enum   cmd {
                     84:        CMD_POD = 0,
                     85:        CMD_HEAD1,
                     86:        CMD_HEAD2,
                     87:        CMD_HEAD3,
                     88:        CMD_HEAD4,
                     89:        CMD_OVER,
                     90:        CMD_ITEM,
                     91:        CMD_BACK,
                     92:        CMD_BEGIN,
                     93:        CMD_END,
                     94:        CMD_FOR,
                     95:        CMD_ENCODING,
                     96:        CMD_CUT,
                     97:        CMD__MAX
                     98: };
                     99:
                    100: static const char *const cmds[CMD__MAX] = {
                    101:        "pod",          /* CMD_POD */
                    102:        "head1",        /* CMD_HEAD1 */
                    103:        "head2",        /* CMD_HEAD2 */
                    104:        "head3",        /* CMD_HEAD3 */
                    105:        "head4",        /* CMD_HEAD4 */
                    106:        "over",         /* CMD_OVER */
                    107:        "item",         /* CMD_ITEM */
                    108:        "back",         /* CMD_BACK */
                    109:        "begin",        /* CMD_BEGIN */
                    110:        "end",          /* CMD_END */
                    111:        "for",          /* CMD_FOR */
                    112:        "encoding",     /* CMD_ENCODING */
                    113:        "cut"           /* CMD_CUT */
                    114: };
                    115:
                    116: static const char fmts[FMT__MAX] = {
                    117:        'I',            /* FMT_ITALIC */
                    118:        'B',            /* FMT_BOLD */
                    119:        'C',            /* FMT_CODE */
                    120:        'L',            /* FMT_LINK */
                    121:        'E',            /* FMT_ESCAPE */
                    122:        'F',            /* FMT_FILE */
                    123:        'S',            /* FMT_NBSP */
                    124:        'X',            /* FMT_INDEX */
                    125:        'Z'             /* FMT_NULL */
                    126: };
                    127:
1.6       kristaps  128: static int     last; /* character appended by outbuf_addchar(); set to the final character by outbuf_addstr() and to '\n' by outbuf_newln() */
                    129:
1.31    ! schwarze  130:
        !           131: static void
        !           132: outbuf_grow(struct state *st, size_t by)
        !           133: {
        !           134:
        !           135:        st->outbufsz += (by / 128 + 1) * 128;
        !           136:        st->outbuf = realloc(st->outbuf, st->outbufsz);
        !           137:        if (NULL == st->outbuf) {
        !           138:                perror(NULL);
        !           139:                exit(EXIT_FAILURE);
        !           140:        }
        !           141: }
        !           142:
        !           143: static void
        !           144: outbuf_addchar(struct state *st)
        !           145: {
        !           146:
        !           147:        if (st->outbuflen + 2 >= st->outbufsz)
        !           148:                outbuf_grow(st, 1);
        !           149:        st->outbuf[st->outbuflen++] = last;
        !           150:        if ('\\' == last)
        !           151:                st->outbuf[st->outbuflen++] = 'e';
        !           152:        st->outbuf[st->outbuflen] = '\0';
        !           153: }
        !           154:
        !           155: static void
        !           156: outbuf_addstr(struct state *st, const char *str)
        !           157: {
        !           158:        size_t   slen;
        !           159:
        !           160:        slen = strlen(str);
        !           161:        if (st->outbuflen + slen >= st->outbufsz)
        !           162:                outbuf_grow(st, slen);
        !           163:        memcpy(st->outbuf + st->outbuflen, str, slen+1);
        !           164:        last = str[slen - 1];
        !           165: }
        !           166:
        !           167: static void
        !           168: outbuf_flush(struct state *st)
        !           169: {
        !           170:
        !           171:        if (0 == st->outbuflen)
        !           172:                return;
        !           173:
        !           174:        fputs(st->outbuf, stdout);
        !           175:        *st->outbuf = '\0';
        !           176:        st->outbuflen = 0;
        !           177:        st->hasnl = 0;
        !           178: }
        !           179:
        !           180: static void
        !           181: outbuf_newln(struct state *st)
        !           182: {
        !           183:
        !           184:        if ('\n' == last)
        !           185:                return;
        !           186:        outbuf_flush(st);
        !           187:        putchar('\n');
        !           188:        last = '\n';
        !           189:        st->hasnl = 1;
        !           190: }
        !           191:
/*
 * Given buf[*start] is at the start of an escape name, read til the end
 * of the escape ('>') then try to do something with it.
 * Sets start to be one after the '>', or leaves it at "end" if the
 * escape is not terminated before the end of the buffer.
 * Only the named escapes "lt", "gt", "vb" and "sol" produce output;
 * every other escape is consumed without printing anything.
 */
static void
formatescape(struct state *st, const char *buf, size_t *start, size_t end)
{
	static const struct {
		const char	*name;	/* POD escape name */
		const char	*out;	/* text added to the output buffer */
	} named[] = {
		{ "lt",		"\\(la" },
		{ "gt",		"\\(ra" },
		{ "vb",		"\\(ba" },
		{ "sol",	"\\(sl" }
	};
	char		 esc[16]; /* no more needed */
	size_t		 i, max;

	max = sizeof(esc) - 1;

	/* Read til our buffer is full. */
	for (i = 0; *start < end && '>' != buf[*start] && i < max; i++)
		esc[i] = buf[(*start)++];
	esc[i] = '\0';

	/* Too long... skip til we end. */
	if (i == max)
		while (*start < end && '>' != buf[*start])
			(*start)++;

	/* Unterminated escape: nothing more to consume or print. */
	if (*start >= end)
		return;

	/*
	 * Step over the closing '>'.
	 * This also applies to over-long names, so that the '>' is not
	 * left behind for the caller to treat as ordinary text.
	 */
	(*start)++;
	if (i == max)
		return;

	/*
	 * TODO: right now, we only recognise the named escapes.
	 * Just let the rest of them go.
	 */
	for (i = 0; i < sizeof(named) / sizeof(named[0]); i++)
		if (0 == strcmp(esc, named[i].name)) {
			outbuf_addstr(st, named[i].out);
			break;
		}
}
                    234:
                    235: /*
1.9       kristaps  236:  * Run some heuristics to intuit a link format.
1.19      kristaps  237:  * I set "start" to be the end of the sequence (last right-carrot) so
1.9       kristaps  238:  * that the caller can safely just continue processing.
1.19      kristaps  239:  * If this is just an empty tag, I'll return 0.
1.9       kristaps  240:  */
                    241: static int
                    242: trylink(const char *buf, size_t *start, size_t end, size_t dsz)
                    243: {
1.21      kristaps  244:        size_t           linkstart, realend, linkend,
                    245:                         i, j, textsz, stack;
1.9       kristaps  246:
                    247:        /*
                    248:         * Scan to the start of the terminus.
                    249:         * This function is more or less replicated in the formatcode()
                    250:         * for null or index formatting codes.
1.23      kristaps  251:         * However, we're slightly different because we might have
                    252:         * nested escapes we need to ignore.
1.9       kristaps  253:         */
1.21      kristaps  254:        stack = 0;
1.19      kristaps  255:        for (linkstart = realend = *start; realend < end; realend++) {
1.23      kristaps  256:                if ('<' == buf[realend])
                    257:                        stack++;
1.19      kristaps  258:                if ('>' != buf[realend])
1.9       kristaps  259:                        continue;
1.23      kristaps  260:                else if (stack-- > 0)
                    261:                        continue;
                    262:                if (dsz == 1)
1.9       kristaps  263:                        break;
1.19      kristaps  264:                assert(realend > 0);
                    265:                if (' ' != buf[realend - 1])
1.9       kristaps  266:                        continue;
1.19      kristaps  267:                for (i = realend, j = 0; i < end && j < dsz; j++)
1.9       kristaps  268:                        if ('>' != buf[i++])
                    269:                                break;
                    270:                if (dsz == j)
                    271:                        break;
                    272:        }
1.19      kristaps  273:
                    274:        /* Ignore stubs. */
                    275:        if (realend == end || realend == *start)
1.9       kristaps  276:                return(0);
                    277:
1.19      kristaps  278:        /* Set linkend to the end of content. */
                    279:        linkend = dsz > 1 ? realend - 1 : realend;
1.18      kristaps  280:
1.19      kristaps  281:        /* Re-scan to see if we have a title or section. */
                    282:        for (textsz = *start; textsz < linkend; textsz++)
                    283:                if ('|' == buf[textsz] || '/' == buf[textsz])
1.18      kristaps  284:                        break;
                    285:
1.19      kristaps  286:        if (textsz < linkend && '|' == buf[textsz]) {
1.20      kristaps  287:                /* With title: set start, then end at section. */
1.19      kristaps  288:                linkstart = textsz + 1;
1.18      kristaps  289:                textsz = textsz - *start;
1.19      kristaps  290:                for (i = linkstart; i < linkend; i++)
                    291:                        if ('/' == buf[i])
                    292:                                break;
                    293:                if (i < linkend)
                    294:                        linkend = i;
1.20      kristaps  295:        } else if (textsz < linkend && '/' == buf[textsz]) {
                    296:                /* With section: set end at section. */
                    297:                linkend = textsz;
                    298:                textsz = 0;
                    299:        } else
                    300:                /* No title, no section. */
1.18      kristaps  301:                textsz = 0;
1.19      kristaps  302:
                    303:        *start = realend;
                    304:        j = linkend - linkstart;
                    305:
1.20      kristaps  306:        /* Do we have only subsection material? */
                    307:        if (0 == j && '/' == buf[linkend]) {
                    308:                linkstart = linkend + 1;
                    309:                linkend = dsz > 1 ? realend - 1 : realend;
                    310:                if (0 == (j = linkend - linkstart))
                    311:                        return(0);
                    312:                printf("Sx %.*s", (int)j, &buf[linkstart]);
                    313:                return(1);
                    314:        } else if (0 == j)
1.19      kristaps  315:                return(0);
                    316:
                    317:        /* See if we qualify as being a link or not. */
1.20      kristaps  318:        if ((j > 4 && 0 == memcmp("http:", &buf[linkstart], j)) ||
                    319:                (j > 5 && 0 == memcmp("https:", &buf[linkstart], j)) ||
                    320:                (j > 3 && 0 == memcmp("ftp:", &buf[linkstart], j)) ||
                    321:                (j > 4 && 0 == memcmp("sftp:", &buf[linkstart], j)) ||
                    322:                (j > 3 && 0 == memcmp("smb:", &buf[linkstart], j)) ||
                    323:                (j > 3 && 0 == memcmp("afs:", &buf[linkstart], j))) {
                    324:                /* Gross. */
                    325:                printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 :
                    326:                        realend) - linkstart), &buf[linkstart]);
1.19      kristaps  327:                return(1);
                    328:        }
                    329:
                    330:        /* See if we qualify as a mailto. */
1.20      kristaps  331:        if (j > 6 && 0 == memcmp("mailto:", &buf[linkstart], j)) {
1.19      kristaps  332:                printf("Mt %.*s", (int)j, &buf[linkstart]);
                    333:                return(1);
                    334:        }
                    335:
                    336:        /* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */
                    337:        if ((j > 3 && ')' == buf[linkend - 1]) &&
                    338:                ('(' == buf[linkend - 3])) {
                    339:                printf("Xr %.*s %c", (int)(j - 3),
                    340:                        &buf[linkstart], buf[linkend - 2]);
                    341:                return(1);
                    342:        } else if ((j > 4 && ')' == buf[linkend - 1]) &&
                    343:                ('(' == buf[linkend - 4])) {
                    344:                printf("Xr %.*s %.*s", (int)(j - 4),
                    345:                        &buf[linkstart], 2, &buf[linkend - 3]);
                    346:                return(1);
                    347:        } else if ((j > 5 && ')' == buf[linkend - 1]) &&
                    348:                ('(' == buf[linkend - 5])) {
                    349:                printf("Xr %.*s %.*s", (int)(j - 5),
                    350:                        &buf[linkstart], 3, &buf[linkend - 4]);
                    351:                return(1);
                    352:        }
                    353:
                    354:        /* Last try: do we have a double-colon? */
                    355:        for (i = linkstart + 1; i < linkend; i++)
                    356:                if (':' == buf[i] && ':' == buf[i - 1])
1.18      kristaps  357:                        break;
1.9       kristaps  358:
1.19      kristaps  359:        if (i < linkend)
1.10      kristaps  360:                printf("Xr %.*s " PERL_SECTION,
1.19      kristaps  361:                        (int)j, &buf[linkstart]);
1.9       kristaps  362:        else
1.19      kristaps  363:                printf("Xr %.*s 1", (int)j, &buf[linkstart]);
1.9       kristaps  364:
                    365:        return(1);
                    366: }
                    367:
/*
 * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
 * then it's likely that we're a flag.
 * Our flag might be followed by an argument, so make sure that we're
 * accounting for that, too.
 * If we don't have a flag at all, however, then assume we're an "Ar".
 *
 * On entry, buf[*start] must be '-' and at least one more byte must
 * precede "end".
 * Prints "Fl name " for each flag found and/or "Ar " to standard
 * output and moves "start" past whatever was consumed.
 * If the byte after the '-' cannot begin a flag name, "start" is moved
 * back by one and only "Ar " is printed.
 */
static void
dosynopsisfl(const char *buf, size_t *start, size_t end)
{
	size_t		 i;
	unsigned char	 c;

	for (;;) {
		assert(*start + 1 < end);
		assert('-' == buf[*start]);

		/*
		 * The <ctype.h> functions need values in the range of
		 * unsigned char; plain char may be negative.
		 */
		c = (unsigned char)buf[*start + 1];
		if ( ! isalnum(c) && '?' != c && '-' != c) {
			(*start)--;
			fputs("Ar ", stdout);
			return;
		}

		/* Find the end of the flag name. */
		(*start)++;
		for (i = *start; i < end; i++) {
			c = (unsigned char)buf[i];
			if ( ! isalnum(c) && '?' != c &&
			    '-' != c && '_' != c)
				break;
		}
		assert(i < end);

		/* Not cleanly delimited: treat it as an argument. */
		if (' ' != buf[i] && '>' != buf[i]) {
			printf("Ar ");
			return;
		}

		printf("Fl ");
		/*
		 * Guard a two-letter name of the form "Xy" with "\&";
		 * presumably so that it is not taken for a macro name.
		 */
		if (end - *start > 1 &&
		    isupper((unsigned char)buf[*start]) &&
		    islower((unsigned char)buf[*start + 1]) &&
		    (end - *start == 2 ||
		     ' ' == buf[*start + 2]))
			printf("\\&");
		printf("%.*s ", (int)(i - *start), &buf[*start]);
		*start = i;

		/* Ended at the '>': leave it for the caller. */
		if (' ' != buf[i])
			return;

		/* Skip blanks; what follows is a flag or an argument. */
		while (i < end && ' ' == buf[i])
			i++;
		assert(i < end);
		*start = i;
		if ('-' != buf[i]) {
			printf("Ar ");
			return;
		}
	}
}
                    433:
1.9       kristaps  434: /*
1.1       schwarze  435:  * We're at the character in front of a format code, which is structured
                    436:  * like X<...> and can contain nested format codes.
                    437:  * This consumes the whole format code, and any nested format codes, til
                    438:  * the end of matched production.
                    439:  * If "reentrant", then we're being called after a macro has already
                    440:  * been printed to the current line.
1.6       kristaps  441:  * If "nomacro", then we don't print any macros, just contained data
                    442:  * (e.g., following "Sh" or "Nm").
1.15      kristaps  443:  * "pos" is only significant in SYNOPSIS, and should be 0 when invoked
                    444:  * as the first format code on a line (for decoration as an "Nm"),
                    445:  * non-zero otherwise.
1.6       kristaps  446:  * Return whether we've printed a macro or not--in other words, whether
                    447:  * this should trigger a subsequent newline (this should be ignored when
                    448:  * reentrant).
1.1       schwarze  449:  */
                    450: static int
1.15      kristaps  451: formatcode(struct state *st, const char *buf, size_t *start,
                    452:        size_t end, int reentrant, int nomacro, int pos)
1.1       schwarze  453: {
                    454:        enum fmt         fmt;
1.5       kristaps  455:        size_t           i, j, dsz;
1.31    ! schwarze  456:        int              white;
1.1       schwarze  457:
                    458:        assert(*start + 1 < end);
                    459:        assert('<' == buf[*start + 1]);
                    460:
1.6       kristaps  461:        /*
                    462:         * First, look up the format code.
1.30      schwarze  463:         * If it's not valid, treat it as a NOOP.
1.6       kristaps  464:         */
                    465:        for (fmt = 0; fmt < FMT__MAX; fmt++)
                    466:                if (buf[*start] == fmts[fmt])
                    467:                        break;
                    468:
1.5       kristaps  469:        /*
                    470:         * Determine whether we're overriding our delimiter.
                    471:         * According to POD, if we have more than one '<' followed by a
                    472:         * space, then we need a space followed by matching '>' to close
                    473:         * the expression.
                    474:         * Otherwise we use the usual '<' and '>' matched pair.
                    475:         */
                    476:        i = *start + 1;
                    477:        while (i < end && '<' == buf[i])
                    478:                i++;
                    479:        assert(i > *start + 1);
                    480:        dsz = i - (*start + 1);
                    481:        if (dsz > 1 && (i >= end || ' ' != buf[i]))
                    482:                dsz = 1;
                    483:
                    484:        /* Remember, if dsz>1, to jump the trailing space. */
                    485:        *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1       schwarze  486:
                    487:        /*
1.6       kristaps  488:         * Escapes and ignored codes (NULL and INDEX) don't print macro
                    489:         * sequences, so just output them like normal text before
                    490:         * processing for real macros.
1.1       schwarze  491:         */
                    492:        if (FMT_ESCAPE == fmt) {
1.31    ! schwarze  493:                formatescape(st, buf, start, end);
1.1       schwarze  494:                return(0);
                    495:        } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5       kristaps  496:                /*
1.6       kristaps  497:                 * Just consume til the end delimiter, accounting for
                    498:                 * whether it's a custom one.
1.5       kristaps  499:                 */
                    500:                for ( ; *start < end; (*start)++) {
                    501:                        if ('>' != buf[*start])
                    502:                                continue;
                    503:                        else if (dsz == 1)
                    504:                                break;
                    505:                        assert(*start > 0);
                    506:                        if (' ' != buf[*start - 1])
                    507:                                continue;
                    508:                        i = *start;
                    509:                        for (j = 0; i < end && j < dsz; j++)
                    510:                                if ('>' != buf[i++])
                    511:                                        break;
                    512:                        if (dsz != j)
                    513:                                continue;
                    514:                        (*start) += dsz;
                    515:                        break;
                    516:                }
1.24      kristaps  517:                if (*start < end) {
                    518:                        assert('>' == buf[*start]);
                    519:                        (*start)++;
                    520:                }
                    521:                if (isspace(last))
                    522:                        while (*start < end && isspace((int)buf[*start]))
                    523:                                (*start)++;
1.1       schwarze  524:                return(0);
                    525:        }
                    526:
1.6       kristaps  527:        /*
                    528:         * Check whether we're supposed to print macro stuff (this is
                    529:         * suppressed in, e.g., "Nm" and "Sh" macros).
                    530:         */
1.30      schwarze  531:        if (FMT__MAX != fmt && !nomacro) {
1.31    ! schwarze  532:                white = ' ' == last || '\n' == last ||
        !           533:                        ' ' == buf[*start];
        !           534:
1.1       schwarze  535:                /*
1.31    ! schwarze  536:                 * If we are on a text line and there is no
        !           537:                 * whitespace before our content, we have to make
        !           538:                 * the previous word a prefix to the macro line.
1.1       schwarze  539:                 */
1.31    ! schwarze  540:
        !           541:                if ( ! white && ! reentrant) {
        !           542:                        if ( ! st->hasnl)
        !           543:                                putchar('\n');
        !           544:                        printf(".Pf ");
        !           545:                }
        !           546:
        !           547:                outbuf_flush(st);
        !           548:
        !           549:                /* Whitespace is easier to suppress on macro lines. */
        !           550:
        !           551:                if ( ! white && reentrant)
        !           552:                        printf(" Ns");
        !           553:
        !           554:                /* Unless we are on a macro line, start one. */
        !           555:
        !           556:                if (white && ! reentrant) {
1.6       kristaps  557:                        if (last != '\n')
                    558:                                putchar('\n');
1.1       schwarze  559:                        putchar('.');
1.31    ! schwarze  560:                } else
1.1       schwarze  561:                        putchar(' ');
1.31    ! schwarze  562:
        !           563:                /* Print the macro corresponding to this format code. */
1.6       kristaps  564:
1.1       schwarze  565:                switch (fmt) {
                    566:                case (FMT_ITALIC):
                    567:                        printf("Em ");
                    568:                        break;
                    569:                case (FMT_BOLD):
1.14      kristaps  570:                        if (SECT_SYNOPSIS == st->sect) {
                    571:                                if (1 == dsz && '-' == buf[*start])
                    572:                                        dosynopsisfl(buf, start, end);
1.15      kristaps  573:                                else if (0 == pos)
                    574:                                        printf("Nm ");
1.14      kristaps  575:                                else
                    576:                                        printf("Ar ");
                    577:                                break;
                    578:                        }
1.27      schwarze  579:                        if (0 == strncmp(buf + *start, "NULL", 4) &&
                    580:                            ('=' == buf[*start + 4] ||
                    581:                             '>' == buf[*start + 4]))
                    582:                                printf("Dv ");
                    583:                        else
                    584:                                printf("Sy ");
1.1       schwarze  585:                        break;
                    586:                case (FMT_CODE):
1.2       schwarze  587:                        printf("Qo Li ");
1.1       schwarze  588:                        break;
                    589:                case (FMT_LINK):
1.19      kristaps  590:                        /* Try to link; use "No" if it's empty. */
1.9       kristaps  591:                        if ( ! trylink(buf, start, end, dsz))
                    592:                                printf("No ");
1.1       schwarze  593:                        break;
                    594:                case (FMT_FILE):
                    595:                        printf("Pa ");
                    596:                        break;
                    597:                case (FMT_NBSP):
                    598:                        printf("No ");
                    599:                        break;
                    600:                default:
                    601:                        abort();
                    602:                }
1.31    ! schwarze  603:        } else
        !           604:                outbuf_flush(st);
1.1       schwarze  605:
                    606:        /*
1.6       kristaps  607:         * Process until we reach the end marker (e.g., '>') or until we
1.5       kristaps  608:         * find a nested format code.
1.1       schwarze  609:         * Don't emit any newlines: since we're on a macro line, we
                    610:         * don't want to break the line.
                    611:         */
                    612:        while (*start < end) {
1.5       kristaps  613:                if ('>' == buf[*start] && 1 == dsz) {
1.1       schwarze  614:                        (*start)++;
                    615:                        break;
1.5       kristaps  616:                } else if ('>' == buf[*start] &&
                    617:                                ' ' == buf[*start - 1]) {
                    618:                        /*
                    619:                         * Handle custom delimiters.
                    620:                         * These require a certain number of
                    621:                         * space-preceded carets before we're really at
                    622:                         * the end.
                    623:                         */
                    624:                        i = *start;
                    625:                        for (j = 0; i < end && j < dsz; j++)
                    626:                                if ('>' != buf[i++])
                    627:                                        break;
                    628:                        if (dsz == j) {
                    629:                                *start += dsz;
                    630:                                break;
                    631:                        }
1.1       schwarze  632:                }
                    633:                if (*start + 1 < end && '<' == buf[*start + 1]) {
1.15      kristaps  634:                        formatcode(st, buf, start, end, 1, nomacro, 1);
1.1       schwarze  635:                        continue;
                    636:                }
1.3       schwarze  637:
1.4       schwarze  638:                /*
                    639:                 * Make sure that any macro-like words (or
                    640:                 * really any word starting with a capital
                    641:                 * letter) is assumed to be a macro that must be
                    642:                 * escaped.
                    643:                 * This matches "Xx " and "XxEOLN".
                    644:                 */
                    645:                if ((' ' == last || '\n' == last) &&
                    646:                                end - *start > 1 &&
                    647:                                isupper((int)buf[*start]) &&
                    648:                                islower((int)buf[*start + 1]) &&
                    649:                                (end - *start == 2 ||
                    650:                                 ' ' == buf[*start + 2]))
                    651:                        printf("\\&");
1.3       schwarze  652:
1.4       schwarze  653:                /* Suppress newline. */
1.6       kristaps  654:                if ('\n' == buf[*start])
                    655:                        putchar(last = ' ');
                    656:                else
                    657:                        putchar(last = buf[*start]);
1.4       schwarze  658:
1.8       kristaps  659:                /* Protect against character escapes. */
                    660:                if ('\\' == last)
                    661:                        putchar('e');
                    662:
1.6       kristaps  663:                (*start)++;
                    664:
                    665:                if (' ' == last)
                    666:                        while (*start < end && ' ' == buf[*start])
                    667:                                (*start)++;
1.1       schwarze  668:        }
1.2       schwarze  669:
1.30      schwarze  670:        if (FMT__MAX == fmt)
                    671:                return(0);
                    672:
1.2       schwarze  673:        if ( ! nomacro && FMT_CODE == fmt)
                    674:                printf(" Qc ");
1.1       schwarze  675:
                    676:        /*
1.6       kristaps  677:         * We're now just past the end of the format code.
                    678:         * If there isn't a space (or newline) here, and we haven't just
                    679:         * printed a space, then suppress space.
1.1       schwarze  680:         */
1.6       kristaps  681:        if ( ! nomacro && ' ' != last)
                    682:                if (' ' != buf[*start] && '\n' != buf[*start])
                    683:                        printf(" Ns ");
1.5       kristaps  684:
1.1       schwarze  685:        return(1);
                    686: }
                    687:
                    688: /*
                    689:  * Calls formatcode() til the end of a paragraph.
                    690:  */
                    691: static void
1.11      kristaps  692: formatcodeln(struct state *st, const char *buf,
                    693:        size_t *start, size_t end, int nomacro)
1.1       schwarze  694: {
                    695:
1.4       schwarze  696:        last = ' ';
1.1       schwarze  697:        while (*start < end)  {
                    698:                if (*start + 1 < end && '<' == buf[*start + 1]) {
1.15      kristaps  699:                        formatcode(st, buf, start, end, 1, nomacro, 1);
1.1       schwarze  700:                        continue;
                    701:                }
1.4       schwarze  702:                /*
                    703:                 * Since we're already on a macro line, we want to make
                    704:                 * sure that we don't inadvertently invoke a macro.
                    705:                 * We need to do this carefully because section names
                    706:                 * are used in troff and we don't want to escape
                    707:                 * something that needn't be escaped.
                    708:                 */
                    709:                if (' ' == last && end - *start > 1 &&
                    710:                                isupper((int)buf[*start]) &&
                    711:                                islower((int)buf[*start + 1]) &&
                    712:                                (end - *start == 2 ||
                    713:                                 ' ' == buf[*start + 2]))
                    714:                        printf("\\&");
                    715:
1.8       kristaps  716:                if ('\n' == buf[*start])
                    717:                        putchar(last = ' ');
                    718:                else
1.1       schwarze  719:                        putchar(last = buf[*start]);
1.8       kristaps  720:
                    721:                /* Protect against character escapes. */
                    722:                if ('\\' == last)
                    723:                        putchar('e');
                    724:
1.1       schwarze  725:                (*start)++;
                    726:        }
                    727: }
                    728:
                    729: /*
1.4       schwarze  730:  * Guess at what kind of list we are.
                    731:  * These are taken straight from the POD manual.
                    732:  * I don't know what people do in real life.
                    733:  */
                    734: static enum list
                    735: listguess(const char *buf, size_t start, size_t end)
                    736: {
                    737:        size_t           len = end - start;
                    738:
                    739:        assert(end >= start);
                    740:
                    741:        if (len == 1 && '*' == buf[start])
                    742:                return(LIST_BULLET);
                    743:        if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
                    744:                return(LIST_ENUM);
                    745:        else if (len == 1 && '1' == buf[start])
                    746:                return(LIST_ENUM);
                    747:        else
                    748:                return(LIST_TAG);
                    749: }
                    750:
                    751: /*
1.1       schwarze  752:  * A command paragraph, as noted in the perlpod manual, just indicates
                    753:  * that we should do something, optionally with some text to print as
                    754:  * well.
                    755:  */
                    756: static void
                    757: command(struct state *st, const char *buf, size_t start, size_t end)
                    758: {
                    759:        size_t           len, csz;
                    760:        enum cmd         cmd;
                    761:
                    762:        assert('=' == buf[start]);
                    763:        start++;
                    764:        len = end - start;
                    765:
                    766:        for (cmd = 0; cmd < CMD__MAX; cmd++) {
                    767:                csz = strlen(cmds[cmd]);
                    768:                if (len < csz)
                    769:                        continue;
                    770:                if (0 == memcmp(&buf[start], cmd[cmds], csz))
                    771:                        break;
                    772:        }
                    773:
                    774:        /* Ignore bogus commands. */
                    775:
                    776:        if (CMD__MAX == cmd)
                    777:                return;
                    778:
                    779:        start += csz;
1.8       kristaps  780:        while (start < end && ' ' == buf[start])
                    781:                start++;
                    782:
1.1       schwarze  783:        len = end - start;
                    784:
                    785:        if (st->paused) {
                    786:                st->paused = CMD_END != cmd;
                    787:                return;
                    788:        }
                    789:
                    790:        switch (cmd) {
                    791:        case (CMD_POD):
                    792:                break;
                    793:        case (CMD_HEAD1):
                    794:                /*
                    795:                 * The behaviour of head= follows from a quick glance at
                    796:                 * how pod2man handles it.
                    797:                 */
                    798:                printf(".Sh ");
1.11      kristaps  799:                st->sect = SECT_NONE;
                    800:                if (end - start == 4) {
1.1       schwarze  801:                        if (0 == memcmp(&buf[start], "NAME", 4))
1.11      kristaps  802:                                st->sect = SECT_NAME;
                    803:                } else if (end - start == 8) {
                    804:                        if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
                    805:                                st->sect = SECT_SYNOPSIS;
                    806:                }
                    807:                formatcodeln(st, buf, &start, end, 1);
1.31    ! schwarze  808:                putchar(last = '\n');
1.1       schwarze  809:                st->haspar = 1;
                    810:                break;
                    811:        case (CMD_HEAD2):
                    812:                printf(".Ss ");
1.11      kristaps  813:                formatcodeln(st, buf, &start, end, 1);
1.31    ! schwarze  814:                putchar(last = '\n');
1.1       schwarze  815:                st->haspar = 1;
                    816:                break;
                    817:        case (CMD_HEAD3):
                    818:                puts(".Pp");
                    819:                printf(".Em ");
1.11      kristaps  820:                formatcodeln(st, buf, &start, end, 0);
1.31    ! schwarze  821:                putchar(last = '\n');
1.1       schwarze  822:                puts(".Pp");
                    823:                st->haspar = 1;
                    824:                break;
                    825:        case (CMD_HEAD4):
                    826:                puts(".Pp");
                    827:                printf(".No ");
1.11      kristaps  828:                formatcodeln(st, buf, &start, end, 0);
1.31    ! schwarze  829:                putchar(last = '\n');
1.1       schwarze  830:                puts(".Pp");
                    831:                st->haspar = 1;
                    832:                break;
                    833:        case (CMD_OVER):
1.4       schwarze  834:                /*
                    835:                 * If we have an existing list that hasn't had an =item
                    836:                 * yet, then make sure that we open it now.
                    837:                 * We use the default list type, but that can't be
                    838:                 * helped (we haven't seen any items yet).
1.1       schwarze  839:                 */
1.4       schwarze  840:                if (st->lpos > 0)
                    841:                        if (LIST__MAX == st->lstack[st->lpos - 1]) {
                    842:                                st->lstack[st->lpos - 1] = LIST_TAG;
                    843:                                puts(".Bl -tag -width Ds");
                    844:                        }
                    845:                st->lpos++;
                    846:                assert(st->lpos < LIST_STACKSZ);
                    847:                st->lstack[st->lpos - 1] = LIST__MAX;
1.1       schwarze  848:                break;
                    849:        case (CMD_ITEM):
1.6       kristaps  850:                if (0 == st->lpos) {
                    851:                        /*
                    852:                         * Bad markup.
                    853:                         * Try to compensate.
                    854:                         */
                    855:                        st->lstack[st->lpos] = LIST__MAX;
                    856:                        st->lpos++;
                    857:                }
1.4       schwarze  858:                assert(st->lpos > 0);
                    859:                /*
                    860:                 * If we're the first =item, guess at what our content
                    861:                 * will be: "*" is a bullet list, "1." is a numbered
                    862:                 * list, and everything is tagged.
                    863:                 */
                    864:                if (LIST__MAX == st->lstack[st->lpos - 1]) {
                    865:                        st->lstack[st->lpos - 1] =
                    866:                                listguess(buf, start, end);
                    867:                        switch (st->lstack[st->lpos - 1]) {
                    868:                        case (LIST_BULLET):
                    869:                                puts(".Bl -bullet");
                    870:                                break;
                    871:                        case (LIST_ENUM):
                    872:                                puts(".Bl -enum");
                    873:                                break;
                    874:                        default:
                    875:                                puts(".Bl -tag -width Ds");
                    876:                                break;
                    877:                        }
                    878:                }
                    879:                switch (st->lstack[st->lpos - 1]) {
                    880:                case (LIST_TAG):
                    881:                        printf(".It ");
1.11      kristaps  882:                        formatcodeln(st, buf, &start, end, 0);
1.31    ! schwarze  883:                        putchar(last = '\n');
1.4       schwarze  884:                        break;
                    885:                case (LIST_ENUM):
                    886:                        /* FALLTHROUGH */
                    887:                case (LIST_BULLET):
                    888:                        /*
                    889:                         * Abandon the remainder of the paragraph
                    890:                         * because we're going to be a bulletted or
                    891:                         * numbered list.
                    892:                         */
                    893:                        puts(".It");
                    894:                        break;
                    895:                default:
                    896:                        abort();
                    897:                }
1.1       schwarze  898:                st->haspar = 1;
                    899:                break;
                    900:        case (CMD_BACK):
1.4       schwarze  901:                /* Make sure we don't back over the stack. */
                    902:                if (st->lpos > 0) {
                    903:                        st->lpos--;
                    904:                        puts(".El");
                    905:                }
1.1       schwarze  906:                break;
                    907:        case (CMD_BEGIN):
                    908:                /*
                    909:                 * We disregard all types for now.
                    910:                 * TODO: process at least "text" in a -literal block.
                    911:                 */
                    912:                st->paused = 1;
                    913:                break;
                    914:        case (CMD_FOR):
                    915:                /*
                    916:                 * We ignore all types of encodings and formats
                    917:                 * unilaterally.
                    918:                 */
                    919:                break;
                    920:        case (CMD_ENCODING):
                    921:                break;
                    922:        case (CMD_CUT):
                    923:                st->parsing = 0;
                    924:                return;
                    925:        default:
                    926:                abort();
                    927:        }
                    928:
                    929:        /* Any command (but =cut) makes us start parsing. */
                    930:        st->parsing = 1;
                    931: }
                    932:
                    933: /*
                    934:  * Just pump out the line in a verbatim block.
                    935:  */
                    936: static void
                    937: verbatim(struct state *st, const char *buf, size_t start, size_t end)
                    938: {
1.22      kristaps  939:        size_t           i;
1.1       schwarze  940:
                    941:        if ( ! st->parsing || st->paused)
                    942:                return;
1.22      kristaps  943: again:
                    944:        /*
                    945:         * If we're in the SYNOPSIS, see if we're an #include block.
                    946:         * If we are, then print the "In" macro and re-loop.
                    947:         * This handles any number of inclusions, but only when they
                    948:         * come before the remaining parts...
                    949:         */
                    950:        if (SECT_SYNOPSIS == st->sect) {
                    951:                i = start;
                    952:                for (i = start; i < end && ' ' == buf[i]; i++)
                    953:                        /* Spin. */ ;
                    954:                if (i == end)
                    955:                        return;
                    956:                /* We're an include block! */
                    957:                if (end - i > 10 &&
                    958:                        0 == memcmp(&buf[i], "#include <", 10)) {
                    959:                        start = i + 10;
                    960:                        while (start < end && ' ' == buf[start])
                    961:                                start++;
                    962:                        fputs(".In ", stdout);
                    963:                        /* Stop til the '>' marker or we hit eoln. */
                    964:                        while (start < end &&
                    965:                                '>' != buf[start] && '\n' != buf[start])
                    966:                                putchar(buf[start++]);
                    967:                        putchar('\n');
                    968:                        if (start < end && '>' == buf[start])
                    969:                                start++;
                    970:                        if (start < end && '\n' == buf[start])
                    971:                                start++;
                    972:                        if (start < end)
                    973:                                goto again;
                    974:                        return;
                    975:                }
                    976:        }
                    977:
                    978:        if (start == end)
                    979:                return;
1.1       schwarze  980:        puts(".Bd -literal");
1.8       kristaps  981:        for (last = ' '; start < end; start++) {
                    982:                /*
                    983:                 * Handle accidental macros (newline starting with
                    984:                 * control character) and escapes.
                    985:                 */
                    986:                if ('\n' == last)
1.7       kristaps  987:                        if ('.' == buf[start] || '\'' == buf[start])
                    988:                                printf("\\&");
1.8       kristaps  989:                putchar(last = buf[start]);
                    990:                if ('\\' == buf[start])
                    991:                        printf("e");
1.7       kristaps  992:        }
1.31    ! schwarze  993:        putchar(last = '\n');
1.1       schwarze  994:        puts(".Ed");
                    995: }
                    996:
                    997: /*
1.13      kristaps  998:  * See dosynopsisop().
                    999:  */
                   1000: static int
                   1001: hasmatch(const char *buf, size_t start, size_t end)
                   1002: {
                   1003:        size_t   stack;
                   1004:
                   1005:        for (stack = 0; start < end; start++)
                   1006:                if (buf[start] == '[')
                   1007:                        stack++;
                   1008:                else if (buf[start] == ']' && 0 == stack)
                   1009:                        return(1);
                   1010:                else if (buf[start] == ']')
                   1011:                        stack--;
                   1012:        return(0);
                   1013: }
                   1014:
                   1015: /*
                   1016:  * If we're in the SYNOPSIS section and we've encounter braces in an
                   1017:  * ordinary paragraph, then try to see whether we're an [-option].
                   1018:  * Do this, if we're an opening bracket, by first seeing if we have a
                   1019:  * matching end via hasmatch().
                   1020:  * If we're an ending bracket, see if we have a stack already.
                   1021:  */
                   1022: static int
1.31    ! schwarze 1023: dosynopsisop(const char *buf, size_t *start, size_t end, size_t *opstack)
1.13      kristaps 1024: {
                   1025:
                   1026:        assert('[' == buf[*start] || ']' == buf[*start]);
                   1027:
                   1028:        if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) {
1.31    ! schwarze 1029:                if ('\n' != last)
1.13      kristaps 1030:                        putchar('\n');
                   1031:                puts(".Oo");
                   1032:                (*opstack)++;
                   1033:        } else if ('[' == buf[*start])
                   1034:                return(0);
                   1035:
                   1036:        if (']' == buf[*start] && *opstack > 0) {
1.31    ! schwarze 1037:                if ('\n' != last)
1.13      kristaps 1038:                        putchar('\n');
                   1039:                puts(".Oc");
                   1040:                (*opstack)--;
                   1041:        } else if (']' == buf[*start])
                   1042:                return(0);
                   1043:
                   1044:        (*start)++;
1.31    ! schwarze 1045:        last = '\n';
1.13      kristaps 1046:        while (' ' == buf[*start])
                   1047:                (*start)++;
                   1048:        return(1);
                   1049: }
                   1050:
                   1051: /*
1.17      kristaps 1052:  * Format multiple "Nm" manpage names in the NAME section.
                   1053:  */
                   1054: static void
                   1055: donamenm(struct state *st, const char *buf, size_t *start, size_t end)
                   1056: {
                   1057:        size_t   word;
                   1058:
                   1059:        while (*start < end && ' ' == buf[*start])
                   1060:                (*start)++;
                   1061:
                   1062:        if (end == *start) {
                   1063:                puts(".Nm unknown");
                   1064:                return;
                   1065:        }
                   1066:
                   1067:        while (*start < end) {
                   1068:                fputs(".Nm ", stdout);
                   1069:                for (word = *start; word < end; word++)
                   1070:                        if (',' == buf[word])
                   1071:                                break;
                   1072:                formatcodeln(st, buf, start, word, 1);
                   1073:                if (*start == end) {
1.31    ! schwarze 1074:                        putchar(last = '\n');
1.17      kristaps 1075:                        continue;
                   1076:                }
                   1077:                assert(',' == buf[*start]);
                   1078:                puts(" ,");
                   1079:                (*start)++;
                   1080:                while (*start < end && ' ' == buf[*start])
                   1081:                        (*start)++;
                   1082:        }
                   1083: }
                   1084:
                   1085: /*
1.1       schwarze 1086:  * Ordinary paragraph.
                   1087:  * Well, this is really the hardest--POD seems to assume that, for
                   1088:  * example, a leading space implies a newline, and so on.
                   1089:  * Lots of other snakes in the grass: escaping a newline followed by a
                   1090:  * period (accidental mdoc(7) control), double-newlines after macro
                   1091:  * passages, etc.
                   1092:  */
                   1093: static void
                   1094: ordinary(struct state *st, const char *buf, size_t start, size_t end)
                   1095: {
1.13      kristaps 1096:        size_t          i, j, opstack;
1.15      kristaps 1097:        int             seq;
1.1       schwarze 1098:
                   1099:        if ( ! st->parsing || st->paused)
                   1100:                return;
                   1101:
                   1102:        /*
                   1103:         * Special-case: the NAME section.
                   1104:         * If we find a "-" when searching from the end, assume that
                   1105:         * we're in "name - description" format.
                   1106:         * To wit, print out a "Nm" and "Nd" in that format.
                   1107:         */
1.11      kristaps 1108:        if (SECT_NAME == st->sect) {
1.15      kristaps 1109:                for (i = end - 2; i > start; i--)
                   1110:                        if ('-' == buf[i] && ' ' == buf[i + 1])
1.1       schwarze 1111:                                break;
                   1112:                if ('-' == buf[i]) {
                   1113:                        j = i;
                   1114:                        /* Roll over multiple "-". */
                   1115:                        for ( ; i > start; i--)
                   1116:                                if ('-' != buf[i])
                   1117:                                        break;
1.17      kristaps 1118:                        donamenm(st, buf, &start, i + 1);
1.5       kristaps 1119:                        start = j + 1;
1.17      kristaps 1120:                        while (start < end && ' ' == buf[start])
                   1121:                                start++;
1.15      kristaps 1122:                        fputs(".Nd ", stdout);
1.11      kristaps 1123:                        formatcodeln(st, buf, &start, end, 1);
1.31    ! schwarze 1124:                        putchar(last = '\n');
1.1       schwarze 1125:                        return;
                   1126:                }
                   1127:        }
                   1128:
                   1129:        if ( ! st->haspar)
                   1130:                puts(".Pp");
                   1131:
                   1132:        st->haspar = 0;
1.31    ! schwarze 1133:        st->hasnl = 1;
1.1       schwarze 1134:        last = '\n';
1.13      kristaps 1135:        opstack = 0;
1.1       schwarze 1136:
1.15      kristaps 1137:        for (seq = 0; start < end; seq++) {
1.1       schwarze 1138:                /*
                   1139:                 * Loop til we get either to a newline or escape.
                   1140:                 * Escape initial control characters.
                   1141:                 */
                   1142:                while (start < end) {
                   1143:                        if (start < end - 1 && '<' == buf[start + 1])
                   1144:                                break;
                   1145:                        else if ('\n' == buf[start])
                   1146:                                break;
                   1147:                        else if ('\n' == last && '.' == buf[start])
1.31    ! schwarze 1148:                                outbuf_addstr(st, "\\&");
1.1       schwarze 1149:                        else if ('\n' == last && '\'' == buf[start])
1.31    ! schwarze 1150:                                outbuf_addstr(st, "\\&");
1.12      kristaps 1151:                        /*
                   1152:                         * If we're in the SYNOPSIS, have square
                   1153:                         * brackets indicate that we're opening and
                   1154:                         * closing an optional context.
                   1155:                         */
1.13      kristaps 1156:                        if (SECT_SYNOPSIS == st->sect &&
                   1157:                                ('[' == buf[start] ||
                   1158:                                 ']' == buf[start]) &&
1.31    ! schwarze 1159:                                dosynopsisop(buf, &start, end, &opstack))
1.13      kristaps 1160:                                continue;
1.31    ! schwarze 1161:                        last = buf[start++];
        !          1162:                        if (' ' == last) {
        !          1163:                                outbuf_flush(st);
        !          1164:                                putchar(' ');
        !          1165:                        } else
        !          1166:                                outbuf_addchar(st);
1.1       schwarze 1167:                }
                   1168:
                   1169:                if (start < end - 1 && '<' == buf[start + 1]) {
1.15      kristaps 1170:                        if (formatcode(st, buf, &start, end, 0, 0, seq)) {
1.30      schwarze 1171:                                /*
                   1172:                                 * Let mdoc(7) handle trailing punctuation.
                   1173:                                 * XXX Some punctuation characters
                   1174:                                 *     are not handled yet.
                   1175:                                 */
1.16      kristaps 1176:                                if ((start == end - 1 ||
                   1177:                                        (start < end - 1 &&
                   1178:                                         (' ' == buf[start + 1] ||
                   1179:                                          '\n' == buf[start + 1]))) &&
                   1180:                                        ('.' == buf[start] ||
                   1181:                                         ',' == buf[start])) {
                   1182:                                        putchar(' ');
                   1183:                                        putchar(buf[start++]);
                   1184:                                }
1.30      schwarze 1185:                                /* End the macro line. */
1.1       schwarze 1186:                                putchar(last = '\n');
1.31    ! schwarze 1187:                                st->hasnl = 1;
1.30      schwarze 1188:                                /*
                   1189:                                 * Consume all whitespace
                   1190:                                 * so we don't accidentally start
                   1191:                                 * an implicit literal line.
                   1192:                                 */
1.6       kristaps 1193:                                while (start < end && ' ' == buf[start])
                   1194:                                        start++;
                   1195:                        }
1.1       schwarze 1196:                } else if (start < end && '\n' == buf[start]) {
1.31    ! schwarze 1197:                        outbuf_newln(st);
1.1       schwarze 1198:                        if (++start >= end)
                   1199:                                continue;
                   1200:                        /*
                   1201:                         * If we have whitespace next, eat it to prevent
                   1202:                         * mdoc(7) from thinking that it's meant for
                   1203:                         * verbatim text.
                   1204:                         * It is--but if we start with that, we can't
                   1205:                         * have a macro subsequent it, which may be
                   1206:                         * possible if we have an escape next.
                   1207:                         */
1.31    ! schwarze 1208:                        if (' ' == buf[start] || '\t' == buf[start])
1.1       schwarze 1209:                                puts(".br");
                   1210:                        for ( ; start < end; start++)
                   1211:                                if (' ' != buf[start] && '\t' != buf[start])
                   1212:                                        break;
1.12      kristaps 1213:                }
1.1       schwarze 1214:        }
1.31    ! schwarze 1215:        outbuf_newln(st);
1.1       schwarze 1216: }
                   1217:
                   1218: /*
                   1219:  * There are three kinds of paragraphs: verbatim (starts with whitespace
                   1220:  * of some sort), ordinary (starts without "=" marker), or a command
                   1221:  * (default: starts with "=").
                   1222:  */
                   1223: static void
                   1224: dopar(struct state *st, const char *buf, size_t start, size_t end)
                   1225: {
                   1226:
                   1227:        if (end == start)
                   1228:                return;
                   1229:        if (' ' == buf[start] || '\t' == buf[start])
                   1230:                verbatim(st, buf, start, end);
                   1231:        else if ('=' != buf[start])
                   1232:                ordinary(st, buf, start, end);
                   1233:        else
                   1234:                command(st, buf, start, end);
                   1235: }
                   1236:
                   1237: /*
                   1238:  * Loop around paragraphs within a document, processing each one in the
                   1239:  * POD way.
                   1240:  */
                   1241: static void
                   1242: dofile(const struct args *args, const char *fname,
                   1243:        const struct tm *tm, const char *buf, size_t sz)
                   1244: {
1.29      schwarze 1245:        char             datebuf[64];
1.1       schwarze 1246:        struct state     st;
1.29      schwarze 1247:        const char      *fbase, *fext, *section, *date;
1.1       schwarze 1248:        char            *title, *cp;
1.29      schwarze 1249:        size_t           sup, end, i, cur = 0;
1.1       schwarze 1250:
                   1251:        if (0 == sz)
                   1252:                return;
                   1253:
1.29      schwarze 1254:        /*
                   1255:         * Parsing the filename is almost always required,
                   1256:         * except when both the title and the section
                   1257:         * are provided on the command line.
                   1258:         */
                   1259:
                   1260:        if (NULL == args->title || NULL == args->section) {
                   1261:                fbase = strrchr(fname, '/');
                   1262:                if (NULL == fbase)
                   1263:                        fbase = fname;
                   1264:                else
                   1265:                        fbase++;
                   1266:                fext = strrchr(fbase, '.');
                   1267:        } else
                   1268:                fext = NULL;
                   1269:
                   1270:        /*
                   1271:         * The title will be converted to uppercase,
                   1272:         * so it needs to be copied.
                   1273:         */
                   1274:
                   1275:        title = (NULL != args->title) ? strdup(args->title) :
                   1276:                (NULL != fext) ? strndup(fbase, fext - fbase) :
                   1277:                strdup(fbase);
1.1       schwarze 1278:
                   1279:        if (NULL == title) {
                   1280:                perror(NULL);
                   1281:                exit(EXIT_FAILURE);
                   1282:        }
                   1283:
                   1284:        /* Section is 1 unless suffix is "pm". */
                   1285:
1.29      schwarze 1286:        section = (NULL != args->section) ? args->section :
                   1287:            (NULL == fext || strcmp(fext + 1, "pm")) ? "1" :
                   1288:            PERL_SECTION;
1.1       schwarze 1289:
                   1290:        /* Date.  Or the given "tm" if not supplied. */
                   1291:
                   1292:        if (NULL == (date = args->date)) {
                   1293:                strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
                   1294:                date = datebuf;
                   1295:        }
                   1296:
                   1297:        for (cp = title; '\0' != *cp; cp++)
                   1298:                *cp = toupper((int)*cp);
                   1299:
                   1300:        /* The usual mdoc(7) preamble. */
                   1301:
                   1302:        printf(".Dd %s\n", date);
                   1303:        printf(".Dt %s %s\n", title, section);
                   1304:        puts(".Os");
                   1305:
                   1306:        free(title);
                   1307:
                   1308:        memset(&st, 0, sizeof(struct state));
                   1309:        assert(sz > 0);
                   1310:
                   1311:        /* Main loop over file contents. */
                   1312:
                   1313:        while (cur < sz) {
                   1314:                /* Read until next paragraph. */
                   1315:                for (i = cur + 1; i < sz; i++)
                   1316:                        if ('\n' == buf[i] && '\n' == buf[i - 1]) {
                   1317:                                /* Consume blank paragraphs. */
                   1318:                                while (i + 1 < sz && '\n' == buf[i + 1])
                   1319:                                        i++;
                   1320:                                break;
                   1321:                        }
                   1322:
                   1323:                /* Adjust end marker for EOF. */
                   1324:                end = i < sz ? i - 1 :
                   1325:                        ('\n' == buf[sz - 1] ? sz - 1 : sz);
                   1326:                sup = i < sz ? end + 2 : sz;
                   1327:
                   1328:                /* Process paragraph and adjust start. */
                   1329:                dopar(&st, buf, cur, end);
                   1330:                cur = sup;
                   1331:        }
                   1332: }
                   1333:
                   1334: /*
                   1335:  * Read a single file fully into memory.
                   1336:  * If the file is "-", do it from stdin.
                   1337:  * If successfully read, send the input buffer to dofile() for further
                   1338:  * processing.
                   1339:  */
                   1340: static int
                   1341: readfile(const struct args *args, const char *fname)
                   1342: {
                   1343:        int              fd;
                   1344:        char            *buf;
                   1345:        size_t           bufsz, cur;
                   1346:        ssize_t          ssz;
                   1347:        struct tm       *tm;
                   1348:        time_t           ttm;
                   1349:        struct stat      st;
                   1350:
                   1351:        fd = 0 != strcmp("-", fname) ?
                   1352:                open(fname, O_RDONLY, 0) : STDIN_FILENO;
                   1353:
                   1354:        if (-1 == fd) {
                   1355:                perror(fname);
                   1356:                return(0);
                   1357:        }
                   1358:
                   1359:        if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
                   1360:                ttm = time(NULL);
                   1361:                tm = localtime(&ttm);
                   1362:        } else
                   1363:                tm = localtime(&st.st_mtime);
                   1364:
                   1365:        /*
                   1366:         * Arbitrarily-sized initial buffer.
                   1367:         * Should be big enough for most files...
                   1368:         */
                   1369:        cur = 0;
                   1370:        bufsz = 1 << 14;
                   1371:        if (NULL == (buf = malloc(bufsz))) {
                   1372:                perror(NULL);
                   1373:                exit(EXIT_FAILURE);
                   1374:        }
                   1375:
                   1376:        while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
                   1377:                /* Double buffer size on fill. */
                   1378:                if ((size_t)ssz == bufsz - cur)  {
                   1379:                        bufsz *= 2;
                   1380:                        if (NULL == (buf = realloc(buf, bufsz))) {
                   1381:                                perror(NULL);
                   1382:                                exit(EXIT_FAILURE);
                   1383:                        }
                   1384:                }
                   1385:                cur += (size_t)ssz;
                   1386:        }
                   1387:        if (ssz < 0) {
                   1388:                perror(fname);
                   1389:                free(buf);
                   1390:                return(0);
                   1391:        }
                   1392:
                   1393:        dofile(args, STDIN_FILENO == fd ?
                   1394:                "STDIN" : fname, tm, buf, cur);
                   1395:        free(buf);
                   1396:        if (STDIN_FILENO != fd)
                   1397:                close(fd);
                   1398:        return(1);
                   1399: }
                   1400:
                   1401: int
                   1402: main(int argc, char *argv[])
                   1403: {
                   1404:        const char      *fname, *name;
                   1405:        struct args      args;
                   1406:        int              c;
                   1407:
                   1408:        name = strrchr(argv[0], '/');
                   1409:        if (name == NULL)
                   1410:                name = argv[0];
                   1411:        else
                   1412:                ++name;
                   1413:
                   1414:        memset(&args, 0, sizeof(struct args));
                   1415:        fname = "-";
                   1416:
                   1417:        /* Accept no arguments for now. */
                   1418:
                   1419:        while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
                   1420:                switch (c) {
                   1421:                case ('h'):
                   1422:                        /* FALLTHROUGH */
                   1423:                case ('l'):
                   1424:                        /* FALLTHROUGH */
                   1425:                case ('c'):
                   1426:                        /* FALLTHROUGH */
                   1427:                case ('o'):
                   1428:                        /* FALLTHROUGH */
                   1429:                case ('q'):
                   1430:                        /* FALLTHROUGH */
                   1431:                case ('r'):
                   1432:                        /* FALLTHROUGH */
                   1433:                case ('u'):
                   1434:                        /* FALLTHROUGH */
                   1435:                case ('v'):
                   1436:                        /* Ignore these. */
                   1437:                        break;
                   1438:                case ('d'):
                   1439:                        args.date = optarg;
                   1440:                        break;
                   1441:                case ('n'):
                   1442:                        args.title = optarg;
                   1443:                        break;
                   1444:                case ('s'):
                   1445:                        args.section = optarg;
                   1446:                        break;
                   1447:                default:
                   1448:                        goto usage;
                   1449:                }
                   1450:
                   1451:        argc -= optind;
                   1452:        argv += optind;
                   1453:
                   1454:        /* Accept only a single input file. */
                   1455:
1.25      schwarze 1456:        if (argc > 1)
                   1457:                goto usage;
1.1       schwarze 1458:        else if (1 == argc)
                   1459:                fname = *argv;
                   1460:
                   1461:        return(readfile(&args, fname) ?
                   1462:                EXIT_SUCCESS : EXIT_FAILURE);
                   1463:
                   1464: usage:
                   1465:        fprintf(stderr, "usage: %s [-d date] "
1.25      schwarze 1466:            "[-n title] [-s section] [file]\n", name);
1.1       schwarze 1467:
                   1468:        return(EXIT_FAILURE);
                   1469: }
CVSweb