pod2mdoc/pod2mdoc.c - annotate

Return to pod2mdoc.c CVS log
Up to [cvsweb.bsd.lv] / pod2mdoc
Annotation of pod2mdoc/pod2mdoc.c, Revision 1.20

1.20    ! kristaps    1: /*     $Id: pod2mdoc.c,v 1.19 2014/04/03 10:17:14 kristaps Exp $ */
1.1       schwarze    2: /*
                      3:  * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
                      4:  *
                      5:  * Permission to use, copy, modify, and distribute this software for any
                      6:  * purpose with or without fee is hereby granted, provided that the above
                      7:  * copyright notice and this permission notice appear in all copies.
                      8:  *
                      9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
                     10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
                     12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     16:  */
                     17: #include <sys/stat.h>
                     18: #include <sys/time.h>
                     19:
                     20: #include <assert.h>
                     21: #include <ctype.h>
                     22: #include <fcntl.h>
                     23: #include <getopt.h>
                     24: #include <stdio.h>
                     25: #include <stdlib.h>
                     26: #include <string.h>
                     27: #include <unistd.h>
                     28:
1.10      kristaps   29: /*
1.19      kristaps   30:  * In what section can we find Perl module manuals?
                     31:  * Sometimes (Mac OS X) it's 3pm, sometimes (OpenBSD, etc.) 3p.
                     32:  * XXX IF YOU CHANGE THIS, CHANGE POD2MDOC.1 AS WELL.
1.10      kristaps   33:  */
                     34: #define        PERL_SECTION    "3p"
                     35:
/*
 * Optional overrides for values that end up in the "Dd" and "Dt"
 * macros of the generated manual.
 */
struct args {
	/* Override "Dt" title. */
	const char	*title;
	/* Override "Dd" date. */
	const char	*date;
	/* Override "Dt" section. */
	const char	*section;
};
                     41:
/*
 * Kinds of list that can be recorded on the open-list stack.
 */
enum list {
	LIST_BULLET = 0,	/* bullet list */
	LIST_ENUM,		/* enumerated list */
	LIST_TAG,		/* tagged list */
	LIST__MAX		/* number of list kinds; not a kind itself */
};
                     48:
/*
 * Sections that receive special treatment while formatting.
 */
enum sect {
	SECT_NONE = 0,	/* any section without special handling */
	SECT_NAME,	/* NAME section */
	SECT_SYNOPSIS	/* SYNOPSIS section */
};
                     54:
1.1       schwarze   55: struct state {
                     56:        int              parsing; /* after =cut of before command */
                     57:        int              paused; /* in =begin and before =end */
                     58:        int              haspar; /* in paragraph: do we need Pp? */
1.11      kristaps   59:        enum sect        sect; /* which section are we in? */
1.1       schwarze   60:        const char      *fname; /* file being parsed */
1.4       schwarze   61: #define        LIST_STACKSZ     128
                     62:        enum list        lstack[LIST_STACKSZ]; /* open lists */
                     63:        size_t           lpos; /* where in list stack */
1.1       schwarze   64: };
                     65:
                     66: enum   fmt {
                     67:        FMT_ITALIC,
                     68:        FMT_BOLD,
                     69:        FMT_CODE,
                     70:        FMT_LINK,
                     71:        FMT_ESCAPE,
                     72:        FMT_FILE,
                     73:        FMT_NBSP,
                     74:        FMT_INDEX,
                     75:        FMT_NULL,
                     76:        FMT__MAX
                     77: };
                     78:
                     79: enum   cmd {
                     80:        CMD_POD = 0,
                     81:        CMD_HEAD1,
                     82:        CMD_HEAD2,
                     83:        CMD_HEAD3,
                     84:        CMD_HEAD4,
                     85:        CMD_OVER,
                     86:        CMD_ITEM,
                     87:        CMD_BACK,
                     88:        CMD_BEGIN,
                     89:        CMD_END,
                     90:        CMD_FOR,
                     91:        CMD_ENCODING,
                     92:        CMD_CUT,
                     93:        CMD__MAX
                     94: };
                     95:
                     96: static const char *const cmds[CMD__MAX] = {
                     97:        "pod",          /* CMD_POD */
                     98:        "head1",        /* CMD_HEAD1 */
                     99:        "head2",        /* CMD_HEAD2 */
                    100:        "head3",        /* CMD_HEAD3 */
                    101:        "head4",        /* CMD_HEAD4 */
                    102:        "over",         /* CMD_OVER */
                    103:        "item",         /* CMD_ITEM */
                    104:        "back",         /* CMD_BACK */
                    105:        "begin",        /* CMD_BEGIN */
                    106:        "end",          /* CMD_END */
                    107:        "for",          /* CMD_FOR */
                    108:        "encoding",     /* CMD_ENCODING */
                    109:        "cut"           /* CMD_CUT */
                    110: };
                    111:
                    112: static const char fmts[FMT__MAX] = {
                    113:        'I',            /* FMT_ITALIC */
                    114:        'B',            /* FMT_BOLD */
                    115:        'C',            /* FMT_CODE */
                    116:        'L',            /* FMT_LINK */
                    117:        'E',            /* FMT_ESCAPE */
                    118:        'F',            /* FMT_FILE */
                    119:        'S',            /* FMT_NBSP */
                    120:        'X',            /* FMT_INDEX */
                    121:        'Z'             /* FMT_NULL */
                    122: };
                    123:
/*
 * The character most recently written to the output.
 * After a character escape has been printed this is set to 'a', i.e.,
 * "some ordinary, non-whitespace character".
 */
static int	 last;

/*
 * Given buf[*start] is at the start of an escape name (the content of
 * an E<...> code), read the name up to the closing '>' and print the
 * matching roff character escape, if we know one.
 *
 * buf:   input buffer.
 * start: in: index of the first character of the escape name;
 *        out: one after the closing '>', or "end" if the escape is not
 *        terminated within the buffer.
 * end:   one past the last valid index of buf.
 *
 * Unknown names, and names too long to possibly be known, are consumed
 * (including their closing '>') without printing anything.
 */
static void
formatescape(const char *buf, size_t *start, size_t end)
{
	char		 esc[16]; /* no more needed */
	size_t		 i;
	int		 truncated;

	/*
	 * Copy the name; if it does not fit, keep scanning for the end
	 * of the escape but remember that we ran out of room.
	 */
	i = 0;
	truncated = 0;
	while (*start < end && '>' != buf[*start]) {
		if (i < sizeof(esc) - 1)
			esc[i++] = buf[*start];
		else
			truncated = 1;
		(*start)++;
	}
	esc[i] = '\0';

	/* Unterminated escape: there is nothing left to consume. */
	if (*start >= end)
		return;

	/*
	 * Always step over the closing '>', even for over-long names:
	 * otherwise the caller would print it as ordinary text.
	 */
	(*start)++;

	if (truncated)
		return;

	/*
	 * TODO: right now, we only recognise the named escapes.
	 * Just let the rest of them go.
	 */
	if (0 == strcmp(esc, "lt"))
		printf("\\(la");
	else if (0 == strcmp(esc, "gt"))
		printf("\\(ra");
	else if (0 == strcmp(esc, "vb"))
		printf("\\(ba");
	else if (0 == strcmp(esc, "sol"))
		printf("\\(sl");
	else
		return;

	last = 'a';
}
                    172:
                    173: /*
1.9       kristaps  174:  * Run some heuristics to intuit a link format.
1.19      kristaps  175:  * I set "start" to be the end of the sequence (last right-carrot) so
1.9       kristaps  176:  * that the caller can safely just continue processing.
1.19      kristaps  177:  * If this is just an empty tag, I'll return 0.
1.9       kristaps  178:  */
                    179: static int
                    180: trylink(const char *buf, size_t *start, size_t end, size_t dsz)
                    181: {
1.19      kristaps  182:        size_t           linkstart, realend, linkend, i, j, textsz;
1.18      kristaps  183:        const char      *text;
1.9       kristaps  184:
                    185:        /*
                    186:         * Scan to the start of the terminus.
                    187:         * This function is more or less replicated in the formatcode()
                    188:         * for null or index formatting codes.
                    189:         */
1.19      kristaps  190:        for (linkstart = realend = *start; realend < end; realend++) {
                    191:                if ('>' != buf[realend])
1.9       kristaps  192:                        continue;
                    193:                else if (dsz == 1)
                    194:                        break;
1.19      kristaps  195:                assert(realend > 0);
                    196:                if (' ' != buf[realend - 1])
1.9       kristaps  197:                        continue;
1.19      kristaps  198:                for (i = realend, j = 0; i < end && j < dsz; j++)
1.9       kristaps  199:                        if ('>' != buf[i++])
                    200:                                break;
                    201:                if (dsz == j)
                    202:                        break;
                    203:        }
1.19      kristaps  204:
                    205:        /* Ignore stubs. */
                    206:        if (realend == end || realend == *start)
1.9       kristaps  207:                return(0);
                    208:
1.19      kristaps  209:        /* Set linkend to the end of content. */
                    210:        linkend = dsz > 1 ? realend - 1 : realend;
1.18      kristaps  211:
1.19      kristaps  212:        /* Re-scan to see if we have a title or section. */
1.18      kristaps  213:        text = &buf[*start];
1.19      kristaps  214:        for (textsz = *start; textsz < linkend; textsz++)
                    215:                if ('|' == buf[textsz] || '/' == buf[textsz])
1.18      kristaps  216:                        break;
                    217:
1.19      kristaps  218:        if (textsz < linkend && '|' == buf[textsz]) {
1.20    ! kristaps  219:                /* With title: set start, then end at section. */
1.19      kristaps  220:                linkstart = textsz + 1;
1.18      kristaps  221:                textsz = textsz - *start;
1.19      kristaps  222:                for (i = linkstart; i < linkend; i++)
                    223:                        if ('/' == buf[i])
                    224:                                break;
                    225:                if (i < linkend)
                    226:                        linkend = i;
1.20    ! kristaps  227:        } else if (textsz < linkend && '/' == buf[textsz]) {
        !           228:                /* With section: set end at section. */
        !           229:                linkend = textsz;
        !           230:                textsz = 0;
        !           231:        } else
        !           232:                /* No title, no section. */
1.18      kristaps  233:                textsz = 0;
1.19      kristaps  234:
                    235:        *start = realend;
                    236:        j = linkend - linkstart;
                    237:
1.20    ! kristaps  238:        /* Do we have only subsection material? */
        !           239:        if (0 == j && '/' == buf[linkend]) {
        !           240:                linkstart = linkend + 1;
        !           241:                linkend = dsz > 1 ? realend - 1 : realend;
        !           242:                if (0 == (j = linkend - linkstart))
        !           243:                        return(0);
        !           244:                printf("Sx %.*s", (int)j, &buf[linkstart]);
        !           245:                return(1);
        !           246:        } else if (0 == j)
1.19      kristaps  247:                return(0);
                    248:
                    249:        /* See if we qualify as being a link or not. */
1.20    ! kristaps  250:        if ((j > 4 && 0 == memcmp("http:", &buf[linkstart], j)) ||
        !           251:                (j > 5 && 0 == memcmp("https:", &buf[linkstart], j)) ||
        !           252:                (j > 3 && 0 == memcmp("ftp:", &buf[linkstart], j)) ||
        !           253:                (j > 4 && 0 == memcmp("sftp:", &buf[linkstart], j)) ||
        !           254:                (j > 3 && 0 == memcmp("smb:", &buf[linkstart], j)) ||
        !           255:                (j > 3 && 0 == memcmp("afs:", &buf[linkstart], j))) {
        !           256:                /* Gross. */
        !           257:                printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 :
        !           258:                        realend) - linkstart), &buf[linkstart]);
1.19      kristaps  259:                return(1);
                    260:        }
                    261:
                    262:        /* See if we qualify as a mailto. */
1.20    ! kristaps  263:        if (j > 6 && 0 == memcmp("mailto:", &buf[linkstart], j)) {
1.19      kristaps  264:                printf("Mt %.*s", (int)j, &buf[linkstart]);
                    265:                return(1);
                    266:        }
                    267:
                    268:        /* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */
                    269:        if ((j > 3 && ')' == buf[linkend - 1]) &&
                    270:                ('(' == buf[linkend - 3])) {
                    271:                printf("Xr %.*s %c", (int)(j - 3),
                    272:                        &buf[linkstart], buf[linkend - 2]);
                    273:                return(1);
                    274:        } else if ((j > 4 && ')' == buf[linkend - 1]) &&
                    275:                ('(' == buf[linkend - 4])) {
                    276:                printf("Xr %.*s %.*s", (int)(j - 4),
                    277:                        &buf[linkstart], 2, &buf[linkend - 3]);
                    278:                return(1);
                    279:        } else if ((j > 5 && ')' == buf[linkend - 1]) &&
                    280:                ('(' == buf[linkend - 5])) {
                    281:                printf("Xr %.*s %.*s", (int)(j - 5),
                    282:                        &buf[linkstart], 3, &buf[linkend - 4]);
                    283:                return(1);
                    284:        }
                    285:
                    286:        /* Last try: do we have a double-colon? */
                    287:        for (i = linkstart + 1; i < linkend; i++)
                    288:                if (':' == buf[i] && ':' == buf[i - 1])
1.18      kristaps  289:                        break;
1.9       kristaps  290:
1.19      kristaps  291:        if (i < linkend)
1.10      kristaps  292:                printf("Xr %.*s " PERL_SECTION,
1.19      kristaps  293:                        (int)j, &buf[linkstart]);
1.9       kristaps  294:        else
1.19      kristaps  295:                printf("Xr %.*s 1", (int)j, &buf[linkstart]);
1.9       kristaps  296:
                    297:        return(1);
                    298: }
                    299:
/*
 * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
 * then it's likely that we're a flag.
 * Our flag might be followed by an argument, or by further flags, so
 * make sure that we're accounting for that, too.
 * If we don't have a flag at all, however, then assume we're an "Ar".
 *
 * buf:   input buffer.
 * start: in: index of the leading '-'; out: index where the caller
 *        shall continue printing text.  Whenever "Ar" is printed for
 *        something that is not a flag, this still points at its '-',
 *        such that the dash is neither lost nor preceded by garbage.
 * end:   one past the last valid index of buf.
 *
 * Prints "Fl name " for each flag found and "Ar " in front of
 * whatever non-flag text follows.
 */
static void
dosynopsisfl(const char *buf, size_t *start, size_t end)
{
	size_t		 i, name, len;
	unsigned char	 c;

	for (;;) {
		assert(*start < end);
		assert('-' == buf[*start]);

		/* A dash at the very end of the input is no flag. */
		if (*start + 1 >= end) {
			fputs("Ar ", stdout);
			return;
		}

		/* Neither is a dash followed by a non-flag character. */
		c = (unsigned char)buf[*start + 1];
		if ( ! isalnum(c) && '?' != c && '-' != c) {
			fputs("Ar ", stdout);
			return;
		}

		/* Find the end of the flag name. */
		name = *start + 1;
		for (i = name; i < end; i++) {
			c = (unsigned char)buf[i];
			if ( ! isalnum(c) && '?' != c &&
				'-' != c && '_' != c)
				break;
		}

		/*
		 * A flag must be followed by a space or by the closing
		 * delimiter; anything else (including running out of
		 * input) makes the whole word an argument.
		 */
		if (i >= end || (' ' != buf[i] && '>' != buf[i])) {
			fputs("Ar ", stdout);
			return;
		}

		len = i - name;
		fputs("Fl ", stdout);

		/*
		 * A two-letter "Xx" flag name looks like a macro and
		 * must be escaped, no matter what terminates it.
		 */
		if (2 == len &&
			isupper((unsigned char)buf[name]) &&
			islower((unsigned char)buf[name + 1]))
			fputs("\\&", stdout);
		printf("%.*s ", (int)len, &buf[name]);

		/* Skip any blanks following the flag. */
		while (i < end && ' ' == buf[i])
			i++;
		*start = i;

		/*
		 * Nothing left inside the code: do not print "Ar",
		 * because an empty "Ar" renders a default argument.
		 */
		if (i >= end || '>' == buf[i])
			return;

		/* Another dash: try for one more flag. */
		if ('-' == buf[i])
			continue;

		fputs("Ar ", stdout);
		return;
	}
}
                    365:
1.9       kristaps  366: /*
1.1       schwarze  367:  * We're at the character in front of a format code, which is structured
                    368:  * like X<...> and can contain nested format codes.
                    369:  * This consumes the whole format code, and any nested format codes, til
                    370:  * the end of matched production.
                    371:  * If "reentrant", then we're being called after a macro has already
                    372:  * been printed to the current line.
1.6       kristaps  373:  * If "nomacro", then we don't print any macros, just contained data
                    374:  * (e.g., following "Sh" or "Nm").
1.15      kristaps  375:  * "pos" is only significant in SYNOPSIS, and should be 0 when invoked
                    376:  * as the first format code on a line (for decoration as an "Nm"),
                    377:  * non-zero otherwise.
1.6       kristaps  378:  * Return whether we've printed a macro or not--in other words, whether
                    379:  * this should trigger a subsequent newline (this should be ignored when
                    380:  * reentrant).
1.1       schwarze  381:  */
                    382: static int
1.15      kristaps  383: formatcode(struct state *st, const char *buf, size_t *start,
                    384:        size_t end, int reentrant, int nomacro, int pos)
1.1       schwarze  385: {
                    386:        enum fmt         fmt;
1.5       kristaps  387:        size_t           i, j, dsz;
1.1       schwarze  388:
                    389:        assert(*start + 1 < end);
                    390:        assert('<' == buf[*start + 1]);
                    391:
1.6       kristaps  392:        /*
                    393:         * First, look up the format code.
                    394:         * If it's not valid, then exit immediately.
                    395:         */
                    396:        for (fmt = 0; fmt < FMT__MAX; fmt++)
                    397:                if (buf[*start] == fmts[fmt])
                    398:                        break;
                    399:
                    400:        if (FMT__MAX == fmt) {
                    401:                putchar(last = buf[(*start)++]);
1.8       kristaps  402:                if ('\\' == last)
                    403:                        putchar('e');
1.6       kristaps  404:                return(0);
                    405:        }
                    406:
1.5       kristaps  407:        /*
                    408:         * Determine whether we're overriding our delimiter.
                    409:         * According to POD, if we have more than one '<' followed by a
                    410:         * space, then we need a space followed by matching '>' to close
                    411:         * the expression.
                    412:         * Otherwise we use the usual '<' and '>' matched pair.
                    413:         */
                    414:        i = *start + 1;
                    415:        while (i < end && '<' == buf[i])
                    416:                i++;
                    417:        assert(i > *start + 1);
                    418:        dsz = i - (*start + 1);
                    419:        if (dsz > 1 && (i >= end || ' ' != buf[i]))
                    420:                dsz = 1;
                    421:
                    422:        /* Remember, if dsz>1, to jump the trailing space. */
                    423:        *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1       schwarze  424:
                    425:        /*
1.6       kristaps  426:         * Escapes and ignored codes (NULL and INDEX) don't print macro
                    427:         * sequences, so just output them like normal text before
                    428:         * processing for real macros.
1.1       schwarze  429:         */
                    430:        if (FMT_ESCAPE == fmt) {
                    431:                formatescape(buf, start, end);
                    432:                return(0);
                    433:        } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5       kristaps  434:                /*
1.6       kristaps  435:                 * Just consume til the end delimiter, accounting for
                    436:                 * whether it's a custom one.
1.5       kristaps  437:                 */
                    438:                for ( ; *start < end; (*start)++) {
                    439:                        if ('>' != buf[*start])
                    440:                                continue;
                    441:                        else if (dsz == 1)
                    442:                                break;
                    443:                        assert(*start > 0);
                    444:                        if (' ' != buf[*start - 1])
                    445:                                continue;
                    446:                        i = *start;
                    447:                        for (j = 0; i < end && j < dsz; j++)
                    448:                                if ('>' != buf[i++])
                    449:                                        break;
                    450:                        if (dsz != j)
                    451:                                continue;
                    452:                        (*start) += dsz;
                    453:                        break;
                    454:                }
1.1       schwarze  455:                return(0);
                    456:        }
                    457:
1.6       kristaps  458:        /*
                    459:         * Check whether we're supposed to print macro stuff (this is
                    460:         * suppressed in, e.g., "Nm" and "Sh" macros).
                    461:         */
1.1       schwarze  462:        if ( ! nomacro) {
                    463:                /*
                    464:                 * Print out the macro describing this format code.
                    465:                 * If we're not "reentrant" (not yet on a macro line)
                    466:                 * then print a newline, if necessary, and the macro
                    467:                 * indicator.
                    468:                 * Otherwise, offset us with a space.
                    469:                 */
1.6       kristaps  470:                if ( ! reentrant) {
                    471:                        if (last != '\n')
                    472:                                putchar('\n');
1.1       schwarze  473:                        putchar('.');
1.6       kristaps  474:                } else
1.1       schwarze  475:                        putchar(' ');
                    476:
                    477:                /*
1.6       kristaps  478:                 * If we don't have whitespace before us (and none after
                    479:                 * the opening delimiter), then suppress macro
                    480:                 * whitespace with Pf.
1.1       schwarze  481:                 */
1.6       kristaps  482:                if (' ' != last && '\n' != last && ' ' != buf[*start])
                    483:                        printf("Pf ");
                    484:
1.1       schwarze  485:                switch (fmt) {
                    486:                case (FMT_ITALIC):
                    487:                        printf("Em ");
                    488:                        break;
                    489:                case (FMT_BOLD):
1.14      kristaps  490:                        if (SECT_SYNOPSIS == st->sect) {
                    491:                                if (1 == dsz && '-' == buf[*start])
                    492:                                        dosynopsisfl(buf, start, end);
1.15      kristaps  493:                                else if (0 == pos)
                    494:                                        printf("Nm ");
1.14      kristaps  495:                                else
                    496:                                        printf("Ar ");
                    497:                                break;
                    498:                        }
                    499:                        printf("Sy ");
1.1       schwarze  500:                        break;
                    501:                case (FMT_CODE):
1.2       schwarze  502:                        printf("Qo Li ");
1.1       schwarze  503:                        break;
                    504:                case (FMT_LINK):
1.19      kristaps  505:                        /* Try to link; use "No" if it's empty. */
1.9       kristaps  506:                        if ( ! trylink(buf, start, end, dsz))
                    507:                                printf("No ");
1.1       schwarze  508:                        break;
                    509:                case (FMT_FILE):
                    510:                        printf("Pa ");
                    511:                        break;
                    512:                case (FMT_NBSP):
                    513:                        printf("No ");
                    514:                        break;
                    515:                default:
                    516:                        abort();
                    517:                }
                    518:        }
                    519:
                    520:        /*
1.6       kristaps  521:         * Process until we reach the end marker (e.g., '>') or until we
1.5       kristaps  522:         * find a nested format code.
1.1       schwarze  523:         * Don't emit any newlines: since we're on a macro line, we
                    524:         * don't want to break the line.
                    525:         */
                    526:        while (*start < end) {
1.5       kristaps  527:                if ('>' == buf[*start] && 1 == dsz) {
1.1       schwarze  528:                        (*start)++;
                    529:                        break;
1.5       kristaps  530:                } else if ('>' == buf[*start] &&
                    531:                                ' ' == buf[*start - 1]) {
                    532:                        /*
                    533:                         * Handle custom delimiters.
                    534:                         * These require a certain number of
                    535:                         * space-preceded carrots before we're really at
                    536:                         * the end.
                    537:                         */
                    538:                        i = *start;
                    539:                        for (j = 0; i < end && j < dsz; j++)
                    540:                                if ('>' != buf[i++])
                    541:                                        break;
                    542:                        if (dsz == j) {
                    543:                                *start += dsz;
                    544:                                break;
                    545:                        }
1.1       schwarze  546:                }
                    547:                if (*start + 1 < end && '<' == buf[*start + 1]) {
1.15      kristaps  548:                        formatcode(st, buf, start, end, 1, nomacro, 1);
1.1       schwarze  549:                        continue;
                    550:                }
1.3       schwarze  551:
1.4       schwarze  552:                /*
                    553:                 * Make sure that any macro-like words (or
                    554:                 * really any word starting with a capital
                    555:                 * letter) is assumed to be a macro that must be
                    556:                 * escaped.
                    557:                 * This matches "Xx " and "XxEOLN".
                    558:                 */
                    559:                if ((' ' == last || '\n' == last) &&
                    560:                                end - *start > 1 &&
                    561:                                isupper((int)buf[*start]) &&
                    562:                                islower((int)buf[*start + 1]) &&
                    563:                                (end - *start == 2 ||
                    564:                                 ' ' == buf[*start + 2]))
                    565:                        printf("\\&");
1.3       schwarze  566:
1.4       schwarze  567:                /* Suppress newline. */
1.6       kristaps  568:                if ('\n' == buf[*start])
                    569:                        putchar(last = ' ');
                    570:                else
                    571:                        putchar(last = buf[*start]);
1.4       schwarze  572:
1.8       kristaps  573:                /* Protect against character escapes. */
                    574:                if ('\\' == last)
                    575:                        putchar('e');
                    576:
1.6       kristaps  577:                (*start)++;
                    578:
                    579:                if (' ' == last)
                    580:                        while (*start < end && ' ' == buf[*start])
                    581:                                (*start)++;
1.1       schwarze  582:        }
1.2       schwarze  583:
                    584:        if ( ! nomacro && FMT_CODE == fmt)
                    585:                printf(" Qc ");
1.1       schwarze  586:
                    587:        /*
1.6       kristaps  588:         * We're now just past the format code.
                    589:         * If there isn't a space (or newline) here, and we haven't just
                    590:         * printed a space, then suppress space.
1.1       schwarze  591:         */
1.6       kristaps  592:        if ( ! nomacro && ' ' != last)
                    593:                if (' ' != buf[*start] && '\n' != buf[*start])
                    594:                        printf(" Ns ");
1.5       kristaps  595:
1.1       schwarze  596:        return(1);
                    597: }
                    598:
                    599: /*
                    600:  * Calls formatcode() til the end of a paragraph.
                    601:  */
                    602: static void
1.11      kristaps  603: formatcodeln(struct state *st, const char *buf,
                    604:        size_t *start, size_t end, int nomacro)
1.1       schwarze  605: {
                    606:
1.4       schwarze  607:        last = ' ';
1.1       schwarze  608:        while (*start < end)  {
                    609:                if (*start + 1 < end && '<' == buf[*start + 1]) {
1.15      kristaps  610:                        formatcode(st, buf, start, end, 1, nomacro, 1);
1.1       schwarze  611:                        continue;
                    612:                }
1.4       schwarze  613:                /*
                    614:                 * Since we're already on a macro line, we want to make
                    615:                 * sure that we don't inadvertently invoke a macro.
                    616:                 * We need to do this carefully because section names
                    617:                 * are used in troff and we don't want to escape
                    618:                 * something that needn't be escaped.
                    619:                 */
                    620:                if (' ' == last && end - *start > 1 &&
                    621:                                isupper((int)buf[*start]) &&
                    622:                                islower((int)buf[*start + 1]) &&
                    623:                                (end - *start == 2 ||
                    624:                                 ' ' == buf[*start + 2]))
                    625:                        printf("\\&");
                    626:
1.8       kristaps  627:                if ('\n' == buf[*start])
                    628:                        putchar(last = ' ');
                    629:                else
1.1       schwarze  630:                        putchar(last = buf[*start]);
1.8       kristaps  631:
                    632:                /* Protect against character escapes. */
                    633:                if ('\\' == last)
                    634:                        putchar('e');
                    635:
1.1       schwarze  636:                (*start)++;
                    637:        }
                    638: }
                    639:
                    640: /*
1.4       schwarze  641:  * Guess at what kind of list we are.
                    642:  * These are taken straight from the POD manual.
                    643:  * I don't know what people do in real life.
                    644:  */
                    645: static enum list
                    646: listguess(const char *buf, size_t start, size_t end)
                    647: {
                    648:        size_t           len = end - start;
                    649:
                    650:        assert(end >= start);
                    651:
                    652:        if (len == 1 && '*' == buf[start])
                    653:                return(LIST_BULLET);
                    654:        if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
                    655:                return(LIST_ENUM);
                    656:        else if (len == 1 && '1' == buf[start])
                    657:                return(LIST_ENUM);
                    658:        else
                    659:                return(LIST_TAG);
                    660: }
                    661:
                    662: /*
1.1       schwarze  663:  * A command paragraph, as noted in the perlpod manual, just indicates
                    664:  * that we should do something, optionally with some text to print as
                    665:  * well.
                    666:  */
                    667: static void
                    668: command(struct state *st, const char *buf, size_t start, size_t end)
                    669: {
                    670:        size_t           len, csz;
                    671:        enum cmd         cmd;
                    672:
                    673:        assert('=' == buf[start]);
                    674:        start++;
                    675:        len = end - start;
                    676:
                    677:        for (cmd = 0; cmd < CMD__MAX; cmd++) {
                    678:                csz = strlen(cmds[cmd]);
                    679:                if (len < csz)
                    680:                        continue;
                    681:                if (0 == memcmp(&buf[start], cmd[cmds], csz))
                    682:                        break;
                    683:        }
                    684:
                    685:        /* Ignore bogus commands. */
                    686:
                    687:        if (CMD__MAX == cmd)
                    688:                return;
                    689:
                    690:        start += csz;
1.8       kristaps  691:        while (start < end && ' ' == buf[start])
                    692:                start++;
                    693:
1.1       schwarze  694:        len = end - start;
                    695:
                    696:        if (st->paused) {
                    697:                st->paused = CMD_END != cmd;
                    698:                return;
                    699:        }
                    700:
                    701:        switch (cmd) {
                    702:        case (CMD_POD):
                    703:                break;
                    704:        case (CMD_HEAD1):
                    705:                /*
                    706:                 * The behaviour of head= follows from a quick glance at
                    707:                 * how pod2man handles it.
                    708:                 */
                    709:                printf(".Sh ");
1.11      kristaps  710:                st->sect = SECT_NONE;
                    711:                if (end - start == 4) {
1.1       schwarze  712:                        if (0 == memcmp(&buf[start], "NAME", 4))
1.11      kristaps  713:                                st->sect = SECT_NAME;
                    714:                } else if (end - start == 8) {
                    715:                        if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
                    716:                                st->sect = SECT_SYNOPSIS;
                    717:                }
                    718:                formatcodeln(st, buf, &start, end, 1);
1.1       schwarze  719:                putchar('\n');
                    720:                st->haspar = 1;
                    721:                break;
                    722:        case (CMD_HEAD2):
                    723:                printf(".Ss ");
1.11      kristaps  724:                formatcodeln(st, buf, &start, end, 1);
1.1       schwarze  725:                putchar('\n');
                    726:                st->haspar = 1;
                    727:                break;
                    728:        case (CMD_HEAD3):
                    729:                puts(".Pp");
                    730:                printf(".Em ");
1.11      kristaps  731:                formatcodeln(st, buf, &start, end, 0);
1.1       schwarze  732:                putchar('\n');
                    733:                puts(".Pp");
                    734:                st->haspar = 1;
                    735:                break;
                    736:        case (CMD_HEAD4):
                    737:                puts(".Pp");
                    738:                printf(".No ");
1.11      kristaps  739:                formatcodeln(st, buf, &start, end, 0);
1.1       schwarze  740:                putchar('\n');
                    741:                puts(".Pp");
                    742:                st->haspar = 1;
                    743:                break;
                    744:        case (CMD_OVER):
1.4       schwarze  745:                /*
                    746:                 * If we have an existing list that hasn't had an =item
                    747:                 * yet, then make sure that we open it now.
                    748:                 * We use the default list type, but that can't be
                    749:                 * helped (we haven't seen any items yet).
1.1       schwarze  750:                 */
1.4       schwarze  751:                if (st->lpos > 0)
                    752:                        if (LIST__MAX == st->lstack[st->lpos - 1]) {
                    753:                                st->lstack[st->lpos - 1] = LIST_TAG;
                    754:                                puts(".Bl -tag -width Ds");
                    755:                        }
                    756:                st->lpos++;
                    757:                assert(st->lpos < LIST_STACKSZ);
                    758:                st->lstack[st->lpos - 1] = LIST__MAX;
1.1       schwarze  759:                break;
                    760:        case (CMD_ITEM):
1.6       kristaps  761:                if (0 == st->lpos) {
                    762:                        /*
                    763:                         * Bad markup.
                    764:                         * Try to compensate.
                    765:                         */
                    766:                        st->lstack[st->lpos] = LIST__MAX;
                    767:                        st->lpos++;
                    768:                }
1.4       schwarze  769:                assert(st->lpos > 0);
                    770:                /*
                    771:                 * If we're the first =item, guess at what our content
                    772:                 * will be: "*" is a bullet list, "1." is a numbered
                    773:                 * list, and everything is tagged.
                    774:                 */
                    775:                if (LIST__MAX == st->lstack[st->lpos - 1]) {
                    776:                        st->lstack[st->lpos - 1] =
                    777:                                listguess(buf, start, end);
                    778:                        switch (st->lstack[st->lpos - 1]) {
                    779:                        case (LIST_BULLET):
                    780:                                puts(".Bl -bullet");
                    781:                                break;
                    782:                        case (LIST_ENUM):
                    783:                                puts(".Bl -enum");
                    784:                                break;
                    785:                        default:
                    786:                                puts(".Bl -tag -width Ds");
                    787:                                break;
                    788:                        }
                    789:                }
                    790:                switch (st->lstack[st->lpos - 1]) {
                    791:                case (LIST_TAG):
                    792:                        printf(".It ");
1.11      kristaps  793:                        formatcodeln(st, buf, &start, end, 0);
1.4       schwarze  794:                        putchar('\n');
                    795:                        break;
                    796:                case (LIST_ENUM):
                    797:                        /* FALLTHROUGH */
                    798:                case (LIST_BULLET):
                    799:                        /*
                    800:                         * Abandon the remainder of the paragraph
                    801:                         * because we're going to be a bulletted or
                    802:                         * numbered list.
                    803:                         */
                    804:                        puts(".It");
                    805:                        break;
                    806:                default:
                    807:                        abort();
                    808:                }
1.1       schwarze  809:                st->haspar = 1;
                    810:                break;
                    811:        case (CMD_BACK):
1.4       schwarze  812:                /* Make sure we don't back over the stack. */
                    813:                if (st->lpos > 0) {
                    814:                        st->lpos--;
                    815:                        puts(".El");
                    816:                }
1.1       schwarze  817:                break;
                    818:        case (CMD_BEGIN):
                    819:                /*
                    820:                 * We disregard all types for now.
                    821:                 * TODO: process at least "text" in a -literal block.
                    822:                 */
                    823:                st->paused = 1;
                    824:                break;
                    825:        case (CMD_FOR):
                    826:                /*
                    827:                 * We ignore all types of encodings and formats
                    828:                 * unilaterally.
                    829:                 */
                    830:                break;
                    831:        case (CMD_ENCODING):
                    832:                break;
                    833:        case (CMD_CUT):
                    834:                st->parsing = 0;
                    835:                return;
                    836:        default:
                    837:                abort();
                    838:        }
                    839:
                    840:        /* Any command (but =cut) makes us start parsing. */
                    841:        st->parsing = 1;
                    842: }
                    843:
                    844: /*
                    845:  * Just pump out the line in a verbatim block.
                    846:  */
                    847: static void
                    848: verbatim(struct state *st, const char *buf, size_t start, size_t end)
                    849: {
1.8       kristaps  850:        int              last;
1.1       schwarze  851:
                    852:        if ( ! st->parsing || st->paused)
                    853:                return;
                    854:
                    855:        puts(".Bd -literal");
1.8       kristaps  856:        for (last = ' '; start < end; start++) {
                    857:                /*
                    858:                 * Handle accidental macros (newline starting with
                    859:                 * control character) and escapes.
                    860:                 */
                    861:                if ('\n' == last)
1.7       kristaps  862:                        if ('.' == buf[start] || '\'' == buf[start])
                    863:                                printf("\\&");
1.8       kristaps  864:                putchar(last = buf[start]);
                    865:                if ('\\' == buf[start])
                    866:                        printf("e");
1.7       kristaps  867:        }
                    868:        putchar('\n');
1.1       schwarze  869:        puts(".Ed");
                    870: }
                    871:
                    /*
                     * Helper for dosynopsisop().
                     * Scan buf[start, end) for a ']' that is not paired with a '[' found
                     * earlier in the same range, i.e. one that would close a bracket
                     * opened before start.
                     * Returns 1 as soon as such a ']' is found and 0 if the range ends
                     * first.
                     */
                    static int
                    hasmatch(const char *buf, size_t start, size_t end)
                    {
                            size_t   depth, pos;

                            depth = 0;
                            for (pos = start; pos < end; pos++) {
                                    switch (buf[pos]) {
                                    case ('['):
                                            /* A nested opener needs its own closer. */
                                            depth++;
                                            break;
                                    case (']'):
                                            if (0 == depth)
                                                    return(1);
                                            depth--;
                                            break;
                                    default:
                                            break;
                                    }
                            }

                            return(0);
                    }
                    889:
                    /*
                     * If we're in the SYNOPSIS section and we've encountered a bracket in
                     * an ordinary paragraph, then try to see whether we're an [-option].
                     * buf[*start] must be '[' or ']'.
                     * An opening bracket is accepted only if hasmatch() finds its closing
                     * bracket before end; it prints ".Oo" and increments *opstack.
                     * A closing bracket is accepted only if *opstack is non-zero; it
                     * prints ".Oc" and decrements *opstack.
                     * On acceptance the macro is put on its own line, *start is moved
                     * past the bracket and any spaces following it (never past end),
                     * *last is set to '\n' and 1 is returned.
                     * Otherwise nothing is printed or changed and 0 is returned.
                     */
                    static int
                    dosynopsisop(const char *buf, int *last,
                            size_t *start, size_t end, size_t *opstack)
                    {

                            assert('[' == buf[*start] || ']' == buf[*start]);

                            if ('[' == buf[*start]) {
                                    if ( ! hasmatch(buf, *start + 1, end))
                                            return(0);
                                    if ('\n' != *last)
                                            putchar('\n');
                                    puts(".Oo");
                                    (*opstack)++;
                            } else if (']' == buf[*start]) {
                                    if (0 == *opstack)
                                            return(0);
                                    if ('\n' != *last)
                                            putchar('\n');
                                    puts(".Oc");
                                    (*opstack)--;
                            }

                            (*start)++;
                            *last = '\n';
                            /*
                             * Swallow the spaces after the bracket so the next output
                             * line doesn't begin with whitespace.
                             * Stop at end: the bytes beyond it belong to someone else.
                             */
                            while (*start < end && ' ' == buf[*start])
                                    (*start)++;
                            return(1);
                    }
                    926:
                    /*
                     * Format the comma-separated manpage names in buf[*start, end) of the
                     * NAME section, one ".Nm" line per name.
                     * Leading spaces are skipped; if nothing is left, ".Nm unknown" is
                     * printed instead.
                     * Each name is written with formatcodeln(); a name that is followed
                     * by a comma gets a trailing " ," and the spaces after the comma are
                     * skipped.
                     */
                    static void
                    donamenm(struct state *st, const char *buf, size_t *start, size_t end)
                    {
                            const char      *comma;
                            size_t           stop;

                            while (*start < end && ' ' == buf[*start])
                                    (*start)++;

                            if (end == *start) {
                                    puts(".Nm unknown");
                                    return;
                            }

                            while (*start < end) {
                                    fputs(".Nm ", stdout);

                                    /* The name runs up to the next comma, or to end. */
                                    comma = memchr(&buf[*start], ',', end - *start);
                                    stop = NULL == comma ? end : (size_t)(comma - buf);
                                    formatcodeln(st, buf, start, stop, 1);

                                    /* That was the last name: finish the line. */
                                    if (*start == end) {
                                            putchar('\n');
                                            break;
                                    }

                                    assert(',' == buf[*start]);
                                    puts(" ,");
                                    (*start)++;
                                    while (*start < end && ' ' == buf[*start])
                                            (*start)++;
                            }
                    }
                    960:
                    961: /*
1.1       schwarze  962:  * Ordinary paragraph.
                    963:  * Well, this is really the hardest--POD seems to assume that, for
                    964:  * example, a leading space implies a newline, and so on.
                    965:  * Lots of other snakes in the grass: escaping a newline followed by a
                    966:  * period (accidental mdoc(7) control), double-newlines after macro
                    967:  * passages, etc.
                    968:  */
                    969: static void
                    970: ordinary(struct state *st, const char *buf, size_t start, size_t end)
                    971: {
1.13      kristaps  972:        size_t          i, j, opstack;
1.15      kristaps  973:        int             seq;
1.1       schwarze  974:
                    975:        if ( ! st->parsing || st->paused)
                    976:                return;
                    977:
                    978:        /*
                    979:         * Special-case: the NAME section.
                    980:         * If we find a "-" when searching from the end, assume that
                    981:         * we're in "name - description" format.
                    982:         * To wit, print out a "Nm" and "Nd" in that format.
                    983:         */
1.11      kristaps  984:        if (SECT_NAME == st->sect) {
1.15      kristaps  985:                for (i = end - 2; i > start; i--)
                    986:                        if ('-' == buf[i] && ' ' == buf[i + 1])
1.1       schwarze  987:                                break;
                    988:                if ('-' == buf[i]) {
                    989:                        j = i;
                    990:                        /* Roll over multiple "-". */
                    991:                        for ( ; i > start; i--)
                    992:                                if ('-' != buf[i])
                    993:                                        break;
1.17      kristaps  994:                        donamenm(st, buf, &start, i + 1);
1.5       kristaps  995:                        start = j + 1;
1.17      kristaps  996:                        while (start < end && ' ' == buf[start])
                    997:                                start++;
1.15      kristaps  998:                        fputs(".Nd ", stdout);
1.11      kristaps  999:                        formatcodeln(st, buf, &start, end, 1);
1.5       kristaps 1000:                        putchar('\n');
1.1       schwarze 1001:                        return;
                   1002:                }
                   1003:        }
                   1004:
                   1005:        if ( ! st->haspar)
                   1006:                puts(".Pp");
                   1007:
                   1008:        st->haspar = 0;
                   1009:        last = '\n';
1.13      kristaps 1010:        opstack = 0;
1.1       schwarze 1011:
1.15      kristaps 1012:        for (seq = 0; start < end; seq++) {
1.1       schwarze 1013:                /*
                   1014:                 * Loop til we get either to a newline or escape.
                   1015:                 * Escape initial control characters.
                   1016:                 */
                   1017:                while (start < end) {
                   1018:                        if (start < end - 1 && '<' == buf[start + 1])
                   1019:                                break;
                   1020:                        else if ('\n' == buf[start])
                   1021:                                break;
                   1022:                        else if ('\n' == last && '.' == buf[start])
                   1023:                                printf("\\&");
                   1024:                        else if ('\n' == last && '\'' == buf[start])
                   1025:                                printf("\\&");
1.12      kristaps 1026:                        /*
                   1027:                         * If we're in the SYNOPSIS, have square
                   1028:                         * brackets indicate that we're opening and
                   1029:                         * closing an optional context.
                   1030:                         */
1.13      kristaps 1031:                        if (SECT_SYNOPSIS == st->sect &&
                   1032:                                ('[' == buf[start] ||
                   1033:                                 ']' == buf[start]) &&
                   1034:                                dosynopsisop(buf, &last,
                   1035:                                        &start, end, &opstack))
                   1036:                                continue;
1.1       schwarze 1037:                        putchar(last = buf[start++]);
1.8       kristaps 1038:                        if ('\\' == last)
                   1039:                                putchar('e');
1.1       schwarze 1040:                }
                   1041:
                   1042:                if (start < end - 1 && '<' == buf[start + 1]) {
                   1043:                        /*
                   1044:                         * We've encountered a format code.
                   1045:                         * This is going to trigger a macro no matter
                   1046:                         * what, so print a newline now.
                   1047:                         * Then print the (possibly nested) macros and
                   1048:                         * following that, a newline.
1.8       kristaps 1049:                         * Consume all whitespace so we don't
                   1050:                         * accidentally start an implicit literal line.
1.16      kristaps 1051:                         * If the macro ends with a flush comma or
                   1052:                         * period, let mdoc(7) handle it for us.
1.1       schwarze 1053:                         */
1.15      kristaps 1054:                        if (formatcode(st, buf, &start, end, 0, 0, seq)) {
1.16      kristaps 1055:                                if ((start == end - 1 ||
                   1056:                                        (start < end - 1 &&
                   1057:                                         (' ' == buf[start + 1] ||
                   1058:                                          '\n' == buf[start + 1]))) &&
                   1059:                                        ('.' == buf[start] ||
                   1060:                                         ',' == buf[start])) {
                   1061:                                        putchar(' ');
                   1062:                                        putchar(buf[start++]);
                   1063:                                }
1.1       schwarze 1064:                                putchar(last = '\n');
1.6       kristaps 1065:                                while (start < end && ' ' == buf[start])
                   1066:                                        start++;
                   1067:                        }
1.1       schwarze 1068:                } else if (start < end && '\n' == buf[start]) {
                   1069:                        /*
                   1070:                         * Print the newline only if we haven't already
                   1071:                         * printed a newline.
                   1072:                         */
                   1073:                        if (last != '\n')
                   1074:                                putchar(last = buf[start]);
                   1075:                        if (++start >= end)
                   1076:                                continue;
                   1077:                        /*
                   1078:                         * If we have whitespace next, eat it to prevent
                   1079:                         * mdoc(7) from thinking that it's meant for
                   1080:                         * verbatim text.
                   1081:                         * It is--but if we start with that, we can't
                   1082:                         * have a macro subsequent it, which may be
                   1083:                         * possible if we have an escape next.
                   1084:                         */
                   1085:                        if (' ' == buf[start] || '\t' == buf[start]) {
                   1086:                                puts(".br");
                   1087:                                last = '\n';
                   1088:                        }
                   1089:                        for ( ; start < end; start++)
                   1090:                                if (' ' != buf[start] && '\t' != buf[start])
                   1091:                                        break;
1.12      kristaps 1092:                }
1.1       schwarze 1093:        }
                   1094:
                   1095:        if (last != '\n')
                   1096:                putchar('\n');
                   1097: }
                   1098:
                   1099: /*
                   1100:  * There are three kinds of paragraphs: verbatim (starts with whitespace
                   1101:  * of some sort), ordinary (starts without "=" marker), or a command
                   1102:  * (default: starts with "=").
                   1103:  */
                   1104: static void
                   1105: dopar(struct state *st, const char *buf, size_t start, size_t end)
                   1106: {
                   1107:
                   1108:        if (end == start)
                   1109:                return;
                   1110:        if (' ' == buf[start] || '\t' == buf[start])
                   1111:                verbatim(st, buf, start, end);
                   1112:        else if ('=' != buf[start])
                   1113:                ordinary(st, buf, start, end);
                   1114:        else
                   1115:                command(st, buf, start, end);
                   1116: }
                   1117:
                   1118: /*
                   1119:  * Loop around paragraphs within a document, processing each one in the
                   1120:  * POD way.
                   1121:  */
                   1122: static void
                   1123: dofile(const struct args *args, const char *fname,
                   1124:        const struct tm *tm, const char *buf, size_t sz)
                   1125: {
                   1126:        size_t           sup, end, i, cur = 0;
                   1127:        struct state     st;
                   1128:        const char      *section, *date;
                   1129:        char             datebuf[64];
                   1130:        char            *title, *cp;
                   1131:
                   1132:        if (0 == sz)
                   1133:                return;
                   1134:
                   1135:        /* Title is last path component of the filename. */
                   1136:
                   1137:        if (NULL != args->title)
                   1138:                title = strdup(args->title);
                   1139:        else if (NULL != (cp = strrchr(fname, '/')))
                   1140:                title = strdup(cp + 1);
                   1141:        else
                   1142:                title = strdup(fname);
                   1143:
                   1144:        if (NULL == title) {
                   1145:                perror(NULL);
                   1146:                exit(EXIT_FAILURE);
                   1147:        }
                   1148:
                   1149:        /* Section is 1 unless suffix is "pm". */
                   1150:
                   1151:        if (NULL == (section = args->section)) {
                   1152:                section = "1";
                   1153:                if (NULL != (cp = strrchr(title, '.'))) {
                   1154:                        *cp++ = '\0';
                   1155:                        if (0 == strcmp(cp, "pm"))
1.10      kristaps 1156:                                section = PERL_SECTION;
1.1       schwarze 1157:                }
                   1158:        }
                   1159:
                   1160:        /* Date.  Or the given "tm" if not supplied. */
                   1161:
                   1162:        if (NULL == (date = args->date)) {
                   1163:                strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
                   1164:                date = datebuf;
                   1165:        }
                   1166:
                   1167:        for (cp = title; '\0' != *cp; cp++)
                   1168:                *cp = toupper((int)*cp);
                   1169:
                   1170:        /* The usual mdoc(7) preamble. */
                   1171:
                   1172:        printf(".Dd %s\n", date);
                   1173:        printf(".Dt %s %s\n", title, section);
                   1174:        puts(".Os");
                   1175:
                   1176:        free(title);
                   1177:
                   1178:        memset(&st, 0, sizeof(struct state));
                   1179:        assert(sz > 0);
                   1180:
                   1181:        /* Main loop over file contents. */
                   1182:
                   1183:        while (cur < sz) {
                   1184:                /* Read until next paragraph. */
                   1185:                for (i = cur + 1; i < sz; i++)
                   1186:                        if ('\n' == buf[i] && '\n' == buf[i - 1]) {
                   1187:                                /* Consume blank paragraphs. */
                   1188:                                while (i + 1 < sz && '\n' == buf[i + 1])
                   1189:                                        i++;
                   1190:                                break;
                   1191:                        }
                   1192:
                   1193:                /* Adjust end marker for EOF. */
                   1194:                end = i < sz ? i - 1 :
                   1195:                        ('\n' == buf[sz - 1] ? sz - 1 : sz);
                   1196:                sup = i < sz ? end + 2 : sz;
                   1197:
                   1198:                /* Process paragraph and adjust start. */
                   1199:                dopar(&st, buf, cur, end);
                   1200:                cur = sup;
                   1201:        }
                   1202: }
                   1203:
                   1204: /*
                   1205:  * Read a single file fully into memory.
                   1206:  * If the file is "-", do it from stdin.
                   1207:  * If successfully read, send the input buffer to dofile() for further
                   1208:  * processing.
                   1209:  */
                   1210: static int
                   1211: readfile(const struct args *args, const char *fname)
                   1212: {
                   1213:        int              fd;
                   1214:        char            *buf;
                   1215:        size_t           bufsz, cur;
                   1216:        ssize_t          ssz;
                   1217:        struct tm       *tm;
                   1218:        time_t           ttm;
                   1219:        struct stat      st;
                   1220:
                   1221:        assert(NULL != fname);
                   1222:
                   1223:        fd = 0 != strcmp("-", fname) ?
                   1224:                open(fname, O_RDONLY, 0) : STDIN_FILENO;
                   1225:
                   1226:        if (-1 == fd) {
                   1227:                perror(fname);
                   1228:                return(0);
                   1229:        }
                   1230:
                   1231:        if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
                   1232:                ttm = time(NULL);
                   1233:                tm = localtime(&ttm);
                   1234:        } else
                   1235:                tm = localtime(&st.st_mtime);
                   1236:
                   1237:        /*
                   1238:         * Arbitrarily-sized initial buffer.
                   1239:         * Should be big enough for most files...
                   1240:         */
                   1241:        cur = 0;
                   1242:        bufsz = 1 << 14;
                   1243:        if (NULL == (buf = malloc(bufsz))) {
                   1244:                perror(NULL);
                   1245:                exit(EXIT_FAILURE);
                   1246:        }
                   1247:
                   1248:        while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
                   1249:                /* Double buffer size on fill. */
                   1250:                if ((size_t)ssz == bufsz - cur)  {
                   1251:                        bufsz *= 2;
                   1252:                        if (NULL == (buf = realloc(buf, bufsz))) {
                   1253:                                perror(NULL);
                   1254:                                exit(EXIT_FAILURE);
                   1255:                        }
                   1256:                }
                   1257:                cur += (size_t)ssz;
                   1258:        }
                   1259:        if (ssz < 0) {
                   1260:                perror(fname);
                   1261:                free(buf);
                   1262:                return(0);
                   1263:        }
                   1264:
                   1265:        dofile(args, STDIN_FILENO == fd ?
                   1266:                "STDIN" : fname, tm, buf, cur);
                   1267:        free(buf);
                   1268:        if (STDIN_FILENO != fd)
                   1269:                close(fd);
                   1270:        return(1);
                   1271: }
                   1272:
                   1273: int
                   1274: main(int argc, char *argv[])
                   1275: {
                   1276:        const char      *fname, *name;
                   1277:        struct args      args;
                   1278:        int              c;
                   1279:
                   1280:        name = strrchr(argv[0], '/');
                   1281:        if (name == NULL)
                   1282:                name = argv[0];
                   1283:        else
                   1284:                ++name;
                   1285:
                   1286:        memset(&args, 0, sizeof(struct args));
                   1287:        fname = "-";
                   1288:
                   1289:        /* Accept no arguments for now. */
                   1290:
                   1291:        while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
                   1292:                switch (c) {
                   1293:                case ('h'):
                   1294:                        /* FALLTHROUGH */
                   1295:                case ('l'):
                   1296:                        /* FALLTHROUGH */
                   1297:                case ('c'):
                   1298:                        /* FALLTHROUGH */
                   1299:                case ('o'):
                   1300:                        /* FALLTHROUGH */
                   1301:                case ('q'):
                   1302:                        /* FALLTHROUGH */
                   1303:                case ('r'):
                   1304:                        /* FALLTHROUGH */
                   1305:                case ('u'):
                   1306:                        /* FALLTHROUGH */
                   1307:                case ('v'):
                   1308:                        /* Ignore these. */
                   1309:                        break;
                   1310:                case ('d'):
                   1311:                        args.date = optarg;
                   1312:                        break;
                   1313:                case ('n'):
                   1314:                        args.title = optarg;
                   1315:                        break;
                   1316:                case ('s'):
                   1317:                        args.section = optarg;
                   1318:                        break;
                   1319:                default:
                   1320:                        goto usage;
                   1321:                }
                   1322:
                   1323:        argc -= optind;
                   1324:        argv += optind;
                   1325:
                   1326:        /* Accept only a single input file. */
                   1327:
                   1328:        if (argc > 2)
                   1329:                return(EXIT_FAILURE);
                   1330:        else if (1 == argc)
                   1331:                fname = *argv;
                   1332:
                   1333:        return(readfile(&args, fname) ?
                   1334:                EXIT_SUCCESS : EXIT_FAILURE);
                   1335:
                   1336: usage:
                   1337:        fprintf(stderr, "usage: %s [-d date] "
                   1338:                "[-n title] [-s section]\n", name);
                   1339:
                   1340:        return(EXIT_FAILURE);
                   1341: }
CVSweb