pod2mdoc/pod2mdoc.c - annotate

Return to pod2mdoc.c CVS log
Up to [cvsweb.bsd.lv] / pod2mdoc
Annotation of pod2mdoc/pod2mdoc.c, Revision 1.3

1.3     ! schwarze    1: /*     $Id: pod2mdoc.c,v 1.2 2014/03/20 15:15:32 schwarze Exp $ */
1.1       schwarze    2: /*
                      3:  * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
                      4:  *
                      5:  * Permission to use, copy, modify, and distribute this software for any
                      6:  * purpose with or without fee is hereby granted, provided that the above
                      7:  * copyright notice and this permission notice appear in all copies.
                      8:  *
                      9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
                     10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
                     12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     16:  */
                     17: #include <sys/stat.h>
                     18: #include <sys/time.h>
                     19:
                     20: #include <assert.h>
                     21: #include <ctype.h>
                     22: #include <fcntl.h>
                     23: #include <getopt.h>
                     24: #include <stdio.h>
                     25: #include <stdlib.h>
                     26: #include <string.h>
                     27: #include <unistd.h>
                     28:
                     /*
                      * Optional overrides for the document header macros.
                      */
                     struct args {
                             const char *title;      /* replaces the "Dt" title */
                             const char *date;       /* replaces the "Dd" date */
                             const char *section;    /* replaces the "Dt" section */
                     };
                     34:
                     /*
                      * Parser state carried from one paragraph to the next.
                      */
                     struct state {
                             /* Zero after =cut or before the first command. */
                             int parsing;
                             /* Nonzero after =begin and until the matching =end. */
                             int paused;
                             /* In paragraph: nonzero if the next text needs no Pp. */
                             int haspar;
                             /* Nonzero while inside the NAME section. */
                             int isname;
                             /* File being parsed. */
                             const char *fname;
                     };
                     42:
                     /*
                      * Format codes.
                      * The values double as indices into the fmts[] letter table, so the
                      * order here and there must agree; FMT__MAX is the table size.
                      */
                     enum fmt {
                             FMT_ITALIC = 0, /* I<> */
                             FMT_BOLD,       /* B<> */
                             FMT_CODE,       /* C<> */
                             FMT_LINK,       /* L<> */
                             FMT_ESCAPE,     /* E<> */
                             FMT_FILE,       /* F<> */
                             FMT_NBSP,       /* S<> */
                             FMT_INDEX,      /* X<> */
                             FMT_NULL,       /* Z<> */
                             FMT__MAX
                     };
                     55:
                     /*
                      * Command paragraphs.
                      * The values double as indices into the cmds[] name table, so the
                      * order here and there must agree; CMD__MAX is the table size.
                      */
                     enum cmd {
                             CMD_POD,        /* =pod */
                             CMD_HEAD1,      /* =head1 */
                             CMD_HEAD2,      /* =head2 */
                             CMD_HEAD3,      /* =head3 */
                             CMD_HEAD4,      /* =head4 */
                             CMD_OVER,       /* =over */
                             CMD_ITEM,       /* =item */
                             CMD_BACK,       /* =back */
                             CMD_BEGIN,      /* =begin */
                             CMD_END,        /* =end */
                             CMD_FOR,        /* =for */
                             CMD_ENCODING,   /* =encoding */
                             CMD_CUT,        /* =cut */
                             CMD__MAX
                     };
                     72:
                     73: static const char *const cmds[CMD__MAX] = {
                     74:        "pod",          /* CMD_POD */
                     75:        "head1",        /* CMD_HEAD1 */
                     76:        "head2",        /* CMD_HEAD2 */
                     77:        "head3",        /* CMD_HEAD3 */
                     78:        "head4",        /* CMD_HEAD4 */
                     79:        "over",         /* CMD_OVER */
                     80:        "item",         /* CMD_ITEM */
                     81:        "back",         /* CMD_BACK */
                     82:        "begin",        /* CMD_BEGIN */
                     83:        "end",          /* CMD_END */
                     84:        "for",          /* CMD_FOR */
                     85:        "encoding",     /* CMD_ENCODING */
                     86:        "cut"           /* CMD_CUT */
                     87: };
                     88:
                     89: static const char fmts[FMT__MAX] = {
                     90:        'I',            /* FMT_ITALIC */
                     91:        'B',            /* FMT_BOLD */
                     92:        'C',            /* FMT_CODE */
                     93:        'L',            /* FMT_LINK */
                     94:        'E',            /* FMT_ESCAPE */
                     95:        'F',            /* FMT_FILE */
                     96:        'S',            /* FMT_NBSP */
                     97:        'X',            /* FMT_INDEX */
                     98:        'Z'             /* FMT_NULL */
                     99: };
                    100:
                    /*
                     * Handle the inside of an E<...> escape.
                     * On entry, buf[*start] is the first character of the escape name;
                     * the name runs up to the closing '>' or to "end", whichever comes
                     * first.
                     * On return, *start is one past the '>', or equal to "end" if the
                     * escape was never closed.
                     * Only the named escapes lt, gt, vb and sol produce output; anything
                     * else (including names too long for the local buffer) is consumed
                     * and dropped.
                     */
                    static void
                    formatescape(const char *buf, size_t *start, size_t end)
                    {
                            char             esc[16]; /* no more needed */
                            size_t           i;
                            int              toolong;

                            /* Copy as much of the name as fits. */
                            i = 0;
                            while (*start < end && '>' != buf[*start] &&
                                i < sizeof(esc) - 1)
                                    esc[i++] = buf[(*start)++];
                            esc[i] = '\0';

                            /*
                             * Anything still in front of the '>' did not fit: skip it
                             * and remember that the name we hold is truncated.
                             */
                            toolong = 0;
                            while (*start < end && '>' != buf[*start]) {
                                    toolong = 1;
                                    (*start)++;
                            }

                            /* Unterminated escape: there is nothing left to consume. */
                            if (*start >= end)
                                    return;

                            /*
                             * Step over the '>' even when the name is rejected, so that
                             * the caller does not print it as literal text.
                             */
                            (*start)++;
                            if (toolong)
                                    return;

                            /*
                             * TODO: right now, we only recognise the named escapes.
                             * Just let the rest of them go.
                             */
                            if (0 == strcmp(esc, "lt"))
                                    printf("\\(la");
                            else if (0 == strcmp(esc, "gt"))
                                    printf("\\(ra");
                            else if (0 == strcmp(esc, "vb"))
                                    printf("\\(ba");
                            else if (0 == strcmp(esc, "sol"))
                                    printf("\\(sl");
                    }
                    143:
                    /*
                     * Advance *start past a run of ' ' characters, never going beyond
                     * "end".
                     * Only the space character counts; tabs and newlines stop the scan.
                     */
                    static void
                    skipspace(const char *buf, size_t *start, size_t end)
                    {
                            size_t           pos;

                            for (pos = *start; pos < end; pos++)
                                    if (' ' != buf[pos])
                                            break;
                            *start = pos;
                    }
                    154:
                    155: /*
                    156:  * We're at the character in front of a format code, which is structured
                    157:  * like X<...> and can contain nested format codes.
                    158:  * This consumes the whole format code, and any nested format codes, til
                    159:  * the end of matched production.
                    160:  * If "reentrant", then we're being called after a macro has already
                    161:  * been printed to the current line.
                    162:  * "last" is set to the last read character: this is used to determine
                    163:  * whether we should buffer with space or not.
                    164:  * If "nomacro", then we don't print any macros, just contained data.
                    165:  */
                    166: static int
                    167: formatcode(const char *buf, size_t *start,
                    168:        size_t end, int reentrant, int last, int nomacro)
                    169: {
                    170:        enum fmt         fmt;
                    171:
                    172:        assert(*start + 1 < end);
                    173:        assert('<' == buf[*start + 1]);
                    174:
                    175:        for (fmt = 0; fmt < FMT__MAX; fmt++)
                    176:                if (buf[*start] == fmts[fmt])
                    177:                        break;
                    178:
                    179:        /* Invalid macros are just regular text. */
                    180:
                    181:        if (FMT__MAX == fmt) {
                    182:                putchar(buf[*start]);
                    183:                (*start)++;
                    184:                return(0);
                    185:        }
                    186:
                    187:        *start += 2;
                    188:
                    189:        /*
                    190:         * Escapes don't print macro sequences, so just output them like
                    191:         * normal text before processing for macros.
                    192:         */
                    193:        if (FMT_ESCAPE == fmt) {
                    194:                formatescape(buf, start, end);
                    195:                return(0);
                    196:        } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
                    197:                /* For indices and nulls, just consume. */
                    198:                while (*start < end && '>' != buf[*start])
                    199:                        (*start)++;
                    200:                if (*start < end)
                    201:                        (*start)++;
                    202:                return(0);
                    203:        }
                    204:
                    205:        if ( ! nomacro) {
                    206:                /*
                    207:                 * Print out the macro describing this format code.
                    208:                 * If we're not "reentrant" (not yet on a macro line)
                    209:                 * then print a newline, if necessary, and the macro
                    210:                 * indicator.
                    211:                 * Otherwise, offset us with a space.
                    212:                 */
                    213:                if ( ! reentrant && last != '\n')
                    214:                        putchar('\n');
                    215:                if ( ! reentrant)
                    216:                        putchar('.');
                    217:                else
                    218:                        putchar(' ');
                    219:
                    220:                /*
                    221:                 * If we don't have whitespace before us, then suppress
                    222:                 * macro whitespace with Ns.
                    223:                 */
                    224:                if (' ' != last)
                    225:                        printf("Ns ");
                    226:                switch (fmt) {
                    227:                case (FMT_ITALIC):
                    228:                        printf("Em ");
                    229:                        break;
                    230:                case (FMT_BOLD):
                    231:                        printf("Sy ");
                    232:                        break;
                    233:                case (FMT_CODE):
1.2       schwarze  234:                        printf("Qo Li ");
1.1       schwarze  235:                        break;
                    236:                case (FMT_LINK):
                    237:                        printf("Lk ");
                    238:                        break;
                    239:                case (FMT_FILE):
                    240:                        printf("Pa ");
                    241:                        break;
                    242:                case (FMT_NBSP):
                    243:                        /* TODO. */
                    244:                        printf("No ");
                    245:                        break;
                    246:                default:
                    247:                        abort();
                    248:                }
                    249:        }
                    250:
                    251:        /*
                    252:         * Read until we reach the end market ('>') or until we find a
                    253:         * nested format code.
                    254:         * Don't emit any newlines: since we're on a macro line, we
                    255:         * don't want to break the line.
                    256:         */
                    257:        while (*start < end) {
                    258:                if ('>' == buf[*start]) {
                    259:                        (*start)++;
                    260:                        break;
                    261:                }
                    262:                if (*start + 1 < end && '<' == buf[*start + 1]) {
                    263:                        formatcode(buf, start, end, 1, last, nomacro);
                    264:                        continue;
                    265:                }
1.3     ! schwarze  266:
1.1       schwarze  267:                        /*
                    268:                         * Make sure that any macro-like words (or
                    269:                         * really any word starting with a capital
                    270:                         * letter) is assumed to be a macro that must be
                    271:                         * escaped.
                    272:                         * XXX: should this be isalpha()?
                    273:                         */
                    274:                        if ((' ' == last || '\n' == last) &&
                    275:                                isupper(buf[*start]))
                    276:                                printf("\\&");
1.3     ! schwarze  277:
        !           278:                last = buf[*start];
        !           279:                if ('\n' == last)
        !           280:                        last = ' ';
        !           281:                putchar(last);
        !           282:
1.1       schwarze  283:                (*start)++;
                    284:        }
1.2       schwarze  285:
                    286:        if ( ! nomacro && FMT_CODE == fmt)
                    287:                printf(" Qc ");
1.1       schwarze  288:
                    289:        if (reentrant)
                    290:                return(1);
                    291:
                    292:        /*
                    293:         * If we're not reentrant, we want to put ending punctuation on
                    294:         * the macro line so that it's properly handled by being
                    295:         * smooshed against the terminal word.
                    296:         */
                    297:        skipspace(buf, start, end);
                    298:        if (',' != buf[*start] && '.' != buf[*start] &&
                    299:                '!' != buf[*start] && '?' != buf[*start] &&
                    300:                ')' != buf[*start])
                    301:                return(1);
                    302:        while (*start < end) {
                    303:                if (',' != buf[*start] &&
                    304:                        '.' != buf[*start] &&
                    305:                        '!' != buf[*start] &&
                    306:                        '?' != buf[*start] &&
                    307:                        ')' != buf[*start])
                    308:                        break;
                    309:                putchar(' ');
                    310:                putchar(buf[*start]);
                    311:                (*start)++;
                    312:        }
                    313:        skipspace(buf, start, end);
                    314:        return(1);
                    315: }
                    316:
                    /*
                     * Write out buf[*start..end) on the current output line.
                     * Format codes are handed to formatcode() in reentrant mode; all
                     * other characters are copied through, except newlines, which are
                     * dropped.
                     * On return *start has reached "end".
                     */
                    static void
                    formatcodeln(const char *buf, size_t *start, size_t end, int nomacro)
                    {
                            int              prev;
                            char             c;

                            for (prev = '\n'; *start < end; ) {
                                    /* A '<' in the next slot marks a format code. */
                                    if (*start + 1 < end && '<' == buf[*start + 1]) {
                                            formatcode(buf, start, end, 1, prev, nomacro);
                                            continue;
                                    }
                                    c = buf[(*start)++];
                                    if ('\n' == c)
                                            continue;
                                    prev = c;
                                    putchar(prev);
                            }
                    }
                    336:
                    337: /*
                    338:  * A command paragraph, as noted in the perlpod manual, just indicates
                    339:  * that we should do something, optionally with some text to print as
                    340:  * well.
                    341:  */
                    342: static void
                    343: command(struct state *st, const char *buf, size_t start, size_t end)
                    344: {
                    345:        size_t           len, csz;
                    346:        enum cmd         cmd;
                    347:
                    348:        assert('=' == buf[start]);
                    349:        start++;
                    350:        len = end - start;
                    351:
                    352:        for (cmd = 0; cmd < CMD__MAX; cmd++) {
                    353:                csz = strlen(cmds[cmd]);
                    354:                if (len < csz)
                    355:                        continue;
                    356:                if (0 == memcmp(&buf[start], cmd[cmds], csz))
                    357:                        break;
                    358:        }
                    359:
                    360:        /* Ignore bogus commands. */
                    361:
                    362:        if (CMD__MAX == cmd)
                    363:                return;
                    364:
                    365:        start += csz;
                    366:        skipspace(buf, &start, end);
                    367:        len = end - start;
                    368:
                    369:        if (st->paused) {
                    370:                st->paused = CMD_END != cmd;
                    371:                return;
                    372:        }
                    373:
                    374:        switch (cmd) {
                    375:        case (CMD_POD):
                    376:                break;
                    377:        case (CMD_HEAD1):
                    378:                /*
                    379:                 * The behaviour of head= follows from a quick glance at
                    380:                 * how pod2man handles it.
                    381:                 */
                    382:                printf(".Sh ");
                    383:                st->isname = 0;
                    384:                if (end - start == 4)
                    385:                        if (0 == memcmp(&buf[start], "NAME", 4))
                    386:                                st->isname = 1;
                    387:                formatcodeln(buf, &start, end, 1);
                    388:                putchar('\n');
                    389:                st->haspar = 1;
                    390:                break;
                    391:        case (CMD_HEAD2):
                    392:                printf(".Ss ");
                    393:                formatcodeln(buf, &start, end, 1);
                    394:                putchar('\n');
                    395:                st->haspar = 1;
                    396:                break;
                    397:        case (CMD_HEAD3):
                    398:                puts(".Pp");
                    399:                printf(".Em ");
                    400:                formatcodeln(buf, &start, end, 0);
                    401:                putchar('\n');
                    402:                puts(".Pp");
                    403:                st->haspar = 1;
                    404:                break;
                    405:        case (CMD_HEAD4):
                    406:                puts(".Pp");
                    407:                printf(".No ");
                    408:                formatcodeln(buf, &start, end, 0);
                    409:                putchar('\n');
                    410:                puts(".Pp");
                    411:                st->haspar = 1;
                    412:                break;
                    413:        case (CMD_OVER):
                    414:                /*
                    415:                 * TODO: we should be doing this after we process the
                    416:                 * first =item to see whether we'll do an -enum,
                    417:                 * -bullet, or something else.
                    418:                 */
                    419:                puts(".Bl -tag -width Ds");
                    420:                break;
                    421:        case (CMD_ITEM):
                    422:                printf(".It ");
                    423:                formatcodeln(buf, &start, end, 0);
                    424:                putchar('\n');
                    425:                st->haspar = 1;
                    426:                break;
                    427:        case (CMD_BACK):
                    428:                puts(".El");
                    429:                break;
                    430:        case (CMD_BEGIN):
                    431:                /*
                    432:                 * We disregard all types for now.
                    433:                 * TODO: process at least "text" in a -literal block.
                    434:                 */
                    435:                st->paused = 1;
                    436:                break;
                    437:        case (CMD_FOR):
                    438:                /*
                    439:                 * We ignore all types of encodings and formats
                    440:                 * unilaterally.
                    441:                 */
                    442:                break;
                    443:        case (CMD_ENCODING):
                    444:                break;
                    445:        case (CMD_CUT):
                    446:                st->parsing = 0;
                    447:                return;
                    448:        default:
                    449:                abort();
                    450:        }
                    451:
                    452:        /* Any command (but =cut) makes us start parsing. */
                    453:        st->parsing = 1;
                    454: }
                    455:
                    456: /*
                    457:  * Just pump out the line in a verbatim block.
                    458:  */
                    459: static void
                    460: verbatim(struct state *st, const char *buf, size_t start, size_t end)
                    461: {
                    462:
                    463:        if ( ! st->parsing || st->paused)
                    464:                return;
                    465:
                    466:        puts(".Bd -literal");
                    467:        printf("%.*s\n", (int)(end - start), &buf[start]);
                    468:        puts(".Ed");
                    469: }
                    470:
                    471: /*
                    472:  * Ordinary paragraph.
                    473:  * Well, this is really the hardest--POD seems to assume that, for
                    474:  * example, a leading space implies a newline, and so on.
                    475:  * Lots of other snakes in the grass: escaping a newline followed by a
                    476:  * period (accidental mdoc(7) control), double-newlines after macro
                    477:  * passages, etc.
                    478:  */
                    479: static void
                    480: ordinary(struct state *st, const char *buf, size_t start, size_t end)
                    481: {
                    482:        int             last;
                    483:        size_t          i, j;
                    484:
                    485:        if ( ! st->parsing || st->paused)
                    486:                return;
                    487:
                    488:        /*
                    489:         * Special-case: the NAME section.
                    490:         * If we find a "-" when searching from the end, assume that
                    491:         * we're in "name - description" format.
                    492:         * To wit, print out a "Nm" and "Nd" in that format.
                    493:         */
                    494:        if (st->isname) {
                    495:                for (i = end - 1; i > start; i--)
                    496:                        if ('-' == buf[i])
                    497:                                break;
                    498:                if ('-' == buf[i]) {
                    499:                        j = i;
                    500:                        /* Roll over multiple "-". */
                    501:                        for ( ; i > start; i--)
                    502:                                if ('-' != buf[i])
                    503:                                        break;
                    504:                        printf(".Nm %.*s\n",
                    505:                                (int)((i + 1) - start), &buf[start]);
                    506:                        printf(".Nd %.*s\n",
                    507:                                (int)(end - (j + 1)), &buf[j + 1]);
                    508:                        return;
                    509:                }
                    510:        }
                    511:
                    512:        if ( ! st->haspar)
                    513:                puts(".Pp");
                    514:
                    515:        st->haspar = 0;
                    516:        last = '\n';
                    517:
                    518:        while (start < end) {
                    519:                /*
                    520:                 * Loop til we get either to a newline or escape.
                    521:                 * Escape initial control characters.
                    522:                 */
                    523:                while (start < end) {
                    524:                        if (start < end - 1 && '<' == buf[start + 1])
                    525:                                break;
                    526:                        else if ('\n' == buf[start])
                    527:                                break;
                    528:                        else if ('\n' == last && '.' == buf[start])
                    529:                                printf("\\&");
                    530:                        else if ('\n' == last && '\'' == buf[start])
                    531:                                printf("\\&");
                    532:                        putchar(last = buf[start++]);
                    533:                }
                    534:
                    535:                if (start < end - 1 && '<' == buf[start + 1]) {
                    536:                        /*
                    537:                         * We've encountered a format code.
                    538:                         * This is going to trigger a macro no matter
                    539:                         * what, so print a newline now.
                    540:                         * Then print the (possibly nested) macros and
                    541:                         * following that, a newline.
                    542:                         */
                    543:                        if (formatcode(buf, &start, end, 0, last, 0))
                    544:                                putchar(last = '\n');
                    545:                } else if (start < end && '\n' == buf[start]) {
                    546:                        /*
                    547:                         * Print the newline only if we haven't already
                    548:                         * printed a newline.
                    549:                         */
                    550:                        if (last != '\n')
                    551:                                putchar(last = buf[start]);
                    552:                        if (++start >= end)
                    553:                                continue;
                    554:                        /*
                    555:                         * If we have whitespace next, eat it to prevent
                    556:                         * mdoc(7) from thinking that it's meant for
                    557:                         * verbatim text.
                    558:                         * It is--but if we start with that, we can't
                    559:                         * have a macro subsequent it, which may be
                    560:                         * possible if we have an escape next.
                    561:                         */
                    562:                        if (' ' == buf[start] || '\t' == buf[start]) {
                    563:                                puts(".br");
                    564:                                last = '\n';
                    565:                        }
                    566:                        for ( ; start < end; start++)
                    567:                                if (' ' != buf[start] && '\t' != buf[start])
                    568:                                        break;
                    569:                } else if (start < end) {
                    570:                        /*
                    571:                         * Default: print the character.
                    572:                         * Escape initial control characters.
                    573:                         */
                    574:                        if ('\n' == last && '.' == buf[start])
                    575:                                printf("\\&");
                    576:                        else if ('\n' == last && '\'' == buf[start])
                    577:                                printf("\\&");
                    578:                        putchar(last = buf[start++]);
                    579:                }
                    580:        }
                    581:
                    582:        if (last != '\n')
                    583:                putchar('\n');
                    584: }
                    585:
                    /*
                     * Dispatch a single paragraph, buf[start..end), on its first byte:
                     * a leading blank or tab selects verbatim text, a leading "="
                     * selects a POD command, and anything else is ordinary text.
                     * An empty paragraph is silently ignored.
                     */
                    static void
                    dopar(struct state *st, const char *buf, size_t start, size_t end)
                    {

                            if (start == end)
                                    return;

                            switch (buf[start]) {
                            case (' '):
                                    /* FALLTHROUGH */
                            case ('\t'):
                                    verbatim(st, buf, start, end);
                                    break;
                            case ('='):
                                    command(st, buf, start, end);
                                    break;
                            default:
                                    ordinary(st, buf, start, end);
                                    break;
                            }
                    }
                    604:
                    605: /*
                    606:  * Loop around paragraphs within a document, processing each one in the
                    607:  * POD way.
                    608:  */
                    609: static void
                    610: dofile(const struct args *args, const char *fname,
                    611:        const struct tm *tm, const char *buf, size_t sz)
                    612: {
                    613:        size_t           sup, end, i, cur = 0;
                    614:        struct state     st;
                    615:        const char      *section, *date;
                    616:        char             datebuf[64];
                    617:        char            *title, *cp;
                    618:
                    619:        if (0 == sz)
                    620:                return;
                    621:
                    622:        /* Title is last path component of the filename. */
                    623:
                    624:        if (NULL != args->title)
                    625:                title = strdup(args->title);
                    626:        else if (NULL != (cp = strrchr(fname, '/')))
                    627:                title = strdup(cp + 1);
                    628:        else
                    629:                title = strdup(fname);
                    630:
                    631:        if (NULL == title) {
                    632:                perror(NULL);
                    633:                exit(EXIT_FAILURE);
                    634:        }
                    635:
                    636:        /* Section is 1 unless suffix is "pm". */
                    637:
                    638:        if (NULL == (section = args->section)) {
                    639:                section = "1";
                    640:                if (NULL != (cp = strrchr(title, '.'))) {
                    641:                        *cp++ = '\0';
                    642:                        if (0 == strcmp(cp, "pm"))
                    643:                                section = "3p";
                    644:                }
                    645:        }
                    646:
                    647:        /* Date.  Or the given "tm" if not supplied. */
                    648:
                    649:        if (NULL == (date = args->date)) {
                    650:                strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
                    651:                date = datebuf;
                    652:        }
                    653:
                    654:        for (cp = title; '\0' != *cp; cp++)
                    655:                *cp = toupper((int)*cp);
                    656:
                    657:        /* The usual mdoc(7) preamble. */
                    658:
                    659:        printf(".Dd %s\n", date);
                    660:        printf(".Dt %s %s\n", title, section);
                    661:        puts(".Os");
                    662:
                    663:        free(title);
                    664:
                    665:        memset(&st, 0, sizeof(struct state));
                    666:        assert(sz > 0);
                    667:
                    668:        /* Main loop over file contents. */
                    669:
                    670:        while (cur < sz) {
                    671:                /* Read until next paragraph. */
                    672:                for (i = cur + 1; i < sz; i++)
                    673:                        if ('\n' == buf[i] && '\n' == buf[i - 1]) {
                    674:                                /* Consume blank paragraphs. */
                    675:                                while (i + 1 < sz && '\n' == buf[i + 1])
                    676:                                        i++;
                    677:                                break;
                    678:                        }
                    679:
                    680:                /* Adjust end marker for EOF. */
                    681:                end = i < sz ? i - 1 :
                    682:                        ('\n' == buf[sz - 1] ? sz - 1 : sz);
                    683:                sup = i < sz ? end + 2 : sz;
                    684:
                    685:                /* Process paragraph and adjust start. */
                    686:                dopar(&st, buf, cur, end);
                    687:                cur = sup;
                    688:        }
                    689: }
                    690:
                    /*
                     * Read a single file fully into memory.
                     * If the file is "-", do it from stdin.
                     * If successfully read, send the input buffer to dofile() for further
                     * processing.
                     * The time handed to dofile() is the modification time of the file,
                     * or the current time for stdin or if fstat(2) fails.
                     * Returns 1 on success and 0 if the file cannot be opened or read;
                     * exits the program if memory runs out.
                     */
                    static int
                    readfile(const struct args *args, const char *fname)
                    {
                            int              fd, rc;
                            char            *buf;
                            size_t           bufsz, cur;
                            ssize_t          ssz;
                            struct tm       *tm;
                            time_t           ttm;
                            struct stat      st;

                            assert(NULL != fname);

                            fd = 0 != strcmp("-", fname) ?
                                    open(fname, O_RDONLY, 0) : STDIN_FILENO;

                            if (-1 == fd) {
                                    perror(fname);
                                    return(0);
                            }

                            if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
                                    ttm = time(NULL);
                                    tm = localtime(&ttm);
                            } else
                                    tm = localtime(&st.st_mtime);

                            /*
                             * Arbitrarily-sized initial buffer.
                             * Should be big enough for most files...
                             */
                            cur = 0;
                            bufsz = 1 << 14;
                            if (NULL == (buf = malloc(bufsz))) {
                                    perror(NULL);
                                    exit(EXIT_FAILURE);
                            }

                            while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
                                    /*
                                     * Double buffer size on fill, such that there
                                     * is always room left for the next read.
                                     */
                                    if ((size_t)ssz == bufsz - cur)  {
                                            bufsz *= 2;
                                            if (NULL == (buf = realloc(buf, bufsz))) {
                                                    perror(NULL);
                                                    exit(EXIT_FAILURE);
                                            }
                                    }
                                    cur += (size_t)ssz;
                            }

                            /*
                             * Report a read error right away, before any other
                             * call gets a chance to clobber errno.
                             */
                            rc = 0;
                            if (ssz < 0)
                                    perror(fname);
                            else {
                                    dofile(args, STDIN_FILENO == fd ?
                                            "STDIN" : fname, tm, buf, cur);
                                    rc = 1;
                            }

                            /* Release the resources on both the success and error paths. */
                            free(buf);
                            if (STDIN_FILENO != fd)
                                    close(fd);
                            return(rc);
                    }
                    759:
                    760: int
                    761: main(int argc, char *argv[])
                    762: {
                    763:        const char      *fname, *name;
                    764:        struct args      args;
                    765:        int              c;
                    766:
                    767:        name = strrchr(argv[0], '/');
                    768:        if (name == NULL)
                    769:                name = argv[0];
                    770:        else
                    771:                ++name;
                    772:
                    773:        memset(&args, 0, sizeof(struct args));
                    774:        fname = "-";
                    775:
                    776:        /* Accept no arguments for now. */
                    777:
                    778:        while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
                    779:                switch (c) {
                    780:                case ('h'):
                    781:                        /* FALLTHROUGH */
                    782:                case ('l'):
                    783:                        /* FALLTHROUGH */
                    784:                case ('c'):
                    785:                        /* FALLTHROUGH */
                    786:                case ('o'):
                    787:                        /* FALLTHROUGH */
                    788:                case ('q'):
                    789:                        /* FALLTHROUGH */
                    790:                case ('r'):
                    791:                        /* FALLTHROUGH */
                    792:                case ('u'):
                    793:                        /* FALLTHROUGH */
                    794:                case ('v'):
                    795:                        /* Ignore these. */
                    796:                        break;
                    797:                case ('d'):
                    798:                        args.date = optarg;
                    799:                        break;
                    800:                case ('n'):
                    801:                        args.title = optarg;
                    802:                        break;
                    803:                case ('s'):
                    804:                        args.section = optarg;
                    805:                        break;
                    806:                default:
                    807:                        goto usage;
                    808:                }
                    809:
                    810:        argc -= optind;
                    811:        argv += optind;
                    812:
                    813:        /* Accept only a single input file. */
                    814:
                    815:        if (argc > 2)
                    816:                return(EXIT_FAILURE);
                    817:        else if (1 == argc)
                    818:                fname = *argv;
                    819:
                    820:        return(readfile(&args, fname) ?
                    821:                EXIT_SUCCESS : EXIT_FAILURE);
                    822:
                    823: usage:
                    824:        fprintf(stderr, "usage: %s [-d date] "
                    825:                "[-n title] [-s section]\n", name);
                    826:
                    827:        return(EXIT_FAILURE);
                    828: }
CVSweb