pod2mdoc/pod2mdoc.c - annotate

Return to pod2mdoc.c CVS log
Up to [cvsweb.bsd.lv] / pod2mdoc
Annotation of pod2mdoc/pod2mdoc.c, Revision 1.4

1.4     ! schwarze    1: /*     $Id: pod2mdoc.c,v 1.7 2014/03/20 00:55:35 kristaps Exp $ */
1.1       schwarze    2: /*
                      3:  * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
                      4:  *
                      5:  * Permission to use, copy, modify, and distribute this software for any
                      6:  * purpose with or without fee is hereby granted, provided that the above
                      7:  * copyright notice and this permission notice appear in all copies.
                      8:  *
                      9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
                     10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
                     12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     16:  */
                     17: #include <sys/stat.h>
                     18: #include <sys/time.h>
                     19:
                     20: #include <assert.h>
                     21: #include <ctype.h>
                     22: #include <fcntl.h>
                     23: #include <getopt.h>
                     24: #include <stdio.h>
                     25: #include <stdlib.h>
                     26: #include <string.h>
                     27: #include <unistd.h>
                     28:
                     29: struct args {
                     30:        const char      *title; /* override "Dt" title */
                     31:        const char      *date; /* override "Dd" date */
                     32:        const char      *section; /* override "Dt" section */
                     33: };
                     34:
/*
 * The kinds of list we know how to emit.
 * LIST__MAX is not a kind of list: it is the number of kinds, and is
 * also stored on the list stack for a list whose kind has not been
 * decided yet.
 */
enum	list {
	LIST_BULLET = 0,	/* bulleted list */
	LIST_ENUM,		/* numbered list */
	LIST_TAG,		/* tagged list */
	LIST__MAX
};
        !            41:
1.1       schwarze   42: struct state {
                     43:        int              parsing; /* after =cut of before command */
                     44:        int              paused; /* in =begin and before =end */
                     45:        int              haspar; /* in paragraph: do we need Pp? */
                     46:        int              isname; /* are we the NAME section? */
                     47:        const char      *fname; /* file being parsed */
1.4     ! schwarze   48: #define        LIST_STACKSZ     128
        !            49:        enum list        lstack[LIST_STACKSZ]; /* open lists */
        !            50:        size_t           lpos; /* where in list stack */
1.1       schwarze   51: };
                     52:
                     53: enum   fmt {
                     54:        FMT_ITALIC,
                     55:        FMT_BOLD,
                     56:        FMT_CODE,
                     57:        FMT_LINK,
                     58:        FMT_ESCAPE,
                     59:        FMT_FILE,
                     60:        FMT_NBSP,
                     61:        FMT_INDEX,
                     62:        FMT_NULL,
                     63:        FMT__MAX
                     64: };
                     65:
                     66: enum   cmd {
                     67:        CMD_POD = 0,
                     68:        CMD_HEAD1,
                     69:        CMD_HEAD2,
                     70:        CMD_HEAD3,
                     71:        CMD_HEAD4,
                     72:        CMD_OVER,
                     73:        CMD_ITEM,
                     74:        CMD_BACK,
                     75:        CMD_BEGIN,
                     76:        CMD_END,
                     77:        CMD_FOR,
                     78:        CMD_ENCODING,
                     79:        CMD_CUT,
                     80:        CMD__MAX
                     81: };
                     82:
                     83: static const char *const cmds[CMD__MAX] = {
                     84:        "pod",          /* CMD_POD */
                     85:        "head1",        /* CMD_HEAD1 */
                     86:        "head2",        /* CMD_HEAD2 */
                     87:        "head3",        /* CMD_HEAD3 */
                     88:        "head4",        /* CMD_HEAD4 */
                     89:        "over",         /* CMD_OVER */
                     90:        "item",         /* CMD_ITEM */
                     91:        "back",         /* CMD_BACK */
                     92:        "begin",        /* CMD_BEGIN */
                     93:        "end",          /* CMD_END */
                     94:        "for",          /* CMD_FOR */
                     95:        "encoding",     /* CMD_ENCODING */
                     96:        "cut"           /* CMD_CUT */
                     97: };
                     98:
                     99: static const char fmts[FMT__MAX] = {
                    100:        'I',            /* FMT_ITALIC */
                    101:        'B',            /* FMT_BOLD */
                    102:        'C',            /* FMT_CODE */
                    103:        'L',            /* FMT_LINK */
                    104:        'E',            /* FMT_ESCAPE */
                    105:        'F',            /* FMT_FILE */
                    106:        'S',            /* FMT_NBSP */
                    107:        'X',            /* FMT_INDEX */
                    108:        'Z'             /* FMT_NULL */
                    109: };
                    110:
                    111: /*
                    112:  * Given buf[*start] is at the start of an escape name, read til the end
                    113:  * of the escape ('>') then try to do something with it.
                    114:  * Sets start to be one after the '>'.
                    115:  */
                    116: static void
                    117: formatescape(const char *buf, size_t *start, size_t end)
                    118: {
                    119:        char             esc[16]; /* no more needed */
                    120:        size_t           i, max;
                    121:
                    122:        max = sizeof(esc) - 1;
                    123:        i = 0;
                    124:        /* Read til our buffer is full. */
                    125:        while (*start < end && '>' != buf[*start] && i < max)
                    126:                esc[i++] = buf[(*start)++];
                    127:        esc[i] = '\0';
                    128:
                    129:        if (i == max) {
                    130:                /* Too long... skip til we end. */
                    131:                while (*start < end && '>' != buf[*start])
                    132:                        (*start)++;
                    133:                return;
                    134:        } else if (*start >= end)
                    135:                return;
                    136:
                    137:        assert('>' == buf[*start]);
                    138:        (*start)++;
                    139:
                    140:        /*
                    141:         * TODO: right now, we only recognise the named escapes.
                    142:         * Just let the rest of them go.
                    143:         */
                    144:        if (0 == strcmp(esc, "lt"))
                    145:                printf("\\(la");
                    146:        else if (0 == strcmp(esc, "gt"))
                    147:                printf("\\(ra");
                    148:        else if (0 == strcmp(esc, "vb"))
                    149:                printf("\\(ba");
                    150:        else if (0 == strcmp(esc, "sol"))
                    151:                printf("\\(sl");
                    152: }
                    153:
                    154: /*
                    155:  * Skip space characters.
                    156:  */
                    157: static void
                    158: skipspace(const char *buf, size_t *start, size_t end)
                    159: {
                    160:
                    161:        while (*start < end && ' ' == buf[*start])
                    162:                (*start)++;
                    163: }
                    164:
                    165: /*
                    166:  * We're at the character in front of a format code, which is structured
                    167:  * like X<...> and can contain nested format codes.
                    168:  * This consumes the whole format code, and any nested format codes, til
                    169:  * the end of matched production.
                    170:  * If "reentrant", then we're being called after a macro has already
                    171:  * been printed to the current line.
                    172:  * "last" is set to the last read character: this is used to determine
                    173:  * whether we should buffer with space or not.
                    174:  * If "nomacro", then we don't print any macros, just contained data.
                    175:  */
                    176: static int
                    177: formatcode(const char *buf, size_t *start,
                    178:        size_t end, int reentrant, int last, int nomacro)
                    179: {
                    180:        enum fmt         fmt;
                    181:
                    182:        assert(*start + 1 < end);
                    183:        assert('<' == buf[*start + 1]);
                    184:
                    185:        for (fmt = 0; fmt < FMT__MAX; fmt++)
                    186:                if (buf[*start] == fmts[fmt])
                    187:                        break;
                    188:
                    189:        /* Invalid macros are just regular text. */
                    190:
                    191:        if (FMT__MAX == fmt) {
                    192:                putchar(buf[*start]);
                    193:                (*start)++;
                    194:                return(0);
                    195:        }
                    196:
                    197:        *start += 2;
                    198:
                    199:        /*
                    200:         * Escapes don't print macro sequences, so just output them like
                    201:         * normal text before processing for macros.
                    202:         */
                    203:        if (FMT_ESCAPE == fmt) {
                    204:                formatescape(buf, start, end);
                    205:                return(0);
                    206:        } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
                    207:                /* For indices and nulls, just consume. */
                    208:                while (*start < end && '>' != buf[*start])
                    209:                        (*start)++;
                    210:                if (*start < end)
                    211:                        (*start)++;
                    212:                return(0);
                    213:        }
                    214:
                    215:        if ( ! nomacro) {
                    216:                /*
                    217:                 * Print out the macro describing this format code.
                    218:                 * If we're not "reentrant" (not yet on a macro line)
                    219:                 * then print a newline, if necessary, and the macro
                    220:                 * indicator.
                    221:                 * Otherwise, offset us with a space.
                    222:                 */
                    223:                if ( ! reentrant && last != '\n')
                    224:                        putchar('\n');
                    225:                if ( ! reentrant)
                    226:                        putchar('.');
                    227:                else
                    228:                        putchar(' ');
                    229:
                    230:                /*
                    231:                 * If we don't have whitespace before us, then suppress
                    232:                 * macro whitespace with Ns.
                    233:                 */
                    234:                if (' ' != last)
                    235:                        printf("Ns ");
                    236:                switch (fmt) {
                    237:                case (FMT_ITALIC):
                    238:                        printf("Em ");
                    239:                        break;
                    240:                case (FMT_BOLD):
                    241:                        printf("Sy ");
                    242:                        break;
                    243:                case (FMT_CODE):
1.2       schwarze  244:                        printf("Qo Li ");
1.1       schwarze  245:                        break;
                    246:                case (FMT_LINK):
                    247:                        printf("Lk ");
                    248:                        break;
                    249:                case (FMT_FILE):
                    250:                        printf("Pa ");
                    251:                        break;
                    252:                case (FMT_NBSP):
                    253:                        /* TODO. */
                    254:                        printf("No ");
                    255:                        break;
                    256:                default:
                    257:                        abort();
                    258:                }
                    259:        }
                    260:
                    261:        /*
                    262:         * Read until we reach the end market ('>') or until we find a
                    263:         * nested format code.
                    264:         * Don't emit any newlines: since we're on a macro line, we
                    265:         * don't want to break the line.
                    266:         */
                    267:        while (*start < end) {
                    268:                if ('>' == buf[*start]) {
                    269:                        (*start)++;
                    270:                        break;
                    271:                }
                    272:                if (*start + 1 < end && '<' == buf[*start + 1]) {
                    273:                        formatcode(buf, start, end, 1, last, nomacro);
                    274:                        continue;
                    275:                }
1.3       schwarze  276:
1.4     ! schwarze  277:                /*
        !           278:                 * Make sure that any macro-like words (or
        !           279:                 * really any word starting with a capital
        !           280:                 * letter) is assumed to be a macro that must be
        !           281:                 * escaped.
        !           282:                 * This matches "Xx " and "XxEOLN".
        !           283:                 */
        !           284:                if ((' ' == last || '\n' == last) &&
        !           285:                                end - *start > 1 &&
        !           286:                                isupper((int)buf[*start]) &&
        !           287:                                islower((int)buf[*start + 1]) &&
        !           288:                                (end - *start == 2 ||
        !           289:                                 ' ' == buf[*start + 2]))
        !           290:                        printf("\\&");
1.3       schwarze  291:
1.4     ! schwarze  292:                /* Suppress newline. */
        !           293:                if ('\n' == (last = buf[(*start)++]))
1.3       schwarze  294:                        last = ' ';
1.4     ! schwarze  295:
1.3       schwarze  296:                putchar(last);
1.1       schwarze  297:        }
1.2       schwarze  298:
                    299:        if ( ! nomacro && FMT_CODE == fmt)
                    300:                printf(" Qc ");
1.1       schwarze  301:
                    302:        if (reentrant)
                    303:                return(1);
                    304:
                    305:        /*
                    306:         * If we're not reentrant, we want to put ending punctuation on
                    307:         * the macro line so that it's properly handled by being
                    308:         * smooshed against the terminal word.
                    309:         */
                    310:        skipspace(buf, start, end);
                    311:        if (',' != buf[*start] && '.' != buf[*start] &&
                    312:                '!' != buf[*start] && '?' != buf[*start] &&
                    313:                ')' != buf[*start])
                    314:                return(1);
                    315:        while (*start < end) {
                    316:                if (',' != buf[*start] &&
                    317:                        '.' != buf[*start] &&
                    318:                        '!' != buf[*start] &&
                    319:                        '?' != buf[*start] &&
                    320:                        ')' != buf[*start])
                    321:                        break;
                    322:                putchar(' ');
                    323:                putchar(buf[*start]);
                    324:                (*start)++;
                    325:        }
                    326:        skipspace(buf, start, end);
                    327:        return(1);
                    328: }
                    329:
                    330: /*
                    331:  * Calls formatcode() til the end of a paragraph.
                    332:  */
                    333: static void
                    334: formatcodeln(const char *buf, size_t *start, size_t end, int nomacro)
                    335: {
                    336:        int              last;
                    337:
1.4     ! schwarze  338:        last = ' ';
1.1       schwarze  339:        while (*start < end)  {
                    340:                if (*start + 1 < end && '<' == buf[*start + 1]) {
                    341:                        formatcode(buf, start, end, 1, last, nomacro);
                    342:                        continue;
                    343:                }
1.4     ! schwarze  344:                /*
        !           345:                 * Since we're already on a macro line, we want to make
        !           346:                 * sure that we don't inadvertently invoke a macro.
        !           347:                 * We need to do this carefully because section names
        !           348:                 * are used in troff and we don't want to escape
        !           349:                 * something that needn't be escaped.
        !           350:                 */
        !           351:                if (' ' == last && end - *start > 1 &&
        !           352:                                isupper((int)buf[*start]) &&
        !           353:                                islower((int)buf[*start + 1]) &&
        !           354:                                (end - *start == 2 ||
        !           355:                                 ' ' == buf[*start + 2]))
        !           356:                        printf("\\&");
        !           357:
1.1       schwarze  358:                if ('\n' != buf[*start])
                    359:                        putchar(last = buf[*start]);
1.4     ! schwarze  360:                else
        !           361:                        putchar(last = ' ');
1.1       schwarze  362:                (*start)++;
                    363:        }
                    364: }
                    365:
                    366: /*
1.4     ! schwarze  367:  * Guess at what kind of list we are.
        !           368:  * These are taken straight from the POD manual.
        !           369:  * I don't know what people do in real life.
        !           370:  */
        !           371: static enum list
        !           372: listguess(const char *buf, size_t start, size_t end)
        !           373: {
        !           374:        size_t           len = end - start;
        !           375:
        !           376:        assert(end >= start);
        !           377:
        !           378:        if (len == 1 && '*' == buf[start])
        !           379:                return(LIST_BULLET);
        !           380:        if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
        !           381:                return(LIST_ENUM);
        !           382:        else if (len == 1 && '1' == buf[start])
        !           383:                return(LIST_ENUM);
        !           384:        else
        !           385:                return(LIST_TAG);
        !           386: }
        !           387:
        !           388: /*
1.1       schwarze  389:  * A command paragraph, as noted in the perlpod manual, just indicates
                    390:  * that we should do something, optionally with some text to print as
                    391:  * well.
                    392:  */
                    393: static void
                    394: command(struct state *st, const char *buf, size_t start, size_t end)
                    395: {
                    396:        size_t           len, csz;
                    397:        enum cmd         cmd;
                    398:
                    399:        assert('=' == buf[start]);
                    400:        start++;
                    401:        len = end - start;
                    402:
                    403:        for (cmd = 0; cmd < CMD__MAX; cmd++) {
                    404:                csz = strlen(cmds[cmd]);
                    405:                if (len < csz)
                    406:                        continue;
                    407:                if (0 == memcmp(&buf[start], cmd[cmds], csz))
                    408:                        break;
                    409:        }
                    410:
                    411:        /* Ignore bogus commands. */
                    412:
                    413:        if (CMD__MAX == cmd)
                    414:                return;
                    415:
                    416:        start += csz;
                    417:        skipspace(buf, &start, end);
                    418:        len = end - start;
                    419:
                    420:        if (st->paused) {
                    421:                st->paused = CMD_END != cmd;
                    422:                return;
                    423:        }
                    424:
                    425:        switch (cmd) {
                    426:        case (CMD_POD):
                    427:                break;
                    428:        case (CMD_HEAD1):
                    429:                /*
                    430:                 * The behaviour of head= follows from a quick glance at
                    431:                 * how pod2man handles it.
                    432:                 */
                    433:                printf(".Sh ");
                    434:                st->isname = 0;
                    435:                if (end - start == 4)
                    436:                        if (0 == memcmp(&buf[start], "NAME", 4))
                    437:                                st->isname = 1;
                    438:                formatcodeln(buf, &start, end, 1);
                    439:                putchar('\n');
                    440:                st->haspar = 1;
                    441:                break;
                    442:        case (CMD_HEAD2):
                    443:                printf(".Ss ");
                    444:                formatcodeln(buf, &start, end, 1);
                    445:                putchar('\n');
                    446:                st->haspar = 1;
                    447:                break;
                    448:        case (CMD_HEAD3):
                    449:                puts(".Pp");
                    450:                printf(".Em ");
                    451:                formatcodeln(buf, &start, end, 0);
                    452:                putchar('\n');
                    453:                puts(".Pp");
                    454:                st->haspar = 1;
                    455:                break;
                    456:        case (CMD_HEAD4):
                    457:                puts(".Pp");
                    458:                printf(".No ");
                    459:                formatcodeln(buf, &start, end, 0);
                    460:                putchar('\n');
                    461:                puts(".Pp");
                    462:                st->haspar = 1;
                    463:                break;
                    464:        case (CMD_OVER):
1.4     ! schwarze  465:                /*
        !           466:                 * If we have an existing list that hasn't had an =item
        !           467:                 * yet, then make sure that we open it now.
        !           468:                 * We use the default list type, but that can't be
        !           469:                 * helped (we haven't seen any items yet).
1.1       schwarze  470:                 */
1.4     ! schwarze  471:                if (st->lpos > 0)
        !           472:                        if (LIST__MAX == st->lstack[st->lpos - 1]) {
        !           473:                                st->lstack[st->lpos - 1] = LIST_TAG;
        !           474:                                puts(".Bl -tag -width Ds");
        !           475:                        }
        !           476:                st->lpos++;
        !           477:                assert(st->lpos < LIST_STACKSZ);
        !           478:                st->lstack[st->lpos - 1] = LIST__MAX;
1.1       schwarze  479:                break;
                    480:        case (CMD_ITEM):
1.4     ! schwarze  481:                assert(st->lpos > 0);
        !           482:                /*
        !           483:                 * If we're the first =item, guess at what our content
        !           484:                 * will be: "*" is a bullet list, "1." is a numbered
        !           485:                 * list, and everything is tagged.
        !           486:                 */
        !           487:                if (LIST__MAX == st->lstack[st->lpos - 1]) {
        !           488:                        st->lstack[st->lpos - 1] =
        !           489:                                listguess(buf, start, end);
        !           490:                        switch (st->lstack[st->lpos - 1]) {
        !           491:                        case (LIST_BULLET):
        !           492:                                puts(".Bl -bullet");
        !           493:                                break;
        !           494:                        case (LIST_ENUM):
        !           495:                                puts(".Bl -enum");
        !           496:                                break;
        !           497:                        default:
        !           498:                                puts(".Bl -tag -width Ds");
        !           499:                                break;
        !           500:                        }
        !           501:                }
        !           502:                switch (st->lstack[st->lpos - 1]) {
        !           503:                case (LIST_TAG):
        !           504:                        printf(".It ");
        !           505:                        formatcodeln(buf, &start, end, 0);
        !           506:                        putchar('\n');
        !           507:                        break;
        !           508:                case (LIST_ENUM):
        !           509:                        /* FALLTHROUGH */
        !           510:                case (LIST_BULLET):
        !           511:                        /*
        !           512:                         * Abandon the remainder of the paragraph
        !           513:                         * because we're going to be a bulletted or
        !           514:                         * numbered list.
        !           515:                         */
        !           516:                        puts(".It");
        !           517:                        break;
        !           518:                default:
        !           519:                        abort();
        !           520:                }
1.1       schwarze  521:                st->haspar = 1;
                    522:                break;
                    523:        case (CMD_BACK):
1.4     ! schwarze  524:                /* Make sure we don't back over the stack. */
        !           525:                if (st->lpos > 0) {
        !           526:                        st->lpos--;
        !           527:                        puts(".El");
        !           528:                }
1.1       schwarze  529:                break;
                    530:        case (CMD_BEGIN):
                    531:                /*
                    532:                 * We disregard all types for now.
                    533:                 * TODO: process at least "text" in a -literal block.
                    534:                 */
                    535:                st->paused = 1;
                    536:                break;
                    537:        case (CMD_FOR):
                    538:                /*
                    539:                 * We ignore all types of encodings and formats
                    540:                 * unilaterally.
                    541:                 */
                    542:                break;
                    543:        case (CMD_ENCODING):
                    544:                break;
                    545:        case (CMD_CUT):
                    546:                st->parsing = 0;
                    547:                return;
                    548:        default:
                    549:                abort();
                    550:        }
                    551:
                    552:        /* Any command (but =cut) makes us start parsing. */
                    553:        st->parsing = 1;
                    554: }
                    555:
                    556: /*
                    557:  * Just pump out the line in a verbatim block.
                    558:  */
                    559: static void
                    560: verbatim(struct state *st, const char *buf, size_t start, size_t end)
                    561: {
                    562:
                    563:        if ( ! st->parsing || st->paused)
                    564:                return;
                    565:
                    566:        puts(".Bd -literal");
                    567:        printf("%.*s\n", (int)(end - start), &buf[start]);
                    568:        puts(".Ed");
                    569: }
                    570:
                    571: /*
                    572:  * Ordinary paragraph.
                    573:  * Well, this is really the hardest--POD seems to assume that, for
                    574:  * example, a leading space implies a newline, and so on.
                    575:  * Lots of other snakes in the grass: escaping a newline followed by a
                    576:  * period (accidental mdoc(7) control), double-newlines after macro
                    577:  * passages, etc.
                    578:  */
                    579: static void
                    580: ordinary(struct state *st, const char *buf, size_t start, size_t end)
                    581: {
                    582:        int             last;
                    583:        size_t          i, j;
                    584:
                    585:        if ( ! st->parsing || st->paused)
                    586:                return;
                    587:
                    588:        /*
                    589:         * Special-case: the NAME section.
                    590:         * If we find a "-" when searching from the end, assume that
                    591:         * we're in "name - description" format.
                    592:         * To wit, print out a "Nm" and "Nd" in that format.
                    593:         */
                    594:        if (st->isname) {
                    595:                for (i = end - 1; i > start; i--)
                    596:                        if ('-' == buf[i])
                    597:                                break;
                    598:                if ('-' == buf[i]) {
                    599:                        j = i;
                    600:                        /* Roll over multiple "-". */
                    601:                        for ( ; i > start; i--)
                    602:                                if ('-' != buf[i])
                    603:                                        break;
1.4     ! schwarze  604:                        /* FIXME: escape macro-like words etc. */
1.1       schwarze  605:                        printf(".Nm %.*s\n",
                    606:                                (int)((i + 1) - start), &buf[start]);
                    607:                        printf(".Nd %.*s\n",
                    608:                                (int)(end - (j + 1)), &buf[j + 1]);
                    609:                        return;
                    610:                }
                    611:        }
                    612:
                    613:        if ( ! st->haspar)
                    614:                puts(".Pp");
                    615:
                    616:        st->haspar = 0;
                    617:        last = '\n';
                    618:
                    619:        while (start < end) {
                    620:                /*
                    621:                 * Loop til we get either to a newline or escape.
                    622:                 * Escape initial control characters.
                    623:                 */
                    624:                while (start < end) {
                    625:                        if (start < end - 1 && '<' == buf[start + 1])
                    626:                                break;
                    627:                        else if ('\n' == buf[start])
                    628:                                break;
                    629:                        else if ('\n' == last && '.' == buf[start])
                    630:                                printf("\\&");
                    631:                        else if ('\n' == last && '\'' == buf[start])
                    632:                                printf("\\&");
                    633:                        putchar(last = buf[start++]);
                    634:                }
                    635:
                    636:                if (start < end - 1 && '<' == buf[start + 1]) {
                    637:                        /*
                    638:                         * We've encountered a format code.
                    639:                         * This is going to trigger a macro no matter
                    640:                         * what, so print a newline now.
                    641:                         * Then print the (possibly nested) macros and
                    642:                         * following that, a newline.
                    643:                         */
                    644:                        if (formatcode(buf, &start, end, 0, last, 0))
                    645:                                putchar(last = '\n');
                    646:                } else if (start < end && '\n' == buf[start]) {
                    647:                        /*
                    648:                         * Print the newline only if we haven't already
                    649:                         * printed a newline.
                    650:                         */
                    651:                        if (last != '\n')
                    652:                                putchar(last = buf[start]);
                    653:                        if (++start >= end)
                    654:                                continue;
                    655:                        /*
                    656:                         * If we have whitespace next, eat it to prevent
                    657:                         * mdoc(7) from thinking that it's meant for
                    658:                         * verbatim text.
                    659:                         * It is--but if we start with that, we can't
                    660:                         * have a macro subsequent it, which may be
                    661:                         * possible if we have an escape next.
                    662:                         */
                    663:                        if (' ' == buf[start] || '\t' == buf[start]) {
                    664:                                puts(".br");
                    665:                                last = '\n';
                    666:                        }
                    667:                        for ( ; start < end; start++)
                    668:                                if (' ' != buf[start] && '\t' != buf[start])
                    669:                                        break;
                    670:                } else if (start < end) {
                    671:                        /*
                    672:                         * Default: print the character.
                    673:                         * Escape initial control characters.
                    674:                         */
                    675:                        if ('\n' == last && '.' == buf[start])
                    676:                                printf("\\&");
                    677:                        else if ('\n' == last && '\'' == buf[start])
                    678:                                printf("\\&");
                    679:                        putchar(last = buf[start++]);
                    680:                }
                    681:        }
                    682:
                    683:        if (last != '\n')
                    684:                putchar('\n');
                    685: }
                    686:
                    687: /*
                    688:  * There are three kinds of paragraphs: verbatim (starts with whitespace
                    689:  * of some sort), ordinary (starts without "=" marker), or a command
                    690:  * (default: starts with "=").
                    691:  */
                    692: static void
                    693: dopar(struct state *st, const char *buf, size_t start, size_t end)
                    694: {
                    695:
                    696:        if (end == start)
                    697:                return;
                    698:        if (' ' == buf[start] || '\t' == buf[start])
                    699:                verbatim(st, buf, start, end);
                    700:        else if ('=' != buf[start])
                    701:                ordinary(st, buf, start, end);
                    702:        else
                    703:                command(st, buf, start, end);
                    704: }
                    705:
                    706: /*
                    707:  * Loop around paragraphs within a document, processing each one in the
                    708:  * POD way.
                    709:  */
                    710: static void
                    711: dofile(const struct args *args, const char *fname,
                    712:        const struct tm *tm, const char *buf, size_t sz)
                    713: {
                    714:        size_t           sup, end, i, cur = 0;
                    715:        struct state     st;
                    716:        const char      *section, *date;
                    717:        char             datebuf[64];
                    718:        char            *title, *cp;
                    719:
                    720:        if (0 == sz)
                    721:                return;
                    722:
                    723:        /* Title is last path component of the filename. */
                    724:
                    725:        if (NULL != args->title)
                    726:                title = strdup(args->title);
                    727:        else if (NULL != (cp = strrchr(fname, '/')))
                    728:                title = strdup(cp + 1);
                    729:        else
                    730:                title = strdup(fname);
                    731:
                    732:        if (NULL == title) {
                    733:                perror(NULL);
                    734:                exit(EXIT_FAILURE);
                    735:        }
                    736:
                    737:        /* Section is 1 unless suffix is "pm". */
                    738:
                    739:        if (NULL == (section = args->section)) {
                    740:                section = "1";
                    741:                if (NULL != (cp = strrchr(title, '.'))) {
                    742:                        *cp++ = '\0';
                    743:                        if (0 == strcmp(cp, "pm"))
                    744:                                section = "3p";
                    745:                }
                    746:        }
                    747:
                    748:        /* Date.  Or the given "tm" if not supplied. */
                    749:
                    750:        if (NULL == (date = args->date)) {
                    751:                strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
                    752:                date = datebuf;
                    753:        }
                    754:
                    755:        for (cp = title; '\0' != *cp; cp++)
                    756:                *cp = toupper((int)*cp);
                    757:
                    758:        /* The usual mdoc(7) preamble. */
                    759:
                    760:        printf(".Dd %s\n", date);
                    761:        printf(".Dt %s %s\n", title, section);
                    762:        puts(".Os");
                    763:
                    764:        free(title);
                    765:
                    766:        memset(&st, 0, sizeof(struct state));
                    767:        assert(sz > 0);
                    768:
                    769:        /* Main loop over file contents. */
                    770:
                    771:        while (cur < sz) {
                    772:                /* Read until next paragraph. */
                    773:                for (i = cur + 1; i < sz; i++)
                    774:                        if ('\n' == buf[i] && '\n' == buf[i - 1]) {
                    775:                                /* Consume blank paragraphs. */
                    776:                                while (i + 1 < sz && '\n' == buf[i + 1])
                    777:                                        i++;
                    778:                                break;
                    779:                        }
                    780:
                    781:                /* Adjust end marker for EOF. */
                    782:                end = i < sz ? i - 1 :
                    783:                        ('\n' == buf[sz - 1] ? sz - 1 : sz);
                    784:                sup = i < sz ? end + 2 : sz;
                    785:
                    786:                /* Process paragraph and adjust start. */
                    787:                dopar(&st, buf, cur, end);
                    788:                cur = sup;
                    789:        }
                    790: }
                    791:
                    792: /*
                    793:  * Read a single file fully into memory.
                    794:  * If the file is "-", do it from stdin.
                    795:  * If successfully read, send the input buffer to dofile() for further
                    796:  * processing.
                    797:  */
                    798: static int
                    799: readfile(const struct args *args, const char *fname)
                    800: {
                    801:        int              fd;
                    802:        char            *buf;
                    803:        size_t           bufsz, cur;
                    804:        ssize_t          ssz;
                    805:        struct tm       *tm;
                    806:        time_t           ttm;
                    807:        struct stat      st;
                    808:
                    809:        assert(NULL != fname);
                    810:
                    811:        fd = 0 != strcmp("-", fname) ?
                    812:                open(fname, O_RDONLY, 0) : STDIN_FILENO;
                    813:
                    814:        if (-1 == fd) {
                    815:                perror(fname);
                    816:                return(0);
                    817:        }
                    818:
                    819:        if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
                    820:                ttm = time(NULL);
                    821:                tm = localtime(&ttm);
                    822:        } else
                    823:                tm = localtime(&st.st_mtime);
                    824:
                    825:        /*
                    826:         * Arbitrarily-sized initial buffer.
                    827:         * Should be big enough for most files...
                    828:         */
                    829:        cur = 0;
                    830:        bufsz = 1 << 14;
                    831:        if (NULL == (buf = malloc(bufsz))) {
                    832:                perror(NULL);
                    833:                exit(EXIT_FAILURE);
                    834:        }
                    835:
                    836:        while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
                    837:                /* Double buffer size on fill. */
                    838:                if ((size_t)ssz == bufsz - cur)  {
                    839:                        bufsz *= 2;
                    840:                        if (NULL == (buf = realloc(buf, bufsz))) {
                    841:                                perror(NULL);
                    842:                                exit(EXIT_FAILURE);
                    843:                        }
                    844:                }
                    845:                cur += (size_t)ssz;
                    846:        }
                    847:        if (ssz < 0) {
                    848:                perror(fname);
                    849:                free(buf);
                    850:                return(0);
                    851:        }
                    852:
                    853:        dofile(args, STDIN_FILENO == fd ?
                    854:                "STDIN" : fname, tm, buf, cur);
                    855:        free(buf);
                    856:        if (STDIN_FILENO != fd)
                    857:                close(fd);
                    858:        return(1);
                    859: }
                    860:
                    861: int
                    862: main(int argc, char *argv[])
                    863: {
                    864:        const char      *fname, *name;
                    865:        struct args      args;
                    866:        int              c;
                    867:
                    868:        name = strrchr(argv[0], '/');
                    869:        if (name == NULL)
                    870:                name = argv[0];
                    871:        else
                    872:                ++name;
                    873:
                    874:        memset(&args, 0, sizeof(struct args));
                    875:        fname = "-";
                    876:
                    877:        /* Accept no arguments for now. */
                    878:
                    879:        while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
                    880:                switch (c) {
                    881:                case ('h'):
                    882:                        /* FALLTHROUGH */
                    883:                case ('l'):
                    884:                        /* FALLTHROUGH */
                    885:                case ('c'):
                    886:                        /* FALLTHROUGH */
                    887:                case ('o'):
                    888:                        /* FALLTHROUGH */
                    889:                case ('q'):
                    890:                        /* FALLTHROUGH */
                    891:                case ('r'):
                    892:                        /* FALLTHROUGH */
                    893:                case ('u'):
                    894:                        /* FALLTHROUGH */
                    895:                case ('v'):
                    896:                        /* Ignore these. */
                    897:                        break;
                    898:                case ('d'):
                    899:                        args.date = optarg;
                    900:                        break;
                    901:                case ('n'):
                    902:                        args.title = optarg;
                    903:                        break;
                    904:                case ('s'):
                    905:                        args.section = optarg;
                    906:                        break;
                    907:                default:
                    908:                        goto usage;
                    909:                }
                    910:
                    911:        argc -= optind;
                    912:        argv += optind;
                    913:
                    914:        /* Accept only a single input file. */
                    915:
                    916:        if (argc > 2)
                    917:                return(EXIT_FAILURE);
                    918:        else if (1 == argc)
                    919:                fname = *argv;
                    920:
                    921:        return(readfile(&args, fname) ?
                    922:                EXIT_SUCCESS : EXIT_FAILURE);
                    923:
                    924: usage:
                    925:        fprintf(stderr, "usage: %s [-d date] "
                    926:                "[-n title] [-s section]\n", name);
                    927:
                    928:        return(EXIT_FAILURE);
                    929: }
CVSweb