[BACK]Return to main.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / texi2mdoc

Annotation of texi2mdoc/main.c, Revision 1.1.1.1

1.1       kristaps    1: /*     $Id: main.c,v 1.216 2015/01/16 21:15:05 schwarze Exp $ */
                      2: /*
                      3:  * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
                      4:  *
                      5:  * Permission to use, copy, modify, and distribute this software for any
                      6:  * purpose with or without fee is hereby granted, provided that the above
                      7:  * copyright notice and this permission notice appear in all copies.
                      8:  *
                      9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     16:  */
                     17: #include <sys/mman.h>
                     18: #include <sys/stat.h>
                     19:
                     20: #include <assert.h>
                     21: #include <ctype.h>
                     22: #include <fcntl.h>
                     23: #include <getopt.h>
                     24: #include <stdarg.h>
                     25: #include <stdio.h>
                     26: #include <stdlib.h>
                     27: #include <string.h>
                     28:
                     /*
                      * Identifiers for each of the Texinfo commands that we understand.
                      * This only covers native commands; overridden names are a different
                      * story.
                      * The enumerators appear in the same order as the entries of the
                      * texitoks[] table, which is indexed by these values.
                      * TEXICMD__MAX is not a command: it is the number of commands, and
                      * it is also what texicmd() returns for an unrecognised name.
                      */
                     enum texicmd {
                             TEXICMD_A4PAPER = 0,            /* afourpaper */
                             TEXICMD_ANCHOR,                 /* anchor */
                             TEXICMD_AT,                     /* @ */
                             TEXICMD_BYE,                    /* bye */
                             TEXICMD_CHAPTER,                /* chapter */
                             TEXICMD_CINDEX,                 /* cindex */
                             TEXICMD_CODE,                   /* code */
                             TEXICMD_COMMAND,                /* command */
                             TEXICMD_COMMENT,                /* c */
                             TEXICMD_CONTENTS,               /* contents */
                             TEXICMD_COPYING,                /* copying */
                             TEXICMD_COPYRIGHT,              /* copyright */
                             TEXICMD_DETAILMENU,             /* detailmenu */
                             TEXICMD_DIRCATEGORY,            /* dircategory */
                             TEXICMD_DIRENTRY,               /* direntry */
                             TEXICMD_EMAIL,                  /* email */
                             TEXICMD_EMPH,                   /* emph */
                             TEXICMD_END,                    /* end */
                             TEXICMD_EXAMPLE,                /* example */
                             TEXICMD_FILE,                   /* file */
                             TEXICMD_I,                      /* i */
                             TEXICMD_IFHTML,                 /* ifhtml */
                             TEXICMD_IFNOTTEX,               /* ifnottex */
                             TEXICMD_IFTEX,                  /* iftex */
                             TEXICMD_IMAGE,                  /* image */
                             TEXICMD_ITEM,                   /* item */
                             TEXICMD_ITEMIZE,                /* itemize */
                             TEXICMD_KBD,                    /* kbd */
                             TEXICMD_LATEX,                  /* LaTeX */
                             TEXICMD_MENU,                   /* menu */
                             TEXICMD_NODE,                   /* node */
                             TEXICMD_QUOTATION,              /* quotation */
                             TEXICMD_PARINDENT,              /* paragraphindent */
                             TEXICMD_REF,                    /* ref */
                             TEXICMD_SAMP,                   /* samp */
                             TEXICMD_SECTION,                /* section */
                             TEXICMD_SETCHAPNEWPAGE,         /* setchapternewpage */
                             TEXICMD_SETFILENAME,            /* setfilename */
                             TEXICMD_SETTITLE,               /* settitle */
                             TEXICMD_SUBSECTION,             /* subsection */
                             TEXICMD_TABLE,                  /* table */
                             TEXICMD_TEX,                    /* tex */
                             TEXICMD_TEXSYM,                 /* TeX */
                             TEXICMD_TITLEFONT,              /* titlefont */
                             TEXICMD_TITLEPAGE,              /* titlepage */
                             TEXICMD_TOP,                    /* top */
                             TEXICMD_UNNUMBERED,             /* unnumbered */
                             TEXICMD_URL,                    /* url */
                             TEXICMD_VAR,                    /* var */
                             TEXICMD__MAX                    /* number of commands */
                     };
                     86:
                     /*
                      * A file being parsed, along with our current location within it.
                      * The location is what texiwarn() prints (converted to count from one).
                      */
                     struct texifile {
                             /* Name of the file. */
                             const char      *name;
                             /* Current line, counted from zero. */
                             size_t           line;
                             /* Current column within the line, counted from zero. */
                             size_t           col;
                             /* The mmap'd contents of the file. */
                             char            *map;
                             /* Size of the mapping, as handed to munmap(). */
                             size_t           mapsz;
                     };
                     98:
                     struct texi;

                     /*
                      * Signature shared by all command handlers.
                      * A handler receives the parser, the command that triggered it, the
                      * input buffer and its size, and a pointer to the current position in
                      * that buffer, which it may move forward.
                      */
                     typedef void (*texicmdfp)(struct texi *p, enum texicmd cmd,
                             const char *buf, size_t sz, size_t *pos);
                    103:
                    104: /*
                    105:  * Describes Texinfo commands, whether native or overriden.
                    106:  */
                    107: struct texitok {
                    108:        texicmdfp        fp; /* callback (or NULL if none) */
                    109:        const char      *tok; /* name of the token */
                    110:        size_t           len; /* strlen(tok) */
                    111: };
                    112:
                    113: /*
                    114:  * The main parse structure.
                    115:  * This keeps any necessary information handy.
                    116:  */
                    117: struct texi {
                    118:        struct texifile  files[64];
                    119:        size_t           filepos;
                    120:        unsigned         flags;
                    121: #define        TEXI_IGN         0x01 /* don't print anything */
                    122: #define        TEXI_HEADER     (TEXI_IGN | 0x02) /* haven't seen @top yet */
                    123: #define        TEXI_LITERAL     0x04 /* output all whitespace */
                    124:        size_t           outcol; /* column of output */
                    125:        int              outmacro; /* whether output is in line macro */
                    126:        int              seenws; /* whitespace has been ignored */
                    127: };
                    128:
                    /*
                     * Non-zero if _x is a period, a comma or a semicolon.
                     * The argument may be evaluated up to three times, so it must not
                     * have side effects.
                     */
                    #define ismpunct(_x) \
                            ((_x) == '.' || (_x) == ',' || (_x) == ';')
                    133:
                    /*
                     * Command handlers referenced by the texitoks[] table.
                     * All of them share the texicmdfp signature.
                     */
                    static void doarg1(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void dobracket(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void dobye(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void docommand(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void doemph(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void doexample(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void dofile(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void doifnottex(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void doignblock(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void doignbracket(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void doignline(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void doitalic(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void doitem(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void doitemize(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void doliteral(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void doquotation(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void dosection(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void dosh(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void dosubsection(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void dosymbol(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void dotable(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    static void dotop(struct texi *p, enum texicmd cmd,
                            const char *buf, size_t sz, size_t *pos);
                    156:
                    157: static const struct texitok texitoks[TEXICMD__MAX] = {
                    158:        { doignline, "afourpaper", 10 }, /* TEXICMD_A4PAPER */
                    159:        { doignbracket, "anchor", 6 }, /* TEXICMD_ANCHOR */
                    160:        { dosymbol, "@", 1 }, /* TEXICMD_AT */
                    161:        { dobye, "bye", 3 }, /* TEXICMD_BYE */
                    162:        { dosh, "chapter", 7 }, /* TEXICMD_CHAPTER */
                    163:        { doignline, "cindex", 6 }, /* TEXICMD_CINDEX */
                    164:        { doliteral, "code", 4 }, /* TEXICMD_CODE */
                    165:        { docommand, "command", 7 }, /* TEXICMD_COMMAND */
                    166:        { doignline, "c", 1 }, /* TEXICMD_COMMENT */
                    167:        { doignline, "contents", 8 }, /* TEXICMD_CONTENTS */
                    168:        { doignblock, "copying", 7 }, /* TEXICMD_COPYING */
                    169:        { dosymbol, "copyright", 9 }, /* TEXICMD_COPYRIGHT */
                    170:        { doignblock, "detailmenu", 10 }, /* TEXICMD_DETAILMENU */
                    171:        { doignline, "dircategory", 11 }, /* TEXICMD_DIRCATEGORY */
                    172:        { doignblock, "direntry", 8 }, /* TEXICMD_DIRENTRY */
                    173:        { doarg1, "email", 5 }, /* TEXICMD_EMAIL */
                    174:        { doemph, "emph", 4 }, /* TEXICMD_EMPH */
                    175:        { NULL, "end", 3 }, /* TEXICMD_END */
                    176:        { doexample, "example", 7 }, /* TEXICMD_EXAMPLE */
                    177:        { dofile, "file", 4 }, /* TEXICMD_FILE */
                    178:        { doitalic, "i", 1 }, /* TEXICMD_I */
                    179:        { doignblock, "ifhtml", 6 }, /* TEXICMD_IFHTML */
                    180:        { doifnottex, "ifnottex", 8 }, /* TEXICMD_IFNOTTEX */
                    181:        { doignblock, "iftex", 5 }, /* TEXICMD_IFTEX */
                    182:        { doignbracket, "image", 5 }, /* TEXICMD_IMAGE */
                    183:        { doitem, "item", 4 }, /* TEXICMD_ITEM */
                    184:        { doitemize, "itemize", 7 }, /* TEXICMD_ITEMIZE */
                    185:        { doliteral, "kbd", 3 }, /* TEXICMD_KBD */
                    186:        { dosymbol, "LaTeX", 5 }, /* TEXICMD_LATEX */
                    187:        { doignblock, "menu", 4 }, /* TEXICMD_MENU */
                    188:        { doignline, "node", 4 }, /* TEXICMD_NODE */
                    189:        { doquotation, "quotation", 9 }, /* TEXICMD_QUOTATION */
                    190:        { doignline, "paragraphindent", 14 }, /* TEXICMD_PARINDENT */
                    191:        { dobracket, "ref", 3 }, /* TEXICMD_REF */
                    192:        { doliteral, "samp", 4 }, /* TEXICMD_SAMP */
                    193:        { dosection, "section", 7 }, /* TEXICMD_SECTION */
                    194:        { doignline, "setchapternewpage", 17 }, /* TEXICMD_SETCHAPNEWPAGE */
                    195:        { doignline, "setfilename", 11 }, /* TEXICMD_SETFILENAME */
                    196:        { doignline, "settitle", 8 }, /* TEXICMD_SETTITLE */
                    197:        { dosubsection, "subsection", 10 }, /* TEXICMD_SUBSECTION */
                    198:        { dotable, "table", 5 }, /* TEXICMD_TABLE */
                    199:        { doignblock, "tex", 3 }, /* TEXICMD_TEX */
                    200:        { dosymbol, "TeX", 3 }, /* TEXICMD_TEXSYM */
                    201:        { dobracket, "titlefont", 9 }, /* TEXICMD_TITLEFONT */
                    202:        { doignblock, "titlepage", 9 }, /* TEXICMD_TITLEPAGE */
                    203:        { dotop, "top", 3 }, /* TEXICMD_TOP */
                    204:        { dosh, "unnumbered", 10 }, /* TEXICMD_UNNUMBERED */
                    205:        { doarg1, "url", 3 }, /* TEXICMD_URL */
                    206:        { doliteral, "var", 3 }, /* TEXICMD_VAR */
                    207: };
                    208:
                    209: static void
                    210: texifilepop(struct texi *p)
                    211: {
                    212:        struct texifile *f;
                    213:
                    214:        assert(p->filepos > 0);
                    215:        f = &p->files[--p->filepos];
                    216:        munmap(f->map, f->mapsz);
                    217: }
                    218:
                    219: static void
                    220: texiexit(struct texi *p)
                    221: {
                    222:
                    223:        while (p->filepos > 0)
                    224:                texifilepop(p);
                    225: }
                    226:
                    /*
                     * Report a fatal system error and terminate the program.
                     * perror() runs first so that errno still describes the failure
                     * "errstring" refers to; only then are the open files released.
                     * This function does not return.
                     */
                    static void
                    texifatal(struct texi *p, const char *errstring)
                    {

                            perror(errstring);

                            texiexit(p);

                            exit(EXIT_FAILURE);
                    }
                    235:
                    236: /*
                    237:  * Print a generic warning message (to stderr) tied to our current
                    238:  * location in the parse sequence.
                    239:  */
                    240: static void
                    241: texiwarn(const struct texi *p, const char *fmt, ...)
                    242: {
                    243:        va_list  ap;
                    244:
                    245:        fprintf(stderr, "%s:%zu:%zu: ",
                    246:                p->files[p->filepos - 1].name,
                    247:                p->files[p->filepos - 1].line + 1,
                    248:                p->files[p->filepos - 1].col + 1);
                    249:        va_start(ap, fmt);
                    250:        vfprintf(stderr, fmt, ap);
                    251:        va_end(ap);
                    252:        fputc('\n', stderr);
                    253: }
                    254:
                    255: /*
                    256:  * Put a single data character.
                    257:  * This MUST NOT be a mdoc(7) command: it should be free text that's
                    258:  * outputted to the screen.
                    259:  */
                    260: static void
                    261: texiputchar(struct texi *p, char c)
                    262: {
                    263:
                    264:        if (TEXI_IGN & p->flags)
                    265:                return;
                    266:
                    267:        putchar(c);
                    268:        if ('\n' == c) {
                    269:                p->outcol = 0;
                    270:                p->outmacro = 0;
                    271:                p->seenws = 0;
                    272:        } else
                    273:                p->outcol++;
                    274: }
                    275:
                    /*
                     * Write a NUL-terminated string of free text, one character at a
                     * time, through texiputchar().
                     */
                    static void
                    texiputchars(struct texi *p, const char *s)
                    {
                            const char      *cp;

                            for (cp = s; '\0' != *cp; cp++)
                                    texiputchar(p, *cp);
                    }
                    286:
                    287: /*
                    288:  * Put an mdoc(7) command without the trailing newline.
                    289:  * This should ONLY be used for mdoc(7) commands!
                    290:  */
                    291: static void
                    292: texifputs(struct texi *p, const char *s)
                    293: {
                    294:        int      rc;
                    295:
                    296:        if (TEXI_IGN & p->flags)
                    297:                return;
                    298:        if (p->outcol)
                    299:                texiputchar(p, '\n');
                    300:        if (EOF != (rc = fputs(s, stdout)))
                    301:                p->outcol += rc;
                    302: }
                    303:
                    304: /*
                    305:  * Put an mdoc(7) command with the trailing newline.
                    306:  * This should ONLY be used for mdoc(7) commands!
                    307:  */
                    308: static void
                    309: teximacro(struct texi *p, const char *s)
                    310: {
                    311:
                    312:        if (TEXI_IGN & p->flags)
                    313:                return;
                    314:        if (p->outcol)
                    315:                texiputchar(p, '\n');
                    316:        puts(s);
                    317:        p->outcol = 0;
                    318:        p->seenws = 0;
                    319: }
                    320:
                    321: /*
                    322:  * Advance by a single byte in the input stream.
                    323:  */
                    324: static void
                    325: advance(struct texi *p, const char *buf, size_t *pos)
                    326: {
                    327:
                    328:        if ('\n' == buf[*pos]) {
                    329:                p->files[p->filepos - 1].line++;
                    330:                p->files[p->filepos - 1].col = 0;
                    331:        } else
                    332:                p->files[p->filepos - 1].col++;
                    333:
                    334:        (*pos)++;
                    335: }
                    336:
                    337: /*
                    338:  * Advance to the next non-whitespace word in the input stream.
                    339:  * If we're in literal mode, then print all of the whitespace as we're
                    340:  * doing so.
                    341:  */
                    342: static size_t
                    343: advancenext(struct texi *p, const char *buf, size_t sz, size_t *pos)
                    344: {
                    345:
                    346:        if (TEXI_LITERAL & p->flags) {
                    347:                while (*pos < sz && isspace(buf[*pos])) {
                    348:                        texiputchar(p, buf[*pos]);
                    349:                        advance(p, buf, pos);
                    350:                }
                    351:                return(*pos);
                    352:        }
                    353:
                    354:        while (*pos < sz && isspace(buf[*pos])) {
                    355:                p->seenws = 1;
                    356:                /*
                    357:                 * If it looks like we've printed a double-line, then
                    358:                 * output a paragraph.
                    359:                 * FIXME: this is stupid.
                    360:                 */
                    361:                if (*pos && '\n' == buf[*pos] && '\n' == buf[*pos - 1])
                    362:                        teximacro(p, ".Pp");
                    363:                advance(p, buf, pos);
                    364:        }
                    365:        return(*pos);
                    366: }
                    367:
                    /*
                     * Advance up to, but not past, the next newline in the input stream,
                     * or to the end of the input if there is none.
                     * Returns the new position.
                     */
                    static size_t
                    advanceeoln(struct texi *p, const char *buf, size_t sz, size_t *pos)
                    {

                            for ( ; *pos < sz; advance(p, buf, pos))
                                    if ('\n' == buf[*pos])
                                            break;

                            return(*pos);
                    }
                    379:
                    /*
                     * Advance byte by byte until the absolute buffer position "end" has
                     * been reached.
                     * "end" must not lie before the current position.
                     */
                    static void
                    advanceto(struct texi *p, const char *buf, size_t *pos, size_t end)
                    {

                            assert(*pos <= end);

                            while (end > *pos)
                                    advance(p, buf, pos);
                    }
                    392:
                    393: /*
                    394:  * Output a free-form word in the input stream, progressing to the next
                    395:  * command or white-space.
                    396:  * This also will advance the input stream.
                    397:  */
                    398: static void
                    399: texiword(struct texi *p, const char *buf, size_t sz, size_t *pos)
                    400: {
                    401:
                    402:        /*
                    403:         * XXX: if we're in literal mode, then we shouldn't do any
                    404:         * reflowing of text here.
                    405:         */
                    406:        if (p->outcol > 72 && ! (TEXI_LITERAL & p->flags))
                    407:                texiputchar(p, '\n');
                    408:
                    409:        if (p->seenws && p->outcol && ! (TEXI_LITERAL & p->flags))
                    410:                texiputchar(p, ' ');
                    411:
                    412:        p->seenws = 0;
                    413:
                    414:        while (*pos < sz && ! isspace(buf[*pos])) {
                    415:                switch (buf[*pos]) {
                    416:                case ('@'):
                    417:                case ('}'):
                    418:                case ('{'):
                    419:                        return;
                    420:                }
                    421:                if (*pos < sz - 1 &&
                    422:                         '`' == buf[*pos] &&
                    423:                         '`' == buf[*pos + 1]) {
                    424:                        texiputchars(p, "\\(lq");
                    425:                        advance(p, buf, pos);
                    426:                } else if (*pos < sz - 1 &&
                    427:                         '\'' == buf[*pos] &&
                    428:                         '\'' == buf[*pos + 1]) {
                    429:                        texiputchars(p, "\\(rq");
                    430:                        advance(p, buf, pos);
                    431:                } else
                    432:                        texiputchar(p, buf[*pos]);
                    433:                advance(p, buf, pos);
                    434:        }
                    435: }
                    436:
                    437: static enum texicmd
                    438: texicmd(struct texi *p, const char *buf,
                    439:        size_t pos, size_t sz, size_t *end)
                    440: {
                    441:        size_t   i, len;
                    442:
                    443:        assert('@' == buf[pos]);
                    444:        for (*end = ++pos; *end < sz && ! isspace(buf[*end]); (*end)++)
                    445:                if ('@' == buf[*end] || '{' == buf[*end])
                    446:                        break;
                    447:
                    448:        len = *end - pos;
                    449:        for (i = 0; i < TEXICMD__MAX; i++) {
                    450:                if (len != texitoks[i].len)
                    451:                        continue;
                    452:                if (0 == strncmp(texitoks[i].tok, &buf[pos], len))
                    453:                        return(i);
                    454:        }
                    455:
                    456:        texiwarn(p, "bad command: %.*s", (int)len, &buf[pos]);
                    457:        return(TEXICMD__MAX);
                    458: }
                    459:
                    460: static void
                    461: parseeof(struct texi *p, const char *buf, size_t sz)
                    462: {
                    463:        size_t           pos = 0;
                    464:        enum texicmd     cmd;
                    465:        size_t           end;
                    466:
                    467:        while ((pos = advancenext(p, buf, sz, &pos)) < sz) {
                    468:                switch (buf[pos]) {
                    469:                case ('}'):
                    470:                        texiwarn(p, "unexpected \"}\"");
                    471:                        advance(p, buf, &pos);
                    472:                        continue;
                    473:                case ('{'):
                    474:                        texiwarn(p, "unexpected \"{\"");
                    475:                        advance(p, buf, &pos);
                    476:                        continue;
                    477:                case ('@'):
                    478:                        break;
                    479:                default:
                    480:                        texiword(p, buf, sz, &pos);
                    481:                        continue;
                    482:                }
                    483:
                    484:                cmd = texicmd(p, buf, pos, sz, &end);
                    485:                advanceto(p, buf, &pos, end);
                    486:                if (TEXICMD__MAX == cmd)
                    487:                        continue;
                    488:                if (NULL != texitoks[cmd].fp)
                    489:                        (*texitoks[cmd].fp)(p, cmd, buf, sz, &pos);
                    490:        }
                    491: }
                    492:
                    493: static void
                    494: parsebracket(struct texi *p, const char *buf, size_t sz, size_t *pos)
                    495: {
                    496:        size_t           end;
                    497:        enum texicmd     cmd;
                    498:
                    499:        if (*pos == sz || '{' != buf[*pos])
                    500:                return;
                    501:        advance(p, buf, pos);
                    502:
                    503:        while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
                    504:                switch (buf[*pos]) {
                    505:                case ('}'):
                    506:                        advance(p, buf, pos);
                    507:                        return;
                    508:                case ('{'):
                    509:                        texiwarn(p, "unexpected \"{\"");
                    510:                        advance(p, buf, pos);
                    511:                        continue;
                    512:                case ('@'):
                    513:                        break;
                    514:                default:
                    515:                        texiword(p, buf, sz, pos);
                    516:                        continue;
                    517:                }
                    518:
                    519:                cmd = texicmd(p, buf, *pos, sz, &end);
                    520:                advanceto(p, buf, pos, end);
                    521:                if (TEXICMD__MAX == cmd)
                    522:                        continue;
                    523:                if (NULL != texitoks[cmd].fp)
                    524:                        (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
                    525:        }
                    526: }
                    527:
                    528: static void
                    529: parseto(struct texi *p, const char *buf,
                    530:        size_t sz, size_t *pos, const char *endtoken)
                    531: {
                    532:        size_t           end;
                    533:        enum texicmd     cmd;
                    534:        size_t           endtoksz;
                    535:
                    536:        endtoksz = strlen(endtoken);
                    537:        assert(endtoksz > 0);
                    538:
                    539:        while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
                    540:                switch (buf[*pos]) {
                    541:                case ('}'):
                    542:                        texiwarn(p, "unexpected \"}\"");
                    543:                        advance(p, buf, pos);
                    544:                        continue;
                    545:                case ('{'):
                    546:                        texiwarn(p, "unexpected \"{\"");
                    547:                        advance(p, buf, pos);
                    548:                        continue;
                    549:                case ('@'):
                    550:                        break;
                    551:                default:
                    552:                        texiword(p, buf, sz, pos);
                    553:                        continue;
                    554:                }
                    555:
                    556:                cmd = texicmd(p, buf, *pos, sz, &end);
                    557:                advanceto(p, buf, pos, end);
                    558:                if (TEXICMD_END == cmd) {
                    559:                        while (*pos < sz && ' ' == buf[*pos])
                    560:                                advance(p, buf, pos);
                    561:                        /*
                    562:                         * FIXME: skip tabs and also check the full
                    563:                         * word, not just its initial substring!
                    564:                         */
                    565:                        if (sz - *pos >= endtoksz && 0 == strncmp
                    566:                                 (&buf[*pos], endtoken, endtoksz)) {
                    567:                                advanceeoln(p, buf, sz, pos);
                    568:                                break;
                    569:                        }
                    570:                        texiwarn(p, "unexpected \"end\"");
                    571:                        advanceeoln(p, buf, sz, pos);
                    572:                        continue;
                    573:                } else if (TEXICMD__MAX != cmd)
                    574:                        if (NULL != texitoks[cmd].fp)
                    575:                                (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
                    576:        }
                    577: }
                    578:
                    579: static void
                    580: doignblock(struct texi *p, enum texicmd cmd,
                    581:        const char *buf, size_t sz, size_t *pos)
                    582: {
                    583:        unsigned int     sv = p->flags;
                    584:        const char      *blockname;
                    585:
                    586:        p->flags |= TEXI_IGN;
                    587:        switch (cmd) {
                    588:        case (TEXICMD_COPYING):
                    589:                blockname = "copying";
                    590:                break;
                    591:        case (TEXICMD_DETAILMENU):
                    592:                blockname = "detailmenu";
                    593:                break;
                    594:        case (TEXICMD_DIRENTRY):
                    595:                blockname = "direntry";
                    596:                break;
                    597:        case (TEXICMD_IFHTML):
                    598:                blockname = "ifhtml";
                    599:                break;
                    600:        case (TEXICMD_IFTEX):
                    601:                blockname = "iftex";
                    602:                break;
                    603:        case (TEXICMD_MENU):
                    604:                blockname = "menu";
                    605:                break;
                    606:        case (TEXICMD_TEX):
                    607:                blockname = "tex";
                    608:                break;
                    609:        case (TEXICMD_TITLEPAGE):
                    610:                blockname = "titlepage";
                    611:                break;
                    612:        default:
                    613:                abort();
                    614:        }
                    615:        parseto(p, buf, sz, pos, blockname);
                    616:        p->flags = sv;
                    617: }
                    618:
                    619: static void
                    620: doifnottex(struct texi *p, enum texicmd cmd,
                    621:        const char *buf, size_t sz, size_t *pos)
                    622: {
                    623:
                    624:        parseto(p, buf, sz, pos, "ifnottex");
                    625: }
                    626:
                    627: static void
                    628: doinline(struct texi *p, const char *buf,
                    629:        size_t sz, size_t *pos, const char *macro)
                    630: {
                    631:
                    632:        if ( ! p->outmacro)
                    633:                texifputs(p, ".");
                    634:        texiputchars(p, macro);
                    635:        texiputchar(p, ' ');
                    636:        p->seenws = 0;
                    637:        p->outmacro++;
                    638:        parsebracket(p, buf, sz, pos);
                    639:        p->outmacro--;
                    640:        if (*pos < sz - 1 &&
                    641:                 ismpunct(buf[*pos]) &&
                    642:                 isspace(buf[*pos + 1])) {
                    643:                texiputchar(p, ' ');
                    644:                texiputchar(p, buf[*pos]);
                    645:                advance(p, buf, pos);
                    646:        }
                    647:        if ( ! p->outmacro)
                    648:                texiputchar(p, '\n');
                    649: }
                    650:
                    651: static void
                    652: doitalic(struct texi *p, enum texicmd cmd,
                    653:        const char *buf, size_t sz, size_t *pos)
                    654: {
                    655:
                    656:        texiputchars(p, "\\fI");
                    657:        parsebracket(p, buf, sz, pos);
                    658:        texiputchars(p, "\\fP");
                    659: }
                    660:
                    661: static void
                    662: doliteral(struct texi *p, enum texicmd cmd,
                    663:        const char *buf, size_t sz, size_t *pos)
                    664: {
                    665:
                    666:        if (TEXI_LITERAL & p->flags)
                    667:                parsebracket(p, buf, sz, pos);
                    668:        else
                    669:                doinline(p, buf, sz, pos, "Li");
                    670: }
                    671:
                    672: static void
                    673: doemph(struct texi *p, enum texicmd cmd,
                    674:        const char *buf, size_t sz, size_t *pos)
                    675: {
                    676:
                    677:        if (TEXI_LITERAL & p->flags)
                    678:                doitalic(p, cmd, buf, sz, pos);
                    679:        else
                    680:                doinline(p, buf, sz, pos, "Em");
                    681: }
                    682:
                    683: static void
                    684: docommand(struct texi *p, enum texicmd cmd,
                    685:        const char *buf, size_t sz, size_t *pos)
                    686: {
                    687:
                    688:        doinline(p, buf, sz, pos, "Xr");
                    689: }
                    690:
                    691: static void
                    692: dobracket(struct texi *p, enum texicmd cmd,
                    693:        const char *buf, size_t sz, size_t *pos)
                    694: {
                    695:
                    696:        parsebracket(p, buf, sz, pos);
                    697: }
                    698:
                    699: static void
                    700: dofile(struct texi *p, enum texicmd cmd,
                    701:        const char *buf, size_t sz, size_t *pos)
                    702: {
                    703:
                    704:        if (TEXI_LITERAL & p->flags)
                    705:                parsebracket(p, buf, sz, pos);
                    706:        else
                    707:                doinline(p, buf, sz, pos, "Pa");
                    708: }
                    709:
                    710: static void
                    711: doexample(struct texi *p, enum texicmd cmd,
                    712:        const char *buf, size_t sz, size_t *pos)
                    713: {
                    714:        unsigned int    sv;
                    715:
                    716:        teximacro(p, ".Bd -literal");
                    717:        advanceeoln(p, buf, sz, pos);
                    718:        if ('\n' == buf[*pos])
                    719:                advance(p, buf, pos);
                    720:        sv = p->flags;
                    721:        p->flags |= TEXI_LITERAL;
                    722:        parseto(p, buf, sz, pos, "example");
                    723:        p->flags = sv;
                    724:        teximacro(p, ".Ed");
                    725: }
                    726:
                    727: static void
                    728: dobye(struct texi *p, enum texicmd cmd,
                    729:        const char *buf, size_t sz, size_t *pos)
                    730: {
                    731:
                    732:        texiexit(p);
                    733:        exit(EXIT_SUCCESS);
                    734: }
                    735:
                    736: static void
                    737: dosymbol(struct texi *p, enum texicmd cmd,
                    738:        const char *buf, size_t sz, size_t *pos)
                    739: {
                    740:
                    741:        switch (cmd) {
                    742:        case (TEXICMD_AT):
                    743:                texiputchars(p, "@");
                    744:                break;
                    745:        case (TEXICMD_COPYRIGHT):
                    746:                texiputchars(p, "\\(co");
                    747:                break;
                    748:        case (TEXICMD_LATEX):
                    749:                texiputchars(p, "LaTeX");
                    750:                break;
                    751:        case (TEXICMD_TEXSYM):
                    752:                texiputchars(p, "TeX");
                    753:                break;
                    754:        default:
                    755:                abort();
                    756:        }
                    757:
                    758:        doignbracket(p, cmd, buf, sz, pos);
                    759: }
                    760:
                    761: static void
                    762: doquotation(struct texi *p, enum texicmd cmd,
                    763:        const char *buf, size_t sz, size_t *pos)
                    764: {
                    765:
                    766:        teximacro(p, ".Qo");
                    767:        parseto(p, buf, sz, pos, "quotation");
                    768:        teximacro(p, ".Qc");
                    769: }
                    770:
                    771: static void
                    772: doarg1(struct texi *p, enum texicmd cmd,
                    773:        const char *buf, size_t sz, size_t *pos)
                    774: {
                    775:
                    776:        if (*pos == sz || '{' != buf[*pos])
                    777:                return;
                    778:        advance(p, buf, pos);
                    779:        if ( ! p->outmacro)
                    780:                texifputs(p, ".");
                    781:        switch (cmd) {
                    782:        case (TEXICMD_EMAIL):
                    783:                texiputchars(p, "Lk ");
                    784:                break;
                    785:        case (TEXICMD_URL):
                    786:                texiputchars(p, "Mt ");
                    787:                break;
                    788:        default:
                    789:                abort();
                    790:        }
                    791:        while (*pos < sz && '}' != buf[*pos] && ',' != buf[*pos]) {
                    792:                texiputchar(p, buf[*pos]);
                    793:                advance(p, buf, pos);
                    794:        }
                    795:        while (*pos < sz && '}' != buf[*pos])
                    796:                advance(p, buf, pos);
                    797:        if (*pos < sz)
                    798:                advance(p, buf, pos);
                    799:        if (*pos < sz - 1 &&
                    800:                 ismpunct(buf[*pos]) &&
                    801:                 isspace(buf[*pos + 1])) {
                    802:                texiputchar(p, ' ');
                    803:                texiputchar(p, buf[*pos]);
                    804:                advance(p, buf, pos);
                    805:        }
                    806:        if ( ! p->outmacro)
                    807:                texiputchar(p, '\n');
                    808: }
                    809:
                    810: static void
                    811: dosubsection(struct texi *p, enum texicmd cmd,
                    812:                const char *buf, size_t sz, size_t *pos)
                    813: {
                    814:
                    815:        if (TEXI_IGN & p->flags) {
                    816:                advanceeoln(p, buf, sz, pos);
                    817:                return;
                    818:        }
                    819:        while (*pos < sz && ' ' == buf[*pos])
                    820:                advance(p, buf, pos);
                    821:        texifputs(p, ".Pp");
                    822:        while (*pos < sz && '\n' != buf[*pos]) {
                    823:                texiputchar(p, buf[*pos]);
                    824:                advance(p, buf, pos);
                    825:        }
                    826:        texifputs(p, ".Pp");
                    827: }
                    828:
                    829: static void
                    830: dosection(struct texi *p, enum texicmd cmd,
                    831:                const char *buf, size_t sz, size_t *pos)
                    832: {
                    833:
                    834:        if (TEXI_IGN & p->flags) {
                    835:                advanceeoln(p, buf, sz, pos);
                    836:                return;
                    837:        }
                    838:        while (*pos < sz && ' ' == buf[*pos])
                    839:                advance(p, buf, pos);
                    840:        texifputs(p, ".Ss ");
                    841:        while (*pos < sz && '\n' != buf[*pos]) {
                    842:                texiputchar(p, buf[*pos]);
                    843:                advance(p, buf, pos);
                    844:        }
                    845:        texiputchar(p, '\n');
                    846: }
                    847:
                    848: static void
                    849: dosh(struct texi *p, enum texicmd cmd,
                    850:        const char *buf, size_t sz, size_t *pos)
                    851: {
                    852:
                    853:        if (TEXI_IGN & p->flags) {
                    854:                advanceeoln(p, buf, sz, pos);
                    855:                return;
                    856:        }
                    857:        while (*pos < sz && ' ' == buf[*pos])
                    858:                advance(p, buf, pos);
                    859:        texifputs(p, ".Sh ");
                    860:        while (*pos < sz && '\n' != buf[*pos]) {
                    861:                texiputchar(p, toupper(buf[*pos]));
                    862:                advance(p, buf, pos);
                    863:        }
                    864:        texiputchar(p, '\n');
                    865: }
                    866:
                    867: static void
                    868: dotop(struct texi *p, enum texicmd cmd,
                    869:        const char *buf, size_t sz, size_t *pos)
                    870: {
                    871:
                    872:        p->flags &= ~TEXI_HEADER;
                    873:        advanceeoln(p, buf, sz, pos);
                    874:        teximacro(p, ".Dd $Mdocdate$");
                    875:        teximacro(p, ".Dt SOMETHING 7");
                    876:        teximacro(p, ".Os");
                    877:        teximacro(p, ".Sh NAME");
                    878:        teximacro(p, ".Nm Something");
                    879:        teximacro(p, ".Nd Something");
                    880: }
                    881:
                    882: static void
                    883: doitem(struct texi *p, enum texicmd cmd,
                    884:        const char *buf, size_t sz, size_t *pos)
                    885: {
                    886:        size_t   end;
                    887:
                    888:        /* See if we have arguments... */
                    889:        for (end = *pos; end < sz; end++)
                    890:                if (' ' != buf[end] && '\t' != buf[end])
                    891:                        break;
                    892:
                    893:        /* If we have arguments, print them too. */
                    894:        if ('\n' != buf[end]) {
                    895:                texifputs(p, ".It");
                    896:                /* FIXME: process commands. */
                    897:                while (*pos < sz && '\n' != buf[*pos]) {
                    898:                        texiputchar(p, buf[*pos]);
                    899:                        advance(p, buf, pos);
                    900:                }
                    901:                texiputchar(p, '\n');
                    902:        } else
                    903:                teximacro(p, ".It");
                    904: }
                    905:
                    906: static void
                    907: dotable(struct texi *p, enum texicmd cmd,
                    908:        const char *buf, size_t sz, size_t *pos)
                    909: {
                    910:
                    911:        teximacro(p, ".Bl -tag -width Ds");
                    912:        parseto(p, buf, sz, pos, "table");
                    913:        teximacro(p, ".El");
                    914: }
                    915:
                    916: static void
                    917: doitemize(struct texi *p, enum texicmd cmd,
                    918:        const char *buf, size_t sz, size_t *pos)
                    919: {
                    920:
                    921:        teximacro(p, ".Bl -bullet");
                    922:        parseto(p, buf, sz, pos, "itemize");
                    923:        teximacro(p, ".El");
                    924: }
                    925:
                    926: static void
                    927: doignbracket(struct texi *p, enum texicmd cmd,
                    928:        const char *buf, size_t sz, size_t *pos)
                    929: {
                    930:        unsigned int     sv = p->flags;
                    931:
                    932:        p->flags |= TEXI_IGN;
                    933:        parsebracket(p, buf, sz, pos);
                    934:        p->flags = sv;
                    935: }
                    936:
                    937: static void
                    938: doignline(struct texi *p, enum texicmd cmd,
                    939:        const char *buf, size_t sz, size_t *pos)
                    940: {
                    941:
                    942:        advanceeoln(p, buf, sz, pos);
                    943:        if (*pos < sz)
                    944:                advance(p, buf, pos);
                    945: }
                    946:
                    947: static int
                    948: parsefile(struct texi *p, const char *fname)
                    949: {
                    950:        struct texifile  *f;
                    951:        int               fd;
                    952:        struct stat       st;
                    953:
                    954:        assert(p->filepos < 64);
                    955:        f = &p->files[p->filepos];
                    956:        memset(f, 0, sizeof(struct texifile));
                    957:
                    958:        f->name = fname;
                    959:        if (-1 == (fd = open(fname, O_RDONLY, 0))) {
                    960:                texifatal(p, fname);
                    961:        } else if (-1 == fstat(fd, &st)) {
                    962:                close(fd);
                    963:                texifatal(p, fname);
                    964:        }
                    965:
                    966:        f->mapsz = st.st_size;
                    967:        f->map = mmap(NULL, f->mapsz,
                    968:                PROT_READ, MAP_SHARED, fd, 0);
                    969:        close(fd);
                    970:
                    971:        if (MAP_FAILED == f->map) {
                    972:                texifatal(p, fname);
                    973:                return(0);
                    974:        }
                    975:
                    976:        p->filepos++;
                    977:        parseeof(p, f->map, f->mapsz);
                    978:        texifilepop(p);
                    979:        return(1);
                    980: }
                    981:
                    982: int
                    983: main(int argc, char *argv[])
                    984: {
                    985:        struct texi      texi;
                    986:        int              c, rc;
                    987:        const char      *progname;
                    988:
                    989:        progname = strrchr(argv[0], '/');
                    990:        if (progname == NULL)
                    991:                progname = argv[0];
                    992:        else
                    993:                ++progname;
                    994:
                    995:        while (-1 != (c = getopt(argc, argv, "")))
                    996:                switch (c) {
                    997:                default:
                    998:                        goto usage;
                    999:                }
                   1000:
                   1001:        argv += optind;
                   1002:        if (0 == (argc -= optind))
                   1003:                goto usage;
                   1004:
                   1005:        memset(&texi, 0, sizeof(struct texi));
                   1006:        texi.flags = TEXI_HEADER;
                   1007:        rc = parsefile(&texi, argv[0]);
                   1008:        return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
                   1009:
                   1010: usage:
                   1011:        fprintf(stderr, "usage: %s file\n", progname);
                   1012:        return(EXIT_FAILURE);
                   1013: }

CVSweb