[BACK]Return to main.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / texi2mdoc

Annotation of texi2mdoc/main.c, Revision 1.2

1.2     ! kristaps    1: /*     $Id: main.c,v 1.1.1.1 2015/02/16 22:24:43 kristaps Exp $ */
1.1       kristaps    2: /*
                      3:  * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
                      4:  *
                      5:  * Permission to use, copy, modify, and distribute this software for any
                      6:  * purpose with or without fee is hereby granted, provided that the above
                      7:  * copyright notice and this permission notice appear in all copies.
                      8:  *
                      9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     16:  */
                     17: #include <sys/mman.h>
                     18: #include <sys/stat.h>
                     19:
                     20: #include <assert.h>
                     21: #include <ctype.h>
                     22: #include <fcntl.h>
                     23: #include <getopt.h>
1.2     ! kristaps   24: #include <libgen.h>
        !            25: #include <limits.h>
1.1       kristaps   26: #include <stdarg.h>
                     27: #include <stdio.h>
                     28: #include <stdlib.h>
                     29: #include <string.h>
                     30:
                     31: /*
                     32:  * This defines each one of the Texinfo commands that we understand.
                     33:  * Obviously this only refers to native commands; overriden names are a
                     34:  * different story.
                     35:  */
                     36: enum   texicmd {
1.2     ! kristaps   37:        TEXICMD_ACRONYM,
1.1       kristaps   38:        TEXICMD_A4PAPER,
                     39:        TEXICMD_ANCHOR,
1.2     ! kristaps   40:        TEXICMD_APPENDIX,
        !            41:        TEXICMD_APPENDIXSEC,
1.1       kristaps   42:        TEXICMD_AT,
                     43:        TEXICMD_BYE,
                     44:        TEXICMD_CHAPTER,
                     45:        TEXICMD_CINDEX,
                     46:        TEXICMD_CODE,
                     47:        TEXICMD_COMMAND,
                     48:        TEXICMD_COMMENT,
1.2     ! kristaps   49:        TEXICMD_COMMENT_LONG,
1.1       kristaps   50:        TEXICMD_CONTENTS,
                     51:        TEXICMD_COPYING,
                     52:        TEXICMD_COPYRIGHT,
                     53:        TEXICMD_DETAILMENU,
                     54:        TEXICMD_DIRCATEGORY,
                     55:        TEXICMD_DIRENTRY,
1.2     ! kristaps   56:        TEXICMD_DOTS,
1.1       kristaps   57:        TEXICMD_EMAIL,
                     58:        TEXICMD_EMPH,
                     59:        TEXICMD_END,
1.2     ! kristaps   60:        TEXICMD_ENUMERATE,
1.1       kristaps   61:        TEXICMD_EXAMPLE,
                     62:        TEXICMD_FILE,
1.2     ! kristaps   63:        TEXICMD_HEADING,
1.1       kristaps   64:        TEXICMD_I,
                     65:        TEXICMD_IFHTML,
                     66:        TEXICMD_IFNOTTEX,
                     67:        TEXICMD_IFTEX,
                     68:        TEXICMD_IMAGE,
1.2     ! kristaps   69:        TEXICMD_INCLUDE,
1.1       kristaps   70:        TEXICMD_ITEM,
                     71:        TEXICMD_ITEMIZE,
                     72:        TEXICMD_KBD,
                     73:        TEXICMD_LATEX,
                     74:        TEXICMD_MENU,
                     75:        TEXICMD_NODE,
                     76:        TEXICMD_QUOTATION,
                     77:        TEXICMD_PARINDENT,
1.2     ! kristaps   78:        TEXICMD_PRINTINDEX,
1.1       kristaps   79:        TEXICMD_REF,
                     80:        TEXICMD_SAMP,
                     81:        TEXICMD_SECTION,
                     82:        TEXICMD_SETCHAPNEWPAGE,
                     83:        TEXICMD_SETFILENAME,
                     84:        TEXICMD_SETTITLE,
                     85:        TEXICMD_SUBSECTION,
                     86:        TEXICMD_TABLE,
                     87:        TEXICMD_TEX,
                     88:        TEXICMD_TEXSYM,
                     89:        TEXICMD_TITLEFONT,
                     90:        TEXICMD_TITLEPAGE,
                     91:        TEXICMD_TOP,
                     92:        TEXICMD_UNNUMBERED,
1.2     ! kristaps   93:        TEXICMD_UNNUMBEREDSEC,
1.1       kristaps   94:        TEXICMD_URL,
                     95:        TEXICMD_VAR,
                     96:        TEXICMD__MAX
                     97: };
                     98:
                     99: /*
                    100:  * The file currently being parsed.
                    101:  * This keeps track of our location within that file.
                    102:  */
                    103: struct texifile {
                    104:        const char      *name; /* name of the file */
                    105:        size_t           line; /* current line (from zero) */
                    106:        size_t           col; /* current column in line (from zero) */
                    107:        char            *map; /* mmap'd file */
                    108:        size_t           mapsz; /* size of mmap */
                    109: };
                    110:
                    111: struct texi;
                    112:
1.2     ! kristaps  113: /*
        !           114:  * Callback for functions implementing texi commands.
        !           115:  */
1.1       kristaps  116: typedef        void (*texicmdfp)(struct texi *,
                    117:        enum texicmd, const char *, size_t, size_t *);
                    118:
                    119: /*
                    120:  * Describes Texinfo commands, whether native or overriden.
                    121:  */
                    122: struct texitok {
                    123:        texicmdfp        fp; /* callback (or NULL if none) */
                    124:        const char      *tok; /* name of the token */
                    125:        size_t           len; /* strlen(tok) */
                    126: };
                    127:
                    128: /*
                    129:  * The main parse structure.
                    130:  * This keeps any necessary information handy.
                    131:  */
                    132: struct texi {
                    133:        struct texifile  files[64];
                    134:        size_t           filepos;
                    135:        unsigned         flags;
                    136: #define        TEXI_IGN         0x01 /* don't print anything */
                    137: #define        TEXI_HEADER     (TEXI_IGN | 0x02) /* haven't seen @top yet */
                    138: #define        TEXI_LITERAL     0x04 /* output all whitespace */
                    139:        size_t           outcol; /* column of output */
                    140:        int              outmacro; /* whether output is in line macro */
                    141:        int              seenws; /* whitespace has been ignored */
1.2     ! kristaps  142:        char            *dir; /* texi directory */
1.1       kristaps  143: };
                    144:
/*
 * FIXME: these ad-hoc character classes should go away.
 * Both macros evaluate their argument more than once, so do not pass
 * an expression with side effects.
 */
#define	ismpunct(_x) \
	('.' == (_x) || \
	 ',' == (_x) || \
	 ';' == (_x))
#define	isws(_x) \
	(' ' == (_x) || '\t' == (_x))
1.1       kristaps  152:
                    153: static void doarg1(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    154: static void dobracket(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    155: static void dobye(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    156: static void docommand(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    157: static void doemph(struct texi *, enum texicmd, const char *, size_t, size_t *);
1.2     ! kristaps  158: static void doenumerate(struct texi *, enum texicmd, const char *, size_t, size_t *);
1.1       kristaps  159: static void doexample(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    160: static void dofile(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    161: static void doifnottex(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    162: static void doignblock(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    163: static void doignbracket(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    164: static void doignline(struct texi *, enum texicmd, const char *, size_t, size_t *);
1.2     ! kristaps  165: static void doinclude(struct texi *, enum texicmd, const char *, size_t, size_t *);
1.1       kristaps  166: static void doitalic(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    167: static void doitem(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    168: static void doitemize(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    169: static void doliteral(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    170: static void doquotation(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    171: static void dotable(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    172: static void dotop(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    173: static void dosection(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    174: static void dosh(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    175: static void dosubsection(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    176: static void dosymbol(struct texi *, enum texicmd, const char *, size_t, size_t *);
                    177:
                    178: static const struct texitok texitoks[TEXICMD__MAX] = {
1.2     ! kristaps  179:        { doarg1, "acronym", 7 }, /* TEXICMD_ACRONYM */
1.1       kristaps  180:        { doignline, "afourpaper", 10 }, /* TEXICMD_A4PAPER */
                    181:        { doignbracket, "anchor", 6 }, /* TEXICMD_ANCHOR */
1.2     ! kristaps  182:        { dosh, "appendix", 8 }, /* TEXICMD_APPENDIX */
        !           183:        { dosh, "appendixsec", 11 }, /* TEXICMD_APPENDIXSEC */
1.1       kristaps  184:        { dosymbol, "@", 1 }, /* TEXICMD_AT */
                    185:        { dobye, "bye", 3 }, /* TEXICMD_BYE */
                    186:        { dosh, "chapter", 7 }, /* TEXICMD_CHAPTER */
                    187:        { doignline, "cindex", 6 }, /* TEXICMD_CINDEX */
                    188:        { doliteral, "code", 4 }, /* TEXICMD_CODE */
                    189:        { docommand, "command", 7 }, /* TEXICMD_COMMAND */
                    190:        { doignline, "c", 1 }, /* TEXICMD_COMMENT */
1.2     ! kristaps  191:        { doignline, "comment", 7 }, /* TEXICMD_COMMENT_LONG */
1.1       kristaps  192:        { doignline, "contents", 8 }, /* TEXICMD_CONTENTS */
                    193:        { doignblock, "copying", 7 }, /* TEXICMD_COPYING */
                    194:        { dosymbol, "copyright", 9 }, /* TEXICMD_COPYRIGHT */
                    195:        { doignblock, "detailmenu", 10 }, /* TEXICMD_DETAILMENU */
                    196:        { doignline, "dircategory", 11 }, /* TEXICMD_DIRCATEGORY */
                    197:        { doignblock, "direntry", 8 }, /* TEXICMD_DIRENTRY */
1.2     ! kristaps  198:        { dosymbol, "dots", 4 }, /* TEXICMD_DOTS */
1.1       kristaps  199:        { doarg1, "email", 5 }, /* TEXICMD_EMAIL */
                    200:        { doemph, "emph", 4 }, /* TEXICMD_EMPH */
                    201:        { NULL, "end", 3 }, /* TEXICMD_END */
1.2     ! kristaps  202:        { doenumerate, "enumerate", 9 }, /* TEXICMD_ENUMERATE */
1.1       kristaps  203:        { doexample, "example", 7 }, /* TEXICMD_EXAMPLE */
                    204:        { dofile, "file", 4 }, /* TEXICMD_FILE */
1.2     ! kristaps  205:        { dosection, "heading", 7 }, /* TEXICMD_HEADING */
1.1       kristaps  206:        { doitalic, "i", 1 }, /* TEXICMD_I */
                    207:        { doignblock, "ifhtml", 6 }, /* TEXICMD_IFHTML */
                    208:        { doifnottex, "ifnottex", 8 }, /* TEXICMD_IFNOTTEX */
                    209:        { doignblock, "iftex", 5 }, /* TEXICMD_IFTEX */
                    210:        { doignbracket, "image", 5 }, /* TEXICMD_IMAGE */
1.2     ! kristaps  211:        { doinclude, "include", 7 }, /* TEXICMD_INCLUDE */
1.1       kristaps  212:        { doitem, "item", 4 }, /* TEXICMD_ITEM */
                    213:        { doitemize, "itemize", 7 }, /* TEXICMD_ITEMIZE */
                    214:        { doliteral, "kbd", 3 }, /* TEXICMD_KBD */
                    215:        { dosymbol, "LaTeX", 5 }, /* TEXICMD_LATEX */
                    216:        { doignblock, "menu", 4 }, /* TEXICMD_MENU */
                    217:        { doignline, "node", 4 }, /* TEXICMD_NODE */
                    218:        { doquotation, "quotation", 9 }, /* TEXICMD_QUOTATION */
1.2     ! kristaps  219:        { doignline, "printindex", 10 }, /* TEXICMD_PRINTINDEX */
1.1       kristaps  220:        { doignline, "paragraphindent", 14 }, /* TEXICMD_PARINDENT */
                    221:        { dobracket, "ref", 3 }, /* TEXICMD_REF */
                    222:        { doliteral, "samp", 4 }, /* TEXICMD_SAMP */
                    223:        { dosection, "section", 7 }, /* TEXICMD_SECTION */
                    224:        { doignline, "setchapternewpage", 17 }, /* TEXICMD_SETCHAPNEWPAGE */
                    225:        { doignline, "setfilename", 11 }, /* TEXICMD_SETFILENAME */
                    226:        { doignline, "settitle", 8 }, /* TEXICMD_SETTITLE */
                    227:        { dosubsection, "subsection", 10 }, /* TEXICMD_SUBSECTION */
                    228:        { dotable, "table", 5 }, /* TEXICMD_TABLE */
                    229:        { doignblock, "tex", 3 }, /* TEXICMD_TEX */
                    230:        { dosymbol, "TeX", 3 }, /* TEXICMD_TEXSYM */
                    231:        { dobracket, "titlefont", 9 }, /* TEXICMD_TITLEFONT */
                    232:        { doignblock, "titlepage", 9 }, /* TEXICMD_TITLEPAGE */
                    233:        { dotop, "top", 3 }, /* TEXICMD_TOP */
                    234:        { dosh, "unnumbered", 10 }, /* TEXICMD_UNNUMBERED */
1.2     ! kristaps  235:        { dosection, "unnumberedsec", 13 }, /* TEXICMD_UNNUMBEREDSEC */
1.1       kristaps  236:        { doarg1, "url", 3 }, /* TEXICMD_URL */
                    237:        { doliteral, "var", 3 }, /* TEXICMD_VAR */
                    238: };
                    239:
1.2     ! kristaps  240: /*
        !           241:  * Unmap the top-most file that we're using.
        !           242:  */
1.1       kristaps  243: static void
                    244: texifilepop(struct texi *p)
                    245: {
                    246:        struct texifile *f;
                    247:
                    248:        assert(p->filepos > 0);
                    249:        f = &p->files[--p->filepos];
                    250:        munmap(f->map, f->mapsz);
                    251: }
                    252:
1.2     ! kristaps  253: /*
        !           254:  * Unmap all files that we're currently using.
        !           255:  * The utility should exit(...) after this is called.
        !           256:  */
1.1       kristaps  257: static void
                    258: texiexit(struct texi *p)
                    259: {
                    260:
                    261:        while (p->filepos > 0)
                    262:                texifilepop(p);
1.2     ! kristaps  263:        free(p->dir);
1.1       kristaps  264: }
                    265:
/*
 * Fatal error: unmap all files and exit.
 * The "errstring" is passed to perror(3), which is called first so
 * that errno is reported before the clean-up can disturb it.
 * This function does not return.
 */
static void
texiabort(struct texi *p, const char *errstring)
{

	perror(errstring);
	texiexit(p);
	exit(EXIT_FAILURE);
}
                    278:
                    279: /*
                    280:  * Print a generic warning message (to stderr) tied to our current
                    281:  * location in the parse sequence.
                    282:  */
                    283: static void
                    284: texiwarn(const struct texi *p, const char *fmt, ...)
                    285: {
                    286:        va_list  ap;
                    287:
1.2     ! kristaps  288:        fprintf(stderr, "%s:%zu:%zu: warning: ",
1.1       kristaps  289:                p->files[p->filepos - 1].name,
                    290:                p->files[p->filepos - 1].line + 1,
                    291:                p->files[p->filepos - 1].col + 1);
                    292:        va_start(ap, fmt);
                    293:        vfprintf(stderr, fmt, ap);
                    294:        va_end(ap);
                    295:        fputc('\n', stderr);
                    296: }
                    297:
1.2     ! kristaps  298: static void
        !           299: texierr(struct texi *p, const char *fmt, ...)
        !           300: {
        !           301:        va_list  ap;
        !           302:
        !           303:        fprintf(stderr, "%s:%zu:%zu: error: ",
        !           304:                p->files[p->filepos - 1].name,
        !           305:                p->files[p->filepos - 1].line + 1,
        !           306:                p->files[p->filepos - 1].col + 1);
        !           307:        va_start(ap, fmt);
        !           308:        vfprintf(stderr, fmt, ap);
        !           309:        va_end(ap);
        !           310:        fputc('\n', stderr);
        !           311:        texiexit(p);
        !           312:        exit(EXIT_FAILURE);
        !           313: }
        !           314:
1.1       kristaps  315: /*
                    316:  * Put a single data character.
                    317:  * This MUST NOT be a mdoc(7) command: it should be free text that's
                    318:  * outputted to the screen.
                    319:  */
                    320: static void
                    321: texiputchar(struct texi *p, char c)
                    322: {
                    323:
                    324:        if (TEXI_IGN & p->flags)
                    325:                return;
                    326:
                    327:        putchar(c);
                    328:        if ('\n' == c) {
                    329:                p->outcol = 0;
                    330:                p->outmacro = 0;
                    331:                p->seenws = 0;
                    332:        } else
                    333:                p->outcol++;
                    334: }
                    335:
                    336: /*
                    337:  * Put multiple characters (see texiputchar()).
                    338:  */
                    339: static void
                    340: texiputchars(struct texi *p, const char *s)
                    341: {
                    342:
                    343:        while ('\0' != *s)
                    344:                texiputchar(p, *s++);
                    345: }
                    346:
                    347: /*
                    348:  * Put an mdoc(7) command without the trailing newline.
                    349:  * This should ONLY be used for mdoc(7) commands!
                    350:  */
                    351: static void
                    352: texifputs(struct texi *p, const char *s)
                    353: {
                    354:        int      rc;
                    355:
                    356:        if (TEXI_IGN & p->flags)
                    357:                return;
                    358:        if (p->outcol)
                    359:                texiputchar(p, '\n');
                    360:        if (EOF != (rc = fputs(s, stdout)))
                    361:                p->outcol += rc;
                    362: }
                    363:
                    364: /*
                    365:  * Put an mdoc(7) command with the trailing newline.
                    366:  * This should ONLY be used for mdoc(7) commands!
                    367:  */
                    368: static void
                    369: teximacro(struct texi *p, const char *s)
                    370: {
                    371:
                    372:        if (TEXI_IGN & p->flags)
                    373:                return;
                    374:        if (p->outcol)
                    375:                texiputchar(p, '\n');
                    376:        puts(s);
                    377:        p->outcol = 0;
                    378:        p->seenws = 0;
                    379: }
                    380:
                    381: /*
                    382:  * Advance by a single byte in the input stream.
                    383:  */
                    384: static void
                    385: advance(struct texi *p, const char *buf, size_t *pos)
                    386: {
                    387:
                    388:        if ('\n' == buf[*pos]) {
                    389:                p->files[p->filepos - 1].line++;
                    390:                p->files[p->filepos - 1].col = 0;
                    391:        } else
                    392:                p->files[p->filepos - 1].col++;
                    393:
                    394:        (*pos)++;
                    395: }
                    396:
                    397: /*
                    398:  * Advance to the next non-whitespace word in the input stream.
                    399:  * If we're in literal mode, then print all of the whitespace as we're
                    400:  * doing so.
                    401:  */
                    402: static size_t
                    403: advancenext(struct texi *p, const char *buf, size_t sz, size_t *pos)
                    404: {
                    405:
                    406:        if (TEXI_LITERAL & p->flags) {
                    407:                while (*pos < sz && isspace(buf[*pos])) {
                    408:                        texiputchar(p, buf[*pos]);
                    409:                        advance(p, buf, pos);
                    410:                }
                    411:                return(*pos);
                    412:        }
                    413:
                    414:        while (*pos < sz && isspace(buf[*pos])) {
                    415:                p->seenws = 1;
                    416:                /*
                    417:                 * If it looks like we've printed a double-line, then
                    418:                 * output a paragraph.
                    419:                 * FIXME: this is stupid.
                    420:                 */
                    421:                if (*pos && '\n' == buf[*pos] && '\n' == buf[*pos - 1])
                    422:                        teximacro(p, ".Pp");
                    423:                advance(p, buf, pos);
                    424:        }
                    425:        return(*pos);
                    426: }
                    427:
                    428: /*
                    429:  * Advance to the EOLN in the input stream.
                    430:  */
                    431: static size_t
                    432: advanceeoln(struct texi *p, const char *buf, size_t sz, size_t *pos)
                    433: {
                    434:
                    435:        while (*pos < sz && '\n' != buf[*pos])
                    436:                advance(p, buf, pos);
                    437:        return(*pos);
                    438: }
                    439:
                    440: /*
                    441:  * Advance to position "end", which is an absolute position in the
                    442:  * current buffer greater than or equal to the current position.
                    443:  */
                    444: static void
                    445: advanceto(struct texi *p, const char *buf, size_t *pos, size_t end)
                    446: {
                    447:
                    448:        assert(*pos <= end);
                    449:        while (*pos < end)
                    450:                advance(p, buf, pos);
                    451: }
                    452:
                    453: /*
                    454:  * Output a free-form word in the input stream, progressing to the next
                    455:  * command or white-space.
                    456:  * This also will advance the input stream.
                    457:  */
                    458: static void
                    459: texiword(struct texi *p, const char *buf, size_t sz, size_t *pos)
                    460: {
                    461:
                    462:        /*
                    463:         * XXX: if we're in literal mode, then we shouldn't do any
                    464:         * reflowing of text here.
                    465:         */
                    466:        if (p->outcol > 72 && ! (TEXI_LITERAL & p->flags))
                    467:                texiputchar(p, '\n');
                    468:
                    469:        if (p->seenws && p->outcol && ! (TEXI_LITERAL & p->flags))
                    470:                texiputchar(p, ' ');
                    471:
                    472:        p->seenws = 0;
                    473:
                    474:        while (*pos < sz && ! isspace(buf[*pos])) {
                    475:                switch (buf[*pos]) {
                    476:                case ('@'):
                    477:                case ('}'):
                    478:                case ('{'):
                    479:                        return;
                    480:                }
                    481:                if (*pos < sz - 1 &&
                    482:                         '`' == buf[*pos] &&
                    483:                         '`' == buf[*pos + 1]) {
                    484:                        texiputchars(p, "\\(lq");
                    485:                        advance(p, buf, pos);
                    486:                } else if (*pos < sz - 1 &&
                    487:                         '\'' == buf[*pos] &&
                    488:                         '\'' == buf[*pos + 1]) {
                    489:                        texiputchars(p, "\\(rq");
                    490:                        advance(p, buf, pos);
                    491:                } else
                    492:                        texiputchar(p, buf[*pos]);
                    493:                advance(p, buf, pos);
                    494:        }
                    495: }
                    496:
                    497: static enum texicmd
                    498: texicmd(struct texi *p, const char *buf,
                    499:        size_t pos, size_t sz, size_t *end)
                    500: {
                    501:        size_t   i, len;
                    502:
                    503:        assert('@' == buf[pos]);
                    504:        for (*end = ++pos; *end < sz && ! isspace(buf[*end]); (*end)++)
1.2     ! kristaps  505:                if ((*end > pos && '@' == buf[*end]) || '{' == buf[*end])
1.1       kristaps  506:                        break;
                    507:
                    508:        len = *end - pos;
                    509:        for (i = 0; i < TEXICMD__MAX; i++) {
                    510:                if (len != texitoks[i].len)
                    511:                        continue;
                    512:                if (0 == strncmp(texitoks[i].tok, &buf[pos], len))
                    513:                        return(i);
                    514:        }
                    515:
                    516:        texiwarn(p, "bad command: %.*s", (int)len, &buf[pos]);
                    517:        return(TEXICMD__MAX);
                    518: }
                    519:
                    520: static void
                    521: parseeof(struct texi *p, const char *buf, size_t sz)
                    522: {
                    523:        size_t           pos = 0;
                    524:        enum texicmd     cmd;
                    525:        size_t           end;
                    526:
                    527:        while ((pos = advancenext(p, buf, sz, &pos)) < sz) {
                    528:                switch (buf[pos]) {
                    529:                case ('}'):
                    530:                        texiwarn(p, "unexpected \"}\"");
                    531:                        advance(p, buf, &pos);
                    532:                        continue;
                    533:                case ('{'):
                    534:                        texiwarn(p, "unexpected \"{\"");
                    535:                        advance(p, buf, &pos);
                    536:                        continue;
                    537:                case ('@'):
                    538:                        break;
                    539:                default:
                    540:                        texiword(p, buf, sz, &pos);
                    541:                        continue;
                    542:                }
                    543:
                    544:                cmd = texicmd(p, buf, pos, sz, &end);
                    545:                advanceto(p, buf, &pos, end);
                    546:                if (TEXICMD__MAX == cmd)
                    547:                        continue;
                    548:                if (NULL != texitoks[cmd].fp)
                    549:                        (*texitoks[cmd].fp)(p, cmd, buf, sz, &pos);
                    550:        }
                    551: }
                    552:
                    553: static void
                    554: parsebracket(struct texi *p, const char *buf, size_t sz, size_t *pos)
                    555: {
                    556:        size_t           end;
                    557:        enum texicmd     cmd;
                    558:
                    559:        if (*pos == sz || '{' != buf[*pos])
                    560:                return;
                    561:        advance(p, buf, pos);
                    562:
                    563:        while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
                    564:                switch (buf[*pos]) {
                    565:                case ('}'):
                    566:                        advance(p, buf, pos);
                    567:                        return;
                    568:                case ('{'):
                    569:                        texiwarn(p, "unexpected \"{\"");
                    570:                        advance(p, buf, pos);
                    571:                        continue;
                    572:                case ('@'):
                    573:                        break;
                    574:                default:
                    575:                        texiword(p, buf, sz, pos);
                    576:                        continue;
                    577:                }
                    578:
                    579:                cmd = texicmd(p, buf, *pos, sz, &end);
                    580:                advanceto(p, buf, pos, end);
                    581:                if (TEXICMD__MAX == cmd)
                    582:                        continue;
                    583:                if (NULL != texitoks[cmd].fp)
                    584:                        (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
                    585:        }
                    586: }
                    587:
                    588: static void
                    589: parseto(struct texi *p, const char *buf,
                    590:        size_t sz, size_t *pos, const char *endtoken)
                    591: {
                    592:        size_t           end;
                    593:        enum texicmd     cmd;
                    594:        size_t           endtoksz;
                    595:
                    596:        endtoksz = strlen(endtoken);
                    597:        assert(endtoksz > 0);
                    598:
                    599:        while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
                    600:                switch (buf[*pos]) {
                    601:                case ('}'):
                    602:                        texiwarn(p, "unexpected \"}\"");
                    603:                        advance(p, buf, pos);
                    604:                        continue;
                    605:                case ('{'):
                    606:                        texiwarn(p, "unexpected \"{\"");
                    607:                        advance(p, buf, pos);
                    608:                        continue;
                    609:                case ('@'):
                    610:                        break;
                    611:                default:
                    612:                        texiword(p, buf, sz, pos);
                    613:                        continue;
                    614:                }
                    615:
                    616:                cmd = texicmd(p, buf, *pos, sz, &end);
                    617:                advanceto(p, buf, pos, end);
                    618:                if (TEXICMD_END == cmd) {
1.2     ! kristaps  619:                        while (*pos < sz && isws(buf[*pos]))
1.1       kristaps  620:                                advance(p, buf, pos);
                    621:                        /*
                    622:                         * FIXME: skip tabs and also check the full
                    623:                         * word, not just its initial substring!
                    624:                         */
                    625:                        if (sz - *pos >= endtoksz && 0 == strncmp
                    626:                                 (&buf[*pos], endtoken, endtoksz)) {
                    627:                                advanceeoln(p, buf, sz, pos);
                    628:                                break;
                    629:                        }
                    630:                        texiwarn(p, "unexpected \"end\"");
                    631:                        advanceeoln(p, buf, sz, pos);
                    632:                        continue;
                    633:                } else if (TEXICMD__MAX != cmd)
                    634:                        if (NULL != texitoks[cmd].fp)
                    635:                                (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
                    636:        }
                    637: }
                    638:
                    639: static void
1.2     ! kristaps  640: parsefile(struct texi *p, const char *fname)
        !           641: {
        !           642:        struct texifile  *f;
        !           643:        int               fd;
        !           644:        struct stat       st;
        !           645:
        !           646:        assert(p->filepos < 64);
        !           647:        f = &p->files[p->filepos];
        !           648:        memset(f, 0, sizeof(struct texifile));
        !           649:
        !           650:        f->name = fname;
        !           651:        if (-1 == (fd = open(fname, O_RDONLY, 0))) {
        !           652:                texiabort(p, fname);
        !           653:        } else if (-1 == fstat(fd, &st)) {
        !           654:                close(fd);
        !           655:                texiabort(p, fname);
        !           656:        }
        !           657:
        !           658:        f->mapsz = st.st_size;
        !           659:        f->map = mmap(NULL, f->mapsz,
        !           660:                PROT_READ, MAP_SHARED, fd, 0);
        !           661:        close(fd);
        !           662:
        !           663:        if (MAP_FAILED == f->map)
        !           664:                texiabort(p, fname);
        !           665:
        !           666:        p->filepos++;
        !           667:        parseeof(p, f->map, f->mapsz);
        !           668:        texifilepop(p);
        !           669: }
        !           670:
        !           671: static void
1.1       kristaps  672: doignblock(struct texi *p, enum texicmd cmd,
                    673:        const char *buf, size_t sz, size_t *pos)
                    674: {
                    675:        unsigned int     sv = p->flags;
                    676:        const char      *blockname;
                    677:
                    678:        p->flags |= TEXI_IGN;
                    679:        switch (cmd) {
                    680:        case (TEXICMD_COPYING):
                    681:                blockname = "copying";
                    682:                break;
                    683:        case (TEXICMD_DETAILMENU):
                    684:                blockname = "detailmenu";
                    685:                break;
                    686:        case (TEXICMD_DIRENTRY):
                    687:                blockname = "direntry";
                    688:                break;
                    689:        case (TEXICMD_IFHTML):
                    690:                blockname = "ifhtml";
                    691:                break;
                    692:        case (TEXICMD_IFTEX):
                    693:                blockname = "iftex";
                    694:                break;
                    695:        case (TEXICMD_MENU):
                    696:                blockname = "menu";
                    697:                break;
                    698:        case (TEXICMD_TEX):
                    699:                blockname = "tex";
                    700:                break;
                    701:        case (TEXICMD_TITLEPAGE):
                    702:                blockname = "titlepage";
                    703:                break;
                    704:        default:
                    705:                abort();
                    706:        }
                    707:        parseto(p, buf, sz, pos, blockname);
                    708:        p->flags = sv;
                    709: }
                    710:
                    711: static void
                    712: doifnottex(struct texi *p, enum texicmd cmd,
                    713:        const char *buf, size_t sz, size_t *pos)
                    714: {
                    715:
                    716:        parseto(p, buf, sz, pos, "ifnottex");
                    717: }
                    718:
                    719: static void
                    720: doinline(struct texi *p, const char *buf,
                    721:        size_t sz, size_t *pos, const char *macro)
                    722: {
                    723:
                    724:        if ( ! p->outmacro)
                    725:                texifputs(p, ".");
                    726:        texiputchars(p, macro);
                    727:        texiputchar(p, ' ');
                    728:        p->seenws = 0;
                    729:        p->outmacro++;
                    730:        parsebracket(p, buf, sz, pos);
                    731:        p->outmacro--;
                    732:        if (*pos < sz - 1 &&
                    733:                 ismpunct(buf[*pos]) &&
                    734:                 isspace(buf[*pos + 1])) {
                    735:                texiputchar(p, ' ');
                    736:                texiputchar(p, buf[*pos]);
                    737:                advance(p, buf, pos);
                    738:        }
                    739:        if ( ! p->outmacro)
                    740:                texiputchar(p, '\n');
                    741: }
                    742:
                    743: static void
1.2     ! kristaps  744: doinclude(struct texi *p, enum texicmd cmd,
        !           745:        const char *buf, size_t sz, size_t *pos)
        !           746: {
        !           747:        char     fname[PATH_MAX], path[PATH_MAX];
        !           748:        size_t   i;
        !           749:        int      rc;
        !           750:
        !           751:        while (*pos < sz && ' ' == buf[*pos])
        !           752:                advance(p, buf, pos);
        !           753:
        !           754:        /* Read in the filename. */
        !           755:        for (i = 0; *pos < sz && '\n' != buf[*pos]; i++) {
        !           756:                if (i == sizeof(fname) - 1)
        !           757:                        break;
        !           758:                fname[i] = buf[*pos];
        !           759:                advance(p, buf, pos);
        !           760:        }
        !           761:
        !           762:        if (i == 0)
        !           763:                texierr(p, "path too short");
        !           764:        else if ('\n' != buf[*pos])
        !           765:                texierr(p, "path too long");
        !           766:        else if ('/' == fname[0])
        !           767:                texierr(p, "no absolute paths");
        !           768:        fname[i] = '\0';
        !           769:
        !           770:        if (strstr(fname, "../") || strstr(fname, "/.."))
        !           771:                texierr(p, "insecure path");
        !           772:
        !           773:        /* Append filename to original name's directory. */
        !           774:        rc = snprintf(path, sizeof(path), "%s/%s", p->dir, fname);
        !           775:        if (rc < 0)
        !           776:                texierr(p, "couldn't format filename");
        !           777:        else if ((size_t)rc >= sizeof(path))
        !           778:                texierr(p, "path too long");
        !           779:
        !           780:        /* Pump through to parser. */
        !           781:        parsefile(p, path);
        !           782: }
        !           783:
        !           784: static void
1.1       kristaps  785: doitalic(struct texi *p, enum texicmd cmd,
                    786:        const char *buf, size_t sz, size_t *pos)
                    787: {
                    788:
                    789:        texiputchars(p, "\\fI");
                    790:        parsebracket(p, buf, sz, pos);
                    791:        texiputchars(p, "\\fP");
                    792: }
                    793:
                    794: static void
                    795: doliteral(struct texi *p, enum texicmd cmd,
                    796:        const char *buf, size_t sz, size_t *pos)
                    797: {
                    798:
                    799:        if (TEXI_LITERAL & p->flags)
                    800:                parsebracket(p, buf, sz, pos);
                    801:        else
                    802:                doinline(p, buf, sz, pos, "Li");
                    803: }
                    804:
                    805: static void
                    806: doemph(struct texi *p, enum texicmd cmd,
                    807:        const char *buf, size_t sz, size_t *pos)
                    808: {
                    809:
                    810:        if (TEXI_LITERAL & p->flags)
                    811:                doitalic(p, cmd, buf, sz, pos);
                    812:        else
                    813:                doinline(p, buf, sz, pos, "Em");
                    814: }
                    815:
                    816: static void
                    817: docommand(struct texi *p, enum texicmd cmd,
                    818:        const char *buf, size_t sz, size_t *pos)
                    819: {
                    820:
                    821:        doinline(p, buf, sz, pos, "Xr");
                    822: }
                    823:
                    824: static void
                    825: dobracket(struct texi *p, enum texicmd cmd,
                    826:        const char *buf, size_t sz, size_t *pos)
                    827: {
                    828:
                    829:        parsebracket(p, buf, sz, pos);
                    830: }
                    831:
                    832: static void
                    833: dofile(struct texi *p, enum texicmd cmd,
                    834:        const char *buf, size_t sz, size_t *pos)
                    835: {
                    836:
                    837:        if (TEXI_LITERAL & p->flags)
                    838:                parsebracket(p, buf, sz, pos);
                    839:        else
                    840:                doinline(p, buf, sz, pos, "Pa");
                    841: }
                    842:
                    843: static void
                    844: doexample(struct texi *p, enum texicmd cmd,
                    845:        const char *buf, size_t sz, size_t *pos)
                    846: {
                    847:        unsigned int    sv;
                    848:
                    849:        teximacro(p, ".Bd -literal");
                    850:        advanceeoln(p, buf, sz, pos);
                    851:        if ('\n' == buf[*pos])
                    852:                advance(p, buf, pos);
                    853:        sv = p->flags;
                    854:        p->flags |= TEXI_LITERAL;
                    855:        parseto(p, buf, sz, pos, "example");
                    856:        p->flags = sv;
                    857:        teximacro(p, ".Ed");
                    858: }
                    859:
                    860: static void
                    861: dobye(struct texi *p, enum texicmd cmd,
                    862:        const char *buf, size_t sz, size_t *pos)
                    863: {
                    864:
                    865:        texiexit(p);
                    866:        exit(EXIT_SUCCESS);
                    867: }
                    868:
                    869: static void
                    870: dosymbol(struct texi *p, enum texicmd cmd,
                    871:        const char *buf, size_t sz, size_t *pos)
                    872: {
                    873:
                    874:        switch (cmd) {
                    875:        case (TEXICMD_AT):
                    876:                texiputchars(p, "@");
                    877:                break;
                    878:        case (TEXICMD_COPYRIGHT):
                    879:                texiputchars(p, "\\(co");
                    880:                break;
1.2     ! kristaps  881:        case (TEXICMD_DOTS):
        !           882:                texiputchars(p, "...");
        !           883:                break;
1.1       kristaps  884:        case (TEXICMD_LATEX):
                    885:                texiputchars(p, "LaTeX");
                    886:                break;
                    887:        case (TEXICMD_TEXSYM):
                    888:                texiputchars(p, "TeX");
                    889:                break;
                    890:        default:
                    891:                abort();
                    892:        }
                    893:
                    894:        doignbracket(p, cmd, buf, sz, pos);
                    895: }
                    896:
                    897: static void
                    898: doquotation(struct texi *p, enum texicmd cmd,
                    899:        const char *buf, size_t sz, size_t *pos)
                    900: {
                    901:
                    902:        teximacro(p, ".Qo");
                    903:        parseto(p, buf, sz, pos, "quotation");
                    904:        teximacro(p, ".Qc");
                    905: }
                    906:
                    907: static void
                    908: doarg1(struct texi *p, enum texicmd cmd,
                    909:        const char *buf, size_t sz, size_t *pos)
                    910: {
                    911:
                    912:        if (*pos == sz || '{' != buf[*pos])
                    913:                return;
                    914:        advance(p, buf, pos);
                    915:        if ( ! p->outmacro)
                    916:                texifputs(p, ".");
                    917:        switch (cmd) {
                    918:        case (TEXICMD_EMAIL):
                    919:                texiputchars(p, "Lk ");
                    920:                break;
                    921:        case (TEXICMD_URL):
                    922:                texiputchars(p, "Mt ");
                    923:                break;
                    924:        default:
                    925:                abort();
                    926:        }
                    927:        while (*pos < sz && '}' != buf[*pos] && ',' != buf[*pos]) {
                    928:                texiputchar(p, buf[*pos]);
                    929:                advance(p, buf, pos);
                    930:        }
                    931:        while (*pos < sz && '}' != buf[*pos])
                    932:                advance(p, buf, pos);
                    933:        if (*pos < sz)
                    934:                advance(p, buf, pos);
                    935:        if (*pos < sz - 1 &&
                    936:                 ismpunct(buf[*pos]) &&
                    937:                 isspace(buf[*pos + 1])) {
                    938:                texiputchar(p, ' ');
                    939:                texiputchar(p, buf[*pos]);
                    940:                advance(p, buf, pos);
                    941:        }
                    942:        if ( ! p->outmacro)
                    943:                texiputchar(p, '\n');
                    944: }
                    945:
                    946: static void
                    947: dosubsection(struct texi *p, enum texicmd cmd,
                    948:                const char *buf, size_t sz, size_t *pos)
                    949: {
                    950:
                    951:        if (TEXI_IGN & p->flags) {
                    952:                advanceeoln(p, buf, sz, pos);
                    953:                return;
                    954:        }
                    955:        while (*pos < sz && ' ' == buf[*pos])
                    956:                advance(p, buf, pos);
                    957:        texifputs(p, ".Pp");
                    958:        while (*pos < sz && '\n' != buf[*pos]) {
                    959:                texiputchar(p, buf[*pos]);
                    960:                advance(p, buf, pos);
                    961:        }
                    962:        texifputs(p, ".Pp");
                    963: }
                    964:
                    965: static void
                    966: dosection(struct texi *p, enum texicmd cmd,
                    967:                const char *buf, size_t sz, size_t *pos)
                    968: {
                    969:
                    970:        if (TEXI_IGN & p->flags) {
                    971:                advanceeoln(p, buf, sz, pos);
                    972:                return;
                    973:        }
1.2     ! kristaps  974:        while (*pos < sz && isws(buf[*pos]) )
1.1       kristaps  975:                advance(p, buf, pos);
                    976:        texifputs(p, ".Ss ");
                    977:        while (*pos < sz && '\n' != buf[*pos]) {
                    978:                texiputchar(p, buf[*pos]);
                    979:                advance(p, buf, pos);
                    980:        }
                    981:        texiputchar(p, '\n');
                    982: }
                    983:
                    984: static void
                    985: dosh(struct texi *p, enum texicmd cmd,
                    986:        const char *buf, size_t sz, size_t *pos)
                    987: {
                    988:
                    989:        if (TEXI_IGN & p->flags) {
                    990:                advanceeoln(p, buf, sz, pos);
                    991:                return;
                    992:        }
1.2     ! kristaps  993:        while (*pos < sz && isws(buf[*pos]))
1.1       kristaps  994:                advance(p, buf, pos);
                    995:        texifputs(p, ".Sh ");
                    996:        while (*pos < sz && '\n' != buf[*pos]) {
                    997:                texiputchar(p, toupper(buf[*pos]));
                    998:                advance(p, buf, pos);
                    999:        }
                   1000:        texiputchar(p, '\n');
                   1001: }
                   1002:
                   1003: static void
                   1004: dotop(struct texi *p, enum texicmd cmd,
                   1005:        const char *buf, size_t sz, size_t *pos)
                   1006: {
                   1007:
                   1008:        p->flags &= ~TEXI_HEADER;
                   1009:        advanceeoln(p, buf, sz, pos);
1.2     ! kristaps 1010:        teximacro(p, ".Dd $Mdocdate: February 16 2015 $");
1.1       kristaps 1011:        teximacro(p, ".Dt SOMETHING 7");
                   1012:        teximacro(p, ".Os");
                   1013:        teximacro(p, ".Sh NAME");
                   1014:        teximacro(p, ".Nm Something");
                   1015:        teximacro(p, ".Nd Something");
                   1016: }
                   1017:
                   1018: static void
                   1019: doitem(struct texi *p, enum texicmd cmd,
                   1020:        const char *buf, size_t sz, size_t *pos)
                   1021: {
                   1022:        size_t   end;
                   1023:
                   1024:        /* See if we have arguments... */
                   1025:        for (end = *pos; end < sz; end++)
1.2     ! kristaps 1026:                if ( ! isws(buf[end]))
1.1       kristaps 1027:                        break;
                   1028:
                   1029:        /* If we have arguments, print them too. */
                   1030:        if ('\n' != buf[end]) {
                   1031:                texifputs(p, ".It");
                   1032:                /* FIXME: process commands. */
                   1033:                while (*pos < sz && '\n' != buf[*pos]) {
                   1034:                        texiputchar(p, buf[*pos]);
                   1035:                        advance(p, buf, pos);
                   1036:                }
                   1037:                texiputchar(p, '\n');
                   1038:        } else
                   1039:                teximacro(p, ".It");
                   1040: }
                   1041:
                   1042: static void
                   1043: dotable(struct texi *p, enum texicmd cmd,
                   1044:        const char *buf, size_t sz, size_t *pos)
                   1045: {
                   1046:
                   1047:        teximacro(p, ".Bl -tag -width Ds");
                   1048:        parseto(p, buf, sz, pos, "table");
                   1049:        teximacro(p, ".El");
                   1050: }
                   1051:
                   1052: static void
1.2     ! kristaps 1053: doenumerate(struct texi *p, enum texicmd cmd,
        !          1054:        const char *buf, size_t sz, size_t *pos)
        !          1055: {
        !          1056:
        !          1057:        teximacro(p, ".Bl -enum");
        !          1058:        parseto(p, buf, sz, pos, "enumerate");
        !          1059:        teximacro(p, ".El");
        !          1060: }
        !          1061:
        !          1062: static void
1.1       kristaps 1063: doitemize(struct texi *p, enum texicmd cmd,
                   1064:        const char *buf, size_t sz, size_t *pos)
                   1065: {
                   1066:
                   1067:        teximacro(p, ".Bl -bullet");
                   1068:        parseto(p, buf, sz, pos, "itemize");
                   1069:        teximacro(p, ".El");
                   1070: }
                   1071:
                   1072: static void
                   1073: doignbracket(struct texi *p, enum texicmd cmd,
                   1074:        const char *buf, size_t sz, size_t *pos)
                   1075: {
                   1076:        unsigned int     sv = p->flags;
                   1077:
                   1078:        p->flags |= TEXI_IGN;
                   1079:        parsebracket(p, buf, sz, pos);
                   1080:        p->flags = sv;
                   1081: }
                   1082:
                   1083: static void
                   1084: doignline(struct texi *p, enum texicmd cmd,
                   1085:        const char *buf, size_t sz, size_t *pos)
                   1086: {
                   1087:
                   1088:        advanceeoln(p, buf, sz, pos);
                   1089:        if (*pos < sz)
                   1090:                advance(p, buf, pos);
                   1091: }
                   1092:
                   1093: int
                   1094: main(int argc, char *argv[])
                   1095: {
                   1096:        struct texi      texi;
1.2     ! kristaps 1097:        int              c;
        !          1098:        char            *path, *dir;
1.1       kristaps 1099:        const char      *progname;
                   1100:
                   1101:        progname = strrchr(argv[0], '/');
                   1102:        if (progname == NULL)
                   1103:                progname = argv[0];
                   1104:        else
                   1105:                ++progname;
                   1106:
                   1107:        while (-1 != (c = getopt(argc, argv, "")))
                   1108:                switch (c) {
                   1109:                default:
                   1110:                        goto usage;
                   1111:                }
                   1112:
                   1113:        argv += optind;
                   1114:        if (0 == (argc -= optind))
                   1115:                goto usage;
                   1116:
1.2     ! kristaps 1117:        if (NULL == (path = strdup(argv[0]))) {
        !          1118:                perror(NULL);
        !          1119:                exit(EXIT_FAILURE);
        !          1120:        } else if (NULL == (dir = dirname(path))) {
        !          1121:                perror(argv[0]);
        !          1122:                free(path);
        !          1123:                exit(EXIT_FAILURE);
        !          1124:        }
        !          1125:        free(path);
        !          1126:
1.1       kristaps 1127:        memset(&texi, 0, sizeof(struct texi));
                   1128:        texi.flags = TEXI_HEADER;
1.2     ! kristaps 1129:        texi.dir = strdup(dir);
        !          1130:        parsefile(&texi, argv[0]);
        !          1131:        texiexit(&texi);
        !          1132:        return(EXIT_FAILURE);
1.1       kristaps 1133: usage:
                   1134:        fprintf(stderr, "usage: %s file\n", progname);
                   1135:        return(EXIT_FAILURE);
                   1136: }

CVSweb