[BACK]Return to util.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / texi2mdoc

Annotation of texi2mdoc/util.c, Revision 1.14

1.14    ! kristaps    1: /*     $Id: util.c,v 1.13 2015/02/24 14:35:40 kristaps Exp $ */
1.1       kristaps    2: /*
                      3:  * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
                      4:  *
                      5:  * Permission to use, copy, modify, and distribute this software for any
                      6:  * purpose with or without fee is hereby granted, provided that the above
                      7:  * copyright notice and this permission notice appear in all copies.
                      8:  *
                      9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     16:  */
                     17: #include <sys/mman.h>
                     18: #include <sys/stat.h>
                     19:
                     20: #include <assert.h>
                     21: #include <ctype.h>
                     22: #include <fcntl.h>
                     23: #include <getopt.h>
                     24: #include <libgen.h>
                     25: #include <limits.h>
                     26: #include <stdarg.h>
                     27: #include <stdio.h>
                     28: #include <stdlib.h>
                     29: #include <string.h>
                     30: #include <time.h>
                     31: #include <unistd.h>
                     32:
                     33: #include "extern.h"
                     34:
                     35: /*
                     36:  * Unmap the top-most file in the stack of files currently opened (that
                     37:  * is, nested calls to parsefile()).
                     38:  */
                     39: void
                     40: texifilepop(struct texi *p)
                     41: {
                     42:        struct texifile *f;
                     43:
                     44:        assert(p->filepos > 0);
                     45:        f = &p->files[--p->filepos];
1.14    ! kristaps   46:        free(f->map);
1.1       kristaps   47: }
                     48:
1.7       kristaps   49: static void
                     50: teximacrofree(struct teximacro *p)
                     51: {
                     52:        size_t   i;
                     53:
                     54:        for (i = 0; i < p->argsz; i++)
                     55:                free(p->args[i]);
                     56:
                     57:        free(p->args);
                     58:        free(p->key);
                     59:        free(p->value);
                     60: }
                     61:
                     62: static void
                     63: texivaluefree(struct texivalue *p)
                     64: {
                     65:
                     66:        free(p->key);
                     67:        free(p->value);
                     68: }
                     69:
1.1       kristaps   70: /*
                     71:  * Unmap all files that we're currently using and free all resources
                     72:  * that we've allocated during the parse.
                     73:  * The utility should exit(...) after this is called.
                     74:  */
                     75: void
                     76: texiexit(struct texi *p)
                     77: {
                     78:        size_t   i;
                     79:
                     80:        /* Make sure we're newline-terminated. */
                     81:        if (p->outcol)
                     82:                putchar('\n');
                     83:
                     84:        /* Unmap all files. */
                     85:        while (p->filepos > 0)
                     86:                texifilepop(p);
                     87:
1.7       kristaps   88:        for (i = 0; i < p->macrosz; i++)
                     89:                teximacrofree(&p->macros[i]);
1.1       kristaps   90:        for (i = 0; i < p->dirsz; i++)
                     91:                free(p->dirs[i]);
1.4       kristaps   92:        for (i = 0; i < p->indexsz; i++)
                     93:                free(p->indexs[i]);
1.7       kristaps   94:        for (i = 0; i < p->valsz; i++)
                     95:                texivaluefree(&p->vals[i]);
1.4       kristaps   96:
1.7       kristaps   97:        free(p->macros);
1.1       kristaps   98:        free(p->vals);
1.4       kristaps   99:        free(p->indexs);
1.1       kristaps  100:        free(p->dirs);
                    101:        free(p->subtitle);
                    102:        free(p->title);
                    103: }
                    104:
                    105: /*
                    106:  * Fatal error: unmap all files and exit.
                    107:  * The "errstring" is passed to perror(3).
                    108:  */
                    109: void
                    110: texiabort(struct texi *p, const char *errstring)
                    111: {
                    112:
                    113:        perror(errstring);
                    114:        texiexit(p);
                    115:        exit(EXIT_FAILURE);
                    116: }
                    117:
                    118: /*
                    119:  * Print a generic warning message (to stderr) tied to our current
                    120:  * location in the parse sequence.
                    121:  */
                    122: void
                    123: texiwarn(const struct texi *p, const char *fmt, ...)
                    124: {
                    125:        va_list  ap;
                    126:
                    127:        fprintf(stderr, "%s:%zu:%zu: warning: ",
                    128:                p->files[p->filepos - 1].name,
                    129:                p->files[p->filepos - 1].line + 1,
                    130:                p->files[p->filepos - 1].col + 1);
                    131:        va_start(ap, fmt);
                    132:        vfprintf(stderr, fmt, ap);
                    133:        va_end(ap);
                    134:        fputc('\n', stderr);
                    135: }
                    136:
                    137: /*
                    138:  * Print an error message (to stderr) tied to our current location in
                    139:  * the parse sequence, invoke texiexit(), then die.
                    140:  */
                    141: void
                    142: texierr(struct texi *p, const char *fmt, ...)
                    143: {
                    144:        va_list  ap;
                    145:
                    146:        fprintf(stderr, "%s:%zu:%zu: error: ",
                    147:                p->files[p->filepos - 1].name,
                    148:                p->files[p->filepos - 1].line + 1,
                    149:                p->files[p->filepos - 1].col + 1);
                    150:        va_start(ap, fmt);
                    151:        vfprintf(stderr, fmt, ap);
                    152:        va_end(ap);
                    153:        fputc('\n', stderr);
                    154:        texiexit(p);
                    155:        exit(EXIT_FAILURE);
                    156: }
                    157:
                    158: /*
                    159:  * Put a single data character to the output if we're not ignoring.
1.13      kristaps  160:  * Escape starting a line with a control character and slashes.
1.1       kristaps  161:  */
                    162: void
                    163: texiputchar(struct texi *p, char c)
                    164: {
                    165:
                    166:        if (p->ign)
                    167:                return;
                    168:        if ('.' == c && 0 == p->outcol)
                    169:                fputs("\\&", stdout);
1.10      kristaps  170:        if ('\'' == c && 0 == p->outcol)
                    171:                fputs("\\&", stdout);
1.1       kristaps  172:
                    173:        putchar(c);
1.13      kristaps  174:        if ('\\' == c)
                    175:                putchar('e');
1.1       kristaps  176:        p->seenvs = 0;
                    177:        if ('\n' == c) {
                    178:                p->outcol = 0;
                    179:                p->seenws = 0;
                    180:        } else
                    181:                p->outcol++;
                    182: }
                    183:
                    184: /*
1.13      kristaps  185:  * Put an opaque series of characters.
                    186:  * Characters starting a line with a control character are escaped, but
                    187:  * that's it, so don't use this for non-controlled sequences of text.
1.1       kristaps  188:  */
                    189: void
                    190: texiputchars(struct texi *p, const char *s)
                    191: {
                    192:
1.13      kristaps  193:        if (p->ign)
                    194:                return;
                    195:        if ('.' == *s && 0 == p->outcol)
                    196:                fputs("\\&", stdout);
                    197:        if ('\'' == *s && 0 == p->outcol)
                    198:                fputs("\\&", stdout);
                    199:        p->outcol += fputs(s, stdout);
                    200:        p->seenvs = 0;
1.9       kristaps  201: }
                    202:
                    203: /*
                    204:  * This puts all characters onto the output stream but makes sure to
                    205:  * escape mdoc(7) slashes.
1.14    ! kristaps  206:  * FIXME: useless.
1.9       kristaps  207:  */
                    208: void
1.14    ! kristaps  209: texiputbuf(struct texi *p, size_t start, size_t end)
1.9       kristaps  210: {
                    211:
1.14    ! kristaps  212:        for ( ; start < end; start++)
        !           213:                texiputchar(p, BUF(p)[start]);
1.1       kristaps  214: }
                    215:
                    216: /*
                    217:  * Close an mdoc(7) macro opened with teximacroopen().
                    218:  * If there are no more macros on the line, prints a newline.
                    219:  */
                    220: void
                    221: teximacroclose(struct texi *p)
                    222: {
                    223:
                    224:        if (p->ign)
                    225:                return;
                    226:
                    227:        if (0 == --p->outmacro) {
                    228:                putchar('\n');
                    229:                p->outcol = p->seenws = 0;
                    230:        }
                    231: }
                    232:
                    233: /*
                    234:  * Open a mdoc(7) macro.
                    235:  * This is used for line macros, e.g., Qq [foo bar baz].
                    236:  * It can be invoked for nested macros, e.g., Qq Li foo .
                    237:  * TODO: flush-right punctuation (e.g., parenthesis).
                    238:  */
                    239: void
                    240: teximacroopen(struct texi *p, const char *s)
                    241: {
                    242:        int      rc;
                    243:
                    244:        if (p->ign)
                    245:                return;
                    246:
                    247:        if (p->outcol && 0 == p->outmacro) {
                    248:                putchar('\n');
                    249:                p->outcol = 0;
                    250:        }
                    251:
                    252:        if (0 == p->outmacro)
                    253:                putchar('.');
                    254:        else
                    255:                putchar(' ');
                    256:
                    257:        if (EOF != (rc = fputs(s, stdout)))
                    258:                p->outcol += rc;
                    259:
                    260:        putchar(' ');
                    261:        p->outcol++;
                    262:        p->outmacro++;
                    263:        p->seenws = 0;
                    264: }
                    265:
                    266: /*
                    267:  * Put a stadnalone mdoc(7) command with the trailing newline.
                    268:  */
                    269: void
                    270: teximacro(struct texi *p, const char *s)
                    271: {
                    272:
                    273:        if (p->ign)
                    274:                return;
                    275:
                    276:        if (p->outmacro)
                    277:                texierr(p, "\"%s\" in open line scope!?", s);
                    278:        if (p->literal)
                    279:                texierr(p, "\"%s\" in a literal scope!?", s);
                    280:
                    281:        if (p->outcol)
                    282:                putchar('\n');
                    283:
                    284:        putchar('.');
                    285:        puts(s);
                    286:        p->outcol = p->seenws = 0;
                    287: }
                    288:
                    289: /*
                    290:  * Introduce vertical space during normal (non-macro) input.
                    291:  */
                    292: void
                    293: texivspace(struct texi *p)
                    294: {
                    295:
1.5       kristaps  296:        if (p->seenvs || TEXILIST_TABLE == p->list)
1.1       kristaps  297:                return;
                    298:        teximacro(p, "Pp");
                    299:        p->seenvs = 1;
                    300: }
                    301:
                    302: /*
                    303:  * Advance by a single byte in the input stream, adjusting our location
                    304:  * in the current input file.
                    305:  */
                    306: void
1.14    ! kristaps  307: advance(struct texi *p, size_t *pos)
1.1       kristaps  308: {
                    309:
1.14    ! kristaps  310:        if ('\n' == BUF(p)[*pos]) {
1.1       kristaps  311:                p->files[p->filepos - 1].line++;
                    312:                p->files[p->filepos - 1].col = 0;
                    313:        } else
                    314:                p->files[p->filepos - 1].col++;
                    315:
                    316:        (*pos)++;
                    317: }
                    318:
                    319: /*
                    320:  * It's common to wait punctuation to float on the right side of macro
                    321:  * lines in mdoc(7), e.g., ".Em hello ) ."
                    322:  * This function does so, and should be called before teximacroclose().
                    323:  * It will detect that it's the last in the nested macros and
                    324:  * appropriately flush-left punctuation alongside the macro.
                    325:  */
                    326: void
1.14    ! kristaps  327: texipunctuate(struct texi *p, size_t *pos)
1.1       kristaps  328: {
                    329:        size_t   start, end;
                    330:
                    331:        if (1 != p->outmacro)
                    332:                return;
                    333:
1.14    ! kristaps  334:        for (start = end = *pos; end < BUFSZ(p); end++) {
        !           335:                switch (BUF(p)[end]) {
1.1       kristaps  336:                case (','):
                    337:                case (')'):
                    338:                case ('.'):
                    339:                case ('"'):
                    340:                case (':'):
                    341:                case ('!'):
                    342:                case ('?'):
                    343:                        continue;
                    344:                default:
                    345:                        break;
                    346:                }
                    347:                break;
                    348:        }
                    349:        if (end == *pos)
                    350:                return;
1.14    ! kristaps  351:        if (end + 1 == BUFSZ(p) || ' ' == BUF(p)[end] ||
        !           352:                '\n' == BUF(p)[end]) {
1.1       kristaps  353:                for ( ; start < end; start++) {
                    354:                        texiputchar(p, ' ');
1.14    ! kristaps  355:                        texiputchar(p, BUF(p)[start]);
        !           356:                        advance(p, pos);
1.1       kristaps  357:                }
                    358:        }
                    359: }
                    360:
                    361: /*
                    362:  * Advance to the next non-whitespace word in the input stream.
                    363:  * If we're in literal mode, then print all of the whitespace as we're
                    364:  * doing so.
                    365:  */
                    366: static size_t
1.14    ! kristaps  367: advancenext(struct texi *p, size_t *pos)
1.1       kristaps  368: {
                    369:
                    370:        if (p->literal) {
1.14    ! kristaps  371:                while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) {
        !           372:                        texiputchar(p, BUF(p)[*pos]);
        !           373:                        advance(p, pos);
1.1       kristaps  374:                }
                    375:                return(*pos);
                    376:        }
                    377:
1.14    ! kristaps  378:        while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) {
1.1       kristaps  379:                p->seenws = 1;
                    380:                /*
                    381:                 * If it looks like we've printed a double-line, then
                    382:                 * output a paragraph.
                    383:                 * FIXME: this is stupid.
                    384:                 */
1.14    ! kristaps  385:                if (*pos && '\n' == BUF(p)[*pos] && '\n' == BUF(p)[*pos - 1])
1.1       kristaps  386:                        texivspace(p);
1.14    ! kristaps  387:                advance(p, pos);
1.1       kristaps  388:        }
                    389:        return(*pos);
                    390: }
                    391:
                    392: /*
                    393:  * Advance to the EOLN in the input stream.
                    394:  * NOTE: THIS SHOULD NOT BE CALLED ON BLANK TEXT, as it will read up to
                    395:  * the @\n.
                    396:  */
                    397: size_t
1.14    ! kristaps  398: advanceeoln(struct texi *p, size_t *pos, int consumenl)
1.1       kristaps  399: {
                    400:
1.14    ! kristaps  401:        while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos])
        !           402:                advance(p, pos);
        !           403:        if (*pos < BUFSZ(p) && consumenl)
        !           404:                advance(p, pos);
1.1       kristaps  405:        return(*pos);
                    406: }
                    407:
                    408: /*
                    409:  * Advance to position "end", which is an absolute position in the
                    410:  * current buffer greater than or equal to the current position.
                    411:  */
                    412: void
1.14    ! kristaps  413: advanceto(struct texi *p, size_t *pos, size_t end)
1.1       kristaps  414: {
                    415:
                    416:        assert(*pos <= end);
                    417:        while (*pos < end)
1.14    ! kristaps  418:                advance(p, pos);
1.1       kristaps  419: }
                    420:
1.7       kristaps  421: static void
1.14    ! kristaps  422: texiexecmacro(struct texi *p, struct teximacro *m, size_t *pos)
1.7       kristaps  423: {
1.11      kristaps  424:        size_t            valsz, realsz, aasz, asz,
                    425:                           ssz, i, j, k, start, end;
                    426:        char             *val;
                    427:        char            **args;
                    428:        const char       *cp;
1.7       kristaps  429:
1.14    ! kristaps  430:        args = argparse(p, pos, &asz, m->argsz);
1.7       kristaps  431:        if (asz != m->argsz)
                    432:                texiwarn(p, "invalid macro argument length");
                    433:        aasz = asz < m->argsz ? asz : m->argsz;
                    434:
                    435:        if (0 == aasz) {
1.14    ! kristaps  436:                texisplice(p, m->value, strlen(m->value), pos);
1.7       kristaps  437:                return;
                    438:        }
                    439:
                    440:        valsz = realsz = strlen(m->value);
                    441:        val = strdup(m->value);
                    442:
                    443:        for (i = j = 0; i < realsz; i++) {
                    444:                /* Parse blindly til the backslash delimiter. */
                    445:                if ('\\' != m->value[i]) {
                    446:                        val[j++] = m->value[i];
                    447:                        val[j] = '\0';
                    448:                        continue;
                    449:                } else if (i == realsz - 1)
                    450:                        texierr(p, "trailing argument name delimiter");
                    451:
                    452:                /* Double-backslash is escaped. */
                    453:                if ('\\' == m->value[i + 1]) {
                    454:                        val[j++] = m->value[i++];
                    455:                        val[j] = '\0';
                    456:                        continue;
                    457:                }
                    458:
                    459:                assert('\\' == m->value[i] && i < realsz - 1);
                    460:
                    461:                /* Parse to terminating delimiter. */
                    462:                /* FIXME: embedded, escaped delimiters? */
                    463:                for (start = end = i + 1; end < realsz; end++)
                    464:                        if ('\\' == m->value[end])
                    465:                                break;
                    466:                if (end == realsz)
                    467:                        texierr(p, "unterminated argument name");
                    468:
                    469:                for (k = 0; k < aasz; k++) {
                    470:                        if ((ssz = strlen(m->args[k])) != (end - start))
                    471:                                continue;
                    472:                        if (strncmp(&m->value[start], m->args[k], ssz))
                    473:                                continue;
                    474:                        break;
                    475:                }
                    476:
                    477:                /*
                    478:                 * Argument didn't exist in argument table.
1.14    ! kristaps  479:                 * Just ignore it.
1.7       kristaps  480:                 */
                    481:                if (k == aasz) {
1.14    ! kristaps  482:                        i = end;
1.7       kristaps  483:                        continue;
                    484:                }
                    485:
                    486:                if (strlen(args[k]) > ssz) {
                    487:                        valsz += strlen(args[k]);
                    488:                        val = realloc(val, valsz + 1);
                    489:                        if (NULL == val)
                    490:                                texiabort(p, NULL);
                    491:                }
                    492:
1.11      kristaps  493:                for (cp = args[k]; '\0' != *cp; cp++)
                    494:                        val[j++] = *cp;
                    495:
                    496:                val[j] = '\0';
1.7       kristaps  497:                i = end;
                    498:        }
                    499:
1.14    ! kristaps  500:        texisplice(p, val, strlen(val), pos);
1.7       kristaps  501:
                    502:        for (i = 0; i < asz; i++)
                    503:                free(args[i]);
                    504:        free(args);
                    505:        free(val);
                    506: }
                    507:
1.1       kristaps  508: /*
                    509:  * Output a free-form word in the input stream, progressing to the next
                    510:  * command or white-space.
                    511:  * This also will advance the input stream.
                    512:  */
                    513: static void
1.14    ! kristaps  514: parseword(struct texi *p, size_t *pos, char extra)
1.1       kristaps  515: {
                    516:
                    517:        if (p->seenws && 0 == p->outmacro &&
                    518:                 p->outcol > 72 && 0 == p->literal)
                    519:                texiputchar(p, '\n');
                    520:        /* FIXME: abstract this: we use it elsewhere. */
                    521:        if (p->seenws && p->outcol && 0 == p->literal)
                    522:                texiputchar(p, ' ');
                    523:
                    524:        p->seenws = 0;
                    525:
1.14    ! kristaps  526:        while (*pos < BUFSZ(p) && ! ismspace(BUF(p)[*pos])) {
        !           527:                switch (BUF(p)[*pos]) {
1.1       kristaps  528:                case ('@'):
                    529:                case ('}'):
                    530:                case ('{'):
                    531:                        return;
                    532:                }
1.14    ! kristaps  533:                if ('\0' != extra && BUF(p)[*pos] == extra)
1.1       kristaps  534:                        return;
1.14    ! kristaps  535:                if (*pos < BUFSZ(p) - 1 &&
        !           536:                         '`' == BUF(p)[*pos] &&
        !           537:                         '`' == BUF(p)[*pos + 1]) {
1.1       kristaps  538:                        texiputchars(p, "\\(lq");
1.14    ! kristaps  539:                        advance(p, pos);
        !           540:                } else if (*pos < BUFSZ(p) - 1 &&
        !           541:                         '\'' == BUF(p)[*pos] &&
        !           542:                         '\'' == BUF(p)[*pos + 1]) {
1.1       kristaps  543:                        texiputchars(p, "\\(rq");
1.14    ! kristaps  544:                        advance(p, pos);
1.1       kristaps  545:                } else
1.14    ! kristaps  546:                        texiputchar(p, BUF(p)[*pos]);
        !           547:                advance(p, pos);
1.1       kristaps  548:        }
                    549: }
                    550:
                    551: /*
                    552:  * Look up the command at position "pos" in the buffer, returning it (or
                    553:  * TEXICMD__MAX if none found) and setting "end" to be the absolute
                    554:  * index after the command name.
                    555:  */
                    556: enum texicmd
1.14    ! kristaps  557: texicmd(struct texi *p, size_t pos, size_t *end, struct teximacro **macro)
1.1       kristaps  558: {
1.4       kristaps  559:        size_t   i, len, toksz;
1.1       kristaps  560:
1.14    ! kristaps  561:        assert('@' == BUF(p)[pos]);
1.1       kristaps  562:
1.7       kristaps  563:        if (NULL != macro)
                    564:                *macro = NULL;
                    565:
1.14    ! kristaps  566:        if ((*end = pos) == BUFSZ(p))
1.1       kristaps  567:                return(TEXICMD__MAX);
1.14    ! kristaps  568:        else if ((*end = ++pos) == BUFSZ(p))
1.1       kristaps  569:                return(TEXICMD__MAX);
                    570:
                    571:        /* Alphabetic commands are special. */
1.14    ! kristaps  572:        if ( ! isalpha(BUF(p)[pos])) {
        !           573:                if ((*end = pos + 1) == BUFSZ(p))
1.1       kristaps  574:                        return(TEXICMD__MAX);
                    575:                for (i = 0; i < TEXICMD__MAX; i++) {
                    576:                        if (1 != texitoks[i].len)
                    577:                                continue;
1.14    ! kristaps  578:                        if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], 1))
1.1       kristaps  579:                                return(i);
                    580:                }
1.14    ! kristaps  581:                texiwarn(p, "bad command: @%c", BUF(p)[pos]);
1.1       kristaps  582:                return(TEXICMD__MAX);
                    583:        }
                    584:
1.4       kristaps  585:        /* Scan to the end of the possible command name. */
1.14    ! kristaps  586:        for (*end = pos; *end < BUFSZ(p) && ! ismspace(BUF(p)[*end]); (*end)++)
        !           587:                if ((*end > pos && ('@' == BUF(p)[*end] ||
        !           588:                          '{' == BUF(p)[*end] || '}' == BUF(p)[*end])))
1.1       kristaps  589:                        break;
                    590:
1.4       kristaps  591:        /* Look for the command. */
1.1       kristaps  592:        len = *end - pos;
                    593:        for (i = 0; i < TEXICMD__MAX; i++) {
                    594:                if (len != texitoks[i].len)
                    595:                        continue;
1.14    ! kristaps  596:                if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], len))
1.1       kristaps  597:                        return(i);
                    598:        }
                    599:
1.4       kristaps  600:        /* Look for it in our indices. */
                    601:        for (i = 0; i < p->indexsz; i++) {
                    602:                toksz = strlen(p->indexs[i]);
                    603:                if (len != 5 + toksz)
                    604:                        continue;
1.14    ! kristaps  605:                if (strncmp(&BUF(p)[pos], p->indexs[i], toksz))
1.4       kristaps  606:                        continue;
1.14    ! kristaps  607:                if (0 == strncmp(&BUF(p)[pos + toksz], "index", 5))
1.7       kristaps  608:                        return(TEXICMD_USER_INDEX);
                    609:        }
                    610:
                    611:        for (i = 0; i < p->macrosz; i++) {
                    612:                if (len != strlen(p->macros[i].key))
                    613:                        continue;
1.14    ! kristaps  614:                if (strncmp(&BUF(p)[pos], p->macros[i].key, len))
1.7       kristaps  615:                        continue;
                    616:                if (NULL != macro)
                    617:                        *macro = &p->macros[i];
                    618:                return(TEXICMD__MAX);
1.4       kristaps  619:        }
                    620:
1.14    ! kristaps  621:        texiwarn(p, "bad command: @%.*s", (int)len, &BUF(p)[pos]);
1.1       kristaps  622:        return(TEXICMD__MAX);
                    623: }
                    624:
                    625: /*
                    626:  * Parse an argument from a bracketed command, e.g., @url{foo, baz}.
                    627:  * Num should be set to the argument we're currently parsing, although
                    628:  * it suffixes for it to be zero or non-zero.
                    629:  * This will return 1 if there are more arguments, 0 otherwise.
                    630:  * This will stop (returning 0) in the event of EOF or if we're not at a
                    631:  * bracket for the zeroth parse.
                    632:  */
                    633: int
1.14    ! kristaps  634: parsearg(struct texi *p, size_t *pos, size_t num)
1.1       kristaps  635: {
1.7       kristaps  636:        size_t            end;
                    637:        enum texicmd      cmd;
                    638:        struct teximacro *macro;
1.1       kristaps  639:
1.14    ! kristaps  640:        while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos]))
        !           641:                advance(p, pos);
        !           642:        if (*pos == BUFSZ(p) || (0 == num && '{' != BUF(p)[*pos]))
1.1       kristaps  643:                return(0);
                    644:        if (0 == num)
1.14    ! kristaps  645:                advance(p, pos);
1.1       kristaps  646:
1.14    ! kristaps  647:        while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
        !           648:                switch (BUF(p)[*pos]) {
1.1       kristaps  649:                case (','):
1.14    ! kristaps  650:                        advance(p, pos);
1.1       kristaps  651:                        return(1);
                    652:                case ('}'):
1.14    ! kristaps  653:                        advance(p, pos);
1.1       kristaps  654:                        return(0);
                    655:                case ('{'):
                    656:                        if (0 == p->ign)
                    657:                                texiwarn(p, "unexpected \"{\"");
1.14    ! kristaps  658:                        advance(p, pos);
1.1       kristaps  659:                        continue;
                    660:                case ('@'):
                    661:                        break;
                    662:                default:
1.14    ! kristaps  663:                        parseword(p, pos, ',');
1.1       kristaps  664:                        continue;
                    665:                }
                    666:
1.14    ! kristaps  667:                cmd = texicmd(p, *pos, &end, &macro);
        !           668:                advanceto(p, pos, end);
1.7       kristaps  669:                if (NULL != macro)
1.14    ! kristaps  670:                        texiexecmacro(p, macro, pos);
1.1       kristaps  671:                if (TEXICMD__MAX == cmd)
                    672:                        continue;
                    673:                if (NULL != texitoks[cmd].fp)
1.14    ! kristaps  674:                        (*texitoks[cmd].fp)(p, cmd, pos);
1.1       kristaps  675:        }
                    676:        return(0);
                    677: }
                    678:
                    679: /*
                    680:  * Parse until the end of a bracketed statement, e.g., @foo{bar baz}.
                    681:  * This will stop in the event of EOF or if we're not at a bracket.
                    682:  */
                    683: void
1.14    ! kristaps  684: parsebracket(struct texi *p, size_t *pos)
1.1       kristaps  685: {
1.7       kristaps  686:        size_t            end;
                    687:        enum texicmd      cmd;
                    688:        struct teximacro *macro;
1.1       kristaps  689:
1.14    ! kristaps  690:        while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos]))
        !           691:                advance(p, pos);
1.1       kristaps  692:
1.14    ! kristaps  693:        if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos])
1.1       kristaps  694:                return;
1.14    ! kristaps  695:        advance(p, pos);
1.1       kristaps  696:
1.14    ! kristaps  697:        while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
        !           698:                switch (BUF(p)[*pos]) {
1.1       kristaps  699:                case ('}'):
1.14    ! kristaps  700:                        advance(p, pos);
1.1       kristaps  701:                        return;
                    702:                case ('{'):
                    703:                        if (0 == p->ign)
                    704:                                texiwarn(p, "unexpected \"{\"");
1.14    ! kristaps  705:                        advance(p, pos);
1.1       kristaps  706:                        continue;
                    707:                case ('@'):
                    708:                        break;
                    709:                default:
1.14    ! kristaps  710:                        parseword(p, pos, '\0');
1.1       kristaps  711:                        continue;
                    712:                }
                    713:
1.14    ! kristaps  714:                cmd = texicmd(p, *pos, &end, &macro);
        !           715:                advanceto(p, pos, end);
1.7       kristaps  716:                if (NULL != macro)
1.14    ! kristaps  717:                        texiexecmacro(p, macro, pos);
1.1       kristaps  718:                if (TEXICMD__MAX == cmd)
                    719:                        continue;
                    720:                if (NULL != texitoks[cmd].fp)
1.14    ! kristaps  721:                        (*texitoks[cmd].fp)(p, cmd, pos);
1.1       kristaps  722:        }
                    723: }
                    724:
                    725: /*
                    726:  * This should be invoked when we're on a macro line and want to process
                    727:  * to the end of the current input line, doing all of our macros along
                    728:  * the way.
                    729:  */
                    730: void
1.14    ! kristaps  731: parseeoln(struct texi *p, size_t *pos)
1.1       kristaps  732: {
1.7       kristaps  733:        size_t            end;
                    734:        enum texicmd      cmd;
                    735:        struct teximacro *macro;
1.1       kristaps  736:
1.14    ! kristaps  737:        while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) {
        !           738:                while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) {
1.1       kristaps  739:                        p->seenws = 1;
                    740:                        if (p->literal)
1.14    ! kristaps  741:                                texiputchar(p, BUF(p)[*pos]);
        !           742:                        advance(p, pos);
1.1       kristaps  743:                }
1.14    ! kristaps  744:                switch (BUF(p)[*pos]) {
1.1       kristaps  745:                case ('}'):
                    746:                        if (0 == p->ign)
                    747:                                texiwarn(p, "unexpected \"}\"");
1.14    ! kristaps  748:                        advance(p, pos);
1.1       kristaps  749:                        continue;
                    750:                case ('{'):
                    751:                        if (0 == p->ign)
                    752:                                texiwarn(p, "unexpected \"{\"");
1.14    ! kristaps  753:                        advance(p, pos);
1.1       kristaps  754:                        continue;
                    755:                case ('@'):
                    756:                        break;
                    757:                default:
1.14    ! kristaps  758:                        parseword(p, pos, '\0');
1.1       kristaps  759:                        continue;
                    760:                }
                    761:
1.14    ! kristaps  762:                cmd = texicmd(p, *pos, &end, &macro);
        !           763:                advanceto(p, pos, end);
1.7       kristaps  764:                if (NULL != macro)
1.14    ! kristaps  765:                        texiexecmacro(p, macro, pos);
1.1       kristaps  766:                if (TEXICMD__MAX == cmd)
                    767:                        continue;
                    768:                if (NULL != texitoks[cmd].fp)
1.14    ! kristaps  769:                        (*texitoks[cmd].fp)(p, cmd, pos);
1.1       kristaps  770:        }
1.14    ! kristaps  771:
        !           772:        if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos])
        !           773:                advance(p, pos);
1.1       kristaps  774: }
                    775:
                    776: /*
                    777:  * Parse a single word or command.
                    778:  * This will return immediately at the EOF.
                    779:  */
1.14    ! kristaps  780: static void
        !           781: parsesingle(struct texi *p, size_t *pos)
1.1       kristaps  782: {
1.7       kristaps  783:        size_t            end;
                    784:        enum texicmd      cmd;
                    785:        struct teximacro *macro;
1.1       kristaps  786:
1.14    ! kristaps  787:        if ((*pos = advancenext(p, pos)) >= BUFSZ(p))
1.1       kristaps  788:                return;
                    789:
1.14    ! kristaps  790:        switch (BUF(p)[*pos]) {
1.1       kristaps  791:        case ('}'):
                    792:                if (0 == p->ign)
                    793:                        texiwarn(p, "unexpected \"}\"");
1.14    ! kristaps  794:                advance(p, pos);
1.1       kristaps  795:                return;
                    796:        case ('{'):
                    797:                if (0 == p->ign)
                    798:                        texiwarn(p, "unexpected \"{\"");
1.14    ! kristaps  799:                advance(p, pos);
1.1       kristaps  800:                return;
                    801:        case ('@'):
                    802:                break;
                    803:        default:
1.14    ! kristaps  804:                parseword(p, pos, '\0');
1.1       kristaps  805:                return;
                    806:        }
                    807:
1.14    ! kristaps  808:        cmd = texicmd(p, *pos, &end, &macro);
        !           809:        advanceto(p, pos, end);
1.7       kristaps  810:        if (NULL != macro)
1.14    ! kristaps  811:                texiexecmacro(p, macro, pos);
1.1       kristaps  812:        if (TEXICMD__MAX == cmd)
                    813:                return;
                    814:        if (NULL != texitoks[cmd].fp)
1.14    ! kristaps  815:                (*texitoks[cmd].fp)(p, cmd, pos);
1.1       kristaps  816: }
                    817:
                    818: /*
                    819:  * This is used in the @deffn type of command.
                    820:  * These have an arbitrary number of line arguments; however, these
                    821:  * arguments may or may not be surrounded by brackets.
                    822:  * In this function, we parse each one as either a bracketed or
                    823:  * non-bracketed argument, returning 0 when we've reached the end of
                    824:  * line or 1 otherwise.
                    825:  */
                    826: int
1.14    ! kristaps  827: parselinearg(struct texi *p, size_t *pos)
1.1       kristaps  828: {
                    829:
1.14    ! kristaps  830:        while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) {
1.1       kristaps  831:                p->seenws = 1;
1.14    ! kristaps  832:                advance(p, pos);
1.1       kristaps  833:        }
                    834:
1.14    ! kristaps  835:        if (*pos < BUFSZ(p) && '{' == BUF(p)[*pos])
        !           836:                parsebracket(p, pos);
        !           837:        else if (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos])
        !           838:                parsesingle(p, pos);
1.1       kristaps  839:        else
                    840:                return(0);
                    841:
                    842:        return(1);
                    843: }
                    844:
                    845: /*
                    846:  * Parse til the end of the buffer.
                    847:  */
1.14    ! kristaps  848: static void
        !           849: parseeof(struct texi *p)
1.1       kristaps  850: {
                    851:        size_t   pos;
                    852:
1.14    ! kristaps  853:        for (pos = 0; pos < BUFSZ(p); )
        !           854:                parsesingle(p, &pos);
1.1       kristaps  855: }
                    856:
1.8       kristaps  857: void
1.14    ! kristaps  858: texisplice(struct texi *p, const char *buf, size_t sz, size_t *pos)
1.8       kristaps  859: {
1.14    ! kristaps  860:        char            *cp;
        !           861:        struct texifile *f;
1.8       kristaps  862:
1.14    ! kristaps  863:        assert(p->filepos > 0);
        !           864:        f = &p->files[p->filepos - 1];
1.8       kristaps  865:
1.14    ! kristaps  866:        if (f->mapsz + sz > f->mapmaxsz) {
        !           867:                f->mapmaxsz = f->mapsz + sz + 1024;
        !           868:                cp = realloc(f->map, f->mapmaxsz);
        !           869:                if (NULL == cp)
        !           870:                        texiabort(p, NULL);
        !           871:                f->map = cp;
        !           872:        }
1.8       kristaps  873:
1.14    ! kristaps  874:        memmove(f->map + *pos + sz, f->map + *pos, f->mapsz - *pos);
        !           875:        memcpy(f->map + *pos, buf, sz);
        !           876:        f->mapsz += sz;
1.8       kristaps  877: }
                    878:
                    879: /*
1.1       kristaps  880:  * Parse a block sequence until we have the "@end endtoken" command
                    881:  * invocation.
                    882:  * This will return immediately at EOF.
                    883:  */
                    884: void
1.14    ! kristaps  885: parseto(struct texi *p, size_t *pos, const char *endtoken)
1.1       kristaps  886: {
1.7       kristaps  887:        size_t            end;
                    888:        enum texicmd      cmd;
                    889:        size_t            endtoksz;
                    890:        struct teximacro *macro;
1.1       kristaps  891:
                    892:        endtoksz = strlen(endtoken);
                    893:        assert(endtoksz > 0);
                    894:
1.14    ! kristaps  895:        while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
        !           896:                switch (BUF(p)[*pos]) {
1.1       kristaps  897:                case ('}'):
                    898:                        if (0 == p->ign)
                    899:                                texiwarn(p, "unexpected \"}\"");
1.14    ! kristaps  900:                        advance(p, pos);
1.1       kristaps  901:                        continue;
                    902:                case ('{'):
                    903:                        if (0 == p->ign)
                    904:                                texiwarn(p, "unexpected \"{\"");
1.14    ! kristaps  905:                        advance(p, pos);
1.1       kristaps  906:                        continue;
                    907:                case ('@'):
                    908:                        break;
                    909:                default:
1.14    ! kristaps  910:                        parseword(p, pos, '\0');
1.1       kristaps  911:                        continue;
                    912:                }
                    913:
1.14    ! kristaps  914:                cmd = texicmd(p, *pos, &end, &macro);
        !           915:                advanceto(p, pos, end);
1.1       kristaps  916:                if (TEXICMD_END == cmd) {
1.14    ! kristaps  917:                        while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
        !           918:                                advance(p, pos);
1.1       kristaps  919:                        /*
                    920:                         * FIXME: check the full word, not just its
                    921:                         * initial substring!
                    922:                         */
1.14    ! kristaps  923:                        if (BUFSZ(p) - *pos >= endtoksz && 0 == strncmp
        !           924:                                 (&BUF(p)[*pos], endtoken, endtoksz)) {
        !           925:                                advanceeoln(p, pos, 0);
1.1       kristaps  926:                                break;
                    927:                        }
                    928:                        if (0 == p->ign)
                    929:                                texiwarn(p, "unexpected \"end\"");
1.14    ! kristaps  930:                        advanceeoln(p, pos, 0);
1.1       kristaps  931:                        continue;
1.7       kristaps  932:                }
                    933:                if (NULL != macro)
1.14    ! kristaps  934:                        texiexecmacro(p, macro, pos);
1.7       kristaps  935:                if (TEXICMD__MAX == cmd)
                    936:                        continue;
                    937:                if (NULL != texitoks[cmd].fp)
1.14    ! kristaps  938:                        (*texitoks[cmd].fp)(p, cmd, pos);
1.1       kristaps  939:        }
                    940: }
                    941:
                    942: /*
1.12      kristaps  943:  * Like parsefile() but used for reading from stdandard input.
                    944:  * This can only be called for the first file!
                    945:  */
                    946: void
                    947: parsestdin(struct texi *p)
                    948: {
                    949:        struct texifile *f;
                    950:        ssize_t          ssz;
                    951:
                    952:        assert(0 == p->filepos);
                    953:        f = &p->files[p->filepos];
                    954:        memset(f, 0, sizeof(struct texifile));
                    955:
                    956:        f->type = TEXISRC_STDIN;
                    957:        f->name = "<stdin>";
                    958:
1.14    ! kristaps  959:        for (f->mapsz = 0; ; f->mapsz += (size_t)ssz) {
        !           960:                if (f->mapsz == f->mapmaxsz) {
        !           961:                        if (f->mapmaxsz == (1U << 31))
1.12      kristaps  962:                                texierr(p, "stdin buffer too long");
1.14    ! kristaps  963:                        f->mapmaxsz = f->mapmaxsz > 65536 / 2 ?
        !           964:                                2 * f->mapmaxsz : 65536;
        !           965:                        f->map = realloc(f->map, f->mapmaxsz);
1.12      kristaps  966:                        if (NULL == f->map)
                    967:                                texiabort(p, NULL);
                    968:                }
1.14    ! kristaps  969:                ssz = read(STDIN_FILENO, f->map +
        !           970:                        (int)f->mapsz, f->mapmaxsz - f->mapsz);
1.12      kristaps  971:                if (0 == ssz)
                    972:                        break;
                    973:                else if (-1 == ssz)
                    974:                        texiabort(p, NULL);
                    975:        }
                    976:
                    977:        p->filepos++;
1.14    ! kristaps  978:        parseeof(p);
1.12      kristaps  979:        texifilepop(p);
                    980: }
                    981:
                    982: /*
1.1       kristaps  983:  * Memory-map the file "fname" and begin parsing it unless "parse" is
                    984:  * zero, in which case we just dump the file to stdout (making sure it
                    985:  * doesn't trip up mdoc(7) along the way).
                    986:  * This can be called in a nested context.
                    987:  */
                    988: void
                    989: parsefile(struct texi *p, const char *fname, int parse)
                    990: {
                    991:        struct texifile *f;
                    992:        int              fd;
                    993:        struct stat      st;
                    994:        size_t           i;
1.14    ! kristaps  995:        char            *map;
1.1       kristaps  996:
1.5       kristaps  997:        if (64 == p->filepos)
1.6       kristaps  998:                texierr(p, "too many open files");
1.1       kristaps  999:        f = &p->files[p->filepos];
                   1000:        memset(f, 0, sizeof(struct texifile));
                   1001:
1.12      kristaps 1002:        f->type = TEXISRC_FILE;
1.1       kristaps 1003:        f->name = fname;
                   1004:        if (-1 == (fd = open(fname, O_RDONLY, 0))) {
                   1005:                texiabort(p, fname);
                   1006:        } else if (-1 == fstat(fd, &st)) {
                   1007:                close(fd);
                   1008:                texiabort(p, fname);
                   1009:        }
                   1010:
1.14    ! kristaps 1011:        f->mapsz = f->mapmaxsz = st.st_size;
        !          1012:        map = mmap(NULL, f->mapsz,
1.1       kristaps 1013:                PROT_READ, MAP_SHARED, fd, 0);
                   1014:        close(fd);
                   1015:
1.14    ! kristaps 1016:        if (MAP_FAILED == map)
1.1       kristaps 1017:                texiabort(p, fname);
                   1018:
                   1019:        if ( ! parse) {
1.13      kristaps 1020:                for (i = 0; i < f->mapsz; i++)
1.14    ! kristaps 1021:                        texiputchar(p, map[i]);
1.13      kristaps 1022:                if (p->outcol)
                   1023:                        texiputchar(p, '\n');
1.14    ! kristaps 1024:                munmap(map, f->mapsz);
        !          1025:                return;
        !          1026:        }
        !          1027:
        !          1028:        p->filepos++;
        !          1029:        f->map = malloc(f->mapsz);
        !          1030:        memcpy(f->map, map, f->mapsz);
        !          1031:        munmap(map, f->mapsz);
        !          1032:        parseeof(p);
1.1       kristaps 1033:        texifilepop(p);
                   1034: }
                   1035:
1.2       kristaps 1036: /*
                   1037:  * Look up the value to a stored pair's value starting in "buf" from
                   1038:  * start to end.
                   1039:  * Return the pointer to the value memory, which can be NULL if the
                   1040:  * pointer key does not exist.
                   1041:  * The pointer can point to NULL if the value has been unset.
                   1042:  */
                   1043: static char **
1.14    ! kristaps 1044: valuequery(const struct texi *p, size_t start, size_t end)
1.2       kristaps 1045: {
                   1046:        size_t   i, sz, len;
                   1047:
                   1048:        assert(end >= start);
                   1049:        /* Ignore zero-length. */
                   1050:        if (0 == (len = (end - start)))
                   1051:                return(NULL);
                   1052:        for (i = 0; i < p->valsz; i++) {
                   1053:                sz = strlen(p->vals[i].key);
                   1054:                if (sz != len)
                   1055:                        continue;
1.14    ! kristaps 1056:                if (0 == strncmp(p->vals[i].key, &BUF(p)[start], len))
1.2       kristaps 1057:                        return(&p->vals[i].value);
                   1058:        }
                   1059:        return(NULL);
                   1060: }
                   1061:
                   1062: /*
                   1063:  * Parse a key until the end of line, e.g., @clear foo\n, and return the
                   1064:  * pointer to its value via valuequery().
                   1065:  */
                   1066: static char **
1.14    ! kristaps 1067: valuelquery(struct texi *p, size_t *pos)
1.2       kristaps 1068: {
                   1069:        size_t    start, end;
                   1070:        char    **ret;
                   1071:
1.14    ! kristaps 1072:        while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
        !          1073:                advance(p, pos);
        !          1074:        if (*pos == BUFSZ(p))
1.2       kristaps 1075:                return(NULL);
1.14    ! kristaps 1076:        for (start = end = *pos; end < BUFSZ(p); end++)
        !          1077:                if ('\n' == BUF(p)[end])
1.2       kristaps 1078:                        break;
1.14    ! kristaps 1079:        advanceto(p, pos, end);
        !          1080:        if (*pos < BUFSZ(p)) {
        !          1081:                assert('\n' == BUF(p)[*pos]);
        !          1082:                advance(p, pos);
1.2       kristaps 1083:        }
1.14    ! kristaps 1084:        if (NULL == (ret = valuequery(p, start, end)))
1.2       kristaps 1085:                return(NULL);
                   1086:        return(ret);
                   1087: }
                   1088:
                   1089: void
1.14    ! kristaps 1090: valuelclear(struct texi *p, size_t *pos)
1.2       kristaps 1091: {
                   1092:        char    **ret;
                   1093:
1.14    ! kristaps 1094:        if (NULL == (ret = valuelquery(p, pos)))
1.2       kristaps 1095:                return;
                   1096:        free(*ret);
                   1097:        *ret = NULL;
                   1098: }
                   1099:
                   1100: const char *
1.14    ! kristaps 1101: valuellookup(struct texi *p, size_t *pos)
1.2       kristaps 1102: {
                   1103:        char    **ret;
                   1104:
1.14    ! kristaps 1105:        if (NULL == (ret = valuelquery(p, pos)))
1.2       kristaps 1106:                return(NULL);
                   1107:        return(*ret);
                   1108: }
                   1109:
                   1110: /*
                   1111:  * Parse a key from a bracketed string, e.g., @value{foo}, and return
                   1112:  * the pointer to its value.
                   1113:  * If the returned pointer is NULL, either there was no string within
                   1114:  * the brackets (or no brackets), or the value was not found, or the
                   1115:  * value had previously been unset.
                   1116:  */
                   1117: const char *
1.14    ! kristaps 1118: valueblookup(struct texi *p, size_t *pos)
1.2       kristaps 1119: {
                   1120:        size_t    start, end;
                   1121:        char    **ret;
                   1122:
1.14    ! kristaps 1123:        while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
        !          1124:                advance(p, pos);
        !          1125:        if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos])
1.2       kristaps 1126:                return(NULL);
1.14    ! kristaps 1127:        advance(p, pos);
        !          1128:        for (start = end = *pos; end < BUFSZ(p); end++)
        !          1129:                if ('}' == BUF(p)[end])
1.2       kristaps 1130:                        break;
1.14    ! kristaps 1131:        advanceto(p, pos, end);
        !          1132:        if (*pos < BUFSZ(p)) {
        !          1133:                assert('}' == BUF(p)[*pos]);
        !          1134:                advance(p, pos);
1.2       kristaps 1135:        }
1.14    ! kristaps 1136:        if (NULL == (ret = valuequery(p, start, end)))
1.2       kristaps 1137:                return(NULL);
                   1138:        return(*ret);
                   1139: }
                   1140:
                   1141: void
                   1142: valueadd(struct texi *p, char *key, char *val)
                   1143: {
                   1144:        size_t   i;
                   1145:
                   1146:        assert(NULL != key);
                   1147:        assert(NULL != val);
                   1148:
                   1149:        for (i = 0; i < p->valsz; i++)
                   1150:                if (0 == strcmp(p->vals[i].key, key))
                   1151:                        break;
                   1152:
                   1153:        if (i < p->valsz) {
                   1154:                free(key);
                   1155:                free(p->vals[i].value);
                   1156:                p->vals[i].value = val;
                   1157:        } else {
1.4       kristaps 1158:                /* FIXME: reallocarray() */
1.2       kristaps 1159:                p->vals = realloc(p->vals,
                   1160:                        (p->valsz + 1) *
                   1161:                         sizeof(struct texivalue));
1.4       kristaps 1162:                if (NULL == p->vals)
                   1163:                        texiabort(p, NULL);
1.2       kristaps 1164:                p->vals[p->valsz].key = key;
                   1165:                p->vals[p->valsz].value = val;
                   1166:                p->valsz++;
                   1167:        }
1.7       kristaps 1168: }
                   1169:
                   1170: /*
                   1171:  * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the
                   1172:  * declaration form, @macro foo {arg1, ...}) and textually convert it to
                   1173:  * an array of arguments of size "argsz".
                   1174:  * These need to be freed individually and as a whole.
                   1175:  * NOTE: this will puke on @, or @} macros, which can trick it into
                   1176:  * stopping argument parsing earlier.
                   1177:  * Ergo, textual: this doesn't interpret the arguments in any way.
                   1178:  */
                   1179: char **
1.14    ! kristaps 1180: argparse(struct texi *p, size_t *pos, size_t *argsz, size_t hint)
1.7       kristaps 1181: {
                   1182:        char    **args;
                   1183:        size_t    start, end, stack;
                   1184:
1.14    ! kristaps 1185:        while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
        !          1186:                advance(p, pos);
1.7       kristaps 1187:
                   1188:        args = NULL;
                   1189:        *argsz = 0;
                   1190:
1.14    ! kristaps 1191:        if ('{' != BUF(p)[*pos] && hint) {
1.10      kristaps 1192:                /*
                   1193:                 * Special case: if we encounter an unbracketed argument
                   1194:                 * and we're being invoked with non-zero arguments
                   1195:                 * (versus being set, i.e., hint>0), then parse until
                   1196:                 * the end of line.
                   1197:                 */
                   1198:                *argsz = 1;
                   1199:                args = calloc(1, sizeof(char *));
                   1200:                if (NULL == args)
                   1201:                        texiabort(p, NULL);
                   1202:                start = *pos;
1.14    ! kristaps 1203:                while (*pos < BUFSZ(p)) {
        !          1204:                        if ('\n' == BUF(p)[*pos])
1.10      kristaps 1205:                                break;
1.14    ! kristaps 1206:                        advance(p, pos);
1.10      kristaps 1207:                }
                   1208:                args[0] = malloc(*pos - start + 1);
1.14    ! kristaps 1209:                memcpy(args[0], &BUF(p)[start], *pos - start);
1.10      kristaps 1210:                args[0][*pos - start] = '\0';
1.14    ! kristaps 1211:                if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos])
        !          1212:                        advance(p, pos);
1.10      kristaps 1213:                return(args);
1.14    ! kristaps 1214:        } else if ('{' != BUF(p)[*pos])
1.7       kristaps 1215:                return(args);
                   1216:
                   1217:        /* Parse til the closing '}', putting into the array. */
1.14    ! kristaps 1218:        advance(p, pos);
        !          1219:        while (*pos < BUFSZ(p)) {
        !          1220:                while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
        !          1221:                        advance(p, pos);
1.7       kristaps 1222:                start = *pos;
                   1223:                stack = 0;
1.14    ! kristaps 1224:                while (*pos < BUFSZ(p)) {
1.7       kristaps 1225:                        /*
                   1226:                         * According to the manual, commas within
                   1227:                         * embedded commands are escaped.
                   1228:                         * We keep track of embedded-ness in the "stack"
                   1229:                         * state anyway, so this is free.
                   1230:                         */
1.14    ! kristaps 1231:                        if (',' == BUF(p)[*pos] && 0 == stack && 1 != hint)
1.7       kristaps 1232:                                break;
1.14    ! kristaps 1233:                        else if (0 == stack && '}' == BUF(p)[*pos])
1.7       kristaps 1234:                                break;
1.14    ! kristaps 1235:                        else if (0 != stack && '}' == BUF(p)[*pos])
1.7       kristaps 1236:                                stack--;
1.14    ! kristaps 1237:                        else if ('{' == BUF(p)[*pos])
1.7       kristaps 1238:                                stack++;
1.14    ! kristaps 1239:                        advance(p, pos);
1.7       kristaps 1240:                }
                   1241:                if (stack)
                   1242:                        texiwarn(p, "unterminated macro "
                   1243:                                "in macro arguments");
1.14    ! kristaps 1244:                if ((end = *pos) == BUFSZ(p))
1.7       kristaps 1245:                        break;
                   1246:                /* Test for zero-length '{  }'. */
1.14    ! kristaps 1247:                if (start == end && '}' == BUF(p)[*pos] && 0 == *argsz)
1.7       kristaps 1248:                        break;
                   1249:                /* FIXME: use reallocarray. */
                   1250:                args = realloc
                   1251:                        (args, sizeof(char *) *
                   1252:                         (*argsz + 1));
                   1253:                if (NULL == args)
                   1254:                        texiabort(p, NULL);
                   1255:                args[*argsz] = malloc(end - start + 1);
                   1256:                if (NULL == args[*argsz])
                   1257:                        texiabort(p, NULL);
                   1258:                memcpy(args[*argsz],
1.14    ! kristaps 1259:                        &BUF(p)[start], end - start);
1.7       kristaps 1260:                args[*argsz][end - start] = '\0';
                   1261:                (*argsz)++;
1.14    ! kristaps 1262:                if ('}' == BUF(p)[*pos])
1.7       kristaps 1263:                        break;
1.14    ! kristaps 1264:                advance(p, pos);
1.7       kristaps 1265:        }
                   1266:
1.14    ! kristaps 1267:        if (*pos == BUFSZ(p))
1.7       kristaps 1268:                texierr(p, "unterminated arguments");
1.14    ! kristaps 1269:        assert('}' == BUF(p)[*pos]);
        !          1270:        advance(p, pos);
1.7       kristaps 1271:        return(args);
1.2       kristaps 1272: }

CVSweb