=================================================================== RCS file: /cvs/texi2mdoc/util.c,v retrieving revision 1.7 retrieving revision 1.12 diff -u -p -r1.7 -r1.12 --- texi2mdoc/util.c 2015/02/23 11:44:30 1.7 +++ texi2mdoc/util.c 2015/02/23 22:50:11 1.12 @@ -1,4 +1,4 @@ -/* $Id: util.c,v 1.7 2015/02/23 11:44:30 kristaps Exp $ */ +/* $Id: util.c,v 1.12 2015/02/23 22:50:11 kristaps Exp $ */ /* * Copyright (c) 2015 Kristaps Dzonsons * @@ -43,7 +43,10 @@ texifilepop(struct texi *p) assert(p->filepos > 0); f = &p->files[--p->filepos]; - munmap(f->map, f->mapsz); + if (TEXISRC_FILE == f->type) + munmap(f->map, f->mapsz); + else + free(f->map); } static void @@ -170,6 +173,8 @@ texiputchar(struct texi *p, char c) if ('.' == c && 0 == p->outcol) fputs("\\&", stdout); + if ('\'' == c && 0 == p->outcol) + fputs("\\&", stdout); putchar(c); p->seenvs = 0; @@ -193,6 +198,21 @@ texiputchars(struct texi *p, const char *s) } /* + * This puts all characters onto the output stream but makes sure to + * escape mdoc(7) slashes. + */ +void +texiputbuf(struct texi *p, const char *buf, size_t start, size_t end) +{ + + for ( ; start < end; start++) { + texiputchar(p, buf[start]); + if ('\\' == buf[start]) + texiputchar(p, 'e'); + } +} + +/* * Close an mdoc(7) macro opened with teximacroopen(). * If there are no more macros on the line, prints a newline. */ @@ -404,18 +424,19 @@ static void texiexecmacro(struct texi *p, struct teximacro *m, const char *buf, size_t sz, size_t *pos) { - size_t valsz, realsz, aasz, asz, - ssz, i, j, k, start, end; - char *val; - char **args; + size_t valsz, realsz, aasz, asz, + ssz, i, j, k, start, end; + char *val; + char **args; + const char *cp; - args = argparse(p, buf, sz, pos, &asz); + args = argparse(p, buf, sz, pos, &asz, m->argsz); if (asz != m->argsz) texiwarn(p, "invalid macro argument length"); aasz = asz < m->argsz ? 
asz : m->argsz; if (0 == aasz) { - parseeof(p, m->value, strlen(m->value)); + parsemembuf(p, m->value, strlen(m->value)); return; } @@ -477,11 +498,14 @@ texiexecmacro(struct texi *p, struct teximacro *m, texiabort(p, NULL); } - j = strlcat(val, args[k], valsz + 1); + for (cp = args[k]; '\0' != *cp; cp++) + val[j++] = *cp; + + val[j] = '\0'; i = end; } - parseeof(p, val, strlen(val)); + parsemembuf(p, val, strlen(val)); for (i = 0; i < asz; i++) free(args[i]); @@ -527,6 +551,9 @@ texiword(struct texi *p, const char *buf, '\'' == buf[*pos + 1]) { texiputchars(p, "\\(rq"); advance(p, buf, pos); + } else if ('\\' == buf[*pos]) { + texiputchar(p, buf[*pos]); + texiputchar(p, 'e'); } else texiputchar(p, buf[*pos]); advance(p, buf, pos); @@ -839,6 +866,34 @@ parseeof(struct texi *p, const char *buf, size_t sz) } /* + * This is like parseeof() except that it's to be invoked on memory + * buffers while parsing a larger scope. + * This is useful for parsing macro sequences. + * The line, column, and name of the calling file context are saved, the + * column and line reset, then all of these restored after parse. + */ +void +parsemembuf(struct texi *p, const char *buf, size_t sz) +{ + size_t svln, svcol; + const char *svname; + + svln = p->files[p->filepos - 1].line; + svcol = p->files[p->filepos - 1].col; + svname = p->files[p->filepos - 1].name; + + p->files[p->filepos - 1].line = 0; + p->files[p->filepos - 1].col = 0; + p->files[p->filepos - 1].name = ""; + + parseeof(p, buf, sz); + + p->files[p->filepos - 1].line = svln; + p->files[p->filepos - 1].col = svcol; + p->files[p->filepos - 1].name = svname; +} + +/* * Parse a block sequence until we have the "@end endtoken" command * invocation. * This will return immediately at EOF. @@ -903,6 +958,47 @@ parseto(struct texi *p, const char *buf, } /* + * Like parsefile() but used for reading from standard input. + * This can only be called for the first file! 
+ */ +void +parsestdin(struct texi *p) +{ + struct texifile *f; + size_t off; + ssize_t ssz; + + assert(0 == p->filepos); + f = &p->files[p->filepos]; + memset(f, 0, sizeof(struct texifile)); + + f->type = TEXISRC_STDIN; + f->name = ""; + + for (off = 0; ; off += (size_t)ssz) { + if (off == f->mapsz) { + if (f->mapsz == (1U << 31)) + texierr(p, "stdin buffer too long"); + f->mapsz = f->mapsz > 65536 / 2 ? + 2 * f->mapsz : 65536; + f->map = realloc(f->map, f->mapsz); + if (NULL == f->map) + texiabort(p, NULL); + } + ssz = read(STDIN_FILENO, + f->map + (int)off, f->mapsz - off); + if (0 == ssz) + break; + else if (-1 == ssz) + texiabort(p, NULL); + } + + p->filepos++; + parseeof(p, f->map, off); + texifilepop(p); +} + +/* * Memory-map the file "fname" and begin parsing it unless "parse" is * zero, in which case we just dump the file to stdout (making sure it * doesn't trip up mdoc(7) along the way). @@ -921,6 +1017,7 @@ parsefile(struct texi *p, const char *fname, int parse f = &p->files[p->filepos]; memset(f, 0, sizeof(struct texifile)); + f->type = TEXISRC_FILE; f->name = fname; if (-1 == (fd = open(fname, O_RDONLY, 0))) { texiabort(p, fname); @@ -1104,7 +1201,7 @@ valueadd(struct texi *p, char *key, char *val) */ char ** argparse(struct texi *p, const char *buf, - size_t sz, size_t *pos, size_t *argsz) + size_t sz, size_t *pos, size_t *argsz, size_t hint) { char **args; size_t start, end, stack; @@ -1115,9 +1212,31 @@ argparse(struct texi *p, const char *buf, args = NULL; *argsz = 0; - /* Check for no arguments. */ - if ('{' != buf[*pos]) + if ('{' != buf[*pos] && hint) { + /* + * Special case: if we encounter an unbracketed argument + * and we're being invoked with non-zero arguments + * (versus being set, i.e., hint>0), then parse until + * the end of line. 
+ */ + *argsz = 1; + args = calloc(1, sizeof(char *)); + if (NULL == args) + texiabort(p, NULL); + start = *pos; + while (*pos < sz) { + if ('\n' == buf[*pos]) + break; + advance(p, buf, pos); + } + args[0] = malloc(*pos - start + 1); + memcpy(args[0], &buf[start], *pos - start); + args[0][*pos - start] = '\0'; + if (*pos < sz && '\n' == buf[*pos]) + advance(p, buf, pos); return(args); + } else if ('{' != buf[*pos]) + return(args); /* Parse til the closing '}', putting into the array. */ advance(p, buf, pos); @@ -1133,7 +1252,7 @@ argparse(struct texi *p, const char *buf, * We keep track of embedded-ness in the "stack" * state anyway, so this is free. */ - if (0 == stack && ',' == buf[*pos]) + if (',' == buf[*pos] && 0 == stack && 1 != hint) break; else if (0 == stack && '}' == buf[*pos]) break; @@ -1151,8 +1270,6 @@ argparse(struct texi *p, const char *buf, /* Test for zero-length '{ }'. */ if (start == end && '}' == buf[*pos] && 0 == *argsz) break; - if (start == end) - texierr(p, "zero-length argument"); /* FIXME: use reallocarray. */ args = realloc (args, sizeof(char *) *