=================================================================== RCS file: /cvs/texi2mdoc/util.c,v retrieving revision 1.1 retrieving revision 1.8 diff -u -p -r1.1 -r1.8 --- texi2mdoc/util.c 2015/02/20 09:58:50 1.1 +++ texi2mdoc/util.c 2015/02/23 11:56:39 1.8 @@ -1,4 +1,4 @@ -/* $Id: util.c,v 1.1 2015/02/20 09:58:50 kristaps Exp $ */ +/* $Id: util.c,v 1.8 2015/02/23 11:56:39 kristaps Exp $ */ /* * Copyright (c) 2015 Kristaps Dzonsons * @@ -46,6 +46,27 @@ texifilepop(struct texi *p) munmap(f->map, f->mapsz); } +static void +teximacrofree(struct teximacro *p) +{ + size_t i; + + for (i = 0; i < p->argsz; i++) + free(p->args[i]); + + free(p->args); + free(p->key); + free(p->value); +} + +static void +texivaluefree(struct texivalue *p) +{ + + free(p->key); + free(p->value); +} + /* * Unmap all files that we're currently using and free all resources * that we've allocated during the parse. @@ -64,15 +85,18 @@ texiexit(struct texi *p) while (p->filepos > 0) texifilepop(p); + for (i = 0; i < p->macrosz; i++) + teximacrofree(&p->macros[i]); for (i = 0; i < p->dirsz; i++) free(p->dirs[i]); + for (i = 0; i < p->indexsz; i++) + free(p->indexs[i]); + for (i = 0; i < p->valsz; i++) + texivaluefree(&p->vals[i]); - for (i = 0; i < p->valsz; i++) { - free(p->vals[i].value); - free(p->vals[i].key); - } - + free(p->macros); free(p->vals); + free(p->indexs); free(p->dirs); free(p->subtitle); free(p->title); @@ -248,7 +272,7 @@ void texivspace(struct texi *p) { - if (p->seenvs) + if (p->seenvs || TEXILIST_TABLE == p->list) return; teximacro(p, "Pp"); p->seenvs = 1; @@ -376,6 +400,95 @@ advanceto(struct texi *p, const char *buf, size_t *pos advance(p, buf, pos); } +static void +texiexecmacro(struct texi *p, struct teximacro *m, + const char *buf, size_t sz, size_t *pos) +{ + size_t valsz, realsz, aasz, asz, + ssz, i, j, k, start, end; + char *val; + char **args; + + args = argparse(p, buf, sz, pos, &asz, m->argsz); + if (asz != m->argsz) + texiwarn(p, "invalid macro argument length"); + aasz = asz < m->argsz ? asz : m->argsz; + + if (0 == aasz) { + parsemembuf(p, m->value, strlen(m->value)); + return; + } + + valsz = realsz = strlen(m->value); + val = strdup(m->value); + + for (i = j = 0; i < realsz; i++) { + /* Parse blindly til the backslash delimiter. */ + if ('\\' != m->value[i]) { + val[j++] = m->value[i]; + val[j] = '\0'; + continue; + } else if (i == realsz - 1) + texierr(p, "trailing argument name delimiter"); + + /* Double-backslash is escaped. */ + if ('\\' == m->value[i + 1]) { + val[j++] = m->value[i++]; + val[j] = '\0'; + continue; + } + + assert('\\' == m->value[i] && i < realsz - 1); + + /* Parse to terminating delimiter. */ + /* FIXME: embedded, escaped delimiters? */ + for (start = end = i + 1; end < realsz; end++) + if ('\\' == m->value[end]) + break; + if (end == realsz) + texierr(p, "unterminated argument name"); + + for (k = 0; k < aasz; k++) { + if ((ssz = strlen(m->args[k])) != (end - start)) + continue; + if (strncmp(&m->value[start], m->args[k], ssz)) + continue; + break; + } + + /* + * Argument didn't exist in argument table. + * No need to reallocate here: we just copy the text + * directly from the macro value into the buffer. + */ + if (k == aasz) { + for ( ; i < end; i++) + val[j++] = m->value[i]; + assert('\\' == m->value[i]); + val[j++] = m->value[i]; + val[j] = '\0'; + continue; + } + + if (strlen(args[k]) > ssz) { + valsz += strlen(args[k]); + val = realloc(val, valsz + 1); + if (NULL == val) + texiabort(p, NULL); + } + + j = strlcat(val, args[k], valsz + 1); + i = end; + } + + parsemembuf(p, val, strlen(val)); + + for (i = 0; i < asz; i++) + free(args[i]); + free(args); + free(val); +} + /* * Output a free-form word in the input stream, progressing to the next * command or white-space. @@ -426,13 +539,16 @@ texiword(struct texi *p, const char *buf, * index after the command name. */ enum texicmd -texicmd(struct texi *p, const char *buf, - size_t pos, size_t sz, size_t *end) +texicmd(struct texi *p, const char *buf, size_t pos, + size_t sz, size_t *end, struct teximacro **macro) { - size_t i, len; + size_t i, len, toksz; assert('@' == buf[pos]); + if (NULL != macro) + *macro = NULL; + if ((*end = pos) == sz) return(TEXICMD__MAX); else if ((*end = ++pos) == sz) @@ -452,11 +568,13 @@ texicmd(struct texi *p, const char *buf, return(TEXICMD__MAX); } + /* Scan to the end of the possible command name. */ for (*end = pos; *end < sz && ! ismspace(buf[*end]); (*end)++) if ((*end > pos && ('@' == buf[*end] || '{' == buf[*end] || '}' == buf[*end]))) break; + /* Look for the command. */ len = *end - pos; for (i = 0; i < TEXICMD__MAX; i++) { if (len != texitoks[i].len) @@ -465,6 +583,27 @@ texicmd(struct texi *p, const char *buf, return(i); } + /* Look for it in our indices. */ + for (i = 0; i < p->indexsz; i++) { + toksz = strlen(p->indexs[i]); + if (len != 5 + toksz) + continue; + if (strncmp(&buf[pos], p->indexs[i], toksz)) + continue; + if (0 == strncmp(&buf[pos + toksz], "index", 5)) + return(TEXICMD_USER_INDEX); + } + + for (i = 0; i < p->macrosz; i++) { + if (len != strlen(p->macros[i].key)) + continue; + if (strncmp(&buf[pos], p->macros[i].key, len)) + continue; + if (NULL != macro) + *macro = &p->macros[i]; + return(TEXICMD__MAX); + } + texiwarn(p, "bad command: @%.*s", (int)len, &buf[pos]); return(TEXICMD__MAX); } @@ -481,8 +620,9 @@ int parsearg(struct texi *p, const char *buf, size_t sz, size_t *pos, size_t num) { - size_t end; - enum texicmd cmd; + size_t end; + enum texicmd cmd; + struct teximacro *macro; while (*pos < sz && ismspace(buf[*pos])) advance(p, buf, pos); @@ -511,8 +651,10 @@ parsearg(struct texi *p, const char *buf, continue; } - cmd = texicmd(p, buf, *pos, sz, &end); + cmd = texicmd(p, buf, *pos, sz, &end, ¯o); advanceto(p, buf, pos, end); + if (NULL != macro) + texiexecmacro(p, macro, buf, sz, pos); if (TEXICMD__MAX == cmd) continue; if (NULL != texitoks[cmd].fp) @@ -528,8 +670,9 @@ parsearg(struct texi *p, const char *buf, void parsebracket(struct texi *p, const char *buf, size_t sz, size_t *pos) { - size_t end; - enum texicmd cmd; + size_t end; + enum texicmd cmd; + struct teximacro *macro; while (*pos < sz && ismspace(buf[*pos])) advance(p, buf, pos); @@ -555,8 +698,10 @@ parsebracket(struct texi *p, const char *buf, size_t s continue; } - cmd = texicmd(p, buf, *pos, sz, &end); + cmd = texicmd(p, buf, *pos, sz, &end, ¯o); advanceto(p, buf, pos, end); + if (NULL != macro) + texiexecmacro(p, macro, buf, sz, pos); if (TEXICMD__MAX == cmd) continue; if (NULL != texitoks[cmd].fp) @@ -572,8 +717,9 @@ parsebracket(struct texi *p, const char *buf, size_t s void parseeoln(struct texi *p, const char *buf, size_t sz, size_t *pos) { - size_t end; - enum texicmd cmd; + size_t end; + enum texicmd cmd; + struct teximacro *macro; while (*pos < sz && '\n' != buf[*pos]) { while (*pos < sz && isws(buf[*pos])) { @@ -600,8 +746,10 @@ parseeoln(struct texi *p, const char *buf, size_t sz, continue; } - cmd = texicmd(p, buf, *pos, sz, &end); + cmd = texicmd(p, buf, *pos, sz, &end, ¯o); advanceto(p, buf, pos, end); + if (NULL != macro) + texiexecmacro(p, macro, buf, sz, pos); if (TEXICMD__MAX == cmd) continue; if (NULL != texitoks[cmd].fp) @@ -616,8 +764,9 @@ parseeoln(struct texi *p, const char *buf, size_t sz, void parsesingle(struct texi *p, const char *buf, size_t sz, size_t *pos) { - size_t end; - enum texicmd cmd; + size_t end; + enum texicmd cmd; + struct teximacro *macro; if ((*pos = advancenext(p, buf, sz, pos)) >= sz) return; @@ -640,8 +789,10 @@ parsesingle(struct texi *p, const char *buf, size_t sz return; } - cmd = texicmd(p, buf, *pos, sz, &end); + cmd = texicmd(p, buf, *pos, sz, &end, ¯o); advanceto(p, buf, pos, end); + if (NULL != macro) + texiexecmacro(p, macro, buf, sz, pos); if (TEXICMD__MAX == cmd) return; if (NULL != texitoks[cmd].fp) @@ -667,7 +818,7 @@ parselinearg(struct texi *p, const char *buf, size_t s if (*pos < sz && '{' == buf[*pos]) parsebracket(p, buf, sz, pos); - else if ('\n' != buf[*pos]) + else if (*pos < sz && '\n' != buf[*pos]) parsesingle(p, buf, sz, pos); else return(0); @@ -688,6 +839,34 @@ parseeof(struct texi *p, const char *buf, size_t sz) } /* + * This is like parseeof() except that it's to be invoked on memory + * buffers while parsing a larger scope. + * This is useful for parsing macro sequences. + * The line, column, and name of the calling file context are saved, the + * column and line reset, then all of these restored after parse. + */ +void +parsemembuf(struct texi *p, const char *buf, size_t sz) +{ + size_t svln, svcol; + const char *svname; + + svln = p->files[p->filepos - 1].line; + svcol = p->files[p->filepos - 1].col; + svname = p->files[p->filepos - 1].name; + + p->files[p->filepos - 1].line = 0; + p->files[p->filepos - 1].col = 0; + p->files[p->filepos - 1].name = ""; + + parseeof(p, buf, sz); + + p->files[p->filepos - 1].line = svln; + p->files[p->filepos - 1].col = svcol; + p->files[p->filepos - 1].name = svname; +} + +/* * Parse a block sequence until we have the "@end endtoken" command * invocation. * This will return immediately at EOF. @@ -696,9 +875,10 @@ void parseto(struct texi *p, const char *buf, size_t sz, size_t *pos, const char *endtoken) { - size_t end; - enum texicmd cmd; - size_t endtoksz; + size_t end; + enum texicmd cmd; + size_t endtoksz; + struct teximacro *macro; endtoksz = strlen(endtoken); assert(endtoksz > 0); @@ -722,7 +902,7 @@ parseto(struct texi *p, const char *buf, continue; } - cmd = texicmd(p, buf, *pos, sz, &end); + cmd = texicmd(p, buf, *pos, sz, &end, ¯o); advanceto(p, buf, pos, end); if (TEXICMD_END == cmd) { while (*pos < sz && isws(buf[*pos])) @@ -740,9 +920,13 @@ parseto(struct texi *p, const char *buf, texiwarn(p, "unexpected \"end\""); advanceeoln(p, buf, sz, pos, 0); continue; - } else if (TEXICMD__MAX != cmd) - if (NULL != texitoks[cmd].fp) - (*texitoks[cmd].fp)(p, cmd, buf, sz, pos); + } + if (NULL != macro) + texiexecmacro(p, macro, buf, sz, pos); + if (TEXICMD__MAX == cmd) + continue; + if (NULL != texitoks[cmd].fp) + (*texitoks[cmd].fp)(p, cmd, buf, sz, pos); } } @@ -760,7 +944,8 @@ parsefile(struct texi *p, const char *fname, int parse struct stat st; size_t i; - assert(p->filepos < 64); + if (64 == p->filepos) + texierr(p, "too many open files"); f = &p->files[p->filepos]; memset(f, 0, sizeof(struct texifile)); @@ -801,3 +986,222 @@ parsefile(struct texi *p, const char *fname, int parse texifilepop(p); } +/* + * Look up the value to a stored pair's value starting in "buf" from + * start to end. + * Return the pointer to the value memory, which can be NULL if the + * pointer key does not exist. + * The pointer can point to NULL if the value has been unset. + */ +static char ** +valuequery(const struct texi *p, + const char *buf, size_t start, size_t end) +{ + size_t i, sz, len; + + assert(end >= start); + /* Ignore zero-length. */ + if (0 == (len = (end - start))) + return(NULL); + for (i = 0; i < p->valsz; i++) { + sz = strlen(p->vals[i].key); + if (sz != len) + continue; + if (0 == strncmp(p->vals[i].key, &buf[start], len)) + return(&p->vals[i].value); + } + return(NULL); +} + +/* + * Parse a key until the end of line, e.g., @clear foo\n, and return the + * pointer to its value via valuequery(). + */ +static char ** +valuelquery(struct texi *p, const char *buf, size_t sz, size_t *pos) +{ + size_t start, end; + char **ret; + + while (*pos < sz && isws(buf[*pos])) + advance(p, buf, pos); + if (*pos == sz) + return(NULL); + for (start = end = *pos; end < sz; end++) + if ('\n' == buf[end]) + break; + advanceto(p, buf, pos, end); + if (*pos < sz) { + assert('\n' == buf[*pos]); + advance(p, buf, pos); + } + if (NULL == (ret = valuequery(p, buf, start, end))) + return(NULL); + return(ret); +} + +void +valuelclear(struct texi *p, const char *buf, size_t sz, size_t *pos) +{ + char **ret; + + if (NULL == (ret = valuelquery(p, buf, sz, pos))) + return; + free(*ret); + *ret = NULL; +} + +const char * +valuellookup(struct texi *p, const char *buf, size_t sz, size_t *pos) +{ + char **ret; + + if (NULL == (ret = valuelquery(p, buf, sz, pos))) + return(NULL); + return(*ret); +} + +/* + * Parse a key from a bracketed string, e.g., @value{foo}, and return + * the pointer to its value. + * If the returned pointer is NULL, either there was no string within + * the brackets (or no brackets), or the value was not found, or the + * value had previously been unset. + */ +const char * +valueblookup(struct texi *p, const char *buf, size_t sz, size_t *pos) +{ + size_t start, end; + char **ret; + + while (*pos < sz && isws(buf[*pos])) + advance(p, buf, pos); + if (*pos == sz || '{' != buf[*pos]) + return(NULL); + advance(p, buf, pos); + for (start = end = *pos; end < sz; end++) + if ('}' == buf[end]) + break; + advanceto(p, buf, pos, end); + if (*pos < sz) { + assert('}' == buf[*pos]); + advance(p, buf, pos); + } + if (NULL == (ret = valuequery(p, buf, start, end))) + return(NULL); + return(*ret); +} + +void +valueadd(struct texi *p, char *key, char *val) +{ + size_t i; + + assert(NULL != key); + assert(NULL != val); + + for (i = 0; i < p->valsz; i++) + if (0 == strcmp(p->vals[i].key, key)) + break; + + if (i < p->valsz) { + free(key); + free(p->vals[i].value); + p->vals[i].value = val; + } else { + /* FIXME: reallocarray() */ + p->vals = realloc(p->vals, + (p->valsz + 1) * + sizeof(struct texivalue)); + if (NULL == p->vals) + texiabort(p, NULL); + p->vals[p->valsz].key = key; + p->vals[p->valsz].value = val; + p->valsz++; + } +} + +/* + * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the + * declaration form, @macro foo {arg1, ...}) and textually convert it to + * an array of arguments of size "argsz". + * These need to be freed individually and as a whole. + * NOTE: this will puke on @, or @} macros, which can trick it into + * stopping argument parsing earlier. + * Ergo, textual: this doesn't interpret the arguments in any way. + */ +char ** +argparse(struct texi *p, const char *buf, + size_t sz, size_t *pos, size_t *argsz, size_t hint) +{ + char **args; + size_t start, end, stack; + + while (*pos < sz && isws(buf[*pos])) + advance(p, buf, pos); + + args = NULL; + *argsz = 0; + + /* Check for no arguments. */ + if ('{' != buf[*pos]) + return(args); + + /* Parse til the closing '}', putting into the array. */ + advance(p, buf, pos); + while (*pos < sz) { + while (*pos < sz && isws(buf[*pos])) + advance(p, buf, pos); + start = *pos; + stack = 0; + while (*pos < sz) { + /* + * According to the manual, commas within + * embedded commands are escaped. + * We keep track of embedded-ness in the "stack" + * state anyway, so this is free. + */ + if (',' == buf[*pos] && 0 == stack && 1 != hint) + break; + else if (0 == stack && '}' == buf[*pos]) + break; + else if (0 != stack && '}' == buf[*pos]) + stack--; + else if ('{' == buf[*pos]) + stack++; + advance(p, buf, pos); + } + if (stack) + texiwarn(p, "unterminated macro " + "in macro arguments"); + if ((end = *pos) == sz) + break; + /* Test for zero-length '{ }'. */ + if (start == end && '}' == buf[*pos] && 0 == *argsz) + break; + if (start == end) + texierr(p, "zero-length argument"); + /* FIXME: use reallocarray. */ + args = realloc + (args, sizeof(char *) * + (*argsz + 1)); + if (NULL == args) + texiabort(p, NULL); + args[*argsz] = malloc(end - start + 1); + if (NULL == args[*argsz]) + texiabort(p, NULL); + memcpy(args[*argsz], + &buf[start], end - start); + args[*argsz][end - start] = '\0'; + (*argsz)++; + if ('}' == buf[*pos]) + break; + advance(p, buf, pos); + } + + if (*pos == sz) + texierr(p, "unterminated arguments"); + assert('}' == buf[*pos]); + advance(p, buf, pos); + return(args); +}