=================================================================== RCS file: /cvs/texi2mdoc/util.c,v retrieving revision 1.6 retrieving revision 1.7 diff -u -p -r1.6 -r1.7 --- texi2mdoc/util.c 2015/02/21 22:01:32 1.6 +++ texi2mdoc/util.c 2015/02/23 11:44:30 1.7 @@ -1,4 +1,4 @@ -/* $Id: util.c,v 1.6 2015/02/21 22:01:32 kristaps Exp $ */ +/* $Id: util.c,v 1.7 2015/02/23 11:44:30 kristaps Exp $ */ /* * Copyright (c) 2015 Kristaps Dzonsons * @@ -46,6 +46,27 @@ texifilepop(struct texi *p) munmap(f->map, f->mapsz); } +static void +teximacrofree(struct teximacro *p) +{ + size_t i; + + for (i = 0; i < p->argsz; i++) + free(p->args[i]); + + free(p->args); + free(p->key); + free(p->value); +} + +static void +texivaluefree(struct texivalue *p) +{ + + free(p->key); + free(p->value); +} + /* * Unmap all files that we're currently using and free all resources * that we've allocated during the parse. @@ -64,17 +85,16 @@ texiexit(struct texi *p) while (p->filepos > 0) texifilepop(p); + for (i = 0; i < p->macrosz; i++) + teximacrofree(&p->macros[i]); for (i = 0; i < p->dirsz; i++) free(p->dirs[i]); - for (i = 0; i < p->indexsz; i++) free(p->indexs[i]); + for (i = 0; i < p->valsz; i++) + texivaluefree(&p->vals[i]); - for (i = 0; i < p->valsz; i++) { - free(p->vals[i].value); - free(p->vals[i].key); - } - + free(p->macros); free(p->vals); free(p->indexs); free(p->dirs); @@ -380,6 +400,95 @@ advanceto(struct texi *p, const char *buf, size_t *pos advance(p, buf, pos); } +static void +texiexecmacro(struct texi *p, struct teximacro *m, + const char *buf, size_t sz, size_t *pos) +{ + size_t valsz, realsz, aasz, asz, + ssz, i, j, k, start, end; + char *val; + char **args; + + args = argparse(p, buf, sz, pos, &asz); + if (asz != m->argsz) + texiwarn(p, "invalid macro argument length"); + aasz = asz < m->argsz ? 
asz : m->argsz; + + if (0 == aasz) { + parseeof(p, m->value, strlen(m->value)); + return; + } + + valsz = realsz = strlen(m->value); + val = strdup(m->value); + + for (i = j = 0; i < realsz; i++) { + /* Parse blindly til the backslash delimiter. */ + if ('\\' != m->value[i]) { + val[j++] = m->value[i]; + val[j] = '\0'; + continue; + } else if (i == realsz - 1) + texierr(p, "trailing argument name delimiter"); + + /* Double-backslash is escaped. */ + if ('\\' == m->value[i + 1]) { + val[j++] = m->value[i++]; + val[j] = '\0'; + continue; + } + + assert('\\' == m->value[i] && i < realsz - 1); + + /* Parse to terminating delimiter. */ + /* FIXME: embedded, escaped delimiters? */ + for (start = end = i + 1; end < realsz; end++) + if ('\\' == m->value[end]) + break; + if (end == realsz) + texierr(p, "unterminated argument name"); + + for (k = 0; k < aasz; k++) { + if ((ssz = strlen(m->args[k])) != (end - start)) + continue; + if (strncmp(&m->value[start], m->args[k], ssz)) + continue; + break; + } + + /* + * Argument didn't exist in argument table. + * No need to reallocate here: we just copy the text + * directly from the macro value into the buffer. + */ + if (k == aasz) { + for ( ; i < end; i++) + val[j++] = m->value[i]; + assert('\\' == m->value[i]); + val[j++] = m->value[i]; + val[j] = '\0'; + continue; + } + + if (strlen(args[k]) > ssz) { + valsz += strlen(args[k]); + val = realloc(val, valsz + 1); + if (NULL == val) + texiabort(p, NULL); + } + + j = strlcat(val, args[k], valsz + 1); + i = end; + } + + parseeof(p, val, strlen(val)); + + for (i = 0; i < asz; i++) + free(args[i]); + free(args); + free(val); +} + /* * Output a free-form word in the input stream, progressing to the next * command or white-space. @@ -430,13 +539,16 @@ texiword(struct texi *p, const char *buf, * index after the command name. 
*/ enum texicmd -texicmd(struct texi *p, const char *buf, - size_t pos, size_t sz, size_t *end) +texicmd(struct texi *p, const char *buf, size_t pos, + size_t sz, size_t *end, struct teximacro **macro) { size_t i, len, toksz; assert('@' == buf[pos]); + if (NULL != macro) + *macro = NULL; + if ((*end = pos) == sz) return(TEXICMD__MAX); else if ((*end = ++pos) == sz) @@ -479,9 +591,19 @@ texicmd(struct texi *p, const char *buf, if (strncmp(&buf[pos], p->indexs[i], toksz)) continue; if (0 == strncmp(&buf[pos + toksz], "index", 5)) - return(TEXICMD_INDEX); + return(TEXICMD_USER_INDEX); } + for (i = 0; i < p->macrosz; i++) { + if (len != strlen(p->macros[i].key)) + continue; + if (strncmp(&buf[pos], p->macros[i].key, len)) + continue; + if (NULL != macro) + *macro = &p->macros[i]; + return(TEXICMD__MAX); + } + texiwarn(p, "bad command: @%.*s", (int)len, &buf[pos]); return(TEXICMD__MAX); } @@ -498,8 +620,9 @@ int parsearg(struct texi *p, const char *buf, size_t sz, size_t *pos, size_t num) { - size_t end; - enum texicmd cmd; + size_t end; + enum texicmd cmd; + struct teximacro *macro; while (*pos < sz && ismspace(buf[*pos])) advance(p, buf, pos); @@ -528,8 +651,10 @@ parsearg(struct texi *p, const char *buf, continue; } - cmd = texicmd(p, buf, *pos, sz, &end); + cmd = texicmd(p, buf, *pos, sz, &end, &macro); advanceto(p, buf, pos, end); + if (NULL != macro) + texiexecmacro(p, macro, buf, sz, pos); if (TEXICMD__MAX == cmd) continue; if (NULL != texitoks[cmd].fp) @@ -545,8 +670,9 @@ parsearg(struct texi *p, const char *buf, void parsebracket(struct texi *p, const char *buf, size_t sz, size_t *pos) { - size_t end; - enum texicmd cmd; + size_t end; + enum texicmd cmd; + struct teximacro *macro; while (*pos < sz && ismspace(buf[*pos])) advance(p, buf, pos); @@ -572,8 +698,10 @@ parsebracket(struct texi *p, const char *buf, size_t s continue; } - cmd = texicmd(p, buf, *pos, sz, &end); + cmd = texicmd(p, buf, *pos, sz, &end, &macro); advanceto(p, buf, pos, end); + if (NULL != macro) + 
texiexecmacro(p, macro, buf, sz, pos); if (TEXICMD__MAX == cmd) continue; if (NULL != texitoks[cmd].fp) @@ -589,8 +717,9 @@ parsebracket(struct texi *p, const char *buf, size_t s void parseeoln(struct texi *p, const char *buf, size_t sz, size_t *pos) { - size_t end; - enum texicmd cmd; + size_t end; + enum texicmd cmd; + struct teximacro *macro; while (*pos < sz && '\n' != buf[*pos]) { while (*pos < sz && isws(buf[*pos])) { @@ -617,8 +746,10 @@ parseeoln(struct texi *p, const char *buf, size_t sz, continue; } - cmd = texicmd(p, buf, *pos, sz, &end); + cmd = texicmd(p, buf, *pos, sz, &end, &macro); advanceto(p, buf, pos, end); + if (NULL != macro) + texiexecmacro(p, macro, buf, sz, pos); if (TEXICMD__MAX == cmd) continue; if (NULL != texitoks[cmd].fp) @@ -633,8 +764,9 @@ parseeoln(struct texi *p, const char *buf, size_t sz, void parsesingle(struct texi *p, const char *buf, size_t sz, size_t *pos) { - size_t end; - enum texicmd cmd; + size_t end; + enum texicmd cmd; + struct teximacro *macro; if ((*pos = advancenext(p, buf, sz, pos)) >= sz) return; @@ -657,8 +789,10 @@ parsesingle(struct texi *p, const char *buf, size_t sz return; } - cmd = texicmd(p, buf, *pos, sz, &end); + cmd = texicmd(p, buf, *pos, sz, &end, &macro); advanceto(p, buf, pos, end); + if (NULL != macro) + texiexecmacro(p, macro, buf, sz, pos); if (TEXICMD__MAX == cmd) return; if (NULL != texitoks[cmd].fp) @@ -713,9 +847,10 @@ void parseto(struct texi *p, const char *buf, size_t sz, size_t *pos, const char *endtoken) { - size_t end; - enum texicmd cmd; - size_t endtoksz; + size_t end; + enum texicmd cmd; + size_t endtoksz; + struct teximacro *macro; endtoksz = strlen(endtoken); assert(endtoksz > 0); @@ -739,7 +874,7 @@ parseto(struct texi *p, const char *buf, continue; } - cmd = texicmd(p, buf, *pos, sz, &end); + cmd = texicmd(p, buf, *pos, sz, &end, &macro); advanceto(p, buf, pos, end); if (TEXICMD_END == cmd) { while (*pos < sz && isws(buf[*pos])) @@ -757,9 +892,13 @@ parseto(struct texi *p, const char *buf, 
texiwarn(p, "unexpected \"end\""); advanceeoln(p, buf, sz, pos, 0); continue; - } else if (TEXICMD__MAX != cmd) - if (NULL != texitoks[cmd].fp) - (*texitoks[cmd].fp)(p, cmd, buf, sz, pos); + } + if (NULL != macro) + texiexecmacro(p, macro, buf, sz, pos); + if (TEXICMD__MAX == cmd) + continue; + if (NULL != texitoks[cmd].fp) + (*texitoks[cmd].fp)(p, cmd, buf, sz, pos); } } @@ -952,4 +1091,89 @@ valueadd(struct texi *p, char *key, char *val) p->vals[p->valsz].value = val; p->valsz++; } +} + +/* + * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the + * declaration form, @macro foo {arg1, ...}) and textually convert it to + * an array of arguments of size "argsz". + * These need to be freed individually and as a whole. + * NOTE: this will puke on @, or @} macros, which can trick it into + * stopping argument parsing earlier. + * Ergo, textual: this doesn't interpret the arguments in any way. + */ +char ** +argparse(struct texi *p, const char *buf, + size_t sz, size_t *pos, size_t *argsz) +{ + char **args; + size_t start, end, stack; + + while (*pos < sz && isws(buf[*pos])) + advance(p, buf, pos); + + args = NULL; + *argsz = 0; + + /* Check for no arguments. */ + if ('{' != buf[*pos]) + return(args); + + /* Parse til the closing '}', putting into the array. */ + advance(p, buf, pos); + while (*pos < sz) { + while (*pos < sz && isws(buf[*pos])) + advance(p, buf, pos); + start = *pos; + stack = 0; + while (*pos < sz) { + /* + * According to the manual, commas within + * embedded commands are escaped. + * We keep track of embedded-ness in the "stack" + * state anyway, so this is free. + */ + if (0 == stack && ',' == buf[*pos]) + break; + else if (0 == stack && '}' == buf[*pos]) + break; + else if (0 != stack && '}' == buf[*pos]) + stack--; + else if ('{' == buf[*pos]) + stack++; + advance(p, buf, pos); + } + if (stack) + texiwarn(p, "unterminated macro " + "in macro arguments"); + if ((end = *pos) == sz) + break; + /* Test for zero-length '{ }'. 
*/ + if (start == end && '}' == buf[*pos] && 0 == *argsz) + break; + if (start == end) + texierr(p, "zero-length argument"); + /* FIXME: use reallocarray. */ + args = realloc + (args, sizeof(char *) * + (*argsz + 1)); + if (NULL == args) + texiabort(p, NULL); + args[*argsz] = malloc(end - start + 1); + if (NULL == args[*argsz]) + texiabort(p, NULL); + memcpy(args[*argsz], + &buf[start], end - start); + args[*argsz][end - start] = '\0'; + (*argsz)++; + if ('}' == buf[*pos]) + break; + advance(p, buf, pos); + } + + if (*pos == sz) + texierr(p, "unterminated arguments"); + assert('}' == buf[*pos]); + advance(p, buf, pos); + return(args); }