===================================================================
RCS file: /cvs/texi2mdoc/util.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -p -r1.6 -r1.7
--- texi2mdoc/util.c	2015/02/21 22:01:32	1.6
+++ texi2mdoc/util.c	2015/02/23 11:44:30	1.7
@@ -1,4 +1,4 @@
-/*	$Id: util.c,v 1.6 2015/02/21 22:01:32 kristaps Exp $ */
+/*	$Id: util.c,v 1.7 2015/02/23 11:44:30 kristaps Exp $ */
 /*
  * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -46,6 +46,27 @@ texifilepop(struct texi *p)
 	munmap(f->map, f->mapsz);
 }
 
+static void
+teximacrofree(struct teximacro *p)
+{
+	size_t	 n;
+
+	/* Drop every argument name, then the array and both strings. */
+	for (n = p->argsz; n > 0; n--)
+		free(p->args[n - 1]);
+	free(p->value);
+	free(p->key);
+	free(p->args);
+}
+
+static void
+texivaluefree(struct texivalue *p)
+{
+	/* Only the two strings are released, not the entry itself. */
+	free(p->value);
+	free(p->key);
+}
+
 /*
  * Unmap all files that we're currently using and free all resources
  * that we've allocated during the parse.
@@ -64,17 +85,16 @@ texiexit(struct texi *p)
 	while (p->filepos > 0)
 		texifilepop(p);
 
+	for (i = 0; i < p->macrosz; i++)
+		teximacrofree(&p->macros[i]);
 	for (i = 0; i < p->dirsz; i++)
 		free(p->dirs[i]);
-
 	for (i = 0; i < p->indexsz; i++)
 		free(p->indexs[i]);
+	for (i = 0; i < p->valsz; i++) 
+		texivaluefree(&p->vals[i]);
 
-	for (i = 0; i < p->valsz; i++) {
-		free(p->vals[i].value);
-		free(p->vals[i].key);
-	}
-
+	free(p->macros);
 	free(p->vals);
 	free(p->indexs);
 	free(p->dirs);
@@ -380,6 +400,95 @@ advanceto(struct texi *p, const char *buf, size_t *pos
 		advance(p, buf, pos);
 }
 
+/*
+ * Expand macro "m" using the arguments found at "pos": each \name\ in
+ * the body becomes the matching argument, then the text is parsed.
+ */
+static void
+texiexecmacro(struct texi *p, struct teximacro *m,
+	const char *buf, size_t sz, size_t *pos)
+{
+	size_t	  valsz, realsz, aasz, asz, ssz, i, j, k, start, end;
+	char	 *val = NULL;
+	char	**args;
+
+	args = argparse(p, buf, sz, pos, &asz);
+	if (asz != m->argsz)
+		texiwarn(p, "invalid macro argument length");
+	aasz = asz < m->argsz ? asz : m->argsz;
+
+	if (0 == aasz) {
+		parseeof(p, m->value, strlen(m->value));
+		goto out;
+	}
+
+	valsz = realsz = strlen(m->value);
+	if (NULL == (val = strdup(m->value)))
+		texiabort(p, NULL);
+	val[0] = '\0';	/* strlcat() below appends at the first NUL */
+
+	for (i = j = 0; i < realsz; i++) {
+		/* Copy plain text up to the next backslash delimiter. */
+		if ('\\' != m->value[i]) {
+			val[j++] = m->value[i];
+			val[j] = '\0';
+			continue;
+		} else if (i == realsz - 1)
+			texierr(p, "trailing argument name delimiter");
+
+		/* Double-backslash is escaped. */
+		if ('\\' == m->value[i + 1]) {
+			val[j++] = m->value[i++];
+			val[j] = '\0';
+			continue;
+		}
+		assert('\\' == m->value[i] && i < realsz - 1);
+
+		/* Parse to terminating delimiter (FIXME: escaped ones?). */
+		for (start = end = i + 1; end < realsz; end++)
+			if ('\\' == m->value[end])
+				break;
+		if (end == realsz)
+			texierr(p, "unterminated argument name");
+
+		for (k = 0; k < aasz; k++) {
+			ssz = strlen(m->args[k]);
+			if (ssz == end - start &&
+			    0 == strncmp(&m->value[start], m->args[k], ssz))
+				break;
+		}
+
+		/*
+		 * Unknown name: copy \name\ through unchanged.
+		 * The output cannot outgrow the input, so no realloc.
+		 */
+		if (k == aasz) {
+			for ( ; i < end; i++)
+				val[j++] = m->value[i];
+			assert('\\' == m->value[i]);
+			val[j++] = m->value[i];
+			val[j] = '\0';
+			continue;
+		}
+
+		if (strlen(args[k]) > ssz) {
+			valsz += strlen(args[k]);
+			if (NULL == (val = realloc(val, valsz + 1)))
+				texiabort(p, NULL);
+		}
+
+		j = strlcat(val, args[k], valsz + 1);
+		i = end;
+	}
+
+	parseeof(p, val, strlen(val));
+out:
+	for (i = 0; i < asz; i++)
+		free(args[i]);
+	free(args);
+	free(val);
+}
+
 /*
  * Output a free-form word in the input stream, progressing to the next
  * command or white-space.
@@ -430,13 +539,16 @@ texiword(struct texi *p, const char *buf, 
  * index after the command name.
  */
 enum texicmd
-texicmd(struct texi *p, const char *buf, 
-	size_t pos, size_t sz, size_t *end)
+texicmd(struct texi *p, const char *buf, size_t pos, 
+	size_t sz, size_t *end, struct teximacro **macro)
 {
 	size_t	 i, len, toksz;
 
 	assert('@' == buf[pos]);
 
+	if (NULL != macro)
+		*macro = NULL;
+
 	if ((*end = pos) == sz)
 		return(TEXICMD__MAX);
 	else if ((*end = ++pos) == sz)
@@ -479,9 +591,19 @@ texicmd(struct texi *p, const char *buf, 
 		if (strncmp(&buf[pos], p->indexs[i], toksz))
 			continue;
 		if (0 == strncmp(&buf[pos + toksz], "index", 5))
-			return(TEXICMD_INDEX);
+			return(TEXICMD_USER_INDEX);
 	}
 
+	for (i = 0; i < p->macrosz; i++) {
+		if (len != strlen(p->macros[i].key))
+			continue;
+		if (strncmp(&buf[pos], p->macros[i].key, len))
+			continue;
+		if (NULL != macro)
+			*macro = &p->macros[i];
+		return(TEXICMD__MAX);
+	}
+
 	texiwarn(p, "bad command: @%.*s", (int)len, &buf[pos]);
 	return(TEXICMD__MAX);
 }
@@ -498,8 +620,9 @@ int
 parsearg(struct texi *p, const char *buf, 
 	size_t sz, size_t *pos, size_t num)
 {
-	size_t		 end;
-	enum texicmd	 cmd;
+	size_t		  end;
+	enum texicmd	  cmd;
+	struct teximacro *macro;
 
 	while (*pos < sz && ismspace(buf[*pos]))
 		advance(p, buf, pos);
@@ -528,8 +651,10 @@ parsearg(struct texi *p, const char *buf, 
 			continue;
 		}
 
-		cmd = texicmd(p, buf, *pos, sz, &end);
+		cmd = texicmd(p, buf, *pos, sz, &end, &macro);
 		advanceto(p, buf, pos, end);
+		if (NULL != macro)
+			texiexecmacro(p, macro, buf, sz, pos);
 		if (TEXICMD__MAX == cmd) 
 			continue;
 		if (NULL != texitoks[cmd].fp)
@@ -545,8 +670,9 @@ parsearg(struct texi *p, const char *buf, 
 void
 parsebracket(struct texi *p, const char *buf, size_t sz, size_t *pos)
 {
-	size_t		 end;
-	enum texicmd	 cmd;
+	size_t		  end;
+	enum texicmd	  cmd;
+	struct teximacro *macro;
 
 	while (*pos < sz && ismspace(buf[*pos]))
 		advance(p, buf, pos);
@@ -572,8 +698,10 @@ parsebracket(struct texi *p, const char *buf, size_t s
 			continue;
 		}
 
-		cmd = texicmd(p, buf, *pos, sz, &end);
+		cmd = texicmd(p, buf, *pos, sz, &end, &macro);
 		advanceto(p, buf, pos, end);
+		if (NULL != macro)
+			texiexecmacro(p, macro, buf, sz, pos);
 		if (TEXICMD__MAX == cmd) 
 			continue;
 		if (NULL != texitoks[cmd].fp)
@@ -589,8 +717,9 @@ parsebracket(struct texi *p, const char *buf, size_t s
 void
 parseeoln(struct texi *p, const char *buf, size_t sz, size_t *pos)
 {
-	size_t		 end;
-	enum texicmd	 cmd;
+	size_t		  end;
+	enum texicmd	  cmd;
+	struct teximacro *macro;
 
 	while (*pos < sz && '\n' != buf[*pos]) {
 		while (*pos < sz && isws(buf[*pos])) {
@@ -617,8 +746,10 @@ parseeoln(struct texi *p, const char *buf, size_t sz, 
 			continue;
 		}
 
-		cmd = texicmd(p, buf, *pos, sz, &end);
+		cmd = texicmd(p, buf, *pos, sz, &end, &macro);
 		advanceto(p, buf, pos, end);
+		if (NULL != macro)
+			texiexecmacro(p, macro, buf, sz, pos);
 		if (TEXICMD__MAX == cmd) 
 			continue;
 		if (NULL != texitoks[cmd].fp)
@@ -633,8 +764,9 @@ parseeoln(struct texi *p, const char *buf, size_t sz, 
 void
 parsesingle(struct texi *p, const char *buf, size_t sz, size_t *pos)
 {
-	size_t		 end;
-	enum texicmd	 cmd;
+	size_t		  end;
+	enum texicmd	  cmd;
+	struct teximacro *macro;
 
 	if ((*pos = advancenext(p, buf, sz, pos)) >= sz)
 		return;
@@ -657,8 +789,10 @@ parsesingle(struct texi *p, const char *buf, size_t sz
 		return;
 	}
 
-	cmd = texicmd(p, buf, *pos, sz, &end);
+	cmd = texicmd(p, buf, *pos, sz, &end, &macro);
 	advanceto(p, buf, pos, end);
+	if (NULL != macro)
+		texiexecmacro(p, macro, buf, sz, pos);
 	if (TEXICMD__MAX == cmd) 
 		return;
 	if (NULL != texitoks[cmd].fp)
@@ -713,9 +847,10 @@ void
 parseto(struct texi *p, const char *buf, 
 	size_t sz, size_t *pos, const char *endtoken)
 {
-	size_t		 end;
-	enum texicmd	 cmd;
-	size_t		 endtoksz;
+	size_t		  end;
+	enum texicmd	  cmd;
+	size_t		  endtoksz;
+	struct teximacro *macro;
 
 	endtoksz = strlen(endtoken);
 	assert(endtoksz > 0);
@@ -739,7 +874,7 @@ parseto(struct texi *p, const char *buf, 
 			continue;
 		}
 
-		cmd = texicmd(p, buf, *pos, sz, &end);
+		cmd = texicmd(p, buf, *pos, sz, &end, &macro);
 		advanceto(p, buf, pos, end);
 		if (TEXICMD_END == cmd) {
 			while (*pos < sz && isws(buf[*pos]))
@@ -757,9 +892,13 @@ parseto(struct texi *p, const char *buf, 
 				texiwarn(p, "unexpected \"end\"");
 			advanceeoln(p, buf, sz, pos, 0);
 			continue;
-		} else if (TEXICMD__MAX != cmd)
-			if (NULL != texitoks[cmd].fp) 
-				(*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
+		} 
+		if (NULL != macro)
+			texiexecmacro(p, macro, buf, sz, pos);
+		if (TEXICMD__MAX == cmd) 
+			continue;
+		if (NULL != texitoks[cmd].fp) 
+			(*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
 	}
 }
 
@@ -952,4 +1091,89 @@ valueadd(struct texi *p, char *key, char *val)
 		p->vals[p->valsz].value = val;
 		p->valsz++;
 	}
+}
+
+/*
+ * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the
+ * declaration form, @macro foo {arg1, ...}) and textually convert it to
+ * an array of "argsz" strings; returns NULL (argsz zero) if there's no
+ * opening brace or the braces are empty.
+ * The caller must free each string and then the array itself.
+ * NOTE: the arguments are copied verbatim and not interpreted, so an
+ * escaped @, or @} is mistaken for a separator or closing brace.
+ */
+char **
+argparse(struct texi *p, const char *buf, 
+	size_t sz, size_t *pos, size_t *argsz)
+{
+	char	**args;
+	size_t	  start, end, stack;
+
+	while (*pos < sz && isws(buf[*pos]))
+		advance(p, buf, pos);
+
+	args = NULL;
+	*argsz = 0;
+
+	/* No arguments: input exhausted or no opening brace. */
+	if (*pos >= sz || '{' != buf[*pos])
+		return(args);
+
+	/* Parse until the closing '}', putting into the array. */
+	advance(p, buf, pos);
+	while (*pos < sz) {
+		while (*pos < sz && isws(buf[*pos]))
+			advance(p, buf, pos);
+		start = *pos;
+		stack = 0;
+		while (*pos < sz) {
+			/*
+			 * According to the manual, commas within
+			 * embedded commands are escaped.
+			 * We keep track of embedded-ness in the "stack"
+			 * state anyway, so this is free.
+			 */
+			if (0 == stack && ',' == buf[*pos])
+				break;
+			else if (0 == stack && '}' == buf[*pos])
+				break;
+			else if (0 != stack && '}' == buf[*pos])
+				stack--;
+			else if ('{' == buf[*pos])
+				stack++;
+			advance(p, buf, pos);
+		}
+		if (stack)
+			texiwarn(p, "unterminated macro "
+				"in macro arguments");
+		if ((end = *pos) == sz)
+			break;
+		/* Test for zero-length '{  }'. */
+		if (start == end && '}' == buf[*pos] && 0 == *argsz)
+			break;
+		if (start == end)
+			texierr(p, "zero-length argument");
+		/* FIXME: use reallocarray. */
+		args = realloc
+			(args, sizeof(char *) *
+			 (*argsz + 1));
+		if (NULL == args)
+			texiabort(p, NULL);
+		args[*argsz] = malloc(end - start + 1);
+		if (NULL == args[*argsz])
+			texiabort(p, NULL);
+		memcpy(args[*argsz],
+			&buf[start], end - start);
+		args[*argsz][end - start] = '\0';
+		(*argsz)++;
+		if ('}' == buf[*pos])
+			break;
+		advance(p, buf, pos);
+	}
+
+	if (*pos == sz) 
+		texierr(p, "unterminated arguments");
+	assert('}' == buf[*pos]);
+	advance(p, buf, pos);
+	return(args);
 }