[BACK]Return to eqn.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

File: [cvsweb.bsd.lv] / mandoc / eqn.c (download)

Revision 1.85, Wed Apr 13 20:26:19 2022 UTC (23 months, 2 weeks ago) by schwarze
Branch: MAIN
Changes since 1.84: +15 -11 lines

To prevent infinite recursion while expanding eqn(7) definitions,
we must not reset the recursion counter when moving beyond the end
of the *previous* expansion, but we may only do so when moving
beyond the rightmost position reached by *any* expansion in the
current equation.  This matters because definitions can nest;
consider:

.EQ
define inner "content"
define outer "inner outer"
outer
.EN

This endless loop was found by tb@ using afl(1).

Incidentally, GNU eqn(1) also performs an infinite loop in this
situation and then crashes when memory runs out, but that's not an
excuse for nasty behaviour of mandoc(1).

While here, consistently print the expanded content even when the
expansion is finally truncated.  While that is not likely to help
end-users, it may help authors of eqn(7) code to understand what's
going on.  Besides, it sends a very clear signal that something is
amiss, which was easy to miss in the past unless people
enabled -W error or used -T lint.

/* $Id: eqn.c,v 1.85 2022/04/13 20:26:19 schwarze Exp $ */
/*
 * Copyright (c) 2014, 2015, 2017, 2018, 2020, 2022
 *               Ingo Schwarze <schwarze@openbsd.org>
 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include "config.h"

#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "mandoc_aux.h"
#include "mandoc.h"
#include "roff.h"
#include "eqn.h"
#include "libmandoc.h"
#include "eqn_parse.h"

#define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
#define	STRNEQ(p1, sz1, p2, sz2) \
	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))

enum	eqn_tok {
	EQN_TOK_DYAD = 0,
	EQN_TOK_VEC,
	EQN_TOK_UNDER,
	EQN_TOK_BAR,
	EQN_TOK_TILDE,
	EQN_TOK_HAT,
	EQN_TOK_DOT,
	EQN_TOK_DOTDOT,
	EQN_TOK_FWD,
	EQN_TOK_BACK,
	EQN_TOK_DOWN,
	EQN_TOK_UP,
	EQN_TOK_FAT,
	EQN_TOK_ROMAN,
	EQN_TOK_ITALIC,
	EQN_TOK_BOLD,
	EQN_TOK_SIZE,
	EQN_TOK_SUB,
	EQN_TOK_SUP,
	EQN_TOK_SQRT,
	EQN_TOK_OVER,
	EQN_TOK_FROM,
	EQN_TOK_TO,
	EQN_TOK_BRACE_OPEN,
	EQN_TOK_BRACE_CLOSE,
	EQN_TOK_GSIZE,
	EQN_TOK_GFONT,
	EQN_TOK_MARK,
	EQN_TOK_LINEUP,
	EQN_TOK_LEFT,
	EQN_TOK_RIGHT,
	EQN_TOK_PILE,
	EQN_TOK_LPILE,
	EQN_TOK_RPILE,
	EQN_TOK_CPILE,
	EQN_TOK_MATRIX,
	EQN_TOK_CCOL,
	EQN_TOK_LCOL,
	EQN_TOK_RCOL,
	EQN_TOK_DELIM,
	EQN_TOK_DEFINE,
	EQN_TOK_TDEFINE,
	EQN_TOK_NDEFINE,
	EQN_TOK_UNDEF,
	EQN_TOK_ABOVE,
	EQN_TOK__MAX,
	EQN_TOK_FUNC,
	EQN_TOK_QUOTED,
	EQN_TOK_SYM,
	EQN_TOK_EOF
};

static	const char *eqn_toks[EQN_TOK__MAX] = {
	"dyad", /* EQN_TOK_DYAD */
	"vec", /* EQN_TOK_VEC */
	"under", /* EQN_TOK_UNDER */
	"bar", /* EQN_TOK_BAR */
	"tilde", /* EQN_TOK_TILDE */
	"hat", /* EQN_TOK_HAT */
	"dot", /* EQN_TOK_DOT */
	"dotdot", /* EQN_TOK_DOTDOT */
	"fwd", /* EQN_TOK_FWD * */
	"back", /* EQN_TOK_BACK */
	"down", /* EQN_TOK_DOWN */
	"up", /* EQN_TOK_UP */
	"fat", /* EQN_TOK_FAT */
	"roman", /* EQN_TOK_ROMAN */
	"italic", /* EQN_TOK_ITALIC */
	"bold", /* EQN_TOK_BOLD */
	"size", /* EQN_TOK_SIZE */
	"sub", /* EQN_TOK_SUB */
	"sup", /* EQN_TOK_SUP */
	"sqrt", /* EQN_TOK_SQRT */
	"over", /* EQN_TOK_OVER */
	"from", /* EQN_TOK_FROM */
	"to", /* EQN_TOK_TO */
	"{", /* EQN_TOK_BRACE_OPEN */
	"}", /* EQN_TOK_BRACE_CLOSE */
	"gsize", /* EQN_TOK_GSIZE */
	"gfont", /* EQN_TOK_GFONT */
	"mark", /* EQN_TOK_MARK */
	"lineup", /* EQN_TOK_LINEUP */
	"left", /* EQN_TOK_LEFT */
	"right", /* EQN_TOK_RIGHT */
	"pile", /* EQN_TOK_PILE */
	"lpile", /* EQN_TOK_LPILE */
	"rpile", /* EQN_TOK_RPILE */
	"cpile", /* EQN_TOK_CPILE */
	"matrix", /* EQN_TOK_MATRIX */
	"ccol", /* EQN_TOK_CCOL */
	"lcol", /* EQN_TOK_LCOL */
	"rcol", /* EQN_TOK_RCOL */
	"delim", /* EQN_TOK_DELIM */
	"define", /* EQN_TOK_DEFINE */
	"tdefine", /* EQN_TOK_TDEFINE */
	"ndefine", /* EQN_TOK_NDEFINE */
	"undef", /* EQN_TOK_UNDEF */
	"above", /* EQN_TOK_ABOVE */
};

static	const char *const eqn_func[] = {
	"acos",	"acsc",	"and",	"arc",	"asec",	"asin", "atan",
	"cos",	"cosh", "coth",	"csc",	"det",	"exp",	"for",
	"if",	"lim",	"ln",	"log",	"max",	"min",
	"sec",	"sin",	"sinh",	"tan",	"tanh",	"Im",	"Re",
};

enum	eqn_symt {
	EQNSYM_alpha = 0,
	EQNSYM_beta,
	EQNSYM_chi,
	EQNSYM_delta,
	EQNSYM_epsilon,
	EQNSYM_eta,
	EQNSYM_gamma,
	EQNSYM_iota,
	EQNSYM_kappa,
	EQNSYM_lambda,
	EQNSYM_mu,
	EQNSYM_nu,
	EQNSYM_omega,
	EQNSYM_omicron,
	EQNSYM_phi,
	EQNSYM_pi,
	EQNSYM_ps,
	EQNSYM_rho,
	EQNSYM_sigma,
	EQNSYM_tau,
	EQNSYM_theta,
	EQNSYM_upsilon,
	EQNSYM_xi,
	EQNSYM_zeta,
	EQNSYM_DELTA,
	EQNSYM_GAMMA,
	EQNSYM_LAMBDA,
	EQNSYM_OMEGA,
	EQNSYM_PHI,
	EQNSYM_PI,
	EQNSYM_PSI,
	EQNSYM_SIGMA,
	EQNSYM_THETA,
	EQNSYM_UPSILON,
	EQNSYM_XI,
	EQNSYM_inter,
	EQNSYM_union,
	EQNSYM_prod,
	EQNSYM_int,
	EQNSYM_sum,
	EQNSYM_grad,
	EQNSYM_del,
	EQNSYM_times,
	EQNSYM_cdot,
	EQNSYM_nothing,
	EQNSYM_approx,
	EQNSYM_prime,
	EQNSYM_half,
	EQNSYM_partial,
	EQNSYM_inf,
	EQNSYM_muchgreat,
	EQNSYM_muchless,
	EQNSYM_larrow,
	EQNSYM_rarrow,
	EQNSYM_pm,
	EQNSYM_nequal,
	EQNSYM_equiv,
	EQNSYM_lessequal,
	EQNSYM_moreequal,
	EQNSYM_minus,
	EQNSYM__MAX
};

struct	eqnsym {
	const char	*str;
	const char	*sym;
};

static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
	{ "alpha", "*a" }, /* EQNSYM_alpha */
	{ "beta", "*b" }, /* EQNSYM_beta */
	{ "chi", "*x" }, /* EQNSYM_chi */
	{ "delta", "*d" }, /* EQNSYM_delta */
	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
	{ "eta", "*y" }, /* EQNSYM_eta */
	{ "gamma", "*g" }, /* EQNSYM_gamma */
	{ "iota", "*i" }, /* EQNSYM_iota */
	{ "kappa", "*k" }, /* EQNSYM_kappa */
	{ "lambda", "*l" }, /* EQNSYM_lambda */
	{ "mu", "*m" }, /* EQNSYM_mu */
	{ "nu", "*n" }, /* EQNSYM_nu */
	{ "omega", "*w" }, /* EQNSYM_omega */
	{ "omicron", "*o" }, /* EQNSYM_omicron */
	{ "phi", "*f" }, /* EQNSYM_phi */
	{ "pi", "*p" }, /* EQNSYM_pi */
	{ "psi", "*q" }, /* EQNSYM_psi */
	{ "rho", "*r" }, /* EQNSYM_rho */
	{ "sigma", "*s" }, /* EQNSYM_sigma */
	{ "tau", "*t" }, /* EQNSYM_tau */
	{ "theta", "*h" }, /* EQNSYM_theta */
	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
	{ "xi", "*c" }, /* EQNSYM_xi */
	{ "zeta", "*z" }, /* EQNSYM_zeta */
	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
	{ "PHI", "*F" }, /* EQNSYM_PHI */
	{ "PI", "*P" }, /* EQNSYM_PI */
	{ "PSI", "*Q" }, /* EQNSYM_PSI */
	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
	{ "THETA", "*H" }, /* EQNSYM_THETA */
	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
	{ "XI", "*C" }, /* EQNSYM_XI */
	{ "inter", "ca" }, /* EQNSYM_inter */
	{ "union", "cu" }, /* EQNSYM_union */
	{ "prod", "product" }, /* EQNSYM_prod */
	{ "int", "integral" }, /* EQNSYM_int */
	{ "sum", "sum" }, /* EQNSYM_sum */
	{ "grad", "gr" }, /* EQNSYM_grad */
	{ "del", "gr" }, /* EQNSYM_del */
	{ "times", "mu" }, /* EQNSYM_times */
	{ "cdot", "pc" }, /* EQNSYM_cdot */
	{ "nothing", "&" }, /* EQNSYM_nothing */
	{ "approx", "~~" }, /* EQNSYM_approx */
	{ "prime", "fm" }, /* EQNSYM_prime */
	{ "half", "12" }, /* EQNSYM_half */
	{ "partial", "pd" }, /* EQNSYM_partial */
	{ "inf", "if" }, /* EQNSYM_inf */
	{ ">>", ">>" }, /* EQNSYM_muchgreat */
	{ "<<", "<<" }, /* EQNSYM_muchless */
	{ "<-", "<-" }, /* EQNSYM_larrow */
	{ "->", "->" }, /* EQNSYM_rarrow */
	{ "+-", "+-" }, /* EQNSYM_pm */
	{ "!=", "!=" }, /* EQNSYM_nequal */
	{ "==", "==" }, /* EQNSYM_equiv */
	{ "<=", "<=" }, /* EQNSYM_lessequal */
	{ ">=", ">=" }, /* EQNSYM_moreequal */
	{ "-", "mi" }, /* EQNSYM_minus */
};

enum	parse_mode {
	MODE_QUOTED,
	MODE_NOSUB,
	MODE_SUB,
	MODE_TOK
};

struct	eqn_def {
	char		 *key;
	size_t		  keysz;
	char		 *val;
	size_t		  valsz;
};

static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
				struct eqn_box *);
static	void		 eqn_def(struct eqn_node *);
static	struct eqn_def	*eqn_def_find(struct eqn_node *);
static	void		 eqn_delim(struct eqn_node *);
static	enum eqn_tok	 eqn_next(struct eqn_node *, enum parse_mode);
static	void		 eqn_undef(struct eqn_node *);


struct eqn_node *
eqn_alloc(void)
{
	struct eqn_node *ep;

	ep = mandoc_calloc(1, sizeof(*ep));
	ep->gsize = EQN_DEFSIZE;
	return ep;
}

void
eqn_reset(struct eqn_node *ep)
{
	free(ep->data);
	ep->data = ep->start = ep->end = NULL;
	ep->sz = ep->toksz = 0;
}

void
eqn_read(struct eqn_node *ep, const char *p)
{
	char		*cp;

	if (ep->data == NULL) {
		ep->sz = strlen(p);
		ep->data = mandoc_strdup(p);
	} else {
		ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p);
		free(ep->data);
		ep->data = cp;
	}
	ep->sz += 1;
}

/*
 * Find the key "key" of the give size within our eqn-defined values.
 */
static struct eqn_def *
eqn_def_find(struct eqn_node *ep)
{
	int		 i;

	for (i = 0; i < (int)ep->defsz; i++)
		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
		    ep->defs[i].keysz, ep->start, ep->toksz))
			return &ep->defs[i];

	return NULL;
}

/*
 * Parse a token from the input text.  The modes are:
 * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
 *   before its next occurence.  Do not interpret the token in any
 *   way and return EQN_TOK_QUOTED.  All other modes behave like
 *   MODE_QUOTED when *ep->start is '"'.
 * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
 *   otherwise, it ends before the next whitespace or brace.
 *   Do not interpret the token and return EQN_TOK__MAX.
 * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
 *   alias created with define.  If it is an alias, replace it with
 *   its string value and reparse.
 * MODE_TOK: Like MODE_SUB, but also check the token against the list
 *   of tokens, and if there is a match, return that token.  Otherwise,
 *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
 *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
 *   a token match, *ep->start is set to an allocated string that the
 *   caller is expected to free.
 * All modes skip whitespace following the end of the token.
 */
static enum eqn_tok
eqn_next(struct eqn_node *ep, enum parse_mode mode)
{
	struct eqn_def	*def;
	size_t		 start;
	int		 diff, i, newlen, quoted;
	enum eqn_tok	 tok;

	/*
	 * Reset the recursion counter after advancing
	 * beyond the end of the rightmost substitution.
	 */
	if (ep->end - ep->data >= ep->sublen)
		ep->subcnt = 0;

	ep->start = ep->end;
	quoted = mode == MODE_QUOTED;
	for (;;) {
		switch (*ep->start) {
		case '\0':
			ep->toksz = 0;
			return EQN_TOK_EOF;
		case '"':
			quoted = 1;
			break;
		case ' ':
		case '\t':
		case '~':
		case '^':
			if (quoted)
				break;
			ep->start++;
			continue;
		default:
			break;
		}
		if (quoted) {
			ep->end = strchr(ep->start + 1, *ep->start);
			ep->start++;  /* Skip opening quote. */
			if (ep->end == NULL) {
				mandoc_msg(MANDOCERR_ARG_QUOTE,
				    ep->node->line, ep->node->pos, NULL);
				ep->end = strchr(ep->start, '\0');
			}
		} else {
			ep->end = ep->start + 1;
			if (*ep->start != '{' && *ep->start != '}')
				ep->end += strcspn(ep->end, " ^~\"{}\t");
		}
		ep->toksz = ep->end - ep->start;
		if (quoted && *ep->end != '\0')
			ep->end++;  /* Skip closing quote. */
		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
			ep->end++;
		if (quoted)  /* Cannot return, may have to strndup. */
			break;
		if (mode == MODE_NOSUB)
			return EQN_TOK__MAX;
		if ((def = eqn_def_find(ep)) == NULL)
			break;
		if (++ep->subcnt > EQN_NEST_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ep->node->line, ep->node->pos, NULL);
			break;
		}

		/* Replace a defined name with its string value. */
		if ((diff = def->valsz - ep->toksz) > 0) {
			start = ep->start - ep->data;
			ep->sz += diff;
			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
			ep->start = ep->data + start;
			ep->sublen += diff;
		}
		if (diff)
			memmove(ep->start + def->valsz, ep->start + ep->toksz,
			    strlen(ep->start + ep->toksz) + 1);
		memcpy(ep->start, def->val, def->valsz);
		newlen = ep->start - ep->data + def->valsz;
		if (ep->sublen < newlen)
			ep->sublen = newlen;
	}
	if (mode != MODE_TOK)
		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
	if (quoted) {
		ep->start = mandoc_strndup(ep->start, ep->toksz);
		return EQN_TOK_QUOTED;
	}
	for (tok = 0; tok < EQN_TOK__MAX; tok++)
		if (STRNEQ(ep->start, ep->toksz,
		    eqn_toks[tok], strlen(eqn_toks[tok])))
			return tok;

	for (i = 0; i < EQNSYM__MAX; i++) {
		if (STRNEQ(ep->start, ep->toksz,
		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
			mandoc_asprintf(&ep->start,
			    "\\[%s]", eqnsyms[i].sym);
			return EQN_TOK_SYM;
		}
	}
	ep->start = mandoc_strndup(ep->start, ep->toksz);
	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
		if (STRNEQ(ep->start, ep->toksz,
		    eqn_func[i], strlen(eqn_func[i])))
			return EQN_TOK_FUNC;
	return EQN_TOK__MAX;
}

void
eqn_box_free(struct eqn_box *bp)
{
	if (bp == NULL)
		return;

	if (bp->first)
		eqn_box_free(bp->first);
	if (bp->next)
		eqn_box_free(bp->next);

	free(bp->text);
	free(bp->left);
	free(bp->right);
	free(bp->top);
	free(bp->bottom);
	free(bp);
}

struct eqn_box *
eqn_box_new(void)
{
	struct eqn_box	*bp;

	bp = mandoc_calloc(1, sizeof(*bp));
	bp->expectargs = UINT_MAX;
	return bp;
}

/*
 * Allocate a box as the last child of the parent node.
 */
static struct eqn_box *
eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
{
	struct eqn_box	*bp;

	bp = eqn_box_new();
	bp->parent = parent;
	bp->parent->args++;
	bp->font = bp->parent->font;
	bp->size = ep->gsize;

	if (NULL != parent->first) {
		parent->last->next = bp;
		bp->prev = parent->last;
	} else
		parent->first = bp;

	parent->last = bp;
	return bp;
}

/*
 * Reparent the current last node (of the current parent) under a new
 * EQN_SUBEXPR as the first element.
 * Then return the new parent.
 * The new EQN_SUBEXPR will have a two-child limit.
 */
static struct eqn_box *
eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent)
{
	struct eqn_box	*b, *newb;

	assert(NULL != parent->last);
	b = parent->last;
	if (parent->last == parent->first)
		parent->first = NULL;
	parent->args--;
	parent->last = b->prev;
	b->prev = NULL;
	newb = eqn_box_alloc(ep, parent);
	newb->type = EQN_SUBEXPR;
	newb->expectargs = 2;
	newb->args = 1;
	newb->first = newb->last = b;
	newb->first->next = NULL;
	b->parent = newb;
	return newb;
}

/*
 * Parse the "delim" control statement.
 */
static void
eqn_delim(struct eqn_node *ep)
{
	if (ep->end[0] == '\0' || ep->end[1] == '\0') {
		mandoc_msg(MANDOCERR_REQ_EMPTY,
		    ep->node->line, ep->node->pos, "delim");
		if (ep->end[0] != '\0')
			ep->end++;
	} else if (strncmp(ep->end, "off", 3) == 0) {
		ep->delim = 0;
		ep->end += 3;
	} else if (strncmp(ep->end, "on", 2) == 0) {
		if (ep->odelim && ep->cdelim)
			ep->delim = 1;
		ep->end += 2;
	} else {
		ep->odelim = *ep->end++;
		ep->cdelim = *ep->end++;
		ep->delim = 1;
	}
}

/*
 * Undefine a previously-defined string.
 */
static void
eqn_undef(struct eqn_node *ep)
{
	struct eqn_def	*def;

	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
		mandoc_msg(MANDOCERR_REQ_EMPTY,
		    ep->node->line, ep->node->pos, "undef");
		return;
	}
	if ((def = eqn_def_find(ep)) == NULL)
		return;
	free(def->key);
	free(def->val);
	def->key = def->val = NULL;
	def->keysz = def->valsz = 0;
}

static void
eqn_def(struct eqn_node *ep)
{
	struct eqn_def	*def;
	int		 i;

	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
		mandoc_msg(MANDOCERR_REQ_EMPTY,
		    ep->node->line, ep->node->pos, "define");
		return;
	}

	/*
	 * Search for a key that already exists.
	 * Create a new key if none is found.
	 */
	if ((def = eqn_def_find(ep)) == NULL) {
		/* Find holes in string array. */
		for (i = 0; i < (int)ep->defsz; i++)
			if (0 == ep->defs[i].keysz)
				break;

		if (i == (int)ep->defsz) {
			ep->defsz++;
			ep->defs = mandoc_reallocarray(ep->defs,
			    ep->defsz, sizeof(struct eqn_def));
			ep->defs[i].key = ep->defs[i].val = NULL;
		}

		def = ep->defs + i;
		free(def->key);
		def->key = mandoc_strndup(ep->start, ep->toksz);
		def->keysz = ep->toksz;
	}

	if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
		mandoc_msg(MANDOCERR_REQ_EMPTY,
		    ep->node->line, ep->node->pos, "define %s", def->key);
		free(def->key);
		free(def->val);
		def->key = def->val = NULL;
		def->keysz = def->valsz = 0;
		return;
	}
	free(def->val);
	def->val = mandoc_strndup(ep->start, ep->toksz);
	def->valsz = ep->toksz;
}

void
eqn_parse(struct eqn_node *ep)
{
	struct eqn_box	*cur, *nbox, *parent, *split;
	const char	*cp, *cpn;
	char		*p;
	enum eqn_tok	 tok;
	enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
	int		 size;

	parent = ep->node->eqn;
	assert(parent != NULL);

	/*
	 * Empty equation.
	 * Do not add it to the high-level syntax tree.
	 */

	if (ep->data == NULL)
		return;

	ep->start = ep->end = ep->data;
	ep->sublen = 0;
	ep->subcnt = 0;

next_tok:
	tok = eqn_next(ep, MODE_TOK);
	switch (tok) {
	case EQN_TOK_UNDEF:
		eqn_undef(ep);
		break;
	case EQN_TOK_NDEFINE:
	case EQN_TOK_DEFINE:
		eqn_def(ep);
		break;
	case EQN_TOK_TDEFINE:
		if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
		    eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
			mandoc_msg(MANDOCERR_REQ_EMPTY,
			    ep->node->line, ep->node->pos, "tdefine");
		break;
	case EQN_TOK_DELIM:
		eqn_delim(ep);
		break;
	case EQN_TOK_GFONT:
		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
		break;
	case EQN_TOK_MARK:
	case EQN_TOK_LINEUP:
		/* Ignore these. */
		break;
	case EQN_TOK_DYAD:
	case EQN_TOK_VEC:
	case EQN_TOK_UNDER:
	case EQN_TOK_BAR:
	case EQN_TOK_TILDE:
	case EQN_TOK_HAT:
	case EQN_TOK_DOT:
	case EQN_TOK_DOTDOT:
		if (parent->last == NULL) {
			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
			cur = eqn_box_alloc(ep, parent);
			cur->type = EQN_TEXT;
			cur->text = mandoc_strdup("");
		}
		parent = eqn_box_makebinary(ep, parent);
		parent->type = EQN_LIST;
		parent->expectargs = 1;
		parent->font = EQNFONT_ROMAN;
		switch (tok) {
		case EQN_TOK_DOTDOT:
			parent->top = mandoc_strdup("\\[ad]");
			break;
		case EQN_TOK_VEC:
			parent->top = mandoc_strdup("\\[->]");
			break;
		case EQN_TOK_DYAD:
			parent->top = mandoc_strdup("\\[<>]");
			break;
		case EQN_TOK_TILDE:
			parent->top = mandoc_strdup("\\[a~]");
			break;
		case EQN_TOK_UNDER:
			parent->bottom = mandoc_strdup("\\[ul]");
			break;
		case EQN_TOK_BAR:
			parent->top = mandoc_strdup("\\[rn]");
			break;
		case EQN_TOK_DOT:
			parent->top = mandoc_strdup("\\[a.]");
			break;
		case EQN_TOK_HAT:
			parent->top = mandoc_strdup("\\[ha]");
			break;
		default:
			abort();
		}
		parent = parent->parent;
		break;
	case EQN_TOK_FWD:
	case EQN_TOK_BACK:
	case EQN_TOK_DOWN:
	case EQN_TOK_UP:
		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
		break;
	case EQN_TOK_FAT:
	case EQN_TOK_ROMAN:
	case EQN_TOK_ITALIC:
	case EQN_TOK_BOLD:
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		/*
		 * These values apply to the next word or sequence of
		 * words; thus, we mark that we'll have a child with
		 * exactly one of those.
		 */
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_LIST;
		parent->expectargs = 1;
		switch (tok) {
		case EQN_TOK_FAT:
			parent->font = EQNFONT_FAT;
			break;
		case EQN_TOK_ROMAN:
			parent->font = EQNFONT_ROMAN;
			break;
		case EQN_TOK_ITALIC:
			parent->font = EQNFONT_ITALIC;
			break;
		case EQN_TOK_BOLD:
			parent->font = EQNFONT_BOLD;
			break;
		default:
			abort();
		}
		break;
	case EQN_TOK_SIZE:
	case EQN_TOK_GSIZE:
		/* Accept two values: integral size and a single. */
		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
			break;
		}
		size = mandoc_strntoi(ep->start, ep->toksz, 10);
		if (-1 == size) {
			mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
			break;
		}
		if (EQN_TOK_GSIZE == tok) {
			ep->gsize = size;
			break;
		}
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_LIST;
		parent->expectargs = 1;
		parent->size = size;
		break;
	case EQN_TOK_FROM:
	case EQN_TOK_TO:
	case EQN_TOK_SUB:
	case EQN_TOK_SUP:
		/*
		 * We have a left-right-associative expression.
		 * Repivot under a positional node, open a child scope
		 * and keep on reading.
		 */
		if (parent->last == NULL) {
			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
			cur = eqn_box_alloc(ep, parent);
			cur->type = EQN_TEXT;
			cur->text = mandoc_strdup("");
		}
		while (parent->expectargs == 1 && parent->args == 1)
			parent = parent->parent;
		if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO)  {
			for (cur = parent; cur != NULL; cur = cur->parent)
				if (cur->pos == EQNPOS_SUB ||
				    cur->pos == EQNPOS_SUP ||
				    cur->pos == EQNPOS_SUBSUP ||
				    cur->pos == EQNPOS_SQRT ||
				    cur->pos == EQNPOS_OVER)
					break;
			if (cur != NULL)
				parent = cur->parent;
		}
		if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) {
			parent->expectargs = 3;
			parent->pos = EQNPOS_SUBSUP;
			break;
		}
		if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) {
			parent->expectargs = 3;
			parent->pos = EQNPOS_FROMTO;
			break;
		}
		parent = eqn_box_makebinary(ep, parent);
		switch (tok) {
		case EQN_TOK_FROM:
			parent->pos = EQNPOS_FROM;
			break;
		case EQN_TOK_TO:
			parent->pos = EQNPOS_TO;
			break;
		case EQN_TOK_SUP:
			parent->pos = EQNPOS_SUP;
			break;
		case EQN_TOK_SUB:
			parent->pos = EQNPOS_SUB;
			break;
		default:
			abort();
		}
		break;
	case EQN_TOK_SQRT:
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		/*
		 * Accept a left-right-associative set of arguments just
		 * like sub and sup and friends but without rebalancing
		 * under a pivot.
		 */
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_SUBEXPR;
		parent->pos = EQNPOS_SQRT;
		parent->expectargs = 1;
		break;
	case EQN_TOK_OVER:
		/*
		 * We have a right-left-associative fraction.
		 * Close out anything that's currently open, then
		 * rebalance and continue reading.
		 */
		if (parent->last == NULL) {
			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
			cur = eqn_box_alloc(ep, parent);
			cur->type = EQN_TEXT;
			cur->text = mandoc_strdup("");
		}
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		while (EQN_SUBEXPR == parent->type)
			parent = parent->parent;
		parent = eqn_box_makebinary(ep, parent);
		parent->pos = EQNPOS_OVER;
		break;
	case EQN_TOK_RIGHT:
	case EQN_TOK_BRACE_CLOSE:
		/*
		 * Close out the existing brace.
		 * FIXME: this is a shitty sentinel: we should really
		 * have a native EQN_BRACE type or whatnot.
		 */
		for (cur = parent; cur != NULL; cur = cur->parent)
			if (cur->type == EQN_LIST &&
			    cur->expectargs > 1 &&
			    (tok == EQN_TOK_BRACE_CLOSE ||
			     cur->left != NULL))
				break;
		if (cur == NULL) {
			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
			break;
		}
		parent = cur;
		if (EQN_TOK_RIGHT == tok) {
			if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
				mandoc_msg(MANDOCERR_REQ_EMPTY,
				    ep->node->line, ep->node->pos,
				    "%s", eqn_toks[tok]);
				break;
			}
			/* Handling depends on right/left. */
			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
				parent->right = mandoc_strdup("\\[rc]");
			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
				parent->right = mandoc_strdup("\\[rf]");
			else
				parent->right =
				    mandoc_strndup(ep->start, ep->toksz);
		}
		parent = parent->parent;
		if (tok == EQN_TOK_BRACE_CLOSE &&
		    (parent->type == EQN_PILE ||
		     parent->type == EQN_MATRIX))
			parent = parent->parent;
		/* Close out any "singleton" lists. */
		while (parent->type == EQN_LIST &&
		    parent->expectargs == 1 &&
		    parent->args == 1)
			parent = parent->parent;
		break;
	case EQN_TOK_BRACE_OPEN:
	case EQN_TOK_LEFT:
		/*
		 * If we already have something in the stack and we're
		 * in an expression, then rewind til we're not any more
		 * (just like with the text node).
		 */
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		if (EQN_TOK_LEFT == tok &&
		    eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
			break;
		}
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_LIST;
		if (EQN_TOK_LEFT == tok) {
			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
				parent->left = mandoc_strdup("\\[lc]");
			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
				parent->left = mandoc_strdup("\\[lf]");
			else
				parent->left =
				    mandoc_strndup(ep->start, ep->toksz);
		}
		break;
	case EQN_TOK_PILE:
	case EQN_TOK_LPILE:
	case EQN_TOK_RPILE:
	case EQN_TOK_CPILE:
	case EQN_TOK_CCOL:
	case EQN_TOK_LCOL:
	case EQN_TOK_RCOL:
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_PILE;
		parent->expectargs = 1;
		break;
	case EQN_TOK_ABOVE:
		for (cur = parent; cur != NULL; cur = cur->parent)
			if (cur->type == EQN_PILE)
				break;
		if (cur == NULL) {
			mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
			break;
		}
		parent = eqn_box_alloc(ep, cur);
		parent->type = EQN_LIST;
		break;
	case EQN_TOK_MATRIX:
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_MATRIX;
		parent->expectargs = 1;
		break;
	case EQN_TOK_EOF:
		return;
	case EQN_TOK__MAX:
	case EQN_TOK_FUNC:
	case EQN_TOK_QUOTED:
	case EQN_TOK_SYM:
		p = ep->start;
		assert(p != NULL);
		/*
		 * If we already have something in the stack and we're
		 * in an expression, then rewind til we're not any more.
		 */
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		cur = eqn_box_alloc(ep, parent);
		cur->type = EQN_TEXT;
		cur->text = p;
		switch (tok) {
		case EQN_TOK_FUNC:
			cur->font = EQNFONT_ROMAN;
			break;
		case EQN_TOK_QUOTED:
			if (cur->font == EQNFONT_NONE)
				cur->font = EQNFONT_ITALIC;
			break;
		case EQN_TOK_SYM:
			break;
		default:
			if (cur->font != EQNFONT_NONE || *p == '\0')
				break;
			cpn = p - 1;
			ccln = CCL_LET;
			split = NULL;
			for (;;) {
				/* Advance to next character. */
				cp = cpn++;
				ccl = ccln;
				ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
				    isdigit((unsigned char)*cpn) ||
				    (*cpn == '.' && (ccl == CCL_DIG ||
				     isdigit((unsigned char)cpn[1]))) ?
				    CCL_DIG : CCL_PUN;
				/* No boundary before first character. */
				if (cp < p)
					continue;
				cur->font = ccl == CCL_LET ?
				    EQNFONT_ITALIC : EQNFONT_ROMAN;
				if (*cp == '\\')
					mandoc_escape(&cpn, NULL, NULL);
				/* No boundary after last character. */
				if (*cpn == '\0')
					break;
				if (ccln == ccl && *cp != ',' && *cpn != ',')
					continue;
				/* Boundary found, split the text. */
				if (parent->args == parent->expectargs) {
					/* Remove the text from the tree. */
					if (cur->prev == NULL)
						parent->first = cur->next;
					else
						cur->prev->next = NULL;
					parent->last = cur->prev;
					parent->args--;
					/* Set up a list instead. */
					split = eqn_box_alloc(ep, parent);
					split->type = EQN_LIST;
					/* Insert the word into the list. */
					split->first = split->last = cur;
					cur->parent = split;
					cur->prev = NULL;
					parent = split;
				}
				/* Append a new text box. */
				nbox = eqn_box_alloc(ep, parent);
				nbox->type = EQN_TEXT;
				nbox->text = mandoc_strdup(cpn);
				/* Truncate the old box. */
				p = mandoc_strndup(cur->text,
				    cpn - cur->text);
				free(cur->text);
				cur->text = p;
				/* Setup to process the new box. */
				cur = nbox;
				p = nbox->text;
				cpn = p - 1;
				ccln = CCL_LET;
			}
			if (split != NULL)
				parent = split->parent;
			break;
		}
		break;
	default:
		abort();
	}
	goto next_tok;
}

void
eqn_free(struct eqn_node *p)
{
	int		 i;

	if (p == NULL)
		return;

	for (i = 0; i < (int)p->defsz; i++) {
		free(p->defs[i].key);
		free(p->defs[i].val);
	}

	free(p->data);
	free(p->defs);
	free(p);
}