[BACK]Return to eqn.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

File: [cvsweb.bsd.lv] / mandoc / eqn.c (download)

Revision 1.71, Mon Jun 26 20:09:04 2017 UTC (6 years, 9 months ago) by schwarze
Branch: MAIN
Changes since 1.70: +161 -241 lines

Complete rewrite of the lexer in a single function with four operation
modes instead of four functions, resulting in considerable
simplification, fifty lines less of code, fifteen fewer automatic
variables, and several bug fixes, for example:

1. The delim control statement consumes exactly two bytes of input,
requires no whitespace after these two bytes, and does not treat
quotes in any special way.
2. If the argument of left, right, gfont, gsize, or size is defined
as an alias, only the first word of the value is used as the
delimiter, font name, or font size.
3. If a back, fwd, down, or up keyword is followed by another keyword
instead of the required number, GNU eqn does nothing useful, but
typically errors out.  So no need to have special handling (with
an ugly goto!) for it in mandoc.

Also getting rid of one pointless static buffer and twelve redundant
calls to strlcpy(3).

/*	$Id: eqn.c,v 1.71 2017/06/26 20:09:04 schwarze Exp $ */
/*
 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include "config.h"

#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "mandoc.h"
#include "mandoc_aux.h"
#include "libmandoc.h"
#include "libroff.h"

#define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
#define	STRNEQ(p1, sz1, p2, sz2) \
	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))

enum	eqn_tok {
	EQN_TOK_DYAD = 0,
	EQN_TOK_VEC,
	EQN_TOK_UNDER,
	EQN_TOK_BAR,
	EQN_TOK_TILDE,
	EQN_TOK_HAT,
	EQN_TOK_DOT,
	EQN_TOK_DOTDOT,
	EQN_TOK_FWD,
	EQN_TOK_BACK,
	EQN_TOK_DOWN,
	EQN_TOK_UP,
	EQN_TOK_FAT,
	EQN_TOK_ROMAN,
	EQN_TOK_ITALIC,
	EQN_TOK_BOLD,
	EQN_TOK_SIZE,
	EQN_TOK_SUB,
	EQN_TOK_SUP,
	EQN_TOK_SQRT,
	EQN_TOK_OVER,
	EQN_TOK_FROM,
	EQN_TOK_TO,
	EQN_TOK_BRACE_OPEN,
	EQN_TOK_BRACE_CLOSE,
	EQN_TOK_GSIZE,
	EQN_TOK_GFONT,
	EQN_TOK_MARK,
	EQN_TOK_LINEUP,
	EQN_TOK_LEFT,
	EQN_TOK_RIGHT,
	EQN_TOK_PILE,
	EQN_TOK_LPILE,
	EQN_TOK_RPILE,
	EQN_TOK_CPILE,
	EQN_TOK_MATRIX,
	EQN_TOK_CCOL,
	EQN_TOK_LCOL,
	EQN_TOK_RCOL,
	EQN_TOK_DELIM,
	EQN_TOK_DEFINE,
	EQN_TOK_TDEFINE,
	EQN_TOK_NDEFINE,
	EQN_TOK_UNDEF,
	EQN_TOK_ABOVE,
	EQN_TOK__MAX,
	EQN_TOK_FUNC,
	EQN_TOK_QUOTED,
	EQN_TOK_SYM,
	EQN_TOK_EOF
};

static	const char *eqn_toks[EQN_TOK__MAX] = {
	"dyad", /* EQN_TOK_DYAD */
	"vec", /* EQN_TOK_VEC */
	"under", /* EQN_TOK_UNDER */
	"bar", /* EQN_TOK_BAR */
	"tilde", /* EQN_TOK_TILDE */
	"hat", /* EQN_TOK_HAT */
	"dot", /* EQN_TOK_DOT */
	"dotdot", /* EQN_TOK_DOTDOT */
	"fwd", /* EQN_TOK_FWD * */
	"back", /* EQN_TOK_BACK */
	"down", /* EQN_TOK_DOWN */
	"up", /* EQN_TOK_UP */
	"fat", /* EQN_TOK_FAT */
	"roman", /* EQN_TOK_ROMAN */
	"italic", /* EQN_TOK_ITALIC */
	"bold", /* EQN_TOK_BOLD */
	"size", /* EQN_TOK_SIZE */
	"sub", /* EQN_TOK_SUB */
	"sup", /* EQN_TOK_SUP */
	"sqrt", /* EQN_TOK_SQRT */
	"over", /* EQN_TOK_OVER */
	"from", /* EQN_TOK_FROM */
	"to", /* EQN_TOK_TO */
	"{", /* EQN_TOK_BRACE_OPEN */
	"}", /* EQN_TOK_BRACE_CLOSE */
	"gsize", /* EQN_TOK_GSIZE */
	"gfont", /* EQN_TOK_GFONT */
	"mark", /* EQN_TOK_MARK */
	"lineup", /* EQN_TOK_LINEUP */
	"left", /* EQN_TOK_LEFT */
	"right", /* EQN_TOK_RIGHT */
	"pile", /* EQN_TOK_PILE */
	"lpile", /* EQN_TOK_LPILE */
	"rpile", /* EQN_TOK_RPILE */
	"cpile", /* EQN_TOK_CPILE */
	"matrix", /* EQN_TOK_MATRIX */
	"ccol", /* EQN_TOK_CCOL */
	"lcol", /* EQN_TOK_LCOL */
	"rcol", /* EQN_TOK_RCOL */
	"delim", /* EQN_TOK_DELIM */
	"define", /* EQN_TOK_DEFINE */
	"tdefine", /* EQN_TOK_TDEFINE */
	"ndefine", /* EQN_TOK_NDEFINE */
	"undef", /* EQN_TOK_UNDEF */
	"above", /* EQN_TOK_ABOVE */
};

static	const char *const eqn_func[] = {
	"acos",	"acsc",	"and",	"arc",	"asec",	"asin", "atan",
	"cos",	"cosh", "coth",	"csc",	"det",	"exp",	"for",
	"if",	"lim",	"ln",	"log",	"max",	"min",
	"sec",	"sin",	"sinh",	"tan",	"tanh",	"Im",	"Re",
};

enum	eqn_symt {
	EQNSYM_alpha = 0,
	EQNSYM_beta,
	EQNSYM_chi,
	EQNSYM_delta,
	EQNSYM_epsilon,
	EQNSYM_eta,
	EQNSYM_gamma,
	EQNSYM_iota,
	EQNSYM_kappa,
	EQNSYM_lambda,
	EQNSYM_mu,
	EQNSYM_nu,
	EQNSYM_omega,
	EQNSYM_omicron,
	EQNSYM_phi,
	EQNSYM_pi,
	EQNSYM_ps,
	EQNSYM_rho,
	EQNSYM_sigma,
	EQNSYM_tau,
	EQNSYM_theta,
	EQNSYM_upsilon,
	EQNSYM_xi,
	EQNSYM_zeta,
	EQNSYM_DELTA,
	EQNSYM_GAMMA,
	EQNSYM_LAMBDA,
	EQNSYM_OMEGA,
	EQNSYM_PHI,
	EQNSYM_PI,
	EQNSYM_PSI,
	EQNSYM_SIGMA,
	EQNSYM_THETA,
	EQNSYM_UPSILON,
	EQNSYM_XI,
	EQNSYM_inter,
	EQNSYM_union,
	EQNSYM_prod,
	EQNSYM_int,
	EQNSYM_sum,
	EQNSYM_grad,
	EQNSYM_del,
	EQNSYM_times,
	EQNSYM_cdot,
	EQNSYM_nothing,
	EQNSYM_approx,
	EQNSYM_prime,
	EQNSYM_half,
	EQNSYM_partial,
	EQNSYM_inf,
	EQNSYM_muchgreat,
	EQNSYM_muchless,
	EQNSYM_larrow,
	EQNSYM_rarrow,
	EQNSYM_pm,
	EQNSYM_nequal,
	EQNSYM_equiv,
	EQNSYM_lessequal,
	EQNSYM_moreequal,
	EQNSYM_minus,
	EQNSYM__MAX
};

struct	eqnsym {
	const char	*str;
	const char	*sym;
};

static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
	{ "alpha", "*a" }, /* EQNSYM_alpha */
	{ "beta", "*b" }, /* EQNSYM_beta */
	{ "chi", "*x" }, /* EQNSYM_chi */
	{ "delta", "*d" }, /* EQNSYM_delta */
	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
	{ "eta", "*y" }, /* EQNSYM_eta */
	{ "gamma", "*g" }, /* EQNSYM_gamma */
	{ "iota", "*i" }, /* EQNSYM_iota */
	{ "kappa", "*k" }, /* EQNSYM_kappa */
	{ "lambda", "*l" }, /* EQNSYM_lambda */
	{ "mu", "*m" }, /* EQNSYM_mu */
	{ "nu", "*n" }, /* EQNSYM_nu */
	{ "omega", "*w" }, /* EQNSYM_omega */
	{ "omicron", "*o" }, /* EQNSYM_omicron */
	{ "phi", "*f" }, /* EQNSYM_phi */
	{ "pi", "*p" }, /* EQNSYM_pi */
	{ "psi", "*q" }, /* EQNSYM_psi */
	{ "rho", "*r" }, /* EQNSYM_rho */
	{ "sigma", "*s" }, /* EQNSYM_sigma */
	{ "tau", "*t" }, /* EQNSYM_tau */
	{ "theta", "*h" }, /* EQNSYM_theta */
	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
	{ "xi", "*c" }, /* EQNSYM_xi */
	{ "zeta", "*z" }, /* EQNSYM_zeta */
	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
	{ "PHI", "*F" }, /* EQNSYM_PHI */
	{ "PI", "*P" }, /* EQNSYM_PI */
	{ "PSI", "*Q" }, /* EQNSYM_PSI */
	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
	{ "THETA", "*H" }, /* EQNSYM_THETA */
	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
	{ "XI", "*C" }, /* EQNSYM_XI */
	{ "inter", "ca" }, /* EQNSYM_inter */
	{ "union", "cu" }, /* EQNSYM_union */
	{ "prod", "product" }, /* EQNSYM_prod */
	{ "int", "integral" }, /* EQNSYM_int */
	{ "sum", "sum" }, /* EQNSYM_sum */
	{ "grad", "gr" }, /* EQNSYM_grad */
	{ "del", "gr" }, /* EQNSYM_del */
	{ "times", "mu" }, /* EQNSYM_times */
	{ "cdot", "pc" }, /* EQNSYM_cdot */
	{ "nothing", "&" }, /* EQNSYM_nothing */
	{ "approx", "~~" }, /* EQNSYM_approx */
	{ "prime", "fm" }, /* EQNSYM_prime */
	{ "half", "12" }, /* EQNSYM_half */
	{ "partial", "pd" }, /* EQNSYM_partial */
	{ "inf", "if" }, /* EQNSYM_inf */
	{ ">>", ">>" }, /* EQNSYM_muchgreat */
	{ "<<", "<<" }, /* EQNSYM_muchless */
	{ "<-", "<-" }, /* EQNSYM_larrow */
	{ "->", "->" }, /* EQNSYM_rarrow */
	{ "+-", "+-" }, /* EQNSYM_pm */
	{ "!=", "!=" }, /* EQNSYM_nequal */
	{ "==", "==" }, /* EQNSYM_equiv */
	{ "<=", "<=" }, /* EQNSYM_lessequal */
	{ ">=", ">=" }, /* EQNSYM_moreequal */
	{ "-", "mi" }, /* EQNSYM_minus */
};

enum	parse_mode {
	MODE_QUOTED,
	MODE_NOSUB,
	MODE_SUB,
	MODE_TOK
};

static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
static	void		 eqn_box_free(struct eqn_box *);
static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
				enum eqn_post, struct eqn_box *);
static	void		 eqn_def(struct eqn_node *);
static	struct eqn_def	*eqn_def_find(struct eqn_node *);
static	void		 eqn_delim(struct eqn_node *);
static	enum eqn_tok	 eqn_next(struct eqn_node *, enum parse_mode);
static	enum rofferr	 eqn_parse(struct eqn_node *, struct eqn_box *);
static	void		 eqn_undef(struct eqn_node *);


enum rofferr
eqn_read(struct eqn_node **epp, int ln,
		const char *p, int pos, int *offs)
{
	size_t		 sz;
	struct eqn_node	*ep;
	enum rofferr	 er;

	ep = *epp;

	/*
	 * If we're the terminating mark, unset our equation status and
	 * validate the full equation.
	 */

	if (0 == strncmp(p, ".EN", 3)) {
		er = eqn_end(epp);
		p += 3;
		while (' ' == *p || '\t' == *p)
			p++;
		if ('\0' == *p)
			return er;
		mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse,
		    ln, pos, "EN %s", p);
		return er;
	}

	/*
	 * Build up the full string, replacing all newlines with regular
	 * whitespace.
	 */

	sz = strlen(p + pos) + 1;
	ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1);

	/* First invocation: nil terminate the string. */

	if (0 == ep->sz)
		*ep->data = '\0';

	ep->sz += sz;
	strlcat(ep->data, p + pos, ep->sz + 1);
	strlcat(ep->data, " ", ep->sz + 1);
	return ROFF_IGN;
}

struct eqn_node *
eqn_alloc(int pos, int line, struct mparse *parse)
{
	struct eqn_node	*p;

	p = mandoc_calloc(1, sizeof(struct eqn_node));

	p->parse = parse;
	p->eqn.ln = line;
	p->eqn.pos = pos;
	p->gsize = EQN_DEFSIZE;

	return p;
}

/*
 * Find the key "key" of the give size within our eqn-defined values.
 */
static struct eqn_def *
eqn_def_find(struct eqn_node *ep)
{
	int		 i;

	for (i = 0; i < (int)ep->defsz; i++)
		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
		    ep->defs[i].keysz, ep->start, ep->toksz))
			return &ep->defs[i];

	return NULL;
}

/*
 * Parse a token from the input text.  The modes are:
 * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
 *   before its next occurence.  Do not interpret the token in any
 *   way and return EQN_TOK_QUOTED.  All other modes behave like
 *   MODE_QUOTED when *ep->start is '"'.
 * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
 *   otherwise, it ends before the next whitespace or brace.
 *   Do not interpret the token and return EQN_TOK__MAX.
 * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
 *   alias created with define.  If it is an alias, replace it with
 *   its string value and reparse.
 * MODE_TOK: Like MODE_SUB, but also check the token against the list
 *   of tokens, and if there is a match, return that token.  Otherwise,
 *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
 *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
 *   a token match, *ep->start is set to an allocated string that the
 *   caller is expected to free.
 * All modes skip whitespace following the end of the token.
 */
static enum eqn_tok
eqn_next(struct eqn_node *ep, enum parse_mode mode)
{
	static int	 last_len, lim;

	struct eqn_def	*def;
	size_t		 start;
	int		 diff, i, quoted;
	enum eqn_tok	 tok;

	/*
	 * Reset the recursion counter after advancing
	 * beyond the end of the previous substitution.
	 */
	if (ep->end - ep->data >= last_len)
		lim = 0;

	ep->start = ep->end;
	quoted = mode == MODE_QUOTED;
	for (;;) {
		switch (*ep->start) {
		case '\0':
			ep->toksz = 0;
			return EQN_TOK_EOF;
		case '"':
			quoted = 1;
			break;
		default:
			break;
		}
		if (quoted) {
			ep->end = strchr(ep->start + 1, *ep->start);
			ep->start++;  /* Skip opening quote. */
			if (ep->end == NULL) {
				mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse,
				    ep->eqn.ln, ep->eqn.pos, NULL);
				ep->end = strchr(ep->start, '\0');
			}
		} else {
			ep->end = ep->start + 1;
			if (*ep->start != '{' && *ep->start != '}')
				ep->end += strcspn(ep->end, " ^~\"{}\t");
		}
		ep->toksz = ep->end - ep->start;
		if (quoted && *ep->end != '\0')
			ep->end++;  /* Skip closing quote. */
		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
			ep->end++;
		if (quoted)  /* Cannot return, may have to strndup. */
			break;
		if (mode == MODE_NOSUB)
			return EQN_TOK__MAX;
		if ((def = eqn_def_find(ep)) == NULL)
			break;
		if (++lim > EQN_NEST_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse,
			    ep->eqn.ln, ep->eqn.pos, NULL);
			return EQN_TOK_EOF;
		}

		/* Replace a defined name with its string value. */
		if ((diff = def->valsz - ep->toksz) > 0) {
			start = ep->start - ep->data;
			ep->sz += diff;
			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
			ep->start = ep->data + start;
		}
		if (diff)
			memmove(ep->start + def->valsz, ep->start + ep->toksz,
			    strlen(ep->start + ep->toksz) + 1);
		memcpy(ep->start, def->val, def->valsz);
		last_len = ep->start - ep->data + def->valsz;
	}
	if (mode != MODE_TOK)
		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
	if (quoted) {
		ep->start = mandoc_strndup(ep->start, ep->toksz);
		return EQN_TOK_QUOTED;
	}
	for (tok = 0; tok < EQN_TOK__MAX; tok++)
		if (STRNEQ(ep->start, ep->toksz,
		    eqn_toks[tok], strlen(eqn_toks[tok])))
			return tok;

	for (i = 0; i < EQNSYM__MAX; i++) {
		if (STRNEQ(ep->start, ep->toksz,
		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
			mandoc_asprintf(&ep->start,
			    "\\[%s]", eqnsyms[i].sym);
			return EQN_TOK_SYM;
		}
	}
	ep->start = mandoc_strndup(ep->start, ep->toksz);
	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
		if (STRNEQ(ep->start, ep->toksz,
		    eqn_func[i], strlen(eqn_func[i])))
			return EQN_TOK_FUNC;
	return EQN_TOK__MAX;
}

static void
eqn_box_free(struct eqn_box *bp)
{

	if (bp->first)
		eqn_box_free(bp->first);
	if (bp->next)
		eqn_box_free(bp->next);

	free(bp->text);
	free(bp->left);
	free(bp->right);
	free(bp->top);
	free(bp->bottom);
	free(bp);
}

/*
 * Allocate a box as the last child of the parent node.
 */
static struct eqn_box *
eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
{
	struct eqn_box	*bp;

	bp = mandoc_calloc(1, sizeof(struct eqn_box));
	bp->parent = parent;
	bp->parent->args++;
	bp->expectargs = UINT_MAX;
	bp->font = bp->parent->font;
	bp->size = ep->gsize;

	if (NULL != parent->first) {
		parent->last->next = bp;
		bp->prev = parent->last;
	} else
		parent->first = bp;

	parent->last = bp;
	return bp;
}

/*
 * Reparent the current last node (of the current parent) under a new
 * EQN_SUBEXPR as the first element.
 * Then return the new parent.
 * The new EQN_SUBEXPR will have a two-child limit.
 */
static struct eqn_box *
eqn_box_makebinary(struct eqn_node *ep,
	enum eqn_post pos, struct eqn_box *parent)
{
	struct eqn_box	*b, *newb;

	assert(NULL != parent->last);
	b = parent->last;
	if (parent->last == parent->first)
		parent->first = NULL;
	parent->args--;
	parent->last = b->prev;
	b->prev = NULL;
	newb = eqn_box_alloc(ep, parent);
	newb->pos = pos;
	newb->type = EQN_SUBEXPR;
	newb->expectargs = 2;
	newb->args = 1;
	newb->first = newb->last = b;
	newb->first->next = NULL;
	b->parent = newb;
	return newb;
}

/*
 * Parse the "delim" control statement.
 */
static void
eqn_delim(struct eqn_node *ep)
{
	if (ep->end[0] == '\0' || ep->end[1] == '\0') {
		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
		    ep->eqn.ln, ep->eqn.pos, "delim");
		if (ep->end[0] != '\0')
			ep->end++;
	} else if (strncmp(ep->end, "off", 3) == 0) {
		ep->delim = 0;
		ep->end += 3;
	} else if (strncmp(ep->end, "on", 2) == 0) {
		if (ep->odelim && ep->cdelim)
			ep->delim = 1;
		ep->end += 2;
	} else {
		ep->odelim = *ep->end++;
		ep->cdelim = *ep->end++;
		ep->delim = 1;
	}
}

/*
 * Undefine a previously-defined string.
 */
static void
eqn_undef(struct eqn_node *ep)
{
	struct eqn_def	*def;

	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
		    ep->eqn.ln, ep->eqn.pos, "undef");
		return;
	}
	if ((def = eqn_def_find(ep)) == NULL)
		return;
	free(def->key);
	free(def->val);
	def->key = def->val = NULL;
	def->keysz = def->valsz = 0;
}

static void
eqn_def(struct eqn_node *ep)
{
	struct eqn_def	*def;
	int		 i;

	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
		    ep->eqn.ln, ep->eqn.pos, "define");
		return;
	}

	/*
	 * Search for a key that already exists.
	 * Create a new key if none is found.
	 */
	if ((def = eqn_def_find(ep)) == NULL) {
		/* Find holes in string array. */
		for (i = 0; i < (int)ep->defsz; i++)
			if (0 == ep->defs[i].keysz)
				break;

		if (i == (int)ep->defsz) {
			ep->defsz++;
			ep->defs = mandoc_reallocarray(ep->defs,
			    ep->defsz, sizeof(struct eqn_def));
			ep->defs[i].key = ep->defs[i].val = NULL;
		}

		def = ep->defs + i;
		free(def->key);
		def->key = mandoc_strndup(ep->start, ep->toksz);
		def->keysz = ep->toksz;
	}

	if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
		mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse,
		    ep->eqn.ln, ep->eqn.pos, "define %s", def->key);
		free(def->key);
		free(def->val);
		def->key = def->val = NULL;
		def->keysz = def->valsz = 0;
		return;
	}
	free(def->val);
	def->val = mandoc_strndup(ep->start, ep->toksz);
	def->valsz = ep->toksz;
}

/*
 * Recursively parse an eqn(7) expression.
 */
static enum rofferr
eqn_parse(struct eqn_node *ep, struct eqn_box *parent)
{
	struct eqn_box	*cur, *nbox, *split;
	const char	*cp, *cpn;
	char		*p;
	enum eqn_tok	 tok;
	enum eqn_post	 pos;
	enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
	int		 size;

	assert(parent != NULL);

	/*
	 * Empty equation.
	 * Do not add it to the high-level syntax tree.
	 */

	if (ep->data == NULL)
		return ROFF_IGN;

	ep->start = ep->end = ep->data;

next_tok:
	tok = eqn_next(ep, MODE_TOK);
	switch (tok) {
	case EQN_TOK_UNDEF:
		eqn_undef(ep);
		break;
	case EQN_TOK_NDEFINE:
	case EQN_TOK_DEFINE:
		eqn_def(ep);
		break;
	case EQN_TOK_TDEFINE:
		if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
		    eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
			    ep->eqn.ln, ep->eqn.pos, "tdefine");
		break;
	case EQN_TOK_DELIM:
		eqn_delim(ep);
		break;
	case EQN_TOK_GFONT:
		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
		break;
	case EQN_TOK_MARK:
	case EQN_TOK_LINEUP:
		/* Ignore these. */
		break;
	case EQN_TOK_DYAD:
	case EQN_TOK_VEC:
	case EQN_TOK_UNDER:
	case EQN_TOK_BAR:
	case EQN_TOK_TILDE:
	case EQN_TOK_HAT:
	case EQN_TOK_DOT:
	case EQN_TOK_DOTDOT:
		if (parent->last == NULL) {
			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
			cur = eqn_box_alloc(ep, parent);
			cur->type = EQN_TEXT;
			cur->text = mandoc_strdup("");
		}
		parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent);
		parent->type = EQN_LISTONE;
		parent->expectargs = 1;
		parent->font = EQNFONT_ROMAN;
		switch (tok) {
		case EQN_TOK_DOTDOT:
			parent->top = mandoc_strdup("\\[ad]");
			break;
		case EQN_TOK_VEC:
			parent->top = mandoc_strdup("\\[->]");
			break;
		case EQN_TOK_DYAD:
			parent->top = mandoc_strdup("\\[<>]");
			break;
		case EQN_TOK_TILDE:
			parent->top = mandoc_strdup("\\[a~]");
			break;
		case EQN_TOK_UNDER:
			parent->bottom = mandoc_strdup("\\[ul]");
			break;
		case EQN_TOK_BAR:
			parent->top = mandoc_strdup("\\[rl]");
			break;
		case EQN_TOK_DOT:
			parent->top = mandoc_strdup("\\[a.]");
			break;
		case EQN_TOK_HAT:
			parent->top = mandoc_strdup("\\[ha]");
			break;
		default:
			abort();
		}
		parent = parent->parent;
		break;
	case EQN_TOK_FWD:
	case EQN_TOK_BACK:
	case EQN_TOK_DOWN:
	case EQN_TOK_UP:
		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
		break;
	case EQN_TOK_FAT:
	case EQN_TOK_ROMAN:
	case EQN_TOK_ITALIC:
	case EQN_TOK_BOLD:
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		/*
		 * These values apply to the next word or sequence of
		 * words; thus, we mark that we'll have a child with
		 * exactly one of those.
		 */
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_LISTONE;
		parent->expectargs = 1;
		switch (tok) {
		case EQN_TOK_FAT:
			parent->font = EQNFONT_FAT;
			break;
		case EQN_TOK_ROMAN:
			parent->font = EQNFONT_ROMAN;
			break;
		case EQN_TOK_ITALIC:
			parent->font = EQNFONT_ITALIC;
			break;
		case EQN_TOK_BOLD:
			parent->font = EQNFONT_BOLD;
			break;
		default:
			abort();
		}
		break;
	case EQN_TOK_SIZE:
	case EQN_TOK_GSIZE:
		/* Accept two values: integral size and a single. */
		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
			break;
		}
		size = mandoc_strntoi(ep->start, ep->toksz, 10);
		if (-1 == size) {
			mandoc_msg(MANDOCERR_IT_NONUM, ep->parse,
			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
			break;
		}
		if (EQN_TOK_GSIZE == tok) {
			ep->gsize = size;
			break;
		}
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_LISTONE;
		parent->expectargs = 1;
		parent->size = size;
		break;
	case EQN_TOK_FROM:
	case EQN_TOK_TO:
	case EQN_TOK_SUB:
	case EQN_TOK_SUP:
		/*
		 * We have a left-right-associative expression.
		 * Repivot under a positional node, open a child scope
		 * and keep on reading.
		 */
		if (parent->last == NULL) {
			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
			cur = eqn_box_alloc(ep, parent);
			cur->type = EQN_TEXT;
			cur->text = mandoc_strdup("");
		}
		/* Handle the "subsup" and "fromto" positions. */
		if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) {
			parent->expectargs = 3;
			parent->pos = EQNPOS_SUBSUP;
			break;
		}
		if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) {
			parent->expectargs = 3;
			parent->pos = EQNPOS_FROMTO;
			break;
		}
		switch (tok) {
		case EQN_TOK_FROM:
			pos = EQNPOS_FROM;
			break;
		case EQN_TOK_TO:
			pos = EQNPOS_TO;
			break;
		case EQN_TOK_SUP:
			pos = EQNPOS_SUP;
			break;
		case EQN_TOK_SUB:
			pos = EQNPOS_SUB;
			break;
		default:
			abort();
		}
		parent = eqn_box_makebinary(ep, pos, parent);
		break;
	case EQN_TOK_SQRT:
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		/*
		 * Accept a left-right-associative set of arguments just
		 * like sub and sup and friends but without rebalancing
		 * under a pivot.
		 */
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_SUBEXPR;
		parent->pos = EQNPOS_SQRT;
		parent->expectargs = 1;
		break;
	case EQN_TOK_OVER:
		/*
		 * We have a right-left-associative fraction.
		 * Close out anything that's currently open, then
		 * rebalance and continue reading.
		 */
		if (parent->last == NULL) {
			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
			cur = eqn_box_alloc(ep, parent);
			cur->type = EQN_TEXT;
			cur->text = mandoc_strdup("");
		}
		while (EQN_SUBEXPR == parent->type)
			parent = parent->parent;
		parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent);
		break;
	case EQN_TOK_RIGHT:
	case EQN_TOK_BRACE_CLOSE:
		/*
		 * Close out the existing brace.
		 * FIXME: this is a shitty sentinel: we should really
		 * have a native EQN_BRACE type or whatnot.
		 */
		for (cur = parent; cur != NULL; cur = cur->parent)
			if (cur->type == EQN_LIST &&
			    (tok == EQN_TOK_BRACE_CLOSE ||
			     cur->left != NULL))
				break;
		if (cur == NULL) {
			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse,
			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
			break;
		}
		parent = cur;
		if (EQN_TOK_RIGHT == tok) {
			if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
				mandoc_msg(MANDOCERR_REQ_EMPTY,
				    ep->parse, ep->eqn.ln,
				    ep->eqn.pos, eqn_toks[tok]);
				break;
			}
			/* Handling depends on right/left. */
			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
				parent->right = mandoc_strdup("\\[rc]");
			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
				parent->right = mandoc_strdup("\\[rf]");
			else
				parent->right =
				    mandoc_strndup(ep->start, ep->toksz);
		}
		parent = parent->parent;
		if (tok == EQN_TOK_BRACE_CLOSE &&
		    (parent->type == EQN_PILE ||
		     parent->type == EQN_MATRIX))
			parent = parent->parent;
		/* Close out any "singleton" lists. */
		while (parent->type == EQN_LISTONE &&
		    parent->args == parent->expectargs)
			parent = parent->parent;
		break;
	case EQN_TOK_BRACE_OPEN:
	case EQN_TOK_LEFT:
		/*
		 * If we already have something in the stack and we're
		 * in an expression, then rewind til we're not any more
		 * (just like with the text node).
		 */
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		if (EQN_TOK_LEFT == tok &&
		    eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
			break;
		}
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_LIST;
		if (EQN_TOK_LEFT == tok) {
			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
				parent->left = mandoc_strdup("\\[lc]");
			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
				parent->left = mandoc_strdup("\\[lf]");
			else
				parent->left =
				    mandoc_strndup(ep->start, ep->toksz);
		}
		break;
	case EQN_TOK_PILE:
	case EQN_TOK_LPILE:
	case EQN_TOK_RPILE:
	case EQN_TOK_CPILE:
	case EQN_TOK_CCOL:
	case EQN_TOK_LCOL:
	case EQN_TOK_RCOL:
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_PILE;
		parent->expectargs = 1;
		break;
	case EQN_TOK_ABOVE:
		for (cur = parent; cur != NULL; cur = cur->parent)
			if (cur->type == EQN_PILE)
				break;
		if (cur == NULL) {
			mandoc_msg(MANDOCERR_IT_STRAY, ep->parse,
			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
			break;
		}
		parent = eqn_box_alloc(ep, cur);
		parent->type = EQN_LIST;
		break;
	case EQN_TOK_MATRIX:
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_MATRIX;
		parent->expectargs = 1;
		break;
	case EQN_TOK_EOF:
		/*
		 * End of file!
		 * TODO: make sure we're not in an open subexpression.
		 */
		return ROFF_EQN;
	case EQN_TOK__MAX:
	case EQN_TOK_FUNC:
	case EQN_TOK_QUOTED:
	case EQN_TOK_SYM:
		p = ep->start;
		assert(p != NULL);
		/*
		 * If we already have something in the stack and we're
		 * in an expression, then rewind til we're not any more.
		 */
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		cur = eqn_box_alloc(ep, parent);
		cur->type = EQN_TEXT;
		cur->text = p;
		switch (tok) {
		case EQN_TOK_FUNC:
			cur->font = EQNFONT_ROMAN;
			break;
		case EQN_TOK_QUOTED:
			if (cur->font == EQNFONT_NONE)
				cur->font = EQNFONT_ITALIC;
			break;
		case EQN_TOK_SYM:
			break;
		default:
			if (cur->font != EQNFONT_NONE || *p == '\0')
				break;
			cpn = p - 1;
			ccln = CCL_LET;
			split = NULL;
			for (;;) {
				/* Advance to next character. */
				cp = cpn++;
				ccl = ccln;
				ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
				    isdigit((unsigned char)*cpn) ||
				    (*cpn == '.' && (ccl == CCL_DIG ||
				     isdigit((unsigned char)cpn[1]))) ?
				    CCL_DIG : CCL_PUN;
				/* No boundary before first character. */
				if (cp < p)
					continue;
				cur->font = ccl == CCL_LET ?
				    EQNFONT_ITALIC : EQNFONT_ROMAN;
				if (*cp == '\\')
					mandoc_escape(&cpn, NULL, NULL);
				/* No boundary after last character. */
				if (*cpn == '\0')
					break;
				if (ccln == ccl)
					continue;
				/* Boundary found, split the text. */
				if (parent->args == parent->expectargs) {
					/* Remove the text from the tree. */
					if (cur->prev == NULL)
						parent->first = cur->next;
					else
						cur->prev->next = NULL;
					parent->last = cur->prev;
					parent->args--;
					/* Set up a list instead. */
					split = eqn_box_alloc(ep, parent);
					split->type = EQN_LIST;
					/* Insert the word into the list. */
					split->first = split->last = cur;
					cur->parent = split;
					cur->prev = NULL;
					parent = split;
				}
				/* Append a new text box. */
				nbox = eqn_box_alloc(ep, parent);
				nbox->type = EQN_TEXT;
				nbox->text = mandoc_strdup(cpn);
				/* Truncate the old box. */
				p = mandoc_strndup(cur->text,
				    cpn - cur->text);
				free(cur->text);
				cur->text = p;
				/* Setup to process the new box. */
				cur = nbox;
				p = nbox->text;
				cpn = p - 1;
				ccln = CCL_LET;
			}
			if (split != NULL)
				parent = split->parent;
			break;
		}
		/*
		 * Post-process list status.
		 */
		while (parent->type == EQN_LISTONE &&
		    parent->args == parent->expectargs)
			parent = parent->parent;
		break;
	default:
		abort();
	}
	goto next_tok;
}

enum rofferr
eqn_end(struct eqn_node **epp)
{
	struct eqn_node	*ep;

	ep = *epp;
	*epp = NULL;

	ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box));
	ep->eqn.root->expectargs = UINT_MAX;
	return eqn_parse(ep, ep->eqn.root);
}

void
eqn_free(struct eqn_node *p)
{
	int		 i;

	eqn_box_free(p->eqn.root);

	for (i = 0; i < (int)p->defsz; i++) {
		free(p->defs[i].key);
		free(p->defs[i].val);
	}

	free(p->data);
	free(p->defs);
	free(p);
}