[BACK]Return to eqn.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

File: [cvsweb.bsd.lv] / mandoc / eqn.c (download)

Revision 1.84, Wed Jan 8 12:16:24 2020 UTC (17 months ago) by schwarze
Branch: MAIN
CVS Tags: HEAD
Changes since 1.83: +11 -3 lines

Skip whitespace before tokens, too.
Bug found by bentley@ with input like "delim $$ delim off".

/*	$Id: eqn.c,v 1.84 2020/01/08 12:16:24 schwarze Exp $ */
/*
 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2014,2015,2017,2018,2020 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include "config.h"

#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "mandoc_aux.h"
#include "mandoc.h"
#include "roff.h"
#include "eqn.h"
#include "libmandoc.h"
#include "eqn_parse.h"

#define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
#define	STRNEQ(p1, sz1, p2, sz2) \
	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))

enum	eqn_tok {
	EQN_TOK_DYAD = 0,
	EQN_TOK_VEC,
	EQN_TOK_UNDER,
	EQN_TOK_BAR,
	EQN_TOK_TILDE,
	EQN_TOK_HAT,
	EQN_TOK_DOT,
	EQN_TOK_DOTDOT,
	EQN_TOK_FWD,
	EQN_TOK_BACK,
	EQN_TOK_DOWN,
	EQN_TOK_UP,
	EQN_TOK_FAT,
	EQN_TOK_ROMAN,
	EQN_TOK_ITALIC,
	EQN_TOK_BOLD,
	EQN_TOK_SIZE,
	EQN_TOK_SUB,
	EQN_TOK_SUP,
	EQN_TOK_SQRT,
	EQN_TOK_OVER,
	EQN_TOK_FROM,
	EQN_TOK_TO,
	EQN_TOK_BRACE_OPEN,
	EQN_TOK_BRACE_CLOSE,
	EQN_TOK_GSIZE,
	EQN_TOK_GFONT,
	EQN_TOK_MARK,
	EQN_TOK_LINEUP,
	EQN_TOK_LEFT,
	EQN_TOK_RIGHT,
	EQN_TOK_PILE,
	EQN_TOK_LPILE,
	EQN_TOK_RPILE,
	EQN_TOK_CPILE,
	EQN_TOK_MATRIX,
	EQN_TOK_CCOL,
	EQN_TOK_LCOL,
	EQN_TOK_RCOL,
	EQN_TOK_DELIM,
	EQN_TOK_DEFINE,
	EQN_TOK_TDEFINE,
	EQN_TOK_NDEFINE,
	EQN_TOK_UNDEF,
	EQN_TOK_ABOVE,
	EQN_TOK__MAX,
	EQN_TOK_FUNC,
	EQN_TOK_QUOTED,
	EQN_TOK_SYM,
	EQN_TOK_EOF
};

static	const char *eqn_toks[EQN_TOK__MAX] = {
	"dyad", /* EQN_TOK_DYAD */
	"vec", /* EQN_TOK_VEC */
	"under", /* EQN_TOK_UNDER */
	"bar", /* EQN_TOK_BAR */
	"tilde", /* EQN_TOK_TILDE */
	"hat", /* EQN_TOK_HAT */
	"dot", /* EQN_TOK_DOT */
	"dotdot", /* EQN_TOK_DOTDOT */
	"fwd", /* EQN_TOK_FWD * */
	"back", /* EQN_TOK_BACK */
	"down", /* EQN_TOK_DOWN */
	"up", /* EQN_TOK_UP */
	"fat", /* EQN_TOK_FAT */
	"roman", /* EQN_TOK_ROMAN */
	"italic", /* EQN_TOK_ITALIC */
	"bold", /* EQN_TOK_BOLD */
	"size", /* EQN_TOK_SIZE */
	"sub", /* EQN_TOK_SUB */
	"sup", /* EQN_TOK_SUP */
	"sqrt", /* EQN_TOK_SQRT */
	"over", /* EQN_TOK_OVER */
	"from", /* EQN_TOK_FROM */
	"to", /* EQN_TOK_TO */
	"{", /* EQN_TOK_BRACE_OPEN */
	"}", /* EQN_TOK_BRACE_CLOSE */
	"gsize", /* EQN_TOK_GSIZE */
	"gfont", /* EQN_TOK_GFONT */
	"mark", /* EQN_TOK_MARK */
	"lineup", /* EQN_TOK_LINEUP */
	"left", /* EQN_TOK_LEFT */
	"right", /* EQN_TOK_RIGHT */
	"pile", /* EQN_TOK_PILE */
	"lpile", /* EQN_TOK_LPILE */
	"rpile", /* EQN_TOK_RPILE */
	"cpile", /* EQN_TOK_CPILE */
	"matrix", /* EQN_TOK_MATRIX */
	"ccol", /* EQN_TOK_CCOL */
	"lcol", /* EQN_TOK_LCOL */
	"rcol", /* EQN_TOK_RCOL */
	"delim", /* EQN_TOK_DELIM */
	"define", /* EQN_TOK_DEFINE */
	"tdefine", /* EQN_TOK_TDEFINE */
	"ndefine", /* EQN_TOK_NDEFINE */
	"undef", /* EQN_TOK_UNDEF */
	"above", /* EQN_TOK_ABOVE */
};

static	const char *const eqn_func[] = {
	"acos",	"acsc",	"and",	"arc",	"asec",	"asin", "atan",
	"cos",	"cosh", "coth",	"csc",	"det",	"exp",	"for",
	"if",	"lim",	"ln",	"log",	"max",	"min",
	"sec",	"sin",	"sinh",	"tan",	"tanh",	"Im",	"Re",
};

enum	eqn_symt {
	EQNSYM_alpha = 0,
	EQNSYM_beta,
	EQNSYM_chi,
	EQNSYM_delta,
	EQNSYM_epsilon,
	EQNSYM_eta,
	EQNSYM_gamma,
	EQNSYM_iota,
	EQNSYM_kappa,
	EQNSYM_lambda,
	EQNSYM_mu,
	EQNSYM_nu,
	EQNSYM_omega,
	EQNSYM_omicron,
	EQNSYM_phi,
	EQNSYM_pi,
	EQNSYM_ps,
	EQNSYM_rho,
	EQNSYM_sigma,
	EQNSYM_tau,
	EQNSYM_theta,
	EQNSYM_upsilon,
	EQNSYM_xi,
	EQNSYM_zeta,
	EQNSYM_DELTA,
	EQNSYM_GAMMA,
	EQNSYM_LAMBDA,
	EQNSYM_OMEGA,
	EQNSYM_PHI,
	EQNSYM_PI,
	EQNSYM_PSI,
	EQNSYM_SIGMA,
	EQNSYM_THETA,
	EQNSYM_UPSILON,
	EQNSYM_XI,
	EQNSYM_inter,
	EQNSYM_union,
	EQNSYM_prod,
	EQNSYM_int,
	EQNSYM_sum,
	EQNSYM_grad,
	EQNSYM_del,
	EQNSYM_times,
	EQNSYM_cdot,
	EQNSYM_nothing,
	EQNSYM_approx,
	EQNSYM_prime,
	EQNSYM_half,
	EQNSYM_partial,
	EQNSYM_inf,
	EQNSYM_muchgreat,
	EQNSYM_muchless,
	EQNSYM_larrow,
	EQNSYM_rarrow,
	EQNSYM_pm,
	EQNSYM_nequal,
	EQNSYM_equiv,
	EQNSYM_lessequal,
	EQNSYM_moreequal,
	EQNSYM_minus,
	EQNSYM__MAX
};

struct	eqnsym {
	const char	*str;
	const char	*sym;
};

static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
	{ "alpha", "*a" }, /* EQNSYM_alpha */
	{ "beta", "*b" }, /* EQNSYM_beta */
	{ "chi", "*x" }, /* EQNSYM_chi */
	{ "delta", "*d" }, /* EQNSYM_delta */
	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
	{ "eta", "*y" }, /* EQNSYM_eta */
	{ "gamma", "*g" }, /* EQNSYM_gamma */
	{ "iota", "*i" }, /* EQNSYM_iota */
	{ "kappa", "*k" }, /* EQNSYM_kappa */
	{ "lambda", "*l" }, /* EQNSYM_lambda */
	{ "mu", "*m" }, /* EQNSYM_mu */
	{ "nu", "*n" }, /* EQNSYM_nu */
	{ "omega", "*w" }, /* EQNSYM_omega */
	{ "omicron", "*o" }, /* EQNSYM_omicron */
	{ "phi", "*f" }, /* EQNSYM_phi */
	{ "pi", "*p" }, /* EQNSYM_pi */
	{ "psi", "*q" }, /* EQNSYM_psi */
	{ "rho", "*r" }, /* EQNSYM_rho */
	{ "sigma", "*s" }, /* EQNSYM_sigma */
	{ "tau", "*t" }, /* EQNSYM_tau */
	{ "theta", "*h" }, /* EQNSYM_theta */
	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
	{ "xi", "*c" }, /* EQNSYM_xi */
	{ "zeta", "*z" }, /* EQNSYM_zeta */
	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
	{ "PHI", "*F" }, /* EQNSYM_PHI */
	{ "PI", "*P" }, /* EQNSYM_PI */
	{ "PSI", "*Q" }, /* EQNSYM_PSI */
	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
	{ "THETA", "*H" }, /* EQNSYM_THETA */
	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
	{ "XI", "*C" }, /* EQNSYM_XI */
	{ "inter", "ca" }, /* EQNSYM_inter */
	{ "union", "cu" }, /* EQNSYM_union */
	{ "prod", "product" }, /* EQNSYM_prod */
	{ "int", "integral" }, /* EQNSYM_int */
	{ "sum", "sum" }, /* EQNSYM_sum */
	{ "grad", "gr" }, /* EQNSYM_grad */
	{ "del", "gr" }, /* EQNSYM_del */
	{ "times", "mu" }, /* EQNSYM_times */
	{ "cdot", "pc" }, /* EQNSYM_cdot */
	{ "nothing", "&" }, /* EQNSYM_nothing */
	{ "approx", "~~" }, /* EQNSYM_approx */
	{ "prime", "fm" }, /* EQNSYM_prime */
	{ "half", "12" }, /* EQNSYM_half */
	{ "partial", "pd" }, /* EQNSYM_partial */
	{ "inf", "if" }, /* EQNSYM_inf */
	{ ">>", ">>" }, /* EQNSYM_muchgreat */
	{ "<<", "<<" }, /* EQNSYM_muchless */
	{ "<-", "<-" }, /* EQNSYM_larrow */
	{ "->", "->" }, /* EQNSYM_rarrow */
	{ "+-", "+-" }, /* EQNSYM_pm */
	{ "!=", "!=" }, /* EQNSYM_nequal */
	{ "==", "==" }, /* EQNSYM_equiv */
	{ "<=", "<=" }, /* EQNSYM_lessequal */
	{ ">=", ">=" }, /* EQNSYM_moreequal */
	{ "-", "mi" }, /* EQNSYM_minus */
};

enum	parse_mode {
	MODE_QUOTED,
	MODE_NOSUB,
	MODE_SUB,
	MODE_TOK
};

struct	eqn_def {
	char		 *key;
	size_t		  keysz;
	char		 *val;
	size_t		  valsz;
};

static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
				struct eqn_box *);
static	void		 eqn_def(struct eqn_node *);
static	struct eqn_def	*eqn_def_find(struct eqn_node *);
static	void		 eqn_delim(struct eqn_node *);
static	enum eqn_tok	 eqn_next(struct eqn_node *, enum parse_mode);
static	void		 eqn_undef(struct eqn_node *);


struct eqn_node *
eqn_alloc(void)
{
	struct eqn_node *ep;

	ep = mandoc_calloc(1, sizeof(*ep));
	ep->gsize = EQN_DEFSIZE;
	return ep;
}

void
eqn_reset(struct eqn_node *ep)
{
	free(ep->data);
	ep->data = ep->start = ep->end = NULL;
	ep->sz = ep->toksz = 0;
}

void
eqn_read(struct eqn_node *ep, const char *p)
{
	char		*cp;

	if (ep->data == NULL) {
		ep->sz = strlen(p);
		ep->data = mandoc_strdup(p);
	} else {
		ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p);
		free(ep->data);
		ep->data = cp;
	}
	ep->sz += 1;
}

/*
 * Find the key "key" of the give size within our eqn-defined values.
 */
static struct eqn_def *
eqn_def_find(struct eqn_node *ep)
{
	int		 i;

	for (i = 0; i < (int)ep->defsz; i++)
		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
		    ep->defs[i].keysz, ep->start, ep->toksz))
			return &ep->defs[i];

	return NULL;
}

/*
 * Parse a token from the input text.  The modes are:
 * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
 *   before its next occurence.  Do not interpret the token in any
 *   way and return EQN_TOK_QUOTED.  All other modes behave like
 *   MODE_QUOTED when *ep->start is '"'.
 * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
 *   otherwise, it ends before the next whitespace or brace.
 *   Do not interpret the token and return EQN_TOK__MAX.
 * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
 *   alias created with define.  If it is an alias, replace it with
 *   its string value and reparse.
 * MODE_TOK: Like MODE_SUB, but also check the token against the list
 *   of tokens, and if there is a match, return that token.  Otherwise,
 *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
 *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
 *   a token match, *ep->start is set to an allocated string that the
 *   caller is expected to free.
 * All modes skip whitespace following the end of the token.
 */
static enum eqn_tok
eqn_next(struct eqn_node *ep, enum parse_mode mode)
{
	static int	 last_len, lim;

	struct eqn_def	*def;
	size_t		 start;
	int		 diff, i, quoted;
	enum eqn_tok	 tok;

	/*
	 * Reset the recursion counter after advancing
	 * beyond the end of the previous substitution.
	 */
	if (ep->end - ep->data >= last_len)
		lim = 0;

	ep->start = ep->end;
	quoted = mode == MODE_QUOTED;
	for (;;) {
		switch (*ep->start) {
		case '\0':
			ep->toksz = 0;
			return EQN_TOK_EOF;
		case '"':
			quoted = 1;
			break;
		case ' ':
		case '\t':
		case '~':
		case '^':
			if (quoted)
				break;
			ep->start++;
			continue;
		default:
			break;
		}
		if (quoted) {
			ep->end = strchr(ep->start + 1, *ep->start);
			ep->start++;  /* Skip opening quote. */
			if (ep->end == NULL) {
				mandoc_msg(MANDOCERR_ARG_QUOTE,
				    ep->node->line, ep->node->pos, NULL);
				ep->end = strchr(ep->start, '\0');
			}
		} else {
			ep->end = ep->start + 1;
			if (*ep->start != '{' && *ep->start != '}')
				ep->end += strcspn(ep->end, " ^~\"{}\t");
		}
		ep->toksz = ep->end - ep->start;
		if (quoted && *ep->end != '\0')
			ep->end++;  /* Skip closing quote. */
		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
			ep->end++;
		if (quoted)  /* Cannot return, may have to strndup. */
			break;
		if (mode == MODE_NOSUB)
			return EQN_TOK__MAX;
		if ((def = eqn_def_find(ep)) == NULL)
			break;
		if (++lim > EQN_NEST_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ep->node->line, ep->node->pos, NULL);
			return EQN_TOK_EOF;
		}

		/* Replace a defined name with its string value. */
		if ((diff = def->valsz - ep->toksz) > 0) {
			start = ep->start - ep->data;
			ep->sz += diff;
			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
			ep->start = ep->data + start;
		}
		if (diff)
			memmove(ep->start + def->valsz, ep->start + ep->toksz,
			    strlen(ep->start + ep->toksz) + 1);
		memcpy(ep->start, def->val, def->valsz);
		last_len = ep->start - ep->data + def->valsz;
	}
	if (mode != MODE_TOK)
		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
	if (quoted) {
		ep->start = mandoc_strndup(ep->start, ep->toksz);
		return EQN_TOK_QUOTED;
	}
	for (tok = 0; tok < EQN_TOK__MAX; tok++)
		if (STRNEQ(ep->start, ep->toksz,
		    eqn_toks[tok], strlen(eqn_toks[tok])))
			return tok;

	for (i = 0; i < EQNSYM__MAX; i++) {
		if (STRNEQ(ep->start, ep->toksz,
		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
			mandoc_asprintf(&ep->start,
			    "\\[%s]", eqnsyms[i].sym);
			return EQN_TOK_SYM;
		}
	}
	ep->start = mandoc_strndup(ep->start, ep->toksz);
	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
		if (STRNEQ(ep->start, ep->toksz,
		    eqn_func[i], strlen(eqn_func[i])))
			return EQN_TOK_FUNC;
	return EQN_TOK__MAX;
}

void
eqn_box_free(struct eqn_box *bp)
{
	if (bp == NULL)
		return;

	if (bp->first)
		eqn_box_free(bp->first);
	if (bp->next)
		eqn_box_free(bp->next);

	free(bp->text);
	free(bp->left);
	free(bp->right);
	free(bp->top);
	free(bp->bottom);
	free(bp);
}

struct eqn_box *
eqn_box_new(void)
{
	struct eqn_box	*bp;

	bp = mandoc_calloc(1, sizeof(*bp));
	bp->expectargs = UINT_MAX;
	return bp;
}

/*
 * Allocate a box as the last child of the parent node.
 */
static struct eqn_box *
eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
{
	struct eqn_box	*bp;

	bp = eqn_box_new();
	bp->parent = parent;
	bp->parent->args++;
	bp->font = bp->parent->font;
	bp->size = ep->gsize;

	if (NULL != parent->first) {
		parent->last->next = bp;
		bp->prev = parent->last;
	} else
		parent->first = bp;

	parent->last = bp;
	return bp;
}

/*
 * Reparent the current last node (of the current parent) under a new
 * EQN_SUBEXPR as the first element.
 * Then return the new parent.
 * The new EQN_SUBEXPR will have a two-child limit.
 */
static struct eqn_box *
eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent)
{
	struct eqn_box	*b, *newb;

	assert(NULL != parent->last);
	b = parent->last;
	if (parent->last == parent->first)
		parent->first = NULL;
	parent->args--;
	parent->last = b->prev;
	b->prev = NULL;
	newb = eqn_box_alloc(ep, parent);
	newb->type = EQN_SUBEXPR;
	newb->expectargs = 2;
	newb->args = 1;
	newb->first = newb->last = b;
	newb->first->next = NULL;
	b->parent = newb;
	return newb;
}

/*
 * Parse the "delim" control statement.
 */
static void
eqn_delim(struct eqn_node *ep)
{
	if (ep->end[0] == '\0' || ep->end[1] == '\0') {
		mandoc_msg(MANDOCERR_REQ_EMPTY,
		    ep->node->line, ep->node->pos, "delim");
		if (ep->end[0] != '\0')
			ep->end++;
	} else if (strncmp(ep->end, "off", 3) == 0) {
		ep->delim = 0;
		ep->end += 3;
	} else if (strncmp(ep->end, "on", 2) == 0) {
		if (ep->odelim && ep->cdelim)
			ep->delim = 1;
		ep->end += 2;
	} else {
		ep->odelim = *ep->end++;
		ep->cdelim = *ep->end++;
		ep->delim = 1;
	}
}

/*
 * Undefine a previously-defined string.
 */
static void
eqn_undef(struct eqn_node *ep)
{
	struct eqn_def	*def;

	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
		mandoc_msg(MANDOCERR_REQ_EMPTY,
		    ep->node->line, ep->node->pos, "undef");
		return;
	}
	if ((def = eqn_def_find(ep)) == NULL)
		return;
	free(def->key);
	free(def->val);
	def->key = def->val = NULL;
	def->keysz = def->valsz = 0;
}

static void
eqn_def(struct eqn_node *ep)
{
	struct eqn_def	*def;
	int		 i;

	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
		mandoc_msg(MANDOCERR_REQ_EMPTY,
		    ep->node->line, ep->node->pos, "define");
		return;
	}

	/*
	 * Search for a key that already exists.
	 * Create a new key if none is found.
	 */
	if ((def = eqn_def_find(ep)) == NULL) {
		/* Find holes in string array. */
		for (i = 0; i < (int)ep->defsz; i++)
			if (0 == ep->defs[i].keysz)
				break;

		if (i == (int)ep->defsz) {
			ep->defsz++;
			ep->defs = mandoc_reallocarray(ep->defs,
			    ep->defsz, sizeof(struct eqn_def));
			ep->defs[i].key = ep->defs[i].val = NULL;
		}

		def = ep->defs + i;
		free(def->key);
		def->key = mandoc_strndup(ep->start, ep->toksz);
		def->keysz = ep->toksz;
	}

	if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
		mandoc_msg(MANDOCERR_REQ_EMPTY,
		    ep->node->line, ep->node->pos, "define %s", def->key);
		free(def->key);
		free(def->val);
		def->key = def->val = NULL;
		def->keysz = def->valsz = 0;
		return;
	}
	free(def->val);
	def->val = mandoc_strndup(ep->start, ep->toksz);
	def->valsz = ep->toksz;
}

void
eqn_parse(struct eqn_node *ep)
{
	struct eqn_box	*cur, *nbox, *parent, *split;
	const char	*cp, *cpn;
	char		*p;
	enum eqn_tok	 tok;
	enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
	int		 size;

	parent = ep->node->eqn;
	assert(parent != NULL);

	/*
	 * Empty equation.
	 * Do not add it to the high-level syntax tree.
	 */

	if (ep->data == NULL)
		return;

	ep->start = ep->end = ep->data;

next_tok:
	tok = eqn_next(ep, MODE_TOK);
	switch (tok) {
	case EQN_TOK_UNDEF:
		eqn_undef(ep);
		break;
	case EQN_TOK_NDEFINE:
	case EQN_TOK_DEFINE:
		eqn_def(ep);
		break;
	case EQN_TOK_TDEFINE:
		if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
		    eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
			mandoc_msg(MANDOCERR_REQ_EMPTY,
			    ep->node->line, ep->node->pos, "tdefine");
		break;
	case EQN_TOK_DELIM:
		eqn_delim(ep);
		break;
	case EQN_TOK_GFONT:
		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
		break;
	case EQN_TOK_MARK:
	case EQN_TOK_LINEUP:
		/* Ignore these. */
		break;
	case EQN_TOK_DYAD:
	case EQN_TOK_VEC:
	case EQN_TOK_UNDER:
	case EQN_TOK_BAR:
	case EQN_TOK_TILDE:
	case EQN_TOK_HAT:
	case EQN_TOK_DOT:
	case EQN_TOK_DOTDOT:
		if (parent->last == NULL) {
			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
			cur = eqn_box_alloc(ep, parent);
			cur->type = EQN_TEXT;
			cur->text = mandoc_strdup("");
		}
		parent = eqn_box_makebinary(ep, parent);
		parent->type = EQN_LIST;
		parent->expectargs = 1;
		parent->font = EQNFONT_ROMAN;
		switch (tok) {
		case EQN_TOK_DOTDOT:
			parent->top = mandoc_strdup("\\[ad]");
			break;
		case EQN_TOK_VEC:
			parent->top = mandoc_strdup("\\[->]");
			break;
		case EQN_TOK_DYAD:
			parent->top = mandoc_strdup("\\[<>]");
			break;
		case EQN_TOK_TILDE:
			parent->top = mandoc_strdup("\\[a~]");
			break;
		case EQN_TOK_UNDER:
			parent->bottom = mandoc_strdup("\\[ul]");
			break;
		case EQN_TOK_BAR:
			parent->top = mandoc_strdup("\\[rn]");
			break;
		case EQN_TOK_DOT:
			parent->top = mandoc_strdup("\\[a.]");
			break;
		case EQN_TOK_HAT:
			parent->top = mandoc_strdup("\\[ha]");
			break;
		default:
			abort();
		}
		parent = parent->parent;
		break;
	case EQN_TOK_FWD:
	case EQN_TOK_BACK:
	case EQN_TOK_DOWN:
	case EQN_TOK_UP:
		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
		break;
	case EQN_TOK_FAT:
	case EQN_TOK_ROMAN:
	case EQN_TOK_ITALIC:
	case EQN_TOK_BOLD:
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		/*
		 * These values apply to the next word or sequence of
		 * words; thus, we mark that we'll have a child with
		 * exactly one of those.
		 */
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_LIST;
		parent->expectargs = 1;
		switch (tok) {
		case EQN_TOK_FAT:
			parent->font = EQNFONT_FAT;
			break;
		case EQN_TOK_ROMAN:
			parent->font = EQNFONT_ROMAN;
			break;
		case EQN_TOK_ITALIC:
			parent->font = EQNFONT_ITALIC;
			break;
		case EQN_TOK_BOLD:
			parent->font = EQNFONT_BOLD;
			break;
		default:
			abort();
		}
		break;
	case EQN_TOK_SIZE:
	case EQN_TOK_GSIZE:
		/* Accept two values: integral size and a single. */
		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
			break;
		}
		size = mandoc_strntoi(ep->start, ep->toksz, 10);
		if (-1 == size) {
			mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
			break;
		}
		if (EQN_TOK_GSIZE == tok) {
			ep->gsize = size;
			break;
		}
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_LIST;
		parent->expectargs = 1;
		parent->size = size;
		break;
	case EQN_TOK_FROM:
	case EQN_TOK_TO:
	case EQN_TOK_SUB:
	case EQN_TOK_SUP:
		/*
		 * We have a left-right-associative expression.
		 * Repivot under a positional node, open a child scope
		 * and keep on reading.
		 */
		if (parent->last == NULL) {
			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
			cur = eqn_box_alloc(ep, parent);
			cur->type = EQN_TEXT;
			cur->text = mandoc_strdup("");
		}
		while (parent->expectargs == 1 && parent->args == 1)
			parent = parent->parent;
		if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO)  {
			for (cur = parent; cur != NULL; cur = cur->parent)
				if (cur->pos == EQNPOS_SUB ||
				    cur->pos == EQNPOS_SUP ||
				    cur->pos == EQNPOS_SUBSUP ||
				    cur->pos == EQNPOS_SQRT ||
				    cur->pos == EQNPOS_OVER)
					break;
			if (cur != NULL)
				parent = cur->parent;
		}
		if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) {
			parent->expectargs = 3;
			parent->pos = EQNPOS_SUBSUP;
			break;
		}
		if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) {
			parent->expectargs = 3;
			parent->pos = EQNPOS_FROMTO;
			break;
		}
		parent = eqn_box_makebinary(ep, parent);
		switch (tok) {
		case EQN_TOK_FROM:
			parent->pos = EQNPOS_FROM;
			break;
		case EQN_TOK_TO:
			parent->pos = EQNPOS_TO;
			break;
		case EQN_TOK_SUP:
			parent->pos = EQNPOS_SUP;
			break;
		case EQN_TOK_SUB:
			parent->pos = EQNPOS_SUB;
			break;
		default:
			abort();
		}
		break;
	case EQN_TOK_SQRT:
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		/*
		 * Accept a left-right-associative set of arguments just
		 * like sub and sup and friends but without rebalancing
		 * under a pivot.
		 */
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_SUBEXPR;
		parent->pos = EQNPOS_SQRT;
		parent->expectargs = 1;
		break;
	case EQN_TOK_OVER:
		/*
		 * We have a right-left-associative fraction.
		 * Close out anything that's currently open, then
		 * rebalance and continue reading.
		 */
		if (parent->last == NULL) {
			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
			cur = eqn_box_alloc(ep, parent);
			cur->type = EQN_TEXT;
			cur->text = mandoc_strdup("");
		}
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		while (EQN_SUBEXPR == parent->type)
			parent = parent->parent;
		parent = eqn_box_makebinary(ep, parent);
		parent->pos = EQNPOS_OVER;
		break;
	case EQN_TOK_RIGHT:
	case EQN_TOK_BRACE_CLOSE:
		/*
		 * Close out the existing brace.
		 * FIXME: this is a shitty sentinel: we should really
		 * have a native EQN_BRACE type or whatnot.
		 */
		for (cur = parent; cur != NULL; cur = cur->parent)
			if (cur->type == EQN_LIST &&
			    cur->expectargs > 1 &&
			    (tok == EQN_TOK_BRACE_CLOSE ||
			     cur->left != NULL))
				break;
		if (cur == NULL) {
			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
			break;
		}
		parent = cur;
		if (EQN_TOK_RIGHT == tok) {
			if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
				mandoc_msg(MANDOCERR_REQ_EMPTY,
				    ep->node->line, ep->node->pos,
				    "%s", eqn_toks[tok]);
				break;
			}
			/* Handling depends on right/left. */
			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
				parent->right = mandoc_strdup("\\[rc]");
			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
				parent->right = mandoc_strdup("\\[rf]");
			else
				parent->right =
				    mandoc_strndup(ep->start, ep->toksz);
		}
		parent = parent->parent;
		if (tok == EQN_TOK_BRACE_CLOSE &&
		    (parent->type == EQN_PILE ||
		     parent->type == EQN_MATRIX))
			parent = parent->parent;
		/* Close out any "singleton" lists. */
		while (parent->type == EQN_LIST &&
		    parent->expectargs == 1 &&
		    parent->args == 1)
			parent = parent->parent;
		break;
	case EQN_TOK_BRACE_OPEN:
	case EQN_TOK_LEFT:
		/*
		 * If we already have something in the stack and we're
		 * in an expression, then rewind til we're not any more
		 * (just like with the text node).
		 */
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		if (EQN_TOK_LEFT == tok &&
		    eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
			break;
		}
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_LIST;
		if (EQN_TOK_LEFT == tok) {
			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
				parent->left = mandoc_strdup("\\[lc]");
			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
				parent->left = mandoc_strdup("\\[lf]");
			else
				parent->left =
				    mandoc_strndup(ep->start, ep->toksz);
		}
		break;
	case EQN_TOK_PILE:
	case EQN_TOK_LPILE:
	case EQN_TOK_RPILE:
	case EQN_TOK_CPILE:
	case EQN_TOK_CCOL:
	case EQN_TOK_LCOL:
	case EQN_TOK_RCOL:
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_PILE;
		parent->expectargs = 1;
		break;
	case EQN_TOK_ABOVE:
		for (cur = parent; cur != NULL; cur = cur->parent)
			if (cur->type == EQN_PILE)
				break;
		if (cur == NULL) {
			mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line,
			    ep->node->pos, "%s", eqn_toks[tok]);
			break;
		}
		parent = eqn_box_alloc(ep, cur);
		parent->type = EQN_LIST;
		break;
	case EQN_TOK_MATRIX:
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		parent = eqn_box_alloc(ep, parent);
		parent->type = EQN_MATRIX;
		parent->expectargs = 1;
		break;
	case EQN_TOK_EOF:
		return;
	case EQN_TOK__MAX:
	case EQN_TOK_FUNC:
	case EQN_TOK_QUOTED:
	case EQN_TOK_SYM:
		p = ep->start;
		assert(p != NULL);
		/*
		 * If we already have something in the stack and we're
		 * in an expression, then rewind til we're not any more.
		 */
		while (parent->args == parent->expectargs)
			parent = parent->parent;
		cur = eqn_box_alloc(ep, parent);
		cur->type = EQN_TEXT;
		cur->text = p;
		switch (tok) {
		case EQN_TOK_FUNC:
			cur->font = EQNFONT_ROMAN;
			break;
		case EQN_TOK_QUOTED:
			if (cur->font == EQNFONT_NONE)
				cur->font = EQNFONT_ITALIC;
			break;
		case EQN_TOK_SYM:
			break;
		default:
			if (cur->font != EQNFONT_NONE || *p == '\0')
				break;
			cpn = p - 1;
			ccln = CCL_LET;
			split = NULL;
			for (;;) {
				/* Advance to next character. */
				cp = cpn++;
				ccl = ccln;
				ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
				    isdigit((unsigned char)*cpn) ||
				    (*cpn == '.' && (ccl == CCL_DIG ||
				     isdigit((unsigned char)cpn[1]))) ?
				    CCL_DIG : CCL_PUN;
				/* No boundary before first character. */
				if (cp < p)
					continue;
				cur->font = ccl == CCL_LET ?
				    EQNFONT_ITALIC : EQNFONT_ROMAN;
				if (*cp == '\\')
					mandoc_escape(&cpn, NULL, NULL);
				/* No boundary after last character. */
				if (*cpn == '\0')
					break;
				if (ccln == ccl && *cp != ',' && *cpn != ',')
					continue;
				/* Boundary found, split the text. */
				if (parent->args == parent->expectargs) {
					/* Remove the text from the tree. */
					if (cur->prev == NULL)
						parent->first = cur->next;
					else
						cur->prev->next = NULL;
					parent->last = cur->prev;
					parent->args--;
					/* Set up a list instead. */
					split = eqn_box_alloc(ep, parent);
					split->type = EQN_LIST;
					/* Insert the word into the list. */
					split->first = split->last = cur;
					cur->parent = split;
					cur->prev = NULL;
					parent = split;
				}
				/* Append a new text box. */
				nbox = eqn_box_alloc(ep, parent);
				nbox->type = EQN_TEXT;
				nbox->text = mandoc_strdup(cpn);
				/* Truncate the old box. */
				p = mandoc_strndup(cur->text,
				    cpn - cur->text);
				free(cur->text);
				cur->text = p;
				/* Setup to process the new box. */
				cur = nbox;
				p = nbox->text;
				cpn = p - 1;
				ccln = CCL_LET;
			}
			if (split != NULL)
				parent = split->parent;
			break;
		}
		break;
	default:
		abort();
	}
	goto next_tok;
}

void
eqn_free(struct eqn_node *p)
{
	int		 i;

	if (p == NULL)
		return;

	for (i = 0; i < (int)p->defsz; i++) {
		free(p->defs[i].key);
		free(p->defs[i].val);
	}

	free(p->data);
	free(p->defs);
	free(p);
}