[BACK]Return to docbook2mdoc.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / docbook2mdoc

File: [cvsweb.bsd.lv] / docbook2mdoc / docbook2mdoc.c (download)

Revision 1.46, Fri Mar 8 15:09:54 2019 UTC (5 years ago) by schwarze
Branch: MAIN
Changes since 1.45: +13 -13 lines

fix systematically wrong (int) casts in character classification functions

/*	$Id: docbook2mdoc.c,v 1.46 2019/03/08 15:09:54 schwarze Exp $ */
/*
 * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/queue.h>

#include <assert.h>
#include <ctype.h>
#include <expat.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "extern.h"

/*
 * Global parse state, handed to the expat callbacks as user data and
 * then reused by the printing functions.
 * Keep this as simple and small as possible.
 */
struct	parse {
	XML_Parser	 xml;
	enum nodeid	 node; /* current (NODE_ROOT if pre-tree) */
	const char	*fname; /* filename */
	int		 stop; /* should we stop now? */
#define	PARSE_EQN	 1 /* an <inlineequation> was seen */
	unsigned int	 flags; /* document-wide flags */
	struct pnode	*root; /* root of parse tree */
	struct pnode	*cur; /* current node in tree */
	char		*b; /* nil-terminated buffer for pre-print */
	size_t		 bsz; /* current length of b */
	size_t		 mbsz; /* max bsz allocation */
	int		 newln; /* output: are we on a fresh line */
};

/*
 * Static description of one DocBook element known to the converter.
 */
struct	node {
	const char	*name; /* docbook element name */
	unsigned int	 flags; /* bitmask of the NODE_* flags below */
#define	NODE_IGNTEXT	 1 /* ignore all contained text */
};

/* Queue head types: a node's list of children and its list of attributes. */
TAILQ_HEAD(pnodeq, pnode);
TAILQ_HEAD(pattrq, pattr);

/*
 * One recognised attribute attached to a parsed element.
 */
struct	pattr {
	enum attrkey	 key; /* which attribute this is */
	enum attrval	 val; /* known value, or ATTRVAL__MAX if free-form */
	char		*rawval; /* copy of the value text, set when val is ATTRVAL__MAX */
	TAILQ_ENTRY(pattr) child; /* link in the owning node's attrq */
};

/*
 * One node of the parse tree: either an element or a run of text
 * (NODE_TEXT).
 */
struct	pnode {
	enum nodeid	 node; /* node type */
	char		*b; /* text data; may be advanced past consumed bytes */
	char		*real; /* start of the allocation behind "b" (what gets freed) */
	size_t		 bsz; /* data buffer size */
	struct pnode	*parent; /* parent (or NULL if top) */
	struct pnodeq	 childq; /* queue of children */
	struct pattrq	 attrq; /* attributes of node */
	TAILQ_ENTRY(pnode) child; /* link in the parent's childq */
};

/*
 * Attribute names, indexed by enum attrkey.
 * NOTE(review): the order must match the enum declaration, which is
 * not visible in this file (presumably extern.h).
 */
static	const char *attrkeys[ATTRKEY__MAX] = {
	"choice",
	"close",
	"id",
	"open",
	"rep"
};

/*
 * Known attribute values, indexed by enum attrval.
 * NOTE(review): the order must match the enum declaration, which is
 * not visible in this file (presumably extern.h).
 */
static	const char *attrvals[ATTRVAL__MAX] = {
	"norepeat",
	"opt",
	"plain",
	"repeat",
	"req"
};

/*
 * Table of known elements, indexed by enum nodeid.
 * Entries with a NULL name are skipped when looking up an element by
 * name; presumably they stand for the internal NODE_ROOT and NODE_TEXT
 * pseudo-nodes (the enum itself is declared elsewhere -- confirm).
 * NODE_IGNTEXT marks elements whose directly contained character data
 * is dropped by xml_char().
 */
static	const struct node nodes[NODE__MAX] = {
	{ NULL, 0 },
	{ "acronym", 0 },
	{ "anchor", NODE_IGNTEXT },
	{ "application", 0 },
	{ "arg", 0 },
	{ "caution", NODE_IGNTEXT },
	{ "citerefentry", NODE_IGNTEXT },
	{ "cmdsynopsis", NODE_IGNTEXT },
	{ "code", 0 },
	{ "colspec", NODE_IGNTEXT },
	{ "command", 0 },
	{ "constant", 0 },
	{ "copyright", NODE_IGNTEXT },
	{ "date", 0 },
	{ "emphasis", 0 },
	{ "entry", 0 },
	{ "envar", 0 },
	{ "fieldsynopsis", NODE_IGNTEXT },
	{ "filename", 0 },
	{ "funcdef", 0 },
	{ "funcprototype", NODE_IGNTEXT },
	{ "funcsynopsis", NODE_IGNTEXT },
	{ "funcsynopsisinfo", 0 },
	{ "function", 0 },
	{ "group", NODE_IGNTEXT },
	{ "holder", NODE_IGNTEXT },
	{ "info", NODE_IGNTEXT },
	{ "informalequation", NODE_IGNTEXT },
	{ "informaltable", NODE_IGNTEXT },
	{ "inlineequation", NODE_IGNTEXT },
	{ "itemizedlist", NODE_IGNTEXT },
	{ "link", 0 },
	{ "listitem", NODE_IGNTEXT },
	{ "literal", 0 },
	{ "manvolnum", 0 },
	{ "mml:math", NODE_IGNTEXT },
	{ "mml:mfenced", 0 },
	{ "mml:mfrac", 0 },
	{ "mml:mi", 0 },
	{ "mml:mn", 0 },
	{ "mml:mo", 0 },
	{ "mml:mrow", 0 },
	{ "mml:msub", 0 },
	{ "mml:msup", 0 },
	{ "modifier", 0 },
	{ "note", NODE_IGNTEXT },
	{ "option", 0 },
	{ "orderedlist", NODE_IGNTEXT },
	{ "para", 0 },
	{ "paramdef", 0 },
	{ "parameter", 0 },
	{ "programlisting", 0 },
	{ "prompt", 0 },
	{ "quote", 0 },
	{ "refclass", NODE_IGNTEXT },
	{ "refdescriptor", NODE_IGNTEXT },
	{ "refentry", NODE_IGNTEXT },
	{ "refentryinfo", NODE_IGNTEXT },
	{ "refentrytitle", 0 },
	{ "refmeta", NODE_IGNTEXT },
	{ "refmetainfo", NODE_IGNTEXT },
	{ "refmiscinfo", NODE_IGNTEXT },
	{ "refname", 0 },
	{ "refnamediv", NODE_IGNTEXT },
	{ "refpurpose", 0 },
	{ "refsect1", NODE_IGNTEXT },
	{ "refsect2", NODE_IGNTEXT },
	{ "refsect3", NODE_IGNTEXT },
	{ "refsection", NODE_IGNTEXT },
	{ "refsynopsisdiv", NODE_IGNTEXT },
	{ "replaceable", 0 },
	{ "row", NODE_IGNTEXT },
	{ "sbr", NODE_IGNTEXT },
	{ "screen", NODE_IGNTEXT },
	{ "sgmltag", 0 },
	{ "structname", 0 },
	{ "synopsis", 0 },
	{ "table", NODE_IGNTEXT },
	{ "tbody", NODE_IGNTEXT },
	{ "term", 0 },
	{ NULL, 0 },
	{ "tfoot", NODE_IGNTEXT },
	{ "tgroup", NODE_IGNTEXT },
	{ "thead", NODE_IGNTEXT },
	{ "tip", NODE_IGNTEXT },
	{ "title", 0 },
	{ "trademark", 0 },
	{ "type", 0 },
	{ "ulink", 0 },
	{ "userinput", 0 },
	{ "variablelist", NODE_IGNTEXT },
	{ "varlistentry", NODE_IGNTEXT },
	{ "varname", 0 },
	{ "warning", NODE_IGNTEXT },
	{ "wordasword", 0 },
	{ "year", NODE_IGNTEXT },
};

/* Nonzero (set by -W) to report unknown or misplaced attributes on stderr. */
static	int warn = 0;

/*
 * Forward declaration: the per-element printers below call back into
 * pnode_print(), which is defined after them.
 */
static void
pnode_print(struct parse *p, struct pnode *pn);

/*
 * Expat character-data callback.
 * Text is kept in the tree as NODE_TEXT nodes of its own.
 * While a text node is open, new data is appended to it; otherwise a
 * new text node is opened below the current element, but only once
 * non-whitespace data shows up (leading whitespace is discarded).
 * Data is ignored before the tree exists, after a stop request, and
 * inside elements flagged NODE_IGNTEXT.
 */
static void
xml_char(void *arg, const XML_Char *p, int sz)
{
	struct parse	*ps = arg;
	struct pnode	*txt;
	char		*nb;
	int		 skip;

	if (ps->stop || NODE_ROOT == ps->node)
		return;

	assert(NULL != ps->cur);
	if (nodes[ps->node].flags & NODE_IGNTEXT)
		return;

	if (NODE_TEXT != ps->node) {
		/* Drop leading whitespace; bail if nothing else is left. */
		skip = 0;
		while (skip < sz && isspace((unsigned char)p[skip]))
			skip++;
		if (skip == sz)
			return;
		p += skip;
		sz -= skip;

		if (NULL == (txt = calloc(1, sizeof(*txt)))) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}

		/* Hang the fresh text node below the current element. */
		TAILQ_INIT(&txt->childq);
		TAILQ_INIT(&txt->attrq);
		txt->node = NODE_TEXT;
		txt->parent = ps->cur;
		TAILQ_INSERT_TAIL(&ps->cur->childq, txt, child);
		ps->cur = txt;
		ps->node = NODE_TEXT;
		assert(NULL != ps->root);
	}

	/* Grow the text buffer and tack the new data onto its end. */
	assert(sz >= 0);
	nb = realloc(ps->cur->b, ps->cur->bsz + (size_t)sz);
	if (NULL == nb) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}
	memcpy(nb + ps->cur->bsz, p, sz);
	ps->cur->b = ps->cur->real = nb;
	ps->cur->bsz += (size_t)sz;
}

/*
 * Strip trailing whitespace from a text node by shortening its
 * recorded length; the buffer itself is left alone.
 */
static void
pnode_trim(struct pnode *pn)
{

	assert(NODE_TEXT == pn->node);
	while (pn->bsz > 0 &&
	    isspace((unsigned char)pn->b[pn->bsz - 1]))
		pn->bsz--;
}

/*
 * Expat start-of-element callback.
 * An open text node is trimmed and closed first.
 * Then the element name is looked up in the nodes table:
 *  - unknown element before the tree exists: silently skipped;
 *  - unknown element inside the tree: error, parsing is stopped;
 *  - known element at the root while a tree already exists: error
 *    (FIXME: I don't think this is right?);
 *  - known element at the root that is not a refentry: skipped;
 *  - known element in a context isparent() rejects: error.
 * Otherwise a new node is appended below the current node, made the
 * current node, and its recognised attributes are recorded.
 * Errors set ps->stop; allocation failure exits the program.
 */
static void
xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
{
	struct parse	 *ps = arg;
	enum nodeid	  node;
	enum attrkey	  key;
	enum attrval	  val;
	struct pnode	 *dat;
	struct pattr	 *pattr;
	const XML_Char	**att;

	/* FIXME: find a better way to ditch other namespaces. */
	if (ps->stop || 0 == strcmp(name, "xi:include"))
		return;

	/* Close out text node, if applicable... */
	if (NODE_TEXT == ps->node) {
		assert(NULL != ps->cur);
		pnode_trim(ps->cur);
		ps->cur = ps->cur->parent;
		assert(NULL != ps->cur);
		ps->node = ps->cur->node;
	}

	/* Look the element up by name; nameless slots never match. */
	for (node = 0; node < NODE__MAX; node++)
		if (NULL == nodes[node].name)
			continue;
		else if (0 == strcmp(nodes[node].name, name))
			break;

	if (NODE__MAX == node && NODE_ROOT == ps->node) {
		return;
	} else if (NODE__MAX == node) {
		fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
			ps->fname, XML_GetCurrentLineNumber(ps->xml),
			XML_GetCurrentColumnNumber(ps->xml), name);
		ps->stop = 1;
		return;
	} else if (NODE_ROOT == ps->node && NULL != ps->root) {
		fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
			ps->fname, XML_GetCurrentLineNumber(ps->xml),
			XML_GetCurrentColumnNumber(ps->xml));
		ps->stop = 1;
		return;
	} else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
		return;
	} else if ( ! isparent(node, ps->node)) {
		fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\" "
			"of node \"%s\"\n",
			ps->fname, XML_GetCurrentLineNumber(ps->xml),
			XML_GetCurrentColumnNumber(ps->xml),
			NULL == nodes[ps->node].name ?
			"(none)" : nodes[ps->node].name,
			NULL == nodes[node].name ?
			"(none)" : nodes[node].name);
		ps->stop = 1;
		return;
	}

	/* Remember that the prologue must set up eqn delimiters. */
	if (NODE_INLINEEQUATION == node)
		ps->flags |= PARSE_EQN;

	if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	dat->node = ps->node = node;
	dat->parent = ps->cur;
	TAILQ_INIT(&dat->childq);
	TAILQ_INIT(&dat->attrq);

	if (NULL != ps->cur)
		TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);

	ps->cur = dat;
	if (NULL == ps->root)
		ps->root = dat;

	/*
	 * Process attributes.
	 * "atts" is a NULL-terminated array of name/value pairs.
	 * Unknown or misplaced attributes are skipped (with a warning
	 * if requested); values that are not in the attrvals table are
	 * kept verbatim in "rawval".
	 */
	for (att = atts; NULL != *att; att += 2) {
		for (key = 0; key < ATTRKEY__MAX; key++)
			if (0 == strcmp(*att, attrkeys[key]))
				break;
		if (ATTRKEY__MAX == key) {
			if (warn)
				fprintf(stderr, "%s:%zu:%zu: warning: "
					"unknown attribute \"%s\"\n",
					ps->fname,
					XML_GetCurrentLineNumber(ps->xml),
					XML_GetCurrentColumnNumber(ps->xml),
					*att);
			continue;
		} else if ( ! isattrkey(node, key)) {
			if (warn)
				fprintf(stderr, "%s:%zu:%zu: warning: "
					"bad attribute \"%s\"\n",
					ps->fname,
					XML_GetCurrentLineNumber(ps->xml),
					XML_GetCurrentColumnNumber(ps->xml),
					*att);
			continue;
		}
		for (val = 0; val < ATTRVAL__MAX; val++)
			if (0 == strcmp(*(att + 1), attrvals[val]))
				break;
		if (ATTRVAL__MAX != val && ! isattrval(key, val)) {
			if (warn)
				fprintf(stderr, "%s:%zu:%zu: warning: "
					"bad attribute value \"%s\"\n",
					ps->fname,
					XML_GetCurrentLineNumber(ps->xml),
					XML_GetCurrentColumnNumber(ps->xml),
					*(att + 1));
			continue;
		}

		/* Same out-of-memory policy as for the node itself. */
		if (NULL == (pattr = calloc(1, sizeof(struct pattr)))) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}
		pattr->key = key;
		pattr->val = val;
		if (ATTRVAL__MAX == val) {
			pattr->rawval = strdup(*(att + 1));
			if (NULL == pattr->rawval) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
	}

}

/*
 * Roll up the parse tree.
 * If we're at a text node, roll that one up first.
 * If we hit the root, then assign ourselves as the NODE_ROOT.
 */
static void
xml_elem_end(void *arg, const XML_Char *name)
{
	struct parse	*ps = arg;

	/* FIXME: find a better way to ditch other namespaces. */
	if (ps->stop || NODE_ROOT == ps->node)
		return;
	else if (0 == strcmp(name, "xi:include"))
		return;

	/* Close out text node, if applicable... */
	if (NODE_TEXT == ps->node) {
		assert(NULL != ps->cur);
		pnode_trim(ps->cur);
		ps->cur = ps->cur->parent;
		assert(NULL != ps->cur);
		ps->node = ps->cur->node;
	}

	if (NULL == (ps->cur = ps->cur->parent))
		ps->node = NODE_ROOT;
	else
		ps->node = ps->cur->node;
}

/*
 * Release a node together with all of its descendants and attributes.
 * Passing NULL is allowed and does nothing.
 * The node is NOT removed from its parent's queue; see pnode_unlink().
 */
static void
pnode_free(struct pnode *pn)
{
	struct pnode	*kid;
	struct pattr	*attr;

	if (NULL == pn)
		return;

	while ( ! TAILQ_EMPTY(&pn->childq)) {
		kid = TAILQ_FIRST(&pn->childq);
		TAILQ_REMOVE(&pn->childq, kid, child);
		pnode_free(kid);
	}

	while ( ! TAILQ_EMPTY(&pn->attrq)) {
		attr = TAILQ_FIRST(&pn->attrq);
		TAILQ_REMOVE(&pn->attrq, attr, child);
		free(attr->rawval);
		free(attr);
	}

	/* "real" is the allocation; "b" may point into its middle. */
	free(pn->real);
	free(pn);
}

/*
 * Detach a node from its parent's child queue (if it has a parent)
 * and release it with pnode_free().
 */
static void
pnode_unlink(struct pnode *pn)
{
	struct pnode	*up;

	up = pn->parent;
	if (NULL != up)
		TAILQ_REMOVE(&up->childq, pn, child);
	pnode_free(pn);
}

/*
 * Detach and release every child of a node, leaving the node itself
 * in place with an empty child queue.
 */
static void
pnode_unlinksub(struct pnode *pn)
{
	struct pnode	*kid;

	while (NULL != (kid = TAILQ_FIRST(&pn->childq)))
		pnode_unlink(kid);
}

/*
 * Empty the lookaside buffer: zero length, nil byte at the start.
 */
static void
bufclear(struct parse *p)
{

	p->bsz = 0;
	p->b[0] = '\0';
}

/*
 * Copy the contents of a NODE_TEXT node onto the end of the lookaside
 * buffer, enlarging the buffer first when it is too small.
 * The buffer is nil-terminated again afterwards.
 * Exits the program if the buffer cannot be enlarged.
 */
static void
bufappend(struct parse *p, struct pnode *pn)
{
	size_t		 need;

	assert(NODE_TEXT == pn->node);

	/* Old text + new text + terminating nil byte. */
	need = p->bsz + pn->bsz + 1;
	if (need > p->mbsz) {
		p->mbsz = need;
		p->b = realloc(p->b, p->mbsz);
		if (NULL == p->b) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}
	}

	memcpy(p->b + p->bsz, pn->b, pn->bsz);
	p->bsz += pn->bsz;
	p->b[p->bsz] = '\0';
}

/*
 * Collect the text of a whole subtree into the lookaside buffer.
 * Text nodes are appended in document order; other nodes contribute
 * nothing themselves and are only descended into.
 */
static void
bufappend_r(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;

	if (NODE_TEXT == pn->node)
		bufappend(p, pn);

	for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
	    kid = TAILQ_NEXT(kid, child))
		bufappend_r(p, kid);
}

/*
 * Depth-first search below "pn" for the first node of type "node".
 * "pn" itself is not considered.
 * Returns the node found, or NULL if there is none.
 */
static struct pnode *
pnode_findfirst(struct pnode *pn, enum nodeid node)
{
	struct pnode	*kid, *hit;

	TAILQ_FOREACH(kid, &pn->childq, child) {
		if (kid->node == node)
			return(kid);
		hit = pnode_findfirst(kid, node);
		if (NULL != hit)
			return(hit);
	}

	return(NULL);
}

#define MACROLINE_NORM	0
#define MACROLINE_UPPER	1
#define	MACROLINE_NOWS 2
/*
 * Print all text found below "pn" as arguments on a macro line.
 * Unless MACROLINE_NOWS is given, a separating blank is written first
 * when output is not at the start of a line.
 * Every whitespace character becomes a plain space and leading
 * whitespace is skipped.
 * A word that could be mistaken for a macro name (an uppercase letter
 * followed by one or two lowercase letters) is protected with "\&",
 * and each backslash is written as "\e".
 * MACROLINE_UPPER uppercases the text.
 */
static void
pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl)
{
	char		*cp;
	int		 wordstart;

	if ( ! (MACROLINE_NOWS & fl) && 0 == p->newln)
		putchar(' ');

	/* Gather the subtree's text into the lookaside buffer. */
	bufclear(p);
	bufappend_r(p, pn);

	/* Flatten every kind of whitespace to a space. */
	for (cp = p->b; '\0' != *cp; cp++)
		if (isspace((unsigned char)*cp))
			*cp = ' ';

	/* Spin past leading whitespace (XXX: necessary?) */
	cp = p->b;
	while (isspace((unsigned char)*cp))
		cp++;

	for ( ; '\0' != *cp; cp++) {
		/* Escape us if we look like a macro. */
		wordstart = cp == p->b || ' ' == cp[-1];
		if (wordstart &&
		    isupper((unsigned char)cp[0]) &&
		    '\0' != cp[1] &&
		    islower((unsigned char)cp[1])) {
			if ('\0' == cp[2] || ' ' == cp[2])
				fputs("\\&", stdout);
			else if (islower((unsigned char)cp[2]) &&
			    ('\0' == cp[3] || ' ' == cp[3]))
				fputs("\\&", stdout);
		}

		if (MACROLINE_UPPER & fl)
			putchar(toupper((unsigned char)*cp));
		else
			putchar(*cp);

		/* If we're a character escape, escape us. */
		if ('\\' == *cp)
			putchar('e');
	}
}

static void
pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
{

	pnode_printmacrolinetext(p, pn, 0);
}

/*
 * Just pnode_printmacrolinepart() but with a newline.
 * If no text, just the newline.
 */
static void
pnode_printmacroline(struct parse *p, struct pnode *pn)
{

	assert(0 == p->newln);
	pnode_printmacrolinetext(p, pn, 0);
	putchar('\n');
	p->newln = 1;
}

/*
 * Get ready to write a macro name: at the start of a line this writes
 * the control character ".", in the middle of a line a separating
 * blank.
 */
static void
pnode_printmopen(struct parse *p)
{

	if ( ! p->newln) {
		putchar(' ');
		return;
	}
	putchar('.');
	p->newln = 0;
}

/*
 * End the current macro line, but only if "sv" says that we were the
 * ones who opened it and the line has not been ended already.
 */
static void
pnode_printmclose(struct parse *p, int sv)
{

	if ( ! sv || p->newln)
		return;
	putchar('\n');
	p->newln = 1;
}

/*
 * Variant of pnode_printmclose() that peeks at the following sibling.
 * If that sibling is text beginning with a lone comma or period, the
 * punctuation is moved onto the macro line (as a separate argument)
 * and removed from the text node before the line is ended.
 */
static void
pnode_printmclosepunct(struct parse *p, struct pnode *pn, int sv)
{
	struct pnode	*nx;

	/* We wouldn't have done anything anyway. */
	if ( ! sv || p->newln)
		return;

	/* No next node or it's not text: plain close. */
	nx = TAILQ_NEXT(pn, child);
	if (NULL == nx || NODE_TEXT != nx->node) {
		pnode_printmclose(p, sv);
		return;
	}

	/* Only do this for the comma/period standing on its own. */
	if (nx->bsz > 0 &&
	    (',' == nx->b[0] || '.' == nx->b[0]) &&
	    (1 == nx->bsz || isspace((unsigned char)nx->b[1]))) {
		putchar(' ');
		putchar(nx->b[0]);
		/* Consume the byte; "real" still marks the allocation. */
		nx->b++;
		nx->bsz--;
	}

	putchar('\n');
	p->newln = 1;
}

/*
 * Drop the first <title> child of a <refsynopsisdiv>, if there is one:
 * the caller prints its own SYNOPSIS header.
 */
static void
pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;

	for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
	    kid = TAILQ_NEXT(kid, child)) {
		if (NODE_TITLE != kid->node)
			continue;
		pnode_unlink(kid);
		break;
	}
}

/*
 * Start a section, using the first <title> child as its heading.
 * refsect1 becomes ".Sh" (heading uppercased), refsect2 ".Ss",
 * refsect3 ".Pp" followed by ".Sy".
 * refsection and the admonitions (note, tip, caution, warning) become
 * ".Pp" followed by ".Em" and the title; without a title, only ".Pp"
 * is printed for them.
 * A refsect1/2/3 without a title gets a placeholder heading.
 * The title node is removed from the tree once it has been printed.
 */
static void
pnode_printrefsect(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;

	/* Leaves pp == NULL when no title exists. */
	TAILQ_FOREACH(pp, &pn->childq, child)
		if (NODE_TITLE == pp->node)
			break;

	switch (pn->node) {
	case (NODE_REFSECT1):
		fputs(".Sh", stdout);
		break;
	case (NODE_REFSECT2):
		fputs(".Ss", stdout);
		break;
	case (NODE_REFSECT3):
		puts(".Pp");
		fputs(".Sy", stdout);
		break;
	case (NODE_NOTE):
		/* FALLTHROUGH */
	case (NODE_REFSECTION):
		/* FALLTHROUGH */
	case (NODE_TIP):
		/* FALLTHROUGH */
	case (NODE_CAUTION):
		/* FALLTHROUGH */
	case (NODE_WARNING):
		puts(".Pp");
		if (NULL == pp)
			return;
		fputs(".Em", stdout);
		break;
	default:
		break;
	}

	p->newln = 0;

	if (NULL != pp) {
		pnode_printmacrolinetext(p, pp,
			NODE_REFSECT1 == pn->node ?
			MACROLINE_UPPER : 0);
		pnode_printmclose(p, 1);
		pnode_unlink(pp);
	} else {
		/*
		 * The macro name was written without a trailing blank,
		 * so the placeholder has to bring its own separator.
		 */
		puts(NODE_REFSECT1 == pn->node ?
			" UNKNOWN" : " unknown");
		p->newln = 1;
	}
}

/*
 * Print the arguments of a manual page cross reference: the text of
 * the <refentrytitle> and of the <manvolnum>.
 * A missing title is replaced by " unknown "; a missing volume by
 * " 1", which also ends the line.
 * If either element occurs more than once, the last one is used.
 */
static void
pnode_printciterefentry(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;
	struct pnode	*title = NULL;
	struct pnode	*vol = NULL;

	TAILQ_FOREACH(kid, &pn->childq, child) {
		switch (kid->node) {
		case (NODE_MANVOLNUM):
			vol = kid;
			break;
		case (NODE_REFENTRYTITLE):
			title = kid;
			break;
		default:
			break;
		}
	}

	if (NULL == title)
		fputs(" unknown ", stdout);
	else
		pnode_printmacrolinepart(p, title);

	if (NULL != vol) {
		pnode_printmacrolinepart(p, vol);
		return;
	}

	puts(" 1");
	p->newln = 1;
}

/*
 * Print the document prologue (.Dd, .Dt, .Os) from a <refmeta>.
 * The .Dt arguments are the uppercased <refentrytitle> and the
 * <manvolnum>; " UNKNOWN " and " 1" stand in for missing ones.
 * If either element occurs more than once, the last one is used.
 * Must be called at the start of an output line.
 */
static void
pnode_printrefmeta(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;
	struct pnode	*title = NULL;
	struct pnode	*vol = NULL;

	assert(p->newln);
	TAILQ_FOREACH(kid, &pn->childq, child) {
		switch (kid->node) {
		case (NODE_MANVOLNUM):
			vol = kid;
			break;
		case (NODE_REFENTRYTITLE):
			title = kid;
			break;
		default:
			break;
		}
	}

	/* The literal is split so that CVS does not expand it. */
	puts(".Dd $Mdocdate" "$");
	fputs(".Dt", stdout);
	p->newln = 0;

	if (NULL == title)
		fputs(" UNKNOWN ", stdout);
	else
		pnode_printmacrolinetext(p, title, MACROLINE_UPPER);

	if (NULL != vol)
		pnode_printmacroline(p, vol);
	else {
		puts(" 1");
		p->newln = 1;
	}

	puts(".Os");
}

/*
 * Print a <funcdef>: the return type (its text child) as ".Ft" and
 * the <function> name as ".Fo".
 * Without a <function> child, ".Fo UNKNOWN" is printed.
 * If a kind of child occurs more than once, the last one is used.
 * Must be called at the start of an output line.
 */
static void
pnode_printfuncdef(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;
	struct pnode	*rettype = NULL;
	struct pnode	*fname = NULL;

	assert(p->newln);
	TAILQ_FOREACH(kid, &pn->childq, child) {
		switch (kid->node) {
		case (NODE_TEXT):
			rettype = kid;
			break;
		case (NODE_FUNCTION):
			fname = kid;
			break;
		default:
			break;
		}
	}

	if (NULL != rettype) {
		fputs(".Ft", stdout);
		p->newln = 0;
		pnode_printmacroline(p, rettype);
	}

	if (NULL == fname) {
		puts(".Fo UNKNOWN");
		p->newln = 1;
		return;
	}

	fputs(".Fo", stdout);
	p->newln = 0;
	pnode_printmacroline(p, fname);
}

/*
 * Print a <paramdef> as one quoted ".Fa" argument made of the type
 * (its text child) and the <parameter> name.
 * If a kind of child occurs more than once, the last one is used.
 * Must be called at the start of an output line.
 */
static void
pnode_printparamdef(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;
	struct pnode	*type = NULL;
	struct pnode	*name = NULL;

	assert(p->newln);
	TAILQ_FOREACH(kid, &pn->childq, child) {
		switch (kid->node) {
		case (NODE_TEXT):
			type = kid;
			break;
		case (NODE_PARAMETER):
			name = kid;
			break;
		default:
			break;
		}
	}

	fputs(".Fa \"", stdout);
	p->newln = 0;

	/* No blank between the opening quote and the type. */
	if (NULL != type) {
		pnode_printmacrolinetext(p, type, MACROLINE_NOWS);
		putchar(' ');
	}
	if (NULL != name)
		pnode_printmacrolinepart(p, name);

	puts("\"");
	p->newln = 1;
}

/*
 * The <mml:mfenced> node is a little peculiar.
 * First, it can have arbitrary open and closing tokens, which default
 * to parentheses.
 * Second, >1 arguments are separated by commas.
 * An element without any children prints just the two fences.
 * NOTE(review): "rawval" is only set for attribute values that are not
 * in the attrvals table; whether a table value can reach this point
 * depends on isattrval(), which is not visible here.
 */
static void
pnode_printmathfenced(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;
	struct pattr	*ap;

	/* Opening fence: first "open" attribute, else "(". */
	TAILQ_FOREACH(ap, &pn->attrq, child)
		if (ATTRKEY_OPEN == ap->key) {
			printf("left %s ", ap->rawval);
			break;
		}
	if (NULL == ap)
		printf("left ( ");

	/*
	 * Guard against an empty element: TAILQ_NEXT() must not be
	 * applied to a NULL first child.
	 */
	pp = TAILQ_FIRST(&pn->childq);
	if (NULL != pp) {
		pnode_print(p, pp);
		while (NULL != (pp = TAILQ_NEXT(pp, child))) {
			putchar(',');
			pnode_print(p, pp);
		}
	}

	/* Closing fence: first "close" attribute, else ")". */
	TAILQ_FOREACH(ap, &pn->attrq, child)
		if (ATTRKEY_CLOSE == ap->key) {
			printf("right %s ", ap->rawval);
			break;
		}
	if (NULL == ap)
		printf("right ) ");
}

/*
 * These math nodes require special handling because they have infix
 * syntax, instead of the usual prefix or postfix.
 * So we need to break up the first and second child node with a
 * particular eqn(7) word.
 * An element without any children prints nothing; with only one
 * child, the child and the eqn(7) word are printed.
 */
static void
pnode_printmath(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;

	/*
	 * Nothing to relate; also keeps TAILQ_NEXT() below from being
	 * applied to a NULL pointer.
	 */
	if (NULL == (pp = TAILQ_FIRST(&pn->childq)))
		return;

	pnode_print(p, pp);

	switch (pn->node) {
	case (NODE_MML_MSUP):
		fputs(" sup ", stdout);
		break;
	case (NODE_MML_MFRAC):
		fputs(" over ", stdout);
		break;
	case (NODE_MML_MSUB):
		fputs(" sub ", stdout);
		break;
	default:
		break;
	}

	/* pnode_print() ignores a NULL second operand. */
	pp = TAILQ_NEXT(pp, child);
	pnode_print(p, pp);
}

/*
 * Print a <funcprototype> as an ".Fo" ... ".Fc" block: the first
 * <funcdef> child opens it, every <paramdef> child adds one ".Fa"
 * line.
 * Without a <funcdef>, ".Fo UNKNOWN" is printed instead.
 * Must be called at the start of an output line.
 */
static void
pnode_printfuncprototype(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;
	struct pnode	*fdef = NULL;

	assert(p->newln);
	TAILQ_FOREACH(kid, &pn->childq, child) {
		if (NODE_FUNCDEF == kid->node) {
			fdef = kid;
			break;
		}
	}

	if (NULL == fdef)
		puts(".Fo UNKNOWN");
	else
		pnode_printfuncdef(p, fdef);

	TAILQ_FOREACH(kid, &pn->childq, child) {
		if (NODE_PARAMDEF != kid->node)
			continue;
		pnode_printparamdef(p, kid);
	}

	puts(".Fc");
	p->newln = 1;
}

/*
 * Print an <arg> element.
 * Text children are written as ".Ar" arguments; any other child is
 * handed back to pnode_print() without a preceding "Ar".
 * The argument is optional ("Op") unless a choice attribute says
 * "plain" or "req"; rep="repeat" appends an ellipsis to each text
 * child.
 */
static void
pnode_printarg(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;
	struct pattr	*attr;
	int		 optional = 1;
	int		 repeat = 0;
	int		 istext;

	TAILQ_FOREACH(attr, &pn->attrq, child) {
		switch (attr->key) {
		case (ATTRKEY_CHOICE):
			if (ATTRVAL_PLAIN == attr->val ||
			    ATTRVAL_REQ == attr->val)
				optional = 0;
			break;
		case (ATTRKEY_REP):
			if (ATTRVAL_REPEAT == attr->val)
				repeat = 1;
			break;
		default:
			break;
		}
	}

	if (optional) {
		pnode_printmopen(p);
		fputs("Op", stdout);
	}

	TAILQ_FOREACH(kid, &pn->childq, child) {
		istext = NODE_TEXT == kid->node;
		if (istext) {
			pnode_printmopen(p);
			fputs("Ar", stdout);
		}
		pnode_print(p, kid);
		if (istext && repeat)
			fputs("...", stdout);
	}
}

/*
 * Print a <group> element: a set of alternatives.
 * The group is optional ("Op") unless its choice attribute says
 * "plain" or "req"; a non-optional group starting a line is opened
 * with "No".
 * Consecutive children of the same node type are joined with " |".
 */
static void
pnode_printgroup(struct parse *p, struct pnode *pn)
{
	struct pnode	*cur, *nxt;
	struct pattr	*attr;
	int		 optional = 1;
	int		 fresh;

	TAILQ_FOREACH(attr, &pn->attrq, child) {
		if (ATTRKEY_CHOICE != attr->key)
			continue;
		if (ATTRVAL_PLAIN == attr->val ||
		    ATTRVAL_REQ == attr->val) {
			optional = 0;
			break;
		}
	}

	/*
	 * Make sure we're on a macro line.
	 * This will prevent pnode_print() for putting us on a
	 * subsequent line.
	 */
	fresh = p->newln;
	pnode_printmopen(p);
	if (optional)
		fputs("Op", stdout);
	else if (fresh)
		fputs("No", stdout);

	/*
	 * Print each child in full, then add the text of every directly
	 * following sibling of the same type after a vertical bar.
	 * Siblings consumed that way are not printed a second time.
	 * FIXME: if there's a "Fl", we don't cut off the leading "-"
	 * like we do in pnode_print().
	 */
	cur = TAILQ_FIRST(&pn->childq);
	while (NULL != cur) {
		pnode_print(p, cur);
		for (nxt = TAILQ_NEXT(cur, child);
		    NULL != nxt && cur->node == nxt->node;
		    nxt = TAILQ_NEXT(nxt, child)) {
			fputs(" |", stdout);
			pnode_printmacrolinepart(p, nxt);
			cur = nxt;
		}
		cur = TAILQ_NEXT(cur, child);
	}

	pnode_printmclose(p, fresh);
}

/*
 * Print the document prologue.
 * The first <refmeta> found below the root supplies it and is then
 * removed from the tree; without one, a placeholder prologue is
 * written.
 * If the document contains inline equations, the eqn delimiters are
 * set up as well.
 */
static void
pnode_printprologue(struct parse *p, struct pnode *pn)
{
	struct pnode	*meta = NULL;

	if (NULL != p->root)
		meta = pnode_findfirst(p->root, NODE_REFMETA);

	if (NULL == meta) {
		puts(".\\\" Supplying bogus prologue...");
		puts(".Dd $Mdocdate" "$");
		puts(".Dt UNKNOWN 1");
		puts(".Os");
	} else {
		pnode_printrefmeta(p, meta);
		pnode_unlink(meta);
	}

	if (p->flags & PARSE_EQN) {
		puts(".EQ");
		puts("delim $$");
		puts(".EN");
	}
}

/*
 * Print the ".It" line of a <varlistentry>.
 * We can have multiple <term> elements within a <varlistentry>, which
 * we should comma-separate as list headers.
 * Only the leading run of <term> children is used; each is removed
 * from the tree once printed, so that the caller does not print it a
 * second time.
 * Must be called at the start of an output line.
 */
static void
pnode_printvarlistentry(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;
	int		 first = 1;

	assert(p->newln);
	fputs(".It", stdout);
	p->newln = 0;

	/*
	 * Each printed term is unlinked and freed, so always take the
	 * new head of the queue instead of following a pointer out of
	 * the node that was just released.
	 */
	while (NULL != (pp = TAILQ_FIRST(&pn->childq)) &&
	    NODE_TERM == pp->node) {
		if ( ! first)
			putchar(',');
		pnode_print(p, pp);
		pnode_unlink(pp);
		first = 0;
	}

	putchar('\n');
	p->newln = 1;
}

/*
 * Print a table <row> as a compact dash list with one ".It" item per
 * child.
 */
static void
pnode_printrow(struct parse *p, struct pnode *pn)
{
	struct pnode	*cell;

	puts(".Bl -dash -compact");

	for (cell = TAILQ_FIRST(&pn->childq); NULL != cell;
	    cell = TAILQ_NEXT(cell, child)) {
		assert(p->newln);
		puts(".It");
		pnode_print(p, cell);
		pnode_printmclose(p, 1);
	}

	assert(p->newln);
	puts(".El");
}

/*
 * Print a <table> or <informaltable>.
 * Each <title> child is printed as a paragraph of its own and removed.
 * Then every <row> found anywhere below the table is printed as one
 * item of an "-ohang" list and removed in turn.
 * Must be called at the start of an output line.
 */
static void
pnode_printtable(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp, *np;

	assert(p->newln);
	for (pp = TAILQ_FIRST(&pn->childq); NULL != pp; pp = np) {
		/* Fetch the successor first: pnode_unlink() frees pp. */
		np = TAILQ_NEXT(pp, child);
		if (NODE_TITLE != pp->node)
			continue;
		puts(".Pp");
		pnode_print(p, pp);
		pnode_unlink(pp);
	}
	assert(p->newln);
	puts(".Bl -ohang");
	while (NULL != (pp = pnode_findfirst(pn, NODE_ROW))) {
		puts(".It Table Row");
		pnode_printrow(p, pp);
		pnode_printmclose(p, 1);
		pnode_unlink(pp);
	}
	assert(p->newln);
	puts(".El");
}

/*
 * Print an <itemizedlist> or <orderedlist>.
 * Each <title> child is printed as a paragraph of its own and removed.
 * Every remaining child becomes one ".It" item of an "-enum" list
 * (ordered) or an "-item" list (otherwise).
 * Must be called at the start of an output line.
 */
static void
pnode_printlist(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp, *np;

	assert(p->newln);
	for (pp = TAILQ_FIRST(&pn->childq); NULL != pp; pp = np) {
		/* Fetch the successor first: pnode_unlink() frees pp. */
		np = TAILQ_NEXT(pp, child);
		if (NODE_TITLE != pp->node)
			continue;
		puts(".Pp");
		pnode_print(p, pp);
		pnode_unlink(pp);
	}
	assert(p->newln);

	if (NODE_ORDEREDLIST == pn->node)
		puts(".Bl -enum");
	else
		puts(".Bl -item");

	TAILQ_FOREACH(pp, &pn->childq, child) {
		assert(p->newln);
		puts(".It");
		pnode_print(p, pp);
		pnode_printmclose(p, 1);
	}
	assert(p->newln);
	puts(".El");
}

/*
 * Print a <variablelist> as a "-tag" list.
 * Each <title> child is printed as a paragraph of its own and removed.
 * <varlistentry> children are handed to pnode_print(); any other child
 * becomes an ".It" line made of its text.
 * Must be called at the start of an output line.
 */
static void
pnode_printvariablelist(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp, *np;

	assert(p->newln);
	for (pp = TAILQ_FIRST(&pn->childq); NULL != pp; pp = np) {
		/* Fetch the successor first: pnode_unlink() frees pp. */
		np = TAILQ_NEXT(pp, child);
		if (NODE_TITLE != pp->node)
			continue;
		puts(".Pp");
		pnode_print(p, pp);
		pnode_unlink(pp);
	}

	assert(p->newln);
	puts(".Bl -tag -width Ds");
	TAILQ_FOREACH(pp, &pn->childq, child)
		if (NODE_VARLISTENTRY != pp->node) {
			assert(p->newln);
			fputs(".It", stdout);
			pnode_printmacroline(p, pp);
		} else {
			assert(p->newln);
			pnode_print(p, pp);
		}
	assert(p->newln);
	puts(".El");
}

/*
 * Print a parsed node (or ignore it--whatever).
 * This is a recursive function working in three steps:
 * the first switch writes whatever opens the node (usually a macro
 * name), then all children still attached to the node are printed,
 * and the second switch writes whatever closes the node.
 * Handlers that print children themselves unlink them afterwards, so
 * that the generic loop in the middle does not print them again.
 * A NULL node is silently ignored.
 * FIXME: if we're in a literal context (<screen> or <programlisting> or
 * whatever), don't print inline macros.
 */
static void
pnode_print(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;
	char		*cp;
	int		 last, sv;

	if (NULL == pn)
		return;

	/*
	 * Remember whether we start on a fresh line: only then is it
	 * up to us to end the macro line in the closing step.
	 */
	sv = p->newln;

	/* Step one: open the node. */
	switch (pn->node) {
	case (NODE_APPLICATION):
		pnode_printmopen(p);
		fputs("Nm", stdout);
		break;
	case (NODE_ANCHOR):
		/* Don't print anything! */
		return;
	case (NODE_ARG):
		pnode_printarg(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_CITEREFENTRY):
		pnode_printmopen(p);
		fputs("Xr", stdout);
		pnode_printciterefentry(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_CODE):
		pnode_printmopen(p);
		fputs("Li", stdout);
		break;
	case (NODE_COMMAND):
		pnode_printmopen(p);
		fputs("Nm", stdout);
		break;
	case (NODE_CONSTANT):
		pnode_printmopen(p);
		fputs("Dv", stdout);
		break;
	case (NODE_EMPHASIS):
		pnode_printmopen(p);
		fputs("Em", stdout);
		break;
	case (NODE_ENVAR):
		pnode_printmopen(p);
		fputs("Ev", stdout);
		break;
	case (NODE_FILENAME):
		pnode_printmopen(p);
		fputs("Pa", stdout);
		break;
	case (NODE_FUNCTION):
		pnode_printmopen(p);
		fputs("Fn", stdout);
		break;
	case (NODE_FUNCPROTOTYPE):
		assert(p->newln);
		pnode_printfuncprototype(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_FUNCSYNOPSISINFO):
		pnode_printmopen(p);
		fputs("Fd", stdout);
		break;
	case (NODE_INFORMALEQUATION):
		if ( ! p->newln)
			putchar('\n');
		puts(".EQ");
		p->newln = 0;
		break;
	case (NODE_INLINEEQUATION):
		fputc('$', stdout);
		p->newln = 0;
		break;
	case (NODE_ITEMIZEDLIST):
		assert(p->newln);
		pnode_printlist(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_GROUP):
		pnode_printgroup(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_LITERAL):
		pnode_printmopen(p);
		fputs("Li", stdout);
		break;
	case (NODE_MML_MFENCED):
		pnode_printmathfenced(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_MML_MROW):
	case (NODE_MML_MI):
	case (NODE_MML_MN):
	case (NODE_MML_MO):
		if (TAILQ_EMPTY(&pn->childq))
			break;
		fputs(" { ", stdout);
		break;
	case (NODE_MML_MFRAC):
	case (NODE_MML_MSUB):
	case (NODE_MML_MSUP):
		pnode_printmath(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_OPTION):
		pnode_printmopen(p);
		fputs("Fl", stdout);
		break;
	case (NODE_ORDEREDLIST):
		assert(p->newln);
		pnode_printlist(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_PARA):
		assert(p->newln);
		/* No paragraph break directly inside a list item. */
		if (NULL != pn->parent &&
			NODE_LISTITEM == pn->parent->node)
			break;
		puts(".Pp");
		break;
	case (NODE_PARAMETER):
		/* Suppress non-text children... */
		pnode_printmopen(p);
		fputs("Fa \"", stdout);
		pnode_printmacrolinetext(p, pn, MACROLINE_NOWS);
		fputs("\"", stdout);
		pnode_unlinksub(pn);
		break;
	case (NODE_QUOTE):
		pnode_printmopen(p);
		fputs("Qo", stdout);
		break;
	case (NODE_PROGRAMLISTING):
		/* FALLTHROUGH */
	case (NODE_SCREEN):
		assert(p->newln);
		puts(".Bd -literal");
		break;
	case (NODE_REFENTRYINFO):
		/* Suppress. */
		pnode_unlinksub(pn);
		break;
	case (NODE_REFMETA):
		/* The prologue code consumes and unlinks <refmeta>. */
		abort();
		break;
	case (NODE_REFNAME):
		/* Suppress non-text children... */
		pnode_printmopen(p);
		fputs("Nm", stdout);
		p->newln = 0;
		pnode_printmacrolinepart(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_REFNAMEDIV):
		assert(p->newln);
		puts(".Sh NAME");
		break;
	case (NODE_REFPURPOSE):
		assert(p->newln);
		pnode_printmopen(p);
		fputs("Nd", stdout);
		break;
	case (NODE_REFSYNOPSISDIV):
		assert(p->newln);
		pnode_printrefsynopsisdiv(p, pn);
		puts(".Sh SYNOPSIS");
		break;
	case (NODE_REFSECT1):
		/* FALLTHROUGH */
	case (NODE_REFSECT2):
		/* FALLTHROUGH */
	case (NODE_REFSECT3):
		/* FALLTHROUGH */
	case (NODE_REFSECTION):
		/* FALLTHROUGH */
	case (NODE_NOTE):
		/* FALLTHROUGH */
	case (NODE_TIP):
		/* FALLTHROUGH */
	case (NODE_CAUTION):
		/* FALLTHROUGH */
	case (NODE_WARNING):
		assert(p->newln);
		pnode_printrefsect(p, pn);
		break;
	case (NODE_REPLACEABLE):
		pnode_printmopen(p);
		fputs("Ar", stdout);
		break;
	case (NODE_SBR):
		assert(p->newln);
		puts(".br");
		break;
	case (NODE_SGMLTAG):
		pnode_printmopen(p);
		fputs("Li", stdout);
		break;
	case (NODE_STRUCTNAME):
		pnode_printmopen(p);
		fputs("Vt", stdout);
		break;
	case (NODE_TABLE):
		/* FALLTHROUGH */
	case (NODE_INFORMALTABLE):
		assert(p->newln);
		pnode_printtable(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_TEXT):
		if (0 == p->newln)
			putchar(' ');

		bufclear(p);
		bufappend(p, pn);

		/*
		 * A text node can only be empty here if leading bytes
		 * were consumed by pnode_printmclosepunct().
		 */
		if (0 == p->bsz) {
			assert(pn->real != pn->b);
			break;
		}

		/*
		 * Output all characters, squeezing out whitespace
		 * between newlines.
		 * XXX: all whitespace, including tabs (?).
		 * Remember to escape control characters and escapes.
		 */
		assert(p->bsz);
		cp = p->b;

		/*
		 * There's often a superfluous "-" in its <option> tags
		 * before the actual flags themselves.
		 * "Fl" does this for us, so remove it.
		 */
		if (NULL != pn->parent &&
			NODE_OPTION == pn->parent->node &&
			'-' == *cp)
			cp++;
		/* "last" starts as a newline so leading blanks go, too. */
		for (last = '\n'; '\0' != *cp; ) {
			if ('\n' == last) {
				/* Consume all whitespace. */
				if (isspace((unsigned char)*cp)) {
					while (isspace((unsigned char)*cp))
						cp++;
					continue;
				} else if ('\'' == *cp || '.' == *cp)
					fputs("\\&", stdout);
			}
			putchar(last = *cp++);
			/* If we're a character escape, escape us. */
			if ('\\' == last)
				putchar('e');
		}
		p->newln = 0;
		break;
	case (NODE_TYPE):
		pnode_printmopen(p);
		fputs("Vt", stdout);
		break;
	case (NODE_USERINPUT):
		pnode_printmopen(p);
		fputs("Li", stdout);
		break;
	case (NODE_VARIABLELIST):
		assert(p->newln);
		pnode_printvariablelist(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_VARLISTENTRY):
		assert(p->newln);
		pnode_printvarlistentry(p, pn);
		break;
	case (NODE_VARNAME):
		pnode_printmopen(p);
		fputs("Va", stdout);
		break;
	default:
		break;
	}

	/* Step two: print whatever children are still attached. */
	TAILQ_FOREACH(pp, &pn->childq, child)
		pnode_print(p, pp);

	/* Step three: close the node. */
	switch (pn->node) {
	case (NODE_INFORMALEQUATION):
		if ( ! p->newln)
			putchar('\n');
		puts(".EN");
		p->newln = 1;
		break;
	case (NODE_INLINEEQUATION):
		fputs("$ ", stdout);
		p->newln = sv;
		break;
	case (NODE_MML_MROW):
	case (NODE_MML_MI):
	case (NODE_MML_MN):
	case (NODE_MML_MO):
		if (TAILQ_EMPTY(&pn->childq))
			break;
		fputs(" } ", stdout);
		break;
	case (NODE_APPLICATION):
	case (NODE_ARG):
	case (NODE_CITEREFENTRY):
	case (NODE_CODE):
	case (NODE_COMMAND):
	case (NODE_CONSTANT):
	case (NODE_EMPHASIS):
	case (NODE_ENVAR):
	case (NODE_FILENAME):
	case (NODE_FUNCTION):
	case (NODE_FUNCSYNOPSISINFO):
	case (NODE_LITERAL):
	case (NODE_OPTION):
	case (NODE_PARAMETER):
	case (NODE_REPLACEABLE):
	case (NODE_REFPURPOSE):
	case (NODE_SGMLTAG):
	case (NODE_STRUCTNAME):
	case (NODE_TEXT):
	case (NODE_TYPE):
	case (NODE_USERINPUT):
	case (NODE_VARNAME):
		/* End the line, pulling in a following "," or ".". */
		pnode_printmclosepunct(p, pn, sv);
		break;
	case (NODE_QUOTE):
		pnode_printmclose(p, sv);
		sv = p->newln;
		pnode_printmopen(p);
		fputs("Qc", stdout);
		pnode_printmclose(p, sv);
		break;
	case (NODE_REFNAME):
		/*
		 * If we're in the NAME macro and we have multiple
		 * <refname> macros in sequence, then print out a
		 * trailing comma before the newline.
		 */
		if (NULL != pn->parent &&
			NODE_REFNAMEDIV == pn->parent->node &&
			NULL != TAILQ_NEXT(pn, child) &&
			NODE_REFNAME == TAILQ_NEXT(pn, child)->node)
			fputs(" ,", stdout);
		pnode_printmclose(p, sv);
		break;
	case (NODE_PROGRAMLISTING):
		/* FALLTHROUGH */
	case (NODE_SCREEN):
		assert(p->newln);
		puts(".Ed");
		p->newln = 1;
		break;
	default:
		break;
	}
}

/*
 * Parse one input file and print the resulting mdoc(7) document.
 * Loop around the read buffer until we've drained it of all data.
 * Invoke the parser context with each buffer fill.
 * Whatever tree has been built is printed once the input ends, the
 * XML parser reports an error, or the element handlers ask to stop.
 *
 * "b" is a caller-supplied read buffer of "bsz" bytes, "fn" the file
 * name used in diagnostics.
 * Returns 1 if the whole file was parsed without error, 0 on parse
 * errors, read errors or memory exhaustion.
 */
static int
readfile(XML_Parser xp, int fd,
	char *b, size_t bsz, const char *fn)
{
	struct parse	 p;
	int		 rc;
	ssize_t		 ssz;

	memset(&p, 0, sizeof(struct parse));

	/*
	 * Set up the lookaside buffer: empty, nil-terminated, and
	 * checked, since bufclear() writes into it unconditionally.
	 */
	p.mbsz = 1024;
	if (NULL == (p.b = malloc(p.mbsz))) {
		perror(NULL);
		return(0);
	}
	p.b[0] = '\0';
	p.fname = fn;
	p.xml = xp;

	XML_SetCharacterDataHandler(xp, xml_char);
	XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
	XML_SetUserData(xp, &p);

	while ((ssz = read(fd, b, bsz)) >= 0) {
		/* A zero-length read marks the final chunk. */
		if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
			fprintf(stderr, "%s:%zu:%zu: %s\n", fn,
				XML_GetCurrentLineNumber(xp),
				XML_GetCurrentColumnNumber(xp),
				XML_ErrorString
				(XML_GetErrorCode(xp)));
		else if ( ! p.stop && ssz > 0)
			continue;
		/*
		 * Exit when we've read all or errors have occurred
		 * during the parse sequence.
		 */
		p.newln = 1;
		pnode_printprologue(&p, p.root);
		pnode_print(&p, p.root);
		pnode_free(p.root);
		free(p.b);
		return(0 != rc && ! p.stop);
	}

	/* Read error has occurred. */
	perror(fn);
	pnode_free(p.root);
	free(p.b);
	return(0);
}

/*
 * Program entry point.
 * Accepts the -W flag, which sets the global "warn", and at most one
 * input file name; without a name, or with "-", standard input is
 * read.
 * Exits with EXIT_SUCCESS only if readfile() reports success;
 * any failure to open, allocate, create the parser or parse, as well
 * as a usage error, yields EXIT_FAILURE.
 */
int
main(int argc, char *argv[])
{
	XML_Parser	 xp;
	const char	*fname;
	char		*buf;
	int		 fd, rc, ch;
	const char	*progname;

	/* Use the basename of argv[0] in the usage message. */
	progname = strrchr(argv[0], '/');
	if (progname == NULL)
		progname = argv[0];
	else
		++progname;

	fname = "-";
	xp = NULL;
	buf = NULL;
	rc = 0;		/* non-zero means success */

	while (-1 != (ch = getopt(argc, argv, "W")))
		switch (ch) {
		case ('W'):
			warn = 1;
			break;
		default:
			goto usage;
		}

	argc -= optind;
	argv += optind;

	if (argc > 1) {
		fprintf(stderr, "%s: Too many arguments\n", argv[1]);
		goto usage;
	} else if (argc > 0)
		fname = argv[0];

	/* Read from stdin or open the named file for reading. */
	fd = 0 == strcmp(fname, "-") ?
		STDIN_FILENO : open(fname, O_RDONLY, 0);

	/*
	 * Allocate a read buffer.
	 * Create the parser context.
	 * Dive directly into the parse.
	 * readfile() returns non-zero on success, which is the only
	 * path that may set the success flag.
	 */
	if (-1 == fd)
		perror(fname);
	else if (NULL == (buf = malloc(4096)))
		perror(NULL);
	else if (NULL == (xp = XML_ParserCreate(NULL)))
		perror(NULL);
	else if (readfile(xp, fd, buf, 4096, fname))
		rc = 1;

	XML_ParserFree(xp);
	free(buf);
	/* Never close stdin, nor a descriptor that failed to open. */
	if (-1 != fd && STDIN_FILENO != fd)
		close(fd);
	return(rc ? EXIT_SUCCESS : EXIT_FAILURE);

usage:
	fprintf(stderr, "usage: %s [-W] [input_filename]\n", progname);
	return(EXIT_FAILURE);
}