[BACK]Return to docbook2mdoc.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / docbook2mdoc

File: [cvsweb.bsd.lv] / docbook2mdoc / docbook2mdoc.c (download)

Revision 1.71, Sun Mar 24 23:48:58 2019 UTC (5 years ago) by schwarze
Branch: MAIN
Changes since 1.70: +37 -44 lines

To avoid use after free, use TAILQ_FOREACH_SAFE(3) rather than
TAILQ_FOREACH(3) when deleting list elements during the iteration.
Factor out some repeated code into a new function pnode_printtitle().

Where pnode_print() calls per-element pnode_print*() functions,
call exactly one function per element and do everything that is
required inside, making the huge function pnode_print() slightly
smaller and the various pnode_print*() more self-contained.
In particular, call pnode_unlinksub() as close as possible to the
place where the processing justifying the deletion was done.

/* $Id: docbook2mdoc.c,v 1.71 2019/03/24 23:48:58 schwarze Exp $ */
/*
 * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2019 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/queue.h>

#include <assert.h>
#include <ctype.h>
#include <expat.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "extern.h"

/*
 * Where the output cursor currently is within the generated mdoc(7)
 * text; consulted by macro_open(), macro_close() and print_text().
 */
enum	linestate {
	LINE_NEW = 0,	/* at the beginning of a fresh output line */
	LINE_TEXT = 1,	/* inside a line of plain text */
	LINE_MACRO = 2	/* inside a macro line */
};

/*
 * Global parse state.
 * Keep this as simple and small as possible.
 * The xml_*() callbacks receive a pointer to it as their "arg"
 * argument and the output functions receive it as "p".
 */
struct	parse {
	XML_Parser	 xml; /* parser handle, queried for line/column in messages */
	enum nodeid	 node; /* current (NODE_ROOT if pre-tree) */
	const char	*fname; /* filename */
	int		 stop; /* should we stop now? (set after a parse error) */
/* Set in "flags" by xml_elem_start() when an inlineequation is seen. */
#define	PARSE_EQN	 1
	unsigned int	 flags; /* document-wide flags */
	struct pnode	*root; /* root of parse tree */
	struct pnode	*cur; /* current node in tree */
	char		*b; /* NUL-terminated buffer for pre-print */
	size_t		 bsz; /* current length of b (not counting the NUL) */
	size_t		 mbsz; /* max bsz allocation */
	int		 level; /* header level, starting at 1 */
	enum linestate	 linestate; /* state of the current output line */
};

/*
 * One entry of the element lookup table "nodes": maps an XML element
 * name to the node type stored in the parse tree.
 */
struct	node {
	const char	*name; /* docbook element name */
	enum nodeid	 node; /* docbook element to generate */
};

/* Tail queue head types for lists of child nodes and of attributes. */
TAILQ_HEAD(pnodeq, pnode);
TAILQ_HEAD(pattrq, pattr);

/*
 * One recognised attribute of an element.
 * If the value is not one of the known enumerated values, "val" is
 * ATTRVAL__MAX and "rawval" holds a heap copy of the value string;
 * otherwise "rawval" is left NULL.
 */
struct	pattr {
	enum attrkey	 key; /* which attribute */
	enum attrval	 val; /* enumerated value or ATTRVAL__MAX */
	char		*rawval; /* raw value string (freed by pnode_free()) */
	TAILQ_ENTRY(pattr) child; /* linkage in pnode.attrq */
};

/*
 * A node of the parse tree: either an element or a run of text
 * (NODE_TEXT).
 * For text nodes, "b"/"bsz" describe the text, which is NOT
 * NUL-terminated.  "b" may be advanced past consumed leading bytes
 * (see macro_closepunct()), so "real" keeps the pointer that must be
 * passed to free().
 */
struct	pnode {
	enum nodeid	 node; /* node type */
	char		*b; /* binary data buffer */
	char		*real; /* store for "b" */
	size_t		 bsz; /* data buffer size */
	struct pnode	*parent; /* parent (or NULL if top) */
	struct pnodeq	 childq; /* queue of children */
	struct pattrq	 attrq; /* attributes of node */
	TAILQ_ENTRY(pnode) child; /* linkage in the parent's childq */
};

/*
 * Attribute names, indexed by enum attrkey.
 * NOTE(review): the order presumably has to match the declaration of
 * enum attrkey, which is not in this file -- confirm when editing.
 */
static	const char *attrkeys[ATTRKEY__MAX] = {
	"choice",
	"class",
	"close",
	"id",
	"linkend",
	"open",
	"rep"
};

/*
 * Known enumerated attribute values, indexed by enum attrval.
 * NOTE(review): the order presumably has to match the declaration of
 * enum attrval, which is not in this file -- confirm when editing.
 */
static	const char *attrvals[ATTRVAL__MAX] = {
	"monospaced",
	"norepeat",
	"opt",
	"plain",
	"repeat",
	"req"
};

/*
 * Table of recognised element names and the node type each one
 * produces.  xml_elem_start() searches it linearly with strcmp(3).
 * Several sectioning elements (chapter, part, refsect1..3, refsection,
 * sect1, sect2, section) share NODE_SECTION.
 * The entry with a NULL name terminates the table.
 */
static	const struct node nodes[] = {
	{ "acronym",		NODE_ACRONYM },
	{ "affiliation",	NODE_AFFILIATION },
	{ "anchor",		NODE_ANCHOR },
	{ "application",	NODE_APPLICATION },
	{ "arg",		NODE_ARG },
	{ "author",		NODE_AUTHOR },
	{ "authorgroup",	NODE_AUTHORGROUP },
	{ "blockquote",		NODE_BLOCKQUOTE },
	{ "book",		NODE_BOOK },
	{ "bookinfo",		NODE_BOOKINFO },
	{ "caution",		NODE_CAUTION },
	{ "chapter",		NODE_SECTION },
	{ "citerefentry",	NODE_CITEREFENTRY },
	{ "citetitle",		NODE_CITETITLE },
	{ "cmdsynopsis",	NODE_CMDSYNOPSIS },
	{ "code",		NODE_CODE },
	{ "colspec",		NODE_COLSPEC },
	{ "command",		NODE_COMMAND },
	{ "constant",		NODE_CONSTANT },
	{ "copyright",		NODE_COPYRIGHT },
	{ "date",		NODE_DATE },
	{ "editor",		NODE_EDITOR },
	{ "email",		NODE_EMAIL },
	{ "emphasis",		NODE_EMPHASIS },
	{ "entry",		NODE_ENTRY },
	{ "envar",		NODE_ENVAR },
	{ "fieldsynopsis",	NODE_FIELDSYNOPSIS },
	{ "filename",		NODE_FILENAME },
	{ "firstname",		NODE_FIRSTNAME },
	{ "firstterm",		NODE_FIRSTTERM },
	{ "footnote",		NODE_FOOTNOTE },
	{ "funcdef",		NODE_FUNCDEF },
	{ "funcprototype",	NODE_FUNCPROTOTYPE },
	{ "funcsynopsis",	NODE_FUNCSYNOPSIS },
	{ "funcsynopsisinfo",	NODE_FUNCSYNOPSISINFO },
	{ "function",		NODE_FUNCTION },
	{ "glossterm",		NODE_GLOSSTERM },
	{ "group",		NODE_GROUP },
	{ "holder",		NODE_HOLDER },
	{ "index",		NODE_INDEX },
	{ "indexterm",		NODE_INDEXTERM },
	{ "info",		NODE_INFO },
	{ "informalequation",	NODE_INFORMALEQUATION },
	{ "informaltable",	NODE_INFORMALTABLE },
	{ "inlineequation",	NODE_INLINEEQUATION },
	{ "itemizedlist",	NODE_ITEMIZEDLIST },
	{ "keysym",		NODE_KEYSYM },
	{ "legalnotice",	NODE_LEGALNOTICE },
	{ "link",		NODE_LINK },
	{ "listitem",		NODE_LISTITEM },
	{ "literal",		NODE_LITERAL },
	{ "literallayout",	NODE_LITERALLAYOUT },
	{ "manvolnum",		NODE_MANVOLNUM },
	{ "member",		NODE_MEMBER },
	{ "mml:math",		NODE_MML_MATH },
	{ "mml:mfenced",	NODE_MML_MFENCED },
	{ "mml:mfrac",		NODE_MML_MFRAC },
	{ "mml:mi",		NODE_MML_MI },
	{ "mml:mn",		NODE_MML_MN },
	{ "mml:mo",		NODE_MML_MO },
	{ "mml:mrow",		NODE_MML_MROW },
	{ "mml:msub",		NODE_MML_MSUB },
	{ "mml:msup",		NODE_MML_MSUP },
	{ "modifier",		NODE_MODIFIER },
	{ "note",		NODE_NOTE },
	{ "option",		NODE_OPTION },
	{ "orderedlist",	NODE_ORDEREDLIST },
	{ "orgname",		NODE_ORGNAME },
	{ "othername",		NODE_OTHERNAME },
	{ "para",		NODE_PARA },
	{ "paramdef",		NODE_PARAMDEF },
	{ "parameter",		NODE_PARAMETER },
	{ "part",		NODE_SECTION },
	{ "personname",		NODE_PERSONNAME },
	{ "phrase",		NODE_PHRASE },
	{ "preface",		NODE_PREFACE },
	{ "primary",		NODE_PRIMARY },
	{ "programlisting",	NODE_PROGRAMLISTING },
	{ "prompt",		NODE_PROMPT },
	{ "quote",		NODE_QUOTE },
	{ "refclass",		NODE_REFCLASS },
	{ "refdescriptor",	NODE_REFDESCRIPTOR },
	{ "refentry",		NODE_REFENTRY },
	{ "refentryinfo",	NODE_REFENTRYINFO },
	{ "refentrytitle",	NODE_REFENTRYTITLE },
	{ "refmeta",		NODE_REFMETA },
	{ "refmetainfo",	NODE_REFMETAINFO },
	{ "refmiscinfo",	NODE_REFMISCINFO },
	{ "refname",		NODE_REFNAME },
	{ "refnamediv",		NODE_REFNAMEDIV },
	{ "refpurpose",		NODE_REFPURPOSE },
	{ "refsect1",		NODE_SECTION },
	{ "refsect2",		NODE_SECTION },
	{ "refsect3",		NODE_SECTION },
	{ "refsection",		NODE_SECTION },
	{ "refsynopsisdiv",	NODE_REFSYNOPSISDIV },
	{ "releaseinfo",	NODE_RELEASEINFO },
	{ "replaceable",	NODE_REPLACEABLE },
	{ "row",		NODE_ROW },
	{ "sbr",		NODE_SBR },
	{ "screen",		NODE_SCREEN },
	{ "secondary",		NODE_SECONDARY },
	{ "sect1",		NODE_SECTION },
	{ "sect2",		NODE_SECTION },
	{ "section",		NODE_SECTION },
	{ "sgmltag",		NODE_SGMLTAG },
	{ "simplelist",		NODE_SIMPLELIST },
	{ "spanspec",		NODE_SPANSPEC },
	{ "structname",		NODE_STRUCTNAME },
	{ "subtitle",		NODE_SUBTITLE },
	{ "surname",		NODE_SURNAME },
	{ "synopsis",		NODE_SYNOPSIS },
	{ "table",		NODE_TABLE },
	{ "tbody",		NODE_TBODY },
	{ "term",		NODE_TERM },
	{ "tfoot",		NODE_TFOOT },
	{ "tgroup",		NODE_TGROUP },
	{ "thead",		NODE_THEAD },
	{ "tip",		NODE_TIP },
	{ "title",		NODE_TITLE },
	{ "trademark",		NODE_TRADEMARK },
	{ "type",		NODE_TYPE },
	{ "ulink",		NODE_ULINK },
	{ "userinput",		NODE_USERINPUT },
	{ "variablelist",	NODE_VARIABLELIST },
	{ "varlistentry",	NODE_VARLISTENTRY },
	{ "varname",		NODE_VARNAME },
	{ "warning",		NODE_WARNING },
	{ "wordasword",		NODE_WORDASWORD },
	{ "year",		NODE_YEAR },
	{ NULL,			NODE__MAX }
};

/* If non-zero, xml_elem_start() warns about unknown attributes. */
static	int warn = 0;

/*
 * Forward declaration: the pnode_print*() helpers below call back
 * into pnode_print() to format child nodes.
 */
static void
pnode_print(struct parse *p, struct pnode *pn);

/*
 * Character data callback.
 * Text is kept in NODE_TEXT nodes of its own.  If the current node
 * already is a text node, the new data is appended to it; otherwise
 * leading whitespace is skipped and, provided anything is left, a new
 * text node is created below the current node and made current.
 * Nothing is recorded while parsing is stopped or no tree exists yet.
 * Exits the program if memory allocation fails.
 */
static void
xml_char(void *arg, const XML_Char *p, int sz)
{
	struct parse	*ps = arg;
	struct pnode	*txt;
	int		 skip;

	if (ps->stop || ps->node == NODE_ROOT)
		return;

	assert(ps->cur != NULL);

	if (ps->node != NODE_TEXT) {
		/* Do not open a text node for whitespace only. */
		skip = 0;
		while (skip < sz && isspace((unsigned char)p[skip]))
			skip++;
		if (skip == sz)
			return;
		p += skip;
		sz -= skip;

		if ((txt = calloc(1, sizeof(*txt))) == NULL) {
			perror(NULL);
			exit(1);
		}
		txt->node = NODE_TEXT;
		txt->parent = ps->cur;
		TAILQ_INIT(&txt->childq);
		TAILQ_INIT(&txt->attrq);
		TAILQ_INSERT_TAIL(&ps->cur->childq, txt, child);
		ps->node = NODE_TEXT;
		ps->cur = txt;
		assert(ps->root != NULL);
	}

	/* Grow the text buffer and copy the new bytes to its end. */
	assert(sz >= 0);
	txt = ps->cur;
	txt->b = realloc(txt->b, txt->bsz + (size_t)sz);
	if (txt->b == NULL) {
		perror(NULL);
		exit(1);
	}
	memcpy(txt->b + txt->bsz, p, sz);
	txt->bsz += (size_t)sz;
	txt->real = txt->b;
}

/*
 * Drop trailing whitespace from a text node by shrinking its length.
 * The buffer itself is not modified.
 */
static void
pnode_trim(struct pnode *pn)
{
	assert(pn->node == NODE_TEXT);
	while (pn->bsz > 0 &&
	    isspace((unsigned char)pn->b[pn->bsz - 1]))
		pn->bsz--;
}

/*
 * Begin an element.
 * Does nothing if parsing was stopped or for "xi:include" elements.
 * If a text node is open, close it out and return to its parent.
 * Then look up the element name in the nodes[] table:
 * If it is unknown and no tree is open (NODE_ROOT), ignore it.
 * If it is unknown inside a tree, report an error and stop parsing.
 * If it is known, but we are at the root and already have a tree,
 * report "multiple refentries" and stop parsing
 * (FIXME: I don't think this is right?).
 * Otherwise, append a new node to the current node, make it current,
 * and make it the root if there was no root yet.
 * Finally, store all recognised attributes in the node: known values
 * as an enum attrval, anything else as a copy of the raw string.
 * Unknown attribute names are skipped, with a warning if "warn" is set.
 * Exits the program if memory allocation fails.
 */
static void
xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
{
	struct parse	 *ps = arg;
	const struct node *node;
	enum attrkey	  key;
	enum attrval	  val;
	struct pnode	 *dat;
	struct pattr	 *pattr;
	const XML_Char	**att;

	/* FIXME: find a better way to ditch other namespaces. */
	if (ps->stop || strcmp(name, "xi:include") == 0)
		return;

	/* Close out text node, if applicable... */
	if (ps->node == NODE_TEXT) {
		pnode_trim(ps->cur);
		ps->cur = ps->cur->parent;
		ps->node = ps->cur->node;
	}

	for (node = nodes; node->name != NULL; node++)
		if (strcmp(node->name, name) == 0)
			break;

	if (node->name == NULL) {
		if (ps->node == NODE_ROOT)
			return;
		fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
			ps->fname, XML_GetCurrentLineNumber(ps->xml),
			XML_GetCurrentColumnNumber(ps->xml), name);
		ps->stop = 1;
		return;
	} else if (ps->node == NODE_ROOT && ps->root != NULL) {
		fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
			ps->fname, XML_GetCurrentLineNumber(ps->xml),
			XML_GetCurrentColumnNumber(ps->xml));
		ps->stop = 1;
		return;
	}

	/* Remember that eqn(7) delimiters are needed in the prologue. */
	if (node->node == NODE_INLINEEQUATION)
		ps->flags |= PARSE_EQN;

	if ((dat = calloc(1, sizeof(struct pnode))) == NULL) {
		perror(NULL);
		exit(1);
	}

	dat->node = ps->node = node->node;
	dat->parent = ps->cur;
	TAILQ_INIT(&dat->childq);
	TAILQ_INIT(&dat->attrq);

	if (ps->cur != NULL)
		TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);

	ps->cur = dat;
	if (ps->root == NULL)
		ps->root = dat;

	/*
	 * Process attributes.
	 * The array holds name/value pairs and is NULL-terminated.
	 */
	for (att = atts; *att != NULL; att += 2) {
		for (key = 0; key < ATTRKEY__MAX; key++)
			if (strcmp(*att, attrkeys[key]) == 0)
				break;
		if (key == ATTRKEY__MAX) {
			if (warn)
				fprintf(stderr, "%s:%zu:%zu: warning: "
					"unknown attribute \"%s\"\n",
					ps->fname,
					XML_GetCurrentLineNumber(ps->xml),
					XML_GetCurrentColumnNumber(ps->xml),
					*att);
			continue;
		}
		for (val = 0; val < ATTRVAL__MAX; val++)
			if (strcmp(att[1], attrvals[val]) == 0)
				break;
		if ((pattr = calloc(1, sizeof(struct pattr))) == NULL) {
			perror(NULL);
			exit(1);
		}
		pattr->key = key;
		pattr->val = val;
		/* Keep the raw string only for non-enumerated values. */
		if (val == ATTRVAL__MAX &&
		    (pattr->rawval = strdup(att[1])) == NULL) {
			perror(NULL);
			exit(1);
		}
		TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
	}
}

/*
 * Roll up the parse tree.
 * If we're at a text node, roll that one up first.
 * If we hit the root, then assign ourselves as the NODE_ROOT.
 */
static void
xml_elem_end(void *arg, const XML_Char *name)
{
	struct parse	*ps = arg;

	/* FIXME: find a better way to ditch other namespaces. */
	if (ps->stop || ps->node == NODE_ROOT)
		return;
	else if (strcmp(name, "xi:include") == 0)
		return;

	/* Close out text node, if applicable... */
	if (ps->node == NODE_TEXT) {
		pnode_trim(ps->cur);
		ps->cur = ps->cur->parent;
		ps->node = ps->cur->node;
	}

	if ((ps->cur = ps->cur->parent) == NULL)
		ps->node = NODE_ROOT;
	else
		ps->node = ps->cur->node;
}

/*
 * Release a node together with its attributes and, recursively, all
 * of its children.  Passing NULL is allowed and does nothing.
 * The node is not removed from its parent's queue; see pnode_unlink().
 */
static void
pnode_free(struct pnode *pn)
{
	struct pnode	*kid;
	struct pattr	*attr;

	if (pn == NULL)
		return;

	while ( ! TAILQ_EMPTY(&pn->childq)) {
		kid = TAILQ_FIRST(&pn->childq);
		TAILQ_REMOVE(&pn->childq, kid, child);
		pnode_free(kid);
	}

	while ( ! TAILQ_EMPTY(&pn->attrq)) {
		attr = TAILQ_FIRST(&pn->attrq);
		TAILQ_REMOVE(&pn->attrq, attr, child);
		free(attr->rawval);
		free(attr);
	}

	/* "b" may point into the middle of the allocation. */
	free(pn->real);
	free(pn);
}

/*
 * Detach a node from its parent's child queue (if it has a parent)
 * and free it with pnode_free().  The pointer is invalid afterwards.
 */
static void
pnode_unlink(struct pnode *pn)
{
	struct pnode	*parent;

	parent = pn->parent;
	if (parent != NULL)
		TAILQ_REMOVE(&parent->childq, pn, child);
	pnode_free(pn);
}

/*
 * Remove and free every child of a node, leaving the node itself
 * in place with an empty child queue.
 */
static void
pnode_unlinksub(struct pnode *pn)
{
	struct pnode	*kid;

	while ((kid = TAILQ_FIRST(&pn->childq)) != NULL)
		pnode_unlink(kid);
}

/*
 * Retrieve an enumeration attribute from a node.
 * Return ATTRVAL__MAX if the node has no such attribute.
 */
enum attrval
pnode_getattr(struct pnode *pn, enum attrkey key)
{
	struct pattr	*ap;

	TAILQ_FOREACH(ap, &pn->attrq, child)
		if (ap->key == key)
			return ap->val;
	return ATTRVAL__MAX;
}

/*
 * Retrieve an attribute string from a node.
 * Return defval if the node has no such attribute.
 */
const char *
pnode_getattr_raw(struct pnode *pn, enum attrkey key, const char *defval)
{
	struct pattr	*ap;

	TAILQ_FOREACH(ap, &pn->attrq, child)
		if (ap->key == key)
			return ap->val == ATTRVAL__MAX ? ap->rawval :
			    attrvals[ap->val];
	return defval;
}

/*
 * Empty the lookaside buffer: set its length to zero and
 * NUL-terminate it.  The buffer must already be allocated.
 */
static void
bufclear(struct parse *p)
{
	p->bsz = 0;
	p->b[0] = '\0';
}

/*
 * Copy the contents of a NODE_TEXT node to the end of the lookaside
 * buffer, enlarging the buffer first if it is too small.
 * The buffer remains NUL-terminated.
 * Exits the program if memory allocation fails.
 */
static void
bufappend(struct parse *p, struct pnode *pn)
{
	size_t		 need;

	assert(pn->node == NODE_TEXT);

	/* Old content, new content, and the terminating NUL. */
	need = p->bsz + pn->bsz + 1;
	if (need > p->mbsz) {
		p->mbsz = need;
		p->b = realloc(p->b, p->mbsz);
		if (p->b == NULL) {
			perror(NULL);
			exit(1);
		}
	}
	memcpy(p->b + p->bsz, pn->b, pn->bsz);
	p->bsz += pn->bsz;
	p->b[p->bsz] = '\0';
}

/*
 * Recursively append all NODE_TEXT nodes to the buffer.
 * This descends into non-text nodes, but doesn't do anything beyond
 * them.
 * In other words, this is a recursive text grok.
 */
static void
bufappend_r(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;

	if (pn->node == NODE_TEXT)
		bufappend(p, pn);
	TAILQ_FOREACH(pp, &pn->childq, child)
		bufappend_r(p, pp);
}

/*
 * Depth-first search below pn for the first descendant of the given
 * type.  The node pn itself is not considered.
 * Returns NULL if there is no such descendant.
 */
static struct pnode *
pnode_findfirst(struct pnode *pn, enum nodeid node)
{
	struct pnode	*kid, *hit;

	TAILQ_FOREACH(kid, &pn->childq, child) {
		if (kid->node == node)
			return kid;
		if ((hit = pnode_findfirst(kid, node)) != NULL)
			return hit;
	}
	return NULL;
}

/*
 * Write a macro name to standard output.
 * On a macro line, it is appended after a space.
 * Otherwise, a new macro line is started with a dot, after ending
 * a pending text line with a newline.
 */
static void
macro_open(struct parse *p, const char *name)
{
	if (p->linestate == LINE_MACRO) {
		putchar(' ');
	} else if (p->linestate == LINE_TEXT ||
	    p->linestate == LINE_NEW) {
		if (p->linestate == LINE_TEXT)
			putchar('\n');
		putchar('.');
		p->linestate = LINE_MACRO;
	}
	fputs(name, stdout);
}

/*
 * Terminate the current macro line with a newline.
 * Must only be called while on a macro line.
 */
static void
macro_close(struct parse *p)
{
	assert(p->linestate == LINE_MACRO);
	fputc('\n', stdout);
	p->linestate = LINE_NEW;
}

/*
 * Print a complete macro line consisting of "name" only.
 * The string may contain fixed arguments (e.g. "Sh SYNOPSIS");
 * it is written verbatim, without any escaping.
 */
static void
macro_line(struct parse *p, const char *name)
{
	macro_open(p, name);
	macro_close(p);
}

#define	MACROLINE_UPPER	1	/* print the argument in upper case */
#define	MACROLINE_NOWS	2	/* do not force a space before the argument */
/*
 * Print an argument string on a macro line, collapsing whitespace.
 * Each run of whitespace in "arg" becomes a single space, and a space
 * is printed before the first word unless MACROLINE_NOWS is given
 * (leading whitespace in "arg" still results in a space).
 * Trailing whitespace is dropped.
 * With MACROLINE_UPPER, all characters are converted with toupper(3).
 * Words that could be mistaken for macro names are prefixed with "\&",
 * and every backslash is printed as "\e".
 * Must only be called while on a macro line.
 */
static void
macro_addarg(struct parse *p, const char *arg, int fl)
{
	const char	*cp;
	int		 wantspace;

	assert(p->linestate == LINE_MACRO);
	wantspace = !(fl & MACROLINE_NOWS);
	for (cp = arg; *cp != '\0'; cp++) {
		if (isspace((unsigned char)*cp)) {
			/* Defer the space until the next word starts. */
			wantspace = 1;
			continue;
		} else if (wantspace) {
			putchar(' ');
			wantspace = 0;
		}
		/*
		 * Escape us if we look like a macro: at the start of
		 * the argument or after a space character, an upper
		 * case letter followed by one or two lower case
		 * letters and then a space or the end of the string.
		 * The && chain never reads beyond the terminating NUL.
		 */
		if ((cp == arg || cp[-1] == ' ') &&
		    isupper((unsigned char)cp[0]) &&
		    islower((unsigned char)cp[1]) &&
		    (cp[2] == '\0' || cp[2] == ' ' ||
		     (islower((unsigned char)cp[2]) &&
		      (cp[3] == '\0' || cp[3] == ' '))))
			fputs("\\&", stdout);
		if (fl & MACROLINE_UPPER)
			putchar(toupper((unsigned char)*cp));
		else
			putchar(*cp);
		/* Turn a backslash into the escape sequence "\e". */
		if (*cp == '\\')
			putchar('e');
	}
}

/*
 * Print a complete macro line: the macro "name" followed by the
 * string "arg", which is escaped and whitespace-collapsed by
 * macro_addarg().
 */
static void
macro_argline(struct parse *p, const char *name, const char *arg)
{
	macro_open(p, name);
	macro_addarg(p, arg, 0);
	macro_close(p);
}

/*
 * Recurse nodes to print arguments on a macro line.
 * All text contained in the subtree of pn is gathered in the
 * lookaside buffer (overwriting its previous content) and then
 * printed with macro_addarg() using the flags fl.
 */
static void
macro_addnode(struct parse *p, struct pnode *pn, int fl)
{
	bufclear(p);
	bufappend_r(p, pn);
	macro_addarg(p, p->b, fl);
}

/*
 * Print a complete macro line: the macro "name" followed by all
 * text contained in the subtree of pn.
 */
static void
macro_nodeline(struct parse *p, const char *name, struct pnode *pn)
{
	macro_open(p, name);
	macro_addnode(p, pn, 0);
	macro_close(p);
}

/*
 * Close the current macro line.
 * Before doing so, if the sibling following pn is a text node
 * beginning with a comma or a full stop that is either its only byte
 * or is followed by whitespace, print that punctuation as a trailing
 * macro argument and consume it from the text node.
 */
static void
macro_closepunct(struct parse *p, struct pnode *pn)
{
	struct pnode	*next;

	next = TAILQ_NEXT(pn, child);
	if (next != NULL && next->node == NODE_TEXT && next->bsz > 0 &&
	    (next->b[0] == ',' || next->b[0] == '.') &&
	    (next->bsz == 1 || isspace((unsigned char)next->b[1]))) {
		putchar(' ');
		putchar(next->b[0]);
		/* Skip the byte; "real" still holds the allocation. */
		next->b++;
		next->bsz--;
	}
	macro_close(p);
}

/*
 * Print a word of plain text.
 * A pending macro line is closed first; on a text line, the word is
 * separated from the preceding text by a space.
 * The word is written verbatim, without any escaping.
 */
static void
print_text(struct parse *p, const char *word)
{
	if (p->linestate == LINE_TEXT)
		putchar(' ');
	else if (p->linestate == LINE_MACRO)
		macro_close(p);
	fputs(word, stdout);
	p->linestate = LINE_TEXT;
}

/*
 * Print a paragraph break (.Pp) before pn unless the context makes
 * it redundant.  The context is the preceding sibling or, for a first
 * child, the parent.  No break is printed after or directly inside
 * a table entry or list item, nor after or directly inside a preface
 * or section while the header level is below 3.
 */
static void
pnode_printpara(struct parse *p, struct pnode *pn)
{
	struct pnode	*ctx;

	ctx = TAILQ_PREV(pn, pnodeq, child);
	if (ctx == NULL)
		ctx = pn->parent;
	if (ctx == NULL)
		return;

	if (ctx->node == NODE_ENTRY || ctx->node == NODE_LISTITEM)
		return;
	if ((ctx->node == NODE_PREFACE || ctx->node == NODE_SECTION) &&
	    p->level < 3)
		return;

	macro_line(p, "Pp");
}

/*
 * If the SYNOPSIS macro has a superfluous title, kill it.
 */
static void
pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp, *pq;

	TAILQ_FOREACH_SAFE(pp, &pn->childq, child, pq)
		if (pp->node == NODE_TITLE)
			pnode_unlink(pp);

	macro_line(p, "Sh SYNOPSIS");
}

/*
 * Start a section, subsection, or lower-level heading.
 * Increments the header level; level 1 prints .Sh with the title in
 * upper case, level 2 prints .Ss, and anything deeper prints a
 * paragraph break (where appropriate) followed by .Sy.
 * Caution, note, tip and warning nodes are always printed like the
 * deeper levels.
 * The heading text is taken from the first title child, which is then
 * deleted; without a title, a default depending on the node type is
 * used.  Nodes without a parent are ignored.
 */
static void
pnode_printrefsect(struct parse *p, struct pnode *pn)
{
	struct pnode	*tnode;
	const char	*deftitle;
	int		 level, upper;

	if (pn->parent == NULL)
		return;

	level = ++p->level;
	upper = level == 1 ? MACROLINE_UPPER : 0;

	if (level < 3 &&
	    (pn->node == NODE_CAUTION || pn->node == NODE_NOTE ||
	     pn->node == NODE_TIP || pn->node == NODE_WARNING))
		level = 3;

	TAILQ_FOREACH(tnode, &pn->childq, child)
		if (tnode->node == NODE_TITLE)
			break;

	if (level == 1)
		macro_open(p, "Sh");
	else if (level == 2)
		macro_open(p, "Ss");
	else {
		pnode_printpara(p, pn);
		macro_open(p, "Sy");
	}

	if (tnode != NULL) {
		macro_addnode(p, tnode, upper);
		pnode_unlink(tnode);
	} else {
		switch (pn->node) {
		case NODE_PREFACE:
			deftitle = "Preface";
			break;
		case NODE_CAUTION:
			deftitle = "Caution";
			break;
		case NODE_NOTE:
			deftitle = "Note";
			break;
		case NODE_TIP:
			deftitle = "Tip";
			break;
		case NODE_WARNING:
			deftitle = "Warning";
			break;
		default:
			deftitle = "Unknown";
			break;
		}
		macro_addarg(p, deftitle, 0);
	}
	macro_close(p);
}

/*
 * Print a manual page cross reference (.Xr) from the refentrytitle
 * and manvolnum children; if there are several, the last of each
 * kind is used.  A missing title is printed as "unknown" and a
 * missing volume as "1".  All children are deleted afterwards.
 */
static void
pnode_printciterefentry(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *title, *manvol;

	title = NULL;
	manvol = NULL;
	TAILQ_FOREACH(kid, &pn->childq, child) {
		switch (kid->node) {
		case NODE_MANVOLNUM:
			manvol = kid;
			break;
		case NODE_REFENTRYTITLE:
			title = kid;
			break;
		default:
			break;
		}
	}

	macro_open(p, "Xr");
	if (title != NULL)
		macro_addnode(p, title, 0);
	else
		macro_addarg(p, "unknown", 0);
	if (manvol != NULL)
		macro_addnode(p, manvol, 0);
	else
		macro_addarg(p, "1", 0);
	macro_close(p);
	pnode_unlinksub(pn);
}

/*
 * Print the document title line (.Dt) from the refentrytitle and
 * manvolnum children; if there are several, the last of each kind
 * is used.  The title is printed in upper case.  A missing title is
 * printed as "UNKNOWN" and a missing volume as "1".
 * The node pn itself is deleted, so the caller must not use it
 * afterwards.
 */
static void
pnode_printrefmeta(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *title, *manvol;

	title = NULL;
	manvol = NULL;
	TAILQ_FOREACH(kid, &pn->childq, child) {
		switch (kid->node) {
		case NODE_MANVOLNUM:
			manvol = kid;
			break;
		case NODE_REFENTRYTITLE:
			title = kid;
			break;
		default:
			break;
		}
	}

	macro_open(p, "Dt");
	if (title != NULL)
		macro_addnode(p, title, MACROLINE_UPPER);
	else
		macro_addarg(p, "UNKNOWN", 0);
	if (manvol != NULL)
		macro_addnode(p, manvol, 0);
	else
		macro_addarg(p, "1", 0);
	macro_close(p);
	pnode_unlink(pn);
}

/*
 * Print the start of a function prototype.
 * The last text child, if any, is printed as the return type on an
 * .Ft line.  The last function child is printed as the argument of
 * .Fo, or "UNKNOWN" if there is none.
 */
static void
pnode_printfuncdef(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *ftype, *func;

	ftype = NULL;
	func = NULL;
	TAILQ_FOREACH(kid, &pn->childq, child) {
		switch (kid->node) {
		case NODE_TEXT:
			ftype = kid;
			break;
		case NODE_FUNCTION:
			func = kid;
			break;
		default:
			break;
		}
	}

	if (ftype != NULL)
		macro_nodeline(p, "Ft", ftype);

	macro_open(p, "Fo");
	if (func != NULL)
		macro_addnode(p, func, 0);
	else
		macro_addarg(p, "UNKNOWN", 0);
	macro_close(p);
}

/*
 * Print one function parameter as a quoted .Fa argument consisting
 * of the last text child (the type) and the last parameter child
 * (the name), separated by a space if both are present.
 */
static void
pnode_printparamdef(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *ptype, *param;

	ptype = NULL;
	param = NULL;
	TAILQ_FOREACH(kid, &pn->childq, child) {
		switch (kid->node) {
		case NODE_TEXT:
			ptype = kid;
			break;
		case NODE_PARAMETER:
			param = kid;
			break;
		default:
			break;
		}
	}

	/* No space must follow the opening or precede the closing quote. */
	macro_open(p, "Fa \"");
	if (ptype != NULL)
		macro_addnode(p, ptype, MACROLINE_NOWS);
	if (param != NULL)
		macro_addnode(p, param,
		    ptype != NULL ? 0 : MACROLINE_NOWS);
	macro_addarg(p, "\"", MACROLINE_NOWS);
	macro_close(p);
}

/*
 * The <mml:mfenced> node is a little peculiar.
 * First, it can have arbitrary open and closing tokens, which default
 * to parentheses.
 * Second, >1 arguments are separated by commas.
 * A node without any children prints just the two fences.
 * All children are deleted afterwards.
 */
static void
pnode_printmathfenced(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;
	int		 first = 1;

	printf("left %s ", pnode_getattr_raw(pn, ATTRKEY_OPEN, "("));

	/*
	 * Iterate with TAILQ_FOREACH so that an empty child queue
	 * is never passed to TAILQ_NEXT().
	 */
	TAILQ_FOREACH(pp, &pn->childq, child) {
		if ( ! first)
			putchar(',');
		pnode_print(p, pp);
		first = 0;
	}
	printf("right %s ", pnode_getattr_raw(pn, ATTRKEY_CLOSE, ")"));
	pnode_unlinksub(pn);
}

/*
 * These math nodes require special handling because they have infix
 * syntax, instead of the usual prefix or postfix.
 * So we need to break up the first and second child node with a
 * particular eqn(7) word: "sup", "over", or "sub".
 * Missing children are skipped; children beyond the second are
 * not printed.  All children are deleted afterwards.
 */
static void
pnode_printmath(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;

	/* pnode_print() ignores NULL, so an empty queue is fine here. */
	pp = TAILQ_FIRST(&pn->childq);
	pnode_print(p, pp);

	switch (pn->node) {
	case NODE_MML_MSUP:
		fputs(" sup ", stdout);
		break;
	case NODE_MML_MFRAC:
		fputs(" over ", stdout);
		break;
	case NODE_MML_MSUB:
		fputs(" sub ", stdout);
		break;
	default:
		break;
	}

	/* Only look for a second operand if there was a first one. */
	if (pp != NULL)
		pnode_print(p, TAILQ_NEXT(pp, child));
	pnode_unlinksub(pn);
}

/*
 * Print a function prototype as an .Fo/.Fa/.Fc block.
 * The first funcdef child provides the .Fo line ("Fo UNKNOWN" if
 * there is none), every paramdef child provides one .Fa line, and
 * the block is closed with .Fc.  All children are deleted afterwards.
 */
static void
pnode_printfuncprototype(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *fdef;

	fdef = NULL;
	TAILQ_FOREACH(kid, &pn->childq, child) {
		if (kid->node == NODE_FUNCDEF) {
			fdef = kid;
			break;
		}
	}

	if (fdef == NULL)
		macro_line(p, "Fo UNKNOWN");
	else
		pnode_printfuncdef(p, fdef);

	TAILQ_FOREACH(kid, &pn->childq, child) {
		if (kid->node != NODE_PARAMDEF)
			continue;
		pnode_printparamdef(p, kid);
	}

	macro_line(p, "Fc");
	pnode_unlinksub(pn);
}

/*
 * The <arg> element is more complicated than it should be because text
 * nodes are treated like ".Ar foo", but non-text nodes need to be
 * re-sent into the printer (i.e., without the preceding ".Ar").
 * This also handles the case of "repetition" (or in other words, the
 * ellipsis following an argument) and optionality.
 */
static void
pnode_printarg(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;
	struct pattr	*ap;
	int		 isop, isrep;

	isop = 1;
	isrep = 0;
	TAILQ_FOREACH(ap, &pn->attrq, child) {
		if (ap->key == ATTRKEY_CHOICE &&
		    (ap->val == ATTRVAL_PLAIN || ap->val == ATTRVAL_REQ))
			isop = 0;
		else if (ap->key == ATTRKEY_REP && ap->val == ATTRVAL_REPEAT)
			isrep = 1;
	}
	if (isop)
		macro_open(p, "Op");

	TAILQ_FOREACH(pp, &pn->childq, child) {
		if (pp->node == NODE_TEXT)
			macro_open(p, "Ar");
		pnode_print(p, pp);
		if (isrep && pp->node == NODE_TEXT)
			macro_addarg(p, "...", 0);
	}
	pnode_unlinksub(pn);
}

/*
 * Print a <group> element: a set of alternatives.
 * The group is wrapped in .Op unless its choice attribute is "plain"
 * or "req".  Runs of consecutive children of the same node type are
 * joined with "|".  All children are deleted afterwards.
 */
static void
pnode_printgroup(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp, *np;
	struct pattr	*ap;
	int		 isop, sv;

	isop = 1;
	TAILQ_FOREACH(ap, &pn->attrq, child)
		if (ap->key == ATTRKEY_CHOICE &&
		    (ap->val == ATTRVAL_PLAIN || ap->val == ATTRVAL_REQ)) {
			isop = 0;
			break;
		}

	/*
	 * Make sure we're on a macro line.
	 * This will prevent pnode_print() from putting us on a
	 * subsequent line.
	 * Remember in sv whether we started on a new line, in which
	 * case the macro line is closed again at the end.
	 */
	sv = p->linestate == LINE_NEW;
	if (isop)
		macro_open(p, "Op");
	else if (sv)
		macro_open(p, "No");

	/*
	 * Keep on printing text separated by the vertical bar as long
	 * as we're within the same origin node as the group.
	 * This is kind of a nightmare.
	 * Eh, DocBook...
	 * FIXME: if there's a "Fl", we don't cut off the leading "-"
	 * like we do in pnode_print().
	 */
	TAILQ_FOREACH(pp, &pn->childq, child) {
		pnode_print(p, pp);
		np = TAILQ_NEXT(pp, child);
		while (np != NULL) {
			if (pp->node != np->node)
				break;
			macro_addarg(p, "|", 0);
			macro_addnode(p, np, 0);
			/*
			 * Advance the outer iterator as well, such that
			 * the outer loop resumes after this run.
			 */
			pp = np;
			np = TAILQ_NEXT(np, child);
		}
	}
	if (sv)
		macro_close(p);
	pnode_unlinksub(pn);
}

/*
 * Print the document prologue: the .Dd, .Dt and .Os lines.
 * The .Dt line is generated from the first refmeta node found in the
 * tree (which is deleted in the process).  Without a refmeta node,
 * the id attribute of the root node is used as the title, or
 * "UNKNOWN" if there is no such attribute or no tree at all, and
 * the section defaults to "1".
 * If the document contains inline equations, eqn(7) delimiters are
 * set up at the end of the prologue.
 * The pn argument is not used.
 */
static void
pnode_printprologue(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;

	pp = p->root == NULL ? NULL :
		pnode_findfirst(p->root, NODE_REFMETA);

	/*
	 * NOTE(review): the literal is presumably split to keep
	 * revision control from expanding the keyword in this file.
	 */
	macro_line(p, "Dd $Mdocdate" "$");
	if (pp != NULL)
		pnode_printrefmeta(p, pp);
	else {
		macro_open(p, "Dt");
		/* Do not query attributes of a tree that does not exist. */
		macro_addarg(p, p->root == NULL ? "UNKNOWN" :
		    pnode_getattr_raw(p->root, ATTRKEY_ID, "UNKNOWN"), 0);
		macro_addarg(p, "1", 0);
		macro_close(p);
	}
	macro_line(p, "Os");

	if (p->flags & PARSE_EQN) {
		macro_line(p, "EQ");
		print_text(p, "delim $$");
		macro_line(p, "EN");
	}
}

/*
 * Print one entry of a variable list.
 * All <term> children go onto the .It line, separated by commas;
 * all other children are printed after it as the body of the item.
 * All children are deleted afterwards.
 */
static void
pnode_printvarlistentry(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;
	int		 seen;

	macro_open(p, "It");
	seen = 0;
	TAILQ_FOREACH(kid, &pn->childq, child) {
		if (kid->node == NODE_TERM) {
			if (seen)
				macro_addarg(p, ",", MACROLINE_NOWS);
			pnode_print(p, kid);
			seen = 1;
		}
	}
	macro_close(p);

	TAILQ_FOREACH(kid, &pn->childq, child) {
		if (kid->node == NODE_TERM)
			continue;
		pnode_print(p, kid);
	}
	pnode_unlinksub(pn);
}

static void
pnode_printtitle(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp, *pq;

	TAILQ_FOREACH_SAFE(pp, &pn->childq, child, pq) {
		if (pp->node == NODE_TITLE) {
			pnode_printpara(p, pp);
			pnode_print(p, pp);
			pnode_unlink(pp);
		}
	}
}

static void
pnode_printrow(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;

	macro_line(p, "Bl -dash -compact");
	TAILQ_FOREACH(pp, &pn->childq, child) {
		macro_line(p, "It");
		pnode_print(p, pp);
	}
	macro_line(p, "El");
	pnode_unlink(pn);
}

/*
 * Print a table as an -ohang list containing one "Table Row" item
 * for every row found anywhere below pn, after printing the titles.
 * Each row is deleted as it is printed, and all remaining children
 * are deleted at the end.
 */
static void
pnode_printtable(struct parse *p, struct pnode *pn)
{
	struct pnode	*row;

	pnode_printtitle(p, pn);
	macro_line(p, "Bl -ohang");
	for (;;) {
		/* pnode_printrow() deletes the row, so search anew. */
		row = pnode_findfirst(pn, NODE_ROW);
		if (row == NULL)
			break;
		macro_line(p, "It Table Row");
		pnode_printrow(p, row);
	}
	macro_line(p, "El");
	pnode_unlinksub(pn);
}

static void
pnode_printlist(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;

	pnode_printtitle(p, pn);
	macro_argline(p, "Bl",
	    pn->node == NODE_ORDEREDLIST ? "-enum" : "-bullet");
	TAILQ_FOREACH(pp, &pn->childq, child) {
		macro_line(p, "It");
		pnode_print(p, pp);
	}
	macro_line(p, "El");
	pnode_unlinksub(pn);
}

static void
pnode_printvariablelist(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;

	pnode_printtitle(p, pn);
	macro_line(p, "Bl -tag -width Ds");
	TAILQ_FOREACH(pp, &pn->childq, child) {
		if (pp->node == NODE_VARLISTENTRY)
			pnode_print(p, pp);
		else
			macro_nodeline(p, "It", pp);
	}
	macro_line(p, "El");
	pnode_unlinksub(pn);
}

/*
 * Print a parsed node (or ignore it--whatever).
 * This is a recursive function with three phases:
 *  1. a switch performing the node-specific opening action,
 *  2. a loop printing all children still linked below the node,
 *  3. a switch performing the node-specific closing action.
 * Cases that already consumed the children in phase 1 call
 * pnode_unlinksub() there (presumably so phase 2 does not print
 * them a second time).
 * A NULL node is silently ignored.
 * FIXME: if we're in a literal context (<screen> or <programlisting> or
 * whatever), don't print inline macros.
 */
static void
pnode_print(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;
	const char	*ccp;
	char		*cp;
	int		 last;
	enum linestate	 sv;

	if (pn == NULL)
		return;

	/* Remember the line state on entry for the closing phase. */
	sv = p->linestate;

	switch (pn->node) {
	case NODE_APPLICATION:
		macro_open(p, "Nm");
		break;
	case NODE_ANCHOR:
		/* Don't print anything! */
		return;
	case NODE_ARG:
		pnode_printarg(p, pn);
		break;
	case NODE_AUTHOR:
		macro_open(p, "An");
		break;
	case NODE_AUTHORGROUP:
		macro_line(p, "An -split");
		break;
	case NODE_BOOKINFO:
		macro_line(p, "Sh NAME");
		break;
	case NODE_CITEREFENTRY:
		pnode_printciterefentry(p, pn);
		break;
	case NODE_CITETITLE:
		macro_open(p, "%T");
		break;
	case NODE_CODE:
		macro_open(p, "Li");
		break;
	case NODE_COMMAND:
		macro_open(p, "Nm");
		break;
	case NODE_CONSTANT:
		macro_open(p, "Dv");
		break;
	case NODE_EDITOR:
		print_text(p, "editor:");
		macro_open(p, "An");
		break;
	case NODE_EMAIL:
		macro_open(p, "Aq Mt");
		break;
	case NODE_EMPHASIS:
	case NODE_FIRSTTERM:
		macro_open(p, "Em");
		break;
	case NODE_ENVAR:
		macro_open(p, "Ev");
		break;
	case NODE_FILENAME:
		macro_open(p, "Pa");
		break;
	case NODE_FUNCTION:
		macro_open(p, "Fn");
		break;
	case NODE_FUNCPROTOTYPE:
		pnode_printfuncprototype(p, pn);
		break;
	case NODE_FUNCSYNOPSISINFO:
		macro_open(p, "Fd");
		break;
	case NODE_INDEXTERM:
		/* Suppress the node including all its children. */
		return;
	case NODE_INFORMALEQUATION:
		macro_line(p, "EQ");
		break;
	case NODE_INLINEEQUATION:
		if (p->linestate == LINE_NEW)
			p->linestate = LINE_TEXT;
		putchar('$');
		break;
	case NODE_ITEMIZEDLIST:
		pnode_printlist(p, pn);
		break;
	case NODE_GROUP:
		pnode_printgroup(p, pn);
		break;
	case NODE_KEYSYM:
		macro_open(p, "Sy");
		break;
	case NODE_LEGALNOTICE:
		macro_line(p, "Sh LEGAL NOTICE");
		break;
	case NODE_LINK:
		/*
		 * With a linkend attribute, print a cross reference
		 * instead of the children; without one, fall back to
		 * printing the children as usual.
		 */
		ccp = pnode_getattr_raw(pn, ATTRKEY_LINKEND, NULL);
		if (ccp == NULL)
			break;
		macro_argline(p, "Sx", ccp);
		return;
	case NODE_LITERAL:
		macro_open(p, "Li");
		break;
	case NODE_LITERALLAYOUT:
		macro_argline(p, "Bd", pnode_getattr(pn, ATTRKEY_CLASS) ==
		    ATTRVAL_MONOSPACED ? "-literal" : "-unfilled");
		break;
	case NODE_MML_MFENCED:
		pnode_printmathfenced(p, pn);
		break;
	case NODE_MML_MROW:
	case NODE_MML_MI:
	case NODE_MML_MN:
	case NODE_MML_MO:
		/* Only brace non-empty elements; closed below. */
		if (TAILQ_EMPTY(&pn->childq))
			break;
		fputs(" { ", stdout);
		break;
	case NODE_MML_MFRAC:
	case NODE_MML_MSUB:
	case NODE_MML_MSUP:
		pnode_printmath(p, pn);
		break;
	case NODE_OPTION:
		macro_open(p, "Fl");
		break;
	case NODE_ORDEREDLIST:
		pnode_printlist(p, pn);
		break;
	case NODE_PARA:
		pnode_printpara(p, pn);
		break;
	case NODE_PARAMETER:
		/* Suppress non-text children... */
		macro_open(p, "Fa \"");
		macro_addnode(p, pn, MACROLINE_NOWS);
		macro_addarg(p, "\"", MACROLINE_NOWS);
		macro_close(p);
		pnode_unlinksub(pn);
		break;
	case NODE_QUOTE:
		macro_open(p, "Qo");
		break;
	case NODE_PROGRAMLISTING:
	case NODE_SCREEN:
		macro_line(p, "Bd -literal");
		break;
	case NODE_REFENTRYINFO:
		/* Suppress. */
		pnode_unlinksub(pn);
		break;
	case NODE_REFMETA:
		/*
		 * NOTE(review): presumably <refmeta> is consumed before
		 * this function runs (by the prologue code?) and must
		 * never show up here -- confirm.
		 */
		abort();
		break;
	case NODE_REFNAME:
		/* Suppress non-text children... */
		macro_open(p, "Nm");
		macro_addnode(p, pn, 0);
		pnode_unlinksub(pn);
		break;
	case NODE_REFNAMEDIV:
		macro_line(p, "Sh NAME");
		break;
	case NODE_REFPURPOSE:
		macro_open(p, "Nd");
		break;
	case NODE_REFSYNOPSISDIV:
		pnode_printrefsynopsisdiv(p, pn);
		break;
	case NODE_PREFACE:
	case NODE_SECTION:
	case NODE_NOTE:
	case NODE_TIP:
	case NODE_CAUTION:
	case NODE_WARNING:
		pnode_printrefsect(p, pn);
		break;
	case NODE_REPLACEABLE:
		macro_open(p, "Ar");
		break;
	case NODE_SBR:
		macro_line(p, "br");
		break;
	case NODE_SGMLTAG:
		macro_open(p, "Li");
		break;
	case NODE_STRUCTNAME:
		macro_open(p, "Vt");
		break;
	case NODE_TABLE:
	case NODE_INFORMALTABLE:
		pnode_printtable(p, pn);
		break;
	case NODE_TEXT:
		bufclear(p);
		bufappend(p, pn);
		if (p->bsz == 0) {
			assert(pn->real != pn->b);
			break;
		}
		if (p->linestate == LINE_NEW)
			p->linestate = LINE_TEXT;
		else
			putchar(' ');

		/*
		 * Output all characters, squeezing out whitespace
		 * between newlines.
		 * XXX: all whitespace, including tabs (?).
		 * Remember to escape control characters and escapes.
		 */
		cp = p->b;

		/*
		 * There's often a superfluous "-" in its <option> tags
		 * before the actual flags themselves.
		 * "Fl" does this for us, so remove it.
		 */
		if (pn->parent != NULL &&
		    pn->parent->node == NODE_OPTION &&
		    *cp == '-')
			cp++;
		for (last = '\n'; *cp != '\0'; ) {
			if (last == '\n') {
				/* Consume all whitespace. */
				if (isspace((unsigned char)*cp)) {
					while (isspace((unsigned char)*cp))
						cp++;
					continue;
				} else if (*cp == '\'' || *cp == '.')
					/* Protect roff control characters. */
					fputs("\\&", stdout);
			}
			putchar(last = *cp++);
			/* If we're a character escape, escape us. */
			if (last == '\\')
				putchar('e');
		}
		break;
	case NODE_TITLE:
		/* A title without a parent cannot be a book title. */
		if (pn->parent != NULL &&
		    pn->parent->node == NODE_BOOKINFO)
			macro_open(p, "Nd");
		break;
	case NODE_TYPE:
		macro_open(p, "Vt");
		break;
	case NODE_USERINPUT:
		macro_open(p, "Li");
		break;
	case NODE_VARIABLELIST:
		pnode_printvariablelist(p, pn);
		break;
	case NODE_VARLISTENTRY:
		pnode_printvarlistentry(p, pn);
		break;
	case NODE_VARNAME:
		macro_open(p, "Va");
		break;
	default:
		break;
	}

	/* Print whatever children the opening phase left in place. */
	TAILQ_FOREACH(pp, &pn->childq, child)
		pnode_print(p, pp);

	switch (pn->node) {
	case NODE_INFORMALEQUATION:
		macro_line(p, "EN");
		break;
	case NODE_INLINEEQUATION:
		fputs("$ ", stdout);
		p->linestate = sv;
		break;
	case NODE_MML_MROW:
	case NODE_MML_MI:
	case NODE_MML_MN:
	case NODE_MML_MO:
		if (TAILQ_EMPTY(&pn->childq))
			break;
		fputs(" } ", stdout);
		break;
	case NODE_APPLICATION:
	case NODE_ARG:
	case NODE_AUTHOR:
	case NODE_CITEREFENTRY:
	case NODE_CITETITLE:
	case NODE_CODE:
	case NODE_COMMAND:
	case NODE_CONSTANT:
	case NODE_EDITOR:
	case NODE_EMAIL:
	case NODE_EMPHASIS:
	case NODE_ENVAR:
	case NODE_FILENAME:
	case NODE_FIRSTTERM:
	case NODE_FUNCTION:
	case NODE_FUNCSYNOPSISINFO:
	case NODE_KEYSYM:
	case NODE_LITERAL:
	case NODE_OPTION:
	case NODE_PARAMETER:
	case NODE_REPLACEABLE:
	case NODE_REFPURPOSE:
	case NODE_SGMLTAG:
	case NODE_STRUCTNAME:
	case NODE_TYPE:
	case NODE_USERINPUT:
	case NODE_VARNAME:
		/*
		 * Only finish the macro line if the line was not
		 * already a macro line when this node was entered.
		 */
		if (sv != LINE_MACRO && p->linestate == LINE_MACRO)
			macro_closepunct(p, pn);
		break;
	case NODE_QUOTE:
		if (sv == LINE_NEW)
			macro_close(p);
		sv = p->linestate;
		macro_open(p, "Qc");
		if (sv == LINE_NEW)
			macro_close(p);
		break;
	case NODE_REFNAME:
		/*
		 * If we're in the NAME macro and we have multiple
		 * <refname> macros in sequence, then print out a
		 * trailing comma before the newline.
		 */
		if (pn->parent != NULL &&
		    pn->parent->node == NODE_REFNAMEDIV &&
		    TAILQ_NEXT(pn, child) != NULL &&
		    TAILQ_NEXT(pn, child)->node == NODE_REFNAME)
			macro_addarg(p, ",", 0);
		if (sv == LINE_NEW)
			macro_close(p);
		break;
	case NODE_PREFACE:
	case NODE_SECTION:
	case NODE_NOTE:
	case NODE_TIP:
	case NODE_CAUTION:
	case NODE_WARNING:
		p->level--;
		break;
	case NODE_LITERALLAYOUT:
	case NODE_PROGRAMLISTING:
	case NODE_SCREEN:
		macro_line(p, "Ed");
		break;
	case NODE_TITLE:
		/* Same guard as in the opening phase. */
		if (pn->parent != NULL &&
		    pn->parent->node == NODE_BOOKINFO)
			macro_line(p, "Sh AUTHORS");
		break;
	default:
		break;
	}
}

/*
 * Loop around the read buffer until we've drained it of all data.
 * Invoke the parser context with each buffer fill.
 * Once the input is exhausted, the parse was stopped, or a parse
 * error occurred, print the prologue and the tree collected so far.
 *
 * xp:  the expat parser to feed
 * fd:  the descriptor to read the document from
 * b:   the caller-provided read buffer, bsz bytes long
 * fn:  the file name used in diagnostics
 *
 * Returns 1 if the parse succeeded and was not stopped,
 * or 0 on parse errors, read errors, or memory exhaustion.
 */
static int
readfile(XML_Parser xp, int fd,
	char *b, size_t bsz, const char *fn)
{
	struct parse	 p;
	int		 rc;
	ssize_t		 ssz;

	memset(&p, 0, sizeof(struct parse));

	/*
	 * The size fields claim 1024 usable bytes,
	 * so a failed allocation must not go unnoticed.
	 */
	p.b = malloc(p.bsz = p.mbsz = 1024);
	if (p.b == NULL) {
		perror(NULL);
		return 0;
	}
	p.fname = fn;
	p.xml = xp;

	XML_SetCharacterDataHandler(xp, xml_char);
	XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
	XML_SetUserData(xp, &p);

	while ((ssz = read(fd, b, bsz)) >= 0) {
		/* A zero-length read marks the final chunk for expat. */
		if ((rc = XML_Parse(xp, b, ssz, 0 == ssz)) == 0)
			/* XML_Size need not be size_t, so convert for %zu. */
			fprintf(stderr, "%s:%zu:%zu: %s\n", fn,
				(size_t)XML_GetCurrentLineNumber(xp),
				(size_t)XML_GetCurrentColumnNumber(xp),
				XML_ErrorString
				(XML_GetErrorCode(xp)));
		else if ( ! p.stop && ssz > 0)
			continue;
		/*
		 * Exit when we've read all or errors have occurred
		 * during the parse sequence.
		 */
		p.linestate = LINE_NEW;
		pnode_printprologue(&p, p.root);
		pnode_print(&p, p.root);
		if (p.linestate != LINE_NEW)
			putchar('\n');
		pnode_free(p.root);
		free(p.b);
		return rc != 0 && p.stop == 0;
	}

	/* Read error has occurred. */
	perror(fn);
	pnode_free(p.root);
	free(p.b);
	return 0;
}

/*
 * Convert one DocBook document to mdoc(7) on standard output.
 * The input is read from the file named by the single optional
 * argument, or from standard input if there is no argument or the
 * argument is "-".  The -W option sets the global warn flag.
 * Returns 0 on success and 1 on usage errors or conversion failure.
 */
int
main(int argc, char *argv[])
{
	XML_Parser	 xp;
	const char	*fname;
	char		*buf;
	int		 fd, rc, ch;
	const char	*progname;

	/* Use the last path component of argv[0] in the usage message. */
	progname = strrchr(argv[0], '/');
	if (progname == NULL)
		progname = argv[0];
	else
		++progname;

	fname = "-";
	xp = NULL;
	buf = NULL;
	rc = 1;

	while ((ch = getopt(argc, argv, "W")) != -1)
		switch (ch) {
		case 'W':
			warn = 1;
			break;
		default:
			goto usage;
		}

	argc -= optind;
	argv += optind;

	if (argc > 1) {
		fprintf(stderr, "%s: Too many arguments\n", argv[1]);
		goto usage;
	} else if (argc > 0)
		fname = argv[0];

	/* Read from stdin or a file. */
	fd = strcmp(fname, "-") == 0 ?
		STDIN_FILENO : open(fname, O_RDONLY, 0);

	/*
	 * Open file for reading.
	 * Allocate a read buffer.
	 * Create the parser context.
	 * Dive directly into the parse.
	 */
	if (fd == -1)
		perror(fname);
	else if ((buf = malloc(4096)) == NULL)
		perror(NULL);
	else if ((xp = XML_ParserCreate(NULL)) == NULL)
		perror(NULL);
	else if (readfile(xp, fd, buf, 4096, fname))
		rc = 0;

	/*
	 * xp and buf may still be NULL here if an earlier step failed;
	 * fd is -1 if open(2) failed and must not be passed to close(2).
	 */
	XML_ParserFree(xp);
	free(buf);
	if (fd != -1 && fd != STDIN_FILENO)
		close(fd);
	return rc;

usage:
	fprintf(stderr, "usage: %s [-W] [input_filename]\n", progname);
	return 1;
}