===================================================================
RCS file: /cvs/docbook2mdoc/parse.c,v
retrieving revision 1.11
retrieving revision 1.55
diff -u -p -r1.11 -r1.55
--- docbook2mdoc/parse.c	2019/04/03 11:46:09	1.11
+++ docbook2mdoc/parse.c	2019/04/29 02:00:50	1.55
@@ -1,4 +1,4 @@
-/* $Id: parse.c,v 1.11 2019/04/03 11:46:09 schwarze Exp $ */
+/* $Id: parse.c,v 1.55 2019/04/29 02:00:50 schwarze Exp $ */
 /*
  * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2019 Ingo Schwarze <schwarze@openbsd.org>
@@ -15,14 +15,20 @@
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
+#include <sys/types.h>
+
 #include <assert.h>
 #include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 
+#include "xmalloc.h"
 #include "node.h"
 #include "parse.h"
 
@@ -30,6 +36,14 @@
  * The implementation of the DocBook parser.
  */
 
+enum	pstate {
+	PARSE_ELEM,
+	PARSE_TAG,
+	PARSE_ARG,
+	PARSE_SQ,
+	PARSE_DQ
+};
+
 /*
  * Global parse state.
  * Keep this as simple and small as possible.
@@ -37,6 +51,7 @@
 struct	parse {
 	const char	*fname;  /* Name of the input file. */
 	struct ptree	*tree;   /* Complete parse result. */
+	struct pnode	*doctype;
 	struct pnode	*cur;	 /* Current node in the tree. */
 	enum nodeid	 ncur;   /* Type of the current node. */
 	int		 line;   /* Line number in the input file. */
@@ -44,148 +59,78 @@ struct	parse {
 	int		 nline;  /* Line number of next token. */
 	int		 ncol;   /* Column number of next token. */
 	int		 del;    /* Levels of nested nodes being deleted. */
-	int		 attr;   /* The most recent attribute is valid. */
-	int		 warn;
+	int		 nofill; /* Levels of open no-fill displays. */
+	int		 flags;
+#define	PFLAG_WARN	 (1 << 0)  /* Print warning messages. */
+#define	PFLAG_LINE	 (1 << 1)  /* New line before the next element. */
+#define	PFLAG_SPC	 (1 << 2)  /* Whitespace before the next element. */
+#define	PFLAG_ATTR	 (1 << 3)  /* The most recent attribute is valid. */
+#define	PFLAG_EEND	 (1 << 4)  /* This element is self-closing. */
 };
 
-struct	element {
+struct	alias {
 	const char	*name;   /* DocBook element name. */
 	enum nodeid	 node;   /* Node type to generate. */
 };
 
-static	const struct element elements[] = {
+static	const struct alias aliases[] = {
 	{ "acronym",		NODE_IGNORE },
-	{ "affiliation",	NODE_AFFILIATION },
+	{ "affiliation",	NODE_IGNORE },
 	{ "anchor",		NODE_DELETE },
-	{ "application",	NODE_APPLICATION },
-	{ "arg",		NODE_ARG },
-	{ "author",		NODE_AUTHOR },
-	{ "authorgroup",	NODE_AUTHORGROUP },
-	{ "blockquote",		NODE_BLOCKQUOTE },
-	{ "book",		NODE_BOOK },
-	{ "bookinfo",		NODE_BOOKINFO },
-	{ "caution",		NODE_CAUTION },
+	{ "application",	NODE_COMMAND },
+	{ "article",		NODE_SECTION },
+	{ "articleinfo",	NODE_BOOKINFO },
+	{ "book",		NODE_SECTION },
 	{ "chapter",		NODE_SECTION },
-	{ "citerefentry",	NODE_CITEREFENTRY },
-	{ "citetitle",		NODE_CITETITLE },
-	{ "cmdsynopsis",	NODE_CMDSYNOPSIS },
-	{ "code",		NODE_CODE },
-	{ "colspec",		NODE_COLSPEC },
-	{ "command",		NODE_COMMAND },
-	{ "constant",		NODE_CONSTANT },
-	{ "contrib",		NODE_CONTRIB },
-	{ "copyright",		NODE_COPYRIGHT },
-	{ "date",		NODE_DATE },
-	{ "editor",		NODE_EDITOR },
-	{ "email",		NODE_EMAIL },
-	{ "emphasis",		NODE_EMPHASIS },
-	{ "entry",		NODE_ENTRY },
-	{ "envar",		NODE_ENVAR },
-	{ "fieldsynopsis",	NODE_FIELDSYNOPSIS },
-	{ "filename",		NODE_FILENAME },
+	{ "caption",		NODE_IGNORE },
+	{ "code",		NODE_LITERAL },
+	{ "computeroutput",	NODE_LITERAL },
+	{ "!doctype",		NODE_DOCTYPE },
+	{ "figure",		NODE_IGNORE },
 	{ "firstname",		NODE_PERSONNAME },
-	{ "firstterm",		NODE_FIRSTTERM },
-	{ "footnote",		NODE_FOOTNOTE },
-	{ "funcdef",		NODE_FUNCDEF },
-	{ "funcprototype",	NODE_FUNCPROTOTYPE },
-	{ "funcsynopsis",	NODE_FUNCSYNOPSIS },
-	{ "funcsynopsisinfo",	NODE_FUNCSYNOPSISINFO },
-	{ "function",		NODE_FUNCTION },
-	{ "glossterm",		NODE_GLOSSTERM },
-	{ "group",		NODE_GROUP },
-	{ "holder",		NODE_HOLDER },
-	{ "index",		NODE_INDEX },
+	{ "glossary",		NODE_VARIABLELIST },
+	{ "glossdef",		NODE_IGNORE },
+	{ "glossdiv",		NODE_IGNORE },
+	{ "glossentry",		NODE_VARLISTENTRY },
+	{ "glosslist",		NODE_VARIABLELIST },
+	{ "holder",		NODE_IGNORE },
+	{ "imageobject",	NODE_IGNORE },
 	{ "indexterm",		NODE_DELETE },
-	{ "info",		NODE_INFO },
-	{ "informalequation",	NODE_INFORMALEQUATION },
 	{ "informaltable",	NODE_TABLE },
-	{ "inlineequation",	NODE_INLINEEQUATION },
-	{ "itemizedlist",	NODE_ITEMIZEDLIST },
-	{ "keysym",		NODE_KEYSYM },
-	{ "legalnotice",	NODE_LEGALNOTICE },
-	{ "link",		NODE_LINK },
-	{ "listitem",		NODE_LISTITEM },
-	{ "literal",		NODE_LITERAL },
-	{ "literallayout",	NODE_LITERALLAYOUT },
-	{ "manvolnum",		NODE_MANVOLNUM },
-	{ "member",		NODE_MEMBER },
-	{ "mml:math",		NODE_MML_MATH },
-	{ "mml:mfenced",	NODE_MML_MFENCED },
-	{ "mml:mfrac",		NODE_MML_MFRAC },
-	{ "mml:mi",		NODE_MML_MI },
-	{ "mml:mn",		NODE_MML_MN },
-	{ "mml:mo",		NODE_MML_MO },
-	{ "mml:mrow",		NODE_MML_MROW },
-	{ "mml:msub",		NODE_MML_MSUB },
-	{ "mml:msup",		NODE_MML_MSUP },
-	{ "modifier",		NODE_MODIFIER },
-	{ "note",		NODE_NOTE },
-	{ "option",		NODE_OPTION },
-	{ "orderedlist",	NODE_ORDEREDLIST },
-	{ "orgname",		NODE_ORGNAME },
+	{ "keycap",		NODE_KEYSYM },
+	{ "keycode",		NODE_IGNORE },
+	{ "keycombo",		NODE_IGNORE },
+	{ "mediaobject",	NODE_BLOCKQUOTE },
+	{ "orgname",		NODE_IGNORE },
+	{ "othercredit",	NODE_AUTHOR },
 	{ "othername",		NODE_PERSONNAME },
-	{ "para",		NODE_PARA },
-	{ "paramdef",		NODE_PARAMDEF },
-	{ "parameter",		NODE_PARAMETER },
 	{ "part",		NODE_SECTION },
-	{ "personname",		NODE_PERSONNAME },
 	{ "phrase",		NODE_IGNORE },
-	{ "preface",		NODE_PREFACE },
 	{ "primary",		NODE_DELETE },
-	{ "programlisting",	NODE_PROGRAMLISTING },
-	{ "prompt",		NODE_PROMPT },
-	{ "quote",		NODE_QUOTE },
-	{ "refclass",		NODE_REFCLASS },
-	{ "refdescriptor",	NODE_REFDESCRIPTOR },
-	{ "refentry",		NODE_REFENTRY },
-	{ "refentryinfo",	NODE_REFENTRYINFO },
-	{ "refentrytitle",	NODE_REFENTRYTITLE },
-	{ "refmeta",		NODE_REFMETA },
-	{ "refmetainfo",	NODE_REFMETAINFO },
-	{ "refmiscinfo",	NODE_REFMISCINFO },
-	{ "refname",		NODE_REFNAME },
-	{ "refnamediv",		NODE_REFNAMEDIV },
-	{ "refpurpose",		NODE_REFPURPOSE },
+	{ "property",		NODE_PARAMETER },
+	{ "reference",		NODE_SECTION },
 	{ "refsect1",		NODE_SECTION },
 	{ "refsect2",		NODE_SECTION },
 	{ "refsect3",		NODE_SECTION },
 	{ "refsection",		NODE_SECTION },
-	{ "refsynopsisdiv",	NODE_REFSYNOPSISDIV },
-	{ "releaseinfo",	NODE_RELEASEINFO },
-	{ "replaceable",	NODE_REPLACEABLE },
-	{ "row",		NODE_ROW },
-	{ "sbr",		NODE_SBR },
-	{ "screen",		NODE_SCREEN },
+	{ "releaseinfo",	NODE_IGNORE },
+	{ "returnvalue",	NODE_IGNORE },
 	{ "secondary",		NODE_DELETE },
 	{ "sect1",		NODE_SECTION },
 	{ "sect2",		NODE_SECTION },
-	{ "section",		NODE_SECTION },
-	{ "sgmltag",		NODE_SGMLTAG },
-	{ "simplelist",		NODE_SIMPLELIST },
-	{ "spanspec",		NODE_SPANSPEC },
-	{ "structname",		NODE_STRUCTNAME },
-	{ "subtitle",		NODE_SUBTITLE },
+	{ "sect3",		NODE_SECTION },
+	{ "sect4",		NODE_SECTION },
+	{ "sgmltag",		NODE_MARKUP },
+	{ "simpara",		NODE_PARA },
+	{ "structfield",	NODE_PARAMETER },
+	{ "structname",		NODE_TYPE },
 	{ "surname",		NODE_PERSONNAME },
-	{ "synopsis",		NODE_SYNOPSIS },
-	{ "table",		NODE_TABLE },
-	{ "tbody",		NODE_TBODY },
-	{ "term",		NODE_TERM },
-	{ "tfoot",		NODE_TFOOT },
-	{ "tgroup",		NODE_TGROUP },
-	{ "thead",		NODE_THEAD },
-	{ "tip",		NODE_TIP },
-	{ "title",		NODE_TITLE },
+	{ "symbol",		NODE_CONSTANT },
+	{ "tag",		NODE_MARKUP },
 	{ "trademark",		NODE_IGNORE },
-	{ "type",		NODE_TYPE },
-	{ "ulink",		NODE_ULINK },
-	{ "userinput",		NODE_USERINPUT },
-	{ "variablelist",	NODE_VARIABLELIST },
-	{ "varlistentry",	NODE_VARLISTENTRY },
-	{ "varname",		NODE_VARNAME },
-	{ "warning",		NODE_WARNING },
-	{ "wordasword",		NODE_WORDASWORD },
-	{ "xi:include",		NODE_DELETE_WARN },
-	{ "year",		NODE_YEAR },
+	{ "ulink",		NODE_LINK },
+	{ "userinput",		NODE_LITERAL },
+	{ "year",		NODE_IGNORE },
 	{ NULL,			NODE_IGNORE }
 };
 
@@ -245,17 +190,22 @@ static	const struct entity entities[] = {
 	{ NULL,		NULL }
 };
 
+static size_t	 parse_string(struct parse *, char *, size_t,
+			 enum pstate *, int);
+static void	 parse_fd(struct parse *, int);
+
+
 static void
 error_msg(struct parse *p, const char *fmt, ...)
 {
 	va_list		 ap;
 
-	fprintf(stderr, "%s:%d:%d: ", p->fname, p->line, p->col);
+	fprintf(stderr, "%s:%d:%d: ERROR: ", p->fname, p->line, p->col);
 	va_start(ap, fmt);
 	vfprintf(stderr, fmt, ap);
 	va_end(ap);
 	fputc('\n', stderr);
-	p->tree->flags |= TREE_FAIL;
+	p->tree->flags |= TREE_ERROR;
 }
 
 static void
@@ -263,14 +213,15 @@ warn_msg(struct parse *p, const char *fmt, ...)
 {
 	va_list		 ap;
 
-	if (p->warn == 0)
+	if ((p->flags & PFLAG_WARN) == 0)
 		return;
 
-	fprintf(stderr, "%s:%d:%d: warning: ", p->fname, p->line, p->col);
+	fprintf(stderr, "%s:%d:%d: WARNING: ", p->fname, p->line, p->col);
 	va_start(ap, fmt);
 	vfprintf(stderr, fmt, ap);
 	va_end(ap);
 	fputc('\n', stderr);
+	p->tree->flags |= TREE_WARN;
 }
 
 /*
@@ -279,63 +230,151 @@ warn_msg(struct parse *p, const char *fmt, ...)
  * Otherwise, create a new one as a child of the current node.
  */
 static void
-xml_char(struct parse *ps, const char *p, int sz)
+xml_text(struct parse *p, const char *word, int sz)
 {
-	struct pnode	*dat;
+	struct pnode	*n, *np;
+	size_t		 oldsz, newsz;
+	int		 i;
 
-	if (ps->del > 0)
+	assert(sz > 0);
+	if (p->del > 0)
 		return;
 
-	if (ps->cur == NULL) {
-		error_msg(ps, "discarding text before document: %.*s", sz, p);
+	if ((n = p->cur) == NULL) {
+		error_msg(p, "discarding text before document: %.*s",
+		    sz, word);
 		return;
 	}
 
-	if (ps->cur->node != NODE_TEXT) {
-		if ((dat = calloc(1, sizeof(*dat))) == NULL) {
-			perror(NULL);
-			exit(1);
-		}
-		dat->node = NODE_TEXT;
-		dat->parent = ps->cur;
-		TAILQ_INIT(&dat->childq);
-		TAILQ_INIT(&dat->attrq);
-		TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
-		ps->cur = dat;
+	/* Append to the current text node, if one is open. */
+
+	if (n->node == NODE_TEXT) {
+		oldsz = strlen(n->b);
+		newsz = oldsz + sz;
+		if (oldsz && (p->flags & PFLAG_SPC))
+			newsz++;
+		n->b = xrealloc(n->b, newsz + 1);
+		if (oldsz && (p->flags & PFLAG_SPC))
+			n->b[oldsz++] = ' ';
+		memcpy(n->b + oldsz, word, sz);
+		n->b[newsz] = '\0';
+		p->flags &= ~(PFLAG_LINE | PFLAG_SPC);
+		return;
 	}
 
-	if (ps->tree->flags & TREE_CLOSED &&
-	    ps->cur->parent == ps->tree->root)
-		warn_msg(ps, "text after end of document: %.*s", sz, p);
+	if (p->tree->flags & TREE_CLOSED && n == p->tree->root)
+		warn_msg(p, "text after end of document: %.*s", sz, word);
 
-	/* Append to the current text node. */
+	/* Create a new text node. */
 
-	assert(sz >= 0);
-	ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + sz + 1);
-	if (ps->cur->b == NULL) {
-		perror(NULL);
-		exit(1);
+	n = pnode_alloc(p->cur);
+	n->node = NODE_TEXT;
+	n->flags = ((p->flags & PFLAG_LINE) ? NFLAG_LINE : 0) |
+	    ((p->flags & PFLAG_SPC) ? NFLAG_SPC : 0);
+	p->flags &= ~(PFLAG_LINE | PFLAG_SPC);
+
+	/*
+	 * If this node follows an in-line macro without intervening
+	 * whitespace, keep the text in it as short as possible,
+	 * and do not keep it open.
+	 */
+
+	np = n->flags & NFLAG_SPC ? NULL : TAILQ_PREV(n, pnodeq, child);
+	while (np != NULL) {
+		switch (pnode_class(np->node)) {
+		case CLASS_VOID:
+		case CLASS_TEXT:
+		case CLASS_BLOCK:
+		case CLASS_NOFILL:
+			np = NULL;
+			break;
+		case CLASS_TRANS:
+			np = TAILQ_LAST(&np->childq, pnodeq);
+			continue;
+		case CLASS_LINE:
+		case CLASS_ENCL:
+			break;
+		}
+		break;
 	}
-	memcpy(ps->cur->b + ps->cur->bsz, p, sz);
-	ps->cur->bsz += sz;
-	ps->cur->b[ps->cur->bsz] = '\0';
-	ps->cur->real = ps->cur->b;
+	if (np != NULL) {
+		i = 0;
+		while (i < sz && !isspace((unsigned char)word[i]))
+			i++;
+		n->b = xstrndup(word, i);
+		if (i == sz)
+			return;
+		while (i < sz && isspace((unsigned char)word[i]))
+			i++;
+		if (i == sz) {
+			p->flags |= PFLAG_SPC;
+			return;
+		}
+
+		/* Put any remaining text into a second node. */
+
+		n = pnode_alloc(p->cur);
+		n->node = NODE_TEXT;
+		n->flags |= NFLAG_SPC;
+		word += i;
+		sz -= i;
+	}
+	n->b = xstrndup(word, sz);
+
+	/* The new node remains open for later pnode_closetext(). */
+
+	p->cur = n;
 }
 
+/*
+ * If a text node is open, close it out and strip its trailing whitespace.
+ */
 static void
-pnode_trim(struct pnode *pn)
+pnode_closetext(struct parse *p, int check_last_word)
 {
-	assert(pn->node == NODE_TEXT);
-	for (; pn->bsz > 0; pn->b[--pn->bsz] = '\0')
-		if (isspace((unsigned char)pn->b[pn->bsz - 1]) == 0)
-			break;
+	struct pnode	*n;
+	char		*cp, *last_word;
+
+	if ((n = p->cur) == NULL || n->node != NODE_TEXT)
+		return;
+	p->cur = n->parent;
+	for (cp = strchr(n->b, '\0');
+	    cp > n->b && isspace((unsigned char)cp[-1]);
+	    *--cp = '\0')
+		p->flags |= PFLAG_SPC;
+
+	if (p->flags & PFLAG_SPC || !check_last_word)
+		return;
+
+	/*
+	 * Find the beginning of the last word
+	 * and delete whitespace before it.
+	 */
+
+	while (cp > n->b && !isspace((unsigned char)cp[-1]))
+		cp--;
+	if (cp == n->b)
+		return;
+
+	last_word = cp;
+	while (cp > n->b && isspace((unsigned char)cp[-1]))
+	    *--cp = '\0';
+
+	/* Move the last word into its own node, for use with .Pf. */
+
+	n = pnode_alloc_text(p->cur, last_word);
+	n->flags |= NFLAG_SPC;
 }
 
 static void
 xml_entity(struct parse *p, const char *name)
 {
 	const struct entity	*entity;
-	struct pnode		*dat;
+	struct pnode		*n;
+	const char		*ccp;
+	char			*cp;
+	unsigned int		 codepoint;
+	enum pstate		 pstate;
 
 	if (p->del > 0)
 		return;
@@ -345,11 +384,7 @@ xml_entity(struct parse *p, const char *name)
 		return;
 	}
 
-	/* Close out the text node, if there is one. */
-	if (p->cur->node == NODE_TEXT) {
-		pnode_trim(p->cur);
-		p->cur = p->cur->parent;
-	}
+	pnode_closetext(p, 0);
 
 	if (p->tree->flags & TREE_CLOSED && p->cur == p->tree->root)
 		warn_msg(p, "entity after end of document: &%s;", name);
@@ -359,133 +394,208 @@ xml_entity(struct parse *p, const char *name)
 			break;
 
 	if (entity->roff == NULL) {
+		if (p->doctype != NULL) {
+			TAILQ_FOREACH(n, &p->doctype->childq, child) {
+				if ((ccp = pnode_getattr_raw(n,
+				     ATTRKEY_NAME, NULL)) == NULL ||
+				    strcmp(ccp, name) != 0)
+					continue;
+				if ((ccp = pnode_getattr_raw(n,
+				    ATTRKEY_SYSTEM, NULL)) != NULL) {
+					parse_file(p, -1, ccp);
+					p->flags &= ~(PFLAG_LINE | PFLAG_SPC);
+					return;
+				}
+				if ((ccp = pnode_getattr_raw(n,
+				     ATTRKEY_DEFINITION, NULL)) == NULL)
+					continue;
+				cp = xstrdup(ccp);
+				pstate = PARSE_ELEM;
+				parse_string(p, cp, strlen(cp), &pstate, 0);
+				p->flags &= ~(PFLAG_LINE | PFLAG_SPC);
+				free(cp);
+				return;
+			}
+		}
+		if (*name == '#') {
+			codepoint = strtonum(name + 1, 0, 0x10ffff, &ccp);
+			if (ccp == NULL) {
+				n = pnode_alloc(p->cur);
+				xasprintf(&n->b, "\\[u%4.4X]", codepoint);
+				goto done;
+			}
+		}
 		error_msg(p, "unknown entity &%s;", name);
 		return;
 	}
 
 	/* Create, append, and close out an entity node. */
-	if ((dat = calloc(1, sizeof(*dat))) == NULL ||
-	    (dat->b = dat->real = strdup(entity->roff)) == NULL) {
-		perror(NULL);
-		exit(1);
-	}
-	dat->node = NODE_ESCAPE;
-	dat->bsz = strlen(dat->b);
-	dat->parent = p->cur;
-	TAILQ_INIT(&dat->childq);
-	TAILQ_INIT(&dat->attrq);
-	TAILQ_INSERT_TAIL(&p->cur->childq, dat, child);
+	n = pnode_alloc(p->cur);
+	n->b = xstrdup(entity->roff);
+done:
+	n->node = NODE_ESCAPE;
+	n->flags = ((p->flags & PFLAG_LINE) ? NFLAG_LINE : 0) |
+	    ((p->flags & PFLAG_SPC) ? NFLAG_SPC : 0);
+	p->flags &= ~(PFLAG_LINE | PFLAG_SPC);
 }
 
 /*
+ * Map an element name to a node type, falling back to the alias table.
+ */
+static enum nodeid
+xml_name2node(struct parse *p, const char *name)
+{
+	const struct alias	*alias;
+	enum nodeid		 node;
+
+	if ((node = pnode_parse(name)) < NODE_UNKNOWN)
+		return node;
+
+	for (alias = aliases; alias->name != NULL; alias++)
+		if (strcmp(alias->name, name) == 0)
+			return alias->node;
+
+	return NODE_UNKNOWN;
+}
+
+/*
  * Begin an element.
  */
 static void
-xml_elem_start(struct parse *ps, const char *name)
+xml_elem_start(struct parse *p, const char *name)
 {
-	const struct element	*elem;
-	struct pnode		*dat;
+	struct pnode		*n;
 
-	if (*name == '!' || *name == '?')
-		return;
-
 	/*
 	 * An ancestor is excluded from the tree;
 	 * keep track of the number of levels excluded.
 	 */
-	if (ps->del > 0) {
-		ps->del++;
+	if (p->del > 0) {
+		if (*name != '!' && *name != '?')
+			p->del++;
 		return;
 	}
 
-	/* Close out the text node, if there is one. */
-	if (ps->cur != NULL && ps->cur->node == NODE_TEXT) {
-		pnode_trim(ps->cur);
-		ps->cur = ps->cur->parent;
-	}
-
-	for (elem = elements; elem->name != NULL; elem++)
-		if (strcmp(elem->name, name) == 0)
-			break;
-
-	if (elem->name == NULL)
-		error_msg(ps, "unknown element <%s>", name);
-
-	ps->ncur = elem->node;
-
-	switch (ps->ncur) {
+	switch (p->ncur = xml_name2node(p, name)) {
 	case NODE_DELETE_WARN:
-		warn_msg(ps, "skipping element <%s>", name);
+		warn_msg(p, "skipping element <%s>", name);
 		/* FALLTHROUGH */
 	case NODE_DELETE:
-		ps->del = 1;
+		p->del = 1;
 		/* FALLTHROUGH */
 	case NODE_IGNORE:
 		return;
-	case NODE_INLINEEQUATION:
-		ps->tree->flags |= TREE_EQN;
-		break;
+	case NODE_UNKNOWN:
+		if (*name != '!' && *name != '?')
+			error_msg(p, "unknown element <%s>", name);
+		return;
 	default:
 		break;
 	}
 
-	if (ps->tree->flags & TREE_CLOSED && ps->cur->parent == NULL)
-		warn_msg(ps, "element after end of document: <%s>", name);
+	if (p->tree->flags & TREE_CLOSED && p->cur->parent == NULL)
+		warn_msg(p, "element after end of document: <%s>", name);
 
-	if ((dat = calloc(1, sizeof(*dat))) == NULL) {
-		perror(NULL);
-		exit(1);
+	switch (pnode_class(p->ncur)) {
+	case CLASS_LINE:
+	case CLASS_ENCL:
+		pnode_closetext(p, 1);
+		break;
+	default:
+		pnode_closetext(p, 0);
+		break;
 	}
-	dat->node = elem->node;
-	dat->parent = ps->cur;
-	TAILQ_INIT(&dat->childq);
-	TAILQ_INIT(&dat->attrq);
 
-	if (ps->cur != NULL)
-		TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
+	n = pnode_alloc(p->cur);
 
-	ps->cur = dat;
-	if (ps->tree->root == NULL)
-		ps->tree->root = dat;
+	/*
+	 * Some elements are self-closing.
+	 * Nodes that begin a new macro or request line or start by
+	 * printing text always want whitespace before themselves.
+	 */
+
+	switch (n->node = p->ncur) {
+	case NODE_DOCTYPE:
+	case NODE_ENTITY:
+	case NODE_SBR:
+	case NODE_VOID:
+		p->flags |= PFLAG_EEND;
+		break;
+	default:
+		break;
+	}
+	switch (pnode_class(p->ncur)) {
+	case CLASS_LINE:
+	case CLASS_ENCL:
+		n->flags = ((p->flags & PFLAG_LINE) ? NFLAG_LINE : 0) |
+		    ((p->flags & PFLAG_SPC) ? NFLAG_SPC : 0);
+		break;
+	case CLASS_NOFILL:
+		p->nofill++;
+		/* FALLTHROUGH */
+	default:
+		n->flags |= NFLAG_SPC;
+		break;
+	}
+	p->cur = n;
+	if (n->node == NODE_DOCTYPE) {
+		if (p->doctype == NULL)
+			p->doctype = n;
+		else
+			error_msg(p, "duplicate doctype");
+	} else if (n->parent == NULL && p->tree->root == NULL)
+		p->tree->root = n;
 }
 
 static void
-xml_attrkey(struct parse *ps, const char *name)
+xml_attrkey(struct parse *p, const char *name)
 {
-	struct pattr	*attr;
+	struct pattr	*a;
+	const char	*value;
 	enum attrkey	 key;
 
-	if (ps->del > 0 || *name == '\0')
+	if (p->del > 0 || p->ncur >= NODE_UNKNOWN || *name == '\0')
 		return;
+
+	if ((p->ncur == NODE_DOCTYPE || p->ncur == NODE_ENTITY) &&
+	    TAILQ_FIRST(&p->cur->attrq) == NULL) {
+		value = name;
+		name = "NAME";
+	} else
+		value = NULL;
+
 	if ((key = attrkey_parse(name)) == ATTRKEY__MAX) {
-		ps->attr = 0;
+		p->flags &= ~PFLAG_ATTR;
 		return;
 	}
-	if ((attr = calloc(1, sizeof(*attr))) == NULL) {
-		perror(NULL);
-		exit(1);
+	a = xcalloc(1, sizeof(*a));
+	a->key = key;
+	a->val = ATTRVAL__MAX;
+	if (value == NULL) {
+		a->rawval = NULL;
+		p->flags |= PFLAG_ATTR;
+	} else {
+		a->rawval = xstrdup(value);
+		p->flags &= ~PFLAG_ATTR;
 	}
-	attr->key = key;
-	attr->val = ATTRVAL__MAX;
-	attr->rawval = NULL;
-	TAILQ_INSERT_TAIL(&ps->cur->attrq, attr, child);
-	ps->attr = 1;
+	TAILQ_INSERT_TAIL(&p->cur->attrq, a, child);
+	if (p->ncur == NODE_ENTITY && key == ATTRKEY_NAME)
+		xml_attrkey(p, "DEFINITION");
 }
 
 static void
-xml_attrval(struct parse *ps, const char *name)
+xml_attrval(struct parse *p, const char *name)
 {
-	struct pattr	*attr;
+	struct pattr	*a;
 
-	if (ps->del > 0 || ps->attr == 0)
+	if (p->del > 0 || p->ncur >= NODE_UNKNOWN ||
+	    (p->flags & PFLAG_ATTR) == 0)
 		return;
-	if ((attr = TAILQ_LAST(&ps->cur->attrq, pattrq)) == NULL)
+	if ((a = TAILQ_LAST(&p->cur->attrq, pattrq)) == NULL)
 		return;
-	if ((attr->val = attrval_parse(name)) == ATTRVAL__MAX &&
-	    (attr->rawval = strdup(name)) == NULL) {
-		perror(NULL);
-		exit(1);
-	}
+	if ((a->val = attrval_parse(name)) == ATTRVAL__MAX)
+		a->rawval = xstrdup(name);
+	p->flags &= ~PFLAG_ATTR;
 }
 
 /*
@@ -493,47 +603,59 @@ xml_attrval(struct parse *ps, const char *name)
  * If we're at a text node, roll that one up first.
  */
 static void
-xml_elem_end(struct parse *ps, const char *name)
+xml_elem_end(struct parse *p, const char *name)
 {
-	const struct element	*elem;
+	struct pnode		*n;
+	const char		*cp;
 	enum nodeid		 node;
 
 	/*
 	 * An ancestor is excluded from the tree;
 	 * keep track of the number of levels excluded.
 	 */
-	if (ps->del > 1) {
-		ps->del--;
+	if (p->del > 1) {
+		p->del--;
 		return;
 	}
 
-	/* Close out the text node, if there is one. */
-	if (ps->del == 0 && ps->cur != NULL && ps->cur->node == NODE_TEXT) {
-		pnode_trim(ps->cur);
-		ps->cur = ps->cur->parent;
-	}
+	if (p->del == 0)
+		pnode_closetext(p, 0);
 
-	if (name != NULL) {
-		for (elem = elements; elem->name != NULL; elem++)
-			if (strcmp(elem->name, name) == 0)
-				break;
-		node = elem->node;
-	} else
-		node = ps->ncur;
+	n = p->cur;
+	node = name == NULL ? p->ncur : xml_name2node(p, name);
 
 	switch (node) {
 	case NODE_DELETE_WARN:
 	case NODE_DELETE:
-		if (ps->del > 0)
-			ps->del--;
+		if (p->del > 0)
+			p->del--;
 		break;
 	case NODE_IGNORE:
+	case NODE_UNKNOWN:
 		break;
+	case NODE_INCLUDE:
+		p->cur = n->parent;
+		cp = pnode_getattr_raw(n, ATTRKEY_HREF, NULL);
+		if (cp == NULL)
+			error_msg(p, "<xi:include> element "
+			    "without href attribute");
+		else
+			parse_file(p, -1, cp);
+		pnode_unlink(n);
+		p->flags &= ~(PFLAG_LINE | PFLAG_SPC);
+		break;
+	case NODE_DOCTYPE:
+	case NODE_SBR:
+	case NODE_VOID:
+		p->flags &= ~PFLAG_EEND;
+		/* FALLTHROUGH */
 	default:
-		if (ps->cur == NULL || node != ps->cur->node) {
-			warn_msg(ps, "element not open: </%s>", name);
+		if (n == NULL || node != n->node) {
+			warn_msg(p, "element not open: </%s>", name);
 			break;
 		}
+		if (pnode_class(node) == CLASS_NOFILL)
+			p->nofill--;
 
 		/*
 		 * Refrain from actually closing the document element.
@@ -542,13 +664,24 @@ xml_elem_end(struct parse *ps, const char *name)
 		 * obviously better than discarding it or crashing.
 		 */
 
-		if (ps->cur->parent == NULL)
-			ps->tree->flags |= TREE_CLOSED;
-		else
-			ps->cur = ps->cur->parent;
+		if (n->parent != NULL || node == NODE_DOCTYPE) {
+			p->cur = n->parent;
+			if (p->cur != NULL)
+				p->ncur = p->cur->node;
+		} else
+			p->tree->flags |= TREE_CLOSED;
+		p->flags &= ~(PFLAG_LINE | PFLAG_SPC);
+
+		/* Include a file containing entity declarations. */
+
+		if (node == NODE_ENTITY && strcmp("%",
+		    pnode_getattr_raw(n, ATTRKEY_NAME, "")) == 0 &&
+		    (cp = pnode_getattr_raw(n, ATTRKEY_SYSTEM, NULL)) != NULL)
+			parse_file(p, -1, cp);
+
 		break;
 	}
-	assert(ps->del == 0);
+	assert(p->del == 0);
 }
 
 struct parse *
@@ -556,14 +689,12 @@ parse_alloc(int warn)
 {
 	struct parse	*p;
 
-	if ((p = calloc(1, sizeof(*p))) == NULL)
-		return NULL;
-
-	if ((p->tree = calloc(1, sizeof(*p->tree))) == NULL) {
-		free(p);
-		return NULL;
-	}
-	p->warn = warn;
+	p = xcalloc(1, sizeof(*p));
+	p->tree = xcalloc(1, sizeof(*p->tree));
+	if (warn)
+		p->flags |= PFLAG_WARN;
+	else
+		p->flags &= ~PFLAG_WARN;
 	return p;
 }
 
@@ -579,6 +710,19 @@ parse_free(struct parse *p)
 	free(p);
 }
 
+static void
+increment(struct parse *p, char *b, size_t *pend, int refill)
+{
+	if (refill) {
+		if (b[*pend] == '\n') {
+			p->nline++;
+			p->ncol = 1;
+		} else
+			p->ncol++;
+	}
+	++*pend;
+}
+
 /*
  * Advance the pend pointer to the next character in the charset.
  * If the charset starts with a space, it stands for any whitespace.
@@ -589,7 +733,7 @@ parse_free(struct parse *p)
  */
 static int
 advance(struct parse *p, char *b, size_t rlen, size_t *pend,
-    const char *charset)
+    const char *charset, int refill)
 {
 	int		 space;
 
@@ -599,235 +743,316 @@ advance(struct parse *p, char *b, size_t rlen, size_t 
 	} else
 		space = 0;
 
-	p->nline = p->line;
-	p->ncol = p->col;
+	if (refill) {
+		p->nline = p->line;
+		p->ncol = p->col;
+	}
 	while (*pend < rlen) {
-		if (b[*pend] == '\n') {
-			p->nline++;
-			p->ncol = 1;
-		} else
-			p->ncol++;
 		if (space && isspace((unsigned char)b[*pend]))
 			break;
 		if (strchr(charset, b[*pend]) != NULL)
 			break;
-		++*pend;
+		increment(p, b, pend, refill);
 	}
 	if (*pend == rlen) {
 		b[rlen] = '\0';
-		return 1;
+		return refill;
 	} else
 		return 0;
 }
 
-struct ptree *
-parse_file(struct parse *p, int fd, const char *fname)
+size_t
+parse_string(struct parse *p, char *b, size_t rlen,
+    enum pstate *pstate, int refill)
 {
-	char		 b[4096];
 	char		*cp;
-	ssize_t		 rsz;	/* Return value from read(2). */
-	size_t		 rlen;  /* Number of bytes in b[]. */
+	size_t		 pws;	/* Parse offset including whitespace. */
 	size_t		 poff;  /* Parse offset in b[]. */
 	size_t		 pend;  /* Offset of the end of the current word. */
-	int		 in_tag, in_arg, in_quotes, elem_end;
+	int		 elem_end;
 
-	p->fname = fname;
-	p->nline = 1;
-	p->ncol = 1;
-	rlen = 0;
-	in_tag = in_arg = in_quotes = 0;
+	pend = pws = 0;
+	for (;;) {
 
-	/*
-	 * Read loop.
-	 *
-	 * We have to enter the read loop once more even on EOF
-	 * because the previous token may have been incomplete,
-	 * such that it asked for more input.
-	 * Once rsz is 0, incomplete tokens will no longer ask
-	 * for more input but instead use whatever there is,
-	 * and then exit the read loop.
-	 * The minus one on the size limit for read(2) is needed
-	 * such that advance() can set b[rlen] to NUL when needed.
-	 */
+		/* Proceed to the next token, skipping whitespace. */
 
-	while ((rsz = read(fd, b + rlen, sizeof(b) - rlen - 1)) >= 0) {
-		if ((rlen += rsz) == 0)
+		if (refill) {
+			p->line = p->nline;
+			p->col = p->ncol;
+		}
+		if ((poff = pend) == rlen)
 			break;
+		if (isspace((unsigned char)b[pend])) {
+			p->flags |= PFLAG_SPC;
+			if (b[pend] == '\n') {
+				p->flags |= PFLAG_LINE;
+				pws = pend + 1;
+			}
+			increment(p, b, &pend, refill);
+			continue;
+		}
 
-		/* Token loop. */
+		/*
+		 * The following four cases (ARG, TAG, and starting an
+		 * entity or a tag) all parse a word or quoted string.
+		 * If that extends beyond the read buffer and the last
+		 * read(2) still got data, they all break out of the
+		 * token loop to request more data from the read loop.
+		 *
+		 * Also, three of them detect self-closing tags, those
+		 * ending with "/>", setting the flag elem_end and
+		 * calling xml_elem_end() at the very end, after
+		 * handling the attribute value, attribute name, or
+		 * tag name, respectively.
+		 */
 
-		pend = 0;
-		for (;;) {
+		/* Parse an attribute value. */
 
-			/* Proceed to the next token, skipping whitespace. */
-
-			p->line = p->nline;
-			p->col = p->ncol;
-			if ((poff = pend) == rlen)
-				break;
-			if (isspace((unsigned char)b[pend])) {
-				if (b[pend++] == '\n') {
-					p->nline++;
-					p->ncol = 1;
-				} else
-					p->ncol++;
+		if (*pstate >= PARSE_ARG) {
+			if (*pstate == PARSE_ARG &&
+			    (b[pend] == '\'' || b[pend] == '"')) {
+				*pstate = b[pend] == '"' ?
+				    PARSE_DQ : PARSE_SQ;
+				increment(p, b, &pend, refill);
 				continue;
 			}
+			if (advance(p, b, rlen, &pend,
+			    *pstate == PARSE_DQ ? "\"" :
+			    *pstate == PARSE_SQ ? "'" : " >", refill))
+				break;
+			*pstate = PARSE_TAG;
+			elem_end = 0;
+			if (b[pend] == '>') {
+				*pstate = PARSE_ELEM;
+				if (pend > 0 && b[pend - 1] == '/') {
+					b[pend - 1] = '\0';
+					elem_end = 1;
+				}
+				if (p->flags & PFLAG_EEND)
+					elem_end = 1;
+			}
+			b[pend] = '\0';
+			if (pend < rlen)
+				increment(p, b, &pend, refill);
+			xml_attrval(p, b + poff);
+			if (elem_end)
+				xml_elem_end(p, NULL);
 
-			/*
-			 * The following four cases (in_arg, in_tag, and
-			 * starting an entity or a tag) all parse a word
-			 * or quoted string.  If that extends beyond the
-			 * read buffer and the last read(2) still got
-			 * data, they all break out of the token loop
-			 * to request more data from the read loop.
-			 *
-			 * Also, three of them detect self-closing tags,
-			 * those ending with "/>", setting the flag
-			 * elem_end and calling xml_elem_end() at the
-			 * very end, after handling the attribute value,
-			 * attribute name, or tag name, respectively.
-			 */
+		/* Look for an attribute name. */
 
-			/* Parse an attribute value. */
-
-			if (in_arg) {
-				if (in_quotes == 0 &&
-				    (b[pend] == '\'' || b[pend] == '"')) {
-					in_quotes = b[pend] == '"' ? 2 : 1;
-					p->ncol++;
-					pend++;
+		} else if (*pstate == PARSE_TAG) {
+			switch (p->ncur) {
+			case NODE_DOCTYPE:
+				if (b[pend] == '[') {
+					*pstate = PARSE_ELEM;
+					increment(p, b, &pend, refill);
 					continue;
 				}
-				if (advance(p, b, rlen, &pend,
-				    in_quotes == 2 ? "\"" :
-				    in_quotes == 1 ? "'" : " >") && rsz > 0)
-					break;
-				in_arg = in_quotes = elem_end = 0;
-				if (b[pend] == '>') {
-					in_tag = 0;
-					if (pend > 0 && b[pend - 1] == '/') {
-						b[pend - 1] = '\0';
-						elem_end = 1;
-					}
-				}
-				b[pend] = '\0';
-				if (pend < rlen)
-					pend++;
-				xml_attrval(p, b + poff);
-				if (elem_end)
-					xml_elem_end(p, NULL);
-
-			/* Look for an attribute name. */
-
-			} else if (in_tag) {
-				if (advance(p, b, rlen, &pend, " =>") &&
-				    rsz > 0)
-					break;
-				elem_end = 0;
-				switch (b[pend]) {
-				case '>':
-					in_tag = 0;
-					if (pend > 0 && b[pend - 1] == '/') {
-						b[pend - 1] = '\0';
-						elem_end = 1;
-					}
-					break;
-				case '=':
-					in_arg = 1;
-					break;
-				default:
-					break;
-				}
-				b[pend] = '\0';
-				if (pend < rlen)
-					pend++;
-				xml_attrkey(p, b + poff);
-				if (elem_end)
-					xml_elem_end(p, NULL);
-
-			/* Begin an opening or closing tag. */
-
-			} else if (b[poff] == '<') {
-				if (advance(p, b, rlen, &pend, " >") &&
-				    rsz > 0)
-					break;
-				if (pend > poff + 3 &&
-				    strncmp(b + poff, "<!--", 4) == 0) {
-
-					/* Skip a comment. */
-
-					cp = strstr(b + pend - 2, "-->");
-					if (cp == NULL) {
-						if (rsz > 0) {
-							pend = rlen;
-							break;
-						}
-						cp = b + rlen;
-					} else
-						cp += 3;
-					while (b + pend < cp) {
-						if (b[++pend] == '\n') {
-							p->nline++;
-							p->ncol = 1;
-						} else
-							p->ncol++;
-					}
+				/* FALLTHROUGH */
+			case NODE_ENTITY:
+				if (b[pend] == '"' || b[pend] == '\'') {
+					*pstate = PARSE_ARG;
 					continue;
 				}
-				elem_end = 0;
-				if (b[pend] != '>')
-					in_tag = 1;
-				else if (pend > 0 && b[pend - 1] == '/') {
+				break;
+			default:
+				break;
+			}
+			if (advance(p, b, rlen, &pend, " =>", refill))
+				break;
+			elem_end = 0;
+			switch (b[pend]) {
+			case '>':
+				*pstate = PARSE_ELEM;
+				if (pend > 0 && b[pend - 1] == '/') {
 					b[pend - 1] = '\0';
 					elem_end = 1;
 				}
-				b[pend] = '\0';
-				if (pend < rlen)
-					pend++;
-				if (b[++poff] == '/') {
+				if (p->flags & PFLAG_EEND)
 					elem_end = 1;
-					poff++;
-				} else
-					xml_elem_start(p, b + poff);
-				if (elem_end)
-					xml_elem_end(p, b + poff);
+				break;
+			case '=':
+				*pstate = PARSE_ARG;
+				break;
+			default:
+				break;
+			}
+			b[pend] = '\0';
+			if (pend < rlen)
+				increment(p, b, &pend, refill);
+			xml_attrkey(p, b + poff);
+			if (elem_end)
+				xml_elem_end(p, NULL);
 
-			/* Process an entity. */
+		/* Begin an opening or closing tag. */
 
-			} else if (b[poff] == '&') {
-				if (advance(p, b, rlen, &pend, ";") &&
-				    rsz > 0)
-					break;
-				b[pend] = '\0';
-				if (pend < rlen)
-					pend++;
-				xml_entity(p, b + poff + 1);
+		} else if (b[poff] == '<') {
+			if (advance(p, b, rlen, &pend, " >", refill))
+				break;
+			if (pend > poff + 3 &&
+			    strncmp(b + poff, "<!--", 4) == 0) {
 
-			/* Process text up to the next tag or entity. */
+				/* Skip a comment. */
 
+				cp = strstr(b + pend - 2, "-->");
+				if (cp == NULL) {
+					if (refill)
+						break;
+					cp = b + rlen;
+				} else
+					cp += 3;
+				while (b + pend < cp)
+					increment(p, b, &pend, refill);
+				continue;
+			}
+			elem_end = 0;
+			if (b[pend] != '>')
+				*pstate = PARSE_TAG;
+			else if (pend > 0 && b[pend - 1] == '/') {
+				b[pend - 1] = '\0';
+				elem_end = 1;
+			}
+			b[pend] = '\0';
+			if (pend < rlen)
+				increment(p, b, &pend, refill);
+			if (b[++poff] == '/') {
+				elem_end = 1;
+				poff++;
 			} else {
-				if (advance(p, b, rlen, &pend, "<&") == 0)
-					p->ncol--;
-				xml_char(p, b + poff, pend - poff);
+				xml_elem_start(p, b + poff);
+				if (*pstate == PARSE_ELEM &&
+				    p->flags & PFLAG_EEND)
+					elem_end = 1;
 			}
+			if (elem_end)
+				xml_elem_end(p, b + poff);
+
+		/* Close a doctype. */
+
+		} else if (p->ncur == NODE_DOCTYPE && b[poff] == ']') {
+			*pstate = PARSE_TAG;
+			increment(p, b, &pend, refill);
+
+		/* Process an entity. */
+
+		} else if (b[poff] == '&') {
+			if (advance(p, b, rlen, &pend, ";", refill))
+				break;
+			b[pend] = '\0';
+			if (pend < rlen)
+				increment(p, b, &pend, refill);
+			xml_entity(p, b + poff + 1);
+
+		/* Process text up to the next tag, entity, or EOL. */
+
+		} else {
+			advance(p, b, rlen, &pend,
+			    p->ncur == NODE_DOCTYPE ? "<&]\n" : "<&\n",
+			    refill);
+			if (p->nofill)
+				poff = pws;
+			xml_text(p, b + poff, pend - poff);
+			if (b[pend] == '\n')
+				pnode_closetext(p, 0);
 		}
+		pws = pend;
+	}
+	return poff;
+}
 
-		/* Buffer exhausted; shift left and re-fill. */
 
+/*
+ * The read loop: fill b[] from fd and pass it to parse_string().
+ * If the previous token was incomplete and asked for more input,
+ * we have to enter the read loop once more even on EOF.
+ * Once rsz is 0, incomplete tokens will no longer ask for more input
+ * but instead use whatever there is, and then exit the read loop.
+ * The minus one on the size limit for read(2) is needed so that
+ * advance() can set b[rlen] to NUL when needed.
+ */
+static void
+parse_fd(struct parse *p, int fd)
+{
+	char		 b[4096];	/* Input buffer. */
+	ssize_t		 rsz;	/* Return value from read(2). */
+	size_t		 rlen;	/* Number of bytes in b[]. */
+	size_t		 poff;  /* Parse offset in b[]. */
+	enum pstate	 pstate;	/* Parser state, kept across refills. */
+
+	rlen = 0;
+	pstate = PARSE_ELEM;
+	while ((rsz = read(fd, b + rlen, sizeof(b) - rlen - 1)) >= 0 &&
+	    (rlen += rsz) > 0) {
+		poff = parse_string(p, b, rlen, &pstate, rsz > 0);
+		/* Drop b[0..poff) and shift the rest left; then re-fill. */
 		assert(poff > 0);
-		memmove(b, b + poff, rlen - poff);
 		rlen -= poff;
+		memmove(b, b + poff, rlen);	/* rlen is the tail length now. */
 	}
-	if (rsz < 0) {
-		perror(fname);
-		p->tree->flags |= TREE_FAIL;
+	if (rsz < 0)	/* read(2) failed. */
+		error_msg(p, "read: %s", strerror(errno));
+}
+
+/*
+ * Parse fname (opened here if fd is -1) and return p->tree.
+ */
+struct ptree *
+parse_file(struct parse *p, int fd, const char *fname)
+{
+	const char	*save_fname;	/* Caller's file name, restored on exit. */
+	int		 save_line, save_col;	/* Caller's next-token position. */
+
+	/* Save the caller's reporting data and start afresh. */
+
+	save_fname = p->fname;
+	save_line = p->nline;
+	save_col = p->ncol;
+	p->fname = fname;
+	p->line = 0;
+	p->col = 0;
+
+	/* Open the file, unless it is already open (fd != -1). */
+
+	if (fd == -1 && (fd = open(fname, O_RDONLY, 0)) == -1) {
+		error_msg(p, "open: %s", strerror(errno));
+		p->fname = save_fname;	/* nline, ncol are still untouched. */
+		return p->tree;
 	}
-	if (p->cur != NULL && p->cur->node == NODE_TEXT) {
-		pnode_trim(p->cur);
-		p->cur = p->cur->parent;
+
+	/*
+	 * For the starting file only, chdir(2) to its directory, since
+	 * DocBook includes are typically relative.  Best effort; errors
+	 * are ignored.  NOTE(review): POSIX allows dirname(3) to modify
+	 * its argument, but fname is const -- confirm on all targets.
+	 */
+
+	if (save_fname == NULL && (fname = dirname(fname)) != NULL &&
+	    strcmp(fname, ".") != 0)
+		(void)chdir(fname);
+
+	/* Run the read loop. */
+
+	p->nline = 1;
+	p->ncol = 1;
+	parse_fd(p, fd);
+
+	/* On the top level (save_fname == NULL), finalize the tree. */
+
+	if (save_fname == NULL) {
+		pnode_closetext(p, 0);
+		if (p->tree->root == NULL)
+			error_msg(p, "empty document");
+		else if ((p->tree->flags & TREE_CLOSED) == 0)
+			warn_msg(p, "document not closed");
+		pnode_unlink(p->doctype);
 	}
-	if ((p->tree->flags & TREE_CLOSED) == 0)
-		warn_msg(p, "document not closed");
+
+	/* Clean up, but never close standard input. */
+
+	if (fd != STDIN_FILENO)
+		close(fd);
+	p->fname = save_fname;
+	p->nline = save_line;
+	p->ncol = save_col;
 	return p->tree;
 }