/* $Id: docbook2mdoc.c,v 1.17 2014/03/30 16:32:03 kristaps Exp $ */
/*
* Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/queue.h>
#include <assert.h>
#include <ctype.h>
#include <expat.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "extern.h"
/*
 * Global parse state.
 * A pointer to this structure is handed to expat as user data, so the
 * xml_char(), xml_elem_start() and xml_elem_end() callbacks receive it
 * as their first argument; the output routines use it as well.
 * Keep this as simple and small as possible.
 */
struct parse {
XML_Parser xml; /* expat parser; queried for line/column in diagnostics */
enum nodeid node; /* type of current node (NODE_ROOT if pre-tree) */
const char *fname; /* input filename, used in diagnostics */
int stop; /* non-zero once a fatal parse error was reported */
struct pnode *root; /* root of parse tree (NULL until an element is accepted) */
struct pnode *cur; /* current node in tree */
char *b; /* nil-terminated scratch buffer for pre-print */
size_t bsz; /* current length of b, not counting the terminator */
size_t mbsz; /* allocated size of b */
int newln; /* output: are we on a fresh line */
};
/*
 * Static description of one DocBook element type.
 * Instances live in the nodes[] table below.
 */
struct node {
const char *name; /* docbook element name (NULL if never matched by name) */
unsigned int flags; /* bitwise OR of the NODE_* flags below */
#define NODE_IGNTEXT 1 /* ignore all contained text */
};
/* Tail-queue head types: a list of child nodes and a list of attributes. */
TAILQ_HEAD(pnodeq, pnode);
TAILQ_HEAD(pattrq, pattr);
/*
 * One recognised attribute attached to a parsed element.
 */
struct pattr {
enum attrkey key; /* which attribute (index into attrkeys[]) */
enum attrval val; /* recognised value, or ATTRVAL__MAX if not in attrvals[] */
char *rawval; /* strdup()ed literal value; only set when val is ATTRVAL__MAX */
TAILQ_ENTRY(pattr) child; /* linkage in the owning node's attrq */
};
/*
 * One node of the parse tree: either an element or a run of text.
 */
struct pnode {
enum nodeid node; /* node type */
char *b; /* text bytes, NOT nil-terminated (filled by xml_char()) */
size_t bsz; /* number of valid bytes in b */
struct pnode *parent; /* parent (or NULL if top) */
struct pnodeq childq; /* queue of children */
struct pattrq attrq; /* attributes of node */
TAILQ_ENTRY(pnode) child; /* linkage in the parent's childq */
};
/*
 * Attribute names, indexed by enum attrkey.
 * xml_elem_start() stores the matching index as the attribute key, so
 * the order here must agree with the enum declaration (which is not in
 * this file; presumably extern.h -- confirm when editing).
 */
static const char *attrkeys[ATTRKEY__MAX] = {
"choice",
"id",
"rep"
};
/*
 * Recognised attribute values, indexed by enum attrval.
 * As with attrkeys[], the order must agree with the enum declaration,
 * which lives outside this file.
 * Values not listed here are kept verbatim in pattr.rawval instead.
 */
static const char *attrvals[ATTRVAL__MAX] = {
"norepeat",
"opt",
"plain",
"repeat",
"req"
};
/*
 * Element table, indexed by enum nodeid.
 * The order must agree with the enum declaration, which lives outside
 * this file.
 * Entries with a NULL name are skipped by the name lookup in
 * xml_elem_start(), so they can never be produced from an element tag
 * (presumably these are the NODE_ROOT and NODE_TEXT slots -- confirm
 * against the enum).
 * NODE_IGNTEXT makes xml_char() drop character data found directly
 * inside the element.
 */
static const struct node nodes[NODE__MAX] = {
{ NULL, 0 },
{ "arg", 0 },
{ "citerefentry", NODE_IGNTEXT },
{ "cmdsynopsis", NODE_IGNTEXT },
{ "code", 0 },
{ "command", 0 },
{ "date", 0 },
{ "emphasis", 0 },
{ "filename", 0 },
{ "funcdef", 0 },
{ "funcprototype", NODE_IGNTEXT },
{ "funcsynopsis", NODE_IGNTEXT },
{ "funcsynopsisinfo", 0 },
{ "function", 0 },
{ "itemizedlist", NODE_IGNTEXT },
{ "link", 0 },
{ "listitem", NODE_IGNTEXT },
{ "manvolnum", 0 },
{ "option", 0 },
{ "para", 0 },
{ "paramdef", 0 },
{ "parameter", 0 },
{ "programlisting", 0 },
{ "refclass", NODE_IGNTEXT },
{ "refdescriptor", NODE_IGNTEXT },
{ "refentry", NODE_IGNTEXT },
{ "refentryinfo", NODE_IGNTEXT },
{ "refentrytitle", 0 },
{ "refmeta", NODE_IGNTEXT },
{ "refmiscinfo", NODE_IGNTEXT },
{ "refname", 0 },
{ "refnamediv", NODE_IGNTEXT },
{ "refpurpose", 0 },
{ "refsect1", 0 },
{ "refsynopsisdiv", NODE_IGNTEXT },
{ "replaceable", 0 },
{ "structname", 0 },
{ "synopsis", 0 },
{ "term", 0 },
{ NULL, 0 },
{ "title", 0 },
{ "ulink", 0 },
{ "variablelist", NODE_IGNTEXT },
{ "varlistentry", NODE_IGNTEXT },
};
/*
 * Forward declaration: several of the per-element printers below call
 * back into pnode_print(), which is defined after them.
 */
static void
pnode_print(struct parse *p, struct pnode *pn);
/*
 * Process a stream of characters (expat character-data callback).
 * We store text as nodes in and of themselves.
 * If a text node is already open, append to it.
 * If it's not open, open one under the current context, but only once
 * some non-whitespace data shows up.
 *
 * arg is the struct parse registered as user data, p points at sz
 * bytes of character data that are NOT nil-terminated.
 * Allocation failure is fatal: the process exits.
 */
static void
xml_char(void *arg, const XML_Char *p, int sz)
{
	struct parse	*ps = arg;
	struct pnode	*dat;
	int		 i;

	/* Stopped or no tree yet. */
	if (ps->stop || NODE_ROOT == ps->node)
		return;

	/* Not supposed to be collecting text. */
	assert(NULL != ps->cur);
	if (NODE_IGNTEXT & nodes[ps->node].flags)
		return;

	/*
	 * Are we in the midst of processing text?
	 * If we're not processing text right now, then create a text
	 * node for doing so.
	 * However, don't do so unless we have some non-whitespace to
	 * process: strip out all leading whitespace to be sure.
	 */
	if (NODE_TEXT != ps->node) {
		/*
		 * The <ctype.h> classifiers require a value representable
		 * as unsigned char; a plain (possibly signed) char holding
		 * a non-ASCII byte would be undefined behaviour.
		 * NOTE(review): this, like the memcpy() below, assumes
		 * XML_Char is a byte type -- confirm the expat build.
		 */
		for (i = 0; i < sz; i++)
			if ( ! isspace((unsigned char)p[i]))
				break;
		if (i == sz)
			return;
		p += i;
		sz -= i;

		dat = calloc(1, sizeof(struct pnode));
		if (NULL == dat) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}

		dat->node = ps->node = NODE_TEXT;
		dat->parent = ps->cur;
		TAILQ_INIT(&dat->childq);
		TAILQ_INIT(&dat->attrq);
		TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
		ps->cur = dat;
		assert(NULL != ps->root);
	}

	/* Append to current buffer. */
	assert(sz >= 0);
	ps->cur->b = realloc(ps->cur->b,
		ps->cur->bsz + (size_t)sz);
	if (NULL == ps->cur->b) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}
	memcpy(ps->cur->b + ps->cur->bsz, p, (size_t)sz);
	ps->cur->bsz += (size_t)sz;
}
/*
 * Strip trailing whitespace from a text node.
 * Only the recorded length is reduced; the buffer itself is neither
 * modified nor reallocated.
 */
static void
pnode_trim(struct pnode *pn)
{
	assert(NODE_TEXT == pn->node);

	/*
	 * Cast through unsigned char: passing a negative plain char
	 * (any non-ASCII byte where char is signed) to isspace() is
	 * undefined behaviour.
	 */
	for ( ; pn->bsz > 0; pn->bsz--)
		if ( ! isspace((unsigned char)pn->b[pn->bsz - 1]))
			break;
}
/*
 * Begin an element (expat start-element callback).
 * First, if we're parsing a text node, close out the text node and
 * return to its parent.
 * Then look for the element.
 * If we don't find it and we're not parsing, keep going.
 * If we don't find it and we're parsing, puke and stop.
 * If we find it and we're at the root and already have a tree, puke and
 * stop (FIXME: I don't think this is right?).
 * If we find it but we're not parsing yet (i.e., it's not a refentry
 * and thus out of context), keep going.
 * Make sure that the element is in the right context.
 * Lastly, put the node onto our parse tree, record its attributes, and
 * continue.
 *
 * arg is the struct parse registered as user data, name the element
 * name, and atts a NULL-terminated array of alternating attribute
 * names and values.
 * "Stop" means setting ps->stop, after which all callbacks are no-ops.
 * Unknown or misplaced attributes only produce a warning.
 * Allocation failure is fatal: the process exits.
 */
static void
xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
{
	struct parse	 *ps = arg;
	enum nodeid	  node;
	enum attrkey	  key;
	enum attrval	  val;
	struct pnode	 *dat;
	struct pattr	 *pattr;
	const XML_Char	**att;

	if (ps->stop)
		return;

	/* Close out text node, if applicable... */
	if (NODE_TEXT == ps->node) {
		assert(NULL != ps->cur);
		pnode_trim(ps->cur);
		ps->cur = ps->cur->parent;
		assert(NULL != ps->cur);
		ps->node = ps->cur->node;
	}

	/* Look the element up by name; NULL-named slots never match. */
	for (node = 0; node < NODE__MAX; node++)
		if (NULL == nodes[node].name)
			continue;
		else if (0 == strcmp(nodes[node].name, name))
			break;

	if (NODE__MAX == node && NODE_ROOT == ps->node) {
		return;
	} else if (NODE__MAX == node) {
		fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
			ps->fname, XML_GetCurrentLineNumber(ps->xml),
			XML_GetCurrentColumnNumber(ps->xml), name);
		ps->stop = 1;
		return;
	} else if (NODE_ROOT == ps->node && NULL != ps->root) {
		fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
			ps->fname, XML_GetCurrentLineNumber(ps->xml),
			XML_GetCurrentColumnNumber(ps->xml));
		ps->stop = 1;
		return;
	} else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
		return;
	} else if ( ! isparent(node, ps->node)) {
		fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\" "
			"of node \"%s\"\n",
			ps->fname, XML_GetCurrentLineNumber(ps->xml),
			XML_GetCurrentColumnNumber(ps->xml),
			NULL == nodes[ps->node].name ?
			"(none)" : nodes[ps->node].name,
			NULL == nodes[node].name ?
			"(none)" : nodes[node].name);
		ps->stop = 1;
		return;
	}

	if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	dat->node = ps->node = node;
	dat->parent = ps->cur;
	TAILQ_INIT(&dat->childq);
	TAILQ_INIT(&dat->attrq);

	if (NULL != ps->cur)
		TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);

	ps->cur = dat;
	if (NULL == ps->root)
		ps->root = dat;

	/*
	 * Process attributes.
	 */
	for (att = atts; NULL != *att; att += 2) {
		for (key = 0; key < ATTRKEY__MAX; key++)
			if (0 == strcmp(*att, attrkeys[key]))
				break;
		if (ATTRKEY__MAX == key) {
			fprintf(stderr, "%s:%zu:%zu: unknown "
				"attribute \"%s\"\n", ps->fname,
				XML_GetCurrentLineNumber(ps->xml),
				XML_GetCurrentColumnNumber(ps->xml),
				*att);
			continue;
		} else if ( ! isattrkey(node, key)) {
			fprintf(stderr, "%s:%zu:%zu: bad "
				"attribute \"%s\"\n", ps->fname,
				XML_GetCurrentLineNumber(ps->xml),
				XML_GetCurrentColumnNumber(ps->xml),
				*att);
			continue;
		}
		for (val = 0; val < ATTRVAL__MAX; val++)
			if (0 == strcmp(*(att + 1), attrvals[val]))
				break;
		if (ATTRVAL__MAX != val && ! isattrval(key, val)) {
			fprintf(stderr, "%s:%zu:%zu: bad "
				"value \"%s\"\n", ps->fname,
				XML_GetCurrentLineNumber(ps->xml),
				XML_GetCurrentColumnNumber(ps->xml),
				*(att + 1));
			continue;
		}

		/* Out of memory is fatal here as everywhere else. */
		pattr = calloc(1, sizeof(struct pattr));
		if (NULL == pattr) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}
		pattr->key = key;
		pattr->val = val;

		/* Unrecognised values are kept verbatim. */
		if (ATTRVAL__MAX == val) {
			pattr->rawval = strdup(*(att + 1));
			if (NULL == pattr->rawval) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
	}
}
/*
* Roll up the parse tree.
* If we're at a text node, roll that one up first.
* If we hit the root, then assign ourselves as the NODE_ROOT.
*/
static void
xml_elem_end(void *arg, const XML_Char *name)
{
struct parse *ps = arg;
if (ps->stop || NODE_ROOT == ps->node)
return;
/* Close out text node, if applicable... */
if (NODE_TEXT == ps->node) {
assert(NULL != ps->cur);
pnode_trim(ps->cur);
ps->cur = ps->cur->parent;
assert(NULL != ps->cur);
ps->node = ps->cur->node;
}
if (NULL == (ps->cur = ps->cur->parent))
ps->node = NODE_ROOT;
else
ps->node = ps->cur->node;
}
/*
 * Release a node together with its whole subtree, its attributes and
 * its text buffer.
 * Passing NULL is allowed and does nothing.
 * The node is NOT removed from its parent's queue; see pnode_unlink().
 */
static void
pnode_free(struct pnode *pn)
{
	struct pnode	*kid;
	struct pattr	*attr;

	if (NULL == pn)
		return;

	/* Children first, depth-first. */
	while ( ! TAILQ_EMPTY(&pn->childq)) {
		kid = TAILQ_FIRST(&pn->childq);
		TAILQ_REMOVE(&pn->childq, kid, child);
		pnode_free(kid);
	}

	/* Then the attribute list. */
	while ( ! TAILQ_EMPTY(&pn->attrq)) {
		attr = TAILQ_FIRST(&pn->attrq);
		TAILQ_REMOVE(&pn->attrq, attr, child);
		free(attr->rawval);
		free(attr);
	}

	free(pn->b);
	free(pn);
}
/*
 * Detach a node from its parent's child queue (if it has a parent) and
 * then pnode_free() it, subtree and all.
 * The pointer must not be used afterwards.
 */
static void
pnode_unlink(struct pnode *pn)
{
	struct pnode	*owner = pn->parent;

	if (NULL != owner)
		TAILQ_REMOVE(&owner->childq, pn, child);

	pnode_free(pn);
}
/*
 * Detach and pnode_free() every child of a node, leaving the node
 * itself in place with an empty child queue.
 */
static void
pnode_unlinksub(struct pnode *pn)
{
	struct pnode	*kid;

	while (NULL != (kid = TAILQ_FIRST(&pn->childq)))
		pnode_unlink(kid);
}
/*
 * Empty the scratch buffer: zero its length and nil-terminate it.
 * The allocation itself is kept for reuse.
 */
static void
bufclear(struct parse *p)
{
	p->bsz = 0;
	p->b[0] = '\0';
}
/*
 * Copy the bytes of a NODE_TEXT node onto the end of the scratch
 * buffer, growing the allocation when it is too small.
 * The buffer is ALWAYS nil-terminated on return.
 * Allocation failure is fatal: the process exits.
 */
static void
bufappend(struct parse *p, struct pnode *pn)
{
	size_t	 need;

	assert(NODE_TEXT == pn->node);

	/* Old contents, new contents, and the terminator. */
	need = p->bsz + pn->bsz + 1;
	if (need > p->mbsz) {
		p->mbsz = need;
		p->b = realloc(p->b, p->mbsz);
		if (NULL == p->b) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}
	}

	memcpy(p->b + p->bsz, pn->b, pn->bsz);
	p->bsz += pn->bsz;
	p->b[p->bsz] = '\0';
}
/*
 * Gather the text of a whole subtree into the scratch buffer.
 * Text nodes are appended in document order; element nodes contribute
 * nothing themselves and are only descended into.
 */
static void
bufappend_r(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;

	if (NODE_TEXT == pn->node)
		bufappend(p, pn);

	for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
	    kid = TAILQ_NEXT(kid, child))
		bufappend_r(p, kid);
}
#define MACROLINE_NORM 0
#define MACROLINE_UPPER 1
/*
 * Recursively print text presumably on a macro line.
 * All text below pn is collected into the scratch buffer, every
 * whitespace character is converted to a regular space, and leading
 * whitespace is skipped.
 * A separating space is printed first unless we're on a fresh line.
 * Words that look like a macro name (an upper-case letter followed by
 * one or two lower-case letters) are prefixed with "\&", and each
 * backslash is followed by 'e'.
 * If fl has MACROLINE_UPPER set, the text is printed in upper case.
 * No newline is printed and p->newln is left untouched.
 */
static void
pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl)
{
	char	*cp;

	if (0 == p->newln)
		putchar(' ');

	bufclear(p);
	bufappend_r(p, pn);

	/*
	 * All <ctype.h> calls below cast through unsigned char: the
	 * buffer holds raw document bytes, and passing a negative plain
	 * char to these functions is undefined behaviour.
	 */

	/* Convert all space to spaces. */
	for (cp = p->b; '\0' != *cp; cp++)
		if (isspace((unsigned char)*cp))
			*cp = ' ';

	for (cp = p->b; isspace((unsigned char)*cp); cp++)
		/* Spin past whitespace (XXX: necessary?) */ ;

	for ( ; '\0' != *cp; cp++) {
		/*
		 * Escape us if we look like a macro.
		 * The short-circuit order guarantees that we never
		 * read before the buffer or past its terminator.
		 */
		if ((cp == p->b || ' ' == *(cp - 1)) &&
		    isupper((unsigned char)*cp) &&
		    '\0' != *(cp + 1) &&
		    islower((unsigned char)*(cp + 1)) &&
		    ('\0' == *(cp + 2) ||
		     ' ' == *(cp + 2) ||
		     (islower((unsigned char)*(cp + 2)) &&
		      ('\0' == *(cp + 3) ||
		       ' ' == *(cp + 3)))))
			fputs("\\&", stdout);

		if (MACROLINE_UPPER & fl)
			putchar(toupper((unsigned char)*cp));
		else
			putchar((unsigned char)*cp);

		/* If we're a character escape, escape us. */
		if ('\\' == *cp)
			putchar('e');
	}
}
/*
 * Print the text below pn as part of a macro line: no case conversion
 * and no trailing newline.
 */
static void
pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
{
	const int	 flags = 0;

	pnode_printmacrolinetext(p, pn, flags);
}
/*
 * Finish a macro line: print the text below pn (if there is any)
 * followed by a newline, and record that output is on a fresh line.
 * Must be called in the middle of a line.
 */
static void
pnode_printmacroline(struct parse *p, struct pnode *pn)
{
	assert( ! p->newln);

	pnode_printmacrolinetext(p, pn, 0);
	fputc('\n', stdout);
	p->newln = 1;
}
/*
 * Get ready to print a macro name.
 * In the middle of a line this emits a separating space; on a fresh
 * line it emits the leading '.' and marks the line as started.
 */
static void
pnode_printmopen(struct parse *p)
{
	if ( ! p->newln) {
		putchar(' ');
		return;
	}

	putchar('.');
	p->newln = 0;
}
/*
 * Finish a macro.
 * sv is the value p->newln had before the macro was opened: if the
 * macro started on a fresh line and the line is still open, end it.
 * Otherwise do nothing.
 */
static void
pnode_printmclose(struct parse *p, int sv)
{
	if (0 == sv || p->newln)
		return;

	putchar('\n');
	p->newln = 1;
}
/*
 * If the SYNOPSIS section carries a superfluous title, kill it:
 * the first NODE_TITLE child of pn, if any, is unlinked and freed.
 * Nothing is printed here.
 */
static void
pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;

	kid = TAILQ_FIRST(&pn->childq);
	while (NULL != kid && NODE_TITLE != kid->node)
		kid = TAILQ_NEXT(kid, child);

	if (NULL != kid)
		pnode_unlink(kid);
}
/*
 * Start a hopefully-named `Sh' section.
 * The first NODE_TITLE child of pn supplies the section name and is
 * then unlinked so that it isn't printed again as body text.
 * Without a title, the section is called UNKNOWN.
 * On return the output is on a fresh line.
 */
static void
pnode_printrefsect(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;

	TAILQ_FOREACH(pp, &pn->childq, child)
		if (NODE_TITLE == pp->node)
			break;

	fputs(".Sh", stdout);
	p->newln = 0;

	if (NULL != pp) {
		/* Prints its own separating space before the title. */
		pnode_printmacroline(p, pp);
		pnode_unlink(pp);
	} else {
		/*
		 * The leading space matters: without it the output
		 * would read ".ShUNKNOWN", which is not a macro.
		 */
		puts(" UNKNOWN");
		p->newln = 1;
	}
}
/*
 * Print a manual cross reference as an `Xr' line.
 * The name comes from the last NODE_REFENTRYTITLE child and the
 * section from the last NODE_MANVOLNUM child; missing pieces are
 * replaced by "unknown" and "1" respectively.
 * Must be called on a fresh line and returns on a fresh line.
 */
static void
pnode_printciterefentry(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;
	struct pnode	*name = NULL;
	struct pnode	*section = NULL;

	assert(p->newln);

	for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
	    kid = TAILQ_NEXT(kid, child)) {
		switch (kid->node) {
		case (NODE_MANVOLNUM):
			section = kid;
			break;
		case (NODE_REFENTRYTITLE):
			name = kid;
			break;
		default:
			break;
		}
	}

	fputs(".Xr", stdout);
	p->newln = 0;

	if (NULL == name)
		fputs(" unknown ", stdout);
	else
		pnode_printmacrolinepart(p, name);

	if (NULL != section) {
		pnode_printmacroline(p, section);
		return;
	}

	puts(" 1");
	p->newln = 1;
}
/*
 * Print the document prologue (`Dd', `Dt', `Os') from a refmeta node.
 * The title comes from the last NODE_REFENTRYTITLE child, printed in
 * upper case, and the section from the last NODE_MANVOLNUM child;
 * missing pieces are replaced by "UNKNOWN" and "1" respectively.
 * Must be called on a fresh line.
 */
static void
pnode_printrefmeta(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;
	struct pnode	*name = NULL;
	struct pnode	*section = NULL;

	assert(p->newln);

	for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
	    kid = TAILQ_NEXT(kid, child)) {
		switch (kid->node) {
		case (NODE_MANVOLNUM):
			section = kid;
			break;
		case (NODE_REFENTRYTITLE):
			name = kid;
			break;
		default:
			break;
		}
	}

	/* The literal is split to keep the keyword out of the source. */
	puts(".Dd $Mdocdate" "$");

	fputs(".Dt", stdout);
	p->newln = 0;

	if (NULL == name)
		fputs(" UNKNOWN ", stdout);
	else
		pnode_printmacrolinetext(p, name, MACROLINE_UPPER);

	if (NULL != section)
		pnode_printmacroline(p, section);
	else {
		puts(" 1");
		p->newln = 1;
	}

	puts(".Os");
}
/*
 * Print a function definition: an optional `Ft' line with the return
 * type followed by the opening `Fo' line with the function name.
 * The type is the last NODE_TEXT child, the name the last
 * NODE_FUNCTION child; a missing name is printed as UNKNOWN.
 * Must be called on a fresh line and returns on a fresh line.
 */
static void
pnode_printfuncdef(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;
	struct pnode	*rettype = NULL;
	struct pnode	*fname = NULL;

	assert(p->newln);

	for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
	    kid = TAILQ_NEXT(kid, child)) {
		switch (kid->node) {
		case (NODE_TEXT):
			rettype = kid;
			break;
		case (NODE_FUNCTION):
			fname = kid;
			break;
		default:
			break;
		}
	}

	if (NULL != rettype) {
		fputs(".Ft", stdout);
		p->newln = 0;
		pnode_printmacroline(p, rettype);
	}

	if (NULL == fname) {
		puts(".Fo UNKNOWN");
		p->newln = 1;
		return;
	}

	fputs(".Fo", stdout);
	p->newln = 0;
	pnode_printmacroline(p, fname);
}
/*
 * Print one function parameter as a quoted `Fa' line.
 * The type is the last NODE_TEXT child and the name the last
 * NODE_PARAMETER child; either may be absent.
 * Must be called on a fresh line and returns on a fresh line.
 */
static void
pnode_printparamdef(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;
	struct pnode	*type = NULL;
	struct pnode	*name = NULL;

	assert(p->newln);

	for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
	    kid = TAILQ_NEXT(kid, child)) {
		switch (kid->node) {
		case (NODE_TEXT):
			type = kid;
			break;
		case (NODE_PARAMETER):
			name = kid;
			break;
		default:
			break;
		}
	}

	fputs(".Fa \"", stdout);
	p->newln = 0;

	if (NULL != type) {
		pnode_printmacrolinepart(p, type);
		putchar(' ');
	}
	if (NULL != name)
		pnode_printmacrolinepart(p, name);

	puts("\"");
	p->newln = 1;
}
/*
 * Print a complete function prototype as an `Fo' ... `Fc' block.
 * The first NODE_FUNCDEF child opens the block (UNKNOWN if there is
 * none), every NODE_PARAMDEF child adds one `Fa' line, and `Fc'
 * closes it.
 * Must be called on a fresh line and returns on a fresh line.
 */
static void
pnode_printfuncprototype(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *def;

	assert(p->newln);

	def = TAILQ_FIRST(&pn->childq);
	while (NULL != def && NODE_FUNCDEF != def->node)
		def = TAILQ_NEXT(def, child);

	if (NULL == def)
		puts(".Fo UNKNOWN");
	else
		pnode_printfuncdef(p, def);

	for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
	    kid = TAILQ_NEXT(kid, child)) {
		if (NODE_PARAMDEF != kid->node)
			continue;
		pnode_printparamdef(p, kid);
	}

	puts(".Fc");
	p->newln = 1;
}
/*
 * Print an <arg> element.
 * This is more involved than one would like: text children are
 * printed as ".Ar foo", whereas element children are handed back to
 * the printer as they are (i.e., without the preceding ".Ar").
 * Attributes control two things:
 *  - choice="plain" or choice="req" drops the enclosing `Op';
 *    anything else, including no attribute at all, is optional;
 *  - rep="repeat" appends an ellipsis after each text child.
 */
static void
pnode_printarg(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;
	struct pattr	*attr;
	int		 optional = 1;
	int		 repeat = 0;
	int		 istext;

	for (attr = TAILQ_FIRST(&pn->attrq); NULL != attr;
	    attr = TAILQ_NEXT(attr, child)) {
		switch (attr->key) {
		case (ATTRKEY_CHOICE):
			if (ATTRVAL_PLAIN == attr->val ||
			    ATTRVAL_REQ == attr->val)
				optional = 0;
			break;
		case (ATTRKEY_REP):
			if (ATTRVAL_REPEAT == attr->val)
				repeat = 1;
			break;
		default:
			break;
		}
	}

	if (optional) {
		pnode_printmopen(p);
		fputs("Op", stdout);
	}

	for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
	    kid = TAILQ_NEXT(kid, child)) {
		istext = NODE_TEXT == kid->node;
		if (istext) {
			pnode_printmopen(p);
			fputs("Ar", stdout);
		}
		pnode_print(p, kid);
		if (istext && repeat)
			fputs("...", stdout);
	}
}
/*
 * Depth-first search below pn for a node of the given type.
 * pn itself is never a candidate: only its descendants are examined.
 * Returns the first match in document order, or NULL if there is none.
 */
static struct pnode *
pnode_findfirst(struct pnode *pn, enum nodeid node)
{
	struct pnode	*kid, *hit;

	for (kid = TAILQ_FIRST(&pn->childq); NULL != kid;
	    kid = TAILQ_NEXT(kid, child)) {
		if (kid->node == node)
			return(kid);
		hit = pnode_findfirst(kid, node);
		if (NULL != hit)
			return(hit);
	}

	return(NULL);
}
/*
 * Print the mdoc prologue.
 * The first refmeta node found anywhere in the tree is printed and
 * then removed from the tree.
 * If there is no tree or no refmeta, a placeholder prologue is printed
 * instead.
 * The pn argument is not consulted; the search starts at p->root.
 */
static void
pnode_printprologue(struct parse *p, struct pnode *pn)
{
	struct pnode	*meta = NULL;

	if (NULL != p->root)
		meta = pnode_findfirst(p->root, NODE_REFMETA);

	if (NULL == meta) {
		puts(".\\\" Supplying bogus prologue...");
		puts(".Dd $Mdocdate" "$");
		puts(".Dt UNKNOWN 1");
		puts(".Os");
		return;
	}

	pnode_printrefmeta(p, meta);
	pnode_unlink(meta);
}
/*
 * Open one entry of a tagged list with an `It' line.
 * The first NODE_TERM child becomes the tag and is then removed from
 * the tree; without a term, a bare `It' is printed.
 * Must be called on a fresh line and returns on a fresh line.
 */
static void
pnode_printvarlistentry(struct parse *p, struct pnode *pn)
{
	struct pnode	*term;

	assert(p->newln);

	term = TAILQ_FIRST(&pn->childq);
	while (NULL != term && NODE_TERM != term->node)
		term = TAILQ_NEXT(term, child);

	if (NULL == term) {
		puts(".It");
		p->newln = 1;
		return;
	}

	fputs(".It", stdout);
	p->newln = 0;
	pnode_print(p, term);
	pnode_unlink(term);
	pnode_printmclose(p, 1);
}
/*
 * Print an itemized list as a `Bl -item' block.
 * Title children are printed first, each as its own paragraph, and
 * removed from the tree.
 * Every remaining child then becomes one `It' entry.
 * All children are removed from pn before returning.
 * Must be called on a fresh line and returns on a fresh line.
 */
static void
pnode_printitemizedlist(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp, *next;

	assert(p->newln);

	/*
	 * Titles are unlinked (and thus freed) while we walk the queue,
	 * so fetch the successor before touching the node: advancing
	 * from a freed node would be a use-after-free.
	 */
	for (pp = TAILQ_FIRST(&pn->childq); NULL != pp; pp = next) {
		next = TAILQ_NEXT(pp, child);
		if (NODE_TITLE != pp->node)
			continue;
		puts(".Pp");
		pnode_print(p, pp);
		pnode_unlink(pp);
	}

	assert(p->newln);
	puts(".Bl -item");
	TAILQ_FOREACH(pp, &pn->childq, child) {
		assert(p->newln);
		puts(".It");
		pnode_print(p, pp);
		pnode_printmclose(p, 1);
	}
	assert(p->newln);
	puts(".El");

	/*
	 * Everything below us has been printed.
	 * Drop the children so that pnode_print(), which walks the
	 * child queue of every node after handling the node itself,
	 * doesn't print the items a second time after the list.
	 */
	while (NULL != (pp = TAILQ_FIRST(&pn->childq)))
		pnode_unlink(pp);
}
/*
 * Print a variable list as a `Bl -tag' block.
 * Title children are printed first, each as its own paragraph, and
 * removed from the tree.
 * Each remaining varlistentry child is printed through pnode_print();
 * any other child is printed as the tag of an `It' line.
 * Must be called on a fresh line and returns on a fresh line.
 */
static void
pnode_printvariablelist(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp, *next;

	assert(p->newln);

	/*
	 * Titles are unlinked (and thus freed) while we walk the queue,
	 * so fetch the successor before touching the node: advancing
	 * from a freed node would be a use-after-free.
	 */
	for (pp = TAILQ_FIRST(&pn->childq); NULL != pp; pp = next) {
		next = TAILQ_NEXT(pp, child);
		if (NODE_TITLE != pp->node)
			continue;
		puts(".Pp");
		pnode_print(p, pp);
		pnode_unlink(pp);
	}

	assert(p->newln);
	puts(".Bl -tag -width Ds");
	TAILQ_FOREACH(pp, &pn->childq, child)
		if (NODE_VARLISTENTRY != pp->node) {
			assert(p->newln);
			fputs(".It", stdout);
			/*
			 * We're in the middle of a line now;
			 * pnode_printmacroline() insists on that and
			 * uses it to print the separating space.
			 */
			p->newln = 0;
			pnode_printmacroline(p, pp);
		} else {
			assert(p->newln);
			pnode_print(p, pp);
		}
	assert(p->newln);
	puts(".El");
}
/*
 * Print a parsed node (or ignore it--whatever).
 * This is a recursive function working in three steps:
 *  1. a per-type prefix: either the opening of a macro, or a call to a
 *     dedicated printer that handles the whole subtree and then
 *     removes the children it has consumed;
 *  2. recursion into whatever children are still linked;
 *  3. a per-type suffix, mostly closing the macro line.
 * NULL is accepted and ignored.
 * p->newln tracks whether output is at the start of a line; many
 * block-level cases assert that it is.
 * FIXME: macro line continuation?
 */
static void
pnode_print(struct parse *p, struct pnode *pn)
{
struct pnode *pp;
char *cp;
int last, sv;
if (NULL == pn)
return;
/*
 * Remember whether this node started on a fresh line: only then does
 * pnode_printmclose() end the line again in the suffix step.
 */
sv = p->newln;
switch (pn->node) {
case (NODE_ARG):
pnode_printarg(p, pn);
pnode_unlinksub(pn);
break;
case (NODE_CITEREFENTRY):
assert(p->newln);
pnode_printciterefentry(p, pn);
pnode_unlinksub(pn);
break;
case (NODE_CODE):
pnode_printmopen(p);
fputs("Li", stdout);
break;
case (NODE_COMMAND):
pnode_printmopen(p);
fputs("Nm", stdout);
break;
case (NODE_EMPHASIS):
pnode_printmopen(p);
fputs("Em", stdout);
break;
case (NODE_FILENAME):
pnode_printmopen(p);
fputs("Pa", stdout);
break;
case (NODE_FUNCTION):
pnode_printmopen(p);
fputs("Fn", stdout);
break;
case (NODE_FUNCPROTOTYPE):
assert(p->newln);
pnode_printfuncprototype(p, pn);
pnode_unlinksub(pn);
break;
case (NODE_FUNCSYNOPSISINFO):
pnode_printmopen(p);
fputs("Fd", stdout);
break;
case (NODE_ITEMIZEDLIST):
assert(p->newln);
/*
 * NOTE(review): unlike NODE_VARIABLELIST below, there is no
 * pnode_unlinksub() here, so children still linked after this call
 * are visited again by the child loop further down -- confirm that
 * this is intended.
 */
pnode_printitemizedlist(p, pn);
break;
case (NODE_OPTION):
pnode_printmopen(p);
fputs("Fl", stdout);
/* FIXME: bogus leading '-'? */
break;
case (NODE_PARA):
assert(p->newln);
/* No paragraph break directly inside a list item. */
if (NULL != pn->parent &&
NODE_LISTITEM == pn->parent->node)
break;
puts(".Pp");
break;
case (NODE_PARAMETER):
/* Suppress non-text children... */
pnode_printmopen(p);
fputs("Fa \"", stdout);
pnode_printmacrolinepart(p, pn);
/*
 * NOTE(review): puts() ends the line here, but p->newln is not
 * updated to match -- verify the interaction with
 * pnode_printmclose() in the suffix step.
 */
puts("\"");
pnode_unlinksub(pn);
break;
case (NODE_PROGRAMLISTING):
assert(p->newln);
puts(".Bd -literal");
break;
case (NODE_REFENTRYINFO):
/* Suppress. */
pnode_unlinksub(pn);
break;
case (NODE_REFMETA):
/*
 * Not expected to be reached: pnode_printprologue() prints and
 * unlinks a refmeta node before the tree is printed.
 * NOTE(review): it only removes the first one it finds, so a second
 * refmeta in the input would end up here -- confirm.
 */
abort();
break;
case (NODE_REFNAME):
/* Suppress non-text children... */
pnode_printmopen(p);
fputs("Nm", stdout);
p->newln = 0;
pnode_printmacrolinepart(p, pn);
pnode_unlinksub(pn);
break;
case (NODE_REFNAMEDIV):
assert(p->newln);
puts(".Sh NAME");
break;
case (NODE_REFPURPOSE):
assert(p->newln);
pnode_printmopen(p);
fputs("Nd", stdout);
break;
case (NODE_REFSYNOPSISDIV):
assert(p->newln);
pnode_printrefsynopsisdiv(p, pn);
puts(".Sh SYNOPSIS");
break;
case (NODE_REFSECT1):
assert(p->newln);
pnode_printrefsect(p, pn);
break;
case (NODE_REPLACEABLE):
pnode_printmopen(p);
fputs("Ar", stdout);
break;
case (NODE_STRUCTNAME):
pnode_printmopen(p);
fputs("Vt", stdout);
break;
case (NODE_TEXT):
/* Separate from whatever is already on the line. */
if (0 == p->newln)
putchar(' ');
bufclear(p);
bufappend(p, pn);
/*
 * Output all characters, squeezing out whitespace
 * between newlines.
 * XXX: all whitespace, including tabs (?).
 * Remember to escape control characters and escapes.
 */
assert(p->bsz);
/*
 * "last" is the most recently printed character; it starts out as a
 * newline so that the start of the text is handled like the start of
 * a line.
 */
for (last = '\n', cp = p->b; '\0' != *cp; ) {
if ('\n' == last) {
/* Consume all whitespace. */
if (isspace((int)*cp)) {
while (isspace((int)*cp))
cp++;
continue;
} else if ('\'' == *cp || '.' == *cp)
/* Protect a leading control character. */
fputs("\\&", stdout);
}
putchar(last = *cp++);
/* If we're a character escape, escape us. */
if ('\\' == last)
putchar('e');
}
p->newln = 0;
break;
case (NODE_VARIABLELIST):
assert(p->newln);
pnode_printvariablelist(p, pn);
pnode_unlinksub(pn);
break;
case (NODE_VARLISTENTRY):
assert(p->newln);
pnode_printvarlistentry(p, pn);
break;
default:
break;
}
/* Recurse into whatever children are still linked. */
TAILQ_FOREACH(pp, &pn->childq, child)
pnode_print(p, pp);
/* Per-type suffix. */
switch (pn->node) {
case (NODE_ARG):
case (NODE_CODE):
case (NODE_COMMAND):
case (NODE_EMPHASIS):
case (NODE_FILENAME):
case (NODE_FUNCTION):
case (NODE_FUNCSYNOPSISINFO):
case (NODE_OPTION):
case (NODE_PARAMETER):
case (NODE_REPLACEABLE):
case (NODE_REFPURPOSE):
case (NODE_STRUCTNAME):
case (NODE_TEXT):
/* End the line, but only if this node began it. */
pnode_printmclose(p, sv);
break;
case (NODE_REFNAME):
/*
 * If we're in the NAME macro and we have multiple
 * <refname> macros in sequence, then print out a
 * trailing comma before the newline.
 */
if (NULL != pn->parent &&
NODE_REFNAMEDIV == pn->parent->node &&
NULL != TAILQ_NEXT(pn, child) &&
NODE_REFNAME == TAILQ_NEXT(pn, child)->node)
fputs(" ,", stdout);
pnode_printmclose(p, sv);
break;
case (NODE_PROGRAMLISTING):
assert(p->newln);
puts(".Ed");
p->newln = 1;
break;
default:
break;
}
}
/*
 * Loop around the read buffer until we've drained it of all data.
 * Invoke the parser context with each buffer fill.
 * When the input is exhausted, or as soon as the parse fails or is
 * stopped, print the prologue and whatever tree has been built, then
 * release the tree.
 *
 * xp is a freshly created parser, fd the open input descriptor, b a
 * caller-supplied read buffer of bsz bytes, and fn the name used in
 * diagnostics.
 * Returns non-zero if the whole document was parsed without error and
 * zero otherwise, including on read failure.
 * Failure to allocate the scratch buffer is fatal: the process exits.
 */
static int
readfile(XML_Parser xp, int fd,
	char *b, size_t bsz, const char *fn)
{
	struct parse	 p;
	int		 rc;
	ssize_t		 ssz;

	memset(&p, 0, sizeof(struct parse));

	/*
	 * The scratch buffer is dereferenced unconditionally by the
	 * output routines, so it must exist; start it out empty and
	 * nil-terminated.
	 */
	p.mbsz = 1024;
	if (NULL == (p.b = malloc(p.mbsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}
	p.b[0] = '\0';
	p.bsz = 0;

	p.fname = fn;
	p.xml = xp;

	XML_SetCharacterDataHandler(xp, xml_char);
	XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
	XML_SetUserData(xp, &p);

	while ((ssz = read(fd, b, bsz)) >= 0) {
		/* A zero-length read marks the final chunk. */
		if (0 == (rc = XML_Parse(xp, b, (int)ssz, 0 == ssz)))
			fprintf(stderr, "%s: %s\n", fn,
				XML_ErrorString
				(XML_GetErrorCode(xp)));
		else if ( ! p.stop && ssz > 0)
			continue;
		/*
		 * Exit when we've read all or errors have occurred
		 * during the parse sequence.
		 */
		p.newln = 1;
		pnode_printprologue(&p, p.root);
		pnode_print(&p, p.root);
		pnode_free(p.root);
		free(p.b);
		return(0 != rc && ! p.stop);
	}

	/* Read error has occurred. */
	perror(fn);
	pnode_free(p.root);
	free(p.b);
	return(0);
}
/*
 * Convert one DocBook document to mdoc on standard output.
 * No options are accepted and at most one operand, the input file.
 * Without an operand, or with "-", standard input is read.
 * Returns EXIT_SUCCESS if the document was read and parsed without
 * error and EXIT_FAILURE otherwise.
 */
int
main(int argc, char *argv[])
{
	XML_Parser	 xp;
	const char	*fname;
	char		*buf;
	int		 fd, rc;

	fname = "-";
	xp = NULL;
	buf = NULL;
	rc = 0;

	/* Any option at all is an error. */
	if (-1 != getopt(argc, argv, ""))
		return(EXIT_FAILURE);

	argc -= optind;
	argv += optind;

	if (argc > 1)
		return(EXIT_FAILURE);
	else if (argc > 0)
		fname = argv[0];

	/* Read from stdin or a file. */
	fd = 0 == strcmp(fname, "-") ?
		STDIN_FILENO : open(fname, O_RDONLY, 0);

	/*
	 * Check that the input was opened.
	 * Allocate a read buffer.
	 * Create the parser context.
	 * Dive directly into the parse.
	 * readfile() returns non-zero on success, and only then do we
	 * report success ourselves.
	 */
	if (-1 == fd)
		perror(fname);
	else if (NULL == (buf = malloc(4096)))
		perror(NULL);
	else if (NULL == (xp = XML_ParserCreate(NULL)))
		perror(NULL);
	else if (readfile(xp, fd, buf, 4096, fname))
		rc = 1;

	if (NULL != xp)
		XML_ParserFree(xp);
	free(buf);

	/* Don't close stdin, nor a descriptor we never got. */
	if (-1 != fd && STDIN_FILENO != fd)
		close(fd);

	return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
}