File: [cvsweb.bsd.lv] / docbook2mdoc / docbook2mdoc.c (download)
Revision 1.12, Sat Mar 29 22:44:06 2014 UTC (10 years, 6 months ago) by kristaps
Branch: MAIN
CVS Tags: VERSION_0_0_4 Changes since 1.11: +211 -16 lines
Let there be attributes!
Allow <arg> to properly have its attributes.
Comma-separate multiple Nm in a refname div.
Clean up error reporting.
|
/* $Id: docbook2mdoc.c,v 1.12 2014/03/29 22:44:06 kristaps Exp $ */
/*
* Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/queue.h>
#include <assert.h>
#include <ctype.h>
#include <expat.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * Identifiers for every node type the parser recognises.
 * These values index the "nodes" table, so both must be kept in the
 * same order.
 */
enum	nodeid {
	NODE_ROOT = 0,	/* Must come first. */
	/* Everything below is kept in alphabetical order. */
	NODE_ARG,
	NODE_CITEREFENTRY,
	NODE_CMDSYNOPSIS,
	NODE_CODE,
	NODE_COMMAND,
	NODE_FUNCDEF,
	NODE_FUNCPROTOTYPE,
	NODE_FUNCSYNOPSIS,
	NODE_FUNCSYNOPSISINFO,
	NODE_FUNCTION,
	NODE_MANVOLNUM,
	NODE_OPTION,
	NODE_PARA,
	NODE_PARAMDEF,
	NODE_PARAMETER,
	NODE_PROGRAMLISTING,
	NODE_REFCLASS,
	NODE_REFDESCRIPTOR,
	NODE_REFENTRY,
	NODE_REFENTRYTITLE,
	NODE_REFMETA,
	NODE_REFMISCINFO,
	NODE_REFNAME,
	NODE_REFNAMEDIV,
	NODE_REFPURPOSE,
	NODE_REFSECT1,
	NODE_REFSYNOPSISDIV,
	NODE_STRUCTNAME,
	NODE_SYNOPSIS,
	NODE_TEXT,	/* Character data; has no element name. */
	NODE_TITLE,
	NODE__MAX	/* Number of node types; not a real node. */
};
/*
 * Identifiers for every attribute name the parser recognises.
 * These values index the "attrkeys" table.
 */
enum	attrkey {
	/* Kept in alphabetical order. */
	ATTRKEY_CHOICE = 0,
	ATTRKEY_ID,
	ATTRKEY_REP,
	ATTRKEY__MAX	/* Number of keys; also "no such key". */
};
/*
 * Identifiers for every attribute value that is matched explicitly.
 * These values index the "attrvals" table.
 * An attribute carrying ATTRVAL__MAX did not match any of them and may
 * hold free-form text instead.
 */
enum	attrval {
	/* Kept in alphabetical order. */
	ATTRVAL_NOREPEAT,
	ATTRVAL_OPT,
	ATTRVAL_PLAIN,
	ATTRVAL_REPEAT,
	ATTRVAL_REQ,
	ATTRVAL__MAX	/* Number of values; also "not a known value". */
};
/*
* Global parse state.
* Keep this as simple and small as possible.
*/
struct parse {
XML_Parser xml;
enum nodeid node; /* current (NODE_ROOT if pre-tree) */
const char *fname; /* filename */
int stop; /* should we stop now? */
struct pnode *root; /* root of parse tree */
struct pnode *cur; /* current node in tree */
char *b; /* nil-terminated buffer for pre-print */
size_t bsz; /* current length of b */
size_t mbsz; /* max bsz allocation */
int newln; /* output: are we on a fresh line */
};
/*
 * Static description of one node type: its DocBook element name and
 * flags that alter how the parser treats it.
 */
struct	node {
	const char	*name;	/* DocBook element name */
	unsigned int	 flags;	/* bitwise OR of NODE_* flags below */
#define	NODE_IGNTEXT	 1	/* ignore all contained text */
};
/* Queue heads for a node's children and for a node's attributes. */
TAILQ_HEAD(pnodeq, pnode);
TAILQ_HEAD(pattrq, pattr);

/*
 * One parsed attribute of an element.
 */
struct	pattr {
	enum attrkey	 key;		/* which attribute this is */
	enum attrval	 val;		/* ATTRVAL__MAX if not a known value */
	char		*rawval;	/* copy of the text when val is ATTRVAL__MAX */
	TAILQ_ENTRY(pattr) child;	/* linkage in the owner's attrq */
};
/*
 * One node of the parse tree: an element or a run of text.
 */
struct	pnode {
	enum nodeid	 node;		/* node type */
	char		*b;		/* data buffer (not nil-terminated) */
	size_t		 bsz;		/* number of bytes held in b */
	struct pnode	*parent;	/* parent (or NULL if top) */
	struct pnodeq	 childq;	/* queue of children */
	struct pattrq	 attrq;		/* attributes of node */
	TAILQ_ENTRY(pnode) child;	/* linkage in the parent's childq */
};
static const char *attrkeys[ATTRKEY__MAX] = {
"choice",
"id",
"rep"
};
static const char *attrvals[ATTRVAL__MAX] = {
"norepeat",
"opt",
"plain",
"repeat",
"req"
};
static const struct node nodes[NODE__MAX] = {
{ NULL, 0 },
{ "arg", 0 },
{ "citerefentry", NODE_IGNTEXT },
{ "cmdsynopsis", NODE_IGNTEXT },
{ "code", 0 },
{ "command", 0 },
{ "funcdef", 0 },
{ "funcprototype", NODE_IGNTEXT },
{ "funcsynopsis", NODE_IGNTEXT },
{ "funcsynopsisinfo", 0 },
{ "function", 0 },
{ "manvolnum", 0 },
{ "option", 0 },
{ "para", 0 },
{ "paramdef", 0 },
{ "parameter", 0 },
{ "programlisting", 0 },
{ "refclass", NODE_IGNTEXT },
{ "refdescriptor", NODE_IGNTEXT },
{ "refentry", NODE_IGNTEXT },
{ "refentrytitle", 0 },
{ "refmeta", NODE_IGNTEXT },
{ "refmiscinfo", NODE_IGNTEXT },
{ "refname", 0 },
{ "refnamediv", NODE_IGNTEXT },
{ "refpurpose", 0 },
{ "refsect1", 0 },
{ "refsynopsisdiv", NODE_IGNTEXT },
{ "structname", 0 },
{ "synopsis", 0 },
{ NULL, 0 },
{ "title", 0 },
};
/*
 * Forward declaration: pnode_printarg() re-enters the printer before
 * pnode_print() itself is defined.
 */
static	void	 pnode_print(struct parse *p, struct pnode *pn);
/*
 * Return non-zero if attribute "key" may appear on element "node".
 * "id" is accepted everywhere; "choice" and "rep" only on <arg>.
 * Aborts if "key" is not a real attribute key.
 */
static int
isattrkey(enum nodeid node, enum attrkey key)
{

	/* Common to all. */
	if (ATTRKEY_ID == key)
		return(1);

	if (ATTRKEY_CHOICE == key || ATTRKEY_REP == key)
		return(NODE_ARG == node);

	abort();
	return(0);
}
/*
 * Return non-zero if the recognised value "val" is legal for attribute
 * "key": opt/plain/req belong to "choice", repeat/norepeat to "rep".
 * Aborts if "val" is not one of the recognised values.
 */
static int
isattrval(enum attrkey key, enum attrval val)
{

	if (ATTRVAL_OPT == val ||
	    ATTRVAL_PLAIN == val ||
	    ATTRVAL_REQ == val)
		return(ATTRKEY_CHOICE == key);

	if (ATTRVAL_REPEAT == val || ATTRVAL_NOREPEAT == val)
		return(ATTRKEY_REP == key);

	abort();
	return(0);
}
/*
 * Return non-zero if "parent" is one of the elements that accept
 * inline markup: the parent set shared by citerefentry, code, command,
 * function, option, parameter and structname.
 */
static int
isparent_inline(enum nodeid parent)
{

	switch (parent) {
	case (NODE_FUNCSYNOPSISINFO):
	case (NODE_PARA):
	case (NODE_PROGRAMLISTING):
	case (NODE_REFDESCRIPTOR):
	case (NODE_REFENTRYTITLE):
	case (NODE_REFNAME):
	case (NODE_REFPURPOSE):
	case (NODE_SYNOPSIS):
	case (NODE_TITLE):
		return(1);
	default:
		break;
	}
	return(0);
}

/*
 * Return non-zero if "parent" is a section-level container, i.e.,
 * refsect1 or refsynopsisdiv.
 */
static int
isparent_section(enum nodeid parent)
{

	return(NODE_REFSECT1 == parent ||
	    NODE_REFSYNOPSISDIV == parent);
}

/*
 * Look up whether "parent" is a valid parent for "node".
 * This is taken directly from the DocBook specification: look at the
 * "children" and "parent" sections of each node.
 * Returns non-zero if the nesting is allowed, zero otherwise.
 * Text is accepted under any parent; nothing may contain the root.
 * Aborts if "node" is not a real node type.
 */
static int
isparent(enum nodeid node, enum nodeid parent)
{

	switch (node) {
	case (NODE_ROOT):
		return(0);
	case (NODE_ARG):
		return(NODE_ARG == parent ||
		    NODE_CMDSYNOPSIS == parent);
	case (NODE_CITEREFENTRY):
		return(isparent_inline(parent));
	case (NODE_CMDSYNOPSIS):
		return(NODE_PARA == parent ||
		    isparent_section(parent));
	case (NODE_CODE):
		return(isparent_inline(parent));
	case (NODE_COMMAND):
		return(NODE_CMDSYNOPSIS == parent ||
		    isparent_inline(parent));
	case (NODE_FUNCDEF):
		return(NODE_FUNCPROTOTYPE == parent);
	case (NODE_FUNCPROTOTYPE):
		return(NODE_FUNCSYNOPSIS == parent);
	case (NODE_FUNCSYNOPSIS):
		return(NODE_PARA == parent ||
		    isparent_section(parent));
	case (NODE_FUNCSYNOPSISINFO):
		return(NODE_FUNCSYNOPSIS == parent);
	case (NODE_FUNCTION):
		return(NODE_CODE == parent ||
		    NODE_FUNCDEF == parent ||
		    isparent_inline(parent));
	case (NODE_MANVOLNUM):
		return(NODE_CITEREFENTRY == parent ||
		    NODE_REFMETA == parent);
	case (NODE_OPTION):
		return(NODE_ARG == parent ||
		    isparent_inline(parent));
	case (NODE_PARA):
		return(isparent_section(parent));
	case (NODE_PARAMDEF):
		return(NODE_FUNCPROTOTYPE == parent);
	case (NODE_PARAMETER):
		return(NODE_CODE == parent ||
		    NODE_PARAMDEF == parent ||
		    isparent_inline(parent));
	case (NODE_PROGRAMLISTING):
		return(NODE_PARA == parent ||
		    isparent_section(parent));
	case (NODE_REFCLASS):
		return(NODE_REFNAMEDIV == parent);
	case (NODE_REFDESCRIPTOR):
		return(NODE_REFNAMEDIV == parent);
	case (NODE_REFENTRY):
		return(NODE_ROOT == parent);
	case (NODE_REFENTRYTITLE):
		/*
		 * Only citerefentry and refmeta may hold a title; this
		 * must not spill into the refmeta rule below, which
		 * would wrongly accept refentry as a parent.
		 */
		return(NODE_CITEREFENTRY == parent ||
		    NODE_REFMETA == parent);
	case (NODE_REFMETA):
		return(NODE_REFENTRY == parent);
	case (NODE_REFMISCINFO):
		return(NODE_REFMETA == parent);
	case (NODE_REFNAME):
		return(NODE_REFNAMEDIV == parent);
	case (NODE_REFNAMEDIV):
		return(NODE_REFENTRY == parent);
	case (NODE_REFPURPOSE):
		return(NODE_REFNAMEDIV == parent);
	case (NODE_REFSECT1):
		return(NODE_REFENTRY == parent);
	case (NODE_REFSYNOPSISDIV):
		return(NODE_REFENTRY == parent);
	case (NODE_STRUCTNAME):
		return(NODE_CODE == parent ||
		    NODE_FUNCTION == parent ||
		    NODE_OPTION == parent ||
		    NODE_PARAMETER == parent ||
		    isparent_inline(parent));
	case (NODE_SYNOPSIS):
		return(isparent_section(parent));
	case (NODE_TITLE):
		return(isparent_section(parent));
	case (NODE_TEXT):
		return(1);
	case (NODE__MAX):
		break;
	}

	abort();
	return(0);
}
/*
 * Process a stream of characters (expat character data handler).
 * We store text as nodes in and of themselves.
 * If a text node is already open, append to it.
 * If it's not open, open one under the current context.
 * "arg" is the struct parse; "p" is "sz" bytes of text and is not
 * nil-terminated.
 * Exits the program on memory exhaustion.
 */
static void
xml_char(void *arg, const XML_Char *p, int sz)
{
	struct parse	*ps = arg;
	struct pnode	*dat;
	int		 i;

	/* Stopped or no tree yet. */
	if (ps->stop || NODE_ROOT == ps->node)
		return;

	/* Not supposed to be collecting text. */
	assert(NULL != ps->cur);
	if (NODE_IGNTEXT & nodes[ps->node].flags)
		return;

	/*
	 * Are we in the midst of processing text?
	 * If we're not processing text right now, then create a text
	 * node for doing so.
	 * However, don't do so unless we have some non-whitespace to
	 * process: strip out all leading whitespace to be sure.
	 */
	if (NODE_TEXT != ps->node) {
		/*
		 * The cast matters: bytes of multi-byte UTF-8 sequences
		 * are negative as plain char, which isspace() must not
		 * be given.
		 */
		for (i = 0; i < sz; i++)
			if ( ! isspace((unsigned char)p[i]))
				break;
		if (i == sz)
			return;
		p += i;
		sz -= i;

		dat = calloc(1, sizeof(struct pnode));
		if (NULL == dat) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}

		dat->node = ps->node = NODE_TEXT;
		dat->parent = ps->cur;
		TAILQ_INIT(&dat->childq);
		TAILQ_INIT(&dat->attrq);
		TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
		ps->cur = dat;
		assert(NULL != ps->root);
	}

	/* Append to current buffer. */
	assert(sz >= 0);
	ps->cur->b = realloc(ps->cur->b,
	    ps->cur->bsz + (size_t)sz);
	if (NULL == ps->cur->b) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}
	memcpy(ps->cur->b + ps->cur->bsz, p, (size_t)sz);
	ps->cur->bsz += (size_t)sz;
}
/*
 * Strip trailing whitespace from a text node by shrinking its recorded
 * length; the buffer itself is neither modified nor reallocated.
 */
static void
pnode_trim(struct pnode *pn)
{

	assert(NODE_TEXT == pn->node);
	/* Cast so that bytes >= 0x80 are never passed as negative ints. */
	for ( ; pn->bsz > 0; pn->bsz--)
		if ( ! isspace((unsigned char)pn->b[pn->bsz - 1]))
			break;
}
/*
* Begin an element.
* First, look for the element.
* If we don't find it and we're not parsing, keep going.
* If we don't find it and we're parsing, puke and exit.
* If we find it but we're not parsing yet (i.e., it's not a refentry
* and thus out of context), keep going.
* If we find it and we're at the root and already have a tree, puke and
* exit (FIXME: I don't think this is right?).
* If we find it but we're parsing a text node, close out the text node,
* return to its parent, and keep going.
* Make sure that the element is in the right context.
* Lastly, put the node onto our parse tree and continue.
*/
static void
xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
{
struct parse *ps = arg;
enum nodeid node;
enum attrkey key;
enum attrval val;
struct pnode *dat;
struct pattr *pattr;
const XML_Char **att;
if (ps->stop)
return;
/* Close out text node, if applicable... */
if (NODE_TEXT == ps->node) {
assert(NULL != ps->cur);
pnode_trim(ps->cur);
ps->cur = ps->cur->parent;
assert(NULL != ps->cur);
ps->node = ps->cur->node;
}
for (node = 0; node < NODE__MAX; node++)
if (NULL == nodes[node].name)
continue;
else if (0 == strcmp(nodes[node].name, name))
break;
if (NODE__MAX == node && NODE_ROOT == ps->node) {
return;
} else if (NODE__MAX == node) {
fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
ps->fname, XML_GetCurrentLineNumber(ps->xml),
XML_GetCurrentColumnNumber(ps->xml), name);
ps->stop = 1;
return;
} else if (NODE_ROOT == ps->node && NULL != ps->root) {
fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
ps->fname, XML_GetCurrentLineNumber(ps->xml),
XML_GetCurrentColumnNumber(ps->xml));
ps->stop = 1;
return;
} else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
return;
} else if ( ! isparent(node, ps->node)) {
fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\"\n",
ps->fname, XML_GetCurrentLineNumber(ps->xml),
XML_GetCurrentColumnNumber(ps->xml),
NULL == nodes[ps->node].name ?
"(none)" : nodes[ps->node].name);
ps->stop = 1;
return;
}
if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
perror(NULL);
exit(EXIT_FAILURE);
}
dat->node = ps->node = node;
dat->parent = ps->cur;
TAILQ_INIT(&dat->childq);
TAILQ_INIT(&dat->attrq);
if (NULL != ps->cur)
TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
ps->cur = dat;
if (NULL == ps->root)
ps->root = dat;
/*
* Process attributes.
*/
for (att = atts; NULL != *att; att += 2) {
for (key = 0; key < ATTRKEY__MAX; key++)
if (0 == strcmp(*att, attrkeys[key]))
break;
if (ATTRKEY__MAX == key) {
fprintf(stderr, "%s:%zu:%zu: unknown "
"attribute \"%s\"\n", ps->fname,
XML_GetCurrentLineNumber(ps->xml),
XML_GetCurrentColumnNumber(ps->xml),
*att);
continue;
} else if ( ! isattrkey(node, key)) {
fprintf(stderr, "%s:%zu:%zu: bad "
"attribute \"%s\"\n", ps->fname,
XML_GetCurrentLineNumber(ps->xml),
XML_GetCurrentColumnNumber(ps->xml),
*att);
continue;
}
for (val = 0; val < ATTRVAL__MAX; val++)
if (0 == strcmp(*(att + 1), attrvals[val]))
break;
if (ATTRVAL__MAX != val && ! isattrval(key, val)) {
fprintf(stderr, "%s:%zu:%zu: bad "
"value \"%s\"\n", ps->fname,
XML_GetCurrentLineNumber(ps->xml),
XML_GetCurrentColumnNumber(ps->xml),
*(att + 1));
continue;
}
pattr = calloc(1, sizeof(struct pattr));
pattr->key = key;
pattr->val = val;
if (ATTRVAL__MAX == val)
pattr->rawval = strdup(*(att + 1));
TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
}
}
/*
* Roll up the parse tree.
* If we're at a text node, roll that one up first.
* If we hit the root, then assign ourselves as the NODE_ROOT.
*/
static void
xml_elem_end(void *arg, const XML_Char *name)
{
struct parse *ps = arg;
if (ps->stop || NODE_ROOT == ps->node)
return;
/* Close out text node, if applicable... */
if (NODE_TEXT == ps->node) {
assert(NULL != ps->cur);
pnode_trim(ps->cur);
ps->cur = ps->cur->parent;
assert(NULL != ps->cur);
ps->node = ps->cur->node;
}
if (NULL == (ps->cur = ps->cur->parent))
ps->node = NODE_ROOT;
else
ps->node = ps->cur->node;
}
/*
 * Release a node together with its children, attributes and data
 * buffer.
 * Passing NULL is allowed and does nothing.
 * The node is NOT removed from its parent's queue: see pnode_unlink().
 */
static void
pnode_free(struct pnode *pn)
{
	struct pnode	*kid;
	struct pattr	*attr;

	if (NULL == pn)
		return;

	while ( ! TAILQ_EMPTY(&pn->childq)) {
		kid = TAILQ_FIRST(&pn->childq);
		TAILQ_REMOVE(&pn->childq, kid, child);
		pnode_free(kid);
	}

	while ( ! TAILQ_EMPTY(&pn->attrq)) {
		attr = TAILQ_FIRST(&pn->attrq);
		TAILQ_REMOVE(&pn->attrq, attr, child);
		free(attr->rawval);
		free(attr);
	}

	free(pn->b);
	free(pn);
}
/*
 * Detach a node from its parent's child queue (if it has a parent) and
 * then release it with pnode_free().
 */
static void
pnode_unlink(struct pnode *pn)
{
	struct pnode	*up;

	up = pn->parent;
	if (NULL != up)
		TAILQ_REMOVE(&up->childq, pn, child);

	pnode_free(pn);
}
/*
 * Detach and release every child of a node, leaving the node itself
 * in place with an empty child queue.
 */
static void
pnode_unlinksub(struct pnode *pn)
{
	struct pnode	*kid;

	while (NULL != (kid = TAILQ_FIRST(&pn->childq)))
		pnode_unlink(kid);
}
/*
 * Empty the lookaside buffer: zero its length and nil-terminate it.
 * The allocation is retained.
 */
static void
bufclear(struct parse *p)
{

	p->bsz = 0;
	p->b[0] = '\0';
}
/*
 * Copy the contents of a NODE_TEXT node onto the end of the lookaside
 * buffer, growing the buffer first if the text and its terminator
 * would not fit.
 * The buffer is ALWAYS nil-terminated on return.
 * Exits the program on memory exhaustion.
 */
static void
bufappend(struct parse *p, struct pnode *pn)
{
	size_t		 need;

	assert(NODE_TEXT == pn->node);

	/* Existing text, new text and the trailing nil. */
	need = p->bsz + pn->bsz + 1;
	if (need > p->mbsz) {
		p->mbsz = need;
		p->b = realloc(p->b, p->mbsz);
		if (NULL == p->b) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}
	}

	memcpy(p->b + p->bsz, pn->b, pn->bsz);
	p->bsz += pn->bsz;
	p->b[p->bsz] = '\0';
}
/*
 * Collect into the lookaside buffer the text of "pn" and of everything
 * beneath it.
 * Text nodes contribute their contents; every node, text or not, is
 * then searched for children.
 */
static void
bufappend_r(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;

	if (NODE_TEXT == pn->node)
		bufappend(p, pn);

	TAILQ_FOREACH(kid, &pn->childq, child) {
		bufappend_r(p, kid);
	}
}
#define	MACROLINE_NORM	0	/* print the text as it is */
#define	MACROLINE_UPPER	1	/* print the text in upper case */
/*
 * Recursively print text presumably on a macro line.
 * All text beneath "pn" is gathered into the lookaside buffer, every
 * whitespace character is turned into a plain space, and leading
 * whitespace is skipped.
 * Words that could be mistaken for a macro name are prefixed with "\&"
 * and each backslash is written as "\e".
 * "fl" is MACROLINE_NORM or MACROLINE_UPPER.
 */
static void
pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl)
{
	char		*cp;

	bufclear(p);
	bufappend_r(p, pn);

	/*
	 * The <ctype.h> functions are only defined for EOF and values
	 * representable as unsigned char, hence the casts throughout:
	 * the text may hold bytes that are negative as plain char.
	 */

	/* Convert all space to spaces. */
	for (cp = p->b; '\0' != *cp; cp++)
		if (isspace((unsigned char)*cp))
			*cp = ' ';

	for (cp = p->b; isspace((unsigned char)*cp); cp++)
		/* Spin past whitespace (XXX: necessary?) */ ;

	for ( ; '\0' != *cp; cp++) {
		/*
		 * Escape us if we look like a macro: a word of two or
		 * three letters, the first upper case and the rest
		 * lower case.
		 */
		if ((cp == p->b || ' ' == *(cp - 1)) &&
		    isupper((unsigned char)*cp) &&
		    '\0' != *(cp + 1) &&
		    islower((unsigned char)*(cp + 1)) &&
		    ('\0' == *(cp + 2) ||
		     ' ' == *(cp + 2) ||
		     (islower((unsigned char)*(cp + 2)) &&
		      ('\0' == *(cp + 3) ||
		       ' ' == *(cp + 3)))))
			fputs("\\&", stdout);

		if (MACROLINE_UPPER & fl)
			putchar(toupper((unsigned char)*cp));
		else
			putchar((unsigned char)*cp);

		/* If we're a character escape, escape us. */
		if ('\\' == *cp)
			putchar('e');
	}
}
/*
 * Print the text beneath "pn" as part of a macro line, with no flags
 * and without ending the line.
 */
static void
pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
{

	pnode_printmacrolinetext(p, pn, 0);
}
/*
 * Print the text beneath "pn" as macro-line text and end the line.
 * With no text at all, only the newline is written.
 */
static void
pnode_printmacroline(struct parse *p, struct pnode *pn)
{

	pnode_printmacrolinetext(p, pn, 0);
	putchar('\n');
}
/*
 * Get ready to write a macro name.
 * On a fresh line this writes the leading '.' and notes that the line
 * is now in use; on a line already in progress it writes a separating
 * space instead.
 */
static void
pnode_printmopen(struct parse *p)
{

	if ( ! p->newln) {
		putchar(' ');
		return;
	}

	putchar('.');
	p->newln = 0;
}
/*
 * Finish a macro.
 * "sv" is the fresh-line state saved before the macro was opened: the
 * line is ended only if this macro was the one to start it and it has
 * not been ended already.
 */
static void
pnode_printmclose(struct parse *p, int sv)
{

	if ( ! sv || p->newln)
		return;

	putchar('\n');
	p->newln = 1;
}
/*
 * Discard the first <title> child of a synopsis division, if it has
 * one: the caller prints the section heading itself.
 */
static void
pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
{
	struct pnode	*title;

	/* The iterator is left NULL if no title is found. */
	TAILQ_FOREACH(title, &pn->childq, child)
		if (NODE_TITLE == title->node)
			break;

	if (NULL != title)
		pnode_unlink(title);
}
/*
 * Open an `Sh' section named after the first <title> child, which is
 * then removed from the tree so it isn't printed a second time.
 * Without a title the section is called UNKNOWN.
 */
static void
pnode_printrefsect(struct parse *p, struct pnode *pn)
{
	struct pnode	*title;

	/* The iterator is left NULL if no title is found. */
	TAILQ_FOREACH(title, &pn->childq, child)
		if (NODE_TITLE == title->node)
			break;

	fputs(".Sh ", stdout);

	if (NULL == title) {
		puts("UNKNOWN");
		return;
	}

	pnode_printmacroline(p, title);
	pnode_unlink(title);
}
/*
 * Print a cross-reference as an `Xr' line built from the entry's title
 * and manual volume.
 * If several titles or volumes are present, the last of each is used.
 * A missing title is printed as "unknown", a missing volume as "1".
 */
static void
pnode_printciterefentry(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *title = NULL, *manvol = NULL;

	TAILQ_FOREACH(kid, &pn->childq, child) {
		switch (kid->node) {
		case (NODE_MANVOLNUM):
			manvol = kid;
			break;
		case (NODE_REFENTRYTITLE):
			title = kid;
			break;
		default:
			break;
		}
	}

	fputs(".Xr ", stdout);

	if (NULL == title) {
		fputs("unknown ", stdout);
	} else {
		pnode_printmacrolinepart(p, title);
		putchar(' ');
	}

	if (NULL == manvol)
		puts("1");
	else
		pnode_printmacroline(p, manvol);
}
/*
 * Print the document prologue (`Dd', `Dt' and `Os') from a <refmeta>.
 * The title is written in upper case.
 * If several titles or volumes are present, the last of each is used.
 * A missing title is printed as "UNKNOWN", a missing volume as "1".
 */
static void
pnode_printrefmeta(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *title = NULL, *manvol = NULL;

	TAILQ_FOREACH(kid, &pn->childq, child) {
		switch (kid->node) {
		case (NODE_MANVOLNUM):
			manvol = kid;
			break;
		case (NODE_REFENTRYTITLE):
			title = kid;
			break;
		default:
			break;
		}
	}

	/* The literal is split in two in the source on purpose. */
	puts(".Dd $Mdocdate" "$");
	fputs(".Dt ", stdout);

	if (NULL == title) {
		fputs("UNKNOWN ", stdout);
	} else {
		pnode_printmacrolinetext(p, title, MACROLINE_UPPER);
		putchar(' ');
	}

	if (NULL == manvol)
		puts("1");
	else
		pnode_printmacroline(p, manvol);

	puts(".Os");
}
/*
 * Print a function definition: an `Ft' line for the return type, if
 * the <funcdef> has text of its own, and an `Fo' line for the name.
 * If several text or <function> children are present, the last of
 * each is used.
 * A missing function name is printed as UNKNOWN.
 */
static void
pnode_printfuncdef(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *ftype = NULL, *func = NULL;

	TAILQ_FOREACH(kid, &pn->childq, child) {
		switch (kid->node) {
		case (NODE_TEXT):
			ftype = kid;
			break;
		case (NODE_FUNCTION):
			func = kid;
			break;
		default:
			break;
		}
	}

	if (NULL != ftype) {
		fputs(".Ft ", stdout);
		pnode_printmacroline(p, ftype);
	}

	if (NULL == func) {
		puts(".Fo UNKNOWN");
		return;
	}

	fputs(".Fo ", stdout);
	pnode_printmacroline(p, func);
}
/*
 * Print one function parameter as a quoted `Fa' line holding the type
 * text, if any, followed by the parameter name, if any.
 * If several text or <parameter> children are present, the last of
 * each is used.
 */
static void
pnode_printparamdef(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *ptype = NULL, *param = NULL;

	TAILQ_FOREACH(kid, &pn->childq, child) {
		switch (kid->node) {
		case (NODE_TEXT):
			ptype = kid;
			break;
		case (NODE_PARAMETER):
			param = kid;
			break;
		default:
			break;
		}
	}

	fputs(".Fa \"", stdout);

	if (NULL != ptype) {
		pnode_printmacrolinepart(p, ptype);
		putchar(' ');
	}
	if (NULL != param)
		pnode_printmacrolinepart(p, param);

	puts("\"");
}
/*
 * Print a whole function prototype: the definition (first <funcdef>
 * child, or an UNKNOWN placeholder), one `Fa' line for each <paramdef>
 * child, and the closing `Fc'.
 */
static void
pnode_printfuncprototype(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *fdef;

	/* The iterator is left NULL if there's no funcdef. */
	TAILQ_FOREACH(fdef, &pn->childq, child)
		if (NODE_FUNCDEF == fdef->node)
			break;

	if (NULL == fdef)
		puts(".Fo UNKNOWN");
	else
		pnode_printfuncdef(p, fdef);

	TAILQ_FOREACH(kid, &pn->childq, child) {
		if (NODE_PARAMDEF != kid->node)
			continue;
		pnode_printparamdef(p, kid);
	}

	puts(".Fc");
}
/*
 * Print an <arg> element.
 * Text children become "Ar text", while element children are handed
 * back to the printer as they are (i.e., without a preceding "Ar").
 * The argument is wrapped in `Op' unless choice is "plain" or "req",
 * and each text child is followed by an ellipsis when rep is "repeat".
 */
static void
pnode_printarg(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;
	struct pattr	*attr;
	int		 isop = 1, isrep = 0;

	TAILQ_FOREACH(attr, &pn->attrq, child) {
		switch (attr->key) {
		case (ATTRKEY_CHOICE):
			if (ATTRVAL_PLAIN == attr->val ||
			    ATTRVAL_REQ == attr->val)
				isop = 0;
			break;
		case (ATTRKEY_REP):
			if (ATTRVAL_REPEAT == attr->val)
				isrep = 1;
			break;
		default:
			break;
		}
	}

	if (isop) {
		pnode_printmopen(p);
		fputs("Op ", stdout);
	}

	TAILQ_FOREACH(kid, &pn->childq, child) {
		if (NODE_TEXT != kid->node) {
			pnode_print(p, kid);
			continue;
		}
		pnode_printmopen(p);
		fputs("Ar ", stdout);
		pnode_print(p, kid);
		if (isrep)
			fputs("...", stdout);
	}
}
/*
 * Search depth-first beneath "pn" for a node of type "node".
 * Each child is tested before its own subtree is searched; "pn" itself
 * is never a candidate.
 * Returns the first match or NULL if there is none.
 */
static struct pnode *
pnode_findfirst(struct pnode *pn, enum nodeid node)
{
	struct pnode	*kid, *hit;

	TAILQ_FOREACH(kid, &pn->childq, child) {
		if (kid->node == node)
			return(kid);
		if (NULL != (hit = pnode_findfirst(kid, node)))
			return(hit);
	}

	return(NULL);
}
/*
 * Print the mdoc prologue.
 * The first <refmeta> beneath the root is used and then removed from
 * the tree; if there is no tree or no <refmeta>, a placeholder
 * prologue is printed instead.
 */
static void
pnode_printprologue(struct parse *p, struct pnode *pn)
{
	struct pnode	*meta = NULL;

	if (NULL != p->root)
		meta = pnode_findfirst(p->root, NODE_REFMETA);

	if (NULL == meta) {
		puts(".\\\" Supplying bogus prologue...");
		puts(".Dd $Mdocdate" "$");
		puts(".Dt UNKNOWN 1");
		puts(".Os");
		return;
	}

	pnode_printrefmeta(p, meta);
	pnode_unlink(meta);
}
/*
* Print a parsed node (or ignore it--whatever).
* This is a recursive function.
* FIXME: macro line continuation?
*/
static void
pnode_print(struct parse *p, struct pnode *pn)
{
struct pnode *pp;
char *cp;
int last, sv;
if (NULL == pn)
return;
sv = p->newln;
switch (pn->node) {
case (NODE_ARG):
pnode_printarg(p, pn);
pnode_unlinksub(pn);
break;
case (NODE_CITEREFENTRY):
assert(p->newln);
pnode_printciterefentry(p, pn);
pnode_unlinksub(pn);
break;
case (NODE_CODE):
pnode_printmopen(p);
fputs("Li ", stdout);
break;
case (NODE_COMMAND):
pnode_printmopen(p);
fputs("Nm ", stdout);
break;
case (NODE_FUNCTION):
pnode_printmopen(p);
fputs("Fn ", stdout);
break;
case (NODE_FUNCPROTOTYPE):
assert(p->newln);
pnode_printfuncprototype(p, pn);
pnode_unlinksub(pn);
break;
case (NODE_FUNCSYNOPSISINFO):
pnode_printmopen(p);
fputs("Fd ", stdout);
break;
case (NODE_OPTION):
pnode_printmopen(p);
fputs("Fl ", stdout);
break;
case (NODE_PARA):
assert(p->newln);
puts(".Pp");
break;
case (NODE_PARAMETER):
/* Suppress non-text children... */
pnode_printmopen(p);
fputs("Fa \"", stdout);
pnode_printmacrolinepart(p, pn);
puts("\"");
pnode_unlinksub(pn);
break;
case (NODE_PROGRAMLISTING):
assert(p->newln);
puts(".Bd -literal");
break;
case (NODE_REFMETA):
abort();
break;
case (NODE_REFNAME):
/* Suppress non-text children... */
pnode_printmopen(p);
fputs("Nm ", stdout);
pnode_printmacrolinepart(p, pn);
pnode_unlinksub(pn);
break;
case (NODE_REFNAMEDIV):
assert(p->newln);
puts(".Sh NAME");
break;
case (NODE_REFPURPOSE):
assert(p->newln);
fputs(".Nd ", stdout);
break;
case (NODE_REFSYNOPSISDIV):
assert(p->newln);
pnode_printrefsynopsisdiv(p, pn);
puts(".Sh SYNOPSIS");
break;
case (NODE_REFSECT1):
assert(p->newln);
pnode_printrefsect(p, pn);
break;
case (NODE_STRUCTNAME):
pnode_printmopen(p);
fputs("Vt ", stdout);
break;
case (NODE_TEXT):
bufclear(p);
bufappend(p, pn);
/*
* Output all characters, squeezing out whitespace
* between newlines.
* XXX: all whitespace, including tabs (?).
* Remember to escape control characters and escapes.
*/
assert(p->bsz);
for (last = '\n', cp = p->b; '\0' != *cp; ) {
if ('\n' == last) {
/* Consume all whitespace. */
if (isspace((int)*cp)) {
while (isspace((int)*cp))
cp++;
continue;
} else if ('\'' == *cp || '.' == *cp)
fputs("\\&", stdout);
}
putchar(last = *cp++);
/* If we're a character escape, escape us. */
if ('\\' == last)
putchar('e');
}
p->newln = 0;
break;
default:
break;
}
TAILQ_FOREACH(pp, &pn->childq, child)
pnode_print(p, pp);
switch (pn->node) {
case (NODE_ARG):
case (NODE_CODE):
case (NODE_COMMAND):
case (NODE_FUNCTION):
case (NODE_FUNCSYNOPSISINFO):
case (NODE_OPTION):
case (NODE_PARAMETER):
case (NODE_STRUCTNAME):
case (NODE_TEXT):
pnode_printmclose(p, sv);
break;
case (NODE_REFNAME):
/*
* If we're in the NAME macro and we have multiple
* <refname> macros in sequence, then print out a
* trailing comma before the newline.
*/
if (NULL != pn->parent &&
NODE_REFNAMEDIV == pn->parent->node &&
NULL != TAILQ_NEXT(pn, child) &&
NODE_REFNAME == TAILQ_NEXT(pn, child)->node)
fputs(" ,", stdout);
pnode_printmclose(p, sv);
break;
case (NODE_PROGRAMLISTING):
assert(p->newln);
puts(".Ed");
p->newln = 1;
break;
default:
break;
}
}
/*
 * Loop around the read buffer until we've drained it of all data.
 * Invoke the parser context with each buffer fill.
 * Once the input ends or the parse fails, whatever tree exists is
 * printed and all parse state is released.
 * "b" is a caller-supplied read buffer of "bsz" bytes and "fn" is the
 * file name used in diagnostics.
 * Returns non-zero only if the whole file was read and parsed without
 * error; returns zero on a read or parse error.
 * Exits the program on memory exhaustion.
 */
static int
readfile(XML_Parser xp, int fd,
    char *b, size_t bsz, const char *fn)
{
	struct parse	 p;
	int		 rc;
	ssize_t		 ssz;

	rc = 0;
	memset(&p, 0, sizeof(struct parse));

	/* The printer writes into this buffer, so it must exist. */
	if (NULL == (p.b = malloc(p.bsz = p.mbsz = 1024))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}
	p.fname = fn;
	p.xml = xp;

	XML_SetCharacterDataHandler(xp, xml_char);
	XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
	XML_SetUserData(xp, &p);

	while ((ssz = read(fd, b, bsz)) >= 0) {
		/* A zero-length read marks the final chunk. */
		if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
			fprintf(stderr, "%s: %s\n", fn,
			    XML_ErrorString
			    (XML_GetErrorCode(xp)));
		else if ( ! p.stop && ssz > 0)
			continue;
		/*
		 * Exit when we've read all or errors have occurred
		 * during the parse sequence.
		 */
		p.newln = 1;
		pnode_printprologue(&p, p.root);
		pnode_print(&p, p.root);
		pnode_free(p.root);
		free(p.b);
		return(0 != rc && ! p.stop);
	}

	/* Read error has occurred. */
	perror(fn);
	pnode_free(p.root);
	free(p.b);
	return(0);
}
/*
 * Convert one DocBook file, or standard input if no file or "-" is
 * given, and write the result to standard output.
 * No options are accepted and at most one file may be named.
 * Returns EXIT_SUCCESS only if the input was read and parsed cleanly.
 */
int
main(int argc, char *argv[])
{
	XML_Parser	 xp;
	const char	*fname;
	char		*buf;
	int		 fd, rc;

	fname = "-";
	xp = NULL;
	buf = NULL;
	rc = 0;	/* Set to non-zero on success. */

	if (-1 != getopt(argc, argv, ""))
		return(EXIT_FAILURE);

	argc -= optind;
	argv += optind;

	if (argc > 1)
		return(EXIT_FAILURE);
	else if (argc > 0)
		fname = argv[0];

	/* Read from stdin or a file. */
	fd = 0 == strcmp(fname, "-") ?
	    STDIN_FILENO : open(fname, O_RDONLY, 0);

	/*
	 * Open file for reading.
	 * Allocate a read buffer.
	 * Create the parser context.
	 * Dive directly into the parse.
	 */
	if (-1 == fd)
		perror(fname);
	else if (NULL == (buf = malloc(4096)))
		perror(NULL);
	else if (NULL == (xp = XML_ParserCreate(NULL)))
		perror(NULL);
	else if ( ! readfile(xp, fd, buf, 4096, fname))
		rc = 1;

	if (NULL != xp)
		XML_ParserFree(xp);
	free(buf);

	/* Don't close a descriptor we never opened, nor stdin. */
	if (-1 != fd && STDIN_FILENO != fd)
		close(fd);

	return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
}