/* $Id: docbook2mdoc.c,v 1.62 2019/03/22 17:42:53 schwarze Exp $ */
/*
* Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2019 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/queue.h>
#include <assert.h>
#include <ctype.h>
#include <expat.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "extern.h"
/*
* Global parse state.
* Keep this as simple and small as possible.
*/
struct parse {
XML_Parser xml; /* expat handle; queried for line/column in diagnostics */
enum nodeid node; /* current (NODE_ROOT if pre-tree) */
const char *fname; /* filename */
int stop; /* should we stop now? (set on a fatal input error) */
/* Flag bit for "flags": set when an <inlineequation> was seen. */
#define PARSE_EQN 1
unsigned int flags; /* document-wide flags */
struct pnode *root; /* root of parse tree */
struct pnode *cur; /* current node in tree */
char *b; /* NUL-terminated buffer for pre-print */
size_t bsz; /* current length of b */
size_t mbsz; /* max bsz allocation */
int level; /* header level, starting at 1 */
int newln; /* output: are we on a fresh line */
};
/*
* One row of the element lookup table: maps a DocBook element name
* as it appears in the XML input to the node type stored in the tree.
*/
struct node {
const char *name; /* docbook element name */
enum nodeid node; /* docbook element to generate */
};
/* Tail queue head types for a node's children and its attributes. */
TAILQ_HEAD(pnodeq, pnode);
TAILQ_HEAD(pattrq, pattr);
/*
* A single recognized attribute of an element.
* If the value is one of the known keywords, "val" identifies it and
* "rawval" stays NULL; otherwise "val" is ATTRVAL__MAX and "rawval"
* holds a heap copy of the value string.
*/
struct pattr {
enum attrkey key; /* which attribute */
enum attrval val; /* keyword value, or ATTRVAL__MAX */
char *rawval; /* malloc'ed literal value, or NULL */
TAILQ_ENTRY(pattr) child; /* linkage in the owning node's attrq */
};
/*
* A node of the parse tree: either an element or a run of text
* (NODE_TEXT).
* For text nodes, "b" may be advanced past consumed leading
* punctuation while printing, so "real" keeps the pointer that was
* actually allocated and is the one passed to free().
*/
struct pnode {
enum nodeid node; /* node type */
char *b; /* binary data buffer */
char *real; /* store for "b" */
size_t bsz; /* data buffer size */
struct pnode *parent; /* parent (or NULL if top) */
struct pnodeq childq; /* queue of children */
struct pattrq attrq; /* attributes of node */
TAILQ_ENTRY(pnode) child; /* linkage in the parent's childq */
};
/*
* XML attribute names that are recognized, indexed by enum attrkey.
* NOTE(review): the order presumably has to match the enum attrkey
* declaration in extern.h -- confirm when adding entries.
*/
static const char *attrkeys[ATTRKEY__MAX] = {
"choice",
"class",
"close",
"id",
"linkend",
"open",
"rep"
};
/*
* Attribute values that are recognized as keywords, indexed by
* enum attrval.  Any other value is kept as a raw string instead.
* NOTE(review): the order presumably has to match the enum attrval
* declaration in extern.h -- confirm when adding entries.
*/
static const char *attrvals[ATTRVAL__MAX] = {
"monospaced",
"norepeat",
"opt",
"plain",
"repeat",
"req"
};
/*
* Lookup table from DocBook element names to node types.
* It is searched linearly with strcmp() at every element start and
* terminated by an entry with a NULL name.
* Several sectioning elements (chapter, part, refsect1..3, sect1,
* sect2, section, refsection) deliberately share NODE_SECTION.
*/
static const struct node nodes[] = {
{ "acronym", NODE_ACRONYM },
{ "affiliation", NODE_AFFILIATION },
{ "anchor", NODE_ANCHOR },
{ "application", NODE_APPLICATION },
{ "arg", NODE_ARG },
{ "author", NODE_AUTHOR },
{ "authorgroup", NODE_AUTHORGROUP },
{ "blockquote", NODE_BLOCKQUOTE },
{ "book", NODE_BOOK },
{ "bookinfo", NODE_BOOKINFO },
{ "caution", NODE_CAUTION },
{ "chapter", NODE_SECTION },
{ "citerefentry", NODE_CITEREFENTRY },
{ "citetitle", NODE_CITETITLE },
{ "cmdsynopsis", NODE_CMDSYNOPSIS },
{ "code", NODE_CODE },
{ "colspec", NODE_COLSPEC },
{ "command", NODE_COMMAND },
{ "constant", NODE_CONSTANT },
{ "copyright", NODE_COPYRIGHT },
{ "date", NODE_DATE },
{ "editor", NODE_EDITOR },
{ "emphasis", NODE_EMPHASIS },
{ "entry", NODE_ENTRY },
{ "envar", NODE_ENVAR },
{ "fieldsynopsis", NODE_FIELDSYNOPSIS },
{ "filename", NODE_FILENAME },
{ "firstname", NODE_FIRSTNAME },
{ "firstterm", NODE_FIRSTTERM },
{ "footnote", NODE_FOOTNOTE },
{ "funcdef", NODE_FUNCDEF },
{ "funcprototype", NODE_FUNCPROTOTYPE },
{ "funcsynopsis", NODE_FUNCSYNOPSIS },
{ "funcsynopsisinfo", NODE_FUNCSYNOPSISINFO },
{ "function", NODE_FUNCTION },
{ "glossterm", NODE_GLOSSTERM },
{ "group", NODE_GROUP },
{ "holder", NODE_HOLDER },
{ "index", NODE_INDEX },
{ "indexterm", NODE_INDEXTERM },
{ "info", NODE_INFO },
{ "informalequation", NODE_INFORMALEQUATION },
{ "informaltable", NODE_INFORMALTABLE },
{ "inlineequation", NODE_INLINEEQUATION },
{ "itemizedlist", NODE_ITEMIZEDLIST },
{ "keysym", NODE_KEYSYM },
{ "legalnotice", NODE_LEGALNOTICE },
{ "link", NODE_LINK },
{ "listitem", NODE_LISTITEM },
{ "literal", NODE_LITERAL },
{ "literallayout", NODE_LITERALLAYOUT },
{ "manvolnum", NODE_MANVOLNUM },
{ "member", NODE_MEMBER },
/* MathML elements; only the "mml:" prefixed spelling is matched. */
{ "mml:math", NODE_MML_MATH },
{ "mml:mfenced", NODE_MML_MFENCED },
{ "mml:mfrac", NODE_MML_MFRAC },
{ "mml:mi", NODE_MML_MI },
{ "mml:mn", NODE_MML_MN },
{ "mml:mo", NODE_MML_MO },
{ "mml:mrow", NODE_MML_MROW },
{ "mml:msub", NODE_MML_MSUB },
{ "mml:msup", NODE_MML_MSUP },
{ "modifier", NODE_MODIFIER },
{ "note", NODE_NOTE },
{ "option", NODE_OPTION },
{ "orderedlist", NODE_ORDEREDLIST },
{ "orgname", NODE_ORGNAME },
{ "othername", NODE_OTHERNAME },
{ "para", NODE_PARA },
{ "paramdef", NODE_PARAMDEF },
{ "parameter", NODE_PARAMETER },
{ "part", NODE_SECTION },
{ "phrase", NODE_PHRASE },
{ "preface", NODE_PREFACE },
{ "primary", NODE_PRIMARY },
{ "programlisting", NODE_PROGRAMLISTING },
{ "prompt", NODE_PROMPT },
{ "quote", NODE_QUOTE },
{ "refclass", NODE_REFCLASS },
{ "refdescriptor", NODE_REFDESCRIPTOR },
{ "refentry", NODE_REFENTRY },
{ "refentryinfo", NODE_REFENTRYINFO },
{ "refentrytitle", NODE_REFENTRYTITLE },
{ "refmeta", NODE_REFMETA },
{ "refmetainfo", NODE_REFMETAINFO },
{ "refmiscinfo", NODE_REFMISCINFO },
{ "refname", NODE_REFNAME },
{ "refnamediv", NODE_REFNAMEDIV },
{ "refpurpose", NODE_REFPURPOSE },
{ "refsect1", NODE_SECTION },
{ "refsect2", NODE_SECTION },
{ "refsect3", NODE_SECTION },
{ "refsection", NODE_SECTION },
{ "refsynopsisdiv", NODE_REFSYNOPSISDIV },
{ "releaseinfo", NODE_RELEASEINFO },
{ "replaceable", NODE_REPLACEABLE },
{ "row", NODE_ROW },
{ "sbr", NODE_SBR },
{ "screen", NODE_SCREEN },
{ "secondary", NODE_SECONDARY },
{ "sect1", NODE_SECTION },
{ "sect2", NODE_SECTION },
{ "section", NODE_SECTION },
{ "sgmltag", NODE_SGMLTAG },
{ "simplelist", NODE_SIMPLELIST },
{ "spanspec", NODE_SPANSPEC },
{ "structname", NODE_STRUCTNAME },
{ "subtitle", NODE_SUBTITLE },
{ "surname", NODE_SURNAME },
{ "synopsis", NODE_SYNOPSIS },
{ "table", NODE_TABLE },
{ "tbody", NODE_TBODY },
{ "term", NODE_TERM },
{ "tfoot", NODE_TFOOT },
{ "tgroup", NODE_TGROUP },
{ "thead", NODE_THEAD },
{ "tip", NODE_TIP },
{ "title", NODE_TITLE },
{ "trademark", NODE_TRADEMARK },
{ "type", NODE_TYPE },
{ "ulink", NODE_ULINK },
{ "userinput", NODE_USERINPUT },
{ "variablelist", NODE_VARIABLELIST },
{ "varlistentry", NODE_VARLISTENTRY },
{ "varname", NODE_VARNAME },
{ "warning", NODE_WARNING },
{ "wordasword", NODE_WORDASWORD },
{ "year", NODE_YEAR },
/* Sentinel: a NULL name ends the lookup loop. */
{ NULL, NODE__MAX }
};
/* Set by the -W option: report unknown XML attributes on stderr. */
static int warn = 0;
/*
* Forward declaration: several pnode_print*() helpers defined before
* pnode_print() call back into it.
*/
static void
pnode_print(struct parse *p, struct pnode *pn);
/*
* Process a stream of characters.
* We store text as nodes in and of themselves.
* If a text node is already open, append to it.
* If it's not open, open one under the current context.
*/
static void
xml_char(void *arg, const XML_Char *p, int sz)
{
	struct parse	*ps = arg;
	struct pnode	*text;
	size_t		 newsz;
	int		 skip;

	/* Nothing to do after a fatal error or before the tree exists. */
	if (ps->stop || ps->node == NODE_ROOT)
		return;

	assert(ps->cur != NULL);

	/*
	 * No text node is open yet: open one, but only if the data
	 * contains something other than whitespace.  Leading whitespace
	 * is dropped from the data that starts a new text node.
	 */
	if (ps->node != NODE_TEXT) {
		skip = 0;
		while (skip < sz && isspace((unsigned char)p[skip]))
			skip++;
		if (skip == sz)
			return;
		p += skip;
		sz -= skip;

		if ((text = calloc(1, sizeof(struct pnode))) == NULL) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}
		text->node = NODE_TEXT;
		text->parent = ps->cur;
		TAILQ_INIT(&text->childq);
		TAILQ_INIT(&text->attrq);
		TAILQ_INSERT_TAIL(&ps->cur->childq, text, child);

		/* The new text node becomes the current context. */
		ps->node = NODE_TEXT;
		ps->cur = text;
		assert(ps->root != NULL);
	}

	/* Grow the open text node and copy the new data to its end. */
	assert(sz >= 0);
	text = ps->cur;
	newsz = text->bsz + (size_t)sz;
	text->b = realloc(text->b, newsz);
	if (text->b == NULL) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}
	memcpy(text->b + text->bsz, p, sz);
	text->bsz = newsz;

	/* Remember the allocation itself; "b" may be advanced later. */
	text->real = text->b;
}
/*
 * Strip trailing whitespace from a text node by shrinking its
 * recorded length; the buffer itself is left untouched.
 */
static void
pnode_trim(struct pnode *pn)
{
	assert(pn->node == NODE_TEXT);
	while (pn->bsz > 0 &&
	    isspace((unsigned char)pn->b[pn->bsz - 1]))
		pn->bsz--;
}
/*
* Begin an element.
* First, look for the element.
* If we don't find it and we're not parsing, keep going.
* If we don't find it and we're parsing, puke and exit.
* If we find it but we're not parsing yet (i.e., it's not a refentry
* and thus out of context), keep going.
* If we find it and we're at the root and already have a tree, puke and
* exit (FIXME: I don't think this is right?).
* If we find it but we're parsing a text node, close out the text node,
* return to its parent, and keep going.
* Make sure that the element is in the right context.
* Lastly, put the node onto our parse tree and continue.
*/
static void
xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
{
	struct parse	 *ps = arg;
	const struct node *node;
	enum attrkey	  key;
	enum attrval	  val;
	struct pnode	 *dat;
	struct pattr	 *pattr;
	const XML_Char	**att;

	/*
	 * expat element-start callback.
	 * arg is the struct parse, name the element name, and atts a
	 * NULL-terminated array of alternating attribute names and values.
	 * Input errors set ps->stop; memory exhaustion exits the program.
	 */

	/* FIXME: find a better way to ditch other namespaces. */
	if (ps->stop || 0 == strcmp(name, "xi:include"))
		return;

	/* Close out text node, if applicable... */
	if (NODE_TEXT == ps->node) {
		assert(NULL != ps->cur);
		pnode_trim(ps->cur);
		ps->cur = ps->cur->parent;
		assert(NULL != ps->cur);
		ps->node = ps->cur->node;
	}

	/* Look up the element; the table ends with a NULL name. */
	for (node = nodes; NULL != node->name; node++)
		if (0 == strcmp(node->name, name))
			break;

	if (NULL == node->name) {
		/* Unknown elements are only tolerated outside the tree. */
		if (NODE_ROOT == ps->node)
			return;
		fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
		    ps->fname, XML_GetCurrentLineNumber(ps->xml),
		    XML_GetCurrentColumnNumber(ps->xml), name);
		ps->stop = 1;
		return;
	} else if (NODE_ROOT == ps->node && NULL != ps->root) {
		/* A second top-level tree after the first one was closed. */
		fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
		    ps->fname, XML_GetCurrentLineNumber(ps->xml),
		    XML_GetCurrentColumnNumber(ps->xml));
		ps->stop = 1;
		return;
	}

	/* Remember that the prologue has to set up eqn delimiters. */
	if (NODE_INLINEEQUATION == node->node)
		ps->flags |= PARSE_EQN;

	if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}
	dat->node = ps->node = node->node;
	dat->parent = ps->cur;
	TAILQ_INIT(&dat->childq);
	TAILQ_INIT(&dat->attrq);

	if (NULL != ps->cur)
		TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);

	ps->cur = dat;
	if (NULL == ps->root)
		ps->root = dat;

	/*
	 * Process attributes.
	 * Unknown attribute names are skipped (with a warning under -W);
	 * unknown values of known attributes are kept as raw strings.
	 */
	for (att = atts; NULL != *att; att += 2) {
		for (key = 0; key < ATTRKEY__MAX; key++)
			if (0 == strcmp(*att, attrkeys[key]))
				break;
		if (ATTRKEY__MAX == key) {
			if (warn)
				fprintf(stderr, "%s:%zu:%zu: warning: "
				    "unknown attribute \"%s\"\n",
				    ps->fname,
				    XML_GetCurrentLineNumber(ps->xml),
				    XML_GetCurrentColumnNumber(ps->xml),
				    *att);
			continue;
		}
		for (val = 0; val < ATTRVAL__MAX; val++)
			if (0 == strcmp(*(att + 1), attrvals[val]))
				break;

		/* Allocation failure is fatal, as everywhere else here. */
		if (NULL == (pattr = calloc(1, sizeof(struct pattr)))) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}
		pattr->key = key;
		pattr->val = val;
		if (ATTRVAL__MAX == val &&
		    NULL == (pattr->rawval = strdup(*(att + 1)))) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}
		TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
	}
}
/*
* Roll up the parse tree.
* If we're at a text node, roll that one up first.
* If we hit the root, then assign ourselves as the NODE_ROOT.
*/
static void
xml_elem_end(void *arg, const XML_Char *name)
{
	struct parse	*ps = arg;

	/*
	 * Ignore the event after a fatal error, outside of the tree,
	 * and for the one foreign element that is skipped on open.
	 * FIXME: find a better way to ditch other namespaces.
	 */
	if (ps->stop || ps->node == NODE_ROOT ||
	    strcmp(name, "xi:include") == 0)
		return;

	/* An open text node ends together with its enclosing element. */
	if (ps->node == NODE_TEXT) {
		assert(ps->cur != NULL);
		pnode_trim(ps->cur);
		ps->cur = ps->cur->parent;
		assert(ps->cur != NULL);
		ps->node = ps->cur->node;
	}

	/* Step up to the parent; leaving the top node ends the tree. */
	ps->cur = ps->cur->parent;
	ps->node = ps->cur == NULL ? NODE_ROOT : ps->cur->node;
}
/*
* Recursively free a node (NULL is ok).
*/
static void
pnode_free(struct pnode *pn)
{
	struct pnode	*kid;
	struct pattr	*attr;

	if (pn == NULL)
		return;

	/* Detach and release each subtree, front to back. */
	while ( ! TAILQ_EMPTY(&pn->childq)) {
		kid = TAILQ_FIRST(&pn->childq);
		TAILQ_REMOVE(&pn->childq, kid, child);
		pnode_free(kid);
	}

	/* Release the attributes and their raw value strings. */
	while ( ! TAILQ_EMPTY(&pn->attrq)) {
		attr = TAILQ_FIRST(&pn->attrq);
		TAILQ_REMOVE(&pn->attrq, attr, child);
		free(attr->rawval);
		free(attr);
	}

	/* Free the allocation itself, not the possibly advanced "b". */
	free(pn->real);
	free(pn);
}
/*
* Unlink a node from its parent and pnode_free() it.
*/
static void
pnode_unlink(struct pnode *pn)
{
	struct pnode	*up;

	/* The top node has no parent queue to be removed from. */
	up = pn->parent;
	if (up != NULL)
		TAILQ_REMOVE(&up->childq, pn, child);

	pnode_free(pn);
}
/*
* Unlink all children of a node and pnode_free() them.
*/
static void
pnode_unlinksub(struct pnode *pn)
{
	struct pnode	*kid;

	/* Always take the head: unlinking removes it from the queue. */
	while ((kid = TAILQ_FIRST(&pn->childq)) != NULL)
		pnode_unlink(kid);
}
/*
* Retrieve an enumeration attribute from a node.
* Return ATTRVAL__MAX if the node has no such attribute.
*/
enum attrval
pnode_getattr(struct pnode *pn, enum attrkey key)
{
struct pattr *ap;
TAILQ_FOREACH(ap, &pn->attrq, child)
if (ap->key == key)
return ap->val;
return(ATTRVAL__MAX);
}
/*
* Retrieve an attribute string from a node.
* Return defval if the node has no such attribute.
*/
const char *
pnode_getattr_raw(struct pnode *pn, enum attrkey key, const char *defval)
{
	struct pattr	*attr;

	TAILQ_FOREACH(attr, &pn->attrq, child) {
		if (attr->key != key)
			continue;
		/* Unrecognized values were stored as raw strings. */
		if (attr->val == ATTRVAL__MAX)
			return attr->rawval;
		/* Recognized values are spelled out from the table. */
		return attrvals[attr->val];
	}
	return defval;
}
/*
* Reset the lookaside buffer.
*/
static void
bufclear(struct parse *p)
{
	/* Empty the buffer but keep it NUL-terminated. */
	p->bsz = 0;
	p->b[0] = '\0';
}
/*
* Append NODE_TEXT contents to the current buffer, reallocating its
* size if necessary.
* The buffer is ALWAYS NUL-terminated.
*/
static void
bufappend(struct parse *p, struct pnode *pn)
{
	size_t	 need;

	assert(pn->node == NODE_TEXT);

	/* Room for the old content, the new text, and the NUL. */
	need = p->bsz + pn->bsz + 1;
	if (need > p->mbsz) {
		p->mbsz = need;
		p->b = realloc(p->b, p->mbsz);
		if (p->b == NULL) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}
	}

	memcpy(p->b + p->bsz, pn->b, pn->bsz);
	p->bsz += pn->bsz;
	p->b[p->bsz] = '\0';
}
/*
* Recursively append all NODE_TEXT nodes to the buffer.
* This descends into non-text nodes, but doesn't do anything beyond
* them.
* In other words, this is a recursive text grok.
*/
static void
bufappend_r(struct parse *p, struct pnode *pn)
{
struct pnode *pp;
if (NODE_TEXT == pn->node)
bufappend(p, pn);
TAILQ_FOREACH(pp, &pn->childq, child)
bufappend_r(p, pp);
}
/*
* Recursively search and return the first instance of "node".
*/
static struct pnode *
pnode_findfirst(struct pnode *pn, enum nodeid node)
{
	struct pnode	*kid, *hit;

	/*
	 * Depth-first, in document order: each child is checked
	 * itself before its own descendants are searched.
	 * The node passed in is never a candidate.
	 */
	for (kid = TAILQ_FIRST(&pn->childq); kid != NULL;
	    kid = TAILQ_NEXT(kid, child)) {
		if (kid->node == node)
			return kid;
		if ((hit = pnode_findfirst(kid, node)) != NULL)
			return hit;
	}
	return NULL;
}
/* Flags for pnode_printmacrolinetext(). */
#define MACROLINE_NORM 0 /* no special treatment */
#define MACROLINE_UPPER 1 /* print the text in upper case */
#define MACROLINE_NOWS 2 /* do not print a leading space */
/*
* Recursively print text presumably on a macro line.
* Convert all whitespace to regular spaces.
*/
static void
pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl)
{
	const int	 upper = (fl & MACROLINE_UPPER) != 0;
	char		*s;
	int		 ismacro;

	/* Separate from what is already on the line unless told not to. */
	if ( ! p->newln && (fl & MACROLINE_NOWS) == 0)
		putchar(' ');

	/* Collect all text below pn into the scratch buffer. */
	bufclear(p);
	bufappend_r(p, pn);

	/* Turn every kind of whitespace into a plain blank. */
	for (s = p->b; *s != '\0'; s++)
		if (isspace((unsigned char)*s))
			*s = ' ';

	/* Skip leading blanks. */
	s = p->b;
	while (isspace((unsigned char)*s))
		s++;

	while (*s != '\0') {
		/*
		 * A word of two or three letters starting with a
		 * capital followed by lower case could be taken for
		 * a macro name, so protect it with "\&".
		 */
		ismacro = (s == p->b || s[-1] == ' ') &&
		    isupper((unsigned char)s[0]) &&
		    s[1] != '\0' &&
		    islower((unsigned char)s[1]) &&
		    (s[2] == '\0' || s[2] == ' ' ||
		     (islower((unsigned char)s[2]) &&
		      (s[3] == '\0' || s[3] == ' ')));
		if (ismacro)
			fputs("\\&", stdout);

		putchar(upper ? toupper((unsigned char)*s) : *s);

		/* A backslash has to be written as "\e". */
		if (*s == '\\')
			putchar('e');
		s++;
	}
}
static void
pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
{
pnode_printmacrolinetext(p, pn, 0);
}
/*
* Just pnode_printmacrolinepart() but with a newline.
* If no text, just the newline.
*/
static void
pnode_printmacroline(struct parse *p, struct pnode *pn)
{
	/* The caller must already have started the macro line. */
	assert( ! p->newln);

	pnode_printmacrolinepart(p, pn);

	/* End the line and record that the next output starts fresh. */
	putchar('\n');
	p->newln = 1;
}
/*
 * Get ready to print a macro name: on a fresh line, start a macro
 * line with a dot; otherwise, separate from the preceding output
 * with a blank.
 */
static void
pnode_printmopen(struct parse *p)
{
	if ( ! p->newln) {
		putchar(' ');
		return;
	}
	putchar('.');
	p->newln = 0;
}
/*
 * End the current output line, but only if the caller started it
 * (sv is non-zero) and it has not been ended already.
 */
static void
pnode_printmclose(struct parse *p, int sv)
{
	if ( ! sv || p->newln)
		return;
	putchar('\n');
	p->newln = 1;
}
/*
* Like pnode_printmclose() except we look to the next node, and, if
* found, see if it starts with punctuation.
* If it does, then we print that punctuation before the newline.
*/
static void
pnode_printmclosepunct(struct parse *p, struct pnode *pn, int sv)
{
	struct pnode	*next;

	/* Same precondition as pnode_printmclose(): nothing to close. */
	if ( ! sv || p->newln)
		return;

	/* Without a following text node, just close the line. */
	next = TAILQ_NEXT(pn, child);
	if (next == NULL || next->node != NODE_TEXT) {
		pnode_printmclose(p, sv);
		return;
	}

	/*
	 * Steal a leading comma or period, provided it stands alone
	 * or is followed by whitespace, and put it at the end of the
	 * macro line.  The text node is advanced past the character
	 * so that it is not printed a second time.
	 */
	if (next->bsz > 0 &&
	    (next->b[0] == ',' || next->b[0] == '.') &&
	    (next->bsz == 1 || isspace((unsigned char)next->b[1]))) {
		putchar(' ');
		putchar(next->b[0]);
		next->b++;
		next->bsz--;
	}

	putchar('\n');
	p->newln = 1;
}
/*
 * Print a paragraph break (.Pp) before a node, unless the context
 * makes one superfluous.  The context is the preceding sibling or,
 * if there is none, the parent.
 */
static void
pnode_printpara(struct parse *p, struct pnode *pn)
{
	struct pnode	*ctx;

	assert(p->newln);

	ctx = TAILQ_PREV(pn, pnodeq, child);
	if (ctx == NULL)
		ctx = pn->parent;
	if (ctx == NULL)
		return;

	/* No break right after a table entry or list item. */
	if (ctx->node == NODE_ENTRY || ctx->node == NODE_LISTITEM)
		return;

	/* No break right after a section at header level below 3. */
	if ((ctx->node == NODE_PREFACE || ctx->node == NODE_SECTION) &&
	    p->level < 3)
		return;

	puts(".Pp");
}
/*
* If the SYNOPSIS macro has a superfluous title, kill it.
*/
static void
pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
{
struct pnode *pp;
TAILQ_FOREACH(pp, &pn->childq, child)
if (NODE_TITLE == pp->node) {
pnode_unlink(pp);
return;
}
}
/*
* Start a hopefully-named `Sh' section.
*/
static void
pnode_printrefsect(struct parse *p, struct pnode *pn)
{
	struct pnode	*heading;
	const char	*fallback;
	int		 depth, txtflags;

	/* The top node of the tree does not get a header. */
	if (pn->parent == NULL)
		return;

	/* Entering one more header level; level 1 titles are upper case. */
	depth = ++p->level;
	txtflags = depth == 1 ? MACROLINE_UPPER : 0;

	/* Admonitions are never printed as .Sh or .Ss. */
	if (depth < 3 &&
	    (pn->node == NODE_CAUTION || pn->node == NODE_NOTE ||
	     pn->node == NODE_TIP || pn->node == NODE_WARNING))
		depth = 3;

	/* Look for a title among the direct children. */
	heading = TAILQ_FIRST(&pn->childq);
	while (heading != NULL && heading->node != NODE_TITLE)
		heading = TAILQ_NEXT(heading, child);

	/* Print the macro that matches the header level. */
	if (depth == 1)
		fputs(".Sh", stdout);
	else if (depth == 2)
		fputs(".Ss", stdout);
	else {
		pnode_printpara(p, pn);
		fputs(".Sy", stdout);
	}

	/* Print the title and remove it so it is not printed again. */
	if (heading != NULL) {
		p->newln = 0;
		pnode_printmacrolinetext(p, heading, txtflags);
		pnode_printmclose(p, 1);
		pnode_unlink(heading);
		return;
	}

	/* No title found: make one up from the element type. */
	switch (pn->node) {
	case NODE_PREFACE:
		fallback = "Preface";
		break;
	case NODE_CAUTION:
		fallback = "Caution";
		break;
	case NODE_NOTE:
		fallback = "Note";
		break;
	case NODE_TIP:
		fallback = "Tip";
		break;
	case NODE_WARNING:
		fallback = "Warning";
		break;
	default:
		fallback = "Unknown";
		break;
	}
	printf(" %s\n", fallback);
}
/*
* Start a reference, extracting the title and volume.
*/
static void
pnode_printciterefentry(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *name, *section;

	/* If an element occurs more than once, the last one counts. */
	name = section = NULL;
	for (kid = TAILQ_FIRST(&pn->childq); kid != NULL;
	    kid = TAILQ_NEXT(kid, child)) {
		switch (kid->node) {
		case NODE_MANVOLNUM:
			section = kid;
			break;
		case NODE_REFENTRYTITLE:
			name = kid;
			break;
		default:
			break;
		}
	}

	/* The page name, or a placeholder. */
	if (name == NULL)
		fputs(" unknown ", stdout);
	else
		pnode_printmacrolinepart(p, name);

	/* The section number; a missing one defaults to 1. */
	if (section != NULL) {
		pnode_printmacrolinepart(p, section);
		return;
	}
	puts(" 1");
	p->newln = 1;
}
/*
 * Print the .Dt line from the <refentrytitle> and <manvolnum>
 * children of a <refmeta> node, with placeholders for missing ones.
 */
static void
pnode_printrefmeta(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *name, *section;

	assert(p->newln);

	/* If an element occurs more than once, the last one counts. */
	name = section = NULL;
	for (kid = TAILQ_FIRST(&pn->childq); kid != NULL;
	    kid = TAILQ_NEXT(kid, child)) {
		switch (kid->node) {
		case NODE_MANVOLNUM:
			section = kid;
			break;
		case NODE_REFENTRYTITLE:
			name = kid;
			break;
		default:
			break;
		}
	}

	fputs(".Dt", stdout);
	p->newln = 0;

	/* The page title is printed in upper case. */
	if (name == NULL)
		fputs(" UNKNOWN ", stdout);
	else
		pnode_printmacrolinetext(p, name, MACROLINE_UPPER);

	/* The section number; a missing one defaults to 1. */
	if (section != NULL) {
		pnode_printmacroline(p, section);
		return;
	}
	puts(" 1");
	p->newln = 1;
}
/*
 * Print a <funcdef>: the return type, taken from the last direct
 * text child, as .Ft, then the function name as the start of an
 * .Fo block.  The caller is responsible for the closing .Fc.
 */
static void
pnode_printfuncdef(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *rettype, *fname;

	assert(p->newln);

	rettype = fname = NULL;
	for (kid = TAILQ_FIRST(&pn->childq); kid != NULL;
	    kid = TAILQ_NEXT(kid, child)) {
		switch (kid->node) {
		case NODE_TEXT:
			rettype = kid;
			break;
		case NODE_FUNCTION:
			fname = kid;
			break;
		default:
			break;
		}
	}

	/* The type line is simply omitted when there is no type. */
	if (rettype != NULL) {
		fputs(".Ft", stdout);
		p->newln = 0;
		pnode_printmacroline(p, rettype);
	}

	/* The .Fo line is always printed, if need be with a placeholder. */
	if (fname == NULL) {
		puts(".Fo UNKNOWN");
		p->newln = 1;
		return;
	}
	fputs(".Fo", stdout);
	p->newln = 0;
	pnode_printmacroline(p, fname);
}
/*
 * Print a <paramdef> as one quoted .Fa argument consisting of the
 * type, taken from the last direct text child, and the parameter name.
 */
static void
pnode_printparamdef(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *type, *name;

	assert(p->newln);

	type = name = NULL;
	for (kid = TAILQ_FIRST(&pn->childq); kid != NULL;
	    kid = TAILQ_NEXT(kid, child)) {
		switch (kid->node) {
		case NODE_TEXT:
			type = kid;
			break;
		case NODE_PARAMETER:
			name = kid;
			break;
		default:
			break;
		}
	}

	/* Open the quoted argument. */
	fputs(".Fa \"", stdout);
	p->newln = 0;

	/* No blank between the opening quote and the type. */
	if (type != NULL) {
		pnode_printmacrolinetext(p, type, MACROLINE_NOWS);
		putchar(' ');
	}
	if (name != NULL)
		pnode_printmacrolinepart(p, name);

	/* Close the quote and the line. */
	puts("\"");
	p->newln = 1;
}
/*
* The <mml:mfenced> node is a little peculiar.
* First, it can have arbitrary open and closing tokens, which default
* to parentheses.
* Second, >1 arguments are separated by commas.
*/
static void
pnode_printmathfenced(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;
	int		 first;

	/* Opening fence: the "open" attribute, defaulting to "(". */
	printf("left %s ", pnode_getattr_raw(pn, ATTRKEY_OPEN, "("));

	/*
	 * Print the arguments separated by commas.
	 * Iterating over the queue rather than starting from its
	 * first element keeps an empty <mml:mfenced/> from
	 * dereferencing a NULL pointer.
	 */
	first = 1;
	TAILQ_FOREACH(pp, &pn->childq, child) {
		if ( ! first)
			putchar(',');
		pnode_print(p, pp);
		first = 0;
	}

	/* Closing fence: the "close" attribute, defaulting to ")". */
	printf("right %s ", pnode_getattr_raw(pn, ATTRKEY_CLOSE, ")"));
}
/*
* These math nodes require special handling because they have infix
* syntax, instead of the usual prefix or postfix.
* So we need to break up the first and second child node with a
* particular eqn(7) word.
*/
static void
pnode_printmath(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;

	/* First operand; pnode_print() ignores a NULL node. */
	pp = TAILQ_FIRST(&pn->childq);
	pnode_print(p, pp);

	/* The eqn(7) keyword that goes between the operands. */
	switch (pn->node) {
	case NODE_MML_MSUP:
		fputs(" sup ", stdout);
		break;
	case NODE_MML_MFRAC:
		fputs(" over ", stdout);
		break;
	case NODE_MML_MSUB:
		fputs(" sub ", stdout);
		break;
	default:
		break;
	}

	/*
	 * Second operand.  Without a first child there is no second
	 * one either, and asking for the successor of a NULL node
	 * would crash on malformed input.
	 */
	if (NULL != pp)
		pnode_print(p, TAILQ_NEXT(pp, child));
}
/*
 * Print a <funcprototype> as an .Fo/.Fc block: the first <funcdef>
 * child opens it, each <paramdef> child becomes one .Fa line.
 */
static void
pnode_printfuncprototype(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *def;

	assert(p->newln);

	/* Only the first function definition is used. */
	def = TAILQ_FIRST(&pn->childq);
	while (def != NULL && def->node != NODE_FUNCDEF)
		def = TAILQ_NEXT(def, child);

	if (def == NULL)
		puts(".Fo UNKNOWN");
	else
		pnode_printfuncdef(p, def);

	for (kid = TAILQ_FIRST(&pn->childq); kid != NULL;
	    kid = TAILQ_NEXT(kid, child))
		if (kid->node == NODE_PARAMDEF)
			pnode_printparamdef(p, kid);

	puts(".Fc");
	p->newln = 1;
}
/*
* The <arg> element is more complicated than it should be because text
* nodes are treated like ".Ar foo", but non-text nodes need to be
* re-sent into the printer (i.e., without the preceding ".Ar").
* This also handles the case of "repetition" (or in other words, the
* ellipsis following an argument) and optionality.
*/
static void
pnode_printarg(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;
	struct pattr	*attr;
	int		 optional, repeat;

	/* Arguments are optional and non-repeating by default. */
	optional = 1;
	repeat = 0;
	TAILQ_FOREACH(attr, &pn->attrq, child) {
		switch (attr->key) {
		case ATTRKEY_CHOICE:
			if (attr->val == ATTRVAL_PLAIN ||
			    attr->val == ATTRVAL_REQ)
				optional = 0;
			break;
		case ATTRKEY_REP:
			if (attr->val == ATTRVAL_REPEAT)
				repeat = 1;
			break;
		default:
			break;
		}
	}

	if (optional) {
		pnode_printmopen(p);
		fputs("Op", stdout);
	}

	/*
	 * Text children get an "Ar" macro of their own, and the
	 * ellipsis if the argument repeats; element children are
	 * handed to the generic printer as they are.
	 */
	for (kid = TAILQ_FIRST(&pn->childq); kid != NULL;
	    kid = TAILQ_NEXT(kid, child)) {
		if (kid->node != NODE_TEXT) {
			pnode_print(p, kid);
			continue;
		}
		pnode_printmopen(p);
		fputs("Ar", stdout);
		pnode_print(p, kid);
		if (repeat)
			fputs("...", stdout);
	}
}
/*
 * Print a <group> of alternatives on one macro line, optional
 * unless choice="plain" or choice="req" is given.
 */
static void
pnode_printgroup(struct parse *p, struct pnode *pn)
{
	struct pnode	*cur, *alt;
	struct pattr	*attr;
	int		 optional, fresh;

	optional = 1;
	TAILQ_FOREACH(attr, &pn->attrq, child) {
		if (attr->key != ATTRKEY_CHOICE)
			continue;
		if (attr->val != ATTRVAL_PLAIN && attr->val != ATTRVAL_REQ)
			continue;
		optional = 0;
		break;
	}

	/*
	 * Open a macro line right away; that keeps pnode_print()
	 * from putting the children onto lines of their own.
	 * A non-optional group starting a fresh line needs some
	 * macro to start it, hence the "No".
	 */
	fresh = p->newln;
	pnode_printmopen(p);
	if (optional)
		fputs("Op", stdout);
	else if (fresh)
		fputs("No", stdout);

	/*
	 * Print each child in the normal way; any directly following
	 * siblings of the same type are appended as plain text,
	 * separated by vertical bars.
	 * FIXME: if there's a "Fl", we don't cut off the leading "-"
	 * like we do in pnode_print().
	 */
	for (cur = TAILQ_FIRST(&pn->childq); cur != NULL;
	    cur = TAILQ_NEXT(cur, child)) {
		pnode_print(p, cur);
		while ((alt = TAILQ_NEXT(cur, child)) != NULL &&
		    alt->node == cur->node) {
			fputs(" |", stdout);
			pnode_printmacrolinepart(p, alt);
			cur = alt;
		}
	}

	pnode_printmclose(p, fresh);
}
/*
 * Print the mdoc(7) prologue: .Dd, .Dt, and .Os, followed by the
 * eqn(7) delimiter setup if the document contains inline equations.
 * The tree is taken from p->root, which may be NULL if nothing was
 * parsed; pn is not used.
 */
static void
pnode_printprologue(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;

	pp = NULL == p->root ? NULL :
	    pnode_findfirst(p->root, NODE_REFMETA);

	/* The string is split so that RCS does not expand it here. */
	puts(".Dd $Mdocdate" "$");

	if (NULL != pp) {
		/* Remove <refmeta> afterwards; pnode_print() rejects it. */
		pnode_printrefmeta(p, pp);
		pnode_unlink(pp);
	} else {
		/*
		 * Fall back to the id attribute of the top node.
		 * Without any tree at all, there are no attributes
		 * to look at either.
		 */
		printf(".Dt %s 1\n", NULL == p->root ? "UNKNOWN" :
		    pnode_getattr_raw(p->root, ATTRKEY_ID, "UNKNOWN"));
	}
	puts(".Os");

	if (PARSE_EQN & p->flags) {
		puts(".EQ");
		puts("delim $$");
		puts(".EN");
	}
}
/*
* We can have multiple <term> elements within a <varlistentry>, which
* we should comma-separate as list headers.
*/
static void
pnode_printvarlistentry(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;
	int		 first = 1;

	assert(p->newln);
	fputs(".It", stdout);
	p->newln = 0;

	/*
	 * Print and remove the leading <term> children.
	 * Each term is freed after being printed, so always look at
	 * the head of the queue again instead of stepping from the
	 * node that has just been freed.
	 */
	while (NULL != (pp = TAILQ_FIRST(&pn->childq)) &&
	    NODE_TERM == pp->node) {
		if ( ! first)
			putchar(',');
		pnode_print(p, pp);
		pnode_unlink(pp);
		first = 0;
	}

	/* End the list header; the body is printed by the caller. */
	putchar('\n');
	p->newln = 1;
}
/*
 * Print one table row as a compact dash list with one item for
 * each child of the row.
 */
static void
pnode_printrow(struct parse *p, struct pnode *pn)
{
	struct pnode	*cell;

	puts(".Bl -dash -compact");

	for (cell = TAILQ_FIRST(&pn->childq); cell != NULL;
	    cell = TAILQ_NEXT(cell, child)) {
		assert(p->newln);
		puts(".It");
		pnode_print(p, cell);
		/* Make sure the item's content ends its line. */
		pnode_printmclose(p, 1);
	}

	assert(p->newln);
	puts(".El");
}
/*
 * Print a table as an -ohang list with one item per row, each row
 * in turn being printed as a nested list by pnode_printrow().
 * Titles are printed in front of the list.
 * Titles and rows are removed from the tree once printed.
 */
static void
pnode_printtable(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp, *next;

	assert(p->newln);

	/*
	 * Print and remove the titles.
	 * The successor is saved first because unlinking frees pp.
	 */
	for (pp = TAILQ_FIRST(&pn->childq); NULL != pp; pp = next) {
		next = TAILQ_NEXT(pp, child);
		if (NODE_TITLE != pp->node)
			continue;
		pnode_printpara(p, pp);
		pnode_print(p, pp);
		pnode_unlink(pp);
	}

	assert(p->newln);
	puts(".Bl -ohang");

	/* Rows are found at any depth; removing each one ends the loop. */
	while (NULL != (pp = pnode_findfirst(pn, NODE_ROW))) {
		puts(".It Table Row");
		pnode_printrow(p, pp);
		pnode_printmclose(p, 1);
		pnode_unlink(pp);
	}

	assert(p->newln);
	puts(".El");
}
/*
 * Print an <itemizedlist> as a -bullet list or an <orderedlist> as
 * an -enum list.  Titles are printed in front of the list and
 * removed; every remaining child becomes one list item.
 */
static void
pnode_printlist(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp, *next;

	assert(p->newln);

	/*
	 * Print and remove the titles.
	 * The successor is saved first because unlinking frees pp.
	 */
	for (pp = TAILQ_FIRST(&pn->childq); NULL != pp; pp = next) {
		next = TAILQ_NEXT(pp, child);
		if (NODE_TITLE != pp->node)
			continue;
		pnode_printpara(p, pp);
		pnode_print(p, pp);
		pnode_unlink(pp);
	}

	assert(p->newln);
	if (NODE_ORDEREDLIST == pn->node)
		puts(".Bl -enum");
	else
		puts(".Bl -bullet");

	TAILQ_FOREACH(pp, &pn->childq, child) {
		assert(p->newln);
		puts(".It");
		pnode_print(p, pp);
		/* Make sure the item's content ends its line. */
		pnode_printmclose(p, 1);
	}

	assert(p->newln);
	puts(".El");
}
/*
 * Print a <variablelist> as a -tag list.  Titles are printed in
 * front of the list and removed.  <varlistentry> children print
 * their own .It line; the text of any other child is used as a
 * list header by itself.
 */
static void
pnode_printvariablelist(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp, *next;

	assert(p->newln);

	/*
	 * Print and remove the titles.
	 * The successor is saved first because unlinking frees pp.
	 */
	for (pp = TAILQ_FIRST(&pn->childq); NULL != pp; pp = next) {
		next = TAILQ_NEXT(pp, child);
		if (NODE_TITLE != pp->node)
			continue;
		pnode_printpara(p, pp);
		pnode_print(p, pp);
		pnode_unlink(pp);
	}

	assert(p->newln);
	puts(".Bl -tag -width Ds");

	TAILQ_FOREACH(pp, &pn->childq, child) {
		assert(p->newln);
		if (NODE_VARLISTENTRY == pp->node) {
			pnode_print(p, pp);
			continue;
		}
		/*
		 * The macro line is open once ".It" has been written;
		 * pnode_printmacroline() insists on that being
		 * recorded before it appends the text and the newline.
		 */
		fputs(".It", stdout);
		p->newln = 0;
		pnode_printmacroline(p, pp);
	}

	assert(p->newln);
	puts(".El");
}
/*
* Print a parsed node (or ignore it--whatever).
* This is a recursive function.
* FIXME: if we're in a literal context (<screen> or <programlisting> or
* whatever), don't print inline macros.
*/
static void
pnode_print(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;
	const char	*ccp;
	char		*cp;
	int		 last, sv;

	/*
	 * The function works in three steps:
	 * 1. print whatever has to precede the content of the node,
	 * 2. print all children that are still attached,
	 * 3. print whatever has to follow the content of the node.
	 * Handlers that print their children themselves detach them
	 * with pnode_unlinksub() to keep step 2 from repeating them.
	 */

	if (NULL == pn)
		return;

	/*
	 * Remember whether this node started on a fresh line:
	 * in that case, it is also responsible for ending the line.
	 */
	sv = p->newln;

	/* Step 1: output preceding the content. */

	switch (pn->node) {
	case NODE_APPLICATION:
		pnode_printmopen(p);
		fputs("Nm", stdout);
		break;
	case NODE_ANCHOR:
		/* Don't print anything! */
		return;
	case NODE_ARG:
		pnode_printarg(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_AUTHOR:
		pnode_printmopen(p);
		fputs("An", stdout);
		break;
	case NODE_AUTHORGROUP:
		assert(p->newln);
		puts(".An -split");
		break;
	case NODE_BOOKINFO:
		assert(p->newln);
		puts(".Sh NAME");
		break;
	case NODE_CITEREFENTRY:
		pnode_printmopen(p);
		fputs("Xr", stdout);
		pnode_printciterefentry(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_CODE:
		pnode_printmopen(p);
		fputs("Li", stdout);
		break;
	case NODE_COMMAND:
		pnode_printmopen(p);
		fputs("Nm", stdout);
		break;
	case NODE_CONSTANT:
		pnode_printmopen(p);
		fputs("Dv", stdout);
		break;
	case NODE_EDITOR:
		puts("editor: ");
		pnode_printmopen(p);
		fputs("An", stdout);
		break;
	case NODE_EMPHASIS:
	case NODE_FIRSTTERM:
		pnode_printmopen(p);
		fputs("Em", stdout);
		break;
	case NODE_ENVAR:
		pnode_printmopen(p);
		fputs("Ev", stdout);
		break;
	case NODE_FILENAME:
		pnode_printmopen(p);
		fputs("Pa", stdout);
		break;
	case NODE_FUNCTION:
		pnode_printmopen(p);
		fputs("Fn", stdout);
		break;
	case NODE_FUNCPROTOTYPE:
		assert(p->newln);
		pnode_printfuncprototype(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_FUNCSYNOPSISINFO:
		pnode_printmopen(p);
		fputs("Fd", stdout);
		break;
	case NODE_INDEXTERM:
		/* Index terms are dropped together with their content. */
		return;
	case NODE_INFORMALEQUATION:
		if ( ! p->newln)
			putchar('\n');
		puts(".EQ");
		p->newln = 0;
		break;
	case NODE_INLINEEQUATION:
		/* "$" is the delimiter set up in the prologue. */
		fputc('$', stdout);
		p->newln = 0;
		break;
	case NODE_ITEMIZEDLIST:
		assert(p->newln);
		pnode_printlist(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_GROUP:
		pnode_printgroup(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_LEGALNOTICE:
		assert(p->newln);
		puts(".Sh LEGAL NOTICE");
		break;
	case NODE_LINK:
		/* Without a target, only the content is printed. */
		ccp = pnode_getattr_raw(pn, ATTRKEY_LINKEND, NULL);
		if (NULL == ccp)
			break;
		pnode_printmopen(p);
		printf("Sx %s\n", ccp);
		p->newln = 1;
		return;
	case NODE_LITERAL:
		pnode_printmopen(p);
		fputs("Li", stdout);
		break;
	case NODE_LITERALLAYOUT:
		assert(p->newln);
		puts(".Bd -literal");
		break;
	case NODE_MML_MFENCED:
		pnode_printmathfenced(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_MML_MROW:
	case NODE_MML_MI:
	case NODE_MML_MN:
	case NODE_MML_MO:
		/* Group the content with braces, unless it is empty. */
		if (TAILQ_EMPTY(&pn->childq))
			break;
		fputs(" { ", stdout);
		break;
	case NODE_MML_MFRAC:
	case NODE_MML_MSUB:
	case NODE_MML_MSUP:
		pnode_printmath(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_OPTION:
		pnode_printmopen(p);
		fputs("Fl", stdout);
		break;
	case NODE_ORDEREDLIST:
		assert(p->newln);
		pnode_printlist(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_PARA:
		pnode_printpara(p, pn);
		break;
	case NODE_PARAMETER:
		/* Suppress non-text children... */
		pnode_printmopen(p);
		fputs("Fa \"", stdout);
		pnode_printmacrolinetext(p, pn, MACROLINE_NOWS);
		fputs("\"", stdout);
		pnode_unlinksub(pn);
		break;
	case NODE_QUOTE:
		pnode_printmopen(p);
		fputs("Qo", stdout);
		break;
	case NODE_PROGRAMLISTING:
	case NODE_SCREEN:
		assert(p->newln);
		printf(".Bd %s\n", ATTRVAL_MONOSPACED ==
		    pnode_getattr(pn, ATTRKEY_CLASS) ?
		    "-literal" : "-unfilled");
		break;
	case NODE_REFENTRYINFO:
		/* Suppress. */
		pnode_unlinksub(pn);
		break;
	case NODE_REFMETA:
		/* The prologue is expected to have removed this node. */
		abort();
		break;
	case NODE_REFNAME:
		/* Suppress non-text children... */
		pnode_printmopen(p);
		fputs("Nm", stdout);
		p->newln = 0;
		pnode_printmacrolinepart(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_REFNAMEDIV:
		assert(p->newln);
		puts(".Sh NAME");
		break;
	case NODE_REFPURPOSE:
		assert(p->newln);
		pnode_printmopen(p);
		fputs("Nd", stdout);
		break;
	case NODE_REFSYNOPSISDIV:
		assert(p->newln);
		pnode_printrefsynopsisdiv(p, pn);
		puts(".Sh SYNOPSIS");
		break;
	case NODE_PREFACE:
	case NODE_SECTION:
	case NODE_NOTE:
	case NODE_TIP:
	case NODE_CAUTION:
	case NODE_WARNING:
		assert(p->newln);
		pnode_printrefsect(p, pn);
		break;
	case NODE_REPLACEABLE:
		pnode_printmopen(p);
		fputs("Ar", stdout);
		break;
	case NODE_SBR:
		assert(p->newln);
		puts(".br");
		break;
	case NODE_SGMLTAG:
		pnode_printmopen(p);
		fputs("Li", stdout);
		break;
	case NODE_STRUCTNAME:
		pnode_printmopen(p);
		fputs("Vt", stdout);
		break;
	case NODE_TABLE:
	case NODE_INFORMALTABLE:
		assert(p->newln);
		pnode_printtable(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_TEXT:
		if (0 == p->newln)
			putchar(' ');
		bufclear(p);
		bufappend(p, pn);
		if (0 == p->bsz) {
			/*
			 * Text nodes are never created empty, so this
			 * one must have lost its only character to
			 * pnode_printmclosepunct().
			 */
			assert(pn->real != pn->b);
			break;
		}
		/*
		 * Output all characters, squeezing out whitespace
		 * between newlines.
		 * XXX: all whitespace, including tabs (?).
		 * Remember to escape control characters and escapes.
		 */
		assert(p->bsz);
		cp = p->b;
		/*
		 * There's often a superfluous "-" in its <option> tags
		 * before the actual flags themselves.
		 * "Fl" does this for us, so remove it.
		 */
		if (NULL != pn->parent &&
		    NODE_OPTION == pn->parent->node &&
		    '-' == *cp)
			cp++;
		for (last = '\n'; '\0' != *cp; ) {
			if ('\n' == last) {
				/* Consume all whitespace. */
				if (isspace((unsigned char)*cp)) {
					while (isspace((unsigned char)*cp))
						cp++;
					continue;
				} else if ('\'' == *cp || '.' == *cp)
					/* Not a control character here. */
					fputs("\\&", stdout);
			}
			putchar(last = *cp++);
			/* If we're a character escape, escape us. */
			if ('\\' == last)
				putchar('e');
		}
		p->newln = 0;
		break;
	case NODE_TITLE:
		/*
		 * The title of a book is printed as its description.
		 * A title at the very top of the tree has no parent.
		 */
		if (NULL != pn->parent &&
		    NODE_BOOKINFO == pn->parent->node) {
			pnode_printmopen(p);
			fputs("Nd", stdout);
		}
		break;
	case NODE_TYPE:
		pnode_printmopen(p);
		fputs("Vt", stdout);
		break;
	case NODE_USERINPUT:
		pnode_printmopen(p);
		fputs("Li", stdout);
		break;
	case NODE_VARIABLELIST:
		assert(p->newln);
		pnode_printvariablelist(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_VARLISTENTRY:
		assert(p->newln);
		pnode_printvarlistentry(p, pn);
		break;
	case NODE_VARNAME:
		pnode_printmopen(p);
		fputs("Va", stdout);
		break;
	default:
		break;
	}

	/* Step 2: the children that have not been dealt with above. */

	TAILQ_FOREACH(pp, &pn->childq, child)
		pnode_print(p, pp);

	/* Step 3: output following the content. */

	switch (pn->node) {
	case NODE_INFORMALEQUATION:
		if ( ! p->newln)
			putchar('\n');
		puts(".EN");
		p->newln = 1;
		break;
	case NODE_INLINEEQUATION:
		fputs("$ ", stdout);
		p->newln = sv;
		break;
	case NODE_MML_MROW:
	case NODE_MML_MI:
	case NODE_MML_MN:
	case NODE_MML_MO:
		if (TAILQ_EMPTY(&pn->childq))
			break;
		fputs(" } ", stdout);
		break;
	case NODE_APPLICATION:
	case NODE_ARG:
	case NODE_AUTHOR:
	case NODE_CITEREFENTRY:
	case NODE_CODE:
	case NODE_COMMAND:
	case NODE_CONSTANT:
	case NODE_EDITOR:
	case NODE_EMPHASIS:
	case NODE_ENVAR:
	case NODE_FILENAME:
	case NODE_FIRSTTERM:
	case NODE_FUNCTION:
	case NODE_FUNCSYNOPSISINFO:
	case NODE_LITERAL:
	case NODE_OPTION:
	case NODE_PARAMETER:
	case NODE_REPLACEABLE:
	case NODE_REFPURPOSE:
	case NODE_SGMLTAG:
	case NODE_STRUCTNAME:
	case NODE_TEXT:
	case NODE_TYPE:
	case NODE_USERINPUT:
	case NODE_VARNAME:
		/* End the line, taking along a following comma or period. */
		pnode_printmclosepunct(p, pn, sv);
		break;
	case NODE_QUOTE:
		/* The closing macro may need a line of its own. */
		pnode_printmclose(p, sv);
		sv = p->newln;
		pnode_printmopen(p);
		fputs("Qc", stdout);
		pnode_printmclose(p, sv);
		break;
	case NODE_REFNAME:
		/*
		 * If we're in the NAME macro and we have multiple
		 * <refname> macros in sequence, then print out a
		 * trailing comma before the newline.
		 */
		if (NULL != pn->parent &&
		    NODE_REFNAMEDIV == pn->parent->node &&
		    NULL != TAILQ_NEXT(pn, child) &&
		    NODE_REFNAME == TAILQ_NEXT(pn, child)->node)
			fputs(" ,", stdout);
		pnode_printmclose(p, sv);
		break;
	case NODE_PREFACE:
	case NODE_SECTION:
	case NODE_NOTE:
	case NODE_TIP:
	case NODE_CAUTION:
	case NODE_WARNING:
		/* Leave the header level entered in step 1. */
		p->level--;
		break;
	case NODE_LITERALLAYOUT:
	case NODE_PROGRAMLISTING:
	case NODE_SCREEN:
		assert(p->newln);
		puts(".Ed");
		p->newln = 1;
		break;
	case NODE_TITLE:
		/* Same parent check as in step 1. */
		if (NULL != pn->parent &&
		    NODE_BOOKINFO == pn->parent->node) {
			pnode_printmclose(p, 1);
			puts(".Sh AUTHORS");
		}
		break;
	default:
		break;
	}
}
/*
 * Read the document from "fd" in chunks of at most "bsz" bytes into
 * the caller-supplied buffer "b", feeding each chunk to the expat
 * parser "xp" until end of file, a parse error, or a stop request
 * from one of the handlers.
 * Whatever part of the tree was built by then is printed to standard
 * output and then freed.
 * "fn" is the file name used in diagnostics.
 * Returns non-zero if the whole input parsed without error and no
 * handler set the stop flag; returns 0 on a parse error, a stop
 * request, a read error, or if memory allocation fails.
 */
static int
readfile(XML_Parser xp, int fd,
	char *b, size_t bsz, const char *fn)
{
	struct parse	 p;
	int		 rc;
	ssize_t		 ssz;

	memset(&p, 0, sizeof(p));
	p.fname = fn;
	p.xml = xp;

	/* The handlers rely on p.b; do not start parsing without it. */
	p.bsz = p.mbsz = 1024;
	if ((p.b = malloc(p.mbsz)) == NULL) {
		perror(NULL);
		return(0);
	}

	XML_SetCharacterDataHandler(xp, xml_char);
	XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
	XML_SetUserData(xp, &p);

	while ((ssz = read(fd, b, bsz)) >= 0) {
		/* A zero-length read marks the final chunk for expat. */
		rc = XML_Parse(xp, b, ssz, ssz == 0);
		if (rc == 0) {
			/*
			 * XML_Size is unsigned long or unsigned long long
			 * depending on how expat was built, so convert
			 * explicitly rather than relying on %zu.
			 */
			fprintf(stderr, "%s:%llu:%llu: %s\n", fn,
			    (unsigned long long)
			    XML_GetCurrentLineNumber(xp),
			    (unsigned long long)
			    XML_GetCurrentColumnNumber(xp),
			    XML_ErrorString(XML_GetErrorCode(xp)));
		} else if ( ! p.stop && ssz > 0)
			continue;

		/*
		 * Exit when we've read all or errors have occurred
		 * during the parse sequence.
		 * The tree built so far is printed in either case.
		 */
		p.newln = 1;
		pnode_printprologue(&p, p.root);
		pnode_print(&p, p.root);
		pnode_free(p.root);
		free(p.b);
		return(rc != 0 && ! p.stop);
	}

	/* Read error has occurred. */
	perror(fn);
	pnode_free(p.root);
	free(p.b);
	return(0);
}
/*
 * Program entry point.
 * Accepts the option -W (sets the global "warn" flag) and at most one
 * input file name; without a file name, or with "-", standard input
 * is read.
 * Exits with EXIT_SUCCESS only if readfile() reports success;
 * usage errors, failure to open the input, allocation failures and
 * parse or read errors all result in EXIT_FAILURE.
 */
int
main(int argc, char *argv[])
{
	const size_t	 bufsz = 4096;	/* size of the read buffer */
	XML_Parser	 xp;
	const char	*fname;
	char		*buf;
	int		 fd, rc, ch;
	const char	*progname;

	/* Use the last path component of argv[0] in the usage message. */
	progname = strrchr(argv[0], '/');
	if (progname == NULL)
		progname = argv[0];
	else
		++progname;

	fname = "-";
	xp = NULL;
	buf = NULL;
	rc = 0;		/* becomes 1 only when the conversion succeeds */

	while (-1 != (ch = getopt(argc, argv, "W")))
		switch (ch) {
		case 'W':
			warn = 1;
			break;
		default:
			goto usage;
		}

	argc -= optind;
	argv += optind;

	if (argc > 1) {
		fprintf(stderr, "%s: Too many arguments\n", argv[1]);
		goto usage;
	} else if (argc > 0)
		fname = argv[0];

	/* Read from stdin or a file. */
	fd = 0 == strcmp(fname, "-") ?
	    STDIN_FILENO : open(fname, O_RDONLY, 0);

	/*
	 * Open file for reading.
	 * Allocate a read buffer.
	 * Create the parser context.
	 * Dive directly into the parse.
	 */
	if (-1 == fd)
		perror(fname);
	else if (NULL == (buf = malloc(bufsz)))
		perror(NULL);
	else if (NULL == (xp = XML_ParserCreate(NULL)))
		perror(NULL);
	else if (readfile(xp, fd, buf, bufsz, fname))
		rc = 1;		/* readfile() returns non-zero on success */

	/* Release only what was actually acquired. */
	if (NULL != xp)
		XML_ParserFree(xp);
	free(buf);
	if (-1 != fd && STDIN_FILENO != fd)
		close(fd);
	return(rc ? EXIT_SUCCESS : EXIT_FAILURE);

usage:
	fprintf(stderr, "usage: %s [-W] [input_filename]\n", progname);
	return(EXIT_FAILURE);
}