File: [cvsweb.bsd.lv] / docbook2mdoc / docbook2mdoc.c (download)
Revision 1.70, Sun Mar 24 21:00:11 2019 UTC (5 years, 3 months ago) by schwarze
Branch: MAIN
Changes since 1.69: +24 -9 lines
Avoid reckless use of low-level stdio output functions like putchar(3),
puts(3), and fputs(3) in high-level formatting code. For clarity
and robustness, be explicit whether we are printing to a text line
with print_text() or to a macro line with macro_addarg().
While here, fix NODE_REFNAME formatting, which I broke previously.
For now, leave MathML to eqn(7) translation alone. Usually, that
happens in .EQ blocks, i.e. writing text lines. But it can also
happen in inline ($$) context on macro lines. That is certainly
very fragile and will often fail especially for non-trivial formulae,
but switching to print_text() would break it even more. So postpone
repairs until I come round to work on equation formatting for real.
|
/* $Id: docbook2mdoc.c,v 1.70 2019/03/24 21:00:11 schwarze Exp $ */
/*
* Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2019 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/queue.h>
#include <assert.h>
#include <ctype.h>
#include <expat.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "extern.h"
/*
 * State of the output line that is currently being written.
 * macro_open(), macro_close() and print_text() switch between these.
 */
enum linestate {
LINE_NEW = 0, /* nothing written on the current line yet */
LINE_TEXT, /* in the middle of a text line */
LINE_MACRO /* in the middle of a macro line */
};
/*
 * Global parse state.
 * Keep this as simple and small as possible.
 * The same object is handed to the expat callbacks, which build the
 * tree, and to the printing functions, which use the lookaside buffer
 * and the output line state.
 */
struct parse {
XML_Parser xml; /* parser handle, queried for error positions */
enum nodeid node; /* current (NODE_ROOT if pre-tree) */
const char *fname; /* filename */
int stop; /* should we stop now? */
/* Flag bit: an inline equation was seen while parsing. */
#define PARSE_EQN 1
unsigned int flags; /* document-wide flags */
struct pnode *root; /* root of parse tree */
struct pnode *cur; /* current node in tree */
char *b; /* NUL-terminated buffer for pre-print */
size_t bsz; /* current length of b */
size_t mbsz; /* max bsz allocation */
int level; /* header level, starting at 1 */
enum linestate linestate; /* state of the current output line */
};
/*
 * One entry of the element name lookup table nodes[].
 */
struct node {
const char *name; /* docbook element name */
enum nodeid node; /* docbook element to generate */
};
/* Queue head types for lists of child nodes and of attributes. */
TAILQ_HEAD(pnodeq, pnode);
TAILQ_HEAD(pattrq, pattr);
/*
 * One attribute of an element.
 * Only attributes whose name is listed in attrkeys[] are stored.
 */
struct pattr {
enum attrkey key; /* which attribute this is */
enum attrval val; /* known value, or ATTRVAL__MAX if not in attrvals[] */
char *rawval; /* copy of the value, only set if val is ATTRVAL__MAX */
TAILQ_ENTRY(pattr) child; /* link in the attrq of the owning node */
};
/*
 * One node of the parse tree: either an element or a run of text.
 * For text nodes, "b" may be advanced past characters that were
 * already printed, so "real" keeps the pointer that has to be freed.
 */
struct pnode {
enum nodeid node; /* node type */
char *b; /* binary data buffer */
char *real; /* store for "b" */
size_t bsz; /* data buffer size */
struct pnode *parent; /* parent (or NULL if top) */
struct pnodeq childq; /* queue of children */
struct pattrq attrq; /* attributes of node */
TAILQ_ENTRY(pnode) child; /* link in the childq of the parent */
};
/*
 * Names of the attributes that are recognized, indexed by enum attrkey.
 * The order presumably has to match the enum declaration in extern.h;
 * verify there before adding entries.
 */
static const char *attrkeys[ATTRKEY__MAX] = {
"choice",
"class",
"close",
"id",
"linkend",
"open",
"rep"
};
/*
 * Attribute values that are recognized, indexed by enum attrval.
 * Values not listed here are kept as raw strings instead.
 * The order presumably has to match the enum declaration in extern.h;
 * verify there before adding entries.
 */
static const char *attrvals[ATTRVAL__MAX] = {
"monospaced",
"norepeat",
"opt",
"plain",
"repeat",
"req"
};
/*
 * Lookup table from DocBook element names to node types.
 * It is searched linearly by xml_elem_start() and terminated by an
 * entry with a NULL name.
 * Several element names (chapter, part, refsect*, sect*, section)
 * share the NODE_SECTION type.
 */
static const struct node nodes[] = {
{ "acronym", NODE_ACRONYM },
{ "affiliation", NODE_AFFILIATION },
{ "anchor", NODE_ANCHOR },
{ "application", NODE_APPLICATION },
{ "arg", NODE_ARG },
{ "author", NODE_AUTHOR },
{ "authorgroup", NODE_AUTHORGROUP },
{ "blockquote", NODE_BLOCKQUOTE },
{ "book", NODE_BOOK },
{ "bookinfo", NODE_BOOKINFO },
{ "caution", NODE_CAUTION },
{ "chapter", NODE_SECTION },
{ "citerefentry", NODE_CITEREFENTRY },
{ "citetitle", NODE_CITETITLE },
{ "cmdsynopsis", NODE_CMDSYNOPSIS },
{ "code", NODE_CODE },
{ "colspec", NODE_COLSPEC },
{ "command", NODE_COMMAND },
{ "constant", NODE_CONSTANT },
{ "copyright", NODE_COPYRIGHT },
{ "date", NODE_DATE },
{ "editor", NODE_EDITOR },
{ "email", NODE_EMAIL },
{ "emphasis", NODE_EMPHASIS },
{ "entry", NODE_ENTRY },
{ "envar", NODE_ENVAR },
{ "fieldsynopsis", NODE_FIELDSYNOPSIS },
{ "filename", NODE_FILENAME },
{ "firstname", NODE_FIRSTNAME },
{ "firstterm", NODE_FIRSTTERM },
{ "footnote", NODE_FOOTNOTE },
{ "funcdef", NODE_FUNCDEF },
{ "funcprototype", NODE_FUNCPROTOTYPE },
{ "funcsynopsis", NODE_FUNCSYNOPSIS },
{ "funcsynopsisinfo", NODE_FUNCSYNOPSISINFO },
{ "function", NODE_FUNCTION },
{ "glossterm", NODE_GLOSSTERM },
{ "group", NODE_GROUP },
{ "holder", NODE_HOLDER },
{ "index", NODE_INDEX },
{ "indexterm", NODE_INDEXTERM },
{ "info", NODE_INFO },
{ "informalequation", NODE_INFORMALEQUATION },
{ "informaltable", NODE_INFORMALTABLE },
{ "inlineequation", NODE_INLINEEQUATION },
{ "itemizedlist", NODE_ITEMIZEDLIST },
{ "keysym", NODE_KEYSYM },
{ "legalnotice", NODE_LEGALNOTICE },
{ "link", NODE_LINK },
{ "listitem", NODE_LISTITEM },
{ "literal", NODE_LITERAL },
{ "literallayout", NODE_LITERALLAYOUT },
{ "manvolnum", NODE_MANVOLNUM },
{ "member", NODE_MEMBER },
{ "mml:math", NODE_MML_MATH },
{ "mml:mfenced", NODE_MML_MFENCED },
{ "mml:mfrac", NODE_MML_MFRAC },
{ "mml:mi", NODE_MML_MI },
{ "mml:mn", NODE_MML_MN },
{ "mml:mo", NODE_MML_MO },
{ "mml:mrow", NODE_MML_MROW },
{ "mml:msub", NODE_MML_MSUB },
{ "mml:msup", NODE_MML_MSUP },
{ "modifier", NODE_MODIFIER },
{ "note", NODE_NOTE },
{ "option", NODE_OPTION },
{ "orderedlist", NODE_ORDEREDLIST },
{ "orgname", NODE_ORGNAME },
{ "othername", NODE_OTHERNAME },
{ "para", NODE_PARA },
{ "paramdef", NODE_PARAMDEF },
{ "parameter", NODE_PARAMETER },
{ "part", NODE_SECTION },
{ "personname", NODE_PERSONNAME },
{ "phrase", NODE_PHRASE },
{ "preface", NODE_PREFACE },
{ "primary", NODE_PRIMARY },
{ "programlisting", NODE_PROGRAMLISTING },
{ "prompt", NODE_PROMPT },
{ "quote", NODE_QUOTE },
{ "refclass", NODE_REFCLASS },
{ "refdescriptor", NODE_REFDESCRIPTOR },
{ "refentry", NODE_REFENTRY },
{ "refentryinfo", NODE_REFENTRYINFO },
{ "refentrytitle", NODE_REFENTRYTITLE },
{ "refmeta", NODE_REFMETA },
{ "refmetainfo", NODE_REFMETAINFO },
{ "refmiscinfo", NODE_REFMISCINFO },
{ "refname", NODE_REFNAME },
{ "refnamediv", NODE_REFNAMEDIV },
{ "refpurpose", NODE_REFPURPOSE },
{ "refsect1", NODE_SECTION },
{ "refsect2", NODE_SECTION },
{ "refsect3", NODE_SECTION },
{ "refsection", NODE_SECTION },
{ "refsynopsisdiv", NODE_REFSYNOPSISDIV },
{ "releaseinfo", NODE_RELEASEINFO },
{ "replaceable", NODE_REPLACEABLE },
{ "row", NODE_ROW },
{ "sbr", NODE_SBR },
{ "screen", NODE_SCREEN },
{ "secondary", NODE_SECONDARY },
{ "sect1", NODE_SECTION },
{ "sect2", NODE_SECTION },
{ "section", NODE_SECTION },
{ "sgmltag", NODE_SGMLTAG },
{ "simplelist", NODE_SIMPLELIST },
{ "spanspec", NODE_SPANSPEC },
{ "structname", NODE_STRUCTNAME },
{ "subtitle", NODE_SUBTITLE },
{ "surname", NODE_SURNAME },
{ "synopsis", NODE_SYNOPSIS },
{ "table", NODE_TABLE },
{ "tbody", NODE_TBODY },
{ "term", NODE_TERM },
{ "tfoot", NODE_TFOOT },
{ "tgroup", NODE_TGROUP },
{ "thead", NODE_THEAD },
{ "tip", NODE_TIP },
{ "title", NODE_TITLE },
{ "trademark", NODE_TRADEMARK },
{ "type", NODE_TYPE },
{ "ulink", NODE_ULINK },
{ "userinput", NODE_USERINPUT },
{ "variablelist", NODE_VARIABLELIST },
{ "varlistentry", NODE_VARLISTENTRY },
{ "varname", NODE_VARNAME },
{ "warning", NODE_WARNING },
{ "wordasword", NODE_WORDASWORD },
{ "year", NODE_YEAR },
{ NULL, NODE__MAX }
};
/* Non-zero if warnings about unknown attributes are wanted (-W). */
static int warn = 0;
/*
 * Forward declaration: the node-specific printers below call back
 * into the generic printer.
 */
static void
pnode_print(struct parse *p, struct pnode *pn);
/*
 * Character data callback.
 * Text is kept in nodes of its own (NODE_TEXT).
 * While a text node is open, new data is appended to it; otherwise
 * a new text node is opened below the current element, but only if
 * the data contains something other than whitespace.
 * Leading whitespace of a new text node is dropped.
 */
static void
xml_char(void *arg, const XML_Char *p, int sz)
{
	struct parse	*ps = arg;
	struct pnode	*text;
	size_t		 newsz;
	int		 skip;

	/* Stopped or no tree yet. */
	if (ps->stop || ps->node == NODE_ROOT)
		return;

	assert(ps->cur != NULL);

	if (ps->node != NODE_TEXT) {
		/* Not collecting text yet: skip leading whitespace. */
		skip = 0;
		while (skip < sz && isspace((unsigned char)p[skip]))
			skip++;

		/* Nothing but whitespace: do not open a node. */
		if (skip == sz)
			return;
		p += skip;
		sz -= skip;

		/* Open a new text node below the current element. */
		if ((text = calloc(1, sizeof(*text))) == NULL) {
			perror(NULL);
			exit(1);
		}
		text->node = NODE_TEXT;
		text->parent = ps->cur;
		TAILQ_INIT(&text->childq);
		TAILQ_INIT(&text->attrq);
		TAILQ_INSERT_TAIL(&ps->cur->childq, text, child);
		ps->node = NODE_TEXT;
		ps->cur = text;
		assert(ps->root != NULL);
	}

	/* Grow the buffer of the open text node and append the data. */
	assert(sz >= 0);
	newsz = ps->cur->bsz + (size_t)sz;
	ps->cur->b = realloc(ps->cur->b, newsz);
	if (ps->cur->b == NULL) {
		perror(NULL);
		exit(1);
	}
	memcpy(ps->cur->b + ps->cur->bsz, p, sz);
	ps->cur->bsz = newsz;

	/* Remember the allocation; "b" may be advanced while printing. */
	ps->cur->real = ps->cur->b;
}
/*
 * Drop trailing whitespace from a text node by shrinking its length.
 * The buffer itself is left untouched.
 */
static void
pnode_trim(struct pnode *pn)
{
	assert(pn->node == NODE_TEXT);
	while (pn->bsz > 0 &&
	    isspace((unsigned char)pn->b[pn->bsz - 1]))
		pn->bsz--;
}
/*
* Begin an element.
* First, look for the element.
* If we don't find it and we're not parsing, keep going.
* If we don't find it and we're parsing, puke and exit.
* If we find it but we're not parsing yet (i.e., it's not a refentry
* and thus out of context), keep going.
* If we find it and we're at the root and already have a tree, puke and
* exit (FIXME: I don't think this is right?).
* If we find it but we're parsing a text node, close out the text node,
* return to its parent, and keep going.
* Make sure that the element is in the right context.
* Lastly, put the node onto our parse tree and continue.
*/
static void
xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
{
struct parse *ps = arg;
const struct node *node;
enum attrkey key;
enum attrval val;
struct pnode *dat;
struct pattr *pattr;
const XML_Char **att;
/* FIXME: find a better way to ditch other namespaces. */
if (ps->stop || strcmp(name, "xi:include") == 0)
return;
/* Close out text node, if applicable... */
if (ps->node == NODE_TEXT) {
pnode_trim(ps->cur);
ps->cur = ps->cur->parent;
ps->node = ps->cur->node;
}
for (node = nodes; node->name != NULL; node++)
if (strcmp(node->name, name) == 0)
break;
if (node->name == NULL) {
if (ps->node == NODE_ROOT)
return;
fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
ps->fname, XML_GetCurrentLineNumber(ps->xml),
XML_GetCurrentColumnNumber(ps->xml), name);
ps->stop = 1;
return;
} else if (ps->node == NODE_ROOT && ps->root != NULL) {
fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
ps->fname, XML_GetCurrentLineNumber(ps->xml),
XML_GetCurrentColumnNumber(ps->xml));
ps->stop = 1;
return;
}
if (node->node == NODE_INLINEEQUATION)
ps->flags |= PARSE_EQN;
if ((dat = calloc(1, sizeof(struct pnode))) == NULL) {
perror(NULL);
exit(1);
}
dat->node = ps->node = node->node;
dat->parent = ps->cur;
TAILQ_INIT(&dat->childq);
TAILQ_INIT(&dat->attrq);
if (ps->cur != NULL)
TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
ps->cur = dat;
if (ps->root == NULL)
ps->root = dat;
/*
* Process attributes.
*/
for (att = atts; *att != NULL; att += 2) {
for (key = 0; key < ATTRKEY__MAX; key++)
if (strcmp(*att, attrkeys[key]) == 0)
break;
if (key == ATTRKEY__MAX) {
if (warn)
fprintf(stderr, "%s:%zu:%zu: warning: "
"unknown attribute \"%s\"\n",
ps->fname,
XML_GetCurrentLineNumber(ps->xml),
XML_GetCurrentColumnNumber(ps->xml),
*att);
continue;
}
for (val = 0; val < ATTRVAL__MAX; val++)
if (strcmp(att[1], attrvals[val]) == 0)
break;
pattr = calloc(1, sizeof(struct pattr));
pattr->key = key;
pattr->val = val;
if (val == ATTRVAL__MAX)
pattr->rawval = strdup(att[1]);
TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
}
}
/*
 * End an element: move back up by one level in the parse tree.
 * An open text node is closed first.
 * When moving up from the root element, the parser goes back to the
 * NODE_ROOT state.
 */
static void
xml_elem_end(void *arg, const XML_Char *name)
{
	struct parse	*ps = arg;

	/* FIXME: find a better way to ditch other namespaces. */
	if (ps->stop || ps->node == NODE_ROOT ||
	    strcmp(name, "xi:include") == 0)
		return;

	/* Close out text node, if applicable... */
	if (ps->node == NODE_TEXT) {
		pnode_trim(ps->cur);
		ps->cur = ps->cur->parent;
		ps->node = ps->cur->node;
	}

	/* Leave the element itself. */
	ps->cur = ps->cur->parent;
	ps->node = ps->cur == NULL ? NODE_ROOT : ps->cur->node;
}
/*
 * Free a node together with all its attributes and, recursively,
 * all its descendants.  Passing NULL is allowed and does nothing.
 * The node is not removed from the child queue of its parent.
 */
static void
pnode_free(struct pnode *pn)
{
	struct pnode	*kid;
	struct pattr	*attr;

	if (pn == NULL)
		return;

	/* Children first. */
	for (;;) {
		kid = TAILQ_FIRST(&pn->childq);
		if (kid == NULL)
			break;
		TAILQ_REMOVE(&pn->childq, kid, child);
		pnode_free(kid);
	}

	/* Then the attributes. */
	for (;;) {
		attr = TAILQ_FIRST(&pn->attrq);
		if (attr == NULL)
			break;
		TAILQ_REMOVE(&pn->attrq, attr, child);
		free(attr->rawval);
		free(attr);
	}

	/* Free the original allocation, "b" may have been advanced. */
	free(pn->real);
	free(pn);
}
/*
 * Remove a node from the child queue of its parent, if it has
 * a parent, and then pnode_free() it.
 */
static void
pnode_unlink(struct pnode *pn)
{
	struct pnode	*parent;

	parent = pn->parent;
	if (parent != NULL)
		TAILQ_REMOVE(&parent->childq, pn, child);
	pnode_free(pn);
}
/*
 * Remove all children from a node and pnode_free() them.
 * The node itself stays in place.
 */
static void
pnode_unlinksub(struct pnode *pn)
{
	struct pnode	*kid;

	while ((kid = TAILQ_FIRST(&pn->childq)) != NULL)
		pnode_unlink(kid);
}
/*
 * Look up an attribute of a node by key and return its value
 * as an enumeration constant.
 * The result is ATTRVAL__MAX if the node lacks the attribute;
 * the same constant is stored for values that are not predefined.
 */
enum attrval
pnode_getattr(struct pnode *pn, enum attrkey key)
{
	struct pattr	*ap;

	for (ap = TAILQ_FIRST(&pn->attrq); ap != NULL;
	    ap = TAILQ_NEXT(ap, child)) {
		if (ap->key == key)
			return ap->val;
	}
	return ATTRVAL__MAX;
}
/*
 * Look up an attribute of a node by key and return its value
 * as a string: the stored raw string for values that are not
 * predefined, the name from attrvals[] otherwise.
 * The result is defval if the node lacks the attribute.
 */
const char *
pnode_getattr_raw(struct pnode *pn, enum attrkey key, const char *defval)
{
	struct pattr	*ap;

	for (ap = TAILQ_FIRST(&pn->attrq); ap != NULL;
	    ap = TAILQ_NEXT(ap, child)) {
		if (ap->key != key)
			continue;
		if (ap->val == ATTRVAL__MAX)
			return ap->rawval;
		return attrvals[ap->val];
	}
	return defval;
}
/*
 * Empty the lookaside buffer, keeping its allocation.
 */
static void
bufclear(struct parse *p)
{
	p->bsz = 0;
	p->b[0] = '\0';
}
/*
 * Copy the content of one NODE_TEXT node to the end of the
 * lookaside buffer, growing the buffer when it is too small.
 * The buffer is NUL-terminated on return.
 * Memory exhaustion is fatal.
 */
static void
bufappend(struct parse *p, struct pnode *pn)
{
	size_t	 need;

	assert(pn->node == NODE_TEXT);

	/* Old content, new content, and the terminating NUL. */
	need = p->bsz + pn->bsz + 1;
	if (need > p->mbsz) {
		p->mbsz = need;
		p->b = realloc(p->b, p->mbsz);
		if (p->b == NULL) {
			perror(NULL);
			exit(1);
		}
	}
	memcpy(p->b + p->bsz, pn->b, pn->bsz);
	p->bsz += pn->bsz;
	p->b[p->bsz] = '\0';
}
/*
 * Walk the subtree rooted at pn in document order and append the
 * content of every NODE_TEXT node found to the lookaside buffer.
 * Nodes of other types contribute nothing by themselves.
 */
static void
bufappend_r(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;

	if (pn->node == NODE_TEXT)
		bufappend(p, pn);
	for (kid = TAILQ_FIRST(&pn->childq); kid != NULL;
	    kid = TAILQ_NEXT(kid, child))
		bufappend_r(p, kid);
}
/*
 * Search the descendants of pn, depth first and in document order,
 * for a node of the given type.
 * Return the first match, or NULL if there is none.
 * The node pn itself is not considered.
 */
static struct pnode *
pnode_findfirst(struct pnode *pn, enum nodeid node)
{
	struct pnode	*kid, *found;

	for (kid = TAILQ_FIRST(&pn->childq); kid != NULL;
	    kid = TAILQ_NEXT(kid, child)) {
		if (kid->node == node)
			return kid;
		if ((found = pnode_findfirst(kid, node)) != NULL)
			return found;
	}
	return NULL;
}
/*
 * Print a macro name.
 * On a macro line, append it after a blank.
 * Otherwise, start a new macro line with a dot, ending a pending
 * text line first.
 * The output is left on an open macro line.
 */
static void
macro_open(struct parse *p, const char *name)
{
	if (p->linestate == LINE_MACRO) {
		putchar(' ');
	} else if (p->linestate == LINE_TEXT ||
	    p->linestate == LINE_NEW) {
		if (p->linestate == LINE_TEXT)
			putchar('\n');
		putchar('.');
		p->linestate = LINE_MACRO;
	}
	fputs(name, stdout);
}
/*
 * End the macro line that is currently open.
 * Calling this in any other line state is a programming error.
 */
static void
macro_close(struct parse *p)
{
	assert(p->linestate == LINE_MACRO);
	fputc('\n', stdout);
	p->linestate = LINE_NEW;
}
/*
 * Print a macro without arguments and end the output line
 * right after it.
 */
static void
macro_line(struct parse *p, const char *name)
{
	macro_open(p, name);
	macro_close(p);
}
#define MACROLINE_UPPER 1
#define MACROLINE_NOWS 2
/*
 * Print an argument string on a macro line, collapsing whitespace.
 * Each run of whitespace in arg becomes a single blank.
 * A blank is also printed before the first word, unless
 * MACROLINE_NOWS is set in fl.
 * With MACROLINE_UPPER in fl, the text is converted to upper case.
 * Backslashes are printed as "\e".
 * A word consisting of an upper case letter followed by one or two
 * lower case letters looks like a macro name and gets "\&" prepended
 * such that it is not interpreted as a macro.
 */
static void
macro_addarg(struct parse *p, const char *arg, int fl)
{
	const char	*cp;
	int		 wantspace;

	assert(p->linestate == LINE_MACRO);
	wantspace = !(fl & MACROLINE_NOWS);
	for (cp = arg; *cp != '\0'; cp++) {
		if (isspace((unsigned char)*cp)) {
			wantspace = 1;
			continue;
		} else if (wantspace) {
			putchar(' ');
			wantspace = 0;
		}

		/*
		 * Escape us if we look like a macro.
		 * Any whitespace character delimits a word here
		 * because all of them are printed as a blank.
		 * The tests are ordered such that no character
		 * beyond the terminating NUL is ever inspected.
		 */
		if ((cp == arg || isspace((unsigned char)cp[-1])) &&
		    isupper((unsigned char)cp[0]) &&
		    islower((unsigned char)cp[1]) &&
		    (cp[2] == '\0' || isspace((unsigned char)cp[2]) ||
		     (islower((unsigned char)cp[2]) &&
		      (cp[3] == '\0' || isspace((unsigned char)cp[3])))))
			fputs("\\&", stdout);

		if (fl & MACROLINE_UPPER)
			putchar(toupper((unsigned char)*cp));
		else
			putchar(*cp);

		/* Turn a backslash into the escape sequence "\e". */
		if (*cp == '\\')
			putchar('e');
	}
}
/*
 * Print a macro with one argument string, escaped and with
 * collapsed whitespace, and end the output line.
 */
static void
macro_argline(struct parse *p, const char *name, const char *arg)
{
	const int	 noflags = 0;

	macro_open(p, name);
	macro_addarg(p, arg, noflags);
	macro_close(p);
}
/*
 * Collect all text contained in the subtree rooted at pn in the
 * lookaside buffer and print it as arguments on the macro line
 * that is currently open.
 * The flags fl are handed on to macro_addarg().
 */
static void
macro_addnode(struct parse *p, struct pnode *pn, int fl)
{
	/* Start from an empty buffer. */
	bufclear(p);
	bufappend_r(p, pn);

	macro_addarg(p, p->b, fl);
}
/*
 * Print a macro with all the text contained in the subtree rooted
 * at pn as its arguments, and end the output line.
 */
static void
macro_nodeline(struct parse *p, const char *name, struct pnode *pn)
{
	const int	 noflags = 0;

	macro_open(p, name);
	macro_addnode(p, pn, noflags);
	macro_close(p);
}
/*
 * End the macro line that is currently open.
 * Before that, if the node following pn is a text node that starts
 * with a comma or a full stop standing alone, i.e. followed by
 * whitespace or by nothing at all, print that character as a
 * trailing macro argument and remove it from the text node.
 */
static void
macro_closepunct(struct parse *p, struct pnode *pn)
{
	struct pnode	*next;
	char		 punct;

	next = TAILQ_NEXT(pn, child);
	if (next != NULL && next->node == NODE_TEXT && next->bsz > 0) {
		punct = next->b[0];
		if ((punct == ',' || punct == '.') &&
		    (next->bsz == 1 ||
		     isspace((unsigned char)next->b[1]))) {
			putchar(' ');
			putchar(punct);

			/* Consumed; "real" still holds the allocation. */
			next->b++;
			next->bsz--;
		}
	}
	macro_close(p);
}
/*
 * Print a word on a text line.
 * A blank is inserted when continuing a text line, and a macro
 * line that is still open is ended first.
 * The word is printed verbatim, without any escaping.
 */
static void
print_text(struct parse *p, const char *word)
{
	if (p->linestate == LINE_TEXT)
		putchar(' ');
	else if (p->linestate == LINE_MACRO)
		macro_close(p);

	fputs(word, stdout);
	p->linestate = LINE_TEXT;
}
/*
 * Print a paragraph break (.Pp) unless it would be redundant.
 * The decision is based on the preceding sibling of pn or,
 * if pn is the first child, on its parent:
 * no break is printed right after a table entry or list item,
 * and none after a preface or section while the header level
 * is below 3.
 * Nothing is printed for a node without a parent.
 */
static void
pnode_printpara(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;

	/*
	 * A node without a parent was never inserted into a queue,
	 * so its queue entry must not be inspected with TAILQ_PREV().
	 * It has neither a preceding sibling nor a parent to look at.
	 */
	if (pn->parent == NULL)
		return;

	if ((pp = TAILQ_PREV(pn, pnodeq, child)) == NULL)
		pp = pn->parent;

	switch (pp->node) {
	case NODE_ENTRY:
	case NODE_LISTITEM:
		return;
	case NODE_PREFACE:
	case NODE_SECTION:
		if (p->level < 3)
			return;
		break;
	default:
		break;
	}
	macro_line(p, "Pp");
}
/*
 * The SYNOPSIS section gets a fixed header, so a title provided
 * by the document is superfluous: delete the first one found among
 * the children.
 */
static void
pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;

	kid = TAILQ_FIRST(&pn->childq);
	while (kid != NULL && kid->node != NODE_TITLE)
		kid = TAILQ_NEXT(kid, child);
	if (kid != NULL)
		pnode_unlink(kid);
}
/*
 * Print the header of a section-like node.
 * The header level is incremented; the macro is Sh on level 1, Ss on
 * level 2, and Sy after a paragraph break on deeper levels.
 * Admonitions (caution, note, tip, warning) always use the latter.
 * The header text is taken from the first title child, which is
 * deleted afterwards, or from a default depending on the node type.
 * A node without a parent prints nothing and leaves the level alone.
 */
static void
pnode_printrefsect(struct parse *p, struct pnode *pn)
{
	struct pnode	*tp;
	const char	*title;
	int		 flags, level;

	if (pn->parent == NULL)
		return;

	/* Headers on the top level are printed in upper case. */
	level = ++p->level;
	flags = level == 1 ? MACROLINE_UPPER : 0;

	if (level < 3 &&
	    (pn->node == NODE_CAUTION || pn->node == NODE_NOTE ||
	     pn->node == NODE_TIP || pn->node == NODE_WARNING))
		level = 3;

	/* Look for a title among the children. */
	tp = TAILQ_FIRST(&pn->childq);
	while (tp != NULL && tp->node != NODE_TITLE)
		tp = TAILQ_NEXT(tp, child);

	/* Without one, fall back to a default text. */
	title = NULL;
	if (tp == NULL) {
		switch (pn->node) {
		case NODE_PREFACE:
			title = "Preface";
			break;
		case NODE_CAUTION:
			title = "Caution";
			break;
		case NODE_NOTE:
			title = "Note";
			break;
		case NODE_TIP:
			title = "Tip";
			break;
		case NODE_WARNING:
			title = "Warning";
			break;
		default:
			title = "Unknown";
			break;
		}
	}

	if (level == 1) {
		macro_open(p, "Sh");
	} else if (level == 2) {
		macro_open(p, "Ss");
	} else {
		pnode_printpara(p, pn);
		macro_open(p, "Sy");
	}

	if (tp == NULL) {
		macro_addarg(p, title, 0);
	} else {
		macro_addnode(p, tp, flags);
		pnode_unlink(tp);
	}
	macro_close(p);
}
/*
 * Print a manual page cross reference (.Xr) on a line of its own.
 * Page name and section are taken from the last refentrytitle and
 * manvolnum children, defaulting to "unknown" and "1".
 */
static void
pnode_printciterefentry(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *title, *manvol;

	title = NULL;
	manvol = NULL;
	TAILQ_FOREACH(kid, &pn->childq, child) {
		switch (kid->node) {
		case NODE_MANVOLNUM:
			manvol = kid;
			break;
		case NODE_REFENTRYTITLE:
			title = kid;
			break;
		default:
			break;
		}
	}

	macro_open(p, "Xr");
	if (title != NULL)
		macro_addnode(p, title, 0);
	else
		macro_addarg(p, "unknown", 0);
	if (manvol != NULL)
		macro_addnode(p, manvol, 0);
	else
		macro_addarg(p, "1", 0);
	macro_close(p);
}
/*
 * Print the document title line (.Dt).
 * Title and section are taken from the last refentrytitle and
 * manvolnum children, defaulting to "UNKNOWN" and "1".
 * The title is converted to upper case.
 */
static void
pnode_printrefmeta(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *title, *manvol;

	title = NULL;
	manvol = NULL;
	TAILQ_FOREACH(kid, &pn->childq, child) {
		switch (kid->node) {
		case NODE_MANVOLNUM:
			manvol = kid;
			break;
		case NODE_REFENTRYTITLE:
			title = kid;
			break;
		default:
			break;
		}
	}

	macro_open(p, "Dt");
	if (title != NULL)
		macro_addnode(p, title, MACROLINE_UPPER);
	else
		macro_addarg(p, "UNKNOWN", 0);
	if (manvol != NULL)
		macro_addnode(p, manvol, 0);
	else
		macro_addarg(p, "1", 0);
	macro_close(p);
}
/*
 * Print the start of a function prototype: the return type (.Ft)
 * taken from the last text child, if there is one, and the function
 * name (.Fo) taken from the last function child, defaulting to
 * "UNKNOWN".
 */
static void
pnode_printfuncdef(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *ftype, *func;

	ftype = NULL;
	func = NULL;
	TAILQ_FOREACH(kid, &pn->childq, child) {
		switch (kid->node) {
		case NODE_TEXT:
			ftype = kid;
			break;
		case NODE_FUNCTION:
			func = kid;
			break;
		default:
			break;
		}
	}

	if (ftype != NULL)
		macro_nodeline(p, "Ft", ftype);

	macro_open(p, "Fo");
	if (func != NULL)
		macro_addnode(p, func, 0);
	else
		macro_addarg(p, "UNKNOWN", 0);
	macro_close(p);
}
/*
 * Print one function argument (.Fa) as a single quoted string
 * consisting of the type, taken from the last text child, and the
 * name, taken from the last parameter child.
 * Either of them may be missing.
 */
static void
pnode_printparamdef(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *ptype, *param;

	ptype = NULL;
	param = NULL;
	TAILQ_FOREACH(kid, &pn->childq, child) {
		switch (kid->node) {
		case NODE_TEXT:
			ptype = kid;
			break;
		case NODE_PARAMETER:
			param = kid;
			break;
		default:
			break;
		}
	}

	/*
	 * No blank right after the opening quote: only the name
	 * gets one in front, and only if a type precedes it.
	 */
	macro_open(p, "Fa \"");
	if (ptype != NULL)
		macro_addnode(p, ptype, MACROLINE_NOWS);
	if (param != NULL)
		macro_addnode(p, param,
		    ptype == NULL ? MACROLINE_NOWS : 0);
	macro_addarg(p, "\"", MACROLINE_NOWS);
	macro_close(p);
}
/*
 * The <mml:mfenced> node is a little peculiar.
 * First, it can have arbitrary open and closing tokens, which default
 * to parentheses.
 * Second, >1 arguments are separated by commas.
 * An element without any children prints the two tokens only.
 */
static void
pnode_printmathfenced(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;

	printf("left %s ", pnode_getattr_raw(pn, ATTRKEY_OPEN, "("));

	/* Do not ask for the successor of a child that is not there. */
	if ((pp = TAILQ_FIRST(&pn->childq)) != NULL) {
		pnode_print(p, pp);
		while ((pp = TAILQ_NEXT(pp, child)) != NULL) {
			putchar(',');
			pnode_print(p, pp);
		}
	}

	printf("right %s ", pnode_getattr_raw(pn, ATTRKEY_CLOSE, ")"));
}
/*
 * These math nodes require special handling because they have infix
 * syntax, instead of the usual prefix or postfix.
 * So we need to break up the first and second child node with a
 * particular eqn(7) word.
 * Children beyond the second one are not printed, and missing
 * operands are silently skipped.
 */
static void
pnode_printmath(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;

	/* First operand; pnode_print() copes with NULL. */
	pp = TAILQ_FIRST(&pn->childq);
	pnode_print(p, pp);

	switch (pn->node) {
	case NODE_MML_MSUP:
		fputs(" sup ", stdout);
		break;
	case NODE_MML_MFRAC:
		fputs(" over ", stdout);
		break;
	case NODE_MML_MSUB:
		fputs(" sub ", stdout);
		break;
	default:
		break;
	}

	/*
	 * Second operand.
	 * Without a first child, there is no successor to look up.
	 */
	if (pp != NULL)
		pnode_print(p, TAILQ_NEXT(pp, child));
}
/*
 * Print a complete function prototype: the function definition
 * from the first funcdef child (or a placeholder if there is none),
 * one argument for each paramdef child, and the closing .Fc.
 */
static void
pnode_printfuncprototype(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid, *fdef;

	fdef = TAILQ_FIRST(&pn->childq);
	while (fdef != NULL && fdef->node != NODE_FUNCDEF)
		fdef = TAILQ_NEXT(fdef, child);

	if (fdef == NULL)
		macro_line(p, "Fo UNKNOWN");
	else
		pnode_printfuncdef(p, fdef);

	for (kid = TAILQ_FIRST(&pn->childq); kid != NULL;
	    kid = TAILQ_NEXT(kid, child)) {
		if (kid->node == NODE_PARAMDEF)
			pnode_printparamdef(p, kid);
	}

	macro_line(p, "Fc");
}
/*
 * Print an <arg> element.
 * Text children are printed as arguments of an .Ar macro, while
 * other children are merely handed to the generic printer, without
 * a preceding .Ar.
 * Unless the choice attribute is "plain" or "req", the argument is
 * optional and enclosed in .Op.
 * If the rep attribute is "repeat", an ellipsis follows each text
 * child.
 */
static void
pnode_printarg(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;
	struct pattr	*ap;
	int		 optional, repeat, istext;

	optional = 1;
	repeat = 0;
	for (ap = TAILQ_FIRST(&pn->attrq); ap != NULL;
	    ap = TAILQ_NEXT(ap, child)) {
		if (ap->key == ATTRKEY_CHOICE &&
		    (ap->val == ATTRVAL_PLAIN || ap->val == ATTRVAL_REQ))
			optional = 0;
		else if (ap->key == ATTRKEY_REP &&
		    ap->val == ATTRVAL_REPEAT)
			repeat = 1;
	}

	if (optional)
		macro_open(p, "Op");

	for (kid = TAILQ_FIRST(&pn->childq); kid != NULL;
	    kid = TAILQ_NEXT(kid, child)) {
		istext = kid->node == NODE_TEXT;
		if (istext)
			macro_open(p, "Ar");
		pnode_print(p, kid);
		if (repeat && istext)
			macro_addarg(p, "...", 0);
	}
}
/*
 * Print a <group> element: a set of alternatives, separated by
 * vertical bars.
 * Unless the choice attribute is "plain" or "req", the group is
 * optional and enclosed in .Op.
 */
static void
pnode_printgroup(struct parse *p, struct pnode *pn)
{
	struct pnode	*cur, *nxt;
	struct pattr	*ap;
	int		 optional, fresh;

	optional = 1;
	for (ap = TAILQ_FIRST(&pn->attrq); ap != NULL;
	    ap = TAILQ_NEXT(ap, child)) {
		if (ap->key == ATTRKEY_CHOICE &&
		    (ap->val == ATTRVAL_PLAIN ||
		     ap->val == ATTRVAL_REQ)) {
			optional = 0;
			break;
		}
	}

	/*
	 * Make sure we're on a macro line.
	 * This will prevent pnode_print() from putting us on a
	 * subsequent line.
	 * If the line was fresh, we are responsible for ending it.
	 */
	fresh = p->linestate == LINE_NEW;
	if (optional)
		macro_open(p, "Op");
	else if (fresh)
		macro_open(p, "No");

	/*
	 * Print each child with the generic printer.
	 * Directly following siblings of the same node type are
	 * alternatives: print their text after a vertical bar and
	 * skip them in the outer loop.
	 * This is kind of a nightmare.
	 * Eh, DocBook...
	 * FIXME: if there's a "Fl", we don't cut off the leading "-"
	 * like we do in pnode_print().
	 */
	for (cur = TAILQ_FIRST(&pn->childq); cur != NULL;
	    cur = TAILQ_NEXT(cur, child)) {
		pnode_print(p, cur);
		for (nxt = TAILQ_NEXT(cur, child);
		    nxt != NULL && nxt->node == cur->node;
		    nxt = TAILQ_NEXT(nxt, child)) {
			macro_addarg(p, "|", 0);
			macro_addnode(p, nxt, 0);
			cur = nxt;
		}
	}

	if (fresh)
		macro_close(p);
}
/*
 * Print the prologue of the manual page: .Dd, .Dt, and .Os.
 * The title line is generated from the first refmeta node in the
 * tree, which is deleted afterwards.
 * Without such a node, the id attribute of the root element is used
 * as the title, or "UNKNOWN" if there is no such attribute or no
 * tree at all, together with section 1.
 * If the document contains inline equations, eqn(7) delimiters are
 * set up in addition.
 * The tree is always taken from p->root; pn is not used.
 */
static void
pnode_printprologue(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;
	const char	*title;

	pp = p->root == NULL ? NULL :
	    pnode_findfirst(p->root, NODE_REFMETA);

	/* Split the string to protect it from keyword expansion. */
	macro_line(p, "Dd $Mdocdate" "$");
	if (pp != NULL) {
		pnode_printrefmeta(p, pp);
		pnode_unlink(pp);
	} else {
		/* The tree is empty if parsing failed early. */
		title = p->root == NULL ? "UNKNOWN" :
		    pnode_getattr_raw(p->root, ATTRKEY_ID, "UNKNOWN");
		macro_open(p, "Dt");
		macro_addarg(p, title, 0);
		macro_addarg(p, "1", 0);
		macro_close(p);
	}
	macro_line(p, "Os");

	if (p->flags & PARSE_EQN) {
		macro_line(p, "EQ");
		print_text(p, "delim $$");
		macro_line(p, "EN");
	}
}
/*
 * Print one entry of a variable list.
 * All <term> children go to the .It line as list headers,
 * separated by commas; all other children are printed after it.
 */
static void
pnode_printvarlistentry(struct parse *p, struct pnode *pn)
{
	struct pnode	*kid;
	int		 needcomma;

	/* The header line with the terms. */
	macro_open(p, "It");
	needcomma = 0;
	for (kid = TAILQ_FIRST(&pn->childq); kid != NULL;
	    kid = TAILQ_NEXT(kid, child)) {
		if (kid->node != NODE_TERM)
			continue;
		if (needcomma)
			macro_addarg(p, ",", MACROLINE_NOWS);
		pnode_print(p, kid);
		needcomma = 1;
	}
	macro_close(p);

	/* The body of the entry. */
	for (kid = TAILQ_FIRST(&pn->childq); kid != NULL;
	    kid = TAILQ_NEXT(kid, child)) {
		if (kid->node != NODE_TERM)
			pnode_print(p, kid);
	}
}
/*
 * Print a table row as a compact dash list with one item
 * for each child of the row.
 */
static void
pnode_printrow(struct parse *p, struct pnode *pn)
{
	struct pnode	*cell;

	macro_line(p, "Bl -dash -compact");
	for (cell = TAILQ_FIRST(&pn->childq); cell != NULL;
	    cell = TAILQ_NEXT(cell, child)) {
		macro_line(p, "It");
		pnode_print(p, cell);
	}
	macro_line(p, "El");
}
/*
 * Print a table as a list of rows.
 * Title children are printed up front, as paragraphs, and deleted.
 * Then each row found anywhere below the table is printed as one
 * list item and deleted, until none are left.
 */
static void
pnode_printtable(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp, *next;

	/*
	 * Titles are deleted while iterating, so the successor has
	 * to be looked up before the node is freed.
	 */
	for (pp = TAILQ_FIRST(&pn->childq); pp != NULL; pp = next) {
		next = TAILQ_NEXT(pp, child);
		if (pp->node == NODE_TITLE) {
			pnode_printpara(p, pp);
			pnode_print(p, pp);
			pnode_unlink(pp);
		}
	}

	macro_line(p, "Bl -ohang");
	while ((pp = pnode_findfirst(pn, NODE_ROW)) != NULL) {
		macro_line(p, "It Table Row");
		pnode_printrow(p, pp);
		pnode_unlink(pp);
	}
	macro_line(p, "El");
}
/*
 * Print an itemized or ordered list.
 * Title children are printed up front, as paragraphs, and deleted.
 * Each remaining child becomes one item of a bullet list or,
 * for ordered lists, of an enumerated list.
 */
static void
pnode_printlist(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp, *next;

	/*
	 * Titles are deleted while iterating, so the successor has
	 * to be looked up before the node is freed.
	 */
	for (pp = TAILQ_FIRST(&pn->childq); pp != NULL; pp = next) {
		next = TAILQ_NEXT(pp, child);
		if (pp->node == NODE_TITLE) {
			pnode_printpara(p, pp);
			pnode_print(p, pp);
			pnode_unlink(pp);
		}
	}

	macro_argline(p, "Bl",
	    pn->node == NODE_ORDEREDLIST ? "-enum" : "-bullet");
	TAILQ_FOREACH(pp, &pn->childq, child) {
		macro_line(p, "It");
		pnode_print(p, pp);
	}
	macro_line(p, "El");
}
/*
 * Print a variable list as a tagged list.
 * Title children are printed up front, as paragraphs, and deleted.
 * Each varlistentry child is printed with the generic printer; for
 * any other child, its text is used as the header of an item.
 */
static void
pnode_printvariablelist(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp, *next;

	/*
	 * Titles are deleted while iterating, so the successor has
	 * to be looked up before the node is freed.
	 */
	for (pp = TAILQ_FIRST(&pn->childq); pp != NULL; pp = next) {
		next = TAILQ_NEXT(pp, child);
		if (pp->node == NODE_TITLE) {
			pnode_printpara(p, pp);
			pnode_print(p, pp);
			pnode_unlink(pp);
		}
	}

	macro_line(p, "Bl -tag -width Ds");
	TAILQ_FOREACH(pp, &pn->childq, child) {
		if (pp->node == NODE_VARLISTENTRY)
			pnode_print(p, pp);
		else
			macro_nodeline(p, "It", pp);
	}
	macro_line(p, "El");
}
/*
 * Print a parsed node (or ignore it--whatever).
 * This is a recursive function.
 * It works in three steps: output that goes before the content of
 * the node, then all children that are still attached, then output
 * that goes after the content.
 * Node types with a dedicated printer have their children deleted
 * in the first step such that the second step finds nothing to do.
 * A NULL node is ignored.
 * FIXME: if we're in a literal context (<screen> or <programlisting> or
 * whatever), don't print inline macros.
 */
static void
pnode_print(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;
	const char	*ccp;
	char		*cp;
	int		 last;
	enum linestate	 sv;

	if (pn == NULL)
		return;

	/* Remember the line state found on entry for the last step. */
	sv = p->linestate;

	switch (pn->node) {
	case NODE_APPLICATION:
		macro_open(p, "Nm");
		break;
	case NODE_ANCHOR:
		/* Don't print anything! */
		return;
	case NODE_ARG:
		pnode_printarg(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_AUTHOR:
		macro_open(p, "An");
		break;
	case NODE_AUTHORGROUP:
		macro_line(p, "An -split");
		break;
	case NODE_BOOKINFO:
		macro_line(p, "Sh NAME");
		break;
	case NODE_CITEREFENTRY:
		pnode_printciterefentry(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_CITETITLE:
		macro_open(p, "%T");
		break;
	case NODE_CODE:
		macro_open(p, "Li");
		break;
	case NODE_COMMAND:
		macro_open(p, "Nm");
		break;
	case NODE_CONSTANT:
		macro_open(p, "Dv");
		break;
	case NODE_EDITOR:
		print_text(p, "editor:");
		macro_open(p, "An");
		break;
	case NODE_EMAIL:
		macro_open(p, "Aq Mt");
		break;
	case NODE_EMPHASIS:
	case NODE_FIRSTTERM:
		macro_open(p, "Em");
		break;
	case NODE_ENVAR:
		macro_open(p, "Ev");
		break;
	case NODE_FILENAME:
		macro_open(p, "Pa");
		break;
	case NODE_FUNCTION:
		macro_open(p, "Fn");
		break;
	case NODE_FUNCPROTOTYPE:
		pnode_printfuncprototype(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_FUNCSYNOPSISINFO:
		macro_open(p, "Fd");
		break;
	case NODE_INDEXTERM:
		return;
	case NODE_INFORMALEQUATION:
		macro_line(p, "EQ");
		break;
	case NODE_INLINEEQUATION:
		if (p->linestate == LINE_NEW)
			p->linestate = LINE_TEXT;
		putchar('$');
		break;
	case NODE_ITEMIZEDLIST:
		pnode_printlist(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_GROUP:
		pnode_printgroup(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_KEYSYM:
		macro_open(p, "Sy");
		break;
	case NODE_LEGALNOTICE:
		macro_line(p, "Sh LEGAL NOTICE");
		break;
	case NODE_LINK:
		/* A link with a target replaces its own content. */
		ccp = pnode_getattr_raw(pn, ATTRKEY_LINKEND, NULL);
		if (ccp == NULL)
			break;
		macro_argline(p, "Sx", ccp);
		return;
	case NODE_LITERAL:
		macro_open(p, "Li");
		break;
	case NODE_LITERALLAYOUT:
		macro_argline(p, "Bd", pnode_getattr(pn, ATTRKEY_CLASS) ==
		    ATTRVAL_MONOSPACED ? "-literal" : "-unfilled");
		break;
	case NODE_MML_MFENCED:
		pnode_printmathfenced(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_MML_MROW:
	case NODE_MML_MI:
	case NODE_MML_MN:
	case NODE_MML_MO:
		if (TAILQ_EMPTY(&pn->childq))
			break;
		fputs(" { ", stdout);
		break;
	case NODE_MML_MFRAC:
	case NODE_MML_MSUB:
	case NODE_MML_MSUP:
		pnode_printmath(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_OPTION:
		macro_open(p, "Fl");
		break;
	case NODE_ORDEREDLIST:
		pnode_printlist(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_PARA:
		pnode_printpara(p, pn);
		break;
	case NODE_PARAMETER:
		/* Suppress non-text children... */
		macro_open(p, "Fa \"");
		macro_addnode(p, pn, MACROLINE_NOWS);
		macro_addarg(p, "\"", MACROLINE_NOWS);
		macro_close(p);
		pnode_unlinksub(pn);
		break;
	case NODE_QUOTE:
		macro_open(p, "Qo");
		break;
	case NODE_PROGRAMLISTING:
	case NODE_SCREEN:
		macro_line(p, "Bd -literal");
		break;
	case NODE_REFENTRYINFO:
		/* Suppress. */
		pnode_unlinksub(pn);
		break;
	case NODE_REFMETA:
		/* The prologue is expected to have consumed this node. */
		abort();
		break;
	case NODE_REFNAME:
		/* Suppress non-text children... */
		macro_open(p, "Nm");
		macro_addnode(p, pn, 0);
		pnode_unlinksub(pn);
		break;
	case NODE_REFNAMEDIV:
		macro_line(p, "Sh NAME");
		break;
	case NODE_REFPURPOSE:
		macro_open(p, "Nd");
		break;
	case NODE_REFSYNOPSISDIV:
		pnode_printrefsynopsisdiv(p, pn);
		macro_line(p, "Sh SYNOPSIS");
		break;
	case NODE_PREFACE:
	case NODE_SECTION:
	case NODE_NOTE:
	case NODE_TIP:
	case NODE_CAUTION:
	case NODE_WARNING:
		pnode_printrefsect(p, pn);
		break;
	case NODE_REPLACEABLE:
		macro_open(p, "Ar");
		break;
	case NODE_SBR:
		macro_line(p, "br");
		break;
	case NODE_SGMLTAG:
		macro_open(p, "Li");
		break;
	case NODE_STRUCTNAME:
		macro_open(p, "Vt");
		break;
	case NODE_TABLE:
	case NODE_INFORMALTABLE:
		pnode_printtable(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_TEXT:
		bufclear(p);
		bufappend(p, pn);
		if (p->bsz == 0) {
			/* Only empty if its content was consumed. */
			assert(pn->real != pn->b);
			break;
		}
		if (p->linestate == LINE_NEW)
			p->linestate = LINE_TEXT;
		else
			putchar(' ');
		/*
		 * Output all characters, squeezing out whitespace
		 * between newlines.
		 * XXX: all whitespace, including tabs (?).
		 * Remember to escape control characters and escapes.
		 */
		cp = p->b;
		/*
		 * There's often a superfluous "-" in its <option> tags
		 * before the actual flags themselves.
		 * "Fl" does this for us, so remove it.
		 */
		if (pn->parent != NULL &&
		    pn->parent->node == NODE_OPTION &&
		    *cp == '-')
			cp++;
		for (last = '\n'; *cp != '\0'; ) {
			if (last == '\n') {
				/* Consume all whitespace. */
				if (isspace((unsigned char)*cp)) {
					while (isspace((unsigned char)*cp))
						cp++;
					continue;
				} else if (*cp == '\'' || *cp == '.')
					fputs("\\&", stdout);
			}
			putchar(last = *cp++);
			/* If we're a character escape, escape us. */
			if (last == '\\')
				putchar('e');
		}
		break;
	case NODE_TITLE:
		/* A title may be the root, and then has no parent. */
		if (pn->parent != NULL &&
		    pn->parent->node == NODE_BOOKINFO)
			macro_open(p, "Nd");
		break;
	case NODE_TYPE:
		macro_open(p, "Vt");
		break;
	case NODE_USERINPUT:
		macro_open(p, "Li");
		break;
	case NODE_VARIABLELIST:
		pnode_printvariablelist(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_VARLISTENTRY:
		pnode_printvarlistentry(p, pn);
		pnode_unlinksub(pn);
		break;
	case NODE_VARNAME:
		macro_open(p, "Va");
		break;
	default:
		break;
	}

	/* Print whatever children are still attached. */
	TAILQ_FOREACH(pp, &pn->childq, child)
		pnode_print(p, pp);

	switch (pn->node) {
	case NODE_INFORMALEQUATION:
		macro_line(p, "EN");
		break;
	case NODE_INLINEEQUATION:
		fputs("$ ", stdout);
		p->linestate = sv;
		break;
	case NODE_MML_MROW:
	case NODE_MML_MI:
	case NODE_MML_MN:
	case NODE_MML_MO:
		if (TAILQ_EMPTY(&pn->childq))
			break;
		fputs(" } ", stdout);
		break;
	case NODE_APPLICATION:
	case NODE_ARG:
	case NODE_AUTHOR:
	case NODE_CITEREFENTRY:
	case NODE_CITETITLE:
	case NODE_CODE:
	case NODE_COMMAND:
	case NODE_CONSTANT:
	case NODE_EDITOR:
	case NODE_EMAIL:
	case NODE_EMPHASIS:
	case NODE_ENVAR:
	case NODE_FILENAME:
	case NODE_FIRSTTERM:
	case NODE_FUNCTION:
	case NODE_FUNCSYNOPSISINFO:
	case NODE_KEYSYM:
	case NODE_LITERAL:
	case NODE_OPTION:
	case NODE_PARAMETER:
	case NODE_REPLACEABLE:
	case NODE_REFPURPOSE:
	case NODE_SGMLTAG:
	case NODE_STRUCTNAME:
	case NODE_TYPE:
	case NODE_USERINPUT:
	case NODE_VARNAME:
		/* End the macro line if this node has started it. */
		if (sv != LINE_MACRO && p->linestate == LINE_MACRO)
			macro_closepunct(p, pn);
		break;
	case NODE_QUOTE:
		if (sv == LINE_NEW)
			macro_close(p);
		sv = p->linestate;
		macro_open(p, "Qc");
		if (sv == LINE_NEW)
			macro_close(p);
		break;
	case NODE_REFNAME:
		/*
		 * If we're in the NAME macro and we have multiple
		 * <refname> macros in sequence, then print out a
		 * trailing comma before the newline.
		 */
		if (pn->parent != NULL &&
		    pn->parent->node == NODE_REFNAMEDIV &&
		    TAILQ_NEXT(pn, child) != NULL &&
		    TAILQ_NEXT(pn, child)->node == NODE_REFNAME)
			macro_addarg(p, ",", 0);
		if (sv == LINE_NEW)
			macro_close(p);
		break;
	case NODE_PREFACE:
	case NODE_SECTION:
	case NODE_NOTE:
	case NODE_TIP:
	case NODE_CAUTION:
	case NODE_WARNING:
		p->level--;
		break;
	case NODE_LITERALLAYOUT:
	case NODE_PROGRAMLISTING:
	case NODE_SCREEN:
		macro_line(p, "Ed");
		break;
	case NODE_TITLE:
		/* A title may be the root, and then has no parent. */
		if (pn->parent != NULL &&
		    pn->parent->node == NODE_BOOKINFO)
			macro_line(p, "Sh AUTHORS");
		break;
	default:
		break;
	}
}
/*
 * Loop around the read buffer until we've drained it of all data.
 * Invoke the parser context with each buffer fill.
 * Once the input is exhausted or parsing has to stop, print the
 * prologue and whatever tree exists, then release the tree.
 * The buffer b of bsz bytes is provided by the caller; fn is only
 * used in messages.
 * Return 1 if the whole input was parsed without error, or 0 after
 * a parse error, a read error, or memory exhaustion.
 */
static int
readfile(XML_Parser xp, int fd,
	char *b, size_t bsz, const char *fn)
{
	struct parse	 p;
	int		 rc;
	ssize_t		 ssz;

	memset(&p, 0, sizeof(struct parse));

	/* The printers write to this buffer without further checks. */
	p.bsz = p.mbsz = 1024;
	if ((p.b = malloc(p.mbsz)) == NULL) {
		perror(NULL);
		return 0;
	}
	p.fname = fn;
	p.xml = xp;

	XML_SetCharacterDataHandler(xp, xml_char);
	XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
	XML_SetUserData(xp, &p);

	while ((ssz = read(fd, b, bsz)) >= 0) {
		/* A read of zero bytes marks the final chunk. */
		if ((rc = XML_Parse(xp, b, ssz, 0 == ssz)) == 0)
			fprintf(stderr, "%s:%zu:%zu: %s\n", fn,
			    XML_GetCurrentLineNumber(xp),
			    XML_GetCurrentColumnNumber(xp),
			    XML_ErrorString
			    (XML_GetErrorCode(xp)));
		else if ( ! p.stop && ssz > 0)
			continue;
		/*
		 * Exit when we've read all or errors have occurred
		 * during the parse sequence.
		 */
		p.linestate = LINE_NEW;
		pnode_printprologue(&p, p.root);
		pnode_print(&p, p.root);
		if (p.linestate != LINE_NEW)
			putchar('\n');
		pnode_free(p.root);
		free(p.b);
		return rc != 0 && p.stop == 0;
	}

	/* Read error has occurred. */
	perror(fn);
	pnode_free(p.root);
	free(p.b);
	return 0;
}
/*
 * Parse the command line, open the input (standard input when no
 * file name is given or when it is "-"), and convert it with
 * readfile().
 * Return 0 if readfile() reports success, 1 on any failure
 * including usage errors.
 */
int
main(int argc, char *argv[])
{
	XML_Parser	 xp;
	const char	*fname;
	char		*buf;
	int		 fd, rc, ch;
	const char	*progname;
	const size_t	 bufsz = 4096;	/* size of the read buffer */

	/* Use the basename of argv[0] in the usage message. */
	progname = strrchr(argv[0], '/');
	if (progname == NULL)
		progname = argv[0];
	else
		++progname;

	fname = "-";
	xp = NULL;
	buf = NULL;
	rc = 1;

	while ((ch = getopt(argc, argv, "W")) != -1)
		switch (ch) {
		case 'W':
			warn = 1;
			break;
		default:
			goto usage;
		}

	argc -= optind;
	argv += optind;

	if (argc > 1) {
		/* Report the first excess argument. */
		fprintf(stderr, "%s: Too many arguments\n", argv[1]);
		goto usage;
	} else if (argc > 0)
		fname = argv[0];

	/* Read from stdin or a file. */
	fd = strcmp(fname, "-") == 0 ?
	    STDIN_FILENO : open(fname, O_RDONLY, 0);

	/*
	 * Open file for reading.
	 * Allocate a read buffer.
	 * Create the parser context.
	 * Dive directly into the parse.
	 */
	if (fd == -1)
		perror(fname);
	else if ((buf = malloc(bufsz)) == NULL)
		perror(NULL);
	else if ((xp = XML_ParserCreate(NULL)) == NULL)
		perror(NULL);
	else if (readfile(xp, fd, buf, bufsz, fname))
		rc = 0;

	XML_ParserFree(xp);
	free(buf);
	/* Do not close an invalid descriptor after a failed open(). */
	if (fd != -1 && fd != STDIN_FILENO)
		close(fd);
	return rc;

usage:
	fprintf(stderr, "usage: %s [-W] [input_filename]\n", progname);
	return 1;
}