=================================================================== RCS file: /cvs/docbook2mdoc/docbook2mdoc.c,v retrieving revision 1.44 retrieving revision 1.97 diff -u -p -r1.44 -r1.97 --- docbook2mdoc/docbook2mdoc.c 2015/03/19 10:04:32 1.44 +++ docbook2mdoc/docbook2mdoc.c 2019/04/07 17:42:36 1.97 @@ -1,6 +1,7 @@ -/* $Id: docbook2mdoc.c,v 1.44 2015/03/19 10:04:32 schwarze Exp $ */ +/* $Id: docbook2mdoc.c,v 1.97 2019/04/07 17:42:36 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons + * Copyright (c) 2019 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -14,860 +15,277 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include - #include #include -#include -#include -#include #include #include -#include -#include -#include "extern.h" +#include "node.h" +#include "macro.h" +#include "format.h" /* - * Global parse state. - * Keep this as simple and small as possible. + * The implementation of the mdoc(7) formatter. */ -struct parse { - XML_Parser xml; - enum nodeid node; /* current (NODE_ROOT if pre-tree) */ - const char *fname; /* filename */ - int stop; /* should we stop now? */ -#define PARSE_EQN 1 - unsigned int flags; /* document-wide flags */ - struct pnode *root; /* root of parse tree */ - struct pnode *cur; /* current node in tree */ - char *b; /* nil-terminated buffer for pre-print */ - size_t bsz; /* current length of b */ - size_t mbsz; /* max bsz allocation */ - int newln; /* output: are we on a fresh line */ -}; -struct node { - const char *name; /* docbook element name */ - unsigned int flags; -#define NODE_IGNTEXT 1 /* ignore all contained text */ -}; +static void pnode_print(struct format *, struct pnode *); -TAILQ_HEAD(pnodeq, pnode); -TAILQ_HEAD(pattrq, pattr); -struct pattr { - enum attrkey key; - enum attrval val; - char *rawval; - TAILQ_ENTRY(pattr) child; -}; - -struct pnode { - enum nodeid node; /* node type */ - char *b; /* binary data buffer */ - char *real; /* store for "b" */ - size_t bsz; /* data buffer size */ - struct pnode *parent; /* parent (or NULL if top) */ - struct pnodeq childq; /* queue of children */ - struct pattrq attrq; /* attributes of node */ - TAILQ_ENTRY(pnode) child; -}; - -static const char *attrkeys[ATTRKEY__MAX] = { - "choice", - "close", - "id", - "open", - "rep" -}; - -static const char *attrvals[ATTRVAL__MAX] = { - "norepeat", - "opt", - "plain", - "repeat", - "req" -}; - -static const struct node nodes[NODE__MAX] = { - { NULL, 0 }, - { "acronym", 0 }, - { "anchor", NODE_IGNTEXT }, - { "application", 0 }, - { "arg", 0 }, - { "caution", NODE_IGNTEXT }, - { "citerefentry", NODE_IGNTEXT }, - { "cmdsynopsis", NODE_IGNTEXT }, - { "code", 0 }, - { "colspec", NODE_IGNTEXT }, - { "command", 0 }, - { "constant", 0 }, - { "copyright", NODE_IGNTEXT }, - { "date", 0 }, - { "emphasis", 0 }, - { "entry", 0 }, - { "envar", 0 }, - { "fieldsynopsis", NODE_IGNTEXT }, - { "filename", 0 }, - { "funcdef", 0 }, - { "funcprototype", NODE_IGNTEXT }, - { "funcsynopsis", NODE_IGNTEXT }, - { "funcsynopsisinfo", 0 }, - { "function", 0 }, - { "group", NODE_IGNTEXT }, - { "holder", NODE_IGNTEXT }, - { "info", NODE_IGNTEXT }, - { "informalequation", NODE_IGNTEXT }, - { "informaltable", NODE_IGNTEXT }, - { "inlineequation", NODE_IGNTEXT }, - { "itemizedlist", NODE_IGNTEXT }, - { "link", 0 }, - { "listitem", NODE_IGNTEXT }, - { "literal", 0 }, - { "manvolnum", 0 }, - { "mml:math", NODE_IGNTEXT }, - { "mml:mfenced", 0 }, - { "mml:mfrac", 0 }, - { "mml:mi", 0 }, - { "mml:mn", 0 }, - { "mml:mo", 0 }, - { "mml:mrow", 0 }, - { "mml:msub", 0 }, - { "mml:msup", 0 }, - { "modifier", 0 }, - { "note", NODE_IGNTEXT }, - { "option", 0 }, - { "orderedlist", NODE_IGNTEXT }, - { "para", 0 }, - { "paramdef", 0 }, - { "parameter", 0 }, - { "programlisting", 0 }, - { "prompt", 0 }, - { "quote", 0 }, - { "refclass", NODE_IGNTEXT }, - { "refdescriptor", NODE_IGNTEXT }, - { "refentry", NODE_IGNTEXT }, - { "refentryinfo", NODE_IGNTEXT }, - { "refentrytitle", 0 }, - { "refmeta", NODE_IGNTEXT }, - { "refmetainfo", NODE_IGNTEXT }, - { "refmiscinfo", NODE_IGNTEXT }, - { "refname", 0 }, - { "refnamediv", NODE_IGNTEXT }, - { "refpurpose", 0 }, - { "refsect1", NODE_IGNTEXT }, - { "refsect2", NODE_IGNTEXT }, - { "refsect3", NODE_IGNTEXT }, - { "refsection", NODE_IGNTEXT }, - { "refsynopsisdiv", NODE_IGNTEXT }, - { "replaceable", 0 }, - { "row", NODE_IGNTEXT }, - { "sbr", NODE_IGNTEXT }, - { "screen", NODE_IGNTEXT }, - { "sgmltag", 0 }, - { "structname", 0 }, - { "synopsis", 0 }, - { "table", NODE_IGNTEXT }, - { "tbody", NODE_IGNTEXT }, - { "term", 0 }, - { NULL, 0 }, - { "tfoot", NODE_IGNTEXT }, - { "tgroup", NODE_IGNTEXT }, - { "thead", NODE_IGNTEXT }, - { "tip", NODE_IGNTEXT }, - { "title", 0 }, - { "trademark", 0 }, - { "type", 0 }, - { "ulink", 0 }, - { "userinput", 0 }, - { "variablelist", NODE_IGNTEXT }, - { "varlistentry", NODE_IGNTEXT }, - { "varname", 0 }, - { "warning", NODE_IGNTEXT }, - { "wordasword", 0 }, - { "year", NODE_IGNTEXT }, -}; - -static int warn = 0; - static void -pnode_print(struct parse *p, struct pnode *pn); - -/* - * Process a stream of characters. - * We store text as nodes in and of themselves. - * If a text node is already open, append to it. - * If it's not open, open one under the current context. - */ -static void -xml_char(void *arg, const XML_Char *p, int sz) +pnode_printtext(struct format *f, struct pnode *n) { - struct parse *ps = arg; - struct pnode *dat; - int i; + struct pnode *nn; + char *cp; + char last; - /* Stopped or no tree yet. */ - if (ps->stop || NODE_ROOT == ps->node) + if (n->bsz == 0) { + assert(n->real < n->b); return; + } - /* Not supposed to be collecting text. */ - assert(NULL != ps->cur); - if (NODE_IGNTEXT & nodes[ps->node].flags) - return; - /* - * Are we in the midst of processing text? - * If we're not processing text right now, then create a text - * node for doing so. - * However, don't do so unless we have some non-whitespace to - * process: strip out all leading whitespace to be sure. + * Text preceding a macro without intervening whitespace + * requires a .Pf macro. + * Set the spacing flag to avoid a redundant .Ns macro. */ - if (NODE_TEXT != ps->node) { - for (i = 0; i < sz; i++) - if ( ! isspace((int)p[i])) - break; - if (i == sz) - return; - p += i; - sz -= i; - dat = calloc(1, sizeof(struct pnode)); - if (NULL == dat) { - perror(NULL); - exit(EXIT_FAILURE); - } - dat->node = ps->node = NODE_TEXT; - dat->parent = ps->cur; - TAILQ_INIT(&dat->childq); - TAILQ_INIT(&dat->attrq); - TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); - ps->cur = dat; - assert(NULL != ps->root); + if (f->linestate != LINE_MACRO && + (nn = TAILQ_NEXT(n, child)) != NULL && nn->spc == 0 && + (nn->node != NODE_TEXT && nn->node != NODE_ESCAPE)) { + macro_open(f, "Pf"); + nn->spc = 1; } - /* Append to current buffer. */ - assert(sz >= 0); - ps->cur->b = realloc(ps->cur->b, - ps->cur->bsz + (size_t)sz); - if (NULL == ps->cur->b) { - perror(NULL); - exit(EXIT_FAILURE); + if (f->linestate == LINE_NEW) { + last = '\n'; + f->linestate = LINE_TEXT; + } else { + last = ' '; + if (n->spc || f->linestate == LINE_MACRO) + putchar(' '); } - memcpy(ps->cur->b + ps->cur->bsz, p, sz); - ps->cur->bsz += (size_t)sz; - ps->cur->real = ps->cur->b; -} -static void -pnode_trim(struct pnode *pn) -{ - - assert(NODE_TEXT == pn->node); - for ( ; pn->bsz > 0; pn->bsz--) - if ( ! isspace((int)pn->b[pn->bsz - 1])) - break; -} - -/* - * Begin an element. - * First, look for the element. - * If we don't find it and we're not parsing, keep going. - * If we don't find it and we're parsing, puke and exit. - * If we find it but we're not parsing yet (i.e., it's not a refentry - * and thus out of context), keep going. - * If we find it and we're at the root and already have a tree, puke and - * exit (FIXME: I don't think this is right?). - * If we find it but we're parsing a text node, close out the text node, - * return to its parent, and keep going. - * Make sure that the element is in the right context. - * Lastly, put the node onto our parse tree and continue. - */ -static void -xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts) -{ - struct parse *ps = arg; - enum nodeid node; - enum attrkey key; - enum attrval val; - struct pnode *dat; - struct pattr *pattr; - const XML_Char **att; - - /* FIXME: find a better way to ditch other namespaces. */ - if (ps->stop || 0 == strcmp(name, "xi:include")) + if (n->node == NODE_ESCAPE) { + fputs(n->b, stdout); return; - - /* Close out text node, if applicable... */ - if (NODE_TEXT == ps->node) { - assert(NULL != ps->cur); - pnode_trim(ps->cur); - ps->cur = ps->cur->parent; - assert(NULL != ps->cur); - ps->node = ps->cur->node; } - for (node = 0; node < NODE__MAX; node++) - if (NULL == nodes[node].name) - continue; - else if (0 == strcmp(nodes[node].name, name)) - break; + /* + * Remove the prefix '-' from