=================================================================== RCS file: /cvs/docbook2mdoc/docbook2mdoc.c,v retrieving revision 1.4 retrieving revision 1.10 diff -u -p -r1.4 -r1.10 --- docbook2mdoc/docbook2mdoc.c 2014/03/28 10:00:40 1.4 +++ docbook2mdoc/docbook2mdoc.c 2014/03/29 10:56:21 1.10 @@ -1,4 +1,4 @@ -/* $Id: docbook2mdoc.c,v 1.4 2014/03/28 10:00:40 kristaps Exp $ */ +/* $Id: docbook2mdoc.c,v 1.10 2014/03/29 10:56:21 kristaps Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * @@ -24,6 +24,7 @@ #include #include #include +#include /* * All recognised node types. @@ -58,6 +59,7 @@ enum nodeid { NODE_REFPURPOSE, NODE_REFSECT1, NODE_REFSYNOPSISDIV, + NODE_STRUCTNAME, NODE_SYNOPSIS, NODE_TEXT, NODE_TITLE, @@ -73,13 +75,14 @@ struct parse { int stop; /* should we stop now? */ struct pnode *root; /* root of parse tree */ struct pnode *cur; /* current node in tree */ - char *b; - size_t bsz; - size_t mbsz; + char *b; /* nil-terminated buffer for pre-print */ + size_t bsz; /* current length of b */ + size_t mbsz; /* max bsz allocation */ + int newln; /* output: are we on a fresh line */ }; struct node { - const char *name; + const char *name; /* docbook element name */ unsigned int flags; #define NODE_IGNTEXT 1 /* ignore all contained text */ }; @@ -124,13 +127,19 @@ static const struct node nodes[NODE__MAX] = { { "refpurpose", 0 }, { "refsect1", 0 }, { "refsynopsisdiv", NODE_IGNTEXT }, + { "structname", 0 }, { "synopsis", 0 }, { NULL, 0 }, { "title", 0 }, }; +static void +pnode_print(struct parse *p, struct pnode *pn); + /* * Look up whether "parent" is a valid parent for "node". + * This is sucked directly from the DocBook specification: look at the + * "children" and "parent" sections of each node. */ static int isparent(enum nodeid node, enum nodeid parent) @@ -334,6 +343,26 @@ isparent(enum nodeid node, enum nodeid parent) return(parent == NODE_REFENTRY); case (NODE_REFSYNOPSISDIV): return(parent == NODE_REFENTRY); + case (NODE_STRUCTNAME): + switch (parent) { + case (NODE_CODE): + case (NODE_FUNCSYNOPSISINFO): + case (NODE_FUNCTION): + case (NODE_OPTION): + case (NODE_PARA): + case (NODE_PARAMETER): + case (NODE_PROGRAMLISTING): + case (NODE_REFDESCRIPTOR): + case (NODE_REFENTRYTITLE): + case (NODE_REFNAME): + case (NODE_REFPURPOSE): + case (NODE_SYNOPSIS): + case (NODE_TITLE): + return(1); + default: + break; + } + return(0); case (NODE_SYNOPSIS): switch (parent) { case (NODE_REFSYNOPSISDIV): @@ -362,6 +391,12 @@ isparent(enum nodeid node, enum nodeid parent) return(0); } +/* + * Process a stream of characters. + * We store text as nodes in and of themselves. + * If a text node is already open, append to it. + * If it's not open, open one under the current context. + */ static void xml_char(void *arg, const XML_Char *p, int sz) { @@ -383,7 +418,7 @@ xml_char(void *arg, const XML_Char *p, int sz) * If we're not processing text right now, then create a text * node for doing so. * However, don't do so unless we have some non-whitespace to - * process! + * process: strip out all leading whitespace to be sure. */ if (NODE_TEXT != ps->node) { for (i = 0; i < sz; i++) @@ -391,6 +426,8 @@ xml_char(void *arg, const XML_Char *p, int sz) break; if (i == sz) return; + p += i; + sz -= i; dat = calloc(1, sizeof(struct pnode)); if (NULL == dat) { perror(NULL); @@ -403,7 +440,6 @@ xml_char(void *arg, const XML_Char *p, int sz) TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); ps->cur = dat; assert(NULL != ps->root); - } /* Append to current buffer. */ @@ -418,14 +454,27 @@ xml_char(void *arg, const XML_Char *p, int sz) ps->cur->bsz += (size_t)sz; } +static void +pnode_trim(struct pnode *pn) +{ + + assert(NODE_TEXT == pn->node); + for ( ; pn->bsz > 0; pn->bsz--) + if ( ! isspace((int)pn->b[pn->bsz - 1])) + break; +} + /* * Begin an element. * First, look for the element. * If we don't find it and we're not parsing, keep going. - * If we don't find it (and we're parsing), puke and exit. + * If we don't find it and we're parsing, puke and exit. * If we find it but we're not parsing yet (i.e., it's not a refentry * and thus out of context), keep going. - * If we're at the root and already have a tree, puke and exit. + * If we find it and we're at the root and already have a tree, puke and + * exit (FIXME: I don't think this is right?). + * If we find it but we're parsing a text node, close out the text node, + * return to its parent, and keep going. * Make sure that the element is in the right context. * Lastly, put the node onto our parse tree and continue. */ @@ -442,6 +491,7 @@ xml_elem_start(void *arg, const XML_Char *name, const /* Close out text node, if applicable... */ if (NODE_TEXT == ps->node) { assert(NULL != ps->cur); + pnode_trim(ps->cur); ps->cur = ps->cur->parent; assert(NULL != ps->cur); ps->node = ps->cur->node; @@ -453,6 +503,7 @@ xml_elem_start(void *arg, const XML_Char *name, const else if (0 == strcmp(nodes[node].name, name)) break; + /* FIXME: do more with these error messages... */ if (NODE__MAX == node && NODE_ROOT == ps->node) { fprintf(stderr, "%s: ignoring node\n", name); return; @@ -492,7 +543,7 @@ xml_elem_start(void *arg, const XML_Char *name, const /* * Roll up the parse tree. - * Does nothing else special. + * If we're at a text node, roll that one up first. * If we hit the root, then assign ourselves as the NODE_ROOT. */ static void @@ -506,6 +557,7 @@ xml_elem_end(void *arg, const XML_Char *name) /* Close out text node, if applicable... */ if (NODE_TEXT == ps->node) { assert(NULL != ps->cur); + pnode_trim(ps->cur); ps->cur = ps->cur->parent; assert(NULL != ps->cur); ps->node = ps->cur->node; @@ -517,6 +569,9 @@ xml_elem_end(void *arg, const XML_Char *name) ps->node = ps->cur->node; } +/* + * Recursively free a node (NULL is ok). + */ static void pnode_free(struct pnode *pn) { @@ -534,6 +589,9 @@ pnode_free(struct pnode *pn) free(pn); } +/* + * Unlink a node from its parent and pnode_free() it. + */ static void pnode_unlink(struct pnode *pn) { @@ -543,6 +601,9 @@ pnode_unlink(struct pnode *pn) pnode_free(pn); } +/* + * Unlink all children of a node and pnode_free() them. + */ static void pnode_unlinksub(struct pnode *pn) { @@ -551,6 +612,9 @@ pnode_unlinksub(struct pnode *pn) pnode_unlink(TAILQ_FIRST(&pn->childq)); } +/* + * Reset the lookaside buffer. + */ static void bufclear(struct parse *p) { @@ -558,6 +622,11 @@ bufclear(struct parse *p) p->b[p->bsz = 0] = '\0'; } +/* + * Append NODE_TEXT contents to the current buffer, reallocating its + * size if necessary. + * The buffer is ALWAYS nil-terminated. + */ static void bufappend(struct parse *p, struct pnode *pn) { @@ -575,6 +644,12 @@ bufappend(struct parse *p, struct pnode *pn) p->b[p->bsz] = '\0'; } +/* + * Recursively append all NODE_TEXT nodes to the buffer. + * This descends into non-text nodes, but doesn't do anything beyond + * them. + * In other words, this is a recursive text grok. + */ static void bufappend_r(struct parse *p, struct pnode *pn) { @@ -587,8 +662,7 @@ bufappend_r(struct parse *p, struct pnode *pn) } /* - * Print text presumably on a macro line. - * Ignore any child macros. + * Recursively print text presumably on a macro line. * Convert all whitespace to regular spaces. */ static void @@ -638,6 +712,44 @@ pnode_printmacroline(struct parse *p, struct pnode *pn } static void +pnode_printmopen(struct parse *p) +{ + if (p->newln) { + putchar('.'); + p->newln = 0; + } else + putchar(' '); +} + +static void +pnode_printmclose(struct parse *p, int sv) +{ + + if (sv && ! p->newln) { + putchar('\n'); + p->newln = 1; + } +} + +/* + * If the SYNOPSIS macro has a superfluous title, kill it. + */ +static void +pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn) +{ + struct pnode *pp; + + TAILQ_FOREACH(pp, &pn->childq, child) + if (NODE_TITLE == pp->node) { + pnode_unlink(pp); + return; + } +} + +/* + * Start a hopefully-named `Sh' section. + */ +static void pnode_printrefsect(struct parse *p, struct pnode *pn) { struct pnode *pp; @@ -648,12 +760,16 @@ pnode_printrefsect(struct parse *p, struct pnode *pn) fputs(".Sh ", stdout); - if (NULL != pp) + if (NULL != pp) { pnode_printmacroline(p, pp); - else + pnode_unlink(pp); + } else puts("UNKNOWN"); } +/* + * Start a reference, extracting the title and volume. + */ static void pnode_printciterefentry(struct parse *p, struct pnode *pn) { @@ -696,6 +812,7 @@ pnode_printrefmeta(struct parse *p, struct pnode *pn) fputs(".Dt ", stdout); if (NULL != title) { + /* FIXME: uppercase. */ pnode_printmacrolinepart(p, title); putchar(' '); } else @@ -780,30 +897,64 @@ pnode_printfuncprototype(struct parse *p, struct pnode puts(".Fc"); } -/* TODO: handle "optional" values. */ +/* + * The element is more complicated than it should be because text + * nodes are treated like ".Ar foo", but non-text nodes need to be + * re-sent into the printer (i.e., without the preceding ".Ar"). + * TODO: handle "optional" attribute. + */ static void -pnode_printarg(struct parse *p, struct pnode *pn, int nested) +pnode_printarg(struct parse *p, struct pnode *pn) { struct pnode *pp; - int sv = nested; - if ( ! nested) - fputs(".", stdout); - nested = 1; - TAILQ_FOREACH(pp, &pn->childq, child) - if (NODE_OPTION == pp->node) { - fputs("Fl ", stdout); - pnode_printmacrolinepart(p, pp); - } else if (NODE_TEXT == pp->node) { + TAILQ_FOREACH(pp, &pn->childq, child) { + if (NODE_TEXT == pp->node) { + pnode_printmopen(p); fputs("Ar ", stdout); - pnode_printmacrolinepart(p, pp); - } else if (NODE_ARG == pp->node) - pnode_printarg(p, pp, nested); + } + pnode_print(p, pp); + } +} - if ( ! sv) - puts(""); +/* + * Recursively search and return the first instance of "node". + */ +static struct pnode * +pnode_findfirst(struct pnode *pn, enum nodeid node) +{ + struct pnode *pp, *res; + + res = NULL; + TAILQ_FOREACH(pp, &pn->childq, child) { + res = pp->node == node ? pp : + pnode_findfirst(pp, node); + if (NULL != res) + break; + } + + return(res); } +static void +pnode_printprologue(struct parse *p, struct pnode *pn) +{ + struct pnode *pp; + + pp = NULL == p->root ? NULL : + pnode_findfirst(p->root, NODE_REFMETA); + + if (NULL != pp) { + pnode_printrefmeta(p, pp); + pnode_unlink(pp); + } else { + puts(".\\\" Supplying bogus prologue..."); + puts(".Dd $Mdocdate" "$"); + puts(".Dt UNKNOWN 1"); + puts(".Os"); + } +} + /* * Print a parsed node (or ignore it--whatever). * This is a recursive function. @@ -814,84 +965,95 @@ pnode_print(struct parse *p, struct pnode *pn) { struct pnode *pp; char *cp; - int last; + int last, sv; if (NULL == pn) return; - if (NODE_TEXT != pn->node && NODE_ROOT != pn->node) - printf(".\\\" %s\n", nodes[pn->node].name); + sv = p->newln; switch (pn->node) { case (NODE_ARG): - pnode_printarg(p, pn, 0); + pnode_printarg(p, pn); pnode_unlinksub(pn); break; case (NODE_CITEREFENTRY): + assert(p->newln); pnode_printciterefentry(p, pn); pnode_unlinksub(pn); break; case (NODE_CODE): - fputs(".Li ", stdout); - pnode_printmacroline(p, pn); - pnode_unlinksub(pn); + pnode_printmopen(p); + fputs("Li ", stdout); break; case (NODE_COMMAND): - fputs(".Nm ", stdout); - pnode_printmacroline(p, pn); - pnode_unlinksub(pn); + pnode_printmopen(p); + fputs("Nm ", stdout); break; case (NODE_FUNCTION): - fputs(".Fn ", stdout); - pnode_printmacroline(p, pn); - pnode_unlinksub(pn); + pnode_printmopen(p); + fputs("Fn ", stdout); break; case (NODE_FUNCPROTOTYPE): + assert(p->newln); pnode_printfuncprototype(p, pn); pnode_unlinksub(pn); break; case (NODE_FUNCSYNOPSISINFO): - fputs(".Fd ", stdout); - pnode_printmacroline(p, pn); - pnode_unlinksub(pn); + pnode_printmopen(p); + fputs("Fd ", stdout); break; + case (NODE_OPTION): + pnode_printmopen(p); + fputs("Fl ", stdout); + break; case (NODE_PARA): - /* FIXME: not always. */ + assert(p->newln); puts(".Pp"); break; case (NODE_PARAMETER): - fputs(".Fa \"", stdout); + /* Suppress non-text children... */ + pnode_printmopen(p); + fputs("Fa \"", stdout); pnode_printmacrolinepart(p, pn); puts("\""); pnode_unlinksub(pn); break; case (NODE_PROGRAMLISTING): + assert(p->newln); puts(".Bd -literal"); break; case (NODE_REFMETA): - pnode_printrefmeta(p, pn); - pnode_unlinksub(pn); + abort(); break; case (NODE_REFNAME): - fputs(".Nm ", stdout); - pnode_printmacroline(p, pn); + /* Suppress non-text children... */ + pnode_printmopen(p); + fputs("Nm ", stdout); + pnode_printmacrolinepart(p, pn); pnode_unlinksub(pn); - return; + break; case (NODE_REFNAMEDIV): + assert(p->newln); puts(".Sh NAME"); break; case (NODE_REFPURPOSE): + assert(p->newln); fputs(".Nd ", stdout); - pnode_printmacroline(p, pn); - pnode_unlinksub(pn); - return; + break; case (NODE_REFSYNOPSISDIV): + assert(p->newln); + pnode_printrefsynopsisdiv(p, pn); puts(".Sh SYNOPSIS"); break; case (NODE_REFSECT1): + assert(p->newln); pnode_printrefsect(p, pn); - pnode_unlinksub(pn); break; + case (NODE_STRUCTNAME): + pnode_printmopen(p); + fputs("Vt ", stdout); + break; case (NODE_TEXT): bufclear(p); bufappend(p, pn); @@ -901,6 +1063,7 @@ pnode_print(struct parse *p, struct pnode *pn) * XXX: all whitespace, including tabs (?). * Remember to escape control characters and escapes. */ + assert(p->bsz); for (last = '\n', cp = p->b; '\0' != *cp; ) { if ('\n' == last) { /* Consume all whitespace. */ @@ -916,8 +1079,7 @@ pnode_print(struct parse *p, struct pnode *pn) if ('\\' == last) putchar('e'); } - if ('\n' != last) - putchar('\n'); + p->newln = 0; break; default: break; @@ -927,8 +1089,22 @@ pnode_print(struct parse *p, struct pnode *pn) pnode_print(p, pp); switch (pn->node) { + case (NODE_ARG): + case (NODE_CODE): + case (NODE_COMMAND): + case (NODE_FUNCTION): + case (NODE_FUNCSYNOPSISINFO): + case (NODE_OPTION): + case (NODE_PARAMETER): + case (NODE_REFNAME): + case (NODE_STRUCTNAME): + case (NODE_TEXT): + pnode_printmclose(p, sv); + break; case (NODE_PROGRAMLISTING): + assert(p->newln); puts(".Ed"); + p->newln = 1; break; default: break; @@ -966,6 +1142,8 @@ readfile(XML_Parser xp, int fd, * Exit when we've read all or errors have occured * during the parse sequence. */ + p.newln = 1; + pnode_printprologue(&p, p.root); pnode_print(&p, p.root); pnode_free(p.root); free(p.b);