=================================================================== RCS file: /cvs/docbook2mdoc/docbook2mdoc.c,v retrieving revision 1.1.1.1 retrieving revision 1.12 diff -u -p -r1.1.1.1 -r1.12 --- docbook2mdoc/docbook2mdoc.c 2014/03/28 02:04:47 1.1.1.1 +++ docbook2mdoc/docbook2mdoc.c 2014/03/29 22:44:06 1.12 @@ -1,4 +1,4 @@ -/* $Id: docbook2mdoc.c,v 1.1.1.1 2014/03/28 02:04:47 kristaps Exp $ */ +/* $Id: docbook2mdoc.c,v 1.12 2014/03/29 22:44:06 kristaps Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * @@ -24,6 +24,7 @@ #include #include #include +#include /* * All recognised node types. @@ -31,12 +32,21 @@ enum nodeid { NODE_ROOT = 0, /* Must comes first. */ /* Alpha-ordered hereafter. */ + NODE_ARG, NODE_CITEREFENTRY, + NODE_CMDSYNOPSIS, NODE_CODE, + NODE_COMMAND, + NODE_FUNCDEF, + NODE_FUNCPROTOTYPE, NODE_FUNCSYNOPSIS, NODE_FUNCSYNOPSISINFO, + NODE_FUNCTION, NODE_MANVOLNUM, + NODE_OPTION, NODE_PARA, + NODE_PARAMDEF, + NODE_PARAMETER, NODE_PROGRAMLISTING, NODE_REFCLASS, NODE_REFDESCRIPTOR, @@ -49,6 +59,7 @@ enum nodeid { NODE_REFPURPOSE, NODE_REFSECT1, NODE_REFSYNOPSISDIV, + NODE_STRUCTNAME, NODE_SYNOPSIS, NODE_TEXT, NODE_TITLE, @@ -56,44 +67,104 @@ enum nodeid { }; /* + * All recognised attribute keys. + */ +enum attrkey { + /* Alpha-order... */ + ATTRKEY_CHOICE = 0, + ATTRKEY_ID, + ATTRKEY_REP, + ATTRKEY__MAX +}; + +/* + * All [explicitly] recognised attribute values. + * If an attribute has ATTRVAL__MAX, it could be a free-form. + */ +enum attrval { + /* Alpha-order... */ + ATTRVAL_NOREPEAT, + ATTRVAL_OPT, + ATTRVAL_PLAIN, + ATTRVAL_REPEAT, + ATTRVAL_REQ, + ATTRVAL__MAX +}; + +/* * Global parse state. * Keep this as simple and small as possible. */ struct parse { + XML_Parser xml; enum nodeid node; /* current (NODE_ROOT if pre-tree) */ + const char *fname; /* filename */ int stop; /* should we stop now? */ struct pnode *root; /* root of parse tree */ struct pnode *cur; /* current node in tree */ - char *b; - size_t bsz; - size_t mbsz; + char *b; /* nil-terminated buffer for pre-print */ + size_t bsz; /* current length of b */ + size_t mbsz; /* max bsz allocation */ + int newln; /* output: are we on a fresh line */ }; struct node { - const char *name; + const char *name; /* docbook element name */ unsigned int flags; #define NODE_IGNTEXT 1 /* ignore all contained text */ }; TAILQ_HEAD(pnodeq, pnode); +TAILQ_HEAD(pattrq, pattr); +struct pattr { + enum attrkey key; + enum attrval val; + char *rawval; + TAILQ_ENTRY(pattr) child; +}; + struct pnode { enum nodeid node; /* node type */ char *b; /* binary data buffer */ size_t bsz; /* data buffer size */ struct pnode *parent; /* parent (or NULL if top) */ struct pnodeq childq; /* queue of children */ + struct pattrq attrq; /* attributes of node */ TAILQ_ENTRY(pnode) child; }; +static const char *attrkeys[ATTRKEY__MAX] = { + "choice", + "id", + "rep" +}; + +static const char *attrvals[ATTRVAL__MAX] = { + "norepeat", + "opt", + "plain", + "repeat", + "req" +}; + static const struct node nodes[NODE__MAX] = { { NULL, 0 }, + { "arg", 0 }, { "citerefentry", NODE_IGNTEXT }, + { "cmdsynopsis", NODE_IGNTEXT }, { "code", 0 }, + { "command", 0 }, + { "funcdef", 0 }, + { "funcprototype", NODE_IGNTEXT }, { "funcsynopsis", NODE_IGNTEXT }, { "funcsynopsisinfo", 0 }, + { "function", 0 }, { "manvolnum", 0 }, + { "option", 0 }, { "para", 0 }, + { "paramdef", 0 }, + { "parameter", 0 }, { "programlisting", 0 }, { "refclass", NODE_IGNTEXT }, { "refdescriptor", NODE_IGNTEXT }, @@ -106,13 +177,57 @@ static const struct node nodes[NODE__MAX] = { { "refpurpose", 0 }, { "refsect1", 0 }, { "refsynopsisdiv", NODE_IGNTEXT }, + { "structname", 0 }, { "synopsis", 0 }, { NULL, 0 }, { "title", 0 }, }; +static void +pnode_print(struct parse *p, struct pnode *pn); + +static int +isattrkey(enum nodeid node, enum attrkey key) +{ + + switch (key) { + case (ATTRKEY_CHOICE): + return(node == NODE_ARG); + case (ATTRKEY_ID): + /* Common to all. */ + return(1); + case (ATTRKEY_REP): + return(node == NODE_ARG); + default: + break; + } + abort(); + return(0); +} + +static int +isattrval(enum attrkey key, enum attrval val) +{ + + switch (val) { + case (ATTRVAL_OPT): + case (ATTRVAL_PLAIN): + case (ATTRVAL_REQ): + return(key == ATTRKEY_CHOICE); + case (ATTRVAL_REPEAT): + case (ATTRVAL_NOREPEAT): + return(key == ATTRKEY_REP); + default: + break; + } + abort(); + return(0); +} + /* * Look up whether "parent" is a valid parent for "node". + * This is sucked directly from the DocBook specification: look at the + * "children" and "parent" sections of each node. */ static int isparent(enum nodeid node, enum nodeid parent) @@ -121,6 +236,15 @@ isparent(enum nodeid node, enum nodeid parent) switch (node) { case (NODE_ROOT): return(0); + case (NODE_ARG): + switch (parent) { + case (NODE_ARG): + case (NODE_CMDSYNOPSIS): + return(1); + default: + break; + } + return(0); case (NODE_CITEREFENTRY): switch (parent) { case (NODE_FUNCSYNOPSISINFO): @@ -137,6 +261,16 @@ isparent(enum nodeid node, enum nodeid parent) break; } return(0); + case (NODE_CMDSYNOPSIS): + switch (parent) { + case (NODE_PARA): + case (NODE_REFSECT1): + case (NODE_REFSYNOPSISDIV): + return(1); + default: + break; + } + return(0); case (NODE_CODE): switch (parent) { case (NODE_FUNCSYNOPSISINFO): @@ -153,15 +287,27 @@ isparent(enum nodeid node, enum nodeid parent) break; } return(0); - case (NODE_MANVOLNUM): + case (NODE_COMMAND): switch (parent) { - case (NODE_CITEREFENTRY): - case (NODE_REFMETA): + case (NODE_CMDSYNOPSIS): + case (NODE_FUNCSYNOPSISINFO): + case (NODE_PARA): + case (NODE_PROGRAMLISTING): + case (NODE_REFDESCRIPTOR): + case (NODE_REFENTRYTITLE): + case (NODE_REFNAME): + case (NODE_REFPURPOSE): + case (NODE_SYNOPSIS): + case (NODE_TITLE): return(1); default: break; } return(0); + case (NODE_FUNCDEF): + return(NODE_FUNCPROTOTYPE == parent); + case (NODE_FUNCPROTOTYPE): + return(NODE_FUNCSYNOPSIS == parent); case (NODE_FUNCSYNOPSIS): switch (parent) { case (NODE_PARA): @@ -174,6 +320,50 @@ isparent(enum nodeid node, enum nodeid parent) return(0); case (NODE_FUNCSYNOPSISINFO): return(NODE_FUNCSYNOPSIS == parent); + case (NODE_FUNCTION): + switch (parent) { + case (NODE_CODE): + case (NODE_FUNCDEF): + case (NODE_FUNCSYNOPSISINFO): + case (NODE_PARA): + case (NODE_PROGRAMLISTING): + case (NODE_REFDESCRIPTOR): + case (NODE_REFENTRYTITLE): + case (NODE_REFNAME): + case (NODE_REFPURPOSE): + case (NODE_SYNOPSIS): + case (NODE_TITLE): + return(1); + default: + break; + } + return(0); + case (NODE_MANVOLNUM): + switch (parent) { + case (NODE_CITEREFENTRY): + case (NODE_REFMETA): + return(1); + default: + break; + } + return(0); + case (NODE_OPTION): + switch (parent) { + case (NODE_ARG): + case (NODE_FUNCSYNOPSISINFO): + case (NODE_PARA): + case (NODE_PROGRAMLISTING): + case (NODE_REFDESCRIPTOR): + case (NODE_REFENTRYTITLE): + case (NODE_REFNAME): + case (NODE_REFPURPOSE): + case (NODE_SYNOPSIS): + case (NODE_TITLE): + return(1); + default: + break; + } + return(0); case (NODE_PARA): switch (parent) { case (NODE_REFSECT1): @@ -183,6 +373,26 @@ isparent(enum nodeid node, enum nodeid parent) break; } return(0); + case (NODE_PARAMDEF): + return(NODE_FUNCPROTOTYPE == parent); + case (NODE_PARAMETER): + switch (parent) { + case (NODE_CODE): + case (NODE_FUNCSYNOPSISINFO): + case (NODE_PARA): + case (NODE_PARAMDEF): + case (NODE_PROGRAMLISTING): + case (NODE_REFDESCRIPTOR): + case (NODE_REFENTRYTITLE): + case (NODE_REFNAME): + case (NODE_REFPURPOSE): + case (NODE_SYNOPSIS): + case (NODE_TITLE): + return(1); + default: + break; + } + return(0); case (NODE_PROGRAMLISTING): switch (parent) { case (NODE_PARA): @@ -221,6 +431,26 @@ isparent(enum nodeid node, enum nodeid parent) return(parent == NODE_REFENTRY); case (NODE_REFSYNOPSISDIV): return(parent == NODE_REFENTRY); + case (NODE_STRUCTNAME): + switch (parent) { + case (NODE_CODE): + case (NODE_FUNCSYNOPSISINFO): + case (NODE_FUNCTION): + case (NODE_OPTION): + case (NODE_PARA): + case (NODE_PARAMETER): + case (NODE_PROGRAMLISTING): + case (NODE_REFDESCRIPTOR): + case (NODE_REFENTRYTITLE): + case (NODE_REFNAME): + case (NODE_REFPURPOSE): + case (NODE_SYNOPSIS): + case (NODE_TITLE): + return(1); + default: + break; + } + return(0); case (NODE_SYNOPSIS): switch (parent) { case (NODE_REFSYNOPSISDIV): @@ -249,11 +479,18 @@ isparent(enum nodeid node, enum nodeid parent) return(0); } +/* + * Process a stream of characters. + * We store text as nodes in and of themselves. + * If a text node is already open, append to it. + * If it's not open, open one under the current context. + */ static void xml_char(void *arg, const XML_Char *p, int sz) { struct parse *ps = arg; struct pnode *dat; + int i; /* Stopped or no tree yet. */ if (ps->stop || NODE_ROOT == ps->node) @@ -268,8 +505,17 @@ xml_char(void *arg, const XML_Char *p, int sz) * Are we in the midst of processing text? * If we're not processing text right now, then create a text * node for doing so. + * However, don't do so unless we have some non-whitespace to + * process: strip out all leading whitespace to be sure. */ if (NODE_TEXT != ps->node) { + for (i = 0; i < sz; i++) + if ( ! isspace((int)p[i])) + break; + if (i == sz) + return; + p += i; + sz -= i; dat = calloc(1, sizeof(struct pnode)); if (NULL == dat) { perror(NULL); @@ -279,10 +525,10 @@ xml_char(void *arg, const XML_Char *p, int sz) dat->node = ps->node = NODE_TEXT; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); + TAILQ_INIT(&dat->attrq); TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); ps->cur = dat; assert(NULL != ps->root); - } /* Append to current buffer. */ @@ -297,23 +543,40 @@ xml_char(void *arg, const XML_Char *p, int sz) ps->cur->bsz += (size_t)sz; } +static void +pnode_trim(struct pnode *pn) +{ + + assert(NODE_TEXT == pn->node); + for ( ; pn->bsz > 0; pn->bsz--) + if ( ! isspace((int)pn->b[pn->bsz - 1])) + break; +} + /* * Begin an element. * First, look for the element. * If we don't find it and we're not parsing, keep going. - * If we don't find it (and we're parsing), puke and exit. + * If we don't find it and we're parsing, puke and exit. * If we find it but we're not parsing yet (i.e., it's not a refentry * and thus out of context), keep going. - * If we're at the root and already have a tree, puke and exit. + * If we find it and we're at the root and already have a tree, puke and + * exit (FIXME: I don't think this is right?). + * If we find it but we're parsing a text node, close out the text node, + * return to its parent, and keep going. * Make sure that the element is in the right context. * Lastly, put the node onto our parse tree and continue. */ static void xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts) { - struct parse *ps = arg; - enum nodeid node; - struct pnode *dat; + struct parse *ps = arg; + enum nodeid node; + enum attrkey key; + enum attrval val; + struct pnode *dat; + struct pattr *pattr; + const XML_Char **att; if (ps->stop) return; @@ -321,6 +584,7 @@ xml_elem_start(void *arg, const XML_Char *name, const /* Close out text node, if applicable... */ if (NODE_TEXT == ps->node) { assert(NULL != ps->cur); + pnode_trim(ps->cur); ps->cur = ps->cur->parent; assert(NULL != ps->cur); ps->node = ps->cur->node; @@ -333,21 +597,27 @@ xml_elem_start(void *arg, const XML_Char *name, const break; if (NODE__MAX == node && NODE_ROOT == ps->node) { - fprintf(stderr, "%s: ignoring node\n", name); return; } else if (NODE__MAX == node) { - fprintf(stderr, "%s: unknown node\n", name); + fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n", + ps->fname, XML_GetCurrentLineNumber(ps->xml), + XML_GetCurrentColumnNumber(ps->xml), name); ps->stop = 1; return; } else if (NODE_ROOT == ps->node && NULL != ps->root) { - fprintf(stderr, "%s: reentering?\n", name); + fprintf(stderr, "%s:%zu:%zu: multiple refentries\n", + ps->fname, XML_GetCurrentLineNumber(ps->xml), + XML_GetCurrentColumnNumber(ps->xml)); ps->stop = 1; return; } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) { - fprintf(stderr, "%s: known node w/o context\n", name); return; } else if ( ! isparent(node, ps->node)) { - fprintf(stderr, "%s: bad parent\n", name); + fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\"\n", + ps->fname, XML_GetCurrentLineNumber(ps->xml), + XML_GetCurrentColumnNumber(ps->xml), + NULL == nodes[ps->node].name ? + "(none)" : nodes[ps->node].name); ps->stop = 1; return; } @@ -360,6 +630,7 @@ xml_elem_start(void *arg, const XML_Char *name, const dat->node = ps->node = node; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); + TAILQ_INIT(&dat->attrq); if (NULL != ps->cur) TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); @@ -367,11 +638,53 @@ xml_elem_start(void *arg, const XML_Char *name, const ps->cur = dat; if (NULL == ps->root) ps->root = dat; + + /* + * Process attributes. + */ + for (att = atts; NULL != *att; att += 2) { + for (key = 0; key < ATTRKEY__MAX; key++) + if (0 == strcmp(*att, attrkeys[key])) + break; + if (ATTRKEY__MAX == key) { + fprintf(stderr, "%s:%zu:%zu: unknown " + "attribute \"%s\"\n", ps->fname, + XML_GetCurrentLineNumber(ps->xml), + XML_GetCurrentColumnNumber(ps->xml), + *att); + continue; + } else if ( ! isattrkey(node, key)) { + fprintf(stderr, "%s:%zu:%zu: bad " + "attribute \"%s\"\n", ps->fname, + XML_GetCurrentLineNumber(ps->xml), + XML_GetCurrentColumnNumber(ps->xml), + *att); + continue; + } + for (val = 0; val < ATTRVAL__MAX; val++) + if (0 == strcmp(*(att + 1), attrvals[val])) + break; + if (ATTRVAL__MAX != val && ! isattrval(key, val)) { + fprintf(stderr, "%s:%zu:%zu: bad " + "value \"%s\"\n", ps->fname, + XML_GetCurrentLineNumber(ps->xml), + XML_GetCurrentColumnNumber(ps->xml), + *(att + 1)); + continue; + } + pattr = calloc(1, sizeof(struct pattr)); + pattr->key = key; + pattr->val = val; + if (ATTRVAL__MAX == val) + pattr->rawval = strdup(*(att + 1)); + TAILQ_INSERT_TAIL(&dat->attrq, pattr, child); + } + } /* * Roll up the parse tree. - * Does nothing else special. + * If we're at a text node, roll that one up first. * If we hit the root, then assign ourselves as the NODE_ROOT. */ static void @@ -385,6 +698,7 @@ xml_elem_end(void *arg, const XML_Char *name) /* Close out text node, if applicable... */ if (NODE_TEXT == ps->node) { assert(NULL != ps->cur); + pnode_trim(ps->cur); ps->cur = ps->cur->parent; assert(NULL != ps->cur); ps->node = ps->cur->node; @@ -396,10 +710,14 @@ xml_elem_end(void *arg, const XML_Char *name) ps->node = ps->cur->node; } +/* + * Recursively free a node (NULL is ok). + */ static void pnode_free(struct pnode *pn) { struct pnode *pp; + struct pattr *ap; if (NULL == pn) return; @@ -409,10 +727,19 @@ pnode_free(struct pnode *pn) pnode_free(pp); } + while (NULL != (ap = TAILQ_FIRST(&pn->attrq))) { + TAILQ_REMOVE(&pn->attrq, ap, child); + free(ap->rawval); + free(ap); + } + free(pn->b); free(pn); } +/* + * Unlink a node from its parent and pnode_free() it. + */ static void pnode_unlink(struct pnode *pn) { @@ -422,13 +749,32 @@ pnode_unlink(struct pnode *pn) pnode_free(pn); } +/* + * Unlink all children of a node and pnode_free() them. + */ static void +pnode_unlinksub(struct pnode *pn) +{ + + while ( ! TAILQ_EMPTY(&pn->childq)) + pnode_unlink(TAILQ_FIRST(&pn->childq)); +} + +/* + * Reset the lookaside buffer. + */ +static void bufclear(struct parse *p) { p->b[p->bsz = 0] = '\0'; } +/* + * Append NODE_TEXT contents to the current buffer, reallocating its + * size if necessary. + * The buffer is ALWAYS nil-terminated. + */ static void bufappend(struct parse *p, struct pnode *pn) { @@ -447,22 +793,35 @@ bufappend(struct parse *p, struct pnode *pn) } /* - * Print text presumably on a macro line. - * Ignore any child macros. - * Convert all whitespace to regular spaces. + * Recursively append all NODE_TEXT nodes to the buffer. + * This descends into non-text nodes, but doesn't do anything beyond + * them. + * In other words, this is a recursive text grok. */ static void -pnode_printmacrolinepart(struct parse *p, struct pnode *pn) +bufappend_r(struct parse *p, struct pnode *pn) { struct pnode *pp; + + if (NODE_TEXT == pn->node) + bufappend(p, pn); + TAILQ_FOREACH(pp, &pn->childq, child) + bufappend_r(p, pp); +} + +#define MACROLINE_NORM 0 +#define MACROLINE_UPPER 1 +/* + * Recursively print text presumably on a macro line. + * Convert all whitespace to regular spaces. + */ +static void +pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl) +{ char *cp; bufclear(p); - while (NULL != (pp = TAILQ_FIRST(&pn->childq))) { - if (NODE_TEXT == pp->node) - bufappend(p, pp); - pnode_unlink(pp); - } + bufappend_r(p, pn); /* Convert all space to spaces. */ for (cp = p->b; '\0' != *cp; cp++) @@ -470,8 +829,7 @@ pnode_printmacrolinepart(struct parse *p, struct pnode *cp = ' '; for (cp = p->b; isspace((int)*cp); cp++) - /* Spin. */ ; - + /* Spin past whitespace (XXX: necessary?) */ ; for ( ; '\0' != *cp; cp++) { /* Escape us if we look like a macro. */ if ((cp == p->b || ' ' == *(cp - 1)) && @@ -484,13 +842,23 @@ pnode_printmacrolinepart(struct parse *p, struct pnode ('\0' == *(cp + 3) || ' ' == *(cp + 3))))) fputs("\\&", stdout); - putchar(*cp); + if (MACROLINE_UPPER & fl) + putchar(toupper((int)*cp)); + else + putchar((int)*cp); /* If we're a character escape, escape us. */ if ('\\' == *cp) putchar('e'); } } +static void +pnode_printmacrolinepart(struct parse *p, struct pnode *pn) +{ + + pnode_printmacrolinetext(p, pn, 0); +} + /* * Just pnode_printmacrolinepart() but with a newline. * If no text, just the newline. @@ -499,11 +867,49 @@ static void pnode_printmacroline(struct parse *p, struct pnode *pn) { - pnode_printmacrolinepart(p, pn); + pnode_printmacrolinetext(p, pn, 0); putchar('\n'); } static void +pnode_printmopen(struct parse *p) +{ + if (p->newln) { + putchar('.'); + p->newln = 0; + } else + putchar(' '); +} + +static void +pnode_printmclose(struct parse *p, int sv) +{ + + if (sv && ! p->newln) { + putchar('\n'); + p->newln = 1; + } +} + +/* + * If the SYNOPSIS macro has a superfluous title, kill it. + */ +static void +pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn) +{ + struct pnode *pp; + + TAILQ_FOREACH(pp, &pn->childq, child) + if (NODE_TITLE == pp->node) { + pnode_unlink(pp); + return; + } +} + +/* + * Start a hopefully-named `Sh' section. + */ +static void pnode_printrefsect(struct parse *p, struct pnode *pn) { struct pnode *pp; @@ -512,14 +918,18 @@ pnode_printrefsect(struct parse *p, struct pnode *pn) if (NODE_TITLE == pp->node) break; + fputs(".Sh ", stdout); + if (NULL != pp) { - fputs(".Sh ", stdout); pnode_printmacroline(p, pp); pnode_unlink(pp); } else - puts(".Sh UNKNOWN"); + puts("UNKNOWN"); } +/* + * Start a reference, extracting the title and volume. + */ static void pnode_printciterefentry(struct parse *p, struct pnode *pn) { @@ -533,16 +943,16 @@ pnode_printciterefentry(struct parse *p, struct pnode title = pp; fputs(".Xr ", stdout); + if (NULL != title) { pnode_printmacrolinepart(p, title); - pnode_unlink(title); + putchar(' '); } else - fputs("unknown", stdout); - putchar(' '); - if (NULL != manvol) { + fputs("unknown ", stdout); + + if (NULL != manvol) pnode_printmacroline(p, manvol); - pnode_unlink(manvol); - } else + else puts("1"); } @@ -558,24 +968,172 @@ pnode_printrefmeta(struct parse *p, struct pnode *pn) else if (NODE_REFENTRYTITLE == pp->node) title = pp; - puts(".Dd $Mdocdate: March 28 2014 $"); + puts(".Dd $Mdocdate" "$"); fputs(".Dt ", stdout); if (NULL != title) { - pnode_printmacrolinepart(p, title); - pnode_unlink(title); + /* FIXME: uppercase. */ + pnode_printmacrolinetext(p, title, MACROLINE_UPPER); + putchar(' '); } else - fputs("UNKNOWN", stdout); - putchar(' '); - if (NULL != manvol) { + fputs("UNKNOWN ", stdout); + + if (NULL != manvol) pnode_printmacroline(p, manvol); - pnode_unlink(manvol); - } else + else puts("1"); puts(".Os"); } +static void +pnode_printfuncdef(struct parse *p, struct pnode *pn) +{ + struct pnode *pp, *ftype, *func; + + ftype = func = NULL; + TAILQ_FOREACH(pp, &pn->childq, child) + if (NODE_TEXT == pp->node) + ftype = pp; + else if (NODE_FUNCTION == pp->node) + func = pp; + + if (NULL != ftype) { + fputs(".Ft ", stdout); + pnode_printmacroline(p, ftype); + } + + if (NULL != func) { + fputs(".Fo ", stdout); + pnode_printmacroline(p, func); + } else + puts(".Fo UNKNOWN"); +} + +static void +pnode_printparamdef(struct parse *p, struct pnode *pn) +{ + struct pnode *pp, *ptype, *param; + + ptype = param = NULL; + TAILQ_FOREACH(pp, &pn->childq, child) + if (NODE_TEXT == pp->node) + ptype = pp; + else if (NODE_PARAMETER == pp->node) + param = pp; + + fputs(".Fa \"", stdout); + if (NULL != ptype) { + pnode_printmacrolinepart(p, ptype); + putchar(' '); + } + + if (NULL != param) + pnode_printmacrolinepart(p, param); + + puts("\""); +} + +static void +pnode_printfuncprototype(struct parse *p, struct pnode *pn) +{ + struct pnode *pp, *fdef; + + TAILQ_FOREACH(fdef, &pn->childq, child) + if (NODE_FUNCDEF == fdef->node) + break; + + if (NULL != fdef) + pnode_printfuncdef(p, fdef); + else + puts(".Fo UNKNOWN"); + + TAILQ_FOREACH(pp, &pn->childq, child) + if (NODE_PARAMDEF == pp->node) + pnode_printparamdef(p, pp); + + puts(".Fc"); +} + +/* + * The element is more complicated than it should be because text + * nodes are treated like ".Ar foo", but non-text nodes need to be + * re-sent into the printer (i.e., without the preceding ".Ar"). + * This also handles the case of "repetition" (or in other words, the + * ellipsis following an argument) and optionality. + */ +static void +pnode_printarg(struct parse *p, struct pnode *pn) +{ + struct pnode *pp; + struct pattr *ap; + int isop, isrep; + + isop = 1; + isrep = 0; + TAILQ_FOREACH(ap, &pn->attrq, child) + if (ATTRKEY_CHOICE == ap->key && + (ATTRVAL_PLAIN == ap->val || + ATTRVAL_REQ == ap->val)) + isop = 0; + else if (ATTRKEY_REP == ap->key && + (ATTRVAL_REPEAT == ap->val)) + isrep = 1; + + if (isop) { + pnode_printmopen(p); + fputs("Op ", stdout); + } + + TAILQ_FOREACH(pp, &pn->childq, child) { + if (NODE_TEXT == pp->node) { + pnode_printmopen(p); + fputs("Ar ", stdout); + } + pnode_print(p, pp); + if (NODE_TEXT == pp->node && isrep) + fputs("...", stdout); + } +} + +/* + * Recursively search and return the first instance of "node". + */ +static struct pnode * +pnode_findfirst(struct pnode *pn, enum nodeid node) +{ + struct pnode *pp, *res; + + res = NULL; + TAILQ_FOREACH(pp, &pn->childq, child) { + res = pp->node == node ? pp : + pnode_findfirst(pp, node); + if (NULL != res) + break; + } + + return(res); +} + +static void +pnode_printprologue(struct parse *p, struct pnode *pn) +{ + struct pnode *pp; + + pp = NULL == p->root ? NULL : + pnode_findfirst(p->root, NODE_REFMETA); + + if (NULL != pp) { + pnode_printrefmeta(p, pp); + pnode_unlink(pp); + } else { + puts(".\\\" Supplying bogus prologue..."); + puts(".Dd $Mdocdate" "$"); + puts(".Dt UNKNOWN 1"); + puts(".Os"); + } +} + /* * Print a parsed node (or ignore it--whatever). * This is a recursive function. @@ -586,53 +1144,95 @@ pnode_print(struct parse *p, struct pnode *pn) { struct pnode *pp; char *cp; - int last; + int last, sv; if (NULL == pn) return; - if (NODE_TEXT != pn->node && NODE_ROOT != pn->node) - printf(".\\\" %s\n", nodes[pn->node].name); + sv = p->newln; switch (pn->node) { + case (NODE_ARG): + pnode_printarg(p, pn); + pnode_unlinksub(pn); + break; case (NODE_CITEREFENTRY): + assert(p->newln); pnode_printciterefentry(p, pn); + pnode_unlinksub(pn); break; case (NODE_CODE): - fputs(".Li ", stdout); - pnode_printmacroline(p, pn); + pnode_printmopen(p); + fputs("Li ", stdout); break; + case (NODE_COMMAND): + pnode_printmopen(p); + fputs("Nm ", stdout); + break; + case (NODE_FUNCTION): + pnode_printmopen(p); + fputs("Fn ", stdout); + break; + case (NODE_FUNCPROTOTYPE): + assert(p->newln); + pnode_printfuncprototype(p, pn); + pnode_unlinksub(pn); + break; case (NODE_FUNCSYNOPSISINFO): - fputs(".Fd ", stdout); - pnode_printmacroline(p, pn); + pnode_printmopen(p); + fputs("Fd ", stdout); break; + case (NODE_OPTION): + pnode_printmopen(p); + fputs("Fl ", stdout); + break; case (NODE_PARA): - /* FIXME: not always. */ + assert(p->newln); puts(".Pp"); break; + case (NODE_PARAMETER): + /* Suppress non-text children... */ + pnode_printmopen(p); + fputs("Fa \"", stdout); + pnode_printmacrolinepart(p, pn); + puts("\""); + pnode_unlinksub(pn); + break; case (NODE_PROGRAMLISTING): + assert(p->newln); puts(".Bd -literal"); break; case (NODE_REFMETA): - pnode_printrefmeta(p, pn); + abort(); break; case (NODE_REFNAME): - fputs(".Nm ", stdout); - pnode_printmacroline(p, pn); - return; + /* Suppress non-text children... */ + pnode_printmopen(p); + fputs("Nm ", stdout); + pnode_printmacrolinepart(p, pn); + pnode_unlinksub(pn); + break; case (NODE_REFNAMEDIV): + assert(p->newln); puts(".Sh NAME"); break; case (NODE_REFPURPOSE): + assert(p->newln); fputs(".Nd ", stdout); - pnode_printmacroline(p, pn); - return; + break; case (NODE_REFSYNOPSISDIV): + assert(p->newln); + pnode_printrefsynopsisdiv(p, pn); puts(".Sh SYNOPSIS"); break; case (NODE_REFSECT1): + assert(p->newln); pnode_printrefsect(p, pn); break; + case (NODE_STRUCTNAME): + pnode_printmopen(p); + fputs("Vt ", stdout); + break; case (NODE_TEXT): bufclear(p); bufappend(p, pn); @@ -642,6 +1242,7 @@ pnode_print(struct parse *p, struct pnode *pn) * XXX: all whitespace, including tabs (?). * Remember to escape control characters and escapes. */ + assert(p->bsz); for (last = '\n', cp = p->b; '\0' != *cp; ) { if ('\n' == last) { /* Consume all whitespace. */ @@ -657,8 +1258,7 @@ pnode_print(struct parse *p, struct pnode *pn) if ('\\' == last) putchar('e'); } - if ('\n' != last) - putchar('\n'); + p->newln = 0; break; default: break; @@ -668,8 +1268,34 @@ pnode_print(struct parse *p, struct pnode *pn) pnode_print(p, pp); switch (pn->node) { + case (NODE_ARG): + case (NODE_CODE): + case (NODE_COMMAND): + case (NODE_FUNCTION): + case (NODE_FUNCSYNOPSISINFO): + case (NODE_OPTION): + case (NODE_PARAMETER): + case (NODE_STRUCTNAME): + case (NODE_TEXT): + pnode_printmclose(p, sv); + break; + case (NODE_REFNAME): + /* + * If we're in the NAME macro and we have multiple + * macros in sequence, then print out a + * trailing comma before the newline. + */ + if (NULL != pn->parent && + NODE_REFNAMEDIV == pn->parent->node && + NULL != TAILQ_NEXT(pn, child) && + NODE_REFNAME == TAILQ_NEXT(pn, child)->node) + fputs(" ,", stdout); + pnode_printmclose(p, sv); + break; case (NODE_PROGRAMLISTING): + assert(p->newln); puts(".Ed"); + p->newln = 1; break; default: break; @@ -691,6 +1317,8 @@ readfile(XML_Parser xp, int fd, memset(&p, 0, sizeof(struct parse)); p.b = malloc(p.bsz = p.mbsz = 1024); + p.fname = fn; + p.xml = xp; XML_SetCharacterDataHandler(xp, xml_char); XML_SetElementHandler(xp, xml_elem_start, xml_elem_end); @@ -707,6 +1335,8 @@ readfile(XML_Parser xp, int fd, * Exit when we've read all or errors have occured * during the parse sequence. */ + p.newln = 1; + pnode_printprologue(&p, p.root); pnode_print(&p, p.root); pnode_free(p.root); free(p.b);