/* $Id: docbook2mdoc.c,v 1.18 2014/03/30 16:33:27 kristaps Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include "extern.h" /* * Global parse state. * Keep this as simple and small as possible. */ struct parse { XML_Parser xml; enum nodeid node; /* current (NODE_ROOT if pre-tree) */ const char *fname; /* filename */ int stop; /* should we stop now? */ struct pnode *root; /* root of parse tree */ struct pnode *cur; /* current node in tree */ char *b; /* nil-terminated buffer for pre-print */ size_t bsz; /* current length of b */ size_t mbsz; /* max bsz allocation */ int newln; /* output: are we on a fresh line */ }; struct node { const char *name; /* docbook element name */ unsigned int flags; #define NODE_IGNTEXT 1 /* ignore all contained text */ }; TAILQ_HEAD(pnodeq, pnode); TAILQ_HEAD(pattrq, pattr); struct pattr { enum attrkey key; enum attrval val; char *rawval; TAILQ_ENTRY(pattr) child; }; struct pnode { enum nodeid node; /* node type */ char *b; /* binary data buffer */ size_t bsz; /* data buffer size */ struct pnode *parent; /* parent (or NULL if top) */ struct pnodeq childq; /* queue of children */ struct pattrq attrq; /* attributes of node */ TAILQ_ENTRY(pnode) child; }; static const char *attrkeys[ATTRKEY__MAX] = { "choice", "id", "rep" }; static const char *attrvals[ATTRVAL__MAX] = { "norepeat", "opt", "plain", "repeat", "req" }; static const struct node nodes[NODE__MAX] = { { NULL, 0 }, { "arg", 0 }, { "citerefentry", NODE_IGNTEXT }, { "cmdsynopsis", NODE_IGNTEXT }, { "code", 0 }, { "command", 0 }, { "date", 0 }, { "emphasis", 0 }, { "filename", 0 }, { "funcdef", 0 }, { "funcprototype", NODE_IGNTEXT }, { "funcsynopsis", NODE_IGNTEXT }, { "funcsynopsisinfo", 0 }, { "function", 0 }, { "itemizedlist", NODE_IGNTEXT }, { "link", 0 }, { "listitem", NODE_IGNTEXT }, { "manvolnum", 0 }, { "option", 0 }, { "para", 0 }, { "paramdef", 0 }, { "parameter", 0 }, { "programlisting", 0 }, { "refclass", NODE_IGNTEXT }, { "refdescriptor", NODE_IGNTEXT }, { "refentry", NODE_IGNTEXT }, { "refentryinfo", NODE_IGNTEXT }, { "refentrytitle", 0 }, { "refmeta", NODE_IGNTEXT }, { "refmiscinfo", NODE_IGNTEXT }, { "refname", 0 }, { "refnamediv", NODE_IGNTEXT }, { "refpurpose", 0 }, { "refsect1", 0 }, { "refsynopsisdiv", NODE_IGNTEXT }, { "replaceable", 0 }, { "structname", 0 }, { "synopsis", 0 }, { "term", 0 }, { NULL, 0 }, { "title", 0 }, { "ulink", 0 }, { "variablelist", NODE_IGNTEXT }, { "varlistentry", NODE_IGNTEXT }, }; static void pnode_print(struct parse *p, struct pnode *pn); /* * Process a stream of characters. * We store text as nodes in and of themselves. * If a text node is already open, append to it. * If it's not open, open one under the current context. */ static void xml_char(void *arg, const XML_Char *p, int sz) { struct parse *ps = arg; struct pnode *dat; int i; /* Stopped or no tree yet. */ if (ps->stop || NODE_ROOT == ps->node) return; /* Not supposed to be collecting text. */ assert(NULL != ps->cur); if (NODE_IGNTEXT & nodes[ps->node].flags) return; /* * Are we in the midst of processing text? * If we're not processing text right now, then create a text * node for doing so. * However, don't do so unless we have some non-whitespace to * process: strip out all leading whitespace to be sure. */ if (NODE_TEXT != ps->node) { for (i = 0; i < sz; i++) if ( ! isspace((int)p[i])) break; if (i == sz) return; p += i; sz -= i; dat = calloc(1, sizeof(struct pnode)); if (NULL == dat) { perror(NULL); exit(EXIT_FAILURE); } dat->node = ps->node = NODE_TEXT; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); TAILQ_INIT(&dat->attrq); TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); ps->cur = dat; assert(NULL != ps->root); } /* Append to current buffer. */ assert(sz >= 0); ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + (size_t)sz); if (NULL == ps->cur->b) { perror(NULL); exit(EXIT_FAILURE); } memcpy(ps->cur->b + ps->cur->bsz, p, sz); ps->cur->bsz += (size_t)sz; } static void pnode_trim(struct pnode *pn) { assert(NODE_TEXT == pn->node); for ( ; pn->bsz > 0; pn->bsz--) if ( ! isspace((int)pn->b[pn->bsz - 1])) break; } /* * Begin an element. * First, look for the element. * If we don't find it and we're not parsing, keep going. * If we don't find it and we're parsing, puke and exit. * If we find it but we're not parsing yet (i.e., it's not a refentry * and thus out of context), keep going. * If we find it and we're at the root and already have a tree, puke and * exit (FIXME: I don't think this is right?). * If we find it but we're parsing a text node, close out the text node, * return to its parent, and keep going. * Make sure that the element is in the right context. * Lastly, put the node onto our parse tree and continue. */ static void xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts) { struct parse *ps = arg; enum nodeid node; enum attrkey key; enum attrval val; struct pnode *dat; struct pattr *pattr; const XML_Char **att; if (ps->stop) return; /* Close out text node, if applicable... */ if (NODE_TEXT == ps->node) { assert(NULL != ps->cur); pnode_trim(ps->cur); ps->cur = ps->cur->parent; assert(NULL != ps->cur); ps->node = ps->cur->node; } for (node = 0; node < NODE__MAX; node++) if (NULL == nodes[node].name) continue; else if (0 == strcmp(nodes[node].name, name)) break; if (NODE__MAX == node && NODE_ROOT == ps->node) { return; } else if (NODE__MAX == node) { fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n", ps->fname, XML_GetCurrentLineNumber(ps->xml), XML_GetCurrentColumnNumber(ps->xml), name); ps->stop = 1; return; } else if (NODE_ROOT == ps->node && NULL != ps->root) { fprintf(stderr, "%s:%zu:%zu: multiple refentries\n", ps->fname, XML_GetCurrentLineNumber(ps->xml), XML_GetCurrentColumnNumber(ps->xml)); ps->stop = 1; return; } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) { return; } else if ( ! isparent(node, ps->node)) { fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\" " "of node \"%s\"\n", ps->fname, XML_GetCurrentLineNumber(ps->xml), XML_GetCurrentColumnNumber(ps->xml), NULL == nodes[ps->node].name ? "(none)" : nodes[ps->node].name, NULL == nodes[node].name ? "(none)" : nodes[node].name); ps->stop = 1; return; } if (NULL == (dat = calloc(1, sizeof(struct pnode)))) { perror(NULL); exit(EXIT_FAILURE); } dat->node = ps->node = node; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); TAILQ_INIT(&dat->attrq); if (NULL != ps->cur) TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); ps->cur = dat; if (NULL == ps->root) ps->root = dat; /* * Process attributes. */ for (att = atts; NULL != *att; att += 2) { for (key = 0; key < ATTRKEY__MAX; key++) if (0 == strcmp(*att, attrkeys[key])) break; if (ATTRKEY__MAX == key) { fprintf(stderr, "%s:%zu:%zu: unknown " "attribute \"%s\"\n", ps->fname, XML_GetCurrentLineNumber(ps->xml), XML_GetCurrentColumnNumber(ps->xml), *att); continue; } else if ( ! isattrkey(node, key)) { fprintf(stderr, "%s:%zu:%zu: bad " "attribute \"%s\"\n", ps->fname, XML_GetCurrentLineNumber(ps->xml), XML_GetCurrentColumnNumber(ps->xml), *att); continue; } for (val = 0; val < ATTRVAL__MAX; val++) if (0 == strcmp(*(att + 1), attrvals[val])) break; if (ATTRVAL__MAX != val && ! isattrval(key, val)) { fprintf(stderr, "%s:%zu:%zu: bad " "value \"%s\"\n", ps->fname, XML_GetCurrentLineNumber(ps->xml), XML_GetCurrentColumnNumber(ps->xml), *(att + 1)); continue; } pattr = calloc(1, sizeof(struct pattr)); pattr->key = key; pattr->val = val; if (ATTRVAL__MAX == val) pattr->rawval = strdup(*(att + 1)); TAILQ_INSERT_TAIL(&dat->attrq, pattr, child); } } /* * Roll up the parse tree. * If we're at a text node, roll that one up first. * If we hit the root, then assign ourselves as the NODE_ROOT. */ static void xml_elem_end(void *arg, const XML_Char *name) { struct parse *ps = arg; if (ps->stop || NODE_ROOT == ps->node) return; /* Close out text node, if applicable... */ if (NODE_TEXT == ps->node) { assert(NULL != ps->cur); pnode_trim(ps->cur); ps->cur = ps->cur->parent; assert(NULL != ps->cur); ps->node = ps->cur->node; } if (NULL == (ps->cur = ps->cur->parent)) ps->node = NODE_ROOT; else ps->node = ps->cur->node; } /* * Recursively free a node (NULL is ok). */ static void pnode_free(struct pnode *pn) { struct pnode *pp; struct pattr *ap; if (NULL == pn) return; while (NULL != (pp = TAILQ_FIRST(&pn->childq))) { TAILQ_REMOVE(&pn->childq, pp, child); pnode_free(pp); } while (NULL != (ap = TAILQ_FIRST(&pn->attrq))) { TAILQ_REMOVE(&pn->attrq, ap, child); free(ap->rawval); free(ap); } free(pn->b); free(pn); } /* * Unlink a node from its parent and pnode_free() it. */ static void pnode_unlink(struct pnode *pn) { if (NULL != pn->parent) TAILQ_REMOVE(&pn->parent->childq, pn, child); pnode_free(pn); } /* * Unlink all children of a node and pnode_free() them. */ static void pnode_unlinksub(struct pnode *pn) { while ( ! TAILQ_EMPTY(&pn->childq)) pnode_unlink(TAILQ_FIRST(&pn->childq)); } /* * Reset the lookaside buffer. */ static void bufclear(struct parse *p) { p->b[p->bsz = 0] = '\0'; } /* * Append NODE_TEXT contents to the current buffer, reallocating its * size if necessary. * The buffer is ALWAYS nil-terminated. */ static void bufappend(struct parse *p, struct pnode *pn) { assert(NODE_TEXT == pn->node); if (p->bsz + pn->bsz + 1 > p->mbsz) { p->mbsz = p->bsz + pn->bsz + 1; if (NULL == (p->b = realloc(p->b, p->mbsz))) { perror(NULL); exit(EXIT_FAILURE); } } memcpy(p->b + p->bsz, pn->b, pn->bsz); p->bsz += pn->bsz; p->b[p->bsz] = '\0'; } /* * Recursively append all NODE_TEXT nodes to the buffer. * This descends into non-text nodes, but doesn't do anything beyond * them. * In other words, this is a recursive text grok. */ static void bufappend_r(struct parse *p, struct pnode *pn) { struct pnode *pp; if (NODE_TEXT == pn->node) bufappend(p, pn); TAILQ_FOREACH(pp, &pn->childq, child) bufappend_r(p, pp); } #define MACROLINE_NORM 0 #define MACROLINE_UPPER 1 /* * Recursively print text presumably on a macro line. * Convert all whitespace to regular spaces. */ static void pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl) { char *cp; if (0 == p->newln) putchar(' '); bufclear(p); bufappend_r(p, pn); /* Convert all space to spaces. */ for (cp = p->b; '\0' != *cp; cp++) if (isspace((int)*cp)) *cp = ' '; for (cp = p->b; isspace((int)*cp); cp++) /* Spin past whitespace (XXX: necessary?) */ ; for ( ; '\0' != *cp; cp++) { /* Escape us if we look like a macro. */ if ((cp == p->b || ' ' == *(cp - 1)) && isupper((int)*cp) && '\0' != *(cp + 1) && islower((int)*(cp + 1)) && ('\0' == *(cp + 2) || ' ' == *(cp + 2) || (islower((int)*(cp + 2)) && ('\0' == *(cp + 3) || ' ' == *(cp + 3))))) fputs("\\&", stdout); if (MACROLINE_UPPER & fl) putchar(toupper((int)*cp)); else putchar((int)*cp); /* If we're a character escape, escape us. */ if ('\\' == *cp) putchar('e'); } } static void pnode_printmacrolinepart(struct parse *p, struct pnode *pn) { pnode_printmacrolinetext(p, pn, 0); } /* * Just pnode_printmacrolinepart() but with a newline. * If no text, just the newline. */ static void pnode_printmacroline(struct parse *p, struct pnode *pn) { assert(0 == p->newln); pnode_printmacrolinetext(p, pn, 0); putchar('\n'); p->newln = 1; } static void pnode_printmopen(struct parse *p) { if (p->newln) { putchar('.'); p->newln = 0; } else putchar(' '); } static void pnode_printmclose(struct parse *p, int sv) { if (sv && ! p->newln) { putchar('\n'); p->newln = 1; } } /* * If the SYNOPSIS macro has a superfluous title, kill it. */ static void pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn) { struct pnode *pp; TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_TITLE == pp->node) { pnode_unlink(pp); return; } } /* * Start a hopefully-named `Sh' section. */ static void pnode_printrefsect(struct parse *p, struct pnode *pn) { struct pnode *pp; TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_TITLE == pp->node) break; fputs(".Sh", stdout); p->newln = 0; if (NULL != pp) { pnode_printmacrolinetext(p, pp, MACROLINE_UPPER); pnode_printmclose(p, 1); pnode_unlink(pp); } else { puts("UNKNOWN"); p->newln = 1; } } /* * Start a reference, extracting the title and volume. */ static void pnode_printciterefentry(struct parse *p, struct pnode *pn) { struct pnode *pp, *title, *manvol; title = manvol = NULL; assert(p->newln); TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_MANVOLNUM == pp->node) manvol = pp; else if (NODE_REFENTRYTITLE == pp->node) title = pp; fputs(".Xr", stdout); p->newln = 0; if (NULL != title) { pnode_printmacrolinepart(p, title); } else fputs(" unknown ", stdout); if (NULL == manvol) { puts(" 1"); p->newln = 1; } else pnode_printmacroline(p, manvol); } static void pnode_printrefmeta(struct parse *p, struct pnode *pn) { struct pnode *pp, *title, *manvol; title = manvol = NULL; assert(p->newln); TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_MANVOLNUM == pp->node) manvol = pp; else if (NODE_REFENTRYTITLE == pp->node) title = pp; puts(".Dd $Mdocdate" "$"); fputs(".Dt", stdout); p->newln = 0; if (NULL != title) pnode_printmacrolinetext(p, title, MACROLINE_UPPER); else fputs(" UNKNOWN ", stdout); if (NULL == manvol) { puts(" 1"); p->newln = 1; } else pnode_printmacroline(p, manvol); puts(".Os"); } static void pnode_printfuncdef(struct parse *p, struct pnode *pn) { struct pnode *pp, *ftype, *func; assert(p->newln); ftype = func = NULL; TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_TEXT == pp->node) ftype = pp; else if (NODE_FUNCTION == pp->node) func = pp; if (NULL != ftype) { fputs(".Ft", stdout); p->newln = 0; pnode_printmacroline(p, ftype); } if (NULL != func) { fputs(".Fo", stdout); p->newln = 0; pnode_printmacroline(p, func); } else { puts(".Fo UNKNOWN"); p->newln = 1; } } static void pnode_printparamdef(struct parse *p, struct pnode *pn) { struct pnode *pp, *ptype, *param; assert(p->newln); ptype = param = NULL; TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_TEXT == pp->node) ptype = pp; else if (NODE_PARAMETER == pp->node) param = pp; fputs(".Fa \"", stdout); p->newln = 0; if (NULL != ptype) { pnode_printmacrolinepart(p, ptype); putchar(' '); } if (NULL != param) pnode_printmacrolinepart(p, param); puts("\""); p->newln = 1; } static void pnode_printfuncprototype(struct parse *p, struct pnode *pn) { struct pnode *pp, *fdef; assert(p->newln); TAILQ_FOREACH(fdef, &pn->childq, child) if (NODE_FUNCDEF == fdef->node) break; if (NULL != fdef) pnode_printfuncdef(p, fdef); else puts(".Fo UNKNOWN"); TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_PARAMDEF == pp->node) pnode_printparamdef(p, pp); puts(".Fc"); p->newln = 1; } /* * The element is more complicated than it should be because text * nodes are treated like ".Ar foo", but non-text nodes need to be * re-sent into the printer (i.e., without the preceding ".Ar"). * This also handles the case of "repetition" (or in other words, the * ellipsis following an argument) and optionality. */ static void pnode_printarg(struct parse *p, struct pnode *pn) { struct pnode *pp; struct pattr *ap; int isop, isrep; isop = 1; isrep = 0; TAILQ_FOREACH(ap, &pn->attrq, child) if (ATTRKEY_CHOICE == ap->key && (ATTRVAL_PLAIN == ap->val || ATTRVAL_REQ == ap->val)) isop = 0; else if (ATTRKEY_REP == ap->key && (ATTRVAL_REPEAT == ap->val)) isrep = 1; if (isop) { pnode_printmopen(p); fputs("Op", stdout); } TAILQ_FOREACH(pp, &pn->childq, child) { if (NODE_TEXT == pp->node) { pnode_printmopen(p); fputs("Ar", stdout); } pnode_print(p, pp); if (NODE_TEXT == pp->node && isrep) fputs("...", stdout); } } /* * Recursively search and return the first instance of "node". */ static struct pnode * pnode_findfirst(struct pnode *pn, enum nodeid node) { struct pnode *pp, *res; res = NULL; TAILQ_FOREACH(pp, &pn->childq, child) { res = pp->node == node ? pp : pnode_findfirst(pp, node); if (NULL != res) break; } return(res); } static void pnode_printprologue(struct parse *p, struct pnode *pn) { struct pnode *pp; pp = NULL == p->root ? NULL : pnode_findfirst(p->root, NODE_REFMETA); if (NULL != pp) { pnode_printrefmeta(p, pp); pnode_unlink(pp); } else { puts(".\\\" Supplying bogus prologue..."); puts(".Dd $Mdocdate" "$"); puts(".Dt UNKNOWN 1"); puts(".Os"); } } static void pnode_printvarlistentry(struct parse *p, struct pnode *pn) { struct pnode *pp; assert(p->newln); TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_TERM == pp->node) { fputs(".It", stdout); p->newln = 0; pnode_print(p, pp); pnode_unlink(pp); pnode_printmclose(p, 1); return; } puts(".It"); p->newln = 1; } static void pnode_printitemizedlist(struct parse *p, struct pnode *pn) { struct pnode *pp; assert(p->newln); TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_TITLE == pp->node) { puts(".Pp"); pnode_print(p, pp); pnode_unlink(pp); } assert(p->newln); puts(".Bl -item"); TAILQ_FOREACH(pp, &pn->childq, child) { assert(p->newln); puts(".It"); pnode_print(p, pp); pnode_printmclose(p, 1); } assert(p->newln); puts(".El"); } static void pnode_printvariablelist(struct parse *p, struct pnode *pn) { struct pnode *pp; assert(p->newln); TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_TITLE == pp->node) { puts(".Pp"); pnode_print(p, pp); pnode_unlink(pp); } assert(p->newln); puts(".Bl -tag -width Ds"); TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_VARLISTENTRY != pp->node) { assert(p->newln); fputs(".It", stdout); pnode_printmacroline(p, pp); } else { assert(p->newln); pnode_print(p, pp); } assert(p->newln); puts(".El"); } /* * Print a parsed node (or ignore it--whatever). * This is a recursive function. * FIXME: macro line continuation? */ static void pnode_print(struct parse *p, struct pnode *pn) { struct pnode *pp; char *cp; int last, sv; if (NULL == pn) return; sv = p->newln; switch (pn->node) { case (NODE_ARG): pnode_printarg(p, pn); pnode_unlinksub(pn); break; case (NODE_CITEREFENTRY): assert(p->newln); pnode_printciterefentry(p, pn); pnode_unlinksub(pn); break; case (NODE_CODE): pnode_printmopen(p); fputs("Li", stdout); break; case (NODE_COMMAND): pnode_printmopen(p); fputs("Nm", stdout); break; case (NODE_EMPHASIS): pnode_printmopen(p); fputs("Em", stdout); break; case (NODE_FILENAME): pnode_printmopen(p); fputs("Pa", stdout); break; case (NODE_FUNCTION): pnode_printmopen(p); fputs("Fn", stdout); break; case (NODE_FUNCPROTOTYPE): assert(p->newln); pnode_printfuncprototype(p, pn); pnode_unlinksub(pn); break; case (NODE_FUNCSYNOPSISINFO): pnode_printmopen(p); fputs("Fd", stdout); break; case (NODE_ITEMIZEDLIST): assert(p->newln); pnode_printitemizedlist(p, pn); break; case (NODE_OPTION): pnode_printmopen(p); fputs("Fl", stdout); /* FIXME: bogus leading '-'? */ break; case (NODE_PARA): assert(p->newln); if (NULL != pn->parent && NODE_LISTITEM == pn->parent->node) break; puts(".Pp"); break; case (NODE_PARAMETER): /* Suppress non-text children... */ pnode_printmopen(p); fputs("Fa \"", stdout); pnode_printmacrolinepart(p, pn); puts("\""); pnode_unlinksub(pn); break; case (NODE_PROGRAMLISTING): assert(p->newln); puts(".Bd -literal"); break; case (NODE_REFENTRYINFO): /* Suppress. */ pnode_unlinksub(pn); break; case (NODE_REFMETA): abort(); break; case (NODE_REFNAME): /* Suppress non-text children... */ pnode_printmopen(p); fputs("Nm", stdout); p->newln = 0; pnode_printmacrolinepart(p, pn); pnode_unlinksub(pn); break; case (NODE_REFNAMEDIV): assert(p->newln); puts(".Sh NAME"); break; case (NODE_REFPURPOSE): assert(p->newln); pnode_printmopen(p); fputs("Nd", stdout); break; case (NODE_REFSYNOPSISDIV): assert(p->newln); pnode_printrefsynopsisdiv(p, pn); puts(".Sh SYNOPSIS"); break; case (NODE_REFSECT1): assert(p->newln); pnode_printrefsect(p, pn); break; case (NODE_REPLACEABLE): pnode_printmopen(p); fputs("Ar", stdout); break; case (NODE_STRUCTNAME): pnode_printmopen(p); fputs("Vt", stdout); break; case (NODE_TEXT): if (0 == p->newln) putchar(' '); bufclear(p); bufappend(p, pn); /* * Output all characters, squeezing out whitespace * between newlines. * XXX: all whitespace, including tabs (?). * Remember to escape control characters and escapes. */ assert(p->bsz); for (last = '\n', cp = p->b; '\0' != *cp; ) { if ('\n' == last) { /* Consume all whitespace. */ if (isspace((int)*cp)) { while (isspace((int)*cp)) cp++; continue; } else if ('\'' == *cp || '.' == *cp) fputs("\\&", stdout); } putchar(last = *cp++); /* If we're a character escape, escape us. */ if ('\\' == last) putchar('e'); } p->newln = 0; break; case (NODE_VARIABLELIST): assert(p->newln); pnode_printvariablelist(p, pn); pnode_unlinksub(pn); break; case (NODE_VARLISTENTRY): assert(p->newln); pnode_printvarlistentry(p, pn); break; default: break; } TAILQ_FOREACH(pp, &pn->childq, child) pnode_print(p, pp); switch (pn->node) { case (NODE_ARG): case (NODE_CODE): case (NODE_COMMAND): case (NODE_EMPHASIS): case (NODE_FILENAME): case (NODE_FUNCTION): case (NODE_FUNCSYNOPSISINFO): case (NODE_OPTION): case (NODE_PARAMETER): case (NODE_REPLACEABLE): case (NODE_REFPURPOSE): case (NODE_STRUCTNAME): case (NODE_TEXT): pnode_printmclose(p, sv); break; case (NODE_REFNAME): /* * If we're in the NAME macro and we have multiple * macros in sequence, then print out a * trailing comma before the newline. */ if (NULL != pn->parent && NODE_REFNAMEDIV == pn->parent->node && NULL != TAILQ_NEXT(pn, child) && NODE_REFNAME == TAILQ_NEXT(pn, child)->node) fputs(" ,", stdout); pnode_printmclose(p, sv); break; case (NODE_PROGRAMLISTING): assert(p->newln); puts(".Ed"); p->newln = 1; break; default: break; } } /* * Loop around the read buffer until we've drained it of all data. * Invoke the parser context with each buffer fill. */ static int readfile(XML_Parser xp, int fd, char *b, size_t bsz, const char *fn) { struct parse p; int rc; ssize_t ssz; memset(&p, 0, sizeof(struct parse)); p.b = malloc(p.bsz = p.mbsz = 1024); p.fname = fn; p.xml = xp; XML_SetCharacterDataHandler(xp, xml_char); XML_SetElementHandler(xp, xml_elem_start, xml_elem_end); XML_SetUserData(xp, &p); while ((ssz = read(fd, b, bsz)) >= 0) { if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz))) fprintf(stderr, "%s: %s\n", fn, XML_ErrorString (XML_GetErrorCode(xp))); else if ( ! p.stop && ssz > 0) continue; /* * Exit when we've read all or errors have occured * during the parse sequence. */ p.newln = 1; pnode_printprologue(&p, p.root); pnode_print(&p, p.root); pnode_free(p.root); free(p.b); return(0 != rc && ! p.stop); } /* Read error has occured. */ perror(fn); pnode_free(p.root); free(p.b); return(0); } int main(int argc, char *argv[]) { XML_Parser xp; const char *fname; char *buf; int fd, rc; fname = "-"; xp = NULL; buf = NULL; rc = 0; if (-1 != getopt(argc, argv, "")) return(EXIT_FAILURE); argc -= optind; argv += optind; if (argc > 1) return(EXIT_FAILURE); else if (argc > 0) fname = argv[0]; /* Read from stdin or a file. */ fd = 0 == strcmp(fname, "-") ? STDIN_FILENO : open(fname, O_RDONLY, 0); /* * Open file for reading. * Allocate a read buffer. * Create the parser context. * Dive directly into the parse. */ if (-1 == fd) perror(fname); else if (NULL == (buf = malloc(4096))) perror(NULL); else if (NULL == (xp = XML_ParserCreate(NULL))) perror(NULL); else if ( ! readfile(xp, fd, buf, 4096, fname)) rc = 1; XML_ParserFree(xp); free(buf); if (STDIN_FILENO != fd) close(fd); return(rc ? EXIT_SUCCESS : EXIT_FAILURE); }