/* $Id: docbook2mdoc.c,v 1.9 2014/03/28 13:16:40 kristaps Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include /* * All recognised node types. */ enum nodeid { NODE_ROOT = 0, /* Must comes first. */ /* Alpha-ordered hereafter. */ NODE_ARG, NODE_CITEREFENTRY, NODE_CMDSYNOPSIS, NODE_CODE, NODE_COMMAND, NODE_FUNCDEF, NODE_FUNCPROTOTYPE, NODE_FUNCSYNOPSIS, NODE_FUNCSYNOPSISINFO, NODE_FUNCTION, NODE_MANVOLNUM, NODE_OPTION, NODE_PARA, NODE_PARAMDEF, NODE_PARAMETER, NODE_PROGRAMLISTING, NODE_REFCLASS, NODE_REFDESCRIPTOR, NODE_REFENTRY, NODE_REFENTRYTITLE, NODE_REFMETA, NODE_REFMISCINFO, NODE_REFNAME, NODE_REFNAMEDIV, NODE_REFPURPOSE, NODE_REFSECT1, NODE_REFSYNOPSISDIV, NODE_STRUCTNAME, NODE_SYNOPSIS, NODE_TEXT, NODE_TITLE, NODE__MAX }; /* * Global parse state. * Keep this as simple and small as possible. */ struct parse { enum nodeid node; /* current (NODE_ROOT if pre-tree) */ int stop; /* should we stop now? 
*/ struct pnode *root; /* root of parse tree */ struct pnode *cur; /* current node in tree */ char *b; /* nil-terminated buffer for pre-print */ size_t bsz; /* current length of b */ size_t mbsz; /* max bsz allocation */ }; struct node { const char *name; /* docbook element name */ unsigned int flags; #define NODE_IGNTEXT 1 /* ignore all contained text */ }; TAILQ_HEAD(pnodeq, pnode); struct pnode { enum nodeid node; /* node type */ char *b; /* binary data buffer */ size_t bsz; /* data buffer size */ struct pnode *parent; /* parent (or NULL if top) */ struct pnodeq childq; /* queue of children */ TAILQ_ENTRY(pnode) child; }; static const struct node nodes[NODE__MAX] = { { NULL, 0 }, { "arg", 0 }, { "citerefentry", NODE_IGNTEXT }, { "cmdsynopsis", NODE_IGNTEXT }, { "code", 0 }, { "command", 0 }, { "funcdef", 0 }, { "funcprototype", NODE_IGNTEXT }, { "funcsynopsis", NODE_IGNTEXT }, { "funcsynopsisinfo", 0 }, { "function", 0 }, { "manvolnum", 0 }, { "option", 0 }, { "para", 0 }, { "paramdef", 0 }, { "parameter", 0 }, { "programlisting", 0 }, { "refclass", NODE_IGNTEXT }, { "refdescriptor", NODE_IGNTEXT }, { "refentry", NODE_IGNTEXT }, { "refentrytitle", 0 }, { "refmeta", NODE_IGNTEXT }, { "refmiscinfo", NODE_IGNTEXT }, { "refname", 0 }, { "refnamediv", NODE_IGNTEXT }, { "refpurpose", 0 }, { "refsect1", 0 }, { "refsynopsisdiv", NODE_IGNTEXT }, { "structname", 0 }, { "synopsis", 0 }, { NULL, 0 }, { "title", 0 }, }; /* * Look up whether "parent" is a valid parent for "node". * This is sucked directly from the DocBook specification: look at the * "children" and "parent" sections of each node. 
*/ static int isparent(enum nodeid node, enum nodeid parent) { switch (node) { case (NODE_ROOT): return(0); case (NODE_ARG): switch (parent) { case (NODE_ARG): case (NODE_CMDSYNOPSIS): return(1); default: break; } return(0); case (NODE_CITEREFENTRY): switch (parent) { case (NODE_FUNCSYNOPSISINFO): case (NODE_PARA): case (NODE_PROGRAMLISTING): case (NODE_REFDESCRIPTOR): case (NODE_REFENTRYTITLE): case (NODE_REFNAME): case (NODE_REFPURPOSE): case (NODE_SYNOPSIS): case (NODE_TITLE): return(1); default: break; } return(0); case (NODE_CMDSYNOPSIS): switch (parent) { case (NODE_PARA): case (NODE_REFSECT1): case (NODE_REFSYNOPSISDIV): return(1); default: break; } return(0); case (NODE_CODE): switch (parent) { case (NODE_FUNCSYNOPSISINFO): case (NODE_PARA): case (NODE_PROGRAMLISTING): case (NODE_REFDESCRIPTOR): case (NODE_REFENTRYTITLE): case (NODE_REFNAME): case (NODE_REFPURPOSE): case (NODE_SYNOPSIS): case (NODE_TITLE): return(1); default: break; } return(0); case (NODE_COMMAND): switch (parent) { case (NODE_CMDSYNOPSIS): case (NODE_FUNCSYNOPSISINFO): case (NODE_PARA): case (NODE_PROGRAMLISTING): case (NODE_REFDESCRIPTOR): case (NODE_REFENTRYTITLE): case (NODE_REFNAME): case (NODE_REFPURPOSE): case (NODE_SYNOPSIS): case (NODE_TITLE): return(1); default: break; } return(0); case (NODE_FUNCDEF): return(NODE_FUNCPROTOTYPE == parent); case (NODE_FUNCPROTOTYPE): return(NODE_FUNCSYNOPSIS == parent); case (NODE_FUNCSYNOPSIS): switch (parent) { case (NODE_PARA): case (NODE_REFSECT1): case (NODE_REFSYNOPSISDIV): return(1); default: break; } return(0); case (NODE_FUNCSYNOPSISINFO): return(NODE_FUNCSYNOPSIS == parent); case (NODE_FUNCTION): switch (parent) { case (NODE_CODE): case (NODE_FUNCDEF): case (NODE_FUNCSYNOPSISINFO): case (NODE_PARA): case (NODE_PROGRAMLISTING): case (NODE_REFDESCRIPTOR): case (NODE_REFENTRYTITLE): case (NODE_REFNAME): case (NODE_REFPURPOSE): case (NODE_SYNOPSIS): case (NODE_TITLE): return(1); default: break; } return(0); case (NODE_MANVOLNUM): switch 
(parent) { case (NODE_CITEREFENTRY): case (NODE_REFMETA): return(1); default: break; } return(0); case (NODE_OPTION): switch (parent) { case (NODE_ARG): case (NODE_FUNCSYNOPSISINFO): case (NODE_PARA): case (NODE_PROGRAMLISTING): case (NODE_REFDESCRIPTOR): case (NODE_REFENTRYTITLE): case (NODE_REFNAME): case (NODE_REFPURPOSE): case (NODE_SYNOPSIS): case (NODE_TITLE): return(1); default: break; } return(0); case (NODE_PARA): switch (parent) { case (NODE_REFSECT1): case (NODE_REFSYNOPSISDIV): return(1); default: break; } return(0); case (NODE_PARAMDEF): return(NODE_FUNCPROTOTYPE == parent); case (NODE_PARAMETER): switch (parent) { case (NODE_CODE): case (NODE_FUNCSYNOPSISINFO): case (NODE_PARA): case (NODE_PARAMDEF): case (NODE_PROGRAMLISTING): case (NODE_REFDESCRIPTOR): case (NODE_REFENTRYTITLE): case (NODE_REFNAME): case (NODE_REFPURPOSE): case (NODE_SYNOPSIS): case (NODE_TITLE): return(1); default: break; } return(0); case (NODE_PROGRAMLISTING): switch (parent) { case (NODE_PARA): case (NODE_REFSECT1): case (NODE_REFSYNOPSISDIV): return(1); default: break; } return(0); case (NODE_REFCLASS): return(parent == NODE_REFNAMEDIV); case (NODE_REFDESCRIPTOR): return(parent == NODE_REFNAMEDIV); case (NODE_REFENTRY): return(parent == NODE_ROOT); case (NODE_REFENTRYTITLE): switch (parent) { case (NODE_CITEREFENTRY): case (NODE_REFMETA): return(1); default: break; } case (NODE_REFMETA): return(parent == NODE_REFENTRY); case (NODE_REFMISCINFO): return(parent == NODE_REFMETA); case (NODE_REFNAME): return(parent == NODE_REFNAMEDIV); case (NODE_REFNAMEDIV): return(parent == NODE_REFENTRY); case (NODE_REFPURPOSE): return(parent == NODE_REFNAMEDIV); case (NODE_REFSECT1): return(parent == NODE_REFENTRY); case (NODE_REFSYNOPSISDIV): return(parent == NODE_REFENTRY); case (NODE_STRUCTNAME): switch (parent) { case (NODE_CODE): case (NODE_FUNCSYNOPSISINFO): case (NODE_FUNCTION): case (NODE_OPTION): case (NODE_PARA): case (NODE_PARAMETER): case (NODE_PROGRAMLISTING): case 
(NODE_REFDESCRIPTOR): case (NODE_REFENTRYTITLE): case (NODE_REFNAME): case (NODE_REFPURPOSE): case (NODE_SYNOPSIS): case (NODE_TITLE): return(1); default: break; } return(0); case (NODE_SYNOPSIS): switch (parent) { case (NODE_REFSYNOPSISDIV): case (NODE_REFSECT1): return(1); default: break; } return(0); case (NODE_TITLE): switch (parent) { case (NODE_REFSECT1): case (NODE_REFSYNOPSISDIV): return(1); default: break; } return(0); case (NODE_TEXT): return(1); case (NODE__MAX): break; } abort(); return(0); } /* * Process a stream of characters. * We store text as nodes in and of themselves. * If a text node is already open, append to it. * If it's not open, open one under the current context. */ static void xml_char(void *arg, const XML_Char *p, int sz) { struct parse *ps = arg; struct pnode *dat; int i; /* Stopped or no tree yet. */ if (ps->stop || NODE_ROOT == ps->node) return; /* Not supposed to be collecting text. */ assert(NULL != ps->cur); if (NODE_IGNTEXT & nodes[ps->node].flags) return; /* * Are we in the midst of processing text? * If we're not processing text right now, then create a text * node for doing so. * However, don't do so unless we have some non-whitespace to * process! */ if (NODE_TEXT != ps->node) { for (i = 0; i < sz; i++) if ( ! isspace((int)p[i])) break; if (i == sz) return; dat = calloc(1, sizeof(struct pnode)); if (NULL == dat) { perror(NULL); exit(EXIT_FAILURE); } dat->node = ps->node = NODE_TEXT; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); ps->cur = dat; assert(NULL != ps->root); } /* Append to current buffer. */ assert(sz >= 0); ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + (size_t)sz); if (NULL == ps->cur->b) { perror(NULL); exit(EXIT_FAILURE); } memcpy(ps->cur->b + ps->cur->bsz, p, sz); ps->cur->bsz += (size_t)sz; } /* * Begin an element. * First, look for the element. * If we don't find it and we're not parsing, keep going. * If we don't find it and we're parsing, puke and exit. 
* If we find it but we're not parsing yet (i.e., it's not a refentry * and thus out of context), keep going. * If we find it and we're at the root and already have a tree, puke and * exit (FIXME: I don't think this is right?). * If we find it but we're parsing a text node, close out the text node, * return to its parent, and keep going. * Make sure that the element is in the right context. * Lastly, put the node onto our parse tree and continue. */ static void xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts) { struct parse *ps = arg; enum nodeid node; struct pnode *dat; if (ps->stop) return; /* Close out text node, if applicable... */ if (NODE_TEXT == ps->node) { assert(NULL != ps->cur); ps->cur = ps->cur->parent; assert(NULL != ps->cur); ps->node = ps->cur->node; } for (node = 0; node < NODE__MAX; node++) if (NULL == nodes[node].name) continue; else if (0 == strcmp(nodes[node].name, name)) break; /* FIXME: do more with these error messages... */ if (NODE__MAX == node && NODE_ROOT == ps->node) { fprintf(stderr, "%s: ignoring node\n", name); return; } else if (NODE__MAX == node) { fprintf(stderr, "%s: unknown node\n", name); ps->stop = 1; return; } else if (NODE_ROOT == ps->node && NULL != ps->root) { fprintf(stderr, "%s: reentering?\n", name); ps->stop = 1; return; } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) { fprintf(stderr, "%s: known node w/o context\n", name); return; } else if ( ! isparent(node, ps->node)) { fprintf(stderr, "%s: bad parent\n", name); ps->stop = 1; return; } if (NULL == (dat = calloc(1, sizeof(struct pnode)))) { perror(NULL); exit(EXIT_FAILURE); } dat->node = ps->node = node; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); if (NULL != ps->cur) TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); ps->cur = dat; if (NULL == ps->root) ps->root = dat; } /* * Roll up the parse tree. * If we're at a text node, roll that one up first. * If we hit the root, then assign ourselves as the NODE_ROOT. 
*/ static void xml_elem_end(void *arg, const XML_Char *name) { struct parse *ps = arg; if (ps->stop || NODE_ROOT == ps->node) return; /* Close out text node, if applicable... */ if (NODE_TEXT == ps->node) { assert(NULL != ps->cur); ps->cur = ps->cur->parent; assert(NULL != ps->cur); ps->node = ps->cur->node; } if (NULL == (ps->cur = ps->cur->parent)) ps->node = NODE_ROOT; else ps->node = ps->cur->node; } /* * Recursively free a node (NULL is ok). */ static void pnode_free(struct pnode *pn) { struct pnode *pp; if (NULL == pn) return; while (NULL != (pp = TAILQ_FIRST(&pn->childq))) { TAILQ_REMOVE(&pn->childq, pp, child); pnode_free(pp); } free(pn->b); free(pn); } /* * Unlink a node from its parent and pnode_free() it. */ static void pnode_unlink(struct pnode *pn) { if (NULL != pn->parent) TAILQ_REMOVE(&pn->parent->childq, pn, child); pnode_free(pn); } /* * Unlink all children of a node and pnode_free() them. */ static void pnode_unlinksub(struct pnode *pn) { while ( ! TAILQ_EMPTY(&pn->childq)) pnode_unlink(TAILQ_FIRST(&pn->childq)); } /* * Reset the lookaside buffer. */ static void bufclear(struct parse *p) { p->b[p->bsz = 0] = '\0'; } /* * Append NODE_TEXT contents to the current buffer, reallocating its * size if necessary. * The buffer is ALWAYS nil-terminated. */ static void bufappend(struct parse *p, struct pnode *pn) { assert(NODE_TEXT == pn->node); if (p->bsz + pn->bsz + 1 > p->mbsz) { p->mbsz = p->bsz + pn->bsz + 1; if (NULL == (p->b = realloc(p->b, p->mbsz))) { perror(NULL); exit(EXIT_FAILURE); } } memcpy(p->b + p->bsz, pn->b, pn->bsz); p->bsz += pn->bsz; p->b[p->bsz] = '\0'; } /* * Recursively append all NODE_TEXT nodes to the buffer. * This descends into non-text nodes, but doesn't do anything beyond * them. * In other words, this is a recursive text grok. 
*/ static void bufappend_r(struct parse *p, struct pnode *pn) { struct pnode *pp; if (NODE_TEXT == pn->node) bufappend(p, pn); TAILQ_FOREACH(pp, &pn->childq, child) bufappend_r(p, pp); } /* * Recursively print text presumably on a macro line. * Convert all whitespace to regular spaces. */ static void pnode_printmacrolinepart(struct parse *p, struct pnode *pn) { char *cp; bufclear(p); bufappend_r(p, pn); /* Convert all space to spaces. */ for (cp = p->b; '\0' != *cp; cp++) if (isspace((int)*cp)) *cp = ' '; for (cp = p->b; isspace((int)*cp); cp++) /* Spin past whitespace (XXX: necessary?) */ ; for ( ; '\0' != *cp; cp++) { /* Escape us if we look like a macro. */ if ((cp == p->b || ' ' == *(cp - 1)) && isupper((int)*cp) && '\0' != *(cp + 1) && islower((int)*(cp + 1)) && ('\0' == *(cp + 2) || ' ' == *(cp + 2) || (islower((int)*(cp + 2)) && ('\0' == *(cp + 3) || ' ' == *(cp + 3))))) fputs("\\&", stdout); putchar(*cp); /* If we're a character escape, escape us. */ if ('\\' == *cp) putchar('e'); } } /* * Just pnode_printmacrolinepart() but with a newline. * If no text, just the newline. */ static void pnode_printmacroline(struct parse *p, struct pnode *pn) { pnode_printmacrolinepart(p, pn); putchar('\n'); } /* * Start the SYNOPSIS macro, unlinking its [superfluous] title. */ static void pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn) { struct pnode *pp; TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_TITLE == pp->node) { pnode_unlink(pp); break; } puts(".Sh SYNOPSIS"); } /* * Start a hopefully-named `Sh' section. */ static void pnode_printrefsect(struct parse *p, struct pnode *pn) { struct pnode *pp; TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_TITLE == pp->node) break; fputs(".Sh ", stdout); if (NULL != pp) { pnode_printmacroline(p, pp); pnode_unlink(pp); } else puts("UNKNOWN"); } /* * Start a reference, extracting the title and volume. 
*/ static void pnode_printciterefentry(struct parse *p, struct pnode *pn) { struct pnode *pp, *title, *manvol; title = manvol = NULL; TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_MANVOLNUM == pp->node) manvol = pp; else if (NODE_REFENTRYTITLE == pp->node) title = pp; fputs(".Xr ", stdout); if (NULL != title) { pnode_printmacrolinepart(p, title); putchar(' '); } else fputs("unknown ", stdout); if (NULL != manvol) pnode_printmacroline(p, manvol); else puts("1"); } static void pnode_printrefmeta(struct parse *p, struct pnode *pn) { struct pnode *pp, *title, *manvol; title = manvol = NULL; TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_MANVOLNUM == pp->node) manvol = pp; else if (NODE_REFENTRYTITLE == pp->node) title = pp; puts(".Dd $Mdocdate" "$"); fputs(".Dt ", stdout); if (NULL != title) { /* FIXME: uppercase. */ pnode_printmacrolinepart(p, title); putchar(' '); } else fputs("UNKNOWN ", stdout); if (NULL != manvol) pnode_printmacroline(p, manvol); else puts("1"); puts(".Os"); } static void pnode_printfuncdef(struct parse *p, struct pnode *pn) { struct pnode *pp, *ftype, *func; ftype = func = NULL; TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_TEXT == pp->node) ftype = pp; else if (NODE_FUNCTION == pp->node) func = pp; if (NULL != ftype) { fputs(".Ft ", stdout); pnode_printmacroline(p, ftype); } if (NULL != func) { fputs(".Fo ", stdout); pnode_printmacroline(p, func); } else puts(".Fo UNKNOWN"); } static void pnode_printparamdef(struct parse *p, struct pnode *pn) { struct pnode *pp, *ptype, *param; ptype = param = NULL; TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_TEXT == pp->node) ptype = pp; else if (NODE_PARAMETER == pp->node) param = pp; fputs(".Fa \"", stdout); if (NULL != ptype) { pnode_printmacrolinepart(p, ptype); putchar(' '); } if (NULL != param) pnode_printmacrolinepart(p, param); else fputs("UNKNOWN", stdout); puts("\""); } static void pnode_printfuncprototype(struct parse *p, struct pnode *pn) { struct pnode *pp, *fdef; TAILQ_FOREACH(fdef, 
&pn->childq, child) if (NODE_FUNCDEF == fdef->node) break; if (NULL != fdef) pnode_printfuncdef(p, fdef); else puts(".Fo UNKNOWN"); TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_PARAMDEF == pp->node) pnode_printparamdef(p, pp); puts(".Fc"); } /* TODO: handle "optional" values. */ static void pnode_printarg(struct parse *p, struct pnode *pn, int nested) { struct pnode *pp; int sv = nested; if ( ! nested) fputs(".", stdout); nested = 1; TAILQ_FOREACH(pp, &pn->childq, child) if (NODE_OPTION == pp->node) { fputs("Fl ", stdout); pnode_printmacrolinepart(p, pp); } else if (NODE_TEXT == pp->node) { fputs("Ar ", stdout); pnode_printmacrolinepart(p, pp); } else if (NODE_ARG == pp->node) pnode_printarg(p, pp, nested); if ( ! sv) puts(""); } /* * Recursively search and return the first instance of "node". */ static struct pnode * pnode_findfirst(struct pnode *pn, enum nodeid node) { struct pnode *pp, *res; res = NULL; TAILQ_FOREACH(pp, &pn->childq, child) { res = pp->node == node ? pp : pnode_findfirst(pp, node); if (NULL != res) break; } return(res); } static void pnode_printprologue(struct parse *p, struct pnode *pn) { struct pnode *pp; pp = NULL == p->root ? NULL : pnode_findfirst(p->root, NODE_REFMETA); if (NULL != pp) { pnode_printrefmeta(p, pp); pnode_unlink(pp); } else { puts(".\\\" Supplying bogus prologue..."); puts(".Dd $Mdocdate" "$"); puts(".Dt UNKNOWN 1"); puts(".Os"); } } /* * Print a parsed node (or ignore it--whatever). * This is a recursive function. * FIXME: macro line continuation? 
*/
static void
pnode_print(struct parse *p, struct pnode *pn)
{
	struct pnode	*pp;
	char		*cp;
	int		 last;

	if (NULL == pn)
		return;

	/* Note each element's name in a comment line. */
	if (NODE_TEXT != pn->node && NODE_ROOT != pn->node)
		printf(".\\\" %s\n", nodes[pn->node].name);

	/*
	 * Nodes that print their whole subtree themselves unlink their
	 * children afterward, so the generic descent below finds
	 * nothing left to print.
	 */
	switch (pn->node) {
	case (NODE_ARG):
		pnode_printarg(p, pn, 0);
		pnode_unlinksub(pn);
		break;
	case (NODE_CITEREFENTRY):
		pnode_printciterefentry(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_CODE):
		fputs(".Li ", stdout);
		pnode_printmacroline(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_COMMAND):
		fputs(".Nm ", stdout);
		pnode_printmacroline(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_FUNCTION):
		fputs(".Fn ", stdout);
		pnode_printmacroline(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_FUNCPROTOTYPE):
		pnode_printfuncprototype(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_FUNCSYNOPSISINFO):
		fputs(".Fd ", stdout);
		pnode_printmacroline(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_PARA):
		/* FIXME: not always. */
		puts(".Pp");
		break;
	case (NODE_PARAMETER):
		fputs(".Fa \"", stdout);
		pnode_printmacrolinepart(p, pn);
		puts("\"");
		pnode_unlinksub(pn);
		break;
	case (NODE_PROGRAMLISTING):
		puts(".Bd -literal");
		break;
	case (NODE_REFMETA):
		/* The prologue code unlinks this before we get here. */
		abort();
		break;
	case (NODE_REFNAME):
		fputs(".Nm ", stdout);
		pnode_printmacroline(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_REFNAMEDIV):
		puts(".Sh NAME");
		break;
	case (NODE_REFPURPOSE):
		fputs(".Nd ", stdout);
		pnode_printmacroline(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_REFSYNOPSISDIV):
		pnode_printrefsynopsisdiv(p, pn);
		break;
	case (NODE_REFSECT1):
		pnode_printrefsect(p, pn);
		break;
	case (NODE_STRUCTNAME):
		fputs(".Vt ", stdout);
		pnode_printmacroline(p, pn);
		pnode_unlinksub(pn);
		break;
	case (NODE_TEXT):
		bufclear(p);
		bufappend(p, pn);
		/*
		 * Output all characters, squeezing out whitespace
		 * between newlines.
		 * XXX: all whitespace, including tabs (?).
		 * Remember to escape control characters and escapes.
		 * The ctype functions need an unsigned char value: a
		 * plain char may be negative for non-ASCII bytes.
		 */
		for (last = '\n', cp = p->b; '\0' != *cp; ) {
			if ('\n' == last) {
				/* Consume all whitespace. */
				if (isspace((unsigned char)*cp)) {
					while (isspace((unsigned char)*cp))
						cp++;
					continue;
				} else if ('\'' == *cp || '.' == *cp)
					fputs("\\&", stdout);
			}
			putchar(last = (unsigned char)*cp++);
			/* If we're a character escape, escape us. */
			if ('\\' == last)
				putchar('e');
		}
		if ('\n' != last)
			putchar('\n');
		break;
	default:
		break;
	}

	TAILQ_FOREACH(pp, &pn->childq, child)
		pnode_print(p, pp);

	/* Close whatever was left open around the children. */
	switch (pn->node) {
	case (NODE_PROGRAMLISTING):
		puts(".Ed");
		break;
	default:
		break;
	}
}

/*
 * Loop around the read buffer until we've drained it of all data.
 * Invoke the parser context with each buffer fill.
 * Whatever tree exists is printed once input ends, the parse fails or
 * the handlers ask to stop.
 * Returns non-zero only if all input was parsed without error; a read
 * or allocation failure returns zero without printing anything.
 */
static int
readfile(XML_Parser xp, int fd,
	char *b, size_t bsz, const char *fn)
{
	struct parse	 p;
	int		 rc;
	ssize_t		 ssz;

	memset(&p, 0, sizeof(struct parse));

	/* Without this check, bufclear() would write through NULL. */
	p.mbsz = 1024;
	if (NULL == (p.b = malloc(p.mbsz))) {
		perror(NULL);
		return(0);
	}
	p.b[0] = '\0';

	XML_SetCharacterDataHandler(xp, xml_char);
	XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
	XML_SetUserData(xp, &p);

	while ((ssz = read(fd, b, bsz)) >= 0) {
		/* A zero-length read is how we signal the final chunk. */
		if (0 == (rc = XML_Parse(xp, b, (int)ssz, 0 == ssz)))
			fprintf(stderr, "%s: %s\n", fn,
				XML_ErrorString
				(XML_GetErrorCode(xp)));
		else if ( ! p.stop && ssz > 0)
			continue;
		/*
		 * Exit when we've read all or errors have occurred
		 * during the parse sequence.
		 */
		pnode_printprologue(&p, p.root);
		pnode_print(&p, p.root);
		pnode_free(p.root);
		free(p.b);
		return(0 != rc && ! p.stop);
	}

	/* Read error has occurred. */
	perror(fn);
	pnode_free(p.root);
	free(p.b);
	return(0);
}

/*
 * Convert the DocBook file named by the sole optional argument, or
 * standard input if there is none or it is "-", writing the result to
 * standard output.
 * No options are accepted.
 * Exits with EXIT_SUCCESS only if the input was read and parsed in
 * full.
 */
int
main(int argc, char *argv[])
{
	XML_Parser	 xp;
	const char	*fname;
	char		*buf;
	int		 fd, rc;

	fname = "-";
	xp = NULL;
	buf = NULL;
	rc = 0;

	if (-1 != getopt(argc, argv, ""))
		return(EXIT_FAILURE);

	argc -= optind;
	argv += optind;

	if (argc > 1)
		return(EXIT_FAILURE);
	else if (argc > 0)
		fname = argv[0];

	/* Read from stdin or a file. */
	fd = 0 == strcmp(fname, "-") ?
		STDIN_FILENO : open(fname, O_RDONLY, 0);

	/*
	 * Open file for reading.
	 * Allocate a read buffer.
	 * Create the parser context.
	 * Dive directly into the parse.
	 * Here "rc" is non-zero on success: it used to be set when
	 * readfile() failed, which inverted the exit status.
	 */
	if (-1 == fd)
		perror(fname);
	else if (NULL == (buf = malloc(4096)))
		perror(NULL);
	else if (NULL == (xp = XML_ParserCreate(NULL)))
		perror(NULL);
	else
		rc = readfile(xp, fd, buf, 4096, fname);

	if (NULL != xp)
		XML_ParserFree(xp);
	free(buf);
	if (-1 != fd && STDIN_FILENO != fd)
		close(fd);
	return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
}