/* $Id: docbook2mdoc.c,v 1.66 2019/03/23 10:22:23 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * Copyright (c) 2019 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include "extern.h" /* * Global parse state. * Keep this as simple and small as possible. */ struct parse { XML_Parser xml; enum nodeid node; /* current (NODE_ROOT if pre-tree) */ const char *fname; /* filename */ int stop; /* should we stop now? */ #define PARSE_EQN 1 unsigned int flags; /* document-wide flags */ struct pnode *root; /* root of parse tree */ struct pnode *cur; /* current node in tree */ char *b; /* NUL-terminated buffer for pre-print */ size_t bsz; /* current length of b */ size_t mbsz; /* max bsz allocation */ int level; /* header level, starting at 1 */ int newln; /* output: are we on a fresh line */ }; struct node { const char *name; /* docbook element name */ enum nodeid node; /* docbook element to generate */ }; TAILQ_HEAD(pnodeq, pnode); TAILQ_HEAD(pattrq, pattr); struct pattr { enum attrkey key; enum attrval val; char *rawval; TAILQ_ENTRY(pattr) child; }; struct pnode { enum nodeid node; /* node type */ char *b; /* binary data buffer */ char *real; /* store for "b" */ size_t bsz; /* data buffer size */ struct pnode *parent; /* parent (or NULL if top) */ struct pnodeq childq; /* queue of children */ struct pattrq attrq; /* attributes of node */ TAILQ_ENTRY(pnode) child; }; static const char *attrkeys[ATTRKEY__MAX] = { "choice", "class", "close", "id", "linkend", "open", "rep" }; static const char *attrvals[ATTRVAL__MAX] = { "monospaced", "norepeat", "opt", "plain", "repeat", "req" }; static const struct node nodes[] = { { "acronym", NODE_ACRONYM }, { "affiliation", NODE_AFFILIATION }, { "anchor", NODE_ANCHOR }, { "application", NODE_APPLICATION }, { "arg", NODE_ARG }, { "author", NODE_AUTHOR }, { "authorgroup", NODE_AUTHORGROUP }, { "blockquote", NODE_BLOCKQUOTE }, { "book", NODE_BOOK }, { "bookinfo", NODE_BOOKINFO }, { "caution", NODE_CAUTION }, { "chapter", NODE_SECTION }, { "citerefentry", NODE_CITEREFENTRY }, { "citetitle", NODE_CITETITLE }, { "cmdsynopsis", NODE_CMDSYNOPSIS }, { "code", NODE_CODE }, { "colspec", NODE_COLSPEC }, { "command", NODE_COMMAND }, { "constant", NODE_CONSTANT }, { "copyright", NODE_COPYRIGHT }, { "date", NODE_DATE }, { "editor", NODE_EDITOR }, { "email", NODE_EMAIL }, { "emphasis", NODE_EMPHASIS }, { "entry", NODE_ENTRY }, { "envar", NODE_ENVAR }, { "fieldsynopsis", NODE_FIELDSYNOPSIS }, { "filename", NODE_FILENAME }, { "firstname", NODE_FIRSTNAME }, { "firstterm", NODE_FIRSTTERM }, { "footnote", NODE_FOOTNOTE }, { "funcdef", NODE_FUNCDEF }, { "funcprototype", NODE_FUNCPROTOTYPE }, { "funcsynopsis", NODE_FUNCSYNOPSIS }, { "funcsynopsisinfo", NODE_FUNCSYNOPSISINFO }, { "function", NODE_FUNCTION }, { "glossterm", NODE_GLOSSTERM }, { "group", NODE_GROUP }, { "holder", NODE_HOLDER }, { "index", NODE_INDEX }, { "indexterm", NODE_INDEXTERM }, { "info", NODE_INFO }, { "informalequation", NODE_INFORMALEQUATION }, { "informaltable", NODE_INFORMALTABLE }, { "inlineequation", NODE_INLINEEQUATION }, { "itemizedlist", NODE_ITEMIZEDLIST }, { "keysym", NODE_KEYSYM }, { "legalnotice", NODE_LEGALNOTICE }, { "link", NODE_LINK }, { "listitem", NODE_LISTITEM }, { "literal", NODE_LITERAL }, { "literallayout", NODE_LITERALLAYOUT }, { "manvolnum", NODE_MANVOLNUM }, { "member", NODE_MEMBER }, { "mml:math", NODE_MML_MATH }, { "mml:mfenced", NODE_MML_MFENCED }, { "mml:mfrac", NODE_MML_MFRAC }, { "mml:mi", NODE_MML_MI }, { "mml:mn", NODE_MML_MN }, { "mml:mo", NODE_MML_MO }, { "mml:mrow", NODE_MML_MROW }, { "mml:msub", NODE_MML_MSUB }, { "mml:msup", NODE_MML_MSUP }, { "modifier", NODE_MODIFIER }, { "note", NODE_NOTE }, { "option", NODE_OPTION }, { "orderedlist", NODE_ORDEREDLIST }, { "orgname", NODE_ORGNAME }, { "othername", NODE_OTHERNAME }, { "para", NODE_PARA }, { "paramdef", NODE_PARAMDEF }, { "parameter", NODE_PARAMETER }, { "part", NODE_SECTION }, { "personname", NODE_PERSONNAME }, { "phrase", NODE_PHRASE }, { "preface", NODE_PREFACE }, { "primary", NODE_PRIMARY }, { "programlisting", NODE_PROGRAMLISTING }, { "prompt", NODE_PROMPT }, { "quote", NODE_QUOTE }, { "refclass", NODE_REFCLASS }, { "refdescriptor", NODE_REFDESCRIPTOR }, { "refentry", NODE_REFENTRY }, { "refentryinfo", NODE_REFENTRYINFO }, { "refentrytitle", NODE_REFENTRYTITLE }, { "refmeta", NODE_REFMETA }, { "refmetainfo", NODE_REFMETAINFO }, { "refmiscinfo", NODE_REFMISCINFO }, { "refname", NODE_REFNAME }, { "refnamediv", NODE_REFNAMEDIV }, { "refpurpose", NODE_REFPURPOSE }, { "refsect1", NODE_SECTION }, { "refsect2", NODE_SECTION }, { "refsect3", NODE_SECTION }, { "refsection", NODE_SECTION }, { "refsynopsisdiv", NODE_REFSYNOPSISDIV }, { "releaseinfo", NODE_RELEASEINFO }, { "replaceable", NODE_REPLACEABLE }, { "row", NODE_ROW }, { "sbr", NODE_SBR }, { "screen", NODE_SCREEN }, { "secondary", NODE_SECONDARY }, { "sect1", NODE_SECTION }, { "sect2", NODE_SECTION }, { "section", NODE_SECTION }, { "sgmltag", NODE_SGMLTAG }, { "simplelist", NODE_SIMPLELIST }, { "spanspec", NODE_SPANSPEC }, { "structname", NODE_STRUCTNAME }, { "subtitle", NODE_SUBTITLE }, { "surname", NODE_SURNAME }, { "synopsis", NODE_SYNOPSIS }, { "table", NODE_TABLE }, { "tbody", NODE_TBODY }, { "term", NODE_TERM }, { "tfoot", NODE_TFOOT }, { "tgroup", NODE_TGROUP }, { "thead", NODE_THEAD }, { "tip", NODE_TIP }, { "title", NODE_TITLE }, { "trademark", NODE_TRADEMARK }, { "type", NODE_TYPE }, { "ulink", NODE_ULINK }, { "userinput", NODE_USERINPUT }, { "variablelist", NODE_VARIABLELIST }, { "varlistentry", NODE_VARLISTENTRY }, { "varname", NODE_VARNAME }, { "warning", NODE_WARNING }, { "wordasword", NODE_WORDASWORD }, { "year", NODE_YEAR }, { NULL, NODE__MAX } }; static int warn = 0; static void pnode_print(struct parse *p, struct pnode *pn); /* * Process a stream of characters. * We store text as nodes in and of themselves. * If a text node is already open, append to it. * If it's not open, open one under the current context. */ static void xml_char(void *arg, const XML_Char *p, int sz) { struct parse *ps = arg; struct pnode *dat; int i; /* Stopped or no tree yet. */ if (ps->stop || ps->node == NODE_ROOT) return; assert(ps->cur != NULL); /* * Are we in the midst of processing text? * If we're not processing text right now, then create a text * node for doing so. * However, don't do so unless we have some non-whitespace to * process: strip out all leading whitespace to be sure. */ if (ps->node != NODE_TEXT) { for (i = 0; i < sz; i++) if ( ! isspace((unsigned char)p[i])) break; if (i == sz) return; p += i; sz -= i; dat = calloc(1, sizeof(struct pnode)); if (dat == NULL) { perror(NULL); exit(1); } dat->node = ps->node = NODE_TEXT; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); TAILQ_INIT(&dat->attrq); TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); ps->cur = dat; assert(ps->root != NULL); } /* Append to current buffer. */ assert(sz >= 0); ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + (size_t)sz); if (ps->cur->b == NULL) { perror(NULL); exit(1); } memcpy(ps->cur->b + ps->cur->bsz, p, sz); ps->cur->bsz += (size_t)sz; ps->cur->real = ps->cur->b; } static void pnode_trim(struct pnode *pn) { assert(pn->node == NODE_TEXT); for ( ; pn->bsz > 0; pn->bsz--) if ( ! isspace((unsigned char)pn->b[pn->bsz - 1])) break; } /* * Begin an element. * First, look for the element. * If we don't find it and we're not parsing, keep going. * If we don't find it and we're parsing, puke and exit. * If we find it but we're not parsing yet (i.e., it's not a refentry * and thus out of context), keep going. * If we find it and we're at the root and already have a tree, puke and * exit (FIXME: I don't think this is right?). * If we find it but we're parsing a text node, close out the text node, * return to its parent, and keep going. * Make sure that the element is in the right context. * Lastly, put the node onto our parse tree and continue. */ static void xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts) { struct parse *ps = arg; const struct node *node; enum attrkey key; enum attrval val; struct pnode *dat; struct pattr *pattr; const XML_Char **att; /* FIXME: find a better way to ditch other namespaces. */ if (ps->stop || strcmp(name, "xi:include") == 0) return; /* Close out text node, if applicable... */ if (ps->node == NODE_TEXT) { pnode_trim(ps->cur); ps->cur = ps->cur->parent; ps->node = ps->cur->node; } for (node = nodes; node->name != NULL; node++) if (strcmp(node->name, name) == 0) break; if (node->name == NULL) { if (ps->node == NODE_ROOT) return; fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n", ps->fname, XML_GetCurrentLineNumber(ps->xml), XML_GetCurrentColumnNumber(ps->xml), name); ps->stop = 1; return; } else if (ps->node == NODE_ROOT && ps->root != NULL) { fprintf(stderr, "%s:%zu:%zu: multiple refentries\n", ps->fname, XML_GetCurrentLineNumber(ps->xml), XML_GetCurrentColumnNumber(ps->xml)); ps->stop = 1; return; } if (node->node == NODE_INLINEEQUATION) ps->flags |= PARSE_EQN; if ((dat = calloc(1, sizeof(struct pnode))) == NULL) { perror(NULL); exit(1); } dat->node = ps->node = node->node; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); TAILQ_INIT(&dat->attrq); if (ps->cur != NULL) TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); ps->cur = dat; if (ps->root == NULL) ps->root = dat; /* * Process attributes. */ for (att = atts; *att != NULL; att += 2) { for (key = 0; key < ATTRKEY__MAX; key++) if (strcmp(*att, attrkeys[key]) == 0) break; if (key == ATTRKEY__MAX) { if (warn) fprintf(stderr, "%s:%zu:%zu: warning: " "unknown attribute \"%s\"\n", ps->fname, XML_GetCurrentLineNumber(ps->xml), XML_GetCurrentColumnNumber(ps->xml), *att); continue; } for (val = 0; val < ATTRVAL__MAX; val++) if (strcmp(att[1], attrvals[val]) == 0) break; pattr = calloc(1, sizeof(struct pattr)); pattr->key = key; pattr->val = val; if (val == ATTRVAL__MAX) pattr->rawval = strdup(att[1]); TAILQ_INSERT_TAIL(&dat->attrq, pattr, child); } } /* * Roll up the parse tree. * If we're at a text node, roll that one up first. * If we hit the root, then assign ourselves as the NODE_ROOT. */ static void xml_elem_end(void *arg, const XML_Char *name) { struct parse *ps = arg; /* FIXME: find a better way to ditch other namespaces. */ if (ps->stop || ps->node == NODE_ROOT) return; else if (strcmp(name, "xi:include") == 0) return; /* Close out text node, if applicable... */ if (ps->node == NODE_TEXT) { pnode_trim(ps->cur); ps->cur = ps->cur->parent; ps->node = ps->cur->node; } if ((ps->cur = ps->cur->parent) == NULL) ps->node = NODE_ROOT; else ps->node = ps->cur->node; } /* * Recursively free a node (NULL is ok). */ static void pnode_free(struct pnode *pn) { struct pnode *pp; struct pattr *ap; if (pn == NULL) return; while ((pp = TAILQ_FIRST(&pn->childq)) != NULL) { TAILQ_REMOVE(&pn->childq, pp, child); pnode_free(pp); } while ((ap = TAILQ_FIRST(&pn->attrq)) != NULL) { TAILQ_REMOVE(&pn->attrq, ap, child); free(ap->rawval); free(ap); } free(pn->real); free(pn); } /* * Unlink a node from its parent and pnode_free() it. */ static void pnode_unlink(struct pnode *pn) { if (pn->parent != NULL) TAILQ_REMOVE(&pn->parent->childq, pn, child); pnode_free(pn); } /* * Unlink all children of a node and pnode_free() them. */ static void pnode_unlinksub(struct pnode *pn) { while ( ! TAILQ_EMPTY(&pn->childq)) pnode_unlink(TAILQ_FIRST(&pn->childq)); } /* * Retrieve an enumeration attribute from a node. * Return ATTRVAL__MAX if the node has no such attribute. */ enum attrval pnode_getattr(struct pnode *pn, enum attrkey key) { struct pattr *ap; TAILQ_FOREACH(ap, &pn->attrq, child) if (ap->key == key) return ap->val; return ATTRVAL__MAX; } /* * Retrieve an attribute string from a node. * Return defval if the node has no such attribute. */ const char * pnode_getattr_raw(struct pnode *pn, enum attrkey key, const char *defval) { struct pattr *ap; TAILQ_FOREACH(ap, &pn->attrq, child) if (ap->key == key) return ap->val == ATTRVAL__MAX ? ap->rawval : attrvals[ap->val]; return defval; } /* * Reset the lookaside buffer. */ static void bufclear(struct parse *p) { p->b[p->bsz = 0] = '\0'; } /* * Append NODE_TEXT contents to the current buffer, reallocating its * size if necessary. * The buffer is ALWAYS NUL-terminated. */ static void bufappend(struct parse *p, struct pnode *pn) { assert(pn->node == NODE_TEXT); if (p->bsz + pn->bsz + 1 > p->mbsz) { p->mbsz = p->bsz + pn->bsz + 1; if ((p->b = realloc(p->b, p->mbsz)) == NULL) { perror(NULL); exit(1); } } memcpy(p->b + p->bsz, pn->b, pn->bsz); p->bsz += pn->bsz; p->b[p->bsz] = '\0'; } /* * Recursively append all NODE_TEXT nodes to the buffer. * This descends into non-text nodes, but doesn't do anything beyond * them. * In other words, this is a recursive text grok. */ static void bufappend_r(struct parse *p, struct pnode *pn) { struct pnode *pp; if (pn->node == NODE_TEXT) bufappend(p, pn); TAILQ_FOREACH(pp, &pn->childq, child) bufappend_r(p, pp); } /* * Recursively search and return the first instance of "node". */ static struct pnode * pnode_findfirst(struct pnode *pn, enum nodeid node) { struct pnode *pp, *res; res = NULL; TAILQ_FOREACH(pp, &pn->childq, child) { res = pp->node == node ? pp : pnode_findfirst(pp, node); if (res != NULL) break; } return res; } #define MACROLINE_NORM 0 #define MACROLINE_UPPER 1 #define MACROLINE_NOWS 2 /* * Recursively print text presumably on a macro line. * Convert all whitespace to regular spaces. */ static void pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl) { char *cp; if (p->newln == 0 && (fl & MACROLINE_NOWS) == 0) putchar(' '); bufclear(p); bufappend_r(p, pn); /* Convert all space to spaces. */ for (cp = p->b; *cp != '\0'; cp++) if (isspace((unsigned char)*cp)) *cp = ' '; for (cp = p->b; isspace((unsigned char)*cp); cp++) /* Spin past whitespace (XXX: necessary?) */ ; for ( ; *cp != '\0'; cp++) { /* Escape us if we look like a macro. */ if ((cp == p->b || cp[-1] == ' ') && isupper((unsigned char)cp[0]) && islower((unsigned char)cp[1]) && (cp[2] == '\0' || cp[2] == ' ' || (islower((unsigned char)cp[2]) && (cp[3] == '\0' || cp[3] == ' ')))) fputs("\\&", stdout); if (fl & MACROLINE_UPPER) putchar(toupper((unsigned char)*cp)); else putchar(*cp); /* If we're a character escape, escape us. */ if (*cp == '\\') putchar('e'); } } static void pnode_printmacrolinepart(struct parse *p, struct pnode *pn) { pnode_printmacrolinetext(p, pn, 0); } /* * Just pnode_printmacrolinepart() but with a newline. * If no text, just the newline. */ static void pnode_printmacroline(struct parse *p, struct pnode *pn) { assert(p->newln == 0); pnode_printmacrolinetext(p, pn, 0); putchar('\n'); p->newln = 1; } static void pnode_printmopen(struct parse *p) { if (p->newln) { putchar('.'); p->newln = 0; } else putchar(' '); } static void pnode_printmclose(struct parse *p, int sv) { if (sv && ! p->newln) { putchar('\n'); p->newln = 1; } } /* * Like pnode_printmclose() except we look to the next node, and, if * found, see if it starts with punctuation. * If it does, then we print that punctuation before the newline. */ static void pnode_printmclosepunct(struct parse *p, struct pnode *pn, int sv) { /* We wouldn't have done anything anyway. */ if ( ! (sv && ! p->newln)) return; /* No next node or it's not text. */ if ((pn = TAILQ_NEXT(pn, child)) == NULL) { pnode_printmclose(p, sv); return; } else if (pn->node != NODE_TEXT) { pnode_printmclose(p, sv); return; } /* Only do this for the comma/period. */ if (pn->bsz > 0 && (pn->b[0] == ',' || pn->b[0] == '.') && (pn->bsz == 1 || isspace((unsigned char)pn->b[1]))) { putchar(' '); putchar(pn->b[0]); pn->b++; pn->bsz--; } putchar('\n'); p->newln = 1; } static void pnode_printpara(struct parse *p, struct pnode *pn) { struct pnode *pp; assert(p->newln); if ((pp = TAILQ_PREV(pn, pnodeq, child)) == NULL && (pp = pn->parent) == NULL) return; switch (pp->node) { case NODE_ENTRY: case NODE_LISTITEM: return; case NODE_PREFACE: case NODE_SECTION: if (p->level < 3) return; break; default: break; } puts(".Pp"); } /* * If the SYNOPSIS macro has a superfluous title, kill it. */ static void pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn) { struct pnode *pp; TAILQ_FOREACH(pp, &pn->childq, child) if (pp->node == NODE_TITLE) { pnode_unlink(pp); return; } } /* * Start a hopefully-named `Sh' section. */ static void pnode_printrefsect(struct parse *p, struct pnode *pn) { struct pnode *pp; const char *title; int flags, level; if (pn->parent == NULL) return; level = ++p->level; flags = level == 1 ? MACROLINE_UPPER : 0; if (level < 3) { switch (pn->node) { case NODE_CAUTION: case NODE_NOTE: case NODE_TIP: case NODE_WARNING: level = 3; break; default: break; } } TAILQ_FOREACH(pp, &pn->childq, child) if (pp->node == NODE_TITLE) break; if (pp == NULL) { switch (pn->node) { case NODE_PREFACE: title = "Preface"; break; case NODE_CAUTION: title = "Caution"; break; case NODE_NOTE: title = "Note"; break; case NODE_TIP: title = "Tip"; break; case NODE_WARNING: title = "Warning"; break; default: title = "Unknown"; break; } } switch (level) { case 1: fputs(".Sh", stdout); break; case 2: fputs(".Ss", stdout); break; default: pnode_printpara(p, pn); fputs(".Sy", stdout); break; } if (pp != NULL) { p->newln = 0; pnode_printmacrolinetext(p, pp, flags); pnode_printmclose(p, 1); pnode_unlink(pp); } else printf(" %s\n", title); } /* * Start a reference, extracting the title and volume. */ static void pnode_printciterefentry(struct parse *p, struct pnode *pn) { struct pnode *pp, *title, *manvol; title = manvol = NULL; TAILQ_FOREACH(pp, &pn->childq, child) if (pp->node == NODE_MANVOLNUM) manvol = pp; else if (pp->node == NODE_REFENTRYTITLE) title = pp; if (title != NULL) pnode_printmacrolinepart(p, title); else fputs(" unknown ", stdout); if (manvol == NULL) { puts(" 1"); p->newln = 1; } else pnode_printmacrolinepart(p, manvol); } static void pnode_printrefmeta(struct parse *p, struct pnode *pn) { struct pnode *pp, *title, *manvol; title = manvol = NULL; assert(p->newln); TAILQ_FOREACH(pp, &pn->childq, child) if (pp->node == NODE_MANVOLNUM) manvol = pp; else if (pp->node == NODE_REFENTRYTITLE) title = pp; fputs(".Dt", stdout); p->newln = 0; if (title != NULL) pnode_printmacrolinetext(p, title, MACROLINE_UPPER); else fputs(" UNKNOWN ", stdout); if (manvol == NULL) { puts(" 1"); p->newln = 1; } else pnode_printmacroline(p, manvol); } static void pnode_printfuncdef(struct parse *p, struct pnode *pn) { struct pnode *pp, *ftype, *func; assert(p->newln); ftype = func = NULL; TAILQ_FOREACH(pp, &pn->childq, child) if (pp->node == NODE_TEXT) ftype = pp; else if (pp->node == NODE_FUNCTION) func = pp; if (ftype != NULL) { fputs(".Ft", stdout); p->newln = 0; pnode_printmacroline(p, ftype); } if (func != NULL) { fputs(".Fo", stdout); p->newln = 0; pnode_printmacroline(p, func); } else { puts(".Fo UNKNOWN"); p->newln = 1; } } static void pnode_printparamdef(struct parse *p, struct pnode *pn) { struct pnode *pp, *ptype, *param; assert(p->newln); ptype = param = NULL; TAILQ_FOREACH(pp, &pn->childq, child) if (pp->node == NODE_TEXT) ptype = pp; else if (pp->node == NODE_PARAMETER) param = pp; fputs(".Fa \"", stdout); p->newln = 0; if (ptype != NULL) { pnode_printmacrolinetext(p, ptype, MACROLINE_NOWS); putchar(' '); } if (param != NULL) pnode_printmacrolinepart(p, param); puts("\""); p->newln = 1; } /* * The node is a little peculiar. * First, it can have arbitrary open and closing tokens, which default * to parentheses. * Second, >1 arguments are separated by commas. */ static void pnode_printmathfenced(struct parse *p, struct pnode *pn) { struct pnode *pp; printf("left %s ", pnode_getattr_raw(pn, ATTRKEY_OPEN, "(")); pp = TAILQ_FIRST(&pn->childq); pnode_print(p, pp); while ((pp = TAILQ_NEXT(pp, child)) != NULL) { putchar(','); pnode_print(p, pp); } printf("right %s ", pnode_getattr_raw(pn, ATTRKEY_CLOSE, ")")); } /* * These math nodes require special handling because they have infix * syntax, instead of the usual prefix or prefix. * So we need to break up the first and second child node with a * particular eqn(7) word. */ static void pnode_printmath(struct parse *p, struct pnode *pn) { struct pnode *pp; pp = TAILQ_FIRST(&pn->childq); pnode_print(p, pp); switch (pn->node) { case NODE_MML_MSUP: fputs(" sup ", stdout); break; case NODE_MML_MFRAC: fputs(" over ", stdout); break; case NODE_MML_MSUB: fputs(" sub ", stdout); break; default: break; } pp = TAILQ_NEXT(pp, child); pnode_print(p, pp); } static void pnode_printfuncprototype(struct parse *p, struct pnode *pn) { struct pnode *pp, *fdef; assert(p->newln); TAILQ_FOREACH(fdef, &pn->childq, child) if (fdef->node == NODE_FUNCDEF) break; if (fdef != NULL) pnode_printfuncdef(p, fdef); else puts(".Fo UNKNOWN"); TAILQ_FOREACH(pp, &pn->childq, child) if (pp->node == NODE_PARAMDEF) pnode_printparamdef(p, pp); puts(".Fc"); p->newln = 1; } /* * The element is more complicated than it should be because text * nodes are treated like ".Ar foo", but non-text nodes need to be * re-sent into the printer (i.e., without the preceding ".Ar"). * This also handles the case of "repetition" (or in other words, the * ellipsis following an argument) and optionality. */ static void pnode_printarg(struct parse *p, struct pnode *pn) { struct pnode *pp; struct pattr *ap; int isop, isrep; isop = 1; isrep = 0; TAILQ_FOREACH(ap, &pn->attrq, child) if (ap->key == ATTRKEY_CHOICE && (ap->val == ATTRVAL_PLAIN || ap->val == ATTRVAL_REQ)) isop = 0; else if (ap->key == ATTRKEY_REP && ap->val == ATTRVAL_REPEAT) isrep = 1; if (isop) { pnode_printmopen(p); fputs("Op", stdout); } TAILQ_FOREACH(pp, &pn->childq, child) { if (pp->node == NODE_TEXT) { pnode_printmopen(p); fputs("Ar", stdout); } pnode_print(p, pp); if (isrep && pp->node == NODE_TEXT) fputs("...", stdout); } } static void pnode_printgroup(struct parse *p, struct pnode *pn) { struct pnode *pp, *np; struct pattr *ap; int isop, sv; isop = 1; TAILQ_FOREACH(ap, &pn->attrq, child) if (ap->key == ATTRKEY_CHOICE && (ap->val == ATTRVAL_PLAIN || ap->val == ATTRVAL_REQ)) { isop = 0; break; } /* * Make sure we're on a macro line. * This will prevent pnode_print() for putting us on a * subsequent line. */ sv = p->newln; pnode_printmopen(p); if (isop) fputs("Op", stdout); else if (sv) fputs("No", stdout); /* * Keep on printing text separated by the vertical bar as long * as we're within the same origin node as the group. * This is kind of a nightmare. * Eh, DocBook... * FIXME: if there's a "Fl", we don't cut off the leading "-" * like we do in pnode_print(). */ TAILQ_FOREACH(pp, &pn->childq, child) { pnode_print(p, pp); np = TAILQ_NEXT(pp, child); while (np != NULL) { if (pp->node != np->node) break; fputs(" |", stdout); pnode_printmacrolinepart(p, np); pp = np; np = TAILQ_NEXT(np, child); } } pnode_printmclose(p, sv); } static void pnode_printprologue(struct parse *p, struct pnode *pn) { struct pnode *pp; pp = p->root == NULL ? NULL : pnode_findfirst(p->root, NODE_REFMETA); puts(".Dd $Mdocdate" "$"); if (pp != NULL) { pnode_printrefmeta(p, pp); pnode_unlink(pp); } else printf(".Dt %s 1\n", pnode_getattr_raw(p->root, ATTRKEY_ID, "UNKNOWN")); puts(".Os"); if (p->flags & PARSE_EQN) { puts(".EQ"); puts("delim $$"); puts(".EN"); } } /* * We can have multiple elements within a , which * we should comma-separate as list headers. */ static void pnode_printvarlistentry(struct parse *p, struct pnode *pn) { struct pnode *pp; int first = 1; assert(p->newln); fputs(".It", stdout); p->newln = 0; TAILQ_FOREACH(pp, &pn->childq, child) if (pp->node == NODE_TERM) { if ( ! first) putchar(','); pnode_print(p, pp); pnode_unlink(pp); first = 0; } else break; putchar('\n'); p->newln = 1; } static void pnode_printrow(struct parse *p, struct pnode *pn) { struct pnode *pp; puts(".Bl -dash -compact"); TAILQ_FOREACH(pp, &pn->childq, child) { assert(p->newln); puts(".It"); pnode_print(p, pp); pnode_printmclose(p, 1); } assert(p->newln); puts(".El"); } static void pnode_printtable(struct parse *p, struct pnode *pn) { struct pnode *pp; assert(p->newln); TAILQ_FOREACH(pp, &pn->childq, child) if (pp->node == NODE_TITLE) { pnode_printpara(p, pp); pnode_print(p, pp); pnode_unlink(pp); } assert(p->newln); puts(".Bl -ohang"); while ((pp = pnode_findfirst(pn, NODE_ROW)) != NULL) { puts(".It Table Row"); pnode_printrow(p, pp); pnode_printmclose(p, 1); pnode_unlink(pp); } assert(p->newln); puts(".El"); } static void pnode_printlist(struct parse *p, struct pnode *pn) { struct pnode *pp; assert(p->newln); TAILQ_FOREACH(pp, &pn->childq, child) if (pp->node == NODE_TITLE) { pnode_printpara(p, pp); pnode_print(p, pp); pnode_unlink(pp); } assert(p->newln); if (pn->node == NODE_ORDEREDLIST) puts(".Bl -enum"); else puts(".Bl -bullet"); TAILQ_FOREACH(pp, &pn->childq, child) { assert(p->newln); puts(".It"); pnode_print(p, pp); pnode_printmclose(p, 1); } assert(p->newln); puts(".El"); } static void pnode_printvariablelist(struct parse *p, struct pnode *pn) { struct pnode *pp; assert(p->newln); TAILQ_FOREACH(pp, &pn->childq, child) if (pp->node == NODE_TITLE) { pnode_printpara(p, pp); pnode_print(p, pp); pnode_unlink(pp); } assert(p->newln); puts(".Bl -tag -width Ds"); TAILQ_FOREACH(pp, &pn->childq, child) if (pp->node != NODE_VARLISTENTRY) { assert(p->newln); fputs(".It", stdout); pnode_printmacroline(p, pp); } else { assert(p->newln); pnode_print(p, pp); } assert(p->newln); puts(".El"); } /* * Print a parsed node (or ignore it--whatever). * This is a recursive function. * FIXME: if we're in a literal context ( or or * whatever), don't print inline macros. */ static void pnode_print(struct parse *p, struct pnode *pn) { struct pnode *pp; const char *ccp; char *cp; int last, sv; if (pn == NULL) return; sv = p->newln; switch (pn->node) { case NODE_APPLICATION: pnode_printmopen(p); fputs("Nm", stdout); break; case NODE_ANCHOR: /* Don't print anything! */ return; case NODE_ARG: pnode_printarg(p, pn); pnode_unlinksub(pn); break; case NODE_AUTHOR: pnode_printmopen(p); fputs("An", stdout); break; case NODE_AUTHORGROUP: assert(p->newln); puts(".An -split"); break; case NODE_BOOKINFO: assert(p->newln); puts(".Sh NAME"); break; case NODE_CITEREFENTRY: pnode_printmopen(p); fputs("Xr", stdout); pnode_printciterefentry(p, pn); pnode_unlinksub(pn); break; case NODE_CODE: pnode_printmopen(p); fputs("Li", stdout); break; case NODE_COMMAND: pnode_printmopen(p); fputs("Nm", stdout); break; case NODE_CONSTANT: pnode_printmopen(p); fputs("Dv", stdout); break; case NODE_EDITOR: puts("editor: "); pnode_printmopen(p); fputs("An", stdout); break; case NODE_EMAIL: pnode_printmopen(p); fputs("Aq Mt", stdout); break; case NODE_EMPHASIS: case NODE_FIRSTTERM: pnode_printmopen(p); fputs("Em", stdout); break; case NODE_ENVAR: pnode_printmopen(p); fputs("Ev", stdout); break; case NODE_FILENAME: pnode_printmopen(p); fputs("Pa", stdout); break; case NODE_FUNCTION: pnode_printmopen(p); fputs("Fn", stdout); break; case NODE_FUNCPROTOTYPE: assert(p->newln); pnode_printfuncprototype(p, pn); pnode_unlinksub(pn); break; case NODE_FUNCSYNOPSISINFO: pnode_printmopen(p); fputs("Fd", stdout); break; case NODE_INDEXTERM: return; case NODE_INFORMALEQUATION: if ( ! p->newln) putchar('\n'); puts(".EQ"); p->newln = 0; break; case NODE_INLINEEQUATION: fputc('$', stdout); p->newln = 0; break; case NODE_ITEMIZEDLIST: assert(p->newln); pnode_printlist(p, pn); pnode_unlinksub(pn); break; case NODE_GROUP: pnode_printgroup(p, pn); pnode_unlinksub(pn); break; case NODE_LEGALNOTICE: assert(p->newln); puts(".Sh LEGAL NOTICE"); break; case NODE_LINK: ccp = pnode_getattr_raw(pn, ATTRKEY_LINKEND, NULL); if (ccp == NULL) break; pnode_printmopen(p); printf("Sx %s\n", ccp); p->newln = 1; return; case NODE_LITERAL: pnode_printmopen(p); fputs("Li", stdout); break; case NODE_LITERALLAYOUT: assert(p->newln); printf(".Bd %s\n", pnode_getattr(pn, ATTRKEY_CLASS) == ATTRVAL_MONOSPACED ? "-literal" : "-unfilled"); break; case NODE_MML_MFENCED: pnode_printmathfenced(p, pn); pnode_unlinksub(pn); break; case NODE_MML_MROW: case NODE_MML_MI: case NODE_MML_MN: case NODE_MML_MO: if (TAILQ_EMPTY(&pn->childq)) break; fputs(" { ", stdout); break; case NODE_MML_MFRAC: case NODE_MML_MSUB: case NODE_MML_MSUP: pnode_printmath(p, pn); pnode_unlinksub(pn); break; case NODE_OPTION: pnode_printmopen(p); fputs("Fl", stdout); break; case NODE_ORDEREDLIST: assert(p->newln); pnode_printlist(p, pn); pnode_unlinksub(pn); break; case NODE_PARA: pnode_printpara(p, pn); break; case NODE_PARAMETER: /* Suppress non-text children... */ pnode_printmopen(p); fputs("Fa \"", stdout); pnode_printmacrolinetext(p, pn, MACROLINE_NOWS); fputs("\"", stdout); pnode_unlinksub(pn); break; case NODE_QUOTE: pnode_printmopen(p); fputs("Qo", stdout); break; case NODE_PROGRAMLISTING: case NODE_SCREEN: assert(p->newln); puts(".Bd -literal"); break; case NODE_REFENTRYINFO: /* Suppress. */ pnode_unlinksub(pn); break; case NODE_REFMETA: abort(); break; case NODE_REFNAME: /* Suppress non-text children... */ pnode_printmopen(p); fputs("Nm", stdout); p->newln = 0; pnode_printmacrolinepart(p, pn); pnode_unlinksub(pn); break; case NODE_REFNAMEDIV: assert(p->newln); puts(".Sh NAME"); break; case NODE_REFPURPOSE: assert(p->newln); pnode_printmopen(p); fputs("Nd", stdout); break; case NODE_REFSYNOPSISDIV: assert(p->newln); pnode_printrefsynopsisdiv(p, pn); puts(".Sh SYNOPSIS"); break; case NODE_PREFACE: case NODE_SECTION: case NODE_NOTE: case NODE_TIP: case NODE_CAUTION: case NODE_WARNING: assert(p->newln); pnode_printrefsect(p, pn); break; case NODE_REPLACEABLE: pnode_printmopen(p); fputs("Ar", stdout); break; case NODE_SBR: assert(p->newln); puts(".br"); break; case NODE_SGMLTAG: pnode_printmopen(p); fputs("Li", stdout); break; case NODE_STRUCTNAME: pnode_printmopen(p); fputs("Vt", stdout); break; case NODE_TABLE: case NODE_INFORMALTABLE: assert(p->newln); pnode_printtable(p, pn); pnode_unlinksub(pn); break; case NODE_TEXT: if (p->newln == 0) putchar(' '); bufclear(p); bufappend(p, pn); if (p->bsz == 0) { assert(pn->real != pn->b); break; } /* * Output all characters, squeezing out whitespace * between newlines. * XXX: all whitespace, including tabs (?). * Remember to escape control characters and escapes. */ assert(p->bsz); cp = p->b; /* * There's often a superfluous "-" in its