/* $Id: docbook2mdoc.c,v 1.74 2019/03/26 18:32:07 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * Copyright (c) 2019 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include #include #include "node.h" #include "format.h" /* * The implementation of the mdoc(7) formatter. */ enum linestate { LINE_NEW = 0, LINE_TEXT, LINE_MACRO }; struct format { int level; /* Header level, starting at 1. */ enum linestate linestate; }; static void pnode_print(struct format *, struct pnode *); static void macro_open(struct format *p, const char *name) { switch (p->linestate) { case LINE_TEXT: putchar('\n'); /* FALLTHROUGH */ case LINE_NEW: putchar('.'); p->linestate = LINE_MACRO; break; case LINE_MACRO: putchar(' '); break; } fputs(name, stdout); } static void macro_close(struct format *p) { assert(p->linestate == LINE_MACRO); putchar('\n'); p->linestate = LINE_NEW; } static void macro_line(struct format *p, const char *name) { macro_open(p, name); macro_close(p); } #define ARG_SPACE 1 /* Insert whitespace before this argument. */ #define ARG_SINGLE 2 /* Quote argument if it contains whitespace. */ #define ARG_QUOTED 4 /* We are already in a quoted argument. */ #define ARG_UPPER 8 /* Covert argument to upper case. */ /* * Print an argument string on a macro line, collapsing whitespace. */ static void macro_addarg(struct format *p, const char *arg, int flags) { const char *cp; assert(p->linestate == LINE_MACRO); /* Quote if requested and necessary. */ if ((flags & (ARG_SINGLE | ARG_QUOTED)) == ARG_SINGLE) { for (cp = arg; *cp != '\0'; cp++) if (isspace((unsigned char)*cp)) break; if (*cp != '\0') { if (flags & ARG_SPACE) { putchar(' '); flags &= ~ ARG_SPACE; } putchar('"'); flags = ARG_QUOTED; } } for (cp = arg; *cp != '\0'; cp++) { /* Collapse whitespace. */ if (isspace((unsigned char)*cp)) { flags |= ARG_SPACE; continue; } else if (flags & ARG_SPACE) { putchar(' '); flags &= ~ ARG_SPACE; } /* Escape us if we look like a macro. */ if ((flags & ARG_QUOTED) == 0 && (cp == arg || isspace((unsigned char)cp[-1])) && isupper((unsigned char)cp[0]) && islower((unsigned char)cp[1]) && (cp[2] == '\0' || cp[2] == ' ' || (islower((unsigned char)cp[2]) && (cp[3] == '\0' || cp[3] == ' ')))) fputs("\\&", stdout); if (*cp == '"') fputs("\\(dq", stdout); else if (flags & ARG_UPPER) putchar(toupper((unsigned char)*cp)); else putchar(*cp); if (*cp == '\\') putchar('e'); } } static void macro_argline(struct format *p, const char *name, const char *arg) { macro_open(p, name); macro_addarg(p, arg, ARG_SPACE); macro_close(p); } /* * Recursively append text from the children of a node to a macro line. */ static void macro_addnode(struct format *p, struct pnode *pn, int flags) { int quote_now; assert(p->linestate == LINE_MACRO); /* * If the only child is a text node, just add that text, * letting macro_addarg() decide about quoting. */ pn = TAILQ_FIRST(&pn->childq); if (pn != NULL && pn->node == NODE_TEXT && TAILQ_NEXT(pn, child) == NULL) { macro_addarg(p, pn->b, flags); return; } /* * If we want the argument quoted and are not already * in a quoted context, quote now. */ quote_now = 0; if (flags & ARG_SINGLE) { if ((flags & ARG_QUOTED) == 0) { if (flags & ARG_SPACE) { putchar(' '); flags &= ~ARG_SPACE; } putchar('"'); flags |= ARG_QUOTED; quote_now = 1; } flags &= ~ARG_SINGLE; } /* * Iterate to child and sibling nodes, * inserting whitespace between nodes. */ while (pn != NULL) { if (pn->node == NODE_TEXT) macro_addarg(p, pn->b, flags); else macro_addnode(p, pn, flags); pn = TAILQ_NEXT(pn, child); flags |= ARG_SPACE; } if (quote_now) putchar('"'); } static void macro_nodeline(struct format *p, const char *name, struct pnode *pn, int flags) { macro_open(p, name); macro_addnode(p, pn, ARG_SPACE | flags); macro_close(p); } /* * If the next node is a text node starting with closing punctuation, * emit the closing punctuation as a trailing macro argument. */ static void macro_closepunct(struct format *p, struct pnode *pn) { if ((pn = TAILQ_NEXT(pn, child)) != NULL && pn->node == NODE_TEXT && pn->bsz > 0 && (pn->b[0] == ',' || pn->b[0] == '.') && (pn->bsz == 1 || isspace((unsigned char)pn->b[1]))) { putchar(' '); putchar(pn->b[0]); pn->b++; pn->bsz--; } macro_close(p); } static void print_text(struct format *p, const char *word) { switch (p->linestate) { case LINE_NEW: break; case LINE_TEXT: putchar(' '); break; case LINE_MACRO: macro_close(p); break; } fputs(word, stdout); p->linestate = LINE_TEXT; } static void pnode_printpara(struct format *p, struct pnode *pn) { struct pnode *pp; if ((pp = TAILQ_PREV(pn, pnodeq, child)) == NULL && (pp = pn->parent) == NULL) return; switch (pp->node) { case NODE_ENTRY: case NODE_LISTITEM: return; case NODE_PREFACE: case NODE_SECTION: if (p->level < 3) return; break; default: break; } macro_line(p, "Pp"); } /* * If the SYNOPSIS macro has a superfluous title, kill it. */ static void pnode_printrefsynopsisdiv(struct format *p, struct pnode *pn) { struct pnode *pp, *pq; TAILQ_FOREACH_SAFE(pp, &pn->childq, child, pq) if (pp->node == NODE_TITLE) pnode_unlink(pp); macro_line(p, "Sh SYNOPSIS"); } /* * Start a hopefully-named `Sh' section. */ static void pnode_printrefsect(struct format *p, struct pnode *pn) { struct pnode *pp; const char *title; int flags, level; if (pn->parent == NULL) return; level = ++p->level; flags = ARG_SPACE; if (level == 1) flags |= ARG_UPPER; if (level < 3) { switch (pn->node) { case NODE_CAUTION: case NODE_NOTE: case NODE_TIP: case NODE_WARNING: level = 3; break; default: break; } } TAILQ_FOREACH(pp, &pn->childq, child) if (pp->node == NODE_TITLE) break; if (pp == NULL) { switch (pn->node) { case NODE_PREFACE: title = "Preface"; break; case NODE_CAUTION: title = "Caution"; break; case NODE_NOTE: title = "Note"; break; case NODE_TIP: title = "Tip"; break; case NODE_WARNING: title = "Warning"; break; default: title = "Unknown"; break; } } switch (level) { case 1: macro_open(p, "Sh"); break; case 2: macro_open(p, "Ss"); break; default: pnode_printpara(p, pn); macro_open(p, "Sy"); break; } if (pp != NULL) { macro_addnode(p, pp, flags); pnode_unlink(pp); } else macro_addarg(p, title, ARG_SPACE | ARG_QUOTED); macro_close(p); } /* * Start a reference, extracting the title and volume. */ static void pnode_printciterefentry(struct format *p, struct pnode *pn) { struct pnode *pp, *title, *manvol; title = manvol = NULL; TAILQ_FOREACH(pp, &pn->childq, child) { if (pp->node == NODE_MANVOLNUM) manvol = pp; else if (pp->node == NODE_REFENTRYTITLE) title = pp; } macro_open(p, "Xr"); if (title == NULL) macro_addarg(p, "unknown", ARG_SPACE); else macro_addnode(p, title, ARG_SPACE | ARG_SINGLE); if (manvol == NULL) macro_addarg(p, "1", ARG_SPACE); else macro_addnode(p, manvol, ARG_SPACE | ARG_SINGLE); macro_close(p); pnode_unlinksub(pn); } static void pnode_printrefmeta(struct format *p, struct pnode *pn) { struct pnode *pp, *title, *manvol; title = manvol = NULL; TAILQ_FOREACH(pp, &pn->childq, child) { if (pp->node == NODE_MANVOLNUM) manvol = pp; else if (pp->node == NODE_REFENTRYTITLE) title = pp; } macro_open(p, "Dt"); if (title == NULL) macro_addarg(p, "UNKNOWN", ARG_SPACE); else macro_addnode(p, title, ARG_SPACE | ARG_SINGLE | ARG_UPPER); if (manvol == NULL) macro_addarg(p, "1", ARG_SPACE); else macro_addnode(p, manvol, ARG_SPACE | ARG_SINGLE); macro_close(p); pnode_unlink(pn); } static void pnode_printfuncdef(struct format *p, struct pnode *pn) { struct pnode *pp, *ftype, *func; ftype = func = NULL; TAILQ_FOREACH(pp, &pn->childq, child) { if (pp->node == NODE_TEXT) ftype = pp; else if (pp->node == NODE_FUNCTION) func = pp; } if (ftype != NULL) macro_argline(p, "Ft", ftype->b); macro_open(p, "Fo"); if (func == NULL) macro_addarg(p, "UNKNOWN", ARG_SPACE); else macro_addnode(p, func, ARG_SPACE | ARG_SINGLE); macro_close(p); } /* * The node is a little peculiar. * First, it can have arbitrary open and closing tokens, which default * to parentheses. * Second, >1 arguments are separated by commas. */ static void pnode_printmathfenced(struct format *p, struct pnode *pn) { struct pnode *pp; printf("left %s ", pnode_getattr_raw(pn, ATTRKEY_OPEN, "(")); pp = TAILQ_FIRST(&pn->childq); pnode_print(p, pp); while ((pp = TAILQ_NEXT(pp, child)) != NULL) { putchar(','); pnode_print(p, pp); } printf("right %s ", pnode_getattr_raw(pn, ATTRKEY_CLOSE, ")")); pnode_unlinksub(pn); } /* * These math nodes require special handling because they have infix * syntax, instead of the usual prefix or prefix. * So we need to break up the first and second child node with a * particular eqn(7) word. */ static void pnode_printmath(struct format *p, struct pnode *pn) { struct pnode *pp; pp = TAILQ_FIRST(&pn->childq); pnode_print(p, pp); switch (pn->node) { case NODE_MML_MSUP: fputs(" sup ", stdout); break; case NODE_MML_MFRAC: fputs(" over ", stdout); break; case NODE_MML_MSUB: fputs(" sub ", stdout); break; default: break; } pp = TAILQ_NEXT(pp, child); pnode_print(p, pp); pnode_unlinksub(pn); } static void pnode_printfuncprototype(struct format *p, struct pnode *pn) { struct pnode *pp, *fdef; TAILQ_FOREACH(fdef, &pn->childq, child) if (fdef->node == NODE_FUNCDEF) break; if (fdef != NULL) pnode_printfuncdef(p, fdef); else macro_line(p, "Fo UNKNOWN"); TAILQ_FOREACH(pp, &pn->childq, child) if (pp->node == NODE_PARAMDEF) macro_nodeline(p, "Fa", pp, ARG_SINGLE); macro_line(p, "Fc"); pnode_unlinksub(pn); } /* * The element is more complicated than it should be because text * nodes are treated like ".Ar foo", but non-text nodes need to be * re-sent into the printer (i.e., without the preceding ".Ar"). * This also handles the case of "repetition" (or in other words, the * ellipsis following an argument) and optionality. */ static void pnode_printarg(struct format *p, struct pnode *pn) { struct pnode *pp; struct pattr *ap; int isop, isrep; isop = 1; isrep = 0; TAILQ_FOREACH(ap, &pn->attrq, child) { if (ap->key == ATTRKEY_CHOICE && (ap->val == ATTRVAL_PLAIN || ap->val == ATTRVAL_REQ)) isop = 0; else if (ap->key == ATTRKEY_REP && ap->val == ATTRVAL_REPEAT) isrep = 1; } if (isop) macro_open(p, "Op"); TAILQ_FOREACH(pp, &pn->childq, child) { if (pp->node == NODE_TEXT) macro_open(p, "Ar"); pnode_print(p, pp); if (isrep && pp->node == NODE_TEXT) macro_addarg(p, "...", ARG_SPACE); } pnode_unlinksub(pn); } static void pnode_printgroup(struct format *p, struct pnode *pn) { struct pnode *pp, *np; struct pattr *ap; int isop, sv; isop = 1; TAILQ_FOREACH(ap, &pn->attrq, child) if (ap->key == ATTRKEY_CHOICE && (ap->val == ATTRVAL_PLAIN || ap->val == ATTRVAL_REQ)) { isop = 0; break; } /* * Make sure we're on a macro line. * This will prevent pnode_print() for putting us on a * subsequent line. */ sv = p->linestate == LINE_NEW; if (isop) macro_open(p, "Op"); else if (sv) macro_open(p, "No"); /* * Keep on printing text separated by the vertical bar as long * as we're within the same origin node as the group. * This is kind of a nightmare. * Eh, DocBook... * FIXME: if there's a "Fl", we don't cut off the leading "-" * like we do in pnode_print(). */ TAILQ_FOREACH(pp, &pn->childq, child) { pnode_print(p, pp); np = TAILQ_NEXT(pp, child); while (np != NULL) { if (pp->node != np->node) break; macro_addarg(p, "|", ARG_SPACE); macro_addnode(p, np, ARG_SPACE); pp = np; np = TAILQ_NEXT(np, child); } } if (sv) macro_close(p); pnode_unlinksub(pn); } static void pnode_printprologue(struct format *p, struct ptree *tree) { struct pnode *refmeta; refmeta = tree->root == NULL ? NULL : pnode_findfirst(tree->root, NODE_REFMETA); macro_line(p, "Dd $Mdocdate" "$"); if (refmeta == NULL) { macro_open(p, "Dt"); macro_addarg(p, pnode_getattr_raw(tree->root, ATTRKEY_ID, "UNKNOWN"), ARG_SPACE | ARG_SINGLE | ARG_UPPER); macro_addarg(p, "1", ARG_SPACE); macro_close(p); } else pnode_printrefmeta(p, refmeta); macro_line(p, "Os"); if (tree->flags & TREE_EQN) { macro_line(p, "EQ"); print_text(p, "delim $$"); macro_line(p, "EN"); } } /* * We can have multiple elements within a , which * we should comma-separate as list headers. */ static void pnode_printvarlistentry(struct format *p, struct pnode *pn) { struct pnode *pp; int first = 1; macro_open(p, "It"); TAILQ_FOREACH(pp, &pn->childq, child) { if (pp->node != NODE_TERM) continue; if ( ! first) macro_addarg(p, ",", 0); pnode_print(p, pp); first = 0; } macro_close(p); TAILQ_FOREACH(pp, &pn->childq, child) if (pp->node != NODE_TERM) pnode_print(p, pp); pnode_unlinksub(pn); } static void pnode_printtitle(struct format *p, struct pnode *pn) { struct pnode *pp, *pq; TAILQ_FOREACH_SAFE(pp, &pn->childq, child, pq) { if (pp->node == NODE_TITLE) { pnode_printpara(p, pp); pnode_print(p, pp); pnode_unlink(pp); } } } static void pnode_printrow(struct format *p, struct pnode *pn) { struct pnode *pp; macro_line(p, "Bl -dash -compact"); TAILQ_FOREACH(pp, &pn->childq, child) { macro_line(p, "It"); pnode_print(p, pp); } macro_line(p, "El"); pnode_unlink(pn); } static void pnode_printtable(struct format *p, struct pnode *pn) { struct pnode *pp; pnode_printtitle(p, pn); macro_line(p, "Bl -ohang"); while ((pp = pnode_findfirst(pn, NODE_ROW)) != NULL) { macro_line(p, "It Table Row"); pnode_printrow(p, pp); } macro_line(p, "El"); pnode_unlinksub(pn); } static void pnode_printlist(struct format *p, struct pnode *pn) { struct pnode *pp; pnode_printtitle(p, pn); macro_argline(p, "Bl", pn->node == NODE_ORDEREDLIST ? "-enum" : "-bullet"); TAILQ_FOREACH(pp, &pn->childq, child) { macro_line(p, "It"); pnode_print(p, pp); } macro_line(p, "El"); pnode_unlinksub(pn); } static void pnode_printvariablelist(struct format *p, struct pnode *pn) { struct pnode *pp; pnode_printtitle(p, pn); macro_line(p, "Bl -tag -width Ds"); TAILQ_FOREACH(pp, &pn->childq, child) { if (pp->node == NODE_VARLISTENTRY) pnode_print(p, pp); else macro_nodeline(p, "It", pp, 0); } macro_line(p, "El"); pnode_unlinksub(pn); } /* * Print a parsed node (or ignore it--whatever). * This is a recursive function. * FIXME: if we're in a literal context ( or or * whatever), don't print inline macros. */ static void pnode_print(struct format *p, struct pnode *pn) { struct pnode *pp; const char *ccp; char *cp; int last; enum linestate sv; if (pn == NULL) return; sv = p->linestate; switch (pn->node) { case NODE_APPLICATION: macro_open(p, "Nm"); break; case NODE_ANCHOR: /* Don't print anything! */ return; case NODE_ARG: pnode_printarg(p, pn); break; case NODE_AUTHOR: macro_open(p, "An"); break; case NODE_AUTHORGROUP: macro_line(p, "An -split"); break; case NODE_BOOKINFO: macro_line(p, "Sh NAME"); break; case NODE_CITEREFENTRY: pnode_printciterefentry(p, pn); break; case NODE_CITETITLE: macro_open(p, "%T"); break; case NODE_CODE: macro_open(p, "Li"); break; case NODE_COMMAND: macro_open(p, "Nm"); break; case NODE_CONSTANT: macro_open(p, "Dv"); break; case NODE_EDITOR: print_text(p, "editor:"); macro_open(p, "An"); break; case NODE_EMAIL: macro_open(p, "Aq Mt"); break; case NODE_EMPHASIS: case NODE_FIRSTTERM: macro_open(p, "Em"); break; case NODE_ENVAR: macro_open(p, "Ev"); break; case NODE_FILENAME: macro_open(p, "Pa"); break; case NODE_FUNCTION: macro_open(p, "Fn"); break; case NODE_FUNCPROTOTYPE: pnode_printfuncprototype(p, pn); break; case NODE_FUNCSYNOPSISINFO: macro_open(p, "Fd"); break; case NODE_INDEXTERM: return; case NODE_INFORMALEQUATION: macro_line(p, "EQ"); break; case NODE_INLINEEQUATION: if (p->linestate == LINE_NEW) p->linestate = LINE_TEXT; putchar('$'); break; case NODE_ITEMIZEDLIST: pnode_printlist(p, pn); break; case NODE_GROUP: pnode_printgroup(p, pn); break; case NODE_KEYSYM: macro_open(p, "Sy"); break; case NODE_LEGALNOTICE: macro_line(p, "Sh LEGAL NOTICE"); break; case NODE_LINK: ccp = pnode_getattr_raw(pn, ATTRKEY_LINKEND, NULL); if (ccp == NULL) break; macro_argline(p, "Sx", ccp); return; case NODE_LITERAL: macro_open(p, "Li"); break; case NODE_LITERALLAYOUT: macro_argline(p, "Bd", pnode_getattr(pn, ATTRKEY_CLASS) == ATTRVAL_MONOSPACED ? "-literal" : "-unfilled"); break; case NODE_MML_MFENCED: pnode_printmathfenced(p, pn); break; case NODE_MML_MROW: case NODE_MML_MI: case NODE_MML_MN: case NODE_MML_MO: if (TAILQ_EMPTY(&pn->childq)) break; fputs(" { ", stdout); break; case NODE_MML_MFRAC: case NODE_MML_MSUB: case NODE_MML_MSUP: pnode_printmath(p, pn); break; case NODE_OPTION: macro_open(p, "Fl"); break; case NODE_ORDEREDLIST: pnode_printlist(p, pn); break; case NODE_PARA: pnode_printpara(p, pn); break; case NODE_PARAMETER: macro_nodeline(p, "Fa", pn, ARG_SINGLE); pnode_unlinksub(pn); break; case NODE_QUOTE: macro_open(p, "Qo"); break; case NODE_PROGRAMLISTING: case NODE_SCREEN: macro_line(p, "Bd -literal"); break; case NODE_REFENTRYINFO: /* Suppress. */ pnode_unlinksub(pn); break; case NODE_REFMETA: abort(); break; case NODE_REFNAME: /* Suppress non-text children... */ macro_open(p, "Nm"); macro_addnode(p, pn, ARG_SPACE | ARG_SINGLE); pnode_unlinksub(pn); break; case NODE_REFNAMEDIV: macro_line(p, "Sh NAME"); break; case NODE_REFPURPOSE: macro_open(p, "Nd"); break; case NODE_REFSYNOPSISDIV: pnode_printrefsynopsisdiv(p, pn); break; case NODE_PREFACE: case NODE_SECTION: case NODE_NOTE: case NODE_TIP: case NODE_CAUTION: case NODE_WARNING: pnode_printrefsect(p, pn); break; case NODE_REPLACEABLE: macro_open(p, "Ar"); break; case NODE_SBR: macro_line(p, "br"); break; case NODE_SGMLTAG: macro_open(p, "Li"); break; case NODE_STRUCTNAME: macro_open(p, "Vt"); break; case NODE_TABLE: case NODE_INFORMALTABLE: pnode_printtable(p, pn); break; case NODE_TEXT: if (pn->bsz == 0) { assert(pn->real != pn->b); break; } if (p->linestate == LINE_NEW) p->linestate = LINE_TEXT; else putchar(' '); /* * Output all characters, squeezing out whitespace * between newlines. * XXX: all whitespace, including tabs (?). * Remember to escape control characters and escapes. */ cp = pn->b; /* * There's often a superfluous "-" in its