=================================================================== RCS file: /cvs/docbook2mdoc/docbook2mdoc.c,v retrieving revision 1.14 retrieving revision 1.64 diff -u -p -r1.14 -r1.64 --- docbook2mdoc/docbook2mdoc.c 2014/03/30 13:18:49 1.14 +++ docbook2mdoc/docbook2mdoc.c 2019/03/22 19:07:07 1.64 @@ -1,6 +1,7 @@ -/* $Id: docbook2mdoc.c,v 1.14 2014/03/30 13:18:49 kristaps Exp $ */ +/* $Id: docbook2mdoc.c,v 1.64 2019/03/22 19:07:07 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons + * Copyright (c) 2019 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -37,18 +38,20 @@ struct parse { enum nodeid node; /* current (NODE_ROOT if pre-tree) */ const char *fname; /* filename */ int stop; /* should we stop now? */ +#define PARSE_EQN 1 + unsigned int flags; /* document-wide flags */ struct pnode *root; /* root of parse tree */ struct pnode *cur; /* current node in tree */ - char *b; /* nil-terminated buffer for pre-print */ + char *b; /* NUL-terminated buffer for pre-print */ size_t bsz; /* current length of b */ size_t mbsz; /* max bsz allocation */ + int level; /* header level, starting at 1 */ int newln; /* output: are we on a fresh line */ }; struct node { const char *name; /* docbook element name */ - unsigned int flags; -#define NODE_IGNTEXT 1 /* ignore all contained text */ + enum nodeid node; /* docbook element to generate */ }; TAILQ_HEAD(pnodeq, pnode); @@ -64,6 +67,7 @@ struct pattr { struct pnode { enum nodeid node; /* node type */ char *b; /* binary data buffer */ + char *real; /* store for "b" */ size_t bsz; /* data buffer size */ struct pnode *parent; /* parent (or NULL if top) */ struct pnodeq childq; /* queue of children */ @@ -73,11 +77,16 @@ struct pnode { static const char *attrkeys[ATTRKEY__MAX] = { "choice", + "class", + "close", "id", + "linkend", + "open", "rep" }; static const char *attrvals[ATTRVAL__MAX] = { + "monospaced", 
"norepeat", "opt", "plain", @@ -85,49 +94,140 @@ static const char *attrvals[ATTRVAL__MAX] = { "req" }; -static const struct node nodes[NODE__MAX] = { - { NULL, 0 }, - { "arg", 0 }, - { "citerefentry", NODE_IGNTEXT }, - { "cmdsynopsis", NODE_IGNTEXT }, - { "code", 0 }, - { "command", 0 }, - { "emphasis", 0 }, - { "funcdef", 0 }, - { "funcprototype", NODE_IGNTEXT }, - { "funcsynopsis", NODE_IGNTEXT }, - { "funcsynopsisinfo", 0 }, - { "function", 0 }, - { "link", 0 }, - { "listitem", NODE_IGNTEXT }, - { "manvolnum", 0 }, - { "option", 0 }, - { "para", 0 }, - { "paramdef", 0 }, - { "parameter", 0 }, - { "programlisting", 0 }, - { "refclass", NODE_IGNTEXT }, - { "refdescriptor", NODE_IGNTEXT }, - { "refentry", NODE_IGNTEXT }, - { "refentrytitle", 0 }, - { "refmeta", NODE_IGNTEXT }, - { "refmiscinfo", NODE_IGNTEXT }, - { "refname", 0 }, - { "refnamediv", NODE_IGNTEXT }, - { "refpurpose", 0 }, - { "refsect1", 0 }, - { "refsynopsisdiv", NODE_IGNTEXT }, - { "replaceable", 0 }, - { "structname", 0 }, - { "synopsis", 0 }, - { "term", 0 }, - { NULL, 0 }, - { "title", 0 }, - { "ulink", 0 }, - { "variablelist", NODE_IGNTEXT }, - { "varlistentry", NODE_IGNTEXT }, +static const struct node nodes[] = { + { "acronym", NODE_ACRONYM }, + { "affiliation", NODE_AFFILIATION }, + { "anchor", NODE_ANCHOR }, + { "application", NODE_APPLICATION }, + { "arg", NODE_ARG }, + { "author", NODE_AUTHOR }, + { "authorgroup", NODE_AUTHORGROUP }, + { "blockquote", NODE_BLOCKQUOTE }, + { "book", NODE_BOOK }, + { "bookinfo", NODE_BOOKINFO }, + { "caution", NODE_CAUTION }, + { "chapter", NODE_SECTION }, + { "citerefentry", NODE_CITEREFENTRY }, + { "citetitle", NODE_CITETITLE }, + { "cmdsynopsis", NODE_CMDSYNOPSIS }, + { "code", NODE_CODE }, + { "colspec", NODE_COLSPEC }, + { "command", NODE_COMMAND }, + { "constant", NODE_CONSTANT }, + { "copyright", NODE_COPYRIGHT }, + { "date", NODE_DATE }, + { "editor", NODE_EDITOR }, + { "emphasis", NODE_EMPHASIS }, + { "entry", NODE_ENTRY }, + { "envar", NODE_ENVAR 
}, + { "fieldsynopsis", NODE_FIELDSYNOPSIS }, + { "filename", NODE_FILENAME }, + { "firstname", NODE_FIRSTNAME }, + { "firstterm", NODE_FIRSTTERM }, + { "footnote", NODE_FOOTNOTE }, + { "funcdef", NODE_FUNCDEF }, + { "funcprototype", NODE_FUNCPROTOTYPE }, + { "funcsynopsis", NODE_FUNCSYNOPSIS }, + { "funcsynopsisinfo", NODE_FUNCSYNOPSISINFO }, + { "function", NODE_FUNCTION }, + { "glossterm", NODE_GLOSSTERM }, + { "group", NODE_GROUP }, + { "holder", NODE_HOLDER }, + { "index", NODE_INDEX }, + { "indexterm", NODE_INDEXTERM }, + { "info", NODE_INFO }, + { "informalequation", NODE_INFORMALEQUATION }, + { "informaltable", NODE_INFORMALTABLE }, + { "inlineequation", NODE_INLINEEQUATION }, + { "itemizedlist", NODE_ITEMIZEDLIST }, + { "keysym", NODE_KEYSYM }, + { "legalnotice", NODE_LEGALNOTICE }, + { "link", NODE_LINK }, + { "listitem", NODE_LISTITEM }, + { "literal", NODE_LITERAL }, + { "literallayout", NODE_LITERALLAYOUT }, + { "manvolnum", NODE_MANVOLNUM }, + { "member", NODE_MEMBER }, + { "mml:math", NODE_MML_MATH }, + { "mml:mfenced", NODE_MML_MFENCED }, + { "mml:mfrac", NODE_MML_MFRAC }, + { "mml:mi", NODE_MML_MI }, + { "mml:mn", NODE_MML_MN }, + { "mml:mo", NODE_MML_MO }, + { "mml:mrow", NODE_MML_MROW }, + { "mml:msub", NODE_MML_MSUB }, + { "mml:msup", NODE_MML_MSUP }, + { "modifier", NODE_MODIFIER }, + { "note", NODE_NOTE }, + { "option", NODE_OPTION }, + { "orderedlist", NODE_ORDEREDLIST }, + { "orgname", NODE_ORGNAME }, + { "othername", NODE_OTHERNAME }, + { "para", NODE_PARA }, + { "paramdef", NODE_PARAMDEF }, + { "parameter", NODE_PARAMETER }, + { "part", NODE_SECTION }, + { "phrase", NODE_PHRASE }, + { "preface", NODE_PREFACE }, + { "primary", NODE_PRIMARY }, + { "programlisting", NODE_PROGRAMLISTING }, + { "prompt", NODE_PROMPT }, + { "quote", NODE_QUOTE }, + { "refclass", NODE_REFCLASS }, + { "refdescriptor", NODE_REFDESCRIPTOR }, + { "refentry", NODE_REFENTRY }, + { "refentryinfo", NODE_REFENTRYINFO }, + { "refentrytitle", NODE_REFENTRYTITLE }, + { 
"refmeta", NODE_REFMETA }, + { "refmetainfo", NODE_REFMETAINFO }, + { "refmiscinfo", NODE_REFMISCINFO }, + { "refname", NODE_REFNAME }, + { "refnamediv", NODE_REFNAMEDIV }, + { "refpurpose", NODE_REFPURPOSE }, + { "refsect1", NODE_SECTION }, + { "refsect2", NODE_SECTION }, + { "refsect3", NODE_SECTION }, + { "refsection", NODE_SECTION }, + { "refsynopsisdiv", NODE_REFSYNOPSISDIV }, + { "releaseinfo", NODE_RELEASEINFO }, + { "replaceable", NODE_REPLACEABLE }, + { "row", NODE_ROW }, + { "sbr", NODE_SBR }, + { "screen", NODE_SCREEN }, + { "secondary", NODE_SECONDARY }, + { "sect1", NODE_SECTION }, + { "sect2", NODE_SECTION }, + { "section", NODE_SECTION }, + { "sgmltag", NODE_SGMLTAG }, + { "simplelist", NODE_SIMPLELIST }, + { "spanspec", NODE_SPANSPEC }, + { "structname", NODE_STRUCTNAME }, + { "subtitle", NODE_SUBTITLE }, + { "surname", NODE_SURNAME }, + { "synopsis", NODE_SYNOPSIS }, + { "table", NODE_TABLE }, + { "tbody", NODE_TBODY }, + { "term", NODE_TERM }, + { "tfoot", NODE_TFOOT }, + { "tgroup", NODE_TGROUP }, + { "thead", NODE_THEAD }, + { "tip", NODE_TIP }, + { "title", NODE_TITLE }, + { "trademark", NODE_TRADEMARK }, + { "type", NODE_TYPE }, + { "ulink", NODE_ULINK }, + { "userinput", NODE_USERINPUT }, + { "variablelist", NODE_VARIABLELIST }, + { "varlistentry", NODE_VARLISTENTRY }, + { "varname", NODE_VARNAME }, + { "warning", NODE_WARNING }, + { "wordasword", NODE_WORDASWORD }, + { "year", NODE_YEAR }, + { NULL, NODE__MAX } }; +static int warn = 0; + static void pnode_print(struct parse *p, struct pnode *pn); @@ -145,13 +245,10 @@ xml_char(void *arg, const XML_Char *p, int sz) int i; /* Stopped or no tree yet. */ - if (ps->stop || NODE_ROOT == ps->node) + if (ps->stop || ps->node == NODE_ROOT) return; - /* Not supposed to be collecting text. */ - assert(NULL != ps->cur); - if (NODE_IGNTEXT & nodes[ps->node].flags) - return; + assert(ps->cur != NULL); /* * Are we in the midst of processing text? 
@@ -160,18 +257,18 @@ xml_char(void *arg, const XML_Char *p, int sz) * However, don't do so unless we have some non-whitespace to * process: strip out all leading whitespace to be sure. */ - if (NODE_TEXT != ps->node) { + if (ps->node != NODE_TEXT) { for (i = 0; i < sz; i++) - if ( ! isspace((int)p[i])) + if ( ! isspace((unsigned char)p[i])) break; if (i == sz) return; p += i; sz -= i; dat = calloc(1, sizeof(struct pnode)); - if (NULL == dat) { + if (dat == NULL) { perror(NULL); - exit(EXIT_FAILURE); + exit(1); } dat->node = ps->node = NODE_TEXT; @@ -180,28 +277,28 @@ xml_char(void *arg, const XML_Char *p, int sz) TAILQ_INIT(&dat->attrq); TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); ps->cur = dat; - assert(NULL != ps->root); + assert(ps->root != NULL); } /* Append to current buffer. */ assert(sz >= 0); - ps->cur->b = realloc(ps->cur->b, + ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + (size_t)sz); - if (NULL == ps->cur->b) { + if (ps->cur->b == NULL) { perror(NULL); - exit(EXIT_FAILURE); + exit(1); } memcpy(ps->cur->b + ps->cur->bsz, p, sz); ps->cur->bsz += (size_t)sz; + ps->cur->real = ps->cur->b; } static void pnode_trim(struct pnode *pn) { - - assert(NODE_TEXT == pn->node); + assert(pn->node == NODE_TEXT); for ( ; pn->bsz > 0; pn->bsz--) - if ( ! isspace((int)pn->b[pn->bsz - 1])) + if ( ! isspace((unsigned char)pn->b[pn->bsz - 1])) break; } @@ -223,115 +320,89 @@ static void xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts) { struct parse *ps = arg; - enum nodeid node; + const struct node *node; enum attrkey key; enum attrval val; struct pnode *dat; struct pattr *pattr; const XML_Char **att; - if (ps->stop) + /* FIXME: find a better way to ditch other namespaces. */ + if (ps->stop || strcmp(name, "xi:include") == 0) return; /* Close out text node, if applicable... 
*/ - if (NODE_TEXT == ps->node) { - assert(NULL != ps->cur); + if (ps->node == NODE_TEXT) { pnode_trim(ps->cur); ps->cur = ps->cur->parent; - assert(NULL != ps->cur); ps->node = ps->cur->node; } - for (node = 0; node < NODE__MAX; node++) - if (NULL == nodes[node].name) - continue; - else if (0 == strcmp(nodes[node].name, name)) + for (node = nodes; node->name != NULL; node++) + if (strcmp(node->name, name) == 0) break; - if (NODE__MAX == node && NODE_ROOT == ps->node) { - return; - } else if (NODE__MAX == node) { - fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n", + if (node->name == NULL) { + if (ps->node == NODE_ROOT) + return; + fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n", ps->fname, XML_GetCurrentLineNumber(ps->xml), XML_GetCurrentColumnNumber(ps->xml), name); ps->stop = 1; return; - } else if (NODE_ROOT == ps->node && NULL != ps->root) { + } else if (ps->node == NODE_ROOT && ps->root != NULL) { fprintf(stderr, "%s:%zu:%zu: multiple refentries\n", ps->fname, XML_GetCurrentLineNumber(ps->xml), XML_GetCurrentColumnNumber(ps->xml)); ps->stop = 1; return; - } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) { - return; - } else if ( ! isparent(node, ps->node)) { - fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\" " - "of node \"%s\"\n", - ps->fname, XML_GetCurrentLineNumber(ps->xml), - XML_GetCurrentColumnNumber(ps->xml), - NULL == nodes[ps->node].name ? - "(none)" : nodes[ps->node].name, - NULL == nodes[node].name ? 
- "(none)" : nodes[node].name); - ps->stop = 1; - return; } - if (NULL == (dat = calloc(1, sizeof(struct pnode)))) { + if (node->node == NODE_INLINEEQUATION) + ps->flags |= PARSE_EQN; + + if ((dat = calloc(1, sizeof(struct pnode))) == NULL) { perror(NULL); - exit(EXIT_FAILURE); + exit(1); } - dat->node = ps->node = node; + dat->node = ps->node = node->node; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); TAILQ_INIT(&dat->attrq); - if (NULL != ps->cur) + if (ps->cur != NULL) TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); ps->cur = dat; - if (NULL == ps->root) + if (ps->root == NULL) ps->root = dat; /* * Process attributes. */ - for (att = atts; NULL != *att; att += 2) { + for (att = atts; *att != NULL; att += 2) { for (key = 0; key < ATTRKEY__MAX; key++) - if (0 == strcmp(*att, attrkeys[key])) + if (strcmp(*att, attrkeys[key]) == 0) break; - if (ATTRKEY__MAX == key) { - fprintf(stderr, "%s:%zu:%zu: unknown " - "attribute \"%s\"\n", ps->fname, - XML_GetCurrentLineNumber(ps->xml), - XML_GetCurrentColumnNumber(ps->xml), - *att); + if (key == ATTRKEY__MAX) { + if (warn) + fprintf(stderr, "%s:%zu:%zu: warning: " + "unknown attribute \"%s\"\n", + ps->fname, + XML_GetCurrentLineNumber(ps->xml), + XML_GetCurrentColumnNumber(ps->xml), + *att); continue; - } else if ( ! isattrkey(node, key)) { - fprintf(stderr, "%s:%zu:%zu: bad " - "attribute \"%s\"\n", ps->fname, - XML_GetCurrentLineNumber(ps->xml), - XML_GetCurrentColumnNumber(ps->xml), - *att); - continue; } for (val = 0; val < ATTRVAL__MAX; val++) - if (0 == strcmp(*(att + 1), attrvals[val])) + if (strcmp(att[1], attrvals[val]) == 0) break; - if (ATTRVAL__MAX != val && ! 
isattrval(key, val)) { - fprintf(stderr, "%s:%zu:%zu: bad " - "value \"%s\"\n", ps->fname, - XML_GetCurrentLineNumber(ps->xml), - XML_GetCurrentColumnNumber(ps->xml), - *(att + 1)); - continue; - } pattr = calloc(1, sizeof(struct pattr)); pattr->key = key; pattr->val = val; - if (ATTRVAL__MAX == val) - pattr->rawval = strdup(*(att + 1)); + if (val == ATTRVAL__MAX) + pattr->rawval = strdup(att[1]); TAILQ_INSERT_TAIL(&dat->attrq, pattr, child); } @@ -347,19 +418,20 @@ xml_elem_end(void *arg, const XML_Char *name) { struct parse *ps = arg; - if (ps->stop || NODE_ROOT == ps->node) + /* FIXME: find a better way to ditch other namespaces. */ + if (ps->stop || ps->node == NODE_ROOT) return; + else if (strcmp(name, "xi:include") == 0) + return; /* Close out text node, if applicable... */ - if (NODE_TEXT == ps->node) { - assert(NULL != ps->cur); + if (ps->node == NODE_TEXT) { pnode_trim(ps->cur); ps->cur = ps->cur->parent; - assert(NULL != ps->cur); ps->node = ps->cur->node; } - if (NULL == (ps->cur = ps->cur->parent)) + if ((ps->cur = ps->cur->parent) == NULL) ps->node = NODE_ROOT; else ps->node = ps->cur->node; @@ -374,21 +446,21 @@ pnode_free(struct pnode *pn) struct pnode *pp; struct pattr *ap; - if (NULL == pn) + if (pn == NULL) return; - while (NULL != (pp = TAILQ_FIRST(&pn->childq))) { + while ((pp = TAILQ_FIRST(&pn->childq)) != NULL) { TAILQ_REMOVE(&pn->childq, pp, child); pnode_free(pp); } - while (NULL != (ap = TAILQ_FIRST(&pn->attrq))) { + while ((ap = TAILQ_FIRST(&pn->attrq)) != NULL) { TAILQ_REMOVE(&pn->attrq, ap, child); free(ap->rawval); free(ap); } - free(pn->b); + free(pn->real); free(pn); } @@ -398,8 +470,7 @@ pnode_free(struct pnode *pn) static void pnode_unlink(struct pnode *pn) { - - if (NULL != pn->parent) + if (pn->parent != NULL) TAILQ_REMOVE(&pn->parent->childq, pn, child); pnode_free(pn); } @@ -416,6 +487,37 @@ pnode_unlinksub(struct pnode *pn) } /* + * Retrieve an enumeration attribute from a node. 
+ * Return ATTRVAL__MAX if the node has no such attribute. + */ +enum attrval +pnode_getattr(struct pnode *pn, enum attrkey key) +{ + struct pattr *ap; + + TAILQ_FOREACH(ap, &pn->attrq, child) + if (ap->key == key) + return ap->val; + return ATTRVAL__MAX; +} + +/* + * Retrieve an attribute string from a node. + * Return defval if the node has no such attribute. + */ +const char * +pnode_getattr_raw(struct pnode *pn, enum attrkey key, const char *defval) +{ + struct pattr *ap; + + TAILQ_FOREACH(ap, &pn->attrq, child) + if (ap->key == key) + return ap->val == ATTRVAL__MAX ? ap->rawval : + attrvals[ap->val]; + return defval; +} + +/* * Reset the lookaside buffer. */ static void @@ -428,18 +530,18 @@ bufclear(struct parse *p) /* * Append NODE_TEXT contents to the current buffer, reallocating its * size if necessary. - * The buffer is ALWAYS nil-terminated. + * The buffer is ALWAYS NUL-terminated. */ static void bufappend(struct parse *p, struct pnode *pn) { - assert(NODE_TEXT == pn->node); + assert(pn->node == NODE_TEXT); if (p->bsz + pn->bsz + 1 > p->mbsz) { p->mbsz = p->bsz + pn->bsz + 1; - if (NULL == (p->b = realloc(p->b, p->mbsz))) { + if ((p->b = realloc(p->b, p->mbsz)) == NULL) { perror(NULL); - exit(EXIT_FAILURE); + exit(1); } } memcpy(p->b + p->bsz, pn->b, pn->bsz); @@ -458,14 +560,34 @@ bufappend_r(struct parse *p, struct pnode *pn) { struct pnode *pp; - if (NODE_TEXT == pn->node) + if (pn->node == NODE_TEXT) bufappend(p, pn); TAILQ_FOREACH(pp, &pn->childq, child) bufappend_r(p, pp); } +/* + * Recursively search and return the first instance of "node". + */ +static struct pnode * +pnode_findfirst(struct pnode *pn, enum nodeid node) +{ + struct pnode *pp, *res; + + res = NULL; + TAILQ_FOREACH(pp, &pn->childq, child) { + res = pp->node == node ? pp : + pnode_findfirst(pp, node); + if (res != NULL) + break; + } + + return res; +} + #define MACROLINE_NORM 0 #define MACROLINE_UPPER 1 +#define MACROLINE_NOWS 2 /* * Recursively print text presumably on a macro line. 
* Convert all whitespace to regular spaces. @@ -475,37 +597,34 @@ pnode_printmacrolinetext(struct parse *p, struct pnode { char *cp; - if (0 == p->newln) + if (p->newln == 0 && (fl & MACROLINE_NOWS) == 0) putchar(' '); bufclear(p); bufappend_r(p, pn); /* Convert all space to spaces. */ - for (cp = p->b; '\0' != *cp; cp++) - if (isspace((int)*cp)) + for (cp = p->b; *cp != '\0'; cp++) + if (isspace((unsigned char)*cp)) *cp = ' '; - for (cp = p->b; isspace((int)*cp); cp++) + for (cp = p->b; isspace((unsigned char)*cp); cp++) /* Spin past whitespace (XXX: necessary?) */ ; - for ( ; '\0' != *cp; cp++) { + for ( ; *cp != '\0'; cp++) { /* Escape us if we look like a macro. */ - if ((cp == p->b || ' ' == *(cp - 1)) && - isupper((int)*cp) && - '\0' != *(cp + 1) && - islower((int)*(cp + 1)) && - ('\0' == *(cp + 2) || - ' ' == *(cp + 2) || - (islower((int)*(cp + 2)) && - ('\0' == *(cp + 3) || - ' ' == *(cp + 3))))) + if ((cp == p->b || cp[-1] == ' ') && + isupper((unsigned char)cp[0]) && + islower((unsigned char)cp[1]) && + (cp[2] == '\0' || cp[2] == ' ' || + (islower((unsigned char)cp[2]) && + (cp[3] == '\0' || cp[3] == ' ')))) fputs("\\&", stdout); - if (MACROLINE_UPPER & fl) - putchar(toupper((int)*cp)); + if (fl & MACROLINE_UPPER) + putchar(toupper((unsigned char)*cp)); else - putchar((int)*cp); + putchar(*cp); /* If we're a character escape, escape us. */ - if ('\\' == *cp) + if (*cp == '\\') putchar('e'); } } @@ -524,8 +643,7 @@ pnode_printmacrolinepart(struct parse *p, struct pnode static void pnode_printmacroline(struct parse *p, struct pnode *pn) { - - assert(0 == p->newln); + assert(p->newln == 0); pnode_printmacrolinetext(p, pn, 0); putchar('\n'); p->newln = 1; @@ -552,6 +670,66 @@ pnode_printmclose(struct parse *p, int sv) } /* + * Like pnode_printmclose() except we look to the next node, and, if + * found, see if it starts with punctuation. + * If it does, then we print that punctuation before the newline. 
+ */ +static void +pnode_printmclosepunct(struct parse *p, struct pnode *pn, int sv) +{ + /* We wouldn't have done anything anyway. */ + if ( ! (sv && ! p->newln)) + return; + + /* No next node or it's not text. */ + if ((pn = TAILQ_NEXT(pn, child)) == NULL) { + pnode_printmclose(p, sv); + return; + } else if (pn->node != NODE_TEXT) { + pnode_printmclose(p, sv); + return; + } + + /* Only do this for the comma/period. */ + if (pn->bsz > 0 && + (pn->b[0] == ',' || pn->b[0] == '.') && + (pn->bsz == 1 || isspace((unsigned char)pn->b[1]))) { + putchar(' '); + putchar(pn->b[0]); + pn->b++; + pn->bsz--; + } + + putchar('\n'); + p->newln = 1; +} + +static void +pnode_printpara(struct parse *p, struct pnode *pn) +{ + struct pnode *pp; + + assert(p->newln); + if ((pp = TAILQ_PREV(pn, pnodeq, child)) == NULL && + (pp = pn->parent) == NULL) + return; + + switch (pp->node) { + case NODE_ENTRY: + case NODE_LISTITEM: + return; + case NODE_PREFACE: + case NODE_SECTION: + if (p->level < 3) + return; + break; + default: + break; + } + puts(".Pp"); +} + +/* * If the SYNOPSIS macro has a superfluous title, kill it. */ static void @@ -559,8 +737,8 @@ pnode_printrefsynopsisdiv(struct parse *p, struct pnod { struct pnode *pp; - TAILQ_FOREACH(pp, &pn->childq, child) - if (NODE_TITLE == pp->node) { + TAILQ_FOREACH(pp, &pn->childq, child) + if (pp->node == NODE_TITLE) { pnode_unlink(pp); return; } @@ -573,21 +751,74 @@ static void pnode_printrefsect(struct parse *p, struct pnode *pn) { struct pnode *pp; + const char *title; + int flags, level; + if (pn->parent == NULL) + return; + + level = ++p->level; + flags = level == 1 ? 
MACROLINE_UPPER : 0; + if (level < 3) { + switch (pn->node) { + case NODE_CAUTION: + case NODE_NOTE: + case NODE_TIP: + case NODE_WARNING: + level = 3; + break; + default: + break; + } + } + TAILQ_FOREACH(pp, &pn->childq, child) - if (NODE_TITLE == pp->node) + if (pp->node == NODE_TITLE) break; - fputs(".Sh", stdout); - p->newln = 0; + if (pp == NULL) { + switch (pn->node) { + case NODE_PREFACE: + title = "Preface"; + break; + case NODE_CAUTION: + title = "Caution"; + break; + case NODE_NOTE: + title = "Note"; + break; + case NODE_TIP: + title = "Tip"; + break; + case NODE_WARNING: + title = "Warning"; + break; + default: + title = "Unknown"; + break; + } + } - if (NULL != pp) { - pnode_printmacroline(p, pp); - pnode_unlink(pp); - } else { - puts("UNKNOWN"); - p->newln = 1; + switch (level) { + case 1: + fputs(".Sh", stdout); + break; + case 2: + fputs(".Ss", stdout); + break; + default: + pnode_printpara(p, pn); + fputs(".Sy", stdout); + break; } + + if (pp != NULL) { + p->newln = 0; + pnode_printmacrolinetext(p, pp, flags); + pnode_printmclose(p, 1); + pnode_unlink(pp); + } else + printf(" %s\n", title); } /* @@ -599,26 +830,22 @@ pnode_printciterefentry(struct parse *p, struct pnode struct pnode *pp, *title, *manvol; title = manvol = NULL; - assert(p->newln); TAILQ_FOREACH(pp, &pn->childq, child) - if (NODE_MANVOLNUM == pp->node) + if (pp->node == NODE_MANVOLNUM) manvol = pp; - else if (NODE_REFENTRYTITLE == pp->node) + else if (pp->node == NODE_REFENTRYTITLE) title = pp; - fputs(".Xr", stdout); - p->newln = 0; - - if (NULL != title) { + if (title != NULL) pnode_printmacrolinepart(p, title); - } else + else fputs(" unknown ", stdout); - if (NULL == manvol) { + if (manvol == NULL) { puts(" 1"); p->newln = 1; } else - pnode_printmacroline(p, manvol); + pnode_printmacrolinepart(p, manvol); } static void @@ -629,27 +856,24 @@ pnode_printrefmeta(struct parse *p, struct pnode *pn) title = manvol = NULL; assert(p->newln); TAILQ_FOREACH(pp, &pn->childq, child) - if 
(NODE_MANVOLNUM == pp->node) + if (pp->node == NODE_MANVOLNUM) manvol = pp; - else if (NODE_REFENTRYTITLE == pp->node) + else if (pp->node == NODE_REFENTRYTITLE) title = pp; - puts(".Dd $Mdocdate" "$"); fputs(".Dt", stdout); p->newln = 0; - if (NULL != title) + if (title != NULL) pnode_printmacrolinetext(p, title, MACROLINE_UPPER); else fputs(" UNKNOWN ", stdout); - if (NULL == manvol) { + if (manvol == NULL) { puts(" 1"); p->newln = 1; } else pnode_printmacroline(p, manvol); - - puts(".Os"); } static void @@ -660,18 +884,18 @@ pnode_printfuncdef(struct parse *p, struct pnode *pn) assert(p->newln); ftype = func = NULL; TAILQ_FOREACH(pp, &pn->childq, child) - if (NODE_TEXT == pp->node) + if (pp->node == NODE_TEXT) ftype = pp; - else if (NODE_FUNCTION == pp->node) + else if (pp->node == NODE_FUNCTION) func = pp; - if (NULL != ftype) { + if (ftype != NULL) { fputs(".Ft", stdout); p->newln = 0; pnode_printmacroline(p, ftype); } - if (NULL != func) { + if (func != NULL) { fputs(".Fo", stdout); p->newln = 0; pnode_printmacroline(p, func); @@ -689,49 +913,105 @@ pnode_printparamdef(struct parse *p, struct pnode *pn) assert(p->newln); ptype = param = NULL; TAILQ_FOREACH(pp, &pn->childq, child) - if (NODE_TEXT == pp->node) + if (pp->node == NODE_TEXT) ptype = pp; - else if (NODE_PARAMETER == pp->node) + else if (pp->node == NODE_PARAMETER) param = pp; fputs(".Fa \"", stdout); p->newln = 0; - if (NULL != ptype) { - pnode_printmacrolinepart(p, ptype); + if (ptype != NULL) { + pnode_printmacrolinetext(p, ptype, MACROLINE_NOWS); putchar(' '); } - if (NULL != param) + if (param != NULL) pnode_printmacrolinepart(p, param); puts("\""); p->newln = 1; } +/* + * The <mml:mfenced> node is a little peculiar. + * First, it can have arbitrary open and closing tokens, which default + * to parentheses. + * Second, >1 arguments are separated by commas. 
+ */ +static void +pnode_printmathfenced(struct parse *p, struct pnode *pn) +{ + struct pnode *pp; + + printf("left %s ", pnode_getattr_raw(pn, ATTRKEY_OPEN, "(")); + + pp = TAILQ_FIRST(&pn->childq); + pnode_print(p, pp); + + while ((pp = TAILQ_NEXT(pp, child)) != NULL) { + putchar(','); + pnode_print(p, pp); + } + + printf("right %s ", pnode_getattr_raw(pn, ATTRKEY_CLOSE, ")")); +} + +/* + * These math nodes require special handling because they have infix + * syntax, instead of the usual prefix or postfix. + * So we need to break up the first and second child node with a + * particular eqn(7) word. + */ +static void +pnode_printmath(struct parse *p, struct pnode *pn) +{ + struct pnode *pp; + + pp = TAILQ_FIRST(&pn->childq); + pnode_print(p, pp); + + switch (pn->node) { + case NODE_MML_MSUP: + fputs(" sup ", stdout); + break; + case NODE_MML_MFRAC: + fputs(" over ", stdout); + break; + case NODE_MML_MSUB: + fputs(" sub ", stdout); + break; + default: + break; + } + + pp = TAILQ_NEXT(pp, child); + pnode_print(p, pp); +} + +static void pnode_printfuncprototype(struct parse *p, struct pnode *pn) { struct pnode *pp, *fdef; assert(p->newln); TAILQ_FOREACH(fdef, &pn->childq, child) - if (NODE_FUNCDEF == fdef->node) + if (fdef->node == NODE_FUNCDEF) break; - if (NULL != fdef) + if (fdef != NULL) pnode_printfuncdef(p, fdef); else puts(".Fo UNKNOWN"); - TAILQ_FOREACH(pp, &pn->childq, child) - if (NODE_PARAMDEF == pp->node) + TAILQ_FOREACH(pp, &pn->childq, child) + if (pp->node == NODE_PARAMDEF) pnode_printparamdef(p, pp); puts(".Fc"); p->newln = 1; } -/* +/* * The <arg> element is more complicated than it should be because text * nodes are treated like ".Ar foo", but non-text nodes need to be * re-sent into the printer (i.e., without the preceding ".Ar"). 
@@ -747,13 +1027,11 @@ pnode_printarg(struct parse *p, struct pnode *pn) isop = 1; isrep = 0; - TAILQ_FOREACH(ap, &pn->attrq, child) - if (ATTRKEY_CHOICE == ap->key && - (ATTRVAL_PLAIN == ap->val || - ATTRVAL_REQ == ap->val)) + TAILQ_FOREACH(ap, &pn->attrq, child) + if (ap->key == ATTRKEY_CHOICE && + (ap->val == ATTRVAL_PLAIN || ap->val == ATTRVAL_REQ)) isop = 0; - else if (ATTRKEY_REP == ap->key && - (ATTRVAL_REPEAT == ap->val)) + else if (ap->key == ATTRKEY_REP && ap->val == ATTRVAL_REPEAT) isrep = 1; if (isop) { @@ -762,33 +1040,65 @@ pnode_printarg(struct parse *p, struct pnode *pn) } TAILQ_FOREACH(pp, &pn->childq, child) { - if (NODE_TEXT == pp->node) { + if (pp->node == NODE_TEXT) { pnode_printmopen(p); fputs("Ar", stdout); - } + } pnode_print(p, pp); - if (NODE_TEXT == pp->node && isrep) + if (isrep && pp->node == NODE_TEXT) fputs("...", stdout); } } -/* - * Recursively search and return the first instance of "node". - */ -static struct pnode * -pnode_findfirst(struct pnode *pn, enum nodeid node) +static void +pnode_printgroup(struct parse *p, struct pnode *pn) { - struct pnode *pp, *res; + struct pnode *pp, *np; + struct pattr *ap; + int isop, sv; - res = NULL; - TAILQ_FOREACH(pp, &pn->childq, child) { - res = pp->node == node ? pp : - pnode_findfirst(pp, node); - if (NULL != res) + isop = 1; + TAILQ_FOREACH(ap, &pn->attrq, child) + if (ap->key == ATTRKEY_CHOICE && + (ap->val == ATTRVAL_PLAIN || ap->val == ATTRVAL_REQ)) { + isop = 0; break; + } + + /* + * Make sure we're on a macro line. + * This will prevent pnode_print() from putting us on a + * subsequent line. + */ + sv = p->newln; + pnode_printmopen(p); + if (isop) + fputs("Op", stdout); + else if (sv) + fputs("No", stdout); + + /* + * Keep on printing text separated by the vertical bar as long + * as we're within the same origin node as the group. + * This is kind of a nightmare. + * Eh, DocBook... + * FIXME: if there's a "Fl", we don't cut off the leading "-" + * like we do in pnode_print(). 
+ */ + TAILQ_FOREACH(pp, &pn->childq, child) { + pnode_print(p, pp); + np = TAILQ_NEXT(pp, child); + while (np != NULL) { + if (pp->node != np->node) + break; + fputs(" |", stdout); + pnode_printmacrolinepart(p, np); + pp = np; + np = TAILQ_NEXT(np, child); + } } - return(res); + pnode_printmclose(p, sv); } static void @@ -796,50 +1106,132 @@ pnode_printprologue(struct parse *p, struct pnode *pn) { struct pnode *pp; - pp = NULL == p->root ? NULL : + pp = p->root == NULL ? NULL : pnode_findfirst(p->root, NODE_REFMETA); - if (NULL != pp) { + puts(".Dd $Mdocdate" "$"); + if (pp != NULL) { pnode_printrefmeta(p, pp); pnode_unlink(pp); - } else { - puts(".\\\" Supplying bogus prologue..."); - puts(".Dd $Mdocdate" "$"); - puts(".Dt UNKNOWN 1"); - puts(".Os"); + } else + printf(".Dt %s 1\n", + pnode_getattr_raw(p->root, ATTRKEY_ID, "UNKNOWN")); + puts(".Os"); + + if (p->flags & PARSE_EQN) { + puts(".EQ"); + puts("delim $$"); + puts(".EN"); } } +/* + * We can have multiple <term> elements within a <varlistentry>, which + * we should comma-separate as list headers. + */ static void pnode_printvarlistentry(struct parse *p, struct pnode *pn) { struct pnode *pp; + int first = 1; assert(p->newln); + fputs(".It", stdout); + p->newln = 0; + TAILQ_FOREACH(pp, &pn->childq, child) - if (NODE_TERM == pp->node) { - fputs(".It", stdout); - p->newln = 0; + if (pp->node == NODE_TERM) { + if ( ! 
first) + putchar(','); pnode_print(p, pp); pnode_unlink(pp); - putchar('\n'); - p->newln = 1; - return; - } + first = 0; + } else + break; - puts(".It"); + putchar('\n'); p->newln = 1; } static void +pnode_printrow(struct parse *p, struct pnode *pn) +{ + struct pnode *pp; + + puts(".Bl -dash -compact"); + + TAILQ_FOREACH(pp, &pn->childq, child) { + assert(p->newln); + puts(".It"); + pnode_print(p, pp); + pnode_printmclose(p, 1); + } + assert(p->newln); + puts(".El"); +} + +static void +pnode_printtable(struct parse *p, struct pnode *pn) +{ + struct pnode *pp; + + assert(p->newln); + TAILQ_FOREACH(pp, &pn->childq, child) + if (pp->node == NODE_TITLE) { + pnode_printpara(p, pp); + pnode_print(p, pp); + pnode_unlink(pp); + } + assert(p->newln); + puts(".Bl -ohang"); + while ((pp = pnode_findfirst(pn, NODE_ROW)) != NULL) { + puts(".It Table Row"); + pnode_printrow(p, pp); + pnode_printmclose(p, 1); + pnode_unlink(pp); + } + assert(p->newln); + puts(".El"); +} + +static void +pnode_printlist(struct parse *p, struct pnode *pn) +{ + struct pnode *pp; + + assert(p->newln); + TAILQ_FOREACH(pp, &pn->childq, child) + if (pp->node == NODE_TITLE) { + pnode_printpara(p, pp); + pnode_print(p, pp); + pnode_unlink(pp); + } + assert(p->newln); + + if (pn->node == NODE_ORDEREDLIST) + puts(".Bl -enum"); + else + puts(".Bl -bullet"); + + TAILQ_FOREACH(pp, &pn->childq, child) { + assert(p->newln); + puts(".It"); + pnode_print(p, pp); + pnode_printmclose(p, 1); + } + assert(p->newln); + puts(".El"); +} + +static void pnode_printvariablelist(struct parse *p, struct pnode *pn) { struct pnode *pp; assert(p->newln); TAILQ_FOREACH(pp, &pn->childq, child) - if (NODE_TITLE == pp->node) { - puts(".Pp"); + if (pp->node == NODE_TITLE) { + pnode_printpara(p, pp); pnode_print(p, pp); pnode_unlink(pp); } @@ -847,7 +1239,7 @@ pnode_printvariablelist(struct parse *p, struct pnode assert(p->newln); puts(".Bl -tag -width Ds"); TAILQ_FOREACH(pp, &pn->childq, child) - if (NODE_VARLISTENTRY != pp->node) { + 
if (pp->node != NODE_VARLISTENTRY) { assert(p->newln); fputs(".It", stdout); pnode_printmacroline(p, pp); @@ -862,83 +1254,192 @@ pnode_printvariablelist(struct parse *p, struct pnode /* * Print a parsed node (or ignore it--whatever). * This is a recursive function. - * FIXME: macro line continuation? + * FIXME: if we're in a literal context (<screen> or <programlisting> or + * whatever), don't print inline macros. */ static void pnode_print(struct parse *p, struct pnode *pn) { struct pnode *pp; + const char *ccp; char *cp; int last, sv; - if (NULL == pn) + if (pn == NULL) return; sv = p->newln; switch (pn->node) { - case (NODE_ARG): + case NODE_APPLICATION: + pnode_printmopen(p); + fputs("Nm", stdout); + break; + case NODE_ANCHOR: + /* Don't print anything! */ + return; + case NODE_ARG: pnode_printarg(p, pn); pnode_unlinksub(pn); break; - case (NODE_CITEREFENTRY): + case NODE_AUTHOR: + pnode_printmopen(p); + fputs("An", stdout); + break; + case NODE_AUTHORGROUP: assert(p->newln); + puts(".An -split"); + break; + case NODE_BOOKINFO: + assert(p->newln); + puts(".Sh NAME"); + break; + case NODE_CITEREFENTRY: + pnode_printmopen(p); + fputs("Xr", stdout); pnode_printciterefentry(p, pn); pnode_unlinksub(pn); break; - case (NODE_CODE): + case NODE_CODE: pnode_printmopen(p); fputs("Li", stdout); break; - case (NODE_COMMAND): + case NODE_COMMAND: pnode_printmopen(p); fputs("Nm", stdout); break; - case (NODE_EMPHASIS): + case NODE_CONSTANT: pnode_printmopen(p); + fputs("Dv", stdout); + break; + case NODE_EDITOR: + puts("editor: "); + pnode_printmopen(p); + fputs("An", stdout); + break; + case NODE_EMPHASIS: + case NODE_FIRSTTERM: + pnode_printmopen(p); fputs("Em", stdout); break; - case (NODE_FUNCTION): + case NODE_ENVAR: pnode_printmopen(p); + fputs("Ev", stdout); + break; + case NODE_FILENAME: + pnode_printmopen(p); + fputs("Pa", stdout); + break; + case NODE_FUNCTION: + pnode_printmopen(p); fputs("Fn", stdout); break; - case (NODE_FUNCPROTOTYPE): + case 
NODE_FUNCPROTOTYPE: assert(p->newln); 
pnode_printfuncprototype(p, pn); pnode_unlinksub(pn); break; - case (NODE_FUNCSYNOPSISINFO): + case NODE_FUNCSYNOPSISINFO: pnode_printmopen(p); fputs("Fd", stdout); break; - case (NODE_OPTION): + case NODE_INDEXTERM: + return; + case NODE_INFORMALEQUATION: + if ( ! p->newln) + putchar('\n'); + puts(".EQ"); + p->newln = 0; + break; + case NODE_INLINEEQUATION: + fputc('$', stdout); + p->newln = 0; + break; + case NODE_ITEMIZEDLIST: + assert(p->newln); + pnode_printlist(p, pn); + pnode_unlinksub(pn); + break; + case NODE_GROUP: + pnode_printgroup(p, pn); + pnode_unlinksub(pn); + break; + case NODE_LEGALNOTICE: + assert(p->newln); + puts(".Sh LEGAL NOTICE"); + break; + case NODE_LINK: + ccp = pnode_getattr_raw(pn, ATTRKEY_LINKEND, NULL); + if (ccp == NULL) + break; pnode_printmopen(p); - fputs("Fl", stdout); - /* FIXME: bogus leading '-'? */ + printf("Sx %s\n", ccp); + p->newln = 1; + return; + case NODE_LITERAL: + pnode_printmopen(p); + fputs("Li", stdout); break; - case (NODE_PARA): + case NODE_LITERALLAYOUT: assert(p->newln); - if (NULL != pn->parent && - NODE_LISTITEM == pn->parent->node) + puts(".Bd -literal"); + break; + case NODE_MML_MFENCED: + pnode_printmathfenced(p, pn); + pnode_unlinksub(pn); + break; + case NODE_MML_MROW: + case NODE_MML_MI: + case NODE_MML_MN: + case NODE_MML_MO: + if (TAILQ_EMPTY(&pn->childq)) break; - puts(".Pp"); + fputs(" { ", stdout); break; - case (NODE_PARAMETER): + case NODE_MML_MFRAC: + case NODE_MML_MSUB: + case NODE_MML_MSUP: + pnode_printmath(p, pn); + pnode_unlinksub(pn); + break; + case NODE_OPTION: + pnode_printmopen(p); + fputs("Fl", stdout); + break; + case NODE_ORDEREDLIST: + assert(p->newln); + pnode_printlist(p, pn); + pnode_unlinksub(pn); + break; + case NODE_PARA: + pnode_printpara(p, pn); + break; + case NODE_PARAMETER: /* Suppress non-text children... 
*/ pnode_printmopen(p); fputs("Fa \"", stdout); - pnode_printmacrolinepart(p, pn); - puts("\""); + pnode_printmacrolinetext(p, pn, MACROLINE_NOWS); + fputs("\"", stdout); pnode_unlinksub(pn); break; - case (NODE_PROGRAMLISTING): + case NODE_QUOTE: + pnode_printmopen(p); + fputs("Qo", stdout); + break; + case NODE_PROGRAMLISTING: + case NODE_SCREEN: assert(p->newln); - puts(".Bd -literal"); + printf(".Bd %s\n", pnode_getattr(pn, ATTRKEY_CLASS) == + ATTRVAL_MONOSPACED ? "-literal" : "-unfilled"); break; - case (NODE_REFMETA): + case NODE_REFENTRYINFO: + /* Suppress. */ + pnode_unlinksub(pn); + break; + case NODE_REFMETA: abort(); break; - case (NODE_REFNAME): + case NODE_REFNAME: /* Suppress non-text children... */ pnode_printmopen(p); fputs("Nm", stdout); @@ -946,70 +1447,125 @@ pnode_print(struct parse *p, struct pnode *pn) pnode_printmacrolinepart(p, pn); pnode_unlinksub(pn); break; - case (NODE_REFNAMEDIV): + case NODE_REFNAMEDIV: assert(p->newln); puts(".Sh NAME"); break; - case (NODE_REFPURPOSE): + case NODE_REFPURPOSE: assert(p->newln); pnode_printmopen(p); fputs("Nd", stdout); break; - case (NODE_REFSYNOPSISDIV): + case NODE_REFSYNOPSISDIV: assert(p->newln); pnode_printrefsynopsisdiv(p, pn); puts(".Sh SYNOPSIS"); break; - case (NODE_REFSECT1): + case NODE_PREFACE: + case NODE_SECTION: + case NODE_NOTE: + case NODE_TIP: + case NODE_CAUTION: + case NODE_WARNING: assert(p->newln); pnode_printrefsect(p, pn); break; - case (NODE_REPLACEABLE): + case NODE_REPLACEABLE: pnode_printmopen(p); fputs("Ar", stdout); break; - case (NODE_STRUCTNAME): + case NODE_SBR: + assert(p->newln); + puts(".br"); + break; + case NODE_SGMLTAG: pnode_printmopen(p); + fputs("Li", stdout); + break; + case NODE_STRUCTNAME: + pnode_printmopen(p); fputs("Vt", stdout); break; - case (NODE_TEXT): - if (0 == p->newln) + case NODE_TABLE: + case NODE_INFORMALTABLE: + assert(p->newln); + pnode_printtable(p, pn); + pnode_unlinksub(pn); + break; + case NODE_TEXT: + if (p->newln == 0) putchar(' '); + 
bufclear(p); bufappend(p, pn); + + if (p->bsz == 0) { + assert(pn->real != pn->b); + break; + } + /* * Output all characters, squeezing out whitespace - * between newlines. + * between newlines. * XXX: all whitespace, including tabs (?). * Remember to escape control characters and escapes. */ assert(p->bsz); - for (last = '\n', cp = p->b; '\0' != *cp; ) { - if ('\n' == last) { + cp = p->b; + + /* + * There's often a superfluous "-" in its