=================================================================== RCS file: /cvs/docbook2mdoc/docbook2mdoc.c,v retrieving revision 1.71 retrieving revision 1.73 diff -u -p -r1.71 -r1.73 --- docbook2mdoc/docbook2mdoc.c 2019/03/24 23:48:58 1.71 +++ docbook2mdoc/docbook2mdoc.c 2019/03/25 23:14:44 1.73 @@ -1,4 +1,4 @@ -/* $Id: docbook2mdoc.c,v 1.71 2019/03/24 23:48:58 schwarze Exp $ */ +/* $Id: docbook2mdoc.c,v 1.73 2019/03/25 23:14:44 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * Copyright (c) 2019 Ingo Schwarze @@ -41,16 +41,12 @@ enum linestate { */ struct parse { XML_Parser xml; - enum nodeid node; /* current (NODE_ROOT if pre-tree) */ const char *fname; /* filename */ int stop; /* should we stop now? */ #define PARSE_EQN 1 unsigned int flags; /* document-wide flags */ struct pnode *root; /* root of parse tree */ struct pnode *cur; /* current node in tree */ - char *b; /* NUL-terminated buffer for pre-print */ - size_t bsz; /* current length of b */ - size_t mbsz; /* max bsz allocation */ int level; /* header level, starting at 1 */ enum linestate linestate; }; @@ -252,12 +248,9 @@ xml_char(void *arg, const XML_Char *p, int sz) struct pnode *dat; int i; - /* Stopped or no tree yet. */ - if (ps->stop || ps->node == NODE_ROOT) + if (ps->stop) return; - assert(ps->cur != NULL); - /* * Are we in the midst of processing text? * If we're not processing text right now, then create a text @@ -265,7 +258,7 @@ xml_char(void *arg, const XML_Char *p, int sz) * However, don't do so unless we have some non-whitespace to * process: strip out all leading whitespace to be sure. */ - if (ps->node != NODE_TEXT) { + if (ps->cur->node != NODE_TEXT) { for (i = 0; i < sz; i++) if ( ! isspace((unsigned char)p[i])) break; @@ -273,13 +266,13 @@ xml_char(void *arg, const XML_Char *p, int sz) return; p += i; sz -= i; - dat = calloc(1, sizeof(struct pnode)); + dat = calloc(1, sizeof(*dat)); if (dat == NULL) { perror(NULL); exit(1); } - dat->node = ps->node = NODE_TEXT; + dat->node = NODE_TEXT; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); TAILQ_INIT(&dat->attrq); @@ -290,14 +283,14 @@ xml_char(void *arg, const XML_Char *p, int sz) /* Append to current buffer. */ assert(sz >= 0); - ps->cur->b = realloc(ps->cur->b, - ps->cur->bsz + (size_t)sz); + ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + sz + 1); if (ps->cur->b == NULL) { perror(NULL); exit(1); } memcpy(ps->cur->b + ps->cur->bsz, p, sz); - ps->cur->bsz += (size_t)sz; + ps->cur->bsz += sz; + ps->cur->b[ps->cur->bsz] = '\0'; ps->cur->real = ps->cur->b; } @@ -305,7 +298,7 @@ static void pnode_trim(struct pnode *pn) { assert(pn->node == NODE_TEXT); - for ( ; pn->bsz > 0; pn->bsz--) + for (; pn->bsz > 0; pn->b[--pn->bsz] = '\0') if ( ! isspace((unsigned char)pn->b[pn->bsz - 1])) break; } @@ -340,10 +333,9 @@ xml_elem_start(void *arg, const XML_Char *name, const return; /* Close out text node, if applicable... */ - if (ps->node == NODE_TEXT) { + if (ps->cur != NULL && ps->cur->node == NODE_TEXT) { pnode_trim(ps->cur); ps->cur = ps->cur->parent; - ps->node = ps->cur->node; } for (node = nodes; node->name != NULL; node++) @@ -351,30 +343,22 @@ xml_elem_start(void *arg, const XML_Char *name, const break; if (node->name == NULL) { - if (ps->node == NODE_ROOT) - return; fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n", ps->fname, XML_GetCurrentLineNumber(ps->xml), XML_GetCurrentColumnNumber(ps->xml), name); ps->stop = 1; return; - } else if (ps->node == NODE_ROOT && ps->root != NULL) { - fprintf(stderr, "%s:%zu:%zu: multiple refentries\n", - ps->fname, XML_GetCurrentLineNumber(ps->xml), - XML_GetCurrentColumnNumber(ps->xml)); - ps->stop = 1; - return; } if (node->node == NODE_INLINEEQUATION) ps->flags |= PARSE_EQN; - if ((dat = calloc(1, sizeof(struct pnode))) == NULL) { + if ((dat = calloc(1, sizeof(*dat))) == NULL) { perror(NULL); exit(1); } - dat->node = ps->node = node->node; + dat->node = node->node; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); TAILQ_INIT(&dat->attrq); @@ -406,43 +390,37 @@ xml_elem_start(void *arg, const XML_Char *name, const for (val = 0; val < ATTRVAL__MAX; val++) if (strcmp(att[1], attrvals[val]) == 0) break; - pattr = calloc(1, sizeof(struct pattr)); + pattr = calloc(1, sizeof(*pattr)); pattr->key = key; pattr->val = val; if (val == ATTRVAL__MAX) pattr->rawval = strdup(att[1]); TAILQ_INSERT_TAIL(&dat->attrq, pattr, child); } - } /* * Roll up the parse tree. * If we're at a text node, roll that one up first. - * If we hit the root, then assign ourselves as the NODE_ROOT. */ static void xml_elem_end(void *arg, const XML_Char *name) { struct parse *ps = arg; + if (ps->stop) + return; + /* FIXME: find a better way to ditch other namespaces. */ - if (ps->stop || ps->node == NODE_ROOT) + if (strcmp(name, "xi:include") == 0) return; - else if (strcmp(name, "xi:include") == 0) - return; /* Close out text node, if applicable... */ - if (ps->node == NODE_TEXT) { + if (ps->cur->node == NODE_TEXT) { pnode_trim(ps->cur); ps->cur = ps->cur->parent; - ps->node = ps->cur->node; } - - if ((ps->cur = ps->cur->parent) == NULL) - ps->node = NODE_ROOT; - else - ps->node = ps->cur->node; + ps->cur = ps->cur->parent; } /* @@ -503,6 +481,8 @@ pnode_getattr(struct pnode *pn, enum attrkey key) { struct pattr *ap; + if (pn == NULL) + return ATTRVAL__MAX; TAILQ_FOREACH(ap, &pn->attrq, child) if (ap->key == key) return ap->val; @@ -518,6 +498,8 @@ pnode_getattr_raw(struct pnode *pn, enum attrkey key, { struct pattr *ap; + if (pn == NULL) + return defval; TAILQ_FOREACH(ap, &pn->attrq, child) if (ap->key == key) return ap->val == ATTRVAL__MAX ? ap->rawval : @@ -526,55 +508,6 @@ pnode_getattr_raw(struct pnode *pn, enum attrkey key, } /* - * Reset the lookaside buffer. - */ -static void -bufclear(struct parse *p) -{ - - p->b[p->bsz = 0] = '\0'; -} - -/* - * Append NODE_TEXT contents to the current buffer, reallocating its - * size if necessary. - * The buffer is ALWAYS NUL-terminated. - */ -static void -bufappend(struct parse *p, struct pnode *pn) -{ - - assert(pn->node == NODE_TEXT); - if (p->bsz + pn->bsz + 1 > p->mbsz) { - p->mbsz = p->bsz + pn->bsz + 1; - if ((p->b = realloc(p->b, p->mbsz)) == NULL) { - perror(NULL); - exit(1); - } - } - memcpy(p->b + p->bsz, pn->b, pn->bsz); - p->bsz += pn->bsz; - p->b[p->bsz] = '\0'; -} - -/* - * Recursively append all NODE_TEXT nodes to the buffer. - * This descends into non-text nodes, but doesn't do anything beyond - * them. - * In other words, this is a recursive text grok. - */ -static void -bufappend_r(struct parse *p, struct pnode *pn) -{ - struct pnode *pp; - - if (pn->node == NODE_TEXT) - bufappend(p, pn); - TAILQ_FOREACH(pp, &pn->childq, child) - bufappend_r(p, pp); -} - -/* * Recursively search and return the first instance of "node". */ static struct pnode * @@ -626,36 +559,62 @@ macro_line(struct parse *p, const char *name) macro_close(p); } -#define MACROLINE_UPPER 1 -#define MACROLINE_NOWS 2 +#define ARG_SPACE 1 /* Insert whitespace before this argument. */ +#define ARG_SINGLE 2 /* Quote argument if it contains whitespace. */ +#define ARG_QUOTED 4 /* We are already in a quoted argument. */ +#define ARG_UPPER 8 /* Covert argument to upper case. */ /* * Print an argument string on a macro line, collapsing whitespace. */ static void -macro_addarg(struct parse *p, const char *arg, int fl) +macro_addarg(struct parse *p, const char *arg, int flags) { const char *cp; - int wantspace; assert(p->linestate == LINE_MACRO); - wantspace = !(fl & MACROLINE_NOWS); + + /* Quote if requested and necessary. */ + + if ((flags & (ARG_SINGLE | ARG_QUOTED)) == ARG_SINGLE) { + for (cp = arg; *cp != '\0'; cp++) + if (isspace((unsigned char)*cp)) + break; + if (*cp != '\0') { + if (flags & ARG_SPACE) { + putchar(' '); + flags &= ~ ARG_SPACE; + } + putchar('"'); + flags = ARG_QUOTED; + } + } + for (cp = arg; *cp != '\0'; cp++) { + + /* Collapse whitespace. */ + if (isspace((unsigned char)*cp)) { - wantspace = 1; + flags |= ARG_SPACE; continue; - } else if (wantspace) { + } else if (flags & ARG_SPACE) { putchar(' '); - wantspace = 0; + flags &= ~ ARG_SPACE; } + /* Escape us if we look like a macro. */ - if ((cp == arg || cp[-1] == ' ') && + + if ((flags & ARG_QUOTED) == 0 && + (cp == arg || isspace((unsigned char)cp[-1])) && isupper((unsigned char)cp[0]) && islower((unsigned char)cp[1]) && (cp[2] == '\0' || cp[2] == ' ' || (islower((unsigned char)cp[2]) && (cp[3] == '\0' || cp[3] == ' ')))) fputs("\\&", stdout); - if (fl & MACROLINE_UPPER) + + if (*cp == '"') + fputs("\\(dq", stdout); + else if (flags & ARG_UPPER) putchar(toupper((unsigned char)*cp)); else putchar(*cp); @@ -668,26 +627,73 @@ static void macro_argline(struct parse *p, const char *name, const char *arg) { macro_open(p, name); - macro_addarg(p, arg, 0); + macro_addarg(p, arg, ARG_SPACE); macro_close(p); } /* - * Recurse nodes to print arguments on a macro line. + * Recursively append text from the children of a node to a macro line. */ static void -macro_addnode(struct parse *p, struct pnode *pn, int fl) +macro_addnode(struct parse *p, struct pnode *pn, int flags) { - bufclear(p); - bufappend_r(p, pn); - macro_addarg(p, p->b, fl); + int quote_now; + + assert(p->linestate == LINE_MACRO); + + /* + * If the only child is a text node, just add that text, + * letting macro_addarg() decide about quoting. + */ + + pn = TAILQ_FIRST(&pn->childq); + if (pn != NULL && pn->node == NODE_TEXT && + TAILQ_NEXT(pn, child) == NULL) { + macro_addarg(p, pn->b, flags); + return; + } + + /* + * If we want the argument quoted and are not already + * in a quoted context, quote now. + */ + + quote_now = 0; + if (flags & ARG_SINGLE) { + if ((flags & ARG_QUOTED) == 0) { + if (flags & ARG_SPACE) { + putchar(' '); + flags &= ~ARG_SPACE; + } + putchar('"'); + flags |= ARG_QUOTED; + quote_now = 1; + } + flags &= ~ARG_SINGLE; + } + + /* + * Iterate to child and sibling nodes, + * inserting whitespace between nodes. + */ + + while (pn != NULL) { + if (pn->node == NODE_TEXT) + macro_addarg(p, pn->b, flags); + else + macro_addnode(p, pn, flags); + pn = TAILQ_NEXT(pn, child); + flags |= ARG_SPACE; + } + if (quote_now) + putchar('"'); } static void -macro_nodeline(struct parse *p, const char *name, struct pnode *pn) +macro_nodeline(struct parse *p, const char *name, struct pnode *pn, int flags) { macro_open(p, name); - macro_addnode(p, pn, 0); + macro_addnode(p, pn, ARG_SPACE | flags); macro_close(p); } @@ -780,7 +786,9 @@ pnode_printrefsect(struct parse *p, struct pnode *pn) return; level = ++p->level; - flags = level == 1 ? MACROLINE_UPPER : 0; + flags = ARG_SPACE; + if (level == 1) + flags |= ARG_UPPER; if (level < 3) { switch (pn->node) { case NODE_CAUTION: @@ -838,7 +846,7 @@ pnode_printrefsect(struct parse *p, struct pnode *pn) macro_addnode(p, pp, flags); pnode_unlink(pp); } else - macro_addarg(p, title, 0); + macro_addarg(p, title, ARG_SPACE | ARG_QUOTED); macro_close(p); } @@ -859,13 +867,13 @@ pnode_printciterefentry(struct parse *p, struct pnode } macro_open(p, "Xr"); if (title == NULL) - macro_addarg(p, "unknown", 0); + macro_addarg(p, "unknown", ARG_SPACE); else - macro_addnode(p, title, 0); + macro_addnode(p, title, ARG_SPACE | ARG_SINGLE); if (manvol == NULL) - macro_addarg(p, "1", 0); + macro_addarg(p, "1", ARG_SPACE); else - macro_addnode(p, manvol, 0); + macro_addnode(p, manvol, ARG_SPACE | ARG_SINGLE); macro_close(p); pnode_unlinksub(pn); } @@ -884,13 +892,13 @@ pnode_printrefmeta(struct parse *p, struct pnode *pn) } macro_open(p, "Dt"); if (title == NULL) - macro_addarg(p, "UNKNOWN", 0); + macro_addarg(p, "UNKNOWN", ARG_SPACE); else - macro_addnode(p, title, MACROLINE_UPPER); + macro_addnode(p, title, ARG_SPACE | ARG_SINGLE | ARG_UPPER); if (manvol == NULL) - macro_addarg(p, "1", 0); + macro_addarg(p, "1", ARG_SPACE); else - macro_addnode(p, manvol, 0); + macro_addnode(p, manvol, ARG_SPACE | ARG_SINGLE); macro_close(p); pnode_unlink(pn); } @@ -908,41 +916,15 @@ pnode_printfuncdef(struct parse *p, struct pnode *pn) func = pp; } if (ftype != NULL) - macro_nodeline(p, "Ft", ftype); + macro_argline(p, "Ft", ftype->b); macro_open(p, "Fo"); if (func == NULL) - macro_addarg(p, "UNKNOWN", 0); + macro_addarg(p, "UNKNOWN", ARG_SPACE); else - macro_addnode(p, func, 0); + macro_addnode(p, func, ARG_SPACE | ARG_SINGLE); macro_close(p); } -static void -pnode_printparamdef(struct parse *p, struct pnode *pn) -{ - struct pnode *pp, *ptype, *param; - int flags; - - ptype = param = NULL; - TAILQ_FOREACH(pp, &pn->childq, child) { - if (pp->node == NODE_TEXT) - ptype = pp; - else if (pp->node == NODE_PARAMETER) - param = pp; - } - macro_open(p, "Fa \""); - flags = MACROLINE_NOWS; - if (ptype != NULL) { - macro_addnode(p, ptype, flags); - flags = 0; - } - if (param != NULL) - macro_addnode(p, param, flags); - flags = MACROLINE_NOWS; - macro_addarg(p, "\"", flags); - macro_close(p); -} - /* * The node is a little peculiar. * First, it can have arbitrary open and closing tokens, which default @@ -1016,7 +998,7 @@ pnode_printfuncprototype(struct parse *p, struct pnode TAILQ_FOREACH(pp, &pn->childq, child) if (pp->node == NODE_PARAMDEF) - pnode_printparamdef(p, pp); + macro_nodeline(p, "Fa", pp, ARG_SINGLE); macro_line(p, "Fc"); pnode_unlinksub(pn); @@ -1053,7 +1035,7 @@ pnode_printarg(struct parse *p, struct pnode *pn) macro_open(p, "Ar"); pnode_print(p, pp); if (isrep && pp->node == NODE_TEXT) - macro_addarg(p, "...", 0); + macro_addarg(p, "...", ARG_SPACE); } pnode_unlinksub(pn); } @@ -1098,8 +1080,8 @@ pnode_printgroup(struct parse *p, struct pnode *pn) while (np != NULL) { if (pp->node != np->node) break; - macro_addarg(p, "|", 0); - macro_addnode(p, np, 0); + macro_addarg(p, "|", ARG_SPACE); + macro_addnode(p, np, ARG_SPACE); pp = np; np = TAILQ_NEXT(np, child); } @@ -1123,8 +1105,9 @@ pnode_printprologue(struct parse *p, struct pnode *pn) else { macro_open(p, "Dt"); macro_addarg(p, - pnode_getattr_raw(p->root, ATTRKEY_ID, "UNKNOWN"), 0); - macro_addarg(p, "1", 0); + pnode_getattr_raw(p->root, ATTRKEY_ID, "UNKNOWN"), + ARG_SPACE | ARG_SINGLE | ARG_UPPER); + macro_addarg(p, "1", ARG_SPACE); macro_close(p); } macro_line(p, "Os"); @@ -1151,7 +1134,7 @@ pnode_printvarlistentry(struct parse *p, struct pnode if (pp->node != NODE_TERM) continue; if ( ! first) - macro_addarg(p, ",", MACROLINE_NOWS); + macro_addarg(p, ",", 0); pnode_print(p, pp); first = 0; } @@ -1232,7 +1215,7 @@ pnode_printvariablelist(struct parse *p, struct pnode if (pp->node == NODE_VARLISTENTRY) pnode_print(p, pp); else - macro_nodeline(p, "It", pp); + macro_nodeline(p, "It", pp, 0); } macro_line(p, "El"); pnode_unlinksub(pn); @@ -1379,11 +1362,7 @@ pnode_print(struct parse *p, struct pnode *pn) pnode_printpara(p, pn); break; case NODE_PARAMETER: - /* Suppress non-text children... */ - macro_open(p, "Fa \""); - macro_addnode(p, pn, MACROLINE_NOWS); - macro_addarg(p, "\"", MACROLINE_NOWS); - macro_close(p); + macro_nodeline(p, "Fa", pn, ARG_SINGLE); pnode_unlinksub(pn); break; case NODE_QUOTE: @@ -1403,7 +1382,7 @@ pnode_print(struct parse *p, struct pnode *pn) case NODE_REFNAME: /* Suppress non-text children... */ macro_open(p, "Nm"); - macro_addnode(p, pn, 0); + macro_addnode(p, pn, ARG_SPACE | ARG_SINGLE); pnode_unlinksub(pn); break; case NODE_REFNAMEDIV: @@ -1440,9 +1419,7 @@ pnode_print(struct parse *p, struct pnode *pn) pnode_printtable(p, pn); break; case NODE_TEXT: - bufclear(p); - bufappend(p, pn); - if (p->bsz == 0) { + if (pn->bsz == 0) { assert(pn->real != pn->b); break; } @@ -1457,7 +1434,7 @@ pnode_print(struct parse *p, struct pnode *pn) * XXX: all whitespace, including tabs (?). * Remember to escape control characters and escapes. */ - cp = p->b; + cp = pn->b; /* * There's often a superfluous "-" in its