=================================================================== RCS file: /cvs/docbook2mdoc/docbook2mdoc.c,v retrieving revision 1.71 retrieving revision 1.72 diff -u -p -r1.71 -r1.72 --- docbook2mdoc/docbook2mdoc.c 2019/03/24 23:48:58 1.71 +++ docbook2mdoc/docbook2mdoc.c 2019/03/25 17:28:32 1.72 @@ -1,4 +1,4 @@ -/* $Id: docbook2mdoc.c,v 1.71 2019/03/24 23:48:58 schwarze Exp $ */ +/* $Id: docbook2mdoc.c,v 1.72 2019/03/25 17:28:32 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * Copyright (c) 2019 Ingo Schwarze @@ -48,9 +48,6 @@ struct parse { unsigned int flags; /* document-wide flags */ struct pnode *root; /* root of parse tree */ struct pnode *cur; /* current node in tree */ - char *b; /* NUL-terminated buffer for pre-print */ - size_t bsz; /* current length of b */ - size_t mbsz; /* max bsz allocation */ int level; /* header level, starting at 1 */ enum linestate linestate; }; @@ -273,7 +270,7 @@ xml_char(void *arg, const XML_Char *p, int sz) return; p += i; sz -= i; - dat = calloc(1, sizeof(struct pnode)); + dat = calloc(1, sizeof(*dat)); if (dat == NULL) { perror(NULL); exit(1); @@ -290,14 +287,14 @@ xml_char(void *arg, const XML_Char *p, int sz) /* Append to current buffer. */ assert(sz >= 0); - ps->cur->b = realloc(ps->cur->b, - ps->cur->bsz + (size_t)sz); + ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + sz + 1); if (ps->cur->b == NULL) { perror(NULL); exit(1); } memcpy(ps->cur->b + ps->cur->bsz, p, sz); - ps->cur->bsz += (size_t)sz; + ps->cur->bsz += sz; + ps->cur->b[ps->cur->bsz] = '\0'; ps->cur->real = ps->cur->b; } @@ -305,7 +302,7 @@ static void pnode_trim(struct pnode *pn) { assert(pn->node == NODE_TEXT); - for ( ; pn->bsz > 0; pn->bsz--) + for (; pn->bsz > 0; pn->b[--pn->bsz] = '\0') if ( ! 
isspace((unsigned char)pn->b[pn->bsz - 1])) break; } @@ -369,7 +366,7 @@ xml_elem_start(void *arg, const XML_Char *name, const if (node->node == NODE_INLINEEQUATION) ps->flags |= PARSE_EQN; - if ((dat = calloc(1, sizeof(struct pnode))) == NULL) { + if ((dat = calloc(1, sizeof(*dat))) == NULL) { perror(NULL); exit(1); } @@ -406,7 +403,7 @@ xml_elem_start(void *arg, const XML_Char *name, const for (val = 0; val < ATTRVAL__MAX; val++) if (strcmp(att[1], attrvals[val]) == 0) break; - pattr = calloc(1, sizeof(struct pattr)); + pattr = calloc(1, sizeof(*pattr)); pattr->key = key; pattr->val = val; if (val == ATTRVAL__MAX) @@ -526,55 +523,6 @@ pnode_getattr_raw(struct pnode *pn, enum attrkey key, } /* - * Reset the lookaside buffer. - */ -static void -bufclear(struct parse *p) -{ - - p->b[p->bsz = 0] = '\0'; -} - -/* - * Append NODE_TEXT contents to the current buffer, reallocating its - * size if necessary. - * The buffer is ALWAYS NUL-terminated. - */ -static void -bufappend(struct parse *p, struct pnode *pn) -{ - - assert(pn->node == NODE_TEXT); - if (p->bsz + pn->bsz + 1 > p->mbsz) { - p->mbsz = p->bsz + pn->bsz + 1; - if ((p->b = realloc(p->b, p->mbsz)) == NULL) { - perror(NULL); - exit(1); - } - } - memcpy(p->b + p->bsz, pn->b, pn->bsz); - p->bsz += pn->bsz; - p->b[p->bsz] = '\0'; -} - -/* - * Recursively append all NODE_TEXT nodes to the buffer. - * This descends into non-text nodes, but doesn't do anything beyond - * them. - * In other words, this is a recursive text grok. - */ -static void -bufappend_r(struct parse *p, struct pnode *pn) -{ - struct pnode *pp; - - if (pn->node == NODE_TEXT) - bufappend(p, pn); - TAILQ_FOREACH(pp, &pn->childq, child) - bufappend_r(p, pp); -} - -/* * Recursively search and return the first instance of "node". */ static struct pnode * @@ -626,36 +574,62 @@ macro_line(struct parse *p, const char *name) macro_close(p); } -#define MACROLINE_UPPER 1 -#define MACROLINE_NOWS 2 +#define ARG_SPACE 1 /* Insert whitespace before this argument. 
*/ +#define ARG_SINGLE 2 /* Quote argument if it contains whitespace. */ +#define ARG_QUOTED 4 /* We are already in a quoted argument. */ +#define ARG_UPPER 8 /* Convert argument to upper case. */ /* * Print an argument string on a macro line, collapsing whitespace. */ static void -macro_addarg(struct parse *p, const char *arg, int fl) +macro_addarg(struct parse *p, const char *arg, int flags) { const char *cp; - int wantspace; assert(p->linestate == LINE_MACRO); - wantspace = !(fl & MACROLINE_NOWS); + + /* Quote if requested and necessary. */ + + if ((flags & (ARG_SINGLE | ARG_QUOTED)) == ARG_SINGLE) { + for (cp = arg; *cp != '\0'; cp++) + if (isspace((unsigned char)*cp)) + break; + if (*cp != '\0') { + if (flags & ARG_SPACE) { + putchar(' '); + flags &= ~ ARG_SPACE; + } + putchar('"'); + flags = ARG_QUOTED; + } + } + for (cp = arg; *cp != '\0'; cp++) { + + /* Collapse whitespace. */ + if (isspace((unsigned char)*cp)) { - wantspace = 1; + flags |= ARG_SPACE; continue; - } else if (wantspace) { + } else if (flags & ARG_SPACE) { putchar(' '); - wantspace = 0; + flags &= ~ ARG_SPACE; } + /* Escape us if we look like a macro. */ - if ((cp == arg || cp[-1] == ' ') && + + if ((flags & ARG_QUOTED) == 0 && + (cp == arg || isspace((unsigned char)cp[-1])) && isupper((unsigned char)cp[0]) && islower((unsigned char)cp[1]) && (cp[2] == '\0' || cp[2] == ' ' || (islower((unsigned char)cp[2]) && (cp[3] == '\0' || cp[3] == ' ')))) fputs("\\&", stdout); - if (fl & MACROLINE_UPPER) + + if (*cp == '"') + fputs("\\(dq", stdout); + else if (flags & ARG_UPPER) putchar(toupper((unsigned char)*cp)); else putchar(*cp); @@ -668,26 +642,73 @@ static void macro_argline(struct parse *p, const char *name, const char *arg) { macro_open(p, name); - macro_addarg(p, arg, 0); + macro_addarg(p, arg, ARG_SPACE); macro_close(p); } /* - * Recurse nodes to print arguments on a macro line. + * Recursively append text from the children of a node to a macro line. 
*/ static void -macro_addnode(struct parse *p, struct pnode *pn, int fl) +macro_addnode(struct parse *p, struct pnode *pn, int flags) { - bufclear(p); - bufappend_r(p, pn); - macro_addarg(p, p->b, fl); + int quote_now; + + assert(p->linestate == LINE_MACRO); + + /* + * If the only child is a text node, just add that text, + * letting macro_addarg() decide about quoting. + */ + + pn = TAILQ_FIRST(&pn->childq); + if (pn != NULL && pn->node == NODE_TEXT && + TAILQ_NEXT(pn, child) == NULL) { + macro_addarg(p, pn->b, flags); + return; + } + + /* + * If we want the argument quoted and are not already + * in a quoted context, quote now. + */ + + quote_now = 0; + if (flags & ARG_SINGLE) { + if ((flags & ARG_QUOTED) == 0) { + if (flags & ARG_SPACE) { + putchar(' '); + flags &= ~ARG_SPACE; + } + putchar('"'); + flags |= ARG_QUOTED; + quote_now = 1; + } + flags &= ~ARG_SINGLE; + } + + /* + * Iterate to child and sibling nodes, + * inserting whitespace between nodes. + */ + + while (pn != NULL) { + if (pn->node == NODE_TEXT) + macro_addarg(p, pn->b, flags); + else + macro_addnode(p, pn, flags); + pn = TAILQ_NEXT(pn, child); + flags |= ARG_SPACE; + } + if (quote_now) + putchar('"'); } static void -macro_nodeline(struct parse *p, const char *name, struct pnode *pn) +macro_nodeline(struct parse *p, const char *name, struct pnode *pn, int flags) { macro_open(p, name); - macro_addnode(p, pn, 0); + macro_addnode(p, pn, ARG_SPACE | flags); macro_close(p); } @@ -780,7 +801,9 @@ pnode_printrefsect(struct parse *p, struct pnode *pn) return; level = ++p->level; - flags = level == 1 ? 
MACROLINE_UPPER : 0; + flags = ARG_SPACE; + if (level == 1) + flags |= ARG_UPPER; if (level < 3) { switch (pn->node) { case NODE_CAUTION: @@ -838,7 +861,7 @@ pnode_printrefsect(struct parse *p, struct pnode *pn) macro_addnode(p, pp, flags); pnode_unlink(pp); } else - macro_addarg(p, title, 0); + macro_addarg(p, title, ARG_SPACE | ARG_QUOTED); macro_close(p); } @@ -859,13 +882,13 @@ pnode_printciterefentry(struct parse *p, struct pnode } macro_open(p, "Xr"); if (title == NULL) - macro_addarg(p, "unknown", 0); + macro_addarg(p, "unknown", ARG_SPACE); else - macro_addnode(p, title, 0); + macro_addnode(p, title, ARG_SPACE | ARG_SINGLE); if (manvol == NULL) - macro_addarg(p, "1", 0); + macro_addarg(p, "1", ARG_SPACE); else - macro_addnode(p, manvol, 0); + macro_addnode(p, manvol, ARG_SPACE | ARG_SINGLE); macro_close(p); pnode_unlinksub(pn); } @@ -884,13 +907,13 @@ pnode_printrefmeta(struct parse *p, struct pnode *pn) } macro_open(p, "Dt"); if (title == NULL) - macro_addarg(p, "UNKNOWN", 0); + macro_addarg(p, "UNKNOWN", ARG_SPACE); else - macro_addnode(p, title, MACROLINE_UPPER); + macro_addnode(p, title, ARG_SPACE | ARG_SINGLE | ARG_UPPER); if (manvol == NULL) - macro_addarg(p, "1", 0); + macro_addarg(p, "1", ARG_SPACE); else - macro_addnode(p, manvol, 0); + macro_addnode(p, manvol, ARG_SPACE | ARG_SINGLE); macro_close(p); pnode_unlink(pn); } @@ -908,41 +931,15 @@ pnode_printfuncdef(struct parse *p, struct pnode *pn) func = pp; } if (ftype != NULL) - macro_nodeline(p, "Ft", ftype); + macro_argline(p, "Ft", ftype->b); macro_open(p, "Fo"); if (func == NULL) - macro_addarg(p, "UNKNOWN", 0); + macro_addarg(p, "UNKNOWN", ARG_SPACE); else - macro_addnode(p, func, 0); + macro_addnode(p, func, ARG_SPACE | ARG_SINGLE); macro_close(p); } -static void -pnode_printparamdef(struct parse *p, struct pnode *pn) -{ - struct pnode *pp, *ptype, *param; - int flags; - - ptype = param = NULL; - TAILQ_FOREACH(pp, &pn->childq, child) { - if (pp->node == NODE_TEXT) - ptype = pp; - else if 
(pp->node == NODE_PARAMETER) - param = pp; - } - macro_open(p, "Fa \""); - flags = MACROLINE_NOWS; - if (ptype != NULL) { - macro_addnode(p, ptype, flags); - flags = 0; - } - if (param != NULL) - macro_addnode(p, param, flags); - flags = MACROLINE_NOWS; - macro_addarg(p, "\"", flags); - macro_close(p); -} - /* * The node is a little peculiar. * First, it can have arbitrary open and closing tokens, which default @@ -1016,7 +1013,7 @@ pnode_printfuncprototype(struct parse *p, struct pnode TAILQ_FOREACH(pp, &pn->childq, child) if (pp->node == NODE_PARAMDEF) - pnode_printparamdef(p, pp); + macro_nodeline(p, "Fa", pp, ARG_SINGLE); macro_line(p, "Fc"); pnode_unlinksub(pn); @@ -1053,7 +1050,7 @@ pnode_printarg(struct parse *p, struct pnode *pn) macro_open(p, "Ar"); pnode_print(p, pp); if (isrep && pp->node == NODE_TEXT) - macro_addarg(p, "...", 0); + macro_addarg(p, "...", ARG_SPACE); } pnode_unlinksub(pn); } @@ -1098,8 +1095,8 @@ pnode_printgroup(struct parse *p, struct pnode *pn) while (np != NULL) { if (pp->node != np->node) break; - macro_addarg(p, "|", 0); - macro_addnode(p, np, 0); + macro_addarg(p, "|", ARG_SPACE); + macro_addnode(p, np, ARG_SPACE); pp = np; np = TAILQ_NEXT(np, child); } @@ -1123,8 +1120,9 @@ pnode_printprologue(struct parse *p, struct pnode *pn) else { macro_open(p, "Dt"); macro_addarg(p, - pnode_getattr_raw(p->root, ATTRKEY_ID, "UNKNOWN"), 0); - macro_addarg(p, "1", 0); + pnode_getattr_raw(p->root, ATTRKEY_ID, "UNKNOWN"), + ARG_SPACE | ARG_SINGLE | ARG_UPPER); + macro_addarg(p, "1", ARG_SPACE); macro_close(p); } macro_line(p, "Os"); @@ -1151,7 +1149,7 @@ pnode_printvarlistentry(struct parse *p, struct pnode if (pp->node != NODE_TERM) continue; if ( ! 
first) - macro_addarg(p, ",", MACROLINE_NOWS); + macro_addarg(p, ",", 0); pnode_print(p, pp); first = 0; } @@ -1232,7 +1230,7 @@ pnode_printvariablelist(struct parse *p, struct pnode if (pp->node == NODE_VARLISTENTRY) pnode_print(p, pp); else - macro_nodeline(p, "It", pp); + macro_nodeline(p, "It", pp, 0); } macro_line(p, "El"); pnode_unlinksub(pn); @@ -1379,11 +1377,7 @@ pnode_print(struct parse *p, struct pnode *pn) pnode_printpara(p, pn); break; case NODE_PARAMETER: - /* Suppress non-text children... */ - macro_open(p, "Fa \""); - macro_addnode(p, pn, MACROLINE_NOWS); - macro_addarg(p, "\"", MACROLINE_NOWS); - macro_close(p); + macro_nodeline(p, "Fa", pn, ARG_SINGLE); pnode_unlinksub(pn); break; case NODE_QUOTE: @@ -1403,7 +1397,7 @@ pnode_print(struct parse *p, struct pnode *pn) case NODE_REFNAME: /* Suppress non-text children... */ macro_open(p, "Nm"); - macro_addnode(p, pn, 0); + macro_addnode(p, pn, ARG_SPACE | ARG_SINGLE); pnode_unlinksub(pn); break; case NODE_REFNAMEDIV: @@ -1440,9 +1434,7 @@ pnode_print(struct parse *p, struct pnode *pn) pnode_printtable(p, pn); break; case NODE_TEXT: - bufclear(p); - bufappend(p, pn); - if (p->bsz == 0) { + if (pn->bsz == 0) { assert(pn->real != pn->b); break; } @@ -1457,7 +1449,7 @@ pnode_print(struct parse *p, struct pnode *pn) * XXX: all whitespace, including tabs (?). * Remember to escape control characters and escapes. */ - cp = p->b; + cp = pn->b; /* * There's often a superfluous "-" in its