=================================================================== RCS file: /cvs/docbook2mdoc/docbook2mdoc.c,v retrieving revision 1.11 retrieving revision 1.12 diff -u -p -r1.11 -r1.12 --- docbook2mdoc/docbook2mdoc.c 2014/03/29 11:13:49 1.11 +++ docbook2mdoc/docbook2mdoc.c 2014/03/29 22:44:06 1.12 @@ -1,4 +1,4 @@ -/* $Id: docbook2mdoc.c,v 1.11 2014/03/29 11:13:49 kristaps Exp $ */ +/* $Id: docbook2mdoc.c,v 1.12 2014/03/29 22:44:06 kristaps Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * @@ -67,11 +67,38 @@ enum nodeid { }; /* + * All recognised attribute keys. + */ +enum attrkey { + /* Alpha-order... */ + ATTRKEY_CHOICE = 0, + ATTRKEY_ID, + ATTRKEY_REP, + ATTRKEY__MAX +}; + +/* + * All [explicitly] recognised attribute values. + * If an attribute has ATTRVAL__MAX, it could be a free-form. + */ +enum attrval { + /* Alpha-order... */ + ATTRVAL_NOREPEAT, + ATTRVAL_OPT, + ATTRVAL_PLAIN, + ATTRVAL_REPEAT, + ATTRVAL_REQ, + ATTRVAL__MAX +}; + +/* * Global parse state. * Keep this as simple and small as possible. */ struct parse { + XML_Parser xml; enum nodeid node; /* current (NODE_ROOT if pre-tree) */ + const char *fname; /* filename */ int stop; /* should we stop now? */ struct pnode *root; /* root of parse tree */ struct pnode *cur; /* current node in tree */ @@ -88,16 +115,39 @@ struct node { }; TAILQ_HEAD(pnodeq, pnode); +TAILQ_HEAD(pattrq, pattr); +struct pattr { + enum attrkey key; + enum attrval val; + char *rawval; + TAILQ_ENTRY(pattr) child; +}; + struct pnode { enum nodeid node; /* node type */ char *b; /* binary data buffer */ size_t bsz; /* data buffer size */ struct pnode *parent; /* parent (or NULL if top) */ struct pnodeq childq; /* queue of children */ + struct pattrq attrq; /* attributes of node */ TAILQ_ENTRY(pnode) child; }; +static const char *attrkeys[ATTRKEY__MAX] = { + "choice", + "id", + "rep" +}; + +static const char *attrvals[ATTRVAL__MAX] = { + "norepeat", + "opt", + "plain", + "repeat", + "req" +}; + static const struct node nodes[NODE__MAX] = { { NULL, 0 }, { "arg", 0 }, @@ -136,6 +186,44 @@ static const struct node nodes[NODE__MAX] = { static void pnode_print(struct parse *p, struct pnode *pn); +static int +isattrkey(enum nodeid node, enum attrkey key) +{ + + switch (key) { + case (ATTRKEY_CHOICE): + return(node == NODE_ARG); + case (ATTRKEY_ID): + /* Common to all. */ + return(1); + case (ATTRKEY_REP): + return(node == NODE_ARG); + default: + break; + } + abort(); + return(0); +} + +static int +isattrval(enum attrkey key, enum attrval val) +{ + + switch (val) { + case (ATTRVAL_OPT): + case (ATTRVAL_PLAIN): + case (ATTRVAL_REQ): + return(key == ATTRKEY_CHOICE); + case (ATTRVAL_REPEAT): + case (ATTRVAL_NOREPEAT): + return(key == ATTRKEY_REP); + default: + break; + } + abort(); + return(0); +} + /* * Look up whether "parent" is a valid parent for "node". * This is sucked directly from the DocBook specification: look at the @@ -437,6 +525,7 @@ xml_char(void *arg, const XML_Char *p, int sz) dat->node = ps->node = NODE_TEXT; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); + TAILQ_INIT(&dat->attrq); TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); ps->cur = dat; assert(NULL != ps->root); @@ -481,9 +570,13 @@ pnode_trim(struct pnode *pn) static void xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts) { - struct parse *ps = arg; - enum nodeid node; - struct pnode *dat; + struct parse *ps = arg; + enum nodeid node; + enum attrkey key; + enum attrval val; + struct pnode *dat; + struct pattr *pattr; + const XML_Char **att; if (ps->stop) return; @@ -503,23 +596,28 @@ xml_elem_start(void *arg, const XML_Char *name, const else if (0 == strcmp(nodes[node].name, name)) break; - /* FIXME: do more with these error messages... */ if (NODE__MAX == node && NODE_ROOT == ps->node) { - fprintf(stderr, "%s: ignoring node\n", name); return; } else if (NODE__MAX == node) { - fprintf(stderr, "%s: unknown node\n", name); + fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n", + ps->fname, XML_GetCurrentLineNumber(ps->xml), + XML_GetCurrentColumnNumber(ps->xml), name); ps->stop = 1; return; } else if (NODE_ROOT == ps->node && NULL != ps->root) { - fprintf(stderr, "%s: reentering?\n", name); + fprintf(stderr, "%s:%zu:%zu: multiple refentries\n", + ps->fname, XML_GetCurrentLineNumber(ps->xml), + XML_GetCurrentColumnNumber(ps->xml)); ps->stop = 1; return; } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) { - fprintf(stderr, "%s: known node w/o context\n", name); return; } else if ( ! isparent(node, ps->node)) { - fprintf(stderr, "%s: bad parent\n", name); + fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\"\n", + ps->fname, XML_GetCurrentLineNumber(ps->xml), + XML_GetCurrentColumnNumber(ps->xml), + NULL == nodes[ps->node].name ? + "(none)" : nodes[ps->node].name); ps->stop = 1; return; } @@ -532,6 +630,7 @@ xml_elem_start(void *arg, const XML_Char *name, const dat->node = ps->node = node; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); + TAILQ_INIT(&dat->attrq); if (NULL != ps->cur) TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); @@ -539,6 +638,48 @@ xml_elem_start(void *arg, const XML_Char *name, const ps->cur = dat; if (NULL == ps->root) ps->root = dat; + + /* + * Process attributes. + */ + for (att = atts; NULL != *att; att += 2) { + for (key = 0; key < ATTRKEY__MAX; key++) + if (0 == strcmp(*att, attrkeys[key])) + break; + if (ATTRKEY__MAX == key) { + fprintf(stderr, "%s:%zu:%zu: unknown " + "attribute \"%s\"\n", ps->fname, + XML_GetCurrentLineNumber(ps->xml), + XML_GetCurrentColumnNumber(ps->xml), + *att); + continue; + } else if ( ! isattrkey(node, key)) { + fprintf(stderr, "%s:%zu:%zu: bad " + "attribute \"%s\"\n", ps->fname, + XML_GetCurrentLineNumber(ps->xml), + XML_GetCurrentColumnNumber(ps->xml), + *att); + continue; + } + for (val = 0; val < ATTRVAL__MAX; val++) + if (0 == strcmp(*(att + 1), attrvals[val])) + break; + if (ATTRVAL__MAX != val && ! isattrval(key, val)) { + fprintf(stderr, "%s:%zu:%zu: bad " + "value \"%s\"\n", ps->fname, + XML_GetCurrentLineNumber(ps->xml), + XML_GetCurrentColumnNumber(ps->xml), + *(att + 1)); + continue; + } + pattr = calloc(1, sizeof(struct pattr)); + pattr->key = key; + pattr->val = val; + if (ATTRVAL__MAX == val) + pattr->rawval = strdup(*(att + 1)); + TAILQ_INSERT_TAIL(&dat->attrq, pattr, child); + } + } /* @@ -576,6 +717,7 @@ static void pnode_free(struct pnode *pn) { struct pnode *pp; + struct pattr *ap; if (NULL == pn) return; @@ -585,6 +727,12 @@ pnode_free(struct pnode *pn) pnode_free(pp); } + while (NULL != (ap = TAILQ_FIRST(&pn->attrq))) { + TAILQ_REMOVE(&pn->attrq, ap, child); + free(ap->rawval); + free(ap); + } + free(pn->b); free(pn); } @@ -661,12 +809,14 @@ bufappend_r(struct parse *p, struct pnode *pn) bufappend_r(p, pp); } +#define MACROLINE_NORM 0 +#define MACROLINE_UPPER 1 /* * Recursively print text presumably on a macro line. * Convert all whitespace to regular spaces. */ static void -pnode_printmacrolinepart(struct parse *p, struct pnode *pn) +pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl) { char *cp; @@ -692,13 +842,23 @@ pnode_printmacrolinepart(struct parse *p, struct pnode ('\0' == *(cp + 3) || ' ' == *(cp + 3))))) fputs("\\&", stdout); - putchar(*cp); + if (MACROLINE_UPPER & fl) + putchar(toupper((int)*cp)); + else + putchar((int)*cp); /* If we're a character escape, escape us. */ if ('\\' == *cp) putchar('e'); } } +static void +pnode_printmacrolinepart(struct parse *p, struct pnode *pn) +{ + + pnode_printmacrolinetext(p, pn, 0); +} + /* * Just pnode_printmacrolinepart() but with a newline. * If no text, just the newline. @@ -707,7 +867,7 @@ static void pnode_printmacroline(struct parse *p, struct pnode *pn) { - pnode_printmacrolinepart(p, pn); + pnode_printmacrolinetext(p, pn, 0); putchar('\n'); } @@ -813,7 +973,7 @@ pnode_printrefmeta(struct parse *p, struct pnode *pn) if (NULL != title) { /* FIXME: uppercase. */ - pnode_printmacrolinepart(p, title); + pnode_printmacrolinetext(p, title, MACROLINE_UPPER); putchar(' '); } else fputs("UNKNOWN ", stdout); @@ -899,19 +1059,40 @@ pnode_printfuncprototype(struct parse *p, struct pnode * The element is more complicated than it should be because text * nodes are treated like ".Ar foo", but non-text nodes need to be * re-sent into the printer (i.e., without the preceding ".Ar"). - * TODO: handle "optional" attribute. + * This also handles the case of "repetition" (or in other words, the + * ellipsis following an argument) and optionality. */ static void pnode_printarg(struct parse *p, struct pnode *pn) { struct pnode *pp; + struct pattr *ap; + int isop, isrep; + isop = 1; + isrep = 0; + TAILQ_FOREACH(ap, &pn->attrq, child) + if (ATTRKEY_CHOICE == ap->key && + (ATTRVAL_PLAIN == ap->val || + ATTRVAL_REQ == ap->val)) + isop = 0; + else if (ATTRKEY_REP == ap->key && + (ATTRVAL_REPEAT == ap->val)) + isrep = 1; + + if (isop) { + pnode_printmopen(p); + fputs("Op ", stdout); + } + TAILQ_FOREACH(pp, &pn->childq, child) { if (NODE_TEXT == pp->node) { pnode_printmopen(p); fputs("Ar ", stdout); } pnode_print(p, pp); + if (NODE_TEXT == pp->node && isrep) + fputs("...", stdout); } } @@ -1094,11 +1275,23 @@ pnode_print(struct parse *p, struct pnode *pn) case (NODE_FUNCSYNOPSISINFO): case (NODE_OPTION): case (NODE_PARAMETER): - case (NODE_REFNAME): case (NODE_STRUCTNAME): case (NODE_TEXT): pnode_printmclose(p, sv); break; + case (NODE_REFNAME): + /* + * If we're in the NAME macro and we have multiple + * macros in sequence, then print out a + * trailing comma before the newline. + */ + if (NULL != pn->parent && + NODE_REFNAMEDIV == pn->parent->node && + NULL != TAILQ_NEXT(pn, child) && + NODE_REFNAME == TAILQ_NEXT(pn, child)->node) + fputs(" ,", stdout); + pnode_printmclose(p, sv); + break; case (NODE_PROGRAMLISTING): assert(p->newln); puts(".Ed"); @@ -1124,6 +1317,8 @@ readfile(XML_Parser xp, int fd, memset(&p, 0, sizeof(struct parse)); p.b = malloc(p.bsz = p.mbsz = 1024); + p.fname = fn; + p.xml = xp; XML_SetCharacterDataHandler(xp, xml_char); XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);