=================================================================== RCS file: /cvs/docbook2mdoc/parse.c,v retrieving revision 1.14 retrieving revision 1.22 diff -u -p -r1.14 -r1.22 --- docbook2mdoc/parse.c 2019/04/05 14:37:36 1.14 +++ docbook2mdoc/parse.c 2019/04/07 19:33:27 1.22 @@ -1,4 +1,4 @@ -/* $Id: parse.c,v 1.14 2019/04/05 14:37:36 schwarze Exp $ */ +/* $Id: parse.c,v 1.22 2019/04/07 19:33:27 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * Copyright (c) 2019 Ingo Schwarze @@ -52,6 +52,7 @@ struct parse { int nline; /* Line number of next token. */ int ncol; /* Column number of next token. */ int del; /* Levels of nested nodes being deleted. */ + int spc; /* Whitespace before the next element. */ int attr; /* The most recent attribute is valid. */ int warn; }; @@ -65,12 +66,14 @@ static const struct element elements[] = { { "acronym", NODE_IGNORE }, { "affiliation", NODE_AFFILIATION }, { "anchor", NODE_DELETE }, + { "appendix", NODE_APPENDIX }, { "application", NODE_APPLICATION }, { "arg", NODE_ARG }, + { "article", NODE_SECTION }, { "author", NODE_AUTHOR }, { "authorgroup", NODE_AUTHORGROUP }, { "blockquote", NODE_BLOCKQUOTE }, - { "book", NODE_BOOK }, + { "book", NODE_SECTION }, { "bookinfo", NODE_BOOKINFO }, { "caution", NODE_CAUTION }, { "chapter", NODE_SECTION }, @@ -100,6 +103,11 @@ static const struct element elements[] = { { "funcsynopsis", NODE_FUNCSYNOPSIS }, { "funcsynopsisinfo", NODE_FUNCSYNOPSISINFO }, { "function", NODE_FUNCTION }, + { "glossary", NODE_VARIABLELIST }, + { "glossdef", NODE_IGNORE }, + { "glossdiv", NODE_IGNORE }, + { "glossentry", NODE_VARLISTENTRY }, + { "glosslist", NODE_VARIABLELIST }, { "glossterm", NODE_GLOSSTERM }, { "group", NODE_GROUP }, { "holder", NODE_HOLDER }, @@ -170,6 +178,7 @@ static const struct element elements[] = { { "sect2", NODE_SECTION }, { "section", NODE_SECTION }, { "sgmltag", NODE_SGMLTAG }, + { "simpara", NODE_PARA }, { "simplelist", NODE_SIMPLELIST }, { "spanspec", NODE_SPANSPEC }, { "structfield", NODE_PARAMETER }, @@ -188,7 +197,7 @@ static const struct element elements[] = { { "title", NODE_TITLE }, { "trademark", NODE_IGNORE }, { "type", NODE_TYPE }, - { "ulink", NODE_ULINK }, + { "ulink", NODE_LINK }, { "userinput", NODE_LITERAL }, { "variablelist", NODE_VARIABLELIST }, { "varlistentry", NODE_VARLISTENTRY }, @@ -293,6 +302,7 @@ static void xml_char(struct parse *ps, const char *p, int sz) { struct pnode *dat; + size_t newsz; if (ps->del > 0) return; @@ -308,6 +318,7 @@ xml_char(struct parse *ps, const char *p, int sz) exit(1); } dat->node = NODE_TEXT; + dat->spc = ps->spc; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); TAILQ_INIT(&dat->attrq); @@ -322,24 +333,35 @@ xml_char(struct parse *ps, const char *p, int sz) /* Append to the current text node. */ assert(sz >= 0); - ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + sz + 1); + newsz = ps->cur->bsz + (ps->cur->bsz && ps->spc) + sz; + ps->cur->b = realloc(ps->cur->b, newsz + 1); if (ps->cur->b == NULL) { perror(NULL); exit(1); } + if (ps->cur->bsz && ps->spc) + ps->cur->b[ps->cur->bsz++] = ' '; memcpy(ps->cur->b + ps->cur->bsz, p, sz); - ps->cur->bsz += sz; - ps->cur->b[ps->cur->bsz] = '\0'; + ps->cur->b[ps->cur->bsz = newsz] = '\0'; ps->cur->real = ps->cur->b; + ps->spc = 0; } +/* + * Close out the text node and strip trailing whitespace, if one is open. + */ static void -pnode_trim(struct pnode *pn) +pnode_closetext(struct parse *p) { - assert(pn->node == NODE_TEXT); - for (; pn->bsz > 0; pn->b[--pn->bsz] = '\0') - if (isspace((unsigned char)pn->b[pn->bsz - 1]) == 0) - break; + struct pnode *n; + + if ((n = p->cur) == NULL || n->node != NODE_TEXT) + return; + p->cur = n->parent; + while (n->bsz > 0 && isspace((unsigned char)n->b[n->bsz - 1])) { + n->b[--n->bsz] = '\0'; + p->spc = 1; + } } static void @@ -356,11 +378,7 @@ xml_entity(struct parse *p, const char *name) return; } - /* Close out the text node, if there is one. */ - if (p->cur->node == NODE_TEXT) { - pnode_trim(p->cur); - p->cur = p->cur->parent; - } + pnode_closetext(p); if (p->tree->flags & TREE_CLOSED && p->cur == p->tree->root) warn_msg(p, "entity after end of document: &%s;", name); @@ -382,10 +400,12 @@ xml_entity(struct parse *p, const char *name) } dat->node = NODE_ESCAPE; dat->bsz = strlen(dat->b); + dat->spc = p->spc; dat->parent = p->cur; TAILQ_INIT(&dat->childq); TAILQ_INIT(&dat->attrq); TAILQ_INSERT_TAIL(&p->cur->childq, dat, child); + p->spc = 0; } /* @@ -409,11 +429,7 @@ xml_elem_start(struct parse *ps, const char *name) return; } - /* Close out the text node, if there is one. */ - if (ps->cur != NULL && ps->cur->node == NODE_TEXT) { - pnode_trim(ps->cur); - ps->cur = ps->cur->parent; - } + pnode_closetext(ps); for (elem = elements; elem->name != NULL; elem++) if (strcmp(elem->name, name) == 0) @@ -447,7 +463,52 @@ xml_elem_start(struct parse *ps, const char *name) perror(NULL); exit(1); } - dat->node = elem->node; + + /* + * Nodes that begin a new macro or request line or start by + * printing text always want whitespace before themselves. + */ + + switch (dat->node = elem->node) { + case NODE_APPENDIX: + case NODE_AUTHORGROUP: + case NODE_BLOCKQUOTE: + case NODE_BOOKINFO: + case NODE_CAUTION: + case NODE_EDITOR: + case NODE_ENTRY: + case NODE_FUNCDEF: + case NODE_FUNCPROTOTYPE: + case NODE_INFORMALEQUATION: + case NODE_INLINEEQUATION: + case NODE_ITEMIZEDLIST: + case NODE_LEGALNOTICE: + case NODE_LITERALLAYOUT: + case NODE_NOTE: + case NODE_ORDEREDLIST: + case NODE_PARA: + case NODE_PREFACE: + case NODE_PROGRAMLISTING: + case NODE_REFMETA: + case NODE_REFNAMEDIV: + case NODE_REFSYNOPSISDIV: + case NODE_ROW: + case NODE_SBR: + case NODE_SCREEN: + case NODE_SECTION: + case NODE_SYNOPSIS: + case NODE_TGROUP: + case NODE_TIP: + case NODE_TITLE: + case NODE_VARIABLELIST: + case NODE_VARLISTENTRY: + case NODE_WARNING: + dat->spc = 1; + break; + default: + dat->spc = ps->spc; + break; + } dat->parent = ps->cur; TAILQ_INIT(&dat->childq); TAILQ_INIT(&dat->attrq); @@ -466,7 +527,7 @@ xml_attrkey(struct parse *ps, const char *name) struct pattr *attr; enum attrkey key; - if (ps->del > 0 || *name == '\0') + if (ps->del > 0 || ps->ncur == NODE_IGNORE || *name == '\0') return; if ((key = attrkey_parse(name)) == ATTRKEY__MAX) { ps->attr = 0; @@ -488,7 +549,7 @@ xml_attrval(struct parse *ps, const char *name) { struct pattr *attr; - if (ps->del > 0 || ps->attr == 0) + if (ps->del > 0 || ps->ncur == NODE_IGNORE || ps->attr == 0) return; if ((attr = TAILQ_LAST(&ps->cur->attrq, pattrq)) == NULL) return; @@ -518,11 +579,8 @@ xml_elem_end(struct parse *ps, const char *name) return; } - /* Close out the text node, if there is one. */ - if (ps->del == 0 && ps->cur != NULL && ps->cur->node == NODE_TEXT) { - pnode_trim(ps->cur); - ps->cur = ps->cur->parent; - } + if (ps->del == 0) + pnode_closetext(ps); if (name != NULL) { for (elem = elements; elem->name != NULL; elem++) @@ -557,6 +615,7 @@ xml_elem_end(struct parse *ps, const char *name) ps->tree->flags |= TREE_CLOSED; else ps->cur = ps->cur->parent; + ps->spc = 0; break; } assert(ps->del == 0); @@ -650,6 +709,7 @@ parse_string(struct parse *p, char *b, size_t rlen, size_t pend; /* Offset of the end of the current word. */ int elem_end; + p->spc = 0; pend = 0; for (;;) { @@ -662,6 +722,7 @@ parse_string(struct parse *p, char *b, size_t rlen, if ((poff = pend) == rlen) break; if (isspace((unsigned char)b[pend])) { + p->spc = 1; increment(p, b, &pend, refill); continue; } @@ -835,10 +896,7 @@ parse_file(struct parse *p, int fd, const char *fname) perror(fname); p->tree->flags |= TREE_FAIL; } - if (p->cur != NULL && p->cur->node == NODE_TEXT) { - pnode_trim(p->cur); - p->cur = p->cur->parent; - } + pnode_closetext(p); if ((p->tree->flags & TREE_CLOSED) == 0) warn_msg(p, "document not closed"); return p->tree;