=================================================================== RCS file: /cvs/docbook2mdoc/parse.c,v retrieving revision 1.31 retrieving revision 1.37 diff -u -p -r1.31 -r1.37 --- docbook2mdoc/parse.c 2019/04/10 14:34:08 1.31 +++ docbook2mdoc/parse.c 2019/04/12 07:53:09 1.37 @@ -1,4 +1,4 @@ -/* $Id: parse.c,v 1.31 2019/04/10 14:34:08 schwarze Exp $ */ +/* $Id: parse.c,v 1.37 2019/04/12 07:53:09 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * Copyright (c) 2019 Ingo Schwarze @@ -89,6 +89,7 @@ static const struct element elements[] = { { "code", NODE_LITERAL }, { "colspec", NODE_COLSPEC }, { "command", NODE_COMMAND }, + { "computeroutput", NODE_LITERAL }, { "constant", NODE_CONSTANT }, { "contrib", NODE_CONTRIB }, { "copyright", NODE_COPYRIGHT }, @@ -134,6 +135,7 @@ static const struct element elements[] = { { "literal", NODE_LITERAL }, { "literallayout", NODE_LITERALLAYOUT }, { "manvolnum", NODE_MANVOLNUM }, + { "markup", NODE_MARKUP }, { "member", NODE_MEMBER }, { "mml:math", NODE_MML_MATH }, { "mml:mfenced", NODE_MML_MFENCED }, @@ -186,7 +188,7 @@ static const struct element elements[] = { { "sect1", NODE_SECTION }, { "sect2", NODE_SECTION }, { "section", NODE_SECTION }, - { "sgmltag", NODE_SGMLTAG }, + { "sgmltag", NODE_MARKUP }, { "simpara", NODE_PARA }, { "simplelist", NODE_SIMPLELIST }, { "spanspec", NODE_SPANSPEC }, @@ -322,64 +324,132 @@ warn_msg(struct parse *p, const char *fmt, ...) * Otherwise, create a new one as a child of the current node. */ static void -xml_char(struct parse *p, const char *word, int sz) +xml_text(struct parse *p, const char *word, int sz) { - struct pnode *n; - size_t newsz; + struct pnode *n, *np; + size_t oldsz, newsz; + int i; + assert(sz > 0); if (p->del > 0) return; - if (p->cur == NULL) { - error_msg(p, "discarding text before document: %.*s", sz, word); + if ((n = p->cur) == NULL) { + error_msg(p, "discarding text before document: %.*s", + sz, word); return; } - if (p->cur->node != NODE_TEXT) { - if ((n = calloc(1, sizeof(*n))) == NULL) + /* Append to the current text node, if one is open. */ + + if (n->node == NODE_TEXT) { + oldsz = strlen(n->b); + newsz = oldsz + sz; + if (oldsz && (p->flags & PFLAG_SPC)) + newsz++; + if ((n->b = realloc(n->b, newsz + 1)) == NULL) fatal(p); - n->node = NODE_TEXT; - n->spc = (p->flags & PFLAG_SPC) != 0; - n->parent = p->cur; - TAILQ_INIT(&n->childq); - TAILQ_INIT(&n->attrq); - TAILQ_INSERT_TAIL(&p->cur->childq, n, child); - p->cur = n; + if (oldsz && (p->flags & PFLAG_SPC)) + n->b[oldsz++] = ' '; + memcpy(n->b + oldsz, word, sz); + n->b[newsz] = '\0'; + p->flags &= ~PFLAG_SPC; + return; } - if (p->tree->flags & TREE_CLOSED && - p->cur->parent == p->tree->root) + if (p->tree->flags & TREE_CLOSED && n == p->tree->root) warn_msg(p, "text after end of document: %.*s", sz, word); - /* Append to the current text node. */ + /* Create a new text node. */ - assert(sz >= 0); - newsz = p->cur->bsz + (p->cur->bsz && (p->flags & PFLAG_SPC)) + sz; - if ((p->cur->b = realloc(p->cur->b, newsz + 1)) == NULL) + if ((n = pnode_alloc(p->cur)) == NULL) fatal(p); - if (p->cur->bsz && (p->flags & PFLAG_SPC)) - p->cur->b[p->cur->bsz++] = ' '; - memcpy(p->cur->b + p->cur->bsz, word, sz); - p->cur->b[p->cur->bsz = newsz] = '\0'; - p->cur->real = p->cur->b; + n->node = NODE_TEXT; + n->spc = (p->flags & PFLAG_SPC) != 0; p->flags &= ~PFLAG_SPC; + + /* + * If this node follows a non-text node without intervening + * whitespace, keep the text in it as short as possible, + * and do not keep it open. + */ + + if (n->spc == 0 && + (np = TAILQ_PREV(n, pnodeq, child)) != NULL && + np->node != NODE_TEXT && np->node != NODE_ESCAPE) { + i = 0; + while (i < sz && !isspace((unsigned char)word[i])) + i++; + if ((n->b = strndup(word, i)) == NULL) + fatal(p); + if (i == sz) + return; + while (i < sz && isspace((unsigned char)word[i])) + i++; + if (i == sz) { + p->flags |= PFLAG_SPC; + return; + } + + /* Put any remaining text into a second node. */ + + if ((n = pnode_alloc(p->cur)) == NULL) + fatal(p); + n->node = NODE_TEXT; + n->spc = 1; + word += i; + sz -= i; + } + if ((n->b = strndup(word, sz)) == NULL) + fatal(p); + + /* The new node remains open for later pnode_closetext(). */ + + p->cur = n; } /* * Close out the text node and strip trailing whitespace, if one is open. */ static void -pnode_closetext(struct parse *p) +pnode_closetext(struct parse *p, int check_last_word) { struct pnode *n; + char *cp, *last_word; if ((n = p->cur) == NULL || n->node != NODE_TEXT) return; p->cur = n->parent; - while (n->bsz > 0 && isspace((unsigned char)n->b[n->bsz - 1])) { - n->b[--n->bsz] = '\0'; + for (cp = strchr(n->b, '\0'); + cp > n->b && isspace((unsigned char)cp[-1]); + *--cp = '\0') p->flags |= PFLAG_SPC; - } + + if (p->flags & PFLAG_SPC || !check_last_word) + return; + + /* + * Find the beginning of the last word + * and delete whitespace before it. + */ + + while (cp > n->b && !isspace((unsigned char)cp[-1])) + cp--; + if (cp == n->b) + return; + + last_word = cp; + while (cp > n->b && isspace((unsigned char)cp[-1])) + *--cp = '\0'; + + /* Move the last word into its own node, for use with .Pf. */ + + if ((n = pnode_alloc(p->cur)) == NULL) + fatal(p); + n->node = NODE_TEXT; + n->spc = 1; + if ((n->b = strdup(last_word)) == NULL) + fatal(p); } static void @@ -399,7 +469,7 @@ xml_entity(struct parse *p, const char *name) return; } - pnode_closetext(p); + pnode_closetext(p, 0); if (p->tree->flags & TREE_CLOSED && p->cur == p->tree->root) warn_msg(p, "entity after end of document: &%s;", name); @@ -438,16 +508,11 @@ xml_entity(struct parse *p, const char *name) } /* Create, append, and close out an entity node. */ - if ((n = calloc(1, sizeof(*n))) == NULL || - (n->b = n->real = strdup(entity->roff)) == NULL) + if ((n = pnode_alloc(p->cur)) == NULL || + (n->b = strdup(entity->roff)) == NULL) fatal(p); n->node = NODE_ESCAPE; - n->bsz = strlen(n->b); n->spc = (p->flags & PFLAG_SPC) != 0; - n->parent = p->cur; - TAILQ_INIT(&n->childq); - TAILQ_INIT(&n->attrq); - TAILQ_INSERT_TAIL(&p->cur->childq, n, child); p->flags &= ~PFLAG_SPC; } @@ -470,7 +535,7 @@ xml_elem_start(struct parse *p, const char *name) return; } - pnode_closetext(p); + pnode_closetext(p, 1); for (elem = elements; elem->name != NULL; elem++) if (strcmp(elem->name, name) == 0) @@ -503,7 +568,7 @@ xml_elem_start(struct parse *p, const char *name) if (p->tree->flags & TREE_CLOSED && p->cur->parent == NULL) warn_msg(p, "element after end of document: <%s>", name); - if ((n = calloc(1, sizeof(*n))) == NULL) + if ((n = pnode_alloc(p->cur)) == NULL) fatal(p); /* @@ -555,13 +620,6 @@ xml_elem_start(struct parse *p, const char *name) n->spc = (p->flags & PFLAG_SPC) != 0; break; } - n->parent = p->cur; - TAILQ_INIT(&n->childq); - TAILQ_INIT(&n->attrq); - - if (p->cur != NULL) - TAILQ_INSERT_TAIL(&p->cur->childq, n, child); - p->cur = n; if (n->node == NODE_DOCTYPE) { if (p->doctype == NULL) @@ -649,7 +707,7 @@ xml_elem_end(struct parse *p, const char *name) } if (p->del == 0) - pnode_closetext(p); + pnode_closetext(p, 0); if (name != NULL) { for (elem = elements; elem->name != NULL; elem++) @@ -680,6 +738,7 @@ xml_elem_end(struct parse *p, const char *name) p->flags &= ~PFLAG_SPC; break; case NODE_DOCTYPE: + case NODE_SBR: p->flags &= ~PFLAG_EEND; /* FALLTHROUGH */ default: @@ -970,9 +1029,11 @@ parse_string(struct parse *p, char *b, size_t rlen, } else { advance(p, b, rlen, &pend, - p->ncur == NODE_DOCTYPE ? "<&]" : "<&", + p->ncur == NODE_DOCTYPE ? "<&]\n" : "<&\n", refill); - xml_char(p, b + poff, pend - poff); + xml_text(p, b + poff, pend - poff); + if (b[pend] == '\n') + pnode_closetext(p, 0); } } return poff; @@ -1057,7 +1118,7 @@ parse_file(struct parse *p, int fd, const char *fname) /* On the top level, finalize the parse tree. */ if (save_fname == NULL) { - pnode_closetext(p); + pnode_closetext(p, 0); if (p->tree->root == NULL) error_msg(p, "empty document"); else if ((p->tree->flags & TREE_CLOSED) == 0)