version 1.31, 2019/04/10 14:34:08 |
version 1.38, 2019/04/12 11:37:09 |
Line 89 static const struct element elements[] = { |
|
Line 89 static const struct element elements[] = { |
|
{ "code", NODE_LITERAL }, |
{ "code", NODE_LITERAL }, |
{ "colspec", NODE_COLSPEC }, |
{ "colspec", NODE_COLSPEC }, |
{ "command", NODE_COMMAND }, |
{ "command", NODE_COMMAND }, |
|
{ "computeroutput", NODE_LITERAL }, |
{ "constant", NODE_CONSTANT }, |
{ "constant", NODE_CONSTANT }, |
{ "contrib", NODE_CONTRIB }, |
{ "contrib", NODE_CONTRIB }, |
{ "copyright", NODE_COPYRIGHT }, |
{ "copyright", NODE_COPYRIGHT }, |
Line 134 static const struct element elements[] = { |
|
Line 135 static const struct element elements[] = { |
|
{ "literal", NODE_LITERAL }, |
{ "literal", NODE_LITERAL }, |
{ "literallayout", NODE_LITERALLAYOUT }, |
{ "literallayout", NODE_LITERALLAYOUT }, |
{ "manvolnum", NODE_MANVOLNUM }, |
{ "manvolnum", NODE_MANVOLNUM }, |
|
{ "markup", NODE_MARKUP }, |
{ "member", NODE_MEMBER }, |
{ "member", NODE_MEMBER }, |
{ "mml:math", NODE_MML_MATH }, |
{ "mml:math", NODE_MML_MATH }, |
{ "mml:mfenced", NODE_MML_MFENCED }, |
{ "mml:mfenced", NODE_MML_MFENCED }, |
Line 186 static const struct element elements[] = { |
|
Line 188 static const struct element elements[] = { |
|
{ "sect1", NODE_SECTION }, |
{ "sect1", NODE_SECTION }, |
{ "sect2", NODE_SECTION }, |
{ "sect2", NODE_SECTION }, |
{ "section", NODE_SECTION }, |
{ "section", NODE_SECTION }, |
{ "sgmltag", NODE_SGMLTAG }, |
{ "sgmltag", NODE_MARKUP }, |
{ "simpara", NODE_PARA }, |
{ "simpara", NODE_PARA }, |
{ "simplelist", NODE_SIMPLELIST }, |
{ "simplelist", NODE_SIMPLELIST }, |
{ "spanspec", NODE_SPANSPEC }, |
{ "spanspec", NODE_SPANSPEC }, |
Line 322 warn_msg(struct parse *p, const char *fmt, ...) |
|
Line 324 warn_msg(struct parse *p, const char *fmt, ...) |
|
* Otherwise, create a new one as a child of the current node. |
* Otherwise, create a new one as a child of the current node. |
*/ |
*/ |
static void |
static void |
xml_char(struct parse *p, const char *word, int sz) |
xml_text(struct parse *p, const char *word, int sz) |
{ |
{ |
struct pnode *n; |
struct pnode *n, *np; |
size_t newsz; |
size_t oldsz, newsz; |
|
int i; |
|
|
|
assert(sz > 0); |
if (p->del > 0) |
if (p->del > 0) |
return; |
return; |
|
|
if (p->cur == NULL) { |
if ((n = p->cur) == NULL) { |
error_msg(p, "discarding text before document: %.*s", sz, word); |
error_msg(p, "discarding text before document: %.*s", |
|
sz, word); |
return; |
return; |
} |
} |
|
|
if (p->cur->node != NODE_TEXT) { |
/* Append to the current text node, if one is open. */ |
if ((n = calloc(1, sizeof(*n))) == NULL) |
|
|
if (n->node == NODE_TEXT) { |
|
oldsz = strlen(n->b); |
|
newsz = oldsz + sz; |
|
if (oldsz && (p->flags & PFLAG_SPC)) |
|
newsz++; |
|
if ((n->b = realloc(n->b, newsz + 1)) == NULL) |
fatal(p); |
fatal(p); |
n->node = NODE_TEXT; |
if (oldsz && (p->flags & PFLAG_SPC)) |
n->spc = (p->flags & PFLAG_SPC) != 0; |
n->b[oldsz++] = ' '; |
n->parent = p->cur; |
memcpy(n->b + oldsz, word, sz); |
TAILQ_INIT(&n->childq); |
n->b[newsz] = '\0'; |
TAILQ_INIT(&n->attrq); |
p->flags &= ~PFLAG_SPC; |
TAILQ_INSERT_TAIL(&p->cur->childq, n, child); |
return; |
p->cur = n; |
|
} |
} |
|
|
if (p->tree->flags & TREE_CLOSED && |
if (p->tree->flags & TREE_CLOSED && n == p->tree->root) |
p->cur->parent == p->tree->root) |
|
warn_msg(p, "text after end of document: %.*s", sz, word); |
warn_msg(p, "text after end of document: %.*s", sz, word); |
|
|
/* Append to the current text node. */ |
/* Create a new text node. */ |
|
|
assert(sz >= 0); |
if ((n = pnode_alloc(p->cur)) == NULL) |
newsz = p->cur->bsz + (p->cur->bsz && (p->flags & PFLAG_SPC)) + sz; |
|
if ((p->cur->b = realloc(p->cur->b, newsz + 1)) == NULL) |
|
fatal(p); |
fatal(p); |
if (p->cur->bsz && (p->flags & PFLAG_SPC)) |
n->node = NODE_TEXT; |
p->cur->b[p->cur->bsz++] = ' '; |
n->spc = (p->flags & PFLAG_SPC) != 0; |
memcpy(p->cur->b + p->cur->bsz, word, sz); |
|
p->cur->b[p->cur->bsz = newsz] = '\0'; |
|
p->cur->real = p->cur->b; |
|
p->flags &= ~PFLAG_SPC; |
p->flags &= ~PFLAG_SPC; |
|
|
|
/* |
|
* If this node follows a non-text node without intervening |
|
* whitespace, keep the text in it as short as possible, |
|
* and do not keep it open. |
|
*/ |
|
|
|
if (n->spc == 0 && |
|
(np = TAILQ_PREV(n, pnodeq, child)) != NULL && |
|
np->node != NODE_TEXT && np->node != NODE_ESCAPE) { |
|
i = 0; |
|
while (i < sz && !isspace((unsigned char)word[i])) |
|
i++; |
|
if ((n->b = strndup(word, i)) == NULL) |
|
fatal(p); |
|
if (i == sz) |
|
return; |
|
while (i < sz && isspace((unsigned char)word[i])) |
|
i++; |
|
if (i == sz) { |
|
p->flags |= PFLAG_SPC; |
|
return; |
|
} |
|
|
|
/* Put any remaining text into a second node. */ |
|
|
|
if ((n = pnode_alloc(p->cur)) == NULL) |
|
fatal(p); |
|
n->node = NODE_TEXT; |
|
n->spc = 1; |
|
word += i; |
|
sz -= i; |
|
} |
|
if ((n->b = strndup(word, sz)) == NULL) |
|
fatal(p); |
|
|
|
/* The new node remains open for later pnode_closetext(). */ |
|
|
|
p->cur = n; |
} |
} |
|
|
/* |
/* |
* Close out the text node and strip trailing whitespace, if one is open. |
* Close out the text node and strip trailing whitespace, if one is open. |
*/ |
*/ |
static void |
static void |
pnode_closetext(struct parse *p) |
pnode_closetext(struct parse *p, int check_last_word) |
{ |
{ |
struct pnode *n; |
struct pnode *n; |
|
char *cp, *last_word; |
|
|
if ((n = p->cur) == NULL || n->node != NODE_TEXT) |
if ((n = p->cur) == NULL || n->node != NODE_TEXT) |
return; |
return; |
p->cur = n->parent; |
p->cur = n->parent; |
while (n->bsz > 0 && isspace((unsigned char)n->b[n->bsz - 1])) { |
for (cp = strchr(n->b, '\0'); |
n->b[--n->bsz] = '\0'; |
cp > n->b && isspace((unsigned char)cp[-1]); |
|
*--cp = '\0') |
p->flags |= PFLAG_SPC; |
p->flags |= PFLAG_SPC; |
} |
|
|
if (p->flags & PFLAG_SPC || !check_last_word) |
|
return; |
|
|
|
/* |
|
* Find the beginning of the last word |
|
* and delete whitespace before it. |
|
*/ |
|
|
|
while (cp > n->b && !isspace((unsigned char)cp[-1])) |
|
cp--; |
|
if (cp == n->b) |
|
return; |
|
|
|
last_word = cp; |
|
while (cp > n->b && isspace((unsigned char)cp[-1])) |
|
*--cp = '\0'; |
|
|
|
/* Move the last word into its own node, for use with .Pf. */ |
|
|
|
if ((n = pnode_alloc(p->cur)) == NULL) |
|
fatal(p); |
|
n->node = NODE_TEXT; |
|
n->spc = 1; |
|
if ((n->b = strdup(last_word)) == NULL) |
|
fatal(p); |
} |
} |
|
|
static void |
static void |
Line 399 xml_entity(struct parse *p, const char *name) |
|
Line 469 xml_entity(struct parse *p, const char *name) |
|
return; |
return; |
} |
} |
|
|
pnode_closetext(p); |
pnode_closetext(p, 0); |
|
|
if (p->tree->flags & TREE_CLOSED && p->cur == p->tree->root) |
if (p->tree->flags & TREE_CLOSED && p->cur == p->tree->root) |
warn_msg(p, "entity after end of document: &%s;", name); |
warn_msg(p, "entity after end of document: &%s;", name); |
Line 438 xml_entity(struct parse *p, const char *name) |
|
Line 508 xml_entity(struct parse *p, const char *name) |
|
} |
} |
|
|
/* Create, append, and close out an entity node. */ |
/* Create, append, and close out an entity node. */ |
if ((n = calloc(1, sizeof(*n))) == NULL || |
if ((n = pnode_alloc(p->cur)) == NULL || |
(n->b = n->real = strdup(entity->roff)) == NULL) |
(n->b = strdup(entity->roff)) == NULL) |
fatal(p); |
fatal(p); |
n->node = NODE_ESCAPE; |
n->node = NODE_ESCAPE; |
n->bsz = strlen(n->b); |
|
n->spc = (p->flags & PFLAG_SPC) != 0; |
n->spc = (p->flags & PFLAG_SPC) != 0; |
n->parent = p->cur; |
|
TAILQ_INIT(&n->childq); |
|
TAILQ_INIT(&n->attrq); |
|
TAILQ_INSERT_TAIL(&p->cur->childq, n, child); |
|
p->flags &= ~PFLAG_SPC; |
p->flags &= ~PFLAG_SPC; |
} |
} |
|
|
Line 470 xml_elem_start(struct parse *p, const char *name) |
|
Line 535 xml_elem_start(struct parse *p, const char *name) |
|
return; |
return; |
} |
} |
|
|
pnode_closetext(p); |
pnode_closetext(p, 1); |
|
|
for (elem = elements; elem->name != NULL; elem++) |
for (elem = elements; elem->name != NULL; elem++) |
if (strcmp(elem->name, name) == 0) |
if (strcmp(elem->name, name) == 0) |
Line 493 xml_elem_start(struct parse *p, const char *name) |
|
Line 558 xml_elem_start(struct parse *p, const char *name) |
|
/* FALLTHROUGH */ |
/* FALLTHROUGH */ |
case NODE_IGNORE: |
case NODE_IGNORE: |
return; |
return; |
case NODE_INLINEEQUATION: |
|
p->tree->flags |= TREE_EQN; |
|
break; |
|
default: |
default: |
break; |
break; |
} |
} |
Line 503 xml_elem_start(struct parse *p, const char *name) |
|
Line 565 xml_elem_start(struct parse *p, const char *name) |
|
if (p->tree->flags & TREE_CLOSED && p->cur->parent == NULL) |
if (p->tree->flags & TREE_CLOSED && p->cur->parent == NULL) |
warn_msg(p, "element after end of document: <%s>", name); |
warn_msg(p, "element after end of document: <%s>", name); |
|
|
if ((n = calloc(1, sizeof(*n))) == NULL) |
if ((n = pnode_alloc(p->cur)) == NULL) |
fatal(p); |
fatal(p); |
|
|
/* |
/* |
Line 555 xml_elem_start(struct parse *p, const char *name) |
|
Line 617 xml_elem_start(struct parse *p, const char *name) |
|
n->spc = (p->flags & PFLAG_SPC) != 0; |
n->spc = (p->flags & PFLAG_SPC) != 0; |
break; |
break; |
} |
} |
n->parent = p->cur; |
|
TAILQ_INIT(&n->childq); |
|
TAILQ_INIT(&n->attrq); |
|
|
|
if (p->cur != NULL) |
|
TAILQ_INSERT_TAIL(&p->cur->childq, n, child); |
|
|
|
p->cur = n; |
p->cur = n; |
if (n->node == NODE_DOCTYPE) { |
if (n->node == NODE_DOCTYPE) { |
if (p->doctype == NULL) |
if (p->doctype == NULL) |
Line 649 xml_elem_end(struct parse *p, const char *name) |
|
Line 704 xml_elem_end(struct parse *p, const char *name) |
|
} |
} |
|
|
if (p->del == 0) |
if (p->del == 0) |
pnode_closetext(p); |
pnode_closetext(p, 0); |
|
|
if (name != NULL) { |
if (name != NULL) { |
for (elem = elements; elem->name != NULL; elem++) |
for (elem = elements; elem->name != NULL; elem++) |
Line 680 xml_elem_end(struct parse *p, const char *name) |
|
Line 735 xml_elem_end(struct parse *p, const char *name) |
|
p->flags &= ~PFLAG_SPC; |
p->flags &= ~PFLAG_SPC; |
break; |
break; |
case NODE_DOCTYPE: |
case NODE_DOCTYPE: |
|
case NODE_SBR: |
p->flags &= ~PFLAG_EEND; |
p->flags &= ~PFLAG_EEND; |
/* FALLTHROUGH */ |
/* FALLTHROUGH */ |
default: |
default: |
Line 970 parse_string(struct parse *p, char *b, size_t rlen, |
|
Line 1026 parse_string(struct parse *p, char *b, size_t rlen, |
|
|
|
} else { |
} else { |
advance(p, b, rlen, &pend, |
advance(p, b, rlen, &pend, |
p->ncur == NODE_DOCTYPE ? "<&]" : "<&", |
p->ncur == NODE_DOCTYPE ? "<&]\n" : "<&\n", |
refill); |
refill); |
xml_char(p, b + poff, pend - poff); |
xml_text(p, b + poff, pend - poff); |
|
if (b[pend] == '\n') |
|
pnode_closetext(p, 0); |
} |
} |
} |
} |
return poff; |
return poff; |
Line 1057 parse_file(struct parse *p, int fd, const char *fname) |
|
Line 1115 parse_file(struct parse *p, int fd, const char *fname) |
|
/* On the top level, finalize the parse tree. */ |
/* On the top level, finalize the parse tree. */ |
|
|
if (save_fname == NULL) { |
if (save_fname == NULL) { |
pnode_closetext(p); |
pnode_closetext(p, 0); |
if (p->tree->root == NULL) |
if (p->tree->root == NULL) |
error_msg(p, "empty document"); |
error_msg(p, "empty document"); |
else if ((p->tree->flags & TREE_CLOSED) == 0) |
else if ((p->tree->flags & TREE_CLOSED) == 0) |