version 1.5, 2019/03/28 12:21:10 |
version 1.13, 2019/04/03 17:53:02 |
|
|
*/ |
*/ |
#include <assert.h> |
#include <assert.h> |
#include <ctype.h> |
#include <ctype.h> |
|
#include <stdarg.h> |
#include <stdio.h> |
#include <stdio.h> |
#include <stdlib.h> |
#include <stdlib.h> |
#include <string.h> |
#include <string.h> |
Line 68 static const struct element elements[] = { |
|
Line 69 static const struct element elements[] = { |
|
{ "citerefentry", NODE_CITEREFENTRY }, |
{ "citerefentry", NODE_CITEREFENTRY }, |
{ "citetitle", NODE_CITETITLE }, |
{ "citetitle", NODE_CITETITLE }, |
{ "cmdsynopsis", NODE_CMDSYNOPSIS }, |
{ "cmdsynopsis", NODE_CMDSYNOPSIS }, |
{ "code", NODE_CODE }, |
{ "code", NODE_LITERAL }, |
{ "colspec", NODE_COLSPEC }, |
{ "colspec", NODE_COLSPEC }, |
{ "command", NODE_COMMAND }, |
{ "command", NODE_COMMAND }, |
{ "constant", NODE_CONSTANT }, |
{ "constant", NODE_CONSTANT }, |
|
{ "contrib", NODE_CONTRIB }, |
{ "copyright", NODE_COPYRIGHT }, |
{ "copyright", NODE_COPYRIGHT }, |
{ "date", NODE_DATE }, |
{ "date", NODE_DATE }, |
{ "editor", NODE_EDITOR }, |
{ "editor", NODE_EDITOR }, |
Line 79 static const struct element elements[] = { |
|
Line 81 static const struct element elements[] = { |
|
{ "emphasis", NODE_EMPHASIS }, |
{ "emphasis", NODE_EMPHASIS }, |
{ "entry", NODE_ENTRY }, |
{ "entry", NODE_ENTRY }, |
{ "envar", NODE_ENVAR }, |
{ "envar", NODE_ENVAR }, |
|
{ "errorname", NODE_ERRORNAME }, |
{ "fieldsynopsis", NODE_FIELDSYNOPSIS }, |
{ "fieldsynopsis", NODE_FIELDSYNOPSIS }, |
{ "filename", NODE_FILENAME }, |
{ "filename", NODE_FILENAME }, |
{ "firstname", NODE_IGNORE }, |
{ "firstname", NODE_PERSONNAME }, |
{ "firstterm", NODE_FIRSTTERM }, |
{ "firstterm", NODE_FIRSTTERM }, |
{ "footnote", NODE_FOOTNOTE }, |
{ "footnote", NODE_FOOTNOTE }, |
{ "funcdef", NODE_FUNCDEF }, |
{ "funcdef", NODE_FUNCDEF }, |
Line 96 static const struct element elements[] = { |
|
Line 99 static const struct element elements[] = { |
|
{ "indexterm", NODE_DELETE }, |
{ "indexterm", NODE_DELETE }, |
{ "info", NODE_INFO }, |
{ "info", NODE_INFO }, |
{ "informalequation", NODE_INFORMALEQUATION }, |
{ "informalequation", NODE_INFORMALEQUATION }, |
{ "informaltable", NODE_INFORMALTABLE }, |
{ "informaltable", NODE_TABLE }, |
{ "inlineequation", NODE_INLINEEQUATION }, |
{ "inlineequation", NODE_INLINEEQUATION }, |
{ "itemizedlist", NODE_ITEMIZEDLIST }, |
{ "itemizedlist", NODE_ITEMIZEDLIST }, |
{ "keysym", NODE_KEYSYM }, |
{ "keysym", NODE_KEYSYM }, |
Line 121 static const struct element elements[] = { |
|
Line 124 static const struct element elements[] = { |
|
{ "option", NODE_OPTION }, |
{ "option", NODE_OPTION }, |
{ "orderedlist", NODE_ORDEREDLIST }, |
{ "orderedlist", NODE_ORDEREDLIST }, |
{ "orgname", NODE_ORGNAME }, |
{ "orgname", NODE_ORGNAME }, |
{ "othername", NODE_IGNORE }, |
{ "othername", NODE_PERSONNAME }, |
{ "para", NODE_PARA }, |
{ "para", NODE_PARA }, |
{ "paramdef", NODE_PARAMDEF }, |
{ "paramdef", NODE_PARAMDEF }, |
{ "parameter", NODE_PARAMETER }, |
{ "parameter", NODE_PARAMETER }, |
Line 161 static const struct element elements[] = { |
|
Line 164 static const struct element elements[] = { |
|
{ "sgmltag", NODE_SGMLTAG }, |
{ "sgmltag", NODE_SGMLTAG }, |
{ "simplelist", NODE_SIMPLELIST }, |
{ "simplelist", NODE_SIMPLELIST }, |
{ "spanspec", NODE_SPANSPEC }, |
{ "spanspec", NODE_SPANSPEC }, |
{ "structname", NODE_STRUCTNAME }, |
{ "structfield", NODE_PARAMETER }, |
|
{ "structname", NODE_TYPE }, |
{ "subtitle", NODE_SUBTITLE }, |
{ "subtitle", NODE_SUBTITLE }, |
{ "surname", NODE_IGNORE }, |
{ "surname", NODE_PERSONNAME }, |
|
{ "symbol", NODE_CONSTANT }, |
{ "synopsis", NODE_SYNOPSIS }, |
{ "synopsis", NODE_SYNOPSIS }, |
{ "table", NODE_TABLE }, |
{ "table", NODE_TABLE }, |
{ "tbody", NODE_TBODY }, |
{ "tbody", NODE_TBODY }, |
Line 176 static const struct element elements[] = { |
|
Line 181 static const struct element elements[] = { |
|
{ "trademark", NODE_IGNORE }, |
{ "trademark", NODE_IGNORE }, |
{ "type", NODE_TYPE }, |
{ "type", NODE_TYPE }, |
{ "ulink", NODE_ULINK }, |
{ "ulink", NODE_ULINK }, |
{ "userinput", NODE_USERINPUT }, |
{ "userinput", NODE_LITERAL }, |
{ "variablelist", NODE_VARIABLELIST }, |
{ "variablelist", NODE_VARIABLELIST }, |
{ "varlistentry", NODE_VARLISTENTRY }, |
{ "varlistentry", NODE_VARLISTENTRY }, |
{ "varname", NODE_VARNAME }, |
{ "varname", NODE_VARNAME }, |
Line 187 static const struct element elements[] = { |
|
Line 192 static const struct element elements[] = { |
|
{ NULL, NODE_IGNORE } |
{ NULL, NODE_IGNORE } |
}; |
}; |
|
|
|
/*
 * One XML character entity and its roff replacement.
 */
struct entity {
	const char	*name;	/* Entity name as written between '&' and ';'. */
	const char	*roff;	/* roff text emitted in place of the entity. */
};

/*
 * XML character entity references found in the wild.
 * Those that don't have an exact mandoc_char(7) representation
 * are approximated, and the desired codepoint is given as a comment.
 * Encoding them as \[u...] would leave -Tascii out in the cold
 * (the "tilde" entry is the one entry below that does use that form).
 *
 * The table is terminated by an entry whose name and roff members
 * are both NULL; lookups walk it linearly until that sentinel.
 */
static const struct entity entities[] = {
	{ "alpha",	"\\(*a" },
	{ "amp",	"&" },
	{ "apos",	"'" },
	{ "auml",	"\\(:a" },
	{ "beta",	"\\(*b" },
	{ "circ",	"^" },		/* U+02C6 */
	{ "copy",	"\\(co" },
	{ "dagger",	"\\(dg" },
	{ "Delta",	"\\(*D" },
	{ "eacute",	"\\('e" },
	{ "emsp",	"\\ " },	/* U+2003 */
	{ "gt",		">" },
	{ "hairsp",	"\\^" },
	{ "kappa",	"\\(*k" },
	{ "larr",	"\\(<-" },
	{ "ldquo",	"\\(lq" },
	{ "le",		"\\(<=" },
	{ "lowbar",	"_" },
	{ "lsqb",	"[" },
	{ "lt",		"<" },
	{ "mdash",	"\\(em" },
	{ "minus",	"\\-" },
	{ "ndash",	"\\(en" },
	{ "nbsp",	"\\ " },
	{ "num",	"#" },
	{ "oslash",	"\\(/o" },
	{ "ouml",	"\\(:o" },
	{ "percnt",	"%" },
	{ "quot",	"\\(dq" },
	{ "rarr",	"\\(->" },
	{ "rArr",	"\\(rA" },
	{ "rdquo",	"\\(rq" },
	{ "reg",	"\\(rg" },
	{ "rho",	"\\(*r" },
	{ "rsqb",	"]" },
	{ "sigma",	"\\(*s" },
	{ "shy",	"\\&" },	/* U+00AD */
	{ "tau",	"\\(*t" },
	{ "tilde",	"\\[u02DC]" },
	{ "times",	"\\[tmu]" },
	{ "uuml",	"\\(:u" },
	{ NULL,		NULL }
};
|
|
|
static void |
|
error_msg(struct parse *p, const char *fmt, ...) |
|
{ |
|
va_list ap; |
|
|
|
fprintf(stderr, "%s:%d:%d: ", p->fname, p->line, p->col); |
|
va_start(ap, fmt); |
|
vfprintf(stderr, fmt, ap); |
|
va_end(ap); |
|
fputc('\n', stderr); |
|
p->tree->flags |= TREE_FAIL; |
|
} |
|
|
|
static void |
|
warn_msg(struct parse *p, const char *fmt, ...) |
|
{ |
|
va_list ap; |
|
|
|
if (p->warn == 0) |
|
return; |
|
|
|
fprintf(stderr, "%s:%d:%d: warning: ", p->fname, p->line, p->col); |
|
va_start(ap, fmt); |
|
vfprintf(stderr, fmt, ap); |
|
va_end(ap); |
|
fputc('\n', stderr); |
|
} |
|
|
|
/* |
* Process a string of characters. |
* Process a string of characters. |
* If a text node is already open, append to it. |
* If a text node is already open, append to it. |
* Otherwise, create a new one as a child of the current node. |
* Otherwise, create a new one as a child of the current node. |
Line 201 xml_char(struct parse *ps, const char *p, int sz) |
|
Line 290 xml_char(struct parse *ps, const char *p, int sz) |
|
return; |
return; |
|
|
if (ps->cur == NULL) { |
if (ps->cur == NULL) { |
fprintf(stderr, "%s:%d:%d: discarding text before docum" |
error_msg(ps, "discarding text before document: %.*s", sz, p); |
"ent: %.*s\n", ps->fname, ps->line, ps->col, sz, p); |
|
ps->tree->flags |= TREE_FAIL; |
|
return; |
return; |
} |
} |
|
|
Line 221 xml_char(struct parse *ps, const char *p, int sz) |
|
Line 308 xml_char(struct parse *ps, const char *p, int sz) |
|
} |
} |
|
|
if (ps->tree->flags & TREE_CLOSED && |
if (ps->tree->flags & TREE_CLOSED && |
ps->cur->parent == ps->tree->root && ps->warn) |
ps->cur->parent == ps->tree->root) |
fprintf(stderr, "%s:%d:%d: warning: " |
warn_msg(ps, "text after end of document: %.*s", sz, p); |
"text after end of document: %.*s\n", |
|
ps->fname, ps->line, ps->col, sz, p); |
|
|
|
/* Append to the current text node. */ |
/* Append to the current text node. */ |
|
|
Line 249 pnode_trim(struct pnode *pn) |
|
Line 334 pnode_trim(struct pnode *pn) |
|
break; |
break; |
} |
} |
|
|
|
static void |
|
xml_entity(struct parse *p, const char *name) |
|
{ |
|
const struct entity *entity; |
|
struct pnode *dat; |
|
|
|
if (p->del > 0) |
|
return; |
|
|
|
if (p->cur == NULL) { |
|
error_msg(p, "discarding entity before document: &%s;", name); |
|
return; |
|
} |
|
|
|
/* Close out the text node, if there is one. */ |
|
if (p->cur->node == NODE_TEXT) { |
|
pnode_trim(p->cur); |
|
p->cur = p->cur->parent; |
|
} |
|
|
|
if (p->tree->flags & TREE_CLOSED && p->cur == p->tree->root) |
|
warn_msg(p, "entity after end of document: &%s;", name); |
|
|
|
for (entity = entities; entity->name != NULL; entity++) |
|
if (strcmp(name, entity->name) == 0) |
|
break; |
|
|
|
if (entity->roff == NULL) { |
|
error_msg(p, "unknown entity &%s;", name); |
|
return; |
|
} |
|
|
|
/* Create, append, and close out an entity node. */ |
|
if ((dat = calloc(1, sizeof(*dat))) == NULL || |
|
(dat->b = dat->real = strdup(entity->roff)) == NULL) { |
|
perror(NULL); |
|
exit(1); |
|
} |
|
dat->node = NODE_ESCAPE; |
|
dat->bsz = strlen(dat->b); |
|
dat->parent = p->cur; |
|
TAILQ_INIT(&dat->childq); |
|
TAILQ_INIT(&dat->attrq); |
|
TAILQ_INSERT_TAIL(&p->cur->childq, dat, child); |
|
} |
|
|
/* |
/* |
* Begin an element. |
* Begin an element. |
*/ |
*/ |
Line 280 xml_elem_start(struct parse *ps, const char *name) |
|
Line 411 xml_elem_start(struct parse *ps, const char *name) |
|
if (strcmp(elem->name, name) == 0) |
if (strcmp(elem->name, name) == 0) |
break; |
break; |
|
|
if (elem->name == NULL) { |
if (elem->name == NULL) |
fprintf(stderr, "%s:%d:%d: unknown element <%s>\n", |
error_msg(ps, "unknown element <%s>", name); |
ps->fname, ps->line, ps->col, name); |
|
ps->tree->flags |= TREE_FAIL; |
|
} |
|
ps->ncur = elem->node; |
ps->ncur = elem->node; |
|
|
switch (ps->ncur) { |
switch (ps->ncur) { |
case NODE_DELETE_WARN: |
case NODE_DELETE_WARN: |
if (ps->warn) |
warn_msg(ps, "skipping element <%s>", name); |
fprintf(stderr, "%s:%d:%d: warning: " |
|
"skipping element <%s>\n", |
|
ps->fname, ps->line, ps->col, name); |
|
/* FALLTHROUGH */ |
/* FALLTHROUGH */ |
case NODE_DELETE: |
case NODE_DELETE: |
ps->del = 1; |
ps->del = 1; |
Line 306 xml_elem_start(struct parse *ps, const char *name) |
|
Line 432 xml_elem_start(struct parse *ps, const char *name) |
|
break; |
break; |
} |
} |
|
|
if (ps->tree->flags & TREE_CLOSED && |
if (ps->tree->flags & TREE_CLOSED && ps->cur->parent == NULL) |
ps->cur->parent == NULL && ps->warn) |
warn_msg(ps, "element after end of document: <%s>", name); |
fprintf(stderr, "%s:%d:%d: warning: " |
|
"element after end of document: %s\n", |
|
ps->fname, ps->line, ps->col, name); |
|
|
|
if ((dat = calloc(1, sizeof(*dat))) == NULL) { |
if ((dat = calloc(1, sizeof(*dat))) == NULL) { |
perror(NULL); |
perror(NULL); |
Line 338 xml_attrkey(struct parse *ps, const char *name) |
|
Line 461 xml_attrkey(struct parse *ps, const char *name) |
|
if (ps->del > 0 || *name == '\0') |
if (ps->del > 0 || *name == '\0') |
return; |
return; |
if ((key = attrkey_parse(name)) == ATTRKEY__MAX) { |
if ((key = attrkey_parse(name)) == ATTRKEY__MAX) { |
if (ps->warn) |
|
fprintf(stderr, "%s:%d:%d: warning: " |
|
"unknown attribute \"%s\"\n", |
|
ps->fname, ps->line, ps->col, name); |
|
ps->attr = 0; |
ps->attr = 0; |
return; |
return; |
} |
} |
Line 415 xml_elem_end(struct parse *ps, const char *name) |
|
Line 534 xml_elem_end(struct parse *ps, const char *name) |
|
break; |
break; |
default: |
default: |
if (ps->cur == NULL || node != ps->cur->node) { |
if (ps->cur == NULL || node != ps->cur->node) { |
if (ps->warn) |
warn_msg(ps, "element not open: </%s>", name); |
fprintf(stderr, "%s:%d:%d: warning: " |
|
"element not open: </%s>\n", |
|
ps->fname, ps->line, ps->col, name); |
|
break; |
break; |
} |
} |
|
|
|
|
parse_file(struct parse *p, int fd, const char *fname) |
parse_file(struct parse *p, int fd, const char *fname) |
{ |
{ |
char b[4096]; |
char b[4096]; |
|
char *cp; |
ssize_t rsz; /* Return value from read(2). */ |
ssize_t rsz; /* Return value from read(2). */ |
size_t rlen; /* Number of bytes in b[]. */ |
size_t rlen; /* Number of bytes in b[]. */ |
size_t poff; /* Parse offset in b[]. */ |
size_t poff; /* Parse offset in b[]. */ |
Line 561 parse_file(struct parse *p, int fd, const char *fname) |
|
Line 678 parse_file(struct parse *p, int fd, const char *fname) |
|
} |
} |
|
|
/* |
/* |
* The following three cases (in_arg, in_tag, |
* The following four cases (in_arg, in_tag, and |
* and starting a tag) all parse a word or |
* starting an entity or a tag) all parse a word |
* quoted string. If that extends beyond the |
* or quoted string. If that extends beyond the |
* read buffer and the last read(2) still got |
* read buffer and the last read(2) still got |
* data, they all break out of the token loop |
* data, they all break out of the token loop |
* to request more data from the read loop. |
* to request more data from the read loop. |
* |
* |
* Also, they all detect self-closing tags, |
* Also, three of them detect self-closing tags, |
* those ending with "/>", setting the flag |
* those ending with "/>", setting the flag |
* elem_end and calling xml_elem_end() at the |
* elem_end and calling xml_elem_end() at the |
* very end, after handling the attribute value, |
* very end, after handling the attribute value, |
Line 578 parse_file(struct parse *p, int fd, const char *fname) |
|
Line 695 parse_file(struct parse *p, int fd, const char *fname) |
|
/* Parse an attribute value. */ |
/* Parse an attribute value. */ |
|
|
if (in_arg) { |
if (in_arg) { |
if (in_quotes == 0 && b[pend] == '"') { |
if (in_quotes == 0 && |
in_quotes = 1; |
(b[pend] == '\'' || b[pend] == '"')) { |
|
in_quotes = b[pend] == '"' ? 2 : 1; |
p->ncol++; |
p->ncol++; |
pend++; |
pend++; |
continue; |
continue; |
} |
} |
if (advance(p, b, rlen, &pend, |
if (advance(p, b, rlen, &pend, |
in_quotes ? "\"" : " >") && rsz > 0) |
in_quotes == 2 ? "\"" : |
|
in_quotes == 1 ? "'" : " >") && rsz > 0) |
break; |
break; |
in_arg = in_quotes = elem_end = 0; |
in_arg = in_quotes = elem_end = 0; |
if (b[pend] == '>') { |
if (b[pend] == '>') { |
Line 636 parse_file(struct parse *p, int fd, const char *fname) |
|
Line 755 parse_file(struct parse *p, int fd, const char *fname) |
|
if (advance(p, b, rlen, &pend, " >") && |
if (advance(p, b, rlen, &pend, " >") && |
rsz > 0) |
rsz > 0) |
break; |
break; |
|
if (pend > poff + 3 && |
|
strncmp(b + poff, "<!--", 4) == 0) { |
|
|
|
/* Skip a comment. */ |
|
|
|
cp = strstr(b + pend - 2, "-->"); |
|
if (cp == NULL) { |
|
if (rsz > 0) { |
|
pend = rlen; |
|
break; |
|
} |
|
cp = b + rlen; |
|
} else |
|
cp += 3; |
|
while (b + pend < cp) { |
|
if (b[++pend] == '\n') { |
|
p->nline++; |
|
p->ncol = 1; |
|
} else |
|
p->ncol++; |
|
} |
|
continue; |
|
} |
elem_end = 0; |
elem_end = 0; |
if (b[pend] != '>') |
if (b[pend] != '>') |
in_tag = 1; |
in_tag = 1; |
Line 654 parse_file(struct parse *p, int fd, const char *fname) |
|
Line 796 parse_file(struct parse *p, int fd, const char *fname) |
|
if (elem_end) |
if (elem_end) |
xml_elem_end(p, b + poff); |
xml_elem_end(p, b + poff); |
|
|
/* Process text up to the next tag. */ |
/* Process an entity. */ |
|
|
|
} else if (b[poff] == '&') { |
|
if (advance(p, b, rlen, &pend, ";") && |
|
rsz > 0) |
|
break; |
|
b[pend] = '\0'; |
|
if (pend < rlen) |
|
pend++; |
|
xml_entity(p, b + poff + 1); |
|
|
|
/* Process text up to the next tag or entity. */ |
|
|
} else { |
} else { |
if (advance(p, b, rlen, &pend, "<") == 0) |
if (advance(p, b, rlen, &pend, "<&") == 0) |
p->ncol--; |
p->ncol--; |
xml_char(p, b + poff, pend - poff); |
xml_char(p, b + poff, pend - poff); |
} |
} |
Line 677 parse_file(struct parse *p, int fd, const char *fname) |
|
Line 830 parse_file(struct parse *p, int fd, const char *fname) |
|
pnode_trim(p->cur); |
pnode_trim(p->cur); |
p->cur = p->cur->parent; |
p->cur = p->cur->parent; |
} |
} |
if ((p->tree->flags & TREE_CLOSED) == 0 && p->warn) |
if ((p->tree->flags & TREE_CLOSED) == 0) |
fprintf(stderr, "%s:%d:%d: warning: document not closed\n", |
warn_msg(p, "document not closed"); |
p->fname, p->line, p->col); |
|
return p->tree; |
return p->tree; |
} |
} |