=================================================================== RCS file: /cvs/docbook2mdoc/parse.c,v retrieving revision 1.2 retrieving revision 1.13 diff -u -p -r1.2 -r1.13 --- docbook2mdoc/parse.c 2019/03/26 20:54:43 1.2 +++ docbook2mdoc/parse.c 2019/04/03 17:53:02 1.13 @@ -1,4 +1,4 @@ -/* $Id: parse.c,v 1.2 2019/03/26 20:54:43 schwarze Exp $ */ +/* $Id: parse.c,v 1.13 2019/04/03 17:53:02 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * Copyright (c) 2019 Ingo Schwarze @@ -17,8 +17,9 @@ */ #include #include -#include +#include #include +#include #include #include @@ -34,10 +35,16 @@ * Keep this as simple and small as possible. */ struct parse { - XML_Parser xml; const char *fname; /* Name of the input file. */ struct ptree *tree; /* Complete parse result. */ struct pnode *cur; /* Current node in the tree. */ + enum nodeid ncur; /* Type of the current node. */ + int line; /* Line number in the input file. */ + int col; /* Column number in the input file. */ + int nline; /* Line number of next token. */ + int ncol; /* Column number of next token. */ + int del; /* Levels of nested nodes being deleted. */ + int attr; /* The most recent attribute is valid. */ int warn; }; @@ -47,9 +54,9 @@ struct element { }; static const struct element elements[] = { - { "acronym", NODE_ACRONYM }, + { "acronym", NODE_IGNORE }, { "affiliation", NODE_AFFILIATION }, - { "anchor", NODE_ANCHOR }, + { "anchor", NODE_DELETE }, { "application", NODE_APPLICATION }, { "arg", NODE_ARG }, { "author", NODE_AUTHOR }, @@ -62,10 +69,11 @@ static const struct element elements[] = { { "citerefentry", NODE_CITEREFENTRY }, { "citetitle", NODE_CITETITLE }, { "cmdsynopsis", NODE_CMDSYNOPSIS }, - { "code", NODE_CODE }, + { "code", NODE_LITERAL }, { "colspec", NODE_COLSPEC }, { "command", NODE_COMMAND }, { "constant", NODE_CONSTANT }, + { "contrib", NODE_CONTRIB }, { "copyright", NODE_COPYRIGHT }, { "date", NODE_DATE }, { "editor", NODE_EDITOR }, @@ -73,9 +81,10 @@ static const struct element elements[] = { { "emphasis", NODE_EMPHASIS }, { "entry", NODE_ENTRY }, { "envar", NODE_ENVAR }, + { "errorname", NODE_ERRORNAME }, { "fieldsynopsis", NODE_FIELDSYNOPSIS }, { "filename", NODE_FILENAME }, - { "firstname", NODE_FIRSTNAME }, + { "firstname", NODE_PERSONNAME }, { "firstterm", NODE_FIRSTTERM }, { "footnote", NODE_FOOTNOTE }, { "funcdef", NODE_FUNCDEF }, @@ -87,10 +96,10 @@ static const struct element elements[] = { { "group", NODE_GROUP }, { "holder", NODE_HOLDER }, { "index", NODE_INDEX }, - { "indexterm", NODE_INDEXTERM }, + { "indexterm", NODE_DELETE }, { "info", NODE_INFO }, { "informalequation", NODE_INFORMALEQUATION }, - { "informaltable", NODE_INFORMALTABLE }, + { "informaltable", NODE_TABLE }, { "inlineequation", NODE_INLINEEQUATION }, { "itemizedlist", NODE_ITEMIZEDLIST }, { "keysym", NODE_KEYSYM }, @@ -115,15 +124,15 @@ static const struct element elements[] = { { "option", NODE_OPTION }, { "orderedlist", NODE_ORDEREDLIST }, { "orgname", NODE_ORGNAME }, - { "othername", NODE_OTHERNAME }, + { "othername", NODE_PERSONNAME }, { "para", NODE_PARA }, { "paramdef", NODE_PARAMDEF }, { "parameter", NODE_PARAMETER }, { "part", NODE_SECTION }, { "personname", NODE_PERSONNAME }, - { "phrase", NODE_PHRASE }, + { "phrase", NODE_IGNORE }, { "preface", NODE_PREFACE }, - { "primary", NODE_PRIMARY }, + { "primary", NODE_DELETE }, { "programlisting", NODE_PROGRAMLISTING }, { "prompt", NODE_PROMPT }, { "quote", NODE_QUOTE }, @@ -148,16 +157,18 @@ static const struct element elements[] = { { "row", NODE_ROW }, { "sbr", NODE_SBR }, { "screen", NODE_SCREEN }, - { "secondary", NODE_SECONDARY }, + { "secondary", NODE_DELETE }, { "sect1", NODE_SECTION }, { "sect2", NODE_SECTION }, { "section", NODE_SECTION }, { "sgmltag", NODE_SGMLTAG }, { "simplelist", NODE_SIMPLELIST }, { "spanspec", NODE_SPANSPEC }, - { "structname", NODE_STRUCTNAME }, + { "structfield", NODE_PARAMETER }, + { "structname", NODE_TYPE }, { "subtitle", NODE_SUBTITLE }, - { "surname", NODE_SURNAME }, + { "surname", NODE_PERSONNAME }, + { "symbol", NODE_CONSTANT }, { "synopsis", NODE_SYNOPSIS }, { "table", NODE_TABLE }, { "tbody", NODE_TBODY }, @@ -167,49 +178,123 @@ static const struct element elements[] = { { "thead", NODE_THEAD }, { "tip", NODE_TIP }, { "title", NODE_TITLE }, - { "trademark", NODE_TRADEMARK }, + { "trademark", NODE_IGNORE }, { "type", NODE_TYPE }, { "ulink", NODE_ULINK }, - { "userinput", NODE_USERINPUT }, + { "userinput", NODE_LITERAL }, { "variablelist", NODE_VARIABLELIST }, { "varlistentry", NODE_VARLISTENTRY }, { "varname", NODE_VARNAME }, { "warning", NODE_WARNING }, { "wordasword", NODE_WORDASWORD }, - { "xi:include", NODE_WARN }, + { "xi:include", NODE_DELETE_WARN }, { "year", NODE_YEAR }, - { NULL, NODE__MAX } + { NULL, NODE_IGNORE } }; +struct entity { + const char *name; + const char *roff; +}; + /* + * XML character entity references found in the wild. + * Those that don't have an exact mandoc_char(7) representation + * are approximated, and the desired codepoint is given as a comment. + * Encoding them as \\[u...] would leave -Tascii out in the cold. + */ +static const struct entity entities[] = { + { "alpha", "\\(*a" }, + { "amp", "&" }, + { "apos", "'" }, + { "auml", "\\(:a" }, + { "beta", "\\(*b" }, + { "circ", "^" }, /* U+02C6 */ + { "copy", "\\(co" }, + { "dagger", "\\(dg" }, + { "Delta", "\\(*D" }, + { "eacute", "\\('e" }, + { "emsp", "\\ " }, /* U+2003 */ + { "gt", ">" }, + { "hairsp", "\\^" }, + { "kappa", "\\(*k" }, + { "larr", "\\(<-" }, + { "ldquo", "\\(lq" }, + { "le", "\\(<=" }, + { "lowbar", "_" }, + { "lsqb", "[" }, + { "lt", "<" }, + { "mdash", "\\(em" }, + { "minus", "\\-" }, + { "ndash", "\\(en" }, + { "nbsp", "\\ " }, + { "num", "#" }, + { "oslash", "\\(/o" }, + { "ouml", "\\(:o" }, + { "percnt", "%" }, + { "quot", "\\(dq" }, + { "rarr", "\\(->" }, + { "rArr", "\\(rA" }, + { "rdquo", "\\(rq" }, + { "reg", "\\(rg" }, + { "rho", "\\(*r" }, + { "rsqb", "]" }, + { "sigma", "\\(*s" }, + { "shy", "\\&" }, /* U+00AD */ + { "tau", "\\(*t" }, + { "tilde", "\\[u02DC]" }, + { "times", "\\[tmu]" }, + { "uuml", "\\(:u" }, + { NULL, NULL } +}; + +static void +error_msg(struct parse *p, const char *fmt, ...) +{ + va_list ap; + + fprintf(stderr, "%s:%d:%d: ", p->fname, p->line, p->col); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); + p->tree->flags |= TREE_FAIL; +} + +static void +warn_msg(struct parse *p, const char *fmt, ...) +{ + va_list ap; + + if (p->warn == 0) + return; + + fprintf(stderr, "%s:%d:%d: warning: ", p->fname, p->line, p->col); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); +} + +/* * Process a string of characters. * If a text node is already open, append to it. * Otherwise, create a new one as a child of the current node. */ static void -xml_char(void *arg, const XML_Char *p, int sz) +xml_char(struct parse *ps, const char *p, int sz) { - struct parse *ps; struct pnode *dat; - int i; - ps = arg; - if (ps->tree->flags && TREE_FAIL) + if (ps->del > 0) return; - /* - * Only create a new node if there is non-whitespace text. - * Strip all leading whitespace. - */ - if (ps->cur->node != NODE_TEXT) { - for (i = 0; i < sz; i++) - if (isspace((unsigned char)p[i]) == 0) - break; - if (i == sz) - return; - p += i; - sz -= i; + if (ps->cur == NULL) { + error_msg(ps, "discarding text before document: %.*s", sz, p); + return; + } + if (ps->cur->node != NODE_TEXT) { if ((dat = calloc(1, sizeof(*dat))) == NULL) { perror(NULL); exit(1); @@ -222,6 +307,10 @@ xml_char(void *arg, const XML_Char *p, int sz) ps->cur = dat; } + if (ps->tree->flags & TREE_CLOSED && + ps->cur->parent == ps->tree->root) + warn_msg(ps, "text after end of document: %.*s", sz, p); + /* Append to the current text node. */ assert(sz >= 0); @@ -245,24 +334,73 @@ pnode_trim(struct pnode *pn) break; } +static void +xml_entity(struct parse *p, const char *name) +{ + const struct entity *entity; + struct pnode *dat; + + if (p->del > 0) + return; + + if (p->cur == NULL) { + error_msg(p, "discarding entity before document: &%s;", name); + return; + } + + /* Close out the text node, if there is one. */ + if (p->cur->node == NODE_TEXT) { + pnode_trim(p->cur); + p->cur = p->cur->parent; + } + + if (p->tree->flags & TREE_CLOSED && p->cur == p->tree->root) + warn_msg(p, "entity after end of document: &%s;", name); + + for (entity = entities; entity->name != NULL; entity++) + if (strcmp(name, entity->name) == 0) + break; + + if (entity->roff == NULL) { + error_msg(p, "unknown entity &%s;", name); + return; + } + + /* Create, append, and close out an entity node. */ + if ((dat = calloc(1, sizeof(*dat))) == NULL || + (dat->b = dat->real = strdup(entity->roff)) == NULL) { + perror(NULL); + exit(1); + } + dat->node = NODE_ESCAPE; + dat->bsz = strlen(dat->b); + dat->parent = p->cur; + TAILQ_INIT(&dat->childq); + TAILQ_INIT(&dat->attrq); + TAILQ_INSERT_TAIL(&p->cur->childq, dat, child); +} + /* * Begin an element. - * If the name is unknown, abort parsing. */ static void -xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts) +xml_elem_start(struct parse *ps, const char *name) { - struct parse *ps; - const struct element *elem; - enum attrkey key; - struct pnode *dat; - struct pattr *pattr; - const XML_Char **att; + const struct element *elem; + struct pnode *dat; - ps = arg; - if (ps->tree->flags && TREE_FAIL) + if (*name == '!' || *name == '?') return; + /* + * An ancestor is excluded from the tree; + * keep track of the number of levels excluded. + */ + if (ps->del > 0) { + ps->del++; + return; + } + /* Close out the text node, if there is one. */ if (ps->cur != NULL && ps->cur->node == NODE_TEXT) { pnode_trim(ps->cur); @@ -273,22 +411,18 @@ xml_elem_start(void *arg, const XML_Char *name, const if (strcmp(elem->name, name) == 0) break; - if (elem->name == NULL) { - fprintf(stderr, "%s:%zu:%zu: unknown element \"%s\"\n", - ps->fname, XML_GetCurrentLineNumber(ps->xml), - XML_GetCurrentColumnNumber(ps->xml), name); - ps->tree->flags |= TREE_FAIL; - return; - } + if (elem->name == NULL) + error_msg(ps, "unknown element <%s>", name); - switch (elem->node) { - case NODE_WARN: - if (ps->warn) - fprintf(stderr, "%s:%zu:%zu: warning: " - "ignoring element <%s>\n", ps->fname, - XML_GetCurrentLineNumber(ps->xml), - XML_GetCurrentColumnNumber(ps->xml), name); + ps->ncur = elem->node; + + switch (ps->ncur) { + case NODE_DELETE_WARN: + warn_msg(ps, "skipping element <%s>", name); /* FALLTHROUGH */ + case NODE_DELETE: + ps->del = 1; + /* FALLTHROUGH */ case NODE_IGNORE: return; case NODE_INLINEEQUATION: @@ -298,6 +432,9 @@ xml_elem_start(void *arg, const XML_Char *name, const break; } + if (ps->tree->flags & TREE_CLOSED && ps->cur->parent == NULL) + warn_msg(ps, "element after end of document: <%s>", name); + if ((dat = calloc(1, sizeof(*dat))) == NULL) { perror(NULL); exit(1); @@ -313,61 +450,108 @@ xml_elem_start(void *arg, const XML_Char *name, const ps->cur = dat; if (ps->tree->root == NULL) ps->tree->root = dat; +} - /* - * Process attributes. - */ - for (att = atts; *att != NULL; att += 2) { - if ((key = attrkey_parse(*att)) == ATTRKEY__MAX) { - if (ps->warn) - fprintf(stderr, "%s:%zu:%zu: warning: " - "unknown attribute \"%s\"\n", - ps->fname, - XML_GetCurrentLineNumber(ps->xml), - XML_GetCurrentColumnNumber(ps->xml), - *att); - continue; - } - pattr = calloc(1, sizeof(*pattr)); - pattr->key = key; - if ((pattr->val = attrval_parse(att[1])) == ATTRVAL__MAX) - pattr->rawval = strdup(att[1]); - TAILQ_INSERT_TAIL(&dat->attrq, pattr, child); +static void +xml_attrkey(struct parse *ps, const char *name) +{ + struct pattr *attr; + enum attrkey key; + + if (ps->del > 0 || *name == '\0') + return; + if ((key = attrkey_parse(name)) == ATTRKEY__MAX) { + ps->attr = 0; + return; } + if ((attr = calloc(1, sizeof(*attr))) == NULL) { + perror(NULL); + exit(1); + } + attr->key = key; + attr->val = ATTRVAL__MAX; + attr->rawval = NULL; + TAILQ_INSERT_TAIL(&ps->cur->attrq, attr, child); + ps->attr = 1; } +static void +xml_attrval(struct parse *ps, const char *name) +{ + struct pattr *attr; + + if (ps->del > 0 || ps->attr == 0) + return; + if ((attr = TAILQ_LAST(&ps->cur->attrq, pattrq)) == NULL) + return; + if ((attr->val = attrval_parse(name)) == ATTRVAL__MAX && + (attr->rawval = strdup(name)) == NULL) { + perror(NULL); + exit(1); + } +} + /* * Roll up the parse tree. * If we're at a text node, roll that one up first. */ static void -xml_elem_end(void *arg, const XML_Char *name) +xml_elem_end(struct parse *ps, const char *name) { - struct parse *ps; - const struct element *elem; + const struct element *elem; + enum nodeid node; - ps = arg; - if (ps->tree->flags && TREE_FAIL) + /* + * An ancestor is excluded from the tree; + * keep track of the number of levels excluded. + */ + if (ps->del > 1) { + ps->del--; return; + } /* Close out the text node, if there is one. */ - if (ps->cur->node == NODE_TEXT) { + if (ps->del == 0 && ps->cur != NULL && ps->cur->node == NODE_TEXT) { pnode_trim(ps->cur); ps->cur = ps->cur->parent; } - for (elem = elements; elem->name != NULL; elem++) - if (strcmp(elem->name, name) == 0) - break; + if (name != NULL) { + for (elem = elements; elem->name != NULL; elem++) + if (strcmp(elem->name, name) == 0) + break; + node = elem->node; + } else + node = ps->ncur; - switch (elem->node) { + switch (node) { + case NODE_DELETE_WARN: + case NODE_DELETE: + if (ps->del > 0) + ps->del--; + break; case NODE_IGNORE: - case NODE_WARN: break; default: - assert(elem->node == ps->cur->node); - ps->cur = ps->cur->parent; + if (ps->cur == NULL || node != ps->cur->node) { + warn_msg(ps, "element not open: ", name); + break; + } + + /* + * Refrain from actually closing the document element. + * If no more content follows, no harm is done, but if + * some content still follows, simply processing it is + * obviously better than discarding it or crashing. + */ + + if (ps->cur->parent == NULL) + ps->tree->flags |= TREE_CLOSED; + else + ps->cur = ps->cur->parent; + break; } + assert(ps->del == 0); } struct parse * @@ -382,16 +566,7 @@ parse_alloc(int warn) free(p); return NULL; } - - if ((p->xml = XML_ParserCreate(NULL)) == NULL) { - free(p->tree); - free(p); - return NULL; - } p->warn = warn; - XML_SetCharacterDataHandler(p->xml, xml_char); - XML_SetElementHandler(p->xml, xml_elem_start, xml_elem_end); - XML_SetUserData(p->xml, p); return p; } @@ -400,7 +575,6 @@ parse_free(struct parse *p) { if (p == NULL) return; - XML_ParserFree(p->xml); if (p->tree != NULL) { pnode_unlink(p->tree->root); free(p->tree); @@ -408,28 +582,255 @@ parse_free(struct parse *p) free(p); } +/* + * Advance the pend pointer to the next character in the charset. + * If the charset starts with a space, it stands for any whitespace. + * Update the new input file position, used for messages. + * Do not overrun the buffer b of length rlen. + * When reaching the end, NUL-terminate the buffer and return 1; + * otherwise, return 0. + */ +static int +advance(struct parse *p, char *b, size_t rlen, size_t *pend, + const char *charset) +{ + int space; + + if (*charset == ' ') { + space = 1; + charset++; + } else + space = 0; + + p->nline = p->line; + p->ncol = p->col; + while (*pend < rlen) { + if (b[*pend] == '\n') { + p->nline++; + p->ncol = 1; + } else + p->ncol++; + if (space && isspace((unsigned char)b[*pend])) + break; + if (strchr(charset, b[*pend]) != NULL) + break; + ++*pend; + } + if (*pend == rlen) { + b[rlen] = '\0'; + return 1; + } else + return 0; +} + struct ptree * parse_file(struct parse *p, int fd, const char *fname) { char b[4096]; - ssize_t ssz; + char *cp; + ssize_t rsz; /* Return value from read(2). */ + size_t rlen; /* Number of bytes in b[]. */ + size_t poff; /* Parse offset in b[]. */ + size_t pend; /* Offset of the end of the current word. */ + int in_tag, in_arg, in_quotes, elem_end; p->fname = fname; - do { - if ((ssz = read(fd, b, sizeof(b))) < 0) { - perror(fname); - pnode_unlink(p->tree->root); - p->tree->root = p->cur = NULL; - p->tree->flags |= TREE_FAIL; - return NULL; + p->nline = 1; + p->ncol = 1; + rlen = 0; + in_tag = in_arg = in_quotes = 0; + + /* + * Read loop. + * + * We have to enter the read loop once more even on EOF + * because the previous token may have been incomplete, + * such that it asked for more input. + * Once rsz is 0, incomplete tokens will no longer ask + * for more input but instead use whatever there is, + * and then exit the read loop. + * The minus one on the size limit for read(2) is needed + * such that advance() can set b[rlen] to NUL when needed. + */ + + while ((rsz = read(fd, b + rlen, sizeof(b) - rlen - 1)) >= 0) { + if ((rlen += rsz) == 0) + break; + + /* Token loop. */ + + pend = 0; + for (;;) { + + /* Proceed to the next token, skipping whitespace. */ + + p->line = p->nline; + p->col = p->ncol; + if ((poff = pend) == rlen) + break; + if (isspace((unsigned char)b[pend])) { + if (b[pend++] == '\n') { + p->nline++; + p->ncol = 1; + } else + p->ncol++; + continue; + } + + /* + * The following four cases (in_arg, in_tag, and + * starting an entity or a tag) all parse a word + * or quoted string. If that extends beyond the + * read buffer and the last read(2) still got + * data, they all break out of the token loop + * to request more data from the read loop. + * + * Also, three of them detect self-closing tags, + * those ending with "/>", setting the flag + * elem_end and calling xml_elem_end() at the + * very end, after handling the attribute value, + * attribute name, or tag name, respectively. + */ + + /* Parse an attribute value. */ + + if (in_arg) { + if (in_quotes == 0 && + (b[pend] == '\'' || b[pend] == '"')) { + in_quotes = b[pend] == '"' ? 2 : 1; + p->ncol++; + pend++; + continue; + } + if (advance(p, b, rlen, &pend, + in_quotes == 2 ? "\"" : + in_quotes == 1 ? "'" : " >") && rsz > 0) + break; + in_arg = in_quotes = elem_end = 0; + if (b[pend] == '>') { + in_tag = 0; + if (pend > 0 && b[pend - 1] == '/') { + b[pend - 1] = '\0'; + elem_end = 1; + } + } + b[pend] = '\0'; + if (pend < rlen) + pend++; + xml_attrval(p, b + poff); + if (elem_end) + xml_elem_end(p, NULL); + + /* Look for an attribute name. */ + + } else if (in_tag) { + if (advance(p, b, rlen, &pend, " =>") && + rsz > 0) + break; + elem_end = 0; + switch (b[pend]) { + case '>': + in_tag = 0; + if (pend > 0 && b[pend - 1] == '/') { + b[pend - 1] = '\0'; + elem_end = 1; + } + break; + case '=': + in_arg = 1; + break; + default: + break; + } + b[pend] = '\0'; + if (pend < rlen) + pend++; + xml_attrkey(p, b + poff); + if (elem_end) + xml_elem_end(p, NULL); + + /* Begin an opening or closing tag. */ + + } else if (b[poff] == '<') { + if (advance(p, b, rlen, &pend, " >") && + rsz > 0) + break; + if (pend > poff + 3 && + strncmp(b + poff, ""); + if (cp == NULL) { + if (rsz > 0) { + pend = rlen; + break; + } + cp = b + rlen; + } else + cp += 3; + while (b + pend < cp) { + if (b[++pend] == '\n') { + p->nline++; + p->ncol = 1; + } else + p->ncol++; + } + continue; + } + elem_end = 0; + if (b[pend] != '>') + in_tag = 1; + else if (pend > 0 && b[pend - 1] == '/') { + b[pend - 1] = '\0'; + elem_end = 1; + } + b[pend] = '\0'; + if (pend < rlen) + pend++; + if (b[++poff] == '/') { + elem_end = 1; + poff++; + } else + xml_elem_start(p, b + poff); + if (elem_end) + xml_elem_end(p, b + poff); + + /* Process an entity. */ + + } else if (b[poff] == '&') { + if (advance(p, b, rlen, &pend, ";") && + rsz > 0) + break; + b[pend] = '\0'; + if (pend < rlen) + pend++; + xml_entity(p, b + poff + 1); + + /* Process text up to the next tag or entity. */ + + } else { + if (advance(p, b, rlen, &pend, "<&") == 0) + p->ncol--; + xml_char(p, b + poff, pend - poff); + } } - if (XML_Parse(p->xml, b, ssz, ssz == 0) == 0) { - fprintf(stderr, "%s:%zu:%zu: %s\n", fname, - XML_GetCurrentLineNumber(p->xml), - XML_GetCurrentColumnNumber(p->xml), - XML_ErrorString(XML_GetErrorCode(p->xml))); - p->tree->flags |= TREE_FAIL; - } - } while (ssz > 0 && (p->tree->flags & TREE_FAIL) == 0); + + /* Buffer exhausted; shift left and re-fill. */ + + assert(poff > 0); + memmove(b, b + poff, rlen - poff); + rlen -= poff; + } + if (rsz < 0) { + perror(fname); + p->tree->flags |= TREE_FAIL; + } + if (p->cur != NULL && p->cur->node == NODE_TEXT) { + pnode_trim(p->cur); + p->cur = p->cur->parent; + } + if ((p->tree->flags & TREE_CLOSED) == 0) + warn_msg(p, "document not closed"); return p->tree; }