=================================================================== RCS file: /cvs/docbook2mdoc/parse.c,v retrieving revision 1.14 retrieving revision 1.34 diff -u -p -r1.14 -r1.34 --- docbook2mdoc/parse.c 2019/04/05 14:37:36 1.14 +++ docbook2mdoc/parse.c 2019/04/12 04:39:24 1.34 @@ -1,4 +1,4 @@ -/* $Id: parse.c,v 1.14 2019/04/05 14:37:36 schwarze Exp $ */ +/* $Id: parse.c,v 1.34 2019/04/12 04:39:24 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * Copyright (c) 2019 Ingo Schwarze @@ -17,6 +17,9 @@ */ #include <assert.h> #include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <libgen.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> @@ -45,6 +48,7 @@ enum pstate { struct parse { const char *fname; /* Name of the input file. */ struct ptree *tree; /* Complete parse result. */ + struct pnode *doctype; struct pnode *cur; /* Current node in the tree. */ enum nodeid ncur; /* Type of the current node. */ int line; /* Line number in the input file. */ @@ -52,8 +56,11 @@ struct parse { int nline; /* Line number of next token. */ int ncol; /* Column number of next token. */ int del; /* Levels of nested nodes being deleted. */ - int attr; /* The most recent attribute is valid. */ - int warn; + int flags; +#define PFLAG_WARN (1 << 0) /* Print warning messages. */ +#define PFLAG_SPC (1 << 1) /* Whitespace before the next element. */ +#define PFLAG_ATTR (1 << 2) /* The most recent attribute is valid. */ +#define PFLAG_EEND (1 << 3) /* This element is self-closing. 
*/ }; struct element { @@ -65,12 +72,14 @@ static const struct element elements[] = { { "acronym", NODE_IGNORE }, { "affiliation", NODE_AFFILIATION }, { "anchor", NODE_DELETE }, + { "appendix", NODE_APPENDIX }, { "application", NODE_APPLICATION }, { "arg", NODE_ARG }, + { "article", NODE_SECTION }, { "author", NODE_AUTHOR }, { "authorgroup", NODE_AUTHORGROUP }, { "blockquote", NODE_BLOCKQUOTE }, - { "book", NODE_BOOK }, + { "book", NODE_SECTION }, { "bookinfo", NODE_BOOKINFO }, { "caution", NODE_CAUTION }, { "chapter", NODE_SECTION }, @@ -84,9 +93,12 @@ static const struct element elements[] = { { "contrib", NODE_CONTRIB }, { "copyright", NODE_COPYRIGHT }, { "date", NODE_DATE }, + { "!doctype", NODE_DOCTYPE }, + { "!DOCTYPE", NODE_DOCTYPE }, { "editor", NODE_EDITOR }, { "email", NODE_EMAIL }, { "emphasis", NODE_EMPHASIS }, + { "!ENTITY", NODE_ENTITY }, { "entry", NODE_ENTRY }, { "envar", NODE_ENVAR }, { "errorname", NODE_ERRORNAME }, @@ -100,6 +112,11 @@ static const struct element elements[] = { { "funcsynopsis", NODE_FUNCSYNOPSIS }, { "funcsynopsisinfo", NODE_FUNCSYNOPSISINFO }, { "function", NODE_FUNCTION }, + { "glossary", NODE_VARIABLELIST }, + { "glossdef", NODE_IGNORE }, + { "glossdiv", NODE_IGNORE }, + { "glossentry", NODE_VARLISTENTRY }, + { "glosslist", NODE_VARIABLELIST }, { "glossterm", NODE_GLOSSTERM }, { "group", NODE_GROUP }, { "holder", NODE_HOLDER }, @@ -170,6 +187,7 @@ static const struct element elements[] = { { "sect2", NODE_SECTION }, { "section", NODE_SECTION }, { "sgmltag", NODE_SGMLTAG }, + { "simpara", NODE_PARA }, { "simplelist", NODE_SIMPLELIST }, { "spanspec", NODE_SPANSPEC }, { "structfield", NODE_PARAMETER }, @@ -188,14 +206,14 @@ static const struct element elements[] = { { "title", NODE_TITLE }, { "trademark", NODE_IGNORE }, { "type", NODE_TYPE }, - { "ulink", NODE_ULINK }, + { "ulink", NODE_LINK }, { "userinput", NODE_LITERAL }, { "variablelist", NODE_VARIABLELIST }, { "varlistentry", NODE_VARLISTENTRY }, { "varname", NODE_VARNAME 
}, { "warning", NODE_WARNING }, { "wordasword", NODE_WORDASWORD }, - { "xi:include", NODE_DELETE_WARN }, + { "xi:include", NODE_INCLUDE }, { "year", NODE_YEAR }, { NULL, NODE_IGNORE } }; @@ -256,17 +274,30 @@ static const struct entity entities[] = { { NULL, NULL } }; +static size_t parse_string(struct parse *, char *, size_t, + enum pstate *, int); +static void parse_fd(struct parse *, int); + + static void +fatal(struct parse *p) +{ + fprintf(stderr, "%s:%d:%d: FATAL: ", p->fname, p->line, p->col); + perror(NULL); + exit(6); +} + +static void error_msg(struct parse *p, const char *fmt, ...) { va_list ap; - fprintf(stderr, "%s:%d:%d: ", p->fname, p->line, p->col); + fprintf(stderr, "%s:%d:%d: ERROR: ", p->fname, p->line, p->col); va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fputc('\n', stderr); - p->tree->flags |= TREE_FAIL; + p->tree->flags |= TREE_ERROR; } static void @@ -274,14 +305,15 @@ warn_msg(struct parse *p, const char *fmt, ...) { va_list ap; - if (p->warn == 0) + if ((p->flags & PFLAG_WARN) == 0) return; - fprintf(stderr, "%s:%d:%d: warning: ", p->fname, p->line, p->col); + fprintf(stderr, "%s:%d:%d: WARNING: ", p->fname, p->line, p->col); va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fputc('\n', stderr); + p->tree->flags |= TREE_WARN; } /* @@ -290,63 +322,72 @@ warn_msg(struct parse *p, const char *fmt, ...) * Otherwise, create a new one as a child of the current node. 
*/ static void -xml_char(struct parse *ps, const char *p, int sz) +xml_char(struct parse *p, const char *word, int sz) { - struct pnode *dat; + struct pnode *n; + size_t oldsz, newsz; - if (ps->del > 0) + assert(sz > 0); + if (p->del > 0) return; - if (ps->cur == NULL) { - error_msg(ps, "discarding text before document: %.*s", sz, p); + if ((n = p->cur) == NULL) { + error_msg(p, "discarding text before document: %.*s", sz, word); return; } - if (ps->cur->node != NODE_TEXT) { - if ((dat = calloc(1, sizeof(*dat))) == NULL) { - perror(NULL); - exit(1); - } - dat->node = NODE_TEXT; - dat->parent = ps->cur; - TAILQ_INIT(&dat->childq); - TAILQ_INIT(&dat->attrq); - TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); - ps->cur = dat; + if (n->node != NODE_TEXT) { + if ((n = pnode_alloc(p->cur)) == NULL) + fatal(p); + n->node = NODE_TEXT; + n->spc = (p->flags & PFLAG_SPC) != 0; + p->cur = n; } - if (ps->tree->flags & TREE_CLOSED && - ps->cur->parent == ps->tree->root) - warn_msg(ps, "text after end of document: %.*s", sz, p); + if (p->tree->flags & TREE_CLOSED && n->parent == p->tree->root) + warn_msg(p, "text after end of document: %.*s", sz, word); /* Append to the current text node. */ - assert(sz >= 0); - ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + sz + 1); - if (ps->cur->b == NULL) { - perror(NULL); - exit(1); - } - memcpy(ps->cur->b + ps->cur->bsz, p, sz); - ps->cur->bsz += sz; - ps->cur->b[ps->cur->bsz] = '\0'; - ps->cur->real = ps->cur->b; + oldsz = n->b == NULL ? 0 : strlen(n->b); + newsz = oldsz + sz; + if (oldsz && (p->flags & PFLAG_SPC)) + newsz++; + if ((n->b = realloc(n->b, newsz + 1)) == NULL) + fatal(p); + if (oldsz && (p->flags & PFLAG_SPC)) + n->b[oldsz++] = ' '; + memcpy(n->b + oldsz, word, sz); + n->b[newsz] = '\0'; + p->flags &= ~PFLAG_SPC; } +/* + * Close out the text node and strip trailing whitespace, if one is open. 
+ */ static void -pnode_trim(struct pnode *pn) +pnode_closetext(struct parse *p) { - assert(pn->node == NODE_TEXT); - for (; pn->bsz > 0; pn->b[--pn->bsz] = '\0') - if (isspace((unsigned char)pn->b[pn->bsz - 1]) == 0) - break; + struct pnode *n; + char *cp; + + if ((n = p->cur) == NULL || n->node != NODE_TEXT) + return; + p->cur = n->parent; + for (cp = strchr(n->b, '\0'); + cp > n->b && isspace((unsigned char)cp[-1]); + *--cp = '\0') + p->flags |= PFLAG_SPC; } static void xml_entity(struct parse *p, const char *name) { const struct entity *entity; - struct pnode *dat; + struct pnode *n; + const char *ccp; + char *cp; + enum pstate pstate; if (p->del > 0) return; @@ -356,11 +397,7 @@ xml_entity(struct parse *p, const char *name) return; } - /* Close out the text node, if there is one. */ - if (p->cur->node == NODE_TEXT) { - pnode_trim(p->cur); - p->cur = p->cur->parent; - } + pnode_closetext(p); if (p->tree->flags & TREE_CLOSED && p->cur == p->tree->root) warn_msg(p, "entity after end of document: &%s;", name); @@ -370,133 +407,210 @@ xml_entity(struct parse *p, const char *name) break; if (entity->roff == NULL) { + if (p->doctype != NULL) { + TAILQ_FOREACH(n, &p->doctype->childq, child) { + if ((ccp = pnode_getattr_raw(n, + ATTRKEY_NAME, NULL)) == NULL || + strcmp(ccp, name) != 0) + continue; + if ((ccp = pnode_getattr_raw(n, + ATTRKEY_SYSTEM, NULL)) != NULL) { + parse_file(p, -1, ccp); + p->flags &= ~PFLAG_SPC; + return; + } + if ((ccp = pnode_getattr_raw(n, + ATTRKEY_DEFINITION, NULL)) == NULL) + continue; + if ((cp = strdup(ccp)) == NULL) + fatal(p); + pstate = PARSE_ELEM; + parse_string(p, cp, strlen(cp), &pstate, 0); + p->flags &= ~PFLAG_SPC; + free(cp); + return; + } + } error_msg(p, "unknown entity &%s;", name); return; } /* Create, append, and close out an entity node. 
*/ - if ((dat = calloc(1, sizeof(*dat))) == NULL || - (dat->b = dat->real = strdup(entity->roff)) == NULL) { - perror(NULL); - exit(1); - } - dat->node = NODE_ESCAPE; - dat->bsz = strlen(dat->b); - dat->parent = p->cur; - TAILQ_INIT(&dat->childq); - TAILQ_INIT(&dat->attrq); - TAILQ_INSERT_TAIL(&p->cur->childq, dat, child); + if ((n = pnode_alloc(p->cur)) == NULL || + (n->b = strdup(entity->roff)) == NULL) + fatal(p); + n->node = NODE_ESCAPE; + n->spc = (p->flags & PFLAG_SPC) != 0; + p->flags &= ~PFLAG_SPC; } /* * Begin an element. */ static void -xml_elem_start(struct parse *ps, const char *name) +xml_elem_start(struct parse *p, const char *name) { const struct element *elem; - struct pnode *dat; + struct pnode *n; - if (*name == '!' || *name == '?') - return; - /* * An ancestor is excluded from the tree; * keep track of the number of levels excluded. */ - if (ps->del > 0) { - ps->del++; + if (p->del > 0) { + if (*name != '!' && *name != '?') + p->del++; return; } - /* Close out the text node, if there is one. */ - if (ps->cur != NULL && ps->cur->node == NODE_TEXT) { - pnode_trim(ps->cur); - ps->cur = ps->cur->parent; - } + pnode_closetext(p); for (elem = elements; elem->name != NULL; elem++) if (strcmp(elem->name, name) == 0) break; - if (elem->name == NULL) - error_msg(ps, "unknown element <%s>", name); + if (elem->name == NULL) { + if (*name == '!' 
|| *name == '?') + return; + error_msg(p, "unknown element <%s>", name); + } - ps->ncur = elem->node; + p->ncur = elem->node; - switch (ps->ncur) { + switch (p->ncur) { case NODE_DELETE_WARN: - warn_msg(ps, "skipping element <%s>", name); + warn_msg(p, "skipping element <%s>", name); /* FALLTHROUGH */ case NODE_DELETE: - ps->del = 1; + p->del = 1; /* FALLTHROUGH */ case NODE_IGNORE: return; case NODE_INLINEEQUATION: - ps->tree->flags |= TREE_EQN; + p->tree->flags |= TREE_EQN; break; default: break; } - if (ps->tree->flags & TREE_CLOSED && ps->cur->parent == NULL) - warn_msg(ps, "element after end of document: <%s>", name); + if (p->tree->flags & TREE_CLOSED && p->cur->parent == NULL) + warn_msg(p, "element after end of document: <%s>", name); - if ((dat = calloc(1, sizeof(*dat))) == NULL) { - perror(NULL); - exit(1); - } - dat->node = elem->node; - dat->parent = ps->cur; - TAILQ_INIT(&dat->childq); - TAILQ_INIT(&dat->attrq); + if ((n = pnode_alloc(p->cur)) == NULL) + fatal(p); - if (ps->cur != NULL) - TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); + /* + * Nodes that begin a new macro or request line or start by + * printing text always want whitespace before themselves. 
+ */ - ps->cur = dat; - if (ps->tree->root == NULL) - ps->tree->root = dat; + switch (n->node = elem->node) { + case NODE_DOCTYPE: + case NODE_ENTITY: + case NODE_SBR: + p->flags |= PFLAG_EEND; + /* FALLTHROUGH */ + case NODE_APPENDIX: + case NODE_AUTHORGROUP: + case NODE_BLOCKQUOTE: + case NODE_BOOKINFO: + case NODE_CAUTION: + case NODE_EDITOR: + case NODE_ENTRY: + case NODE_FUNCDEF: + case NODE_FUNCPROTOTYPE: + case NODE_INFORMALEQUATION: + case NODE_INLINEEQUATION: + case NODE_ITEMIZEDLIST: + case NODE_LEGALNOTICE: + case NODE_LITERALLAYOUT: + case NODE_NOTE: + case NODE_ORDEREDLIST: + case NODE_PARA: + case NODE_PREFACE: + case NODE_PROGRAMLISTING: + case NODE_REFMETA: + case NODE_REFNAMEDIV: + case NODE_REFSYNOPSISDIV: + case NODE_ROW: + case NODE_SCREEN: + case NODE_SECTION: + case NODE_SYNOPSIS: + case NODE_TGROUP: + case NODE_TIP: + case NODE_TITLE: + case NODE_VARIABLELIST: + case NODE_VARLISTENTRY: + case NODE_WARNING: + n->spc = 1; + break; + default: + n->spc = (p->flags & PFLAG_SPC) != 0; + break; + } + p->cur = n; + if (n->node == NODE_DOCTYPE) { + if (p->doctype == NULL) + p->doctype = n; + else + error_msg(p, "duplicate doctype"); + } else if (n->parent == NULL && p->tree->root == NULL) + p->tree->root = n; } static void -xml_attrkey(struct parse *ps, const char *name) +xml_attrkey(struct parse *p, const char *name) { - struct pattr *attr; + struct pattr *a; + const char *value; enum attrkey key; - if (ps->del > 0 || *name == '\0') + if (p->del > 0 || p->ncur == NODE_IGNORE || *name == '\0') return; + + if ((p->ncur == NODE_DOCTYPE || p->ncur == NODE_ENTITY) && + TAILQ_FIRST(&p->cur->attrq) == NULL) { + value = name; + name = "NAME"; + } else + value = NULL; + if ((key = attrkey_parse(name)) == ATTRKEY__MAX) { - ps->attr = 0; + p->flags &= ~PFLAG_ATTR; return; } - if ((attr = calloc(1, sizeof(*attr))) == NULL) { - perror(NULL); - exit(1); + if ((a = calloc(1, sizeof(*a))) == NULL) + fatal(p); + + a->key = key; + a->val = ATTRVAL__MAX; + if (value == 
NULL) { + a->rawval = NULL; + p->flags |= PFLAG_ATTR; + } else { + if ((a->rawval = strdup(value)) == NULL) + fatal(p); + p->flags &= ~PFLAG_ATTR; } - attr->key = key; - attr->val = ATTRVAL__MAX; - attr->rawval = NULL; - TAILQ_INSERT_TAIL(&ps->cur->attrq, attr, child); - ps->attr = 1; + TAILQ_INSERT_TAIL(&p->cur->attrq, a, child); + if (p->ncur == NODE_ENTITY && key == ATTRKEY_NAME) + xml_attrkey(p, "DEFINITION"); } static void -xml_attrval(struct parse *ps, const char *name) +xml_attrval(struct parse *p, const char *name) { - struct pattr *attr; + struct pattr *a; - if (ps->del > 0 || ps->attr == 0) + if (p->del > 0 || p->ncur == NODE_IGNORE || + (p->flags & PFLAG_ATTR) == 0) return; - if ((attr = TAILQ_LAST(&ps->cur->attrq, pattrq)) == NULL) + if ((a = TAILQ_LAST(&p->cur->attrq, pattrq)) == NULL) return; - if ((attr->val = attrval_parse(name)) == ATTRVAL__MAX && - (attr->rawval = strdup(name)) == NULL) { - perror(NULL); - exit(1); - } + if ((a->val = attrval_parse(name)) == ATTRVAL__MAX && + (a->rawval = strdup(name)) == NULL) + fatal(p); + p->flags &= ~PFLAG_ATTR; } /* @@ -504,25 +618,24 @@ xml_attrval(struct parse *ps, const char *name) * If we're at a text node, roll that one up first. */ static void -xml_elem_end(struct parse *ps, const char *name) +xml_elem_end(struct parse *p, const char *name) { const struct element *elem; + struct pnode *n; + const char *cp; enum nodeid node; /* * An ancestor is excluded from the tree; * keep track of the number of levels excluded. */ - if (ps->del > 1) { - ps->del--; + if (p->del > 1) { + p->del--; return; } - /* Close out the text node, if there is one. 
*/ - if (ps->del == 0 && ps->cur != NULL && ps->cur->node == NODE_TEXT) { - pnode_trim(ps->cur); - ps->cur = ps->cur->parent; - } + if (p->del == 0) + pnode_closetext(p); if (name != NULL) { for (elem = elements; elem->name != NULL; elem++) @@ -530,19 +643,35 @@ xml_elem_end(struct parse *ps, const char *name) break; node = elem->node; } else - node = ps->ncur; + node = p->ncur; switch (node) { case NODE_DELETE_WARN: case NODE_DELETE: - if (ps->del > 0) - ps->del--; + if (p->del > 0) + p->del--; break; case NODE_IGNORE: break; + case NODE_INCLUDE: + n = p->cur; + p->cur = p->cur->parent; + cp = pnode_getattr_raw(n, ATTRKEY_HREF, NULL); + if (cp == NULL) + error_msg(p, "<xi:include> element " + "without href attribute"); + else + parse_file(p, -1, cp); + pnode_unlink(n); + p->flags &= ~PFLAG_SPC; + break; + case NODE_DOCTYPE: + case NODE_SBR: + p->flags &= ~PFLAG_EEND; + /* FALLTHROUGH */ default: - if (ps->cur == NULL || node != ps->cur->node) { - warn_msg(ps, "element not open: </%s>", name); + if (p->cur == NULL || node != p->cur->node) { + warn_msg(p, "element not open: </%s>", name); break; } @@ -553,13 +682,16 @@ xml_elem_end(struct parse *ps, const char *name) * obviously better than discarding it or crashing. 
*/ - if (ps->cur->parent == NULL) - ps->tree->flags |= TREE_CLOSED; - else - ps->cur = ps->cur->parent; + if (p->cur->parent != NULL || node == NODE_DOCTYPE) { + p->cur = p->cur->parent; + if (p->cur != NULL) + p->ncur = p->cur->node; + } else + p->tree->flags |= TREE_CLOSED; + p->flags &= ~PFLAG_SPC; break; } - assert(ps->del == 0); + assert(p->del == 0); } struct parse * @@ -574,7 +706,10 @@ parse_alloc(int warn) free(p); return NULL; } - p->warn = warn; + if (warn) + p->flags |= PFLAG_WARN; + else + p->flags &= ~PFLAG_WARN; return p; } @@ -662,6 +797,7 @@ parse_string(struct parse *p, char *b, size_t rlen, if ((poff = pend) == rlen) break; if (isspace((unsigned char)b[pend])) { + p->flags |= PFLAG_SPC; increment(p, b, &pend, refill); continue; } @@ -702,6 +838,8 @@ parse_string(struct parse *p, char *b, size_t rlen, b[pend - 1] = '\0'; elem_end = 1; } + if (p->flags & PFLAG_EEND) + elem_end = 1; } b[pend] = '\0'; if (pend < rlen) @@ -713,6 +851,23 @@ parse_string(struct parse *p, char *b, size_t rlen, /* Look for an attribute name. */ } else if (*pstate == PARSE_TAG) { + switch (p->ncur) { + case NODE_DOCTYPE: + if (b[pend] == '[') { + *pstate = PARSE_ELEM; + increment(p, b, &pend, refill); + continue; + } + /* FALLTHROUGH */ + case NODE_ENTITY: + if (b[pend] == '"' || b[pend] == '\'') { + *pstate = PARSE_ARG; + continue; + } + break; + default: + break; + } if (advance(p, b, rlen, &pend, " =>", refill)) break; elem_end = 0; @@ -723,6 +878,8 @@ parse_string(struct parse *p, char *b, size_t rlen, b[pend - 1] = '\0'; elem_end = 1; } + if (p->flags & PFLAG_EEND) + elem_end = 1; break; case '=': *pstate = PARSE_ARG; @@ -771,11 +928,21 @@ parse_string(struct parse *p, char *b, size_t rlen, if (b[++poff] == '/') { elem_end = 1; poff++; - } else + } else { xml_elem_start(p, b + poff); + if (*pstate == PARSE_ELEM && + p->flags & PFLAG_EEND) + elem_end = 1; + } if (elem_end) xml_elem_end(p, b + poff); + /* Close a doctype. 
*/ + + } else if (p->ncur == NODE_DOCTYPE && b[poff] == ']') { + *pstate = PARSE_TAG; + increment(p, b, &pend, refill); + /* Process an entity. */ } else if (b[poff] == '&') { @@ -789,15 +956,29 @@ parse_string(struct parse *p, char *b, size_t rlen, /* Process text up to the next tag, entity, or EOL. */ } else { - advance(p, b, rlen, &pend, "<&", refill); + advance(p, b, rlen, &pend, + p->ncur == NODE_DOCTYPE ? "<&]\n" : "<&\n", + refill); xml_char(p, b + poff, pend - poff); + if (b[pend] == '\n') + pnode_closetext(p); } } return poff; } -struct ptree * -parse_file(struct parse *p, int fd, const char *fname) + +/* + * The read loop. + * If the previous token was incomplete and asked for more input, + * we have to enter the read loop once more even on EOF. + * Once rsz is 0, incomplete tokens will no longer ask for more input + * but instead use whatever there is, and then exit the read loop. + * The minus one on the size limit for read(2) is needed such that + * advance() can set b[rlen] to NUL when needed. + */ +static void +parse_fd(struct parse *p, int fd) { char b[4096]; ssize_t rsz; /* Return value from read(2). */ @@ -805,24 +986,8 @@ parse_file(struct parse *p, int fd, const char *fname) size_t poff; /* Parse offset in b[]. */ enum pstate pstate; - p->fname = fname; - p->nline = 1; - p->ncol = 1; - pstate = PARSE_ELEM; rlen = 0; - - /* - * Read loop. - * - * If the previous token was incomplete and asked for more - * input, we have to enter the read loop once more even on EOF. - * Once rsz is 0, incomplete tokens will no longer ask - * for more input but instead use whatever there is, - * and then exit the read loop. - * The minus one on the size limit for read(2) is needed - * such that advance() can set b[rlen] to NUL when needed. 
- */ - + pstate = PARSE_ELEM; while ((rsz = read(fd, b + rlen, sizeof(b) - rlen - 1)) >= 0 && (rlen += rsz) > 0) { poff = parse_string(p, b, rlen, &pstate, rsz > 0); @@ -831,15 +996,70 @@ parse_file(struct parse *p, int fd, const char *fname) rlen -= poff; memmove(b, b + poff, rlen); } - if (rsz < 0) { - perror(fname); - p->tree->flags |= TREE_FAIL; + if (rsz < 0) + error_msg(p, "read: %s", strerror(errno)); +} + +/* + * Open and parse a file. + */ +struct ptree * +parse_file(struct parse *p, int fd, const char *fname) +{ + const char *save_fname; + int save_line, save_col; + + /* Save and initialize reporting data. */ + + save_fname = p->fname; + save_line = p->nline; + save_col = p->ncol; + p->fname = fname; + p->line = 0; + p->col = 0; + + /* Open the file, unless it is already open. */ + + if (fd == -1 && (fd = open(fname, O_RDONLY, 0)) == -1) { + error_msg(p, "open: %s", strerror(errno)); + p->fname = save_fname; + return p->tree; } - if (p->cur != NULL && p->cur->node == NODE_TEXT) { - pnode_trim(p->cur); - p->cur = p->cur->parent; + + /* + * After opening the starting file, change to the directory it + * is located in, in case it wants to include any further files, + * which are typically given with relative paths in DocBook. + * Do this on a best-effort basis; don't complain about failure. + */ + + if (save_fname == NULL && (fname = dirname(fname)) != NULL && + strcmp(fname, ".") != 0) + (void)chdir(fname); + + /* Run the read loop. */ + + p->nline = 1; + p->ncol = 1; + parse_fd(p, fd); + + /* On the top level, finalize the parse tree. */ + + if (save_fname == NULL) { + pnode_closetext(p); + if (p->tree->root == NULL) + error_msg(p, "empty document"); + else if ((p->tree->flags & TREE_CLOSED) == 0) + warn_msg(p, "document not closed"); + pnode_unlink(p->doctype); } - if ((p->tree->flags & TREE_CLOSED) == 0) - warn_msg(p, "document not closed"); + + /* Clean up. 
*/ + + if (fd != STDIN_FILENO) + close(fd); + p->fname = save_fname; + p->nline = save_line; + p->ncol = save_col; return p->tree; }