=================================================================== RCS file: /cvs/docbook2mdoc/parse.c,v retrieving revision 1.23 retrieving revision 1.31 diff -u -p -r1.23 -r1.31 --- docbook2mdoc/parse.c 2019/04/08 14:37:31 1.23 +++ docbook2mdoc/parse.c 2019/04/10 14:34:08 1.31 @@ -1,4 +1,4 @@ -/* $Id: parse.c,v 1.23 2019/04/08 14:37:31 schwarze Exp $ */ +/* $Id: parse.c,v 1.31 2019/04/10 14:34:08 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * Copyright (c) 2019 Ingo Schwarze @@ -17,6 +17,9 @@ */ #include #include +#include +#include +#include #include #include #include @@ -210,7 +213,7 @@ static const struct element elements[] = { { "varname", NODE_VARNAME }, { "warning", NODE_WARNING }, { "wordasword", NODE_WORDASWORD }, - { "xi:include", NODE_DELETE_WARN }, + { "xi:include", NODE_INCLUDE }, { "year", NODE_YEAR }, { NULL, NODE_IGNORE } }; @@ -273,19 +276,28 @@ static const struct entity entities[] = { static size_t parse_string(struct parse *, char *, size_t, enum pstate *, int); +static void parse_fd(struct parse *, int); static void +fatal(struct parse *p) +{ + fprintf(stderr, "%s:%d:%d: FATAL: ", p->fname, p->line, p->col); + perror(NULL); + exit(6); +} + +static void error_msg(struct parse *p, const char *fmt, ...) { va_list ap; - fprintf(stderr, "%s:%d:%d: ", p->fname, p->line, p->col); + fprintf(stderr, "%s:%d:%d: ERROR: ", p->fname, p->line, p->col); va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fputc('\n', stderr); - p->tree->flags |= TREE_FAIL; + p->tree->flags |= TREE_ERROR; } static void @@ -296,11 +308,12 @@ warn_msg(struct parse *p, const char *fmt, ...) if ((p->flags & PFLAG_WARN) == 0) return; - fprintf(stderr, "%s:%d:%d: warning: ", p->fname, p->line, p->col); + fprintf(stderr, "%s:%d:%d: WARNING: ", p->fname, p->line, p->col); va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fputc('\n', stderr); + p->tree->flags |= TREE_WARN; } /* @@ -309,52 +322,47 @@ warn_msg(struct parse *p, const char *fmt, ...) * Otherwise, create a new one as a child of the current node. */ static void -xml_char(struct parse *ps, const char *p, int sz) +xml_char(struct parse *p, const char *word, int sz) { - struct pnode *dat; + struct pnode *n; size_t newsz; - if (ps->del > 0) + if (p->del > 0) return; - if (ps->cur == NULL) { - error_msg(ps, "discarding text before document: %.*s", sz, p); + if (p->cur == NULL) { + error_msg(p, "discarding text before document: %.*s", sz, word); return; } - if (ps->cur->node != NODE_TEXT) { - if ((dat = calloc(1, sizeof(*dat))) == NULL) { - perror(NULL); - exit(1); - } - dat->node = NODE_TEXT; - dat->spc = (ps->flags & PFLAG_SPC) != 0; - dat->parent = ps->cur; - TAILQ_INIT(&dat->childq); - TAILQ_INIT(&dat->attrq); - TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); - ps->cur = dat; + if (p->cur->node != NODE_TEXT) { + if ((n = calloc(1, sizeof(*n))) == NULL) + fatal(p); + n->node = NODE_TEXT; + n->spc = (p->flags & PFLAG_SPC) != 0; + n->parent = p->cur; + TAILQ_INIT(&n->childq); + TAILQ_INIT(&n->attrq); + TAILQ_INSERT_TAIL(&p->cur->childq, n, child); + p->cur = n; } - if (ps->tree->flags & TREE_CLOSED && - ps->cur->parent == ps->tree->root) - warn_msg(ps, "text after end of document: %.*s", sz, p); + if (p->tree->flags & TREE_CLOSED && + p->cur->parent == p->tree->root) + warn_msg(p, "text after end of document: %.*s", sz, word); /* Append to the current text node. */ assert(sz >= 0); - newsz = ps->cur->bsz + (ps->cur->bsz && (ps->flags & PFLAG_SPC)) + sz; - ps->cur->b = realloc(ps->cur->b, newsz + 1); - if (ps->cur->b == NULL) { - perror(NULL); - exit(1); - } - if (ps->cur->bsz && (ps->flags & PFLAG_SPC)) - ps->cur->b[ps->cur->bsz++] = ' '; - memcpy(ps->cur->b + ps->cur->bsz, p, sz); - ps->cur->b[ps->cur->bsz = newsz] = '\0'; - ps->cur->real = ps->cur->b; - ps->flags &= ~PFLAG_SPC; + newsz = p->cur->bsz + (p->cur->bsz && (p->flags & PFLAG_SPC)) + sz; + if ((p->cur->b = realloc(p->cur->b, newsz + 1)) == NULL) + fatal(p); + if (p->cur->bsz && (p->flags & PFLAG_SPC)) + p->cur->b[p->cur->bsz++] = ' '; + memcpy(p->cur->b + p->cur->bsz, word, sz); + p->cur->b[p->cur->bsz = newsz] = '\0'; + p->cur->real = p->cur->b; + p->flags &= ~PFLAG_SPC; } /* @@ -378,7 +386,7 @@ static void xml_entity(struct parse *p, const char *name) { const struct entity *entity; - struct pnode *dat; + struct pnode *n; const char *ccp; char *cp; enum pstate pstate; @@ -402,17 +410,22 @@ xml_entity(struct parse *p, const char *name) if (entity->roff == NULL) { if (p->doctype != NULL) { - TAILQ_FOREACH(dat, &p->doctype->childq, child) { - if ((ccp = pnode_getattr_raw(dat, + TAILQ_FOREACH(n, &p->doctype->childq, child) { + if ((ccp = pnode_getattr_raw(n, ATTRKEY_NAME, NULL)) == NULL || - strcmp(ccp, name) != 0 || - (ccp = pnode_getattr_raw(dat, - ATTRKEY_DEFINITION, NULL)) == NULL) + strcmp(ccp, name) != 0) continue; - if ((cp = strdup(ccp)) == NULL) { - perror(NULL); - exit(1); + if ((ccp = pnode_getattr_raw(n, + ATTRKEY_SYSTEM, NULL)) != NULL) { + parse_file(p, -1, ccp); + p->flags &= ~PFLAG_SPC; + return; } + if ((ccp = pnode_getattr_raw(n, + ATTRKEY_DEFINITION, NULL)) == NULL) + continue; + if ((cp = strdup(ccp)) == NULL) + fatal(p); pstate = PARSE_ELEM; parse_string(p, cp, strlen(cp), &pstate, 0); p->flags &= ~PFLAG_SPC; @@ -425,18 +438,16 @@ xml_entity(struct parse *p, const char *name) } /* Create, append, and close out an entity node. */ - if ((dat = calloc(1, sizeof(*dat))) == NULL || - (dat->b = dat->real = strdup(entity->roff)) == NULL) { - perror(NULL); - exit(1); - } - dat->node = NODE_ESCAPE; - dat->bsz = strlen(dat->b); - dat->spc = (p->flags & PFLAG_SPC) != 0; - dat->parent = p->cur; - TAILQ_INIT(&dat->childq); - TAILQ_INIT(&dat->attrq); - TAILQ_INSERT_TAIL(&p->cur->childq, dat, child); + if ((n = calloc(1, sizeof(*n))) == NULL || + (n->b = n->real = strdup(entity->roff)) == NULL) + fatal(p); + n->node = NODE_ESCAPE; + n->bsz = strlen(n->b); + n->spc = (p->flags & PFLAG_SPC) != 0; + n->parent = p->cur; + TAILQ_INIT(&n->childq); + TAILQ_INIT(&n->attrq); + TAILQ_INSERT_TAIL(&p->cur->childq, n, child); p->flags &= ~PFLAG_SPC; } @@ -444,22 +455,22 @@ xml_entity(struct parse *p, const char *name) * Begin an element. */ static void -xml_elem_start(struct parse *ps, const char *name) +xml_elem_start(struct parse *p, const char *name) { const struct element *elem; - struct pnode *dat; + struct pnode *n; /* * An ancestor is excluded from the tree; * keep track of the number of levels excluded. */ - if (ps->del > 0) { + if (p->del > 0) { if (*name != '!' && *name != '?') - ps->del++; + p->del++; return; } - pnode_closetext(ps); + pnode_closetext(p); for (elem = elements; elem->name != NULL; elem++) if (strcmp(elem->name, name) == 0) @@ -468,45 +479,43 @@ xml_elem_start(struct parse *ps, const char *name) if (elem->name == NULL) { if (*name == '!' || *name == '?') return; - error_msg(ps, "unknown element <%s>", name); + error_msg(p, "unknown element <%s>", name); } - ps->ncur = elem->node; + p->ncur = elem->node; - switch (ps->ncur) { + switch (p->ncur) { case NODE_DELETE_WARN: - warn_msg(ps, "skipping element <%s>", name); + warn_msg(p, "skipping element <%s>", name); /* FALLTHROUGH */ case NODE_DELETE: - ps->del = 1; + p->del = 1; /* FALLTHROUGH */ case NODE_IGNORE: return; case NODE_INLINEEQUATION: - ps->tree->flags |= TREE_EQN; + p->tree->flags |= TREE_EQN; break; default: break; } - if (ps->tree->flags & TREE_CLOSED && ps->cur->parent == NULL) - warn_msg(ps, "element after end of document: <%s>", name); + if (p->tree->flags & TREE_CLOSED && p->cur->parent == NULL) + warn_msg(p, "element after end of document: <%s>", name); - if ((dat = calloc(1, sizeof(*dat))) == NULL) { - perror(NULL); - exit(1); - } + if ((n = calloc(1, sizeof(*n))) == NULL) + fatal(p); /* * Nodes that begin a new macro or request line or start by * printing text always want whitespace before themselves. */ - switch (dat->node = elem->node) { + switch (n->node = elem->node) { case NODE_DOCTYPE: case NODE_ENTITY: case NODE_SBR: - ps->flags |= PFLAG_EEND; + p->flags |= PFLAG_EEND; /* FALLTHROUGH */ case NODE_APPENDIX: case NODE_AUTHORGROUP: @@ -540,86 +549,82 @@ xml_elem_start(struct parse *ps, const char *name) case NODE_VARIABLELIST: case NODE_VARLISTENTRY: case NODE_WARNING: - dat->spc = 1; + n->spc = 1; break; default: - dat->spc = (ps->flags & PFLAG_SPC) != 0; + n->spc = (p->flags & PFLAG_SPC) != 0; break; } - dat->parent = ps->cur; - TAILQ_INIT(&dat->childq); - TAILQ_INIT(&dat->attrq); + n->parent = p->cur; + TAILQ_INIT(&n->childq); + TAILQ_INIT(&n->attrq); - if (ps->cur != NULL) - TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child); + if (p->cur != NULL) + TAILQ_INSERT_TAIL(&p->cur->childq, n, child); - ps->cur = dat; - if (dat->node == NODE_DOCTYPE) { - if (ps->doctype == NULL) - ps->doctype = dat; + p->cur = n; + if (n->node == NODE_DOCTYPE) { + if (p->doctype == NULL) + p->doctype = n; else - error_msg(ps, "duplicate doctype"); - } else if (dat->parent == NULL && ps->tree->root == NULL) - ps->tree->root = dat; + error_msg(p, "duplicate doctype"); + } else if (n->parent == NULL && p->tree->root == NULL) + p->tree->root = n; } static void -xml_attrkey(struct parse *ps, const char *name) +xml_attrkey(struct parse *p, const char *name) { - struct pattr *attr; + struct pattr *a; const char *value; enum attrkey key; - if (ps->del > 0 || ps->ncur == NODE_IGNORE || *name == '\0') + if (p->del > 0 || p->ncur == NODE_IGNORE || *name == '\0') return; - if ((ps->ncur == NODE_DOCTYPE || ps->ncur == NODE_ENTITY) && - TAILQ_FIRST(&ps->cur->attrq) == NULL) { + if ((p->ncur == NODE_DOCTYPE || p->ncur == NODE_ENTITY) && + TAILQ_FIRST(&p->cur->attrq) == NULL) { value = name; name = "NAME"; } else value = NULL; if ((key = attrkey_parse(name)) == ATTRKEY__MAX) { - ps->flags &= ~PFLAG_ATTR; + p->flags &= ~PFLAG_ATTR; return; } - if ((attr = calloc(1, sizeof(*attr))) == NULL) { - perror(NULL); - exit(1); - } - attr->key = key; - attr->val = ATTRVAL__MAX; + if ((a = calloc(1, sizeof(*a))) == NULL) + fatal(p); + + a->key = key; + a->val = ATTRVAL__MAX; if (value == NULL) { - attr->rawval = NULL; - ps->flags |= PFLAG_ATTR; + a->rawval = NULL; + p->flags |= PFLAG_ATTR; } else { - if ((attr->rawval = strdup(value)) == NULL) { - perror(NULL); - exit(1); - } - ps->flags &= ~PFLAG_ATTR; + if ((a->rawval = strdup(value)) == NULL) + fatal(p); + p->flags &= ~PFLAG_ATTR; } - TAILQ_INSERT_TAIL(&ps->cur->attrq, attr, child); - if (ps->ncur == NODE_ENTITY && key == ATTRKEY_NAME) - xml_attrkey(ps, "DEFINITION"); + TAILQ_INSERT_TAIL(&p->cur->attrq, a, child); + if (p->ncur == NODE_ENTITY && key == ATTRKEY_NAME) + xml_attrkey(p, "DEFINITION"); } static void -xml_attrval(struct parse *ps, const char *name) +xml_attrval(struct parse *p, const char *name) { - struct pattr *attr; + struct pattr *a; - if (ps->del > 0 || ps->ncur == NODE_IGNORE || - (ps->flags & PFLAG_ATTR) == 0) + if (p->del > 0 || p->ncur == NODE_IGNORE || + (p->flags & PFLAG_ATTR) == 0) return; - if ((attr = TAILQ_LAST(&ps->cur->attrq, pattrq)) == NULL) + if ((a = TAILQ_LAST(&p->cur->attrq, pattrq)) == NULL) return; - if ((attr->val = attrval_parse(name)) == ATTRVAL__MAX && - (attr->rawval = strdup(name)) == NULL) { - perror(NULL); - exit(1); - } + if ((a->val = attrval_parse(name)) == ATTRVAL__MAX && + (a->rawval = strdup(name)) == NULL) + fatal(p); + p->flags &= ~PFLAG_ATTR; } /* @@ -627,22 +632,24 @@ xml_attrval(struct parse *ps, const char *name) * If we're at a text node, roll that one up first. */ static void -xml_elem_end(struct parse *ps, const char *name) +xml_elem_end(struct parse *p, const char *name) { const struct element *elem; + struct pnode *n; + const char *cp; enum nodeid node; /* * An ancestor is excluded from the tree; * keep track of the number of levels excluded. */ - if (ps->del > 1) { - ps->del--; + if (p->del > 1) { + p->del--; return; } - if (ps->del == 0) - pnode_closetext(ps); + if (p->del == 0) + pnode_closetext(p); if (name != NULL) { for (elem = elements; elem->name != NULL; elem++) @@ -650,22 +657,34 @@ xml_elem_end(struct parse *ps, const char *name) break; node = elem->node; } else - node = ps->ncur; + node = p->ncur; switch (node) { case NODE_DELETE_WARN: case NODE_DELETE: - if (ps->del > 0) - ps->del--; + if (p->del > 0) + p->del--; break; case NODE_IGNORE: break; + case NODE_INCLUDE: + n = p->cur; + p->cur = p->cur->parent; + cp = pnode_getattr_raw(n, ATTRKEY_HREF, NULL); + if (cp == NULL) + error_msg(p, " element " + "without href attribute"); + else + parse_file(p, -1, cp); + pnode_unlink(n); + p->flags &= ~PFLAG_SPC; + break; case NODE_DOCTYPE: - ps->flags &= ~PFLAG_EEND; + p->flags &= ~PFLAG_EEND; /* FALLTHROUGH */ default: - if (ps->cur == NULL || node != ps->cur->node) { - warn_msg(ps, "element not open: ", name); + if (p->cur == NULL || node != p->cur->node) { + warn_msg(p, "element not open: ", name); break; } @@ -676,16 +695,16 @@ xml_elem_end(struct parse *ps, const char *name) * obviously better than discarding it or crashing. */ - if (ps->cur->parent != NULL || node == NODE_DOCTYPE) { - ps->cur = ps->cur->parent; - if (ps->cur != NULL) - ps->ncur = ps->cur->node; + if (p->cur->parent != NULL || node == NODE_DOCTYPE) { + p->cur = p->cur->parent; + if (p->cur != NULL) + p->ncur = p->cur->node; } else - ps->tree->flags |= TREE_CLOSED; - ps->flags &= ~PFLAG_SPC; + p->tree->flags |= TREE_CLOSED; + p->flags &= ~PFLAG_SPC; break; } - assert(ps->del == 0); + assert(p->del == 0); } struct parse * @@ -950,15 +969,27 @@ parse_string(struct parse *p, char *b, size_t rlen, /* Process text up to the next tag, entity, or EOL. */ } else { - advance(p, b, rlen, &pend, "<&", refill); + advance(p, b, rlen, &pend, + p->ncur == NODE_DOCTYPE ? "<&]" : "<&", + refill); xml_char(p, b + poff, pend - poff); } } return poff; } -struct ptree * -parse_file(struct parse *p, int fd, const char *fname) + +/* + * The read loop. + * If the previous token was incomplete and asked for more input, + * we have to enter the read loop once more even on EOF. + * Once rsz is 0, incomplete tokens will no longer ask for more input + * but instead use whatever there is, and then exit the read loop. + * The minus one on the size limit for read(2) is needed such that + * advance() can set b[rlen] to NUL when needed. + */ +static void +parse_fd(struct parse *p, int fd) { char b[4096]; ssize_t rsz; /* Return value from read(2). */ @@ -966,24 +997,8 @@ parse_file(struct parse *p, int fd, const char *fname) size_t poff; /* Parse offset in b[]. */ enum pstate pstate; - p->fname = fname; - p->nline = 1; - p->ncol = 1; - pstate = PARSE_ELEM; rlen = 0; - - /* - * Read loop. - * - * If the previous token was incomplete and asked for more - * input, we have to enter the read loop once more even on EOF. - * Once rsz is 0, incomplete tokens will no longer ask - * for more input but instead use whatever there is, - * and then exit the read loop. - * The minus one on the size limit for read(2) is needed - * such that advance() can set b[rlen] to NUL when needed. - */ - + pstate = PARSE_ELEM; while ((rsz = read(fd, b + rlen, sizeof(b) - rlen - 1)) >= 0 && (rlen += rsz) > 0) { poff = parse_string(p, b, rlen, &pstate, rsz > 0); @@ -992,13 +1007,70 @@ parse_file(struct parse *p, int fd, const char *fname) rlen -= poff; memmove(b, b + poff, rlen); } - if (rsz < 0) { - perror(fname); - p->tree->flags |= TREE_FAIL; + if (rsz < 0) + error_msg(p, "read: %s", strerror(errno)); +} + +/* + * Open and parse a file. + */ +struct ptree * +parse_file(struct parse *p, int fd, const char *fname) +{ + const char *save_fname; + int save_line, save_col; + + /* Save and initialize reporting data. */ + + save_fname = p->fname; + save_line = p->nline; + save_col = p->ncol; + p->fname = fname; + p->line = 0; + p->col = 0; + + /* Open the file, unless it is already open. */ + + if (fd == -1 && (fd = open(fname, O_RDONLY, 0)) == -1) { + error_msg(p, "open: %s", strerror(errno)); + p->fname = save_fname; + return p->tree; } - pnode_closetext(p); - if ((p->tree->flags & TREE_CLOSED) == 0) - warn_msg(p, "document not closed"); - pnode_unlink(p->doctype); + + /* + * After opening the starting file, change to the directory it + * is located in, in case it wants to include any further files, + * which are typically given with relative paths in DocBook. + * Do this on a best-effort basis; don't complain about failure. + */ + + if (save_fname == NULL && (fname = dirname(fname)) != NULL && + strcmp(fname, ".") != 0) + (void)chdir(fname); + + /* Run the read loop. */ + + p->nline = 1; + p->ncol = 1; + parse_fd(p, fd); + + /* On the top level, finalize the parse tree. */ + + if (save_fname == NULL) { + pnode_closetext(p); + if (p->tree->root == NULL) + error_msg(p, "empty document"); + else if ((p->tree->flags & TREE_CLOSED) == 0) + warn_msg(p, "document not closed"); + pnode_unlink(p->doctype); + } + + /* Clean up. */ + + if (fd != STDIN_FILENO) + close(fd); + p->fname = save_fname; + p->nline = save_line; + p->ncol = save_col; return p->tree; }