=================================================================== RCS file: /cvs/docbook2mdoc/parse.c,v retrieving revision 1.23 retrieving revision 1.29 diff -u -p -r1.23 -r1.29 --- docbook2mdoc/parse.c 2019/04/08 14:37:31 1.23 +++ docbook2mdoc/parse.c 2019/04/09 15:23:51 1.29 @@ -1,4 +1,4 @@ -/* $Id: parse.c,v 1.23 2019/04/08 14:37:31 schwarze Exp $ */ +/* $Id: parse.c,v 1.29 2019/04/09 15:23:51 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * Copyright (c) 2019 Ingo Schwarze @@ -17,6 +17,9 @@ */ #include #include +#include +#include +#include #include #include #include @@ -210,7 +213,7 @@ static const struct element elements[] = { { "varname", NODE_VARNAME }, { "warning", NODE_WARNING }, { "wordasword", NODE_WORDASWORD }, - { "xi:include", NODE_DELETE_WARN }, + { "xi:include", NODE_INCLUDE }, { "year", NODE_YEAR }, { NULL, NODE_IGNORE } }; @@ -273,19 +276,28 @@ static const struct entity entities[] = { static size_t parse_string(struct parse *, char *, size_t, enum pstate *, int); +static void parse_fd(struct parse *, int); static void +fatal(struct parse *p) +{ + fprintf(stderr, "%s:%d:%d: FATAL: ", p->fname, p->line, p->col); + perror(NULL); + exit(6); +} + +static void error_msg(struct parse *p, const char *fmt, ...) { va_list ap; - fprintf(stderr, "%s:%d:%d: ", p->fname, p->line, p->col); + fprintf(stderr, "%s:%d:%d: ERROR: ", p->fname, p->line, p->col); va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fputc('\n', stderr); - p->tree->flags |= TREE_FAIL; + p->tree->flags |= TREE_ERROR; } static void @@ -296,11 +308,12 @@ warn_msg(struct parse *p, const char *fmt, ...) if ((p->flags & PFLAG_WARN) == 0) return; - fprintf(stderr, "%s:%d:%d: warning: ", p->fname, p->line, p->col); + fprintf(stderr, "%s:%d:%d: WARNING: ", p->fname, p->line, p->col); va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fputc('\n', stderr); + p->tree->flags |= TREE_WARN; } /* @@ -323,10 +336,8 @@ xml_char(struct parse *ps, const char *p, int sz) } if (ps->cur->node != NODE_TEXT) { - if ((dat = calloc(1, sizeof(*dat))) == NULL) { - perror(NULL); - exit(1); - } + if ((dat = calloc(1, sizeof(*dat))) == NULL) + fatal(ps); dat->node = NODE_TEXT; dat->spc = (ps->flags & PFLAG_SPC) != 0; dat->parent = ps->cur; @@ -344,11 +355,8 @@ xml_char(struct parse *ps, const char *p, int sz) assert(sz >= 0); newsz = ps->cur->bsz + (ps->cur->bsz && (ps->flags & PFLAG_SPC)) + sz; - ps->cur->b = realloc(ps->cur->b, newsz + 1); - if (ps->cur->b == NULL) { - perror(NULL); - exit(1); - } + if ((ps->cur->b = realloc(ps->cur->b, newsz + 1)) == NULL) + fatal(ps); if (ps->cur->bsz && (ps->flags & PFLAG_SPC)) ps->cur->b[ps->cur->bsz++] = ' '; memcpy(ps->cur->b + ps->cur->bsz, p, sz); @@ -405,14 +413,19 @@ xml_entity(struct parse *p, const char *name) TAILQ_FOREACH(dat, &p->doctype->childq, child) { if ((ccp = pnode_getattr_raw(dat, ATTRKEY_NAME, NULL)) == NULL || - strcmp(ccp, name) != 0 || - (ccp = pnode_getattr_raw(dat, - ATTRKEY_DEFINITION, NULL)) == NULL) + strcmp(ccp, name) != 0) continue; - if ((cp = strdup(ccp)) == NULL) { - perror(NULL); - exit(1); + if ((ccp = pnode_getattr_raw(dat, + ATTRKEY_SYSTEM, NULL)) != NULL) { + parse_file(p, -1, ccp); + p->flags &= ~PFLAG_SPC; + return; } + if ((ccp = pnode_getattr_raw(dat, + ATTRKEY_DEFINITION, NULL)) == NULL) + continue; + if ((cp = strdup(ccp)) == NULL) + fatal(p); pstate = PARSE_ELEM; parse_string(p, cp, strlen(cp), &pstate, 0); p->flags &= ~PFLAG_SPC; @@ -426,10 +439,8 @@ xml_entity(struct parse *p, const char *name) /* Create, append, and close out an entity node. */ if ((dat = calloc(1, sizeof(*dat))) == NULL || - (dat->b = dat->real = strdup(entity->roff)) == NULL) { - perror(NULL); - exit(1); - } + (dat->b = dat->real = strdup(entity->roff)) == NULL) + fatal(p); dat->node = NODE_ESCAPE; dat->bsz = strlen(dat->b); dat->spc = (p->flags & PFLAG_SPC) != 0; @@ -492,10 +503,8 @@ xml_elem_start(struct parse *ps, const char *name) if (ps->tree->flags & TREE_CLOSED && ps->cur->parent == NULL) warn_msg(ps, "element after end of document: <%s>", name); - if ((dat = calloc(1, sizeof(*dat))) == NULL) { - perror(NULL); - exit(1); - } + if ((dat = calloc(1, sizeof(*dat))) == NULL) + fatal(ps); /* * Nodes that begin a new macro or request line or start by @@ -584,20 +593,17 @@ xml_attrkey(struct parse *ps, const char *name) ps->flags &= ~PFLAG_ATTR; return; } - if ((attr = calloc(1, sizeof(*attr))) == NULL) { - perror(NULL); - exit(1); - } + if ((attr = calloc(1, sizeof(*attr))) == NULL) + fatal(ps); + attr->key = key; attr->val = ATTRVAL__MAX; if (value == NULL) { attr->rawval = NULL; ps->flags |= PFLAG_ATTR; } else { - if ((attr->rawval = strdup(value)) == NULL) { - perror(NULL); - exit(1); - } + if ((attr->rawval = strdup(value)) == NULL) + fatal(ps); ps->flags &= ~PFLAG_ATTR; } TAILQ_INSERT_TAIL(&ps->cur->attrq, attr, child); @@ -616,10 +622,9 @@ xml_attrval(struct parse *ps, const char *name) if ((attr = TAILQ_LAST(&ps->cur->attrq, pattrq)) == NULL) return; if ((attr->val = attrval_parse(name)) == ATTRVAL__MAX && - (attr->rawval = strdup(name)) == NULL) { - perror(NULL); - exit(1); - } + (attr->rawval = strdup(name)) == NULL) + fatal(ps); + ps->flags &= ~PFLAG_ATTR; } /* @@ -630,6 +635,8 @@ static void xml_elem_end(struct parse *ps, const char *name) { const struct element *elem; + struct pnode *n; + const char *cp; enum nodeid node; /* @@ -660,6 +667,18 @@ xml_elem_end(struct parse *ps, const char *name) break; case NODE_IGNORE: break; + case NODE_INCLUDE: + n = ps->cur; + ps->cur = ps->cur->parent; + cp = pnode_getattr_raw(n, ATTRKEY_HREF, NULL); + if (cp == NULL) + error_msg(ps, " element " + "without href attribute"); + else + parse_file(ps, -1, cp); + pnode_unlink(n); + ps->flags &= ~PFLAG_SPC; + break; case NODE_DOCTYPE: ps->flags &= ~PFLAG_EEND; /* FALLTHROUGH */ @@ -950,15 +969,27 @@ parse_string(struct parse *p, char *b, size_t rlen, /* Process text up to the next tag, entity, or EOL. */ } else { - advance(p, b, rlen, &pend, "<&", refill); + advance(p, b, rlen, &pend, + p->ncur == NODE_DOCTYPE ? "<&]" : "<&", + refill); xml_char(p, b + poff, pend - poff); } } return poff; } -struct ptree * -parse_file(struct parse *p, int fd, const char *fname) + +/* + * The read loop. + * If the previous token was incomplete and asked for more input, + * we have to enter the read loop once more even on EOF. + * Once rsz is 0, incomplete tokens will no longer ask for more input + * but instead use whatever there is, and then exit the read loop. + * The minus one on the size limit for read(2) is needed such that + * advance() can set b[rlen] to NUL when needed. + */ +static void +parse_fd(struct parse *p, int fd) { char b[4096]; ssize_t rsz; /* Return value from read(2). */ @@ -966,24 +997,8 @@ parse_file(struct parse *p, int fd, const char *fname) size_t poff; /* Parse offset in b[]. */ enum pstate pstate; - p->fname = fname; - p->nline = 1; - p->ncol = 1; - pstate = PARSE_ELEM; rlen = 0; - - /* - * Read loop. - * - * If the previous token was incomplete and asked for more - * input, we have to enter the read loop once more even on EOF. - * Once rsz is 0, incomplete tokens will no longer ask - * for more input but instead use whatever there is, - * and then exit the read loop. - * The minus one on the size limit for read(2) is needed - * such that advance() can set b[rlen] to NUL when needed. - */ - + pstate = PARSE_ELEM; while ((rsz = read(fd, b + rlen, sizeof(b) - rlen - 1)) >= 0 && (rlen += rsz) > 0) { poff = parse_string(p, b, rlen, &pstate, rsz > 0); @@ -992,13 +1007,70 @@ parse_file(struct parse *p, int fd, const char *fname) rlen -= poff; memmove(b, b + poff, rlen); } - if (rsz < 0) { - perror(fname); - p->tree->flags |= TREE_FAIL; + if (rsz < 0) + error_msg(p, "read: %s", strerror(errno)); +} + +/* + * Open and parse a file. + */ +struct ptree * +parse_file(struct parse *p, int fd, const char *fname) +{ + const char *save_fname; + int save_line, save_col; + + /* Save and initialize reporting data. */ + + save_fname = p->fname; + save_line = p->nline; + save_col = p->ncol; + p->fname = fname; + p->line = 0; + p->col = 0; + + /* Open the file, unless it is already open. */ + + if (fd == -1 && (fd = open(fname, O_RDONLY, 0)) == -1) { + error_msg(p, "open: %s", strerror(errno)); + p->fname = save_fname; + return p->tree; } - pnode_closetext(p); - if ((p->tree->flags & TREE_CLOSED) == 0) - warn_msg(p, "document not closed"); - pnode_unlink(p->doctype); + + /* + * After opening the starting file, change to the directory it + * is located in, in case it wants to include any further files, + * which are typically given with relative paths in DocBook. + * Do this on a best-effort basis; don't complain about failure. + */ + + if (save_fname == NULL && (fname = dirname(fname)) != NULL && + strcmp(fname, ".") != 0) + (void)chdir(fname); + + /* Run the read loop. */ + + p->nline = 1; + p->ncol = 1; + parse_fd(p, fd); + + /* On the top level, finalize the parse tree. */ + + if (save_fname == NULL) { + pnode_closetext(p); + if (p->tree->root == NULL) + error_msg(p, "empty document"); + else if ((p->tree->flags & TREE_CLOSED) == 0) + warn_msg(p, "document not closed"); + pnode_unlink(p->doctype); + } + + /* Clean up. */ + + if (fd != STDIN_FILENO) + close(fd); + p->fname = save_fname; + p->nline = save_line; + p->ncol = save_col; return p->tree; }