=================================================================== RCS file: /cvs/mandoc/read.c,v retrieving revision 1.181 retrieving revision 1.198 diff -u -p -r1.181 -r1.198 --- mandoc/read.c 2017/06/24 18:58:33 1.181 +++ mandoc/read.c 2018/08/23 19:33:27 1.198 @@ -1,7 +1,7 @@ -/* $Id: read.c,v 1.181 2017/06/24 18:58:33 schwarze Exp $ */ +/* $Id: read.c,v 1.198 2018/08/23 19:33:27 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2010-2017 Ingo Schwarze + * Copyright (c) 2010-2018 Ingo Schwarze * Copyright (c) 2010, 2012 Joerg Sonnenberger * * Permission to use, copy, modify, and distribute this software for any @@ -24,9 +24,6 @@ #include #include -#if HAVE_ERR -#include -#endif #include #include #include @@ -42,7 +39,6 @@ #include "mdoc.h" #include "man.h" #include "libmandoc.h" -#include "roff_int.h" #define REPARSE_LIMIT 1000 @@ -52,7 +48,7 @@ struct mparse { char *sodest; /* filename pointed to by .so */ const char *file; /* filename of current input file */ struct buf *primary; /* buffer currently being parsed */ - struct buf *secondary; /* preprocessed copy of input */ + struct buf *secondary; /* copy of top level input */ const char *os_s; /* default operating system */ mandocmsg mmsg; /* warning/error message handler */ enum mandoclevel file_status; /* status of current parse */ @@ -65,8 +61,9 @@ struct mparse { }; static void choose_parser(struct mparse *); +static void free_buf_list(struct buf *); static void resize_buf(struct buf *, size_t); -static int mparse_buf_r(struct mparse *, struct buf, size_t, int); +static enum rofferr mparse_buf_r(struct mparse *, struct buf, size_t, int); static int read_whole_file(struct mparse *, const char *, int, struct buf *, int *); static void mparse_end(struct mparse *); @@ -75,7 +72,7 @@ static void mparse_parse_buffer(struct mparse *, str static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { MANDOCERR_OK, - MANDOCERR_STYLE, + MANDOCERR_OK, MANDOCERR_WARNING, MANDOCERR_ERROR, MANDOCERR_UNSUPP, @@ -93,33 +90,41 @@ static const char * const mandocerrs[MANDOCERR_MAX] = "unknown architecture", "operating system explicitly specified", "RCS id missing", + "referenced manual not found", "generic style suggestion", "legacy man(7) date format", + "normalizing date format to", + "lower case character in document title", "duplicate RCS id", + "possible typo in section name", + "unterminated quoted argument", "useless macro", "consider using OS macro", "errnos out of order", "duplicate errno", - "description line ends with a full stop", + "trailing delimiter", "no blank before trailing delimiter", + "fill mode already enabled, skipping", + "fill mode already disabled, skipping", + "verbatim \"--\", maybe consider using \\(em", "function name without markup", + "whitespace at end of input line", + "bad comment style", "generic warning", /* related to the prologue */ "missing manual title, using UNTITLED", "missing manual title, using \"\"", - "lower case character in document title", "missing manual section, using \"\"", "unknown manual section", "missing date, using today's date", "cannot parse date, using it verbatim", + "date in the future, using it anyway", "missing Os macro, using \"\"", - "duplicate prologue macro", "late prologue macro", - "skipping late title macro", "prologue macros out of order", /* related to document structure */ @@ -137,6 +142,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = "sections out of conventional order", "duplicate section title", "unexpected section", + "cross reference to self", "unusual Xr order", "unusual Xr punctuation", "AUTHORS section without An macro", @@ -150,8 +156,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = "blocks badly nested", "nested displays are not portable", "moving content out of list", - "fill mode already enabled, skipping", - "fill mode already disabled, skipping", + "first macro on line", "line scope broken", "skipping blank line in line scope", @@ -168,6 +173,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = "missing function name, using \"\"", "empty head in list item", "empty list item", + "missing argument, using next line", "missing font type, using \\fR", "unknown font type, using \\fR", "nothing follows prefix", @@ -179,7 +185,6 @@ static const char * const mandocerrs[MANDOCERR_MAX] = "missing eqn box, using \"\"", /* related to bad macro arguments */ - "unterminated quoted argument", "duplicate argument", "skipping duplicate argument", "skipping duplicate display type", @@ -198,9 +203,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = /* related to plain text */ "blank line in fill mode, using .sp", "tab in filled text", - "whitespace at end of input line", "new sentence, new line", - "bad comment style", "invalid escape sequence", "undefined string, using \"\"", @@ -226,9 +229,12 @@ static const char * const mandocerrs[MANDOCERR_MAX] = /* related to document structure and macros */ NULL, + "duplicate prologue macro", + "skipping late title macro", "input stack limit exceeded, infinite loop?", "skipping bad character", "skipping unknown macro", + "ignoring request outside macro", "skipping insecure request", "skipping item outside list", "skipping column outside column list", @@ -239,6 +245,8 @@ static const char * const mandocerrs[MANDOCERR_MAX] = /* related to request and macro arguments */ "escaped character not allowed in a name", + "using macro argument outside macro", + "argument number is not numeric", "NOT IMPLEMENTED: Bd -file", "skipping display without arguments", "missing list type, using -item", @@ -247,6 +255,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = "uname(3) system call failed, using UNKNOWN", "unknown standard specifier", "skipping request without numeric argument", + "excessive shift", "NOT IMPLEMENTED: .so with absolute path or \"..\"", ".so request failed", "skipping all arguments", @@ -282,6 +291,19 @@ resize_buf(struct buf *buf, size_t initial) } static void +free_buf_list(struct buf *buf) +{ + struct buf *tmp; + + while (buf != NULL) { + tmp = buf; + buf = tmp->next; + free(tmp->buf); + free(tmp); + } +} + +static void choose_parser(struct mparse *curp) { char *cp, *ep; @@ -334,24 +356,27 @@ choose_parser(struct mparse *curp) * macros, inline equations, and input line traps) * and indirectly (for .so file inclusion). */ -static int +static enum rofferr mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) { - const struct tbl_span *span; struct buf ln; + struct buf *firstln, *lastln, *thisln; const char *save_file; char *cp; size_t pos; /* byte number in the ln buffer */ - enum rofferr rr; + enum rofferr line_result, result; int of; int lnn; /* line number in the real file */ int fd; unsigned char c; - memset(&ln, 0, sizeof(ln)); - + ln.sz = 256; + ln.buf = mandoc_malloc(ln.sz); + ln.next = NULL; + firstln = NULL; lnn = curp->line; pos = 0; + result = ROFF_CONT; while (i < blk.sz) { if (0 == pos && '\0' == blk.buf[i]) @@ -386,10 +411,10 @@ mparse_buf_r(struct mparse *curp, struct buf blk, size /* * Make sure we have space for the worst - * case of 11 bytes: "\\[u10ffff]\0" + * case of 12 bytes: "\\[u10ffff]\n\0" */ - if (pos + 11 > ln.sz) + if (pos + 12 > ln.sz) resize_buf(&ln, 256); /* @@ -425,13 +450,32 @@ mparse_buf_r(struct mparse *curp, struct buf blk, size ln.buf[pos++] = blk.buf[i++]; } + ln.buf[pos] = '\0'; - if (pos + 1 >= ln.sz) - resize_buf(&ln, 256); + /* + * Maintain a lookaside buffer of all lines. + * parsed from this input source. + */ - if (i == blk.sz || blk.buf[i] == '\0') + thisln = mandoc_malloc(sizeof(*thisln)); + thisln->buf = mandoc_strdup(ln.buf); + thisln->sz = strlen(ln.buf) + 1; + thisln->next = NULL; + if (firstln == NULL) { + firstln = lastln = thisln; + if (curp->secondary == NULL) + curp->secondary = firstln; + } else { + lastln->next = thisln; + lastln = thisln; + } + + /* XXX Ugly hack to mark the end of the input. */ + + if (i == blk.sz || blk.buf[i] == '\0') { ln.buf[pos++] = '\n'; - ln.buf[pos] = '\0'; + ln.buf[pos] = '\0'; + } /* * A significant amount of complexity is contained by @@ -443,42 +487,36 @@ mparse_buf_r(struct mparse *curp, struct buf blk, size */ of = 0; - - /* - * Maintain a lookaside buffer of all parsed lines. We - * only do this if mparse_keep() has been invoked (the - * buffer may be accessed with mparse_getkeep()). - */ - - if (curp->secondary) { - curp->secondary->buf = mandoc_realloc( - curp->secondary->buf, - curp->secondary->sz + pos + 2); - memcpy(curp->secondary->buf + - curp->secondary->sz, - ln.buf, pos); - curp->secondary->sz += pos; - curp->secondary->buf - [curp->secondary->sz] = '\n'; - curp->secondary->sz++; - curp->secondary->buf - [curp->secondary->sz] = '\0'; - } rerun: - rr = roff_parseln(curp->roff, curp->line, &ln, &of); + line_result = roff_parseln(curp->roff, curp->line, &ln, &of); - switch (rr) { + switch (line_result) { case ROFF_REPARSE: - if (++curp->reparse_count > REPARSE_LIMIT) + case ROFF_USERCALL: + if (++curp->reparse_count > REPARSE_LIMIT) { + result = ROFF_IGN; mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, pos, NULL); - else if (mparse_buf_r(curp, ln, of, 0) == 1 || - start == 1) { + } else { + result = mparse_buf_r(curp, ln, of, 0); + if (line_result == ROFF_USERCALL) { + if (result == ROFF_USERRET) + result = ROFF_CONT; + roff_userret(curp->roff); + } + if (start || result == ROFF_CONT) { + pos = 0; + continue; + } + } + goto out; + case ROFF_USERRET: + if (start) { pos = 0; continue; } - free(ln.buf); - return 0; + result = ROFF_USERRET; + goto out; case ROFF_APPEND: pos = strlen(ln.buf); continue; @@ -491,16 +529,8 @@ rerun: if ( ! (curp->options & MPARSE_SO) && (i >= blk.sz || blk.buf[i] == '\0')) { curp->sodest = mandoc_strdup(ln.buf + of); - free(ln.buf); - return 1; + goto out; } - /* - * We remove `so' clauses from our lookaside - * buffer because we're going to descend into - * the file recursively. - */ - if (curp->secondary) - curp->secondary->sz -= pos + 1; save_file = curp->file; if ((fd = mparse_open(curp, ln.buf + of)) != -1) { mparse_readfd(curp, fd, ln.buf + of); @@ -528,21 +558,7 @@ rerun: if (curp->man->macroset == MACROSET_NONE) choose_parser(curp); - /* - * Lastly, push down into the parsers themselves. - * If libroff returns ROFF_TBL, then add it to the - * currently open parse. Since we only get here if - * there does exist data (see tbl_data.c), we're - * guaranteed that something's been allocated. - * Do the same for ROFF_EQN. - */ - - if (rr == ROFF_TBL) - while ((span = roff_span(curp->roff)) != NULL) - roff_addtbl(curp->man, span); - else if (rr == ROFF_EQN) - roff_addeqn(curp->man, roff_eqn(curp->roff)); - else if ((curp->man->macroset == MACROSET_MDOC ? + if ((curp->man->macroset == MACROSET_MDOC ? mdoc_parseln(curp->man, curp->line, ln.buf, of) : man_parseln(curp->man, curp->line, ln.buf, of)) == 2) break; @@ -556,9 +572,11 @@ rerun: pos = 0; } - +out: free(ln.buf); - return 1; + if (firstln != curp->secondary) + free_buf_list(firstln); + return result; } static int @@ -569,9 +587,13 @@ read_whole_file(struct mparse *curp, const char *file, gzFile gz; size_t off; ssize_t ssz; + int gzerrnum, retval; - if (fstat(fd, &st) == -1) - err((int)MANDOCLEVEL_SYSERR, "%s", file); + if (fstat(fd, &st) == -1) { + mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, + "fstat: %s", strerror(errno)); + return 0; + } /* * If we're a regular file, try just reading in the whole entry @@ -593,8 +615,24 @@ read_whole_file(struct mparse *curp, const char *file, } if (curp->gzip) { - if ((gz = gzdopen(fd, "rb")) == NULL) - err((int)MANDOCLEVEL_SYSERR, "%s", file); + /* + * Duplicating the file descriptor is required + * because we will have to call gzclose(3) + * to free memory used internally by zlib, + * but that will also close the file descriptor, + * which this function must not do. + */ + if ((fd = dup(fd)) == -1) { + mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, + "dup: %s", strerror(errno)); + return 0; + } + if ((gz = gzdopen(fd, "rb")) == NULL) { + mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, + "gzdopen: %s", strerror(errno)); + close(fd); + return 0; + } } else gz = NULL; @@ -605,6 +643,7 @@ read_whole_file(struct mparse *curp, const char *file, *with_mmap = 0; off = 0; + retval = 0; fb->sz = 0; fb->buf = NULL; for (;;) { @@ -621,16 +660,29 @@ read_whole_file(struct mparse *curp, const char *file, read(fd, fb->buf + (int)off, fb->sz - off); if (ssz == 0) { fb->sz = off; - return 1; + retval = 1; + break; } - if (ssz == -1) - err((int)MANDOCLEVEL_SYSERR, "%s", file); + if (ssz == -1) { + if (curp->gzip) + (void)gzerror(gz, &gzerrnum); + mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "read: %s", + curp->gzip && gzerrnum != Z_ERRNO ? + zError(gzerrnum) : strerror(errno)); + break; + } off += (size_t)ssz; } - free(fb->buf); - fb->buf = NULL; - return 0; + if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK) + mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "gzclose: %s", + gzerrnum == Z_ERRNO ? strerror(errno) : + zError(gzerrnum)); + if (retval == 0) { + free(fb->buf); + fb->buf = NULL; + } + return retval; } static void @@ -794,13 +846,12 @@ mparse_reset(struct mparse *curp) { roff_reset(curp->roff); roff_man_reset(curp->man); + free_buf_list(curp->secondary); + curp->secondary = NULL; free(curp->sodest); curp->sodest = NULL; - if (curp->secondary) - curp->secondary->sz = 0; - curp->file_status = MANDOCLEVEL_OK; curp->gzip = 0; } @@ -808,15 +859,11 @@ mparse_reset(struct mparse *curp) void mparse_free(struct mparse *curp) { - roffhash_free(curp->man->mdocmac); roffhash_free(curp->man->manmac); roff_man_free(curp->man); roff_free(curp->roff); - if (curp->secondary) - free(curp->secondary->buf); - - free(curp->secondary); + free_buf_list(curp->secondary); free(curp->sodest); free(curp); } @@ -889,17 +936,10 @@ mparse_strlevel(enum mandoclevel lvl) } void -mparse_keep(struct mparse *p) +mparse_copy(const struct mparse *p) { + struct buf *buf; - assert(NULL == p->secondary); - p->secondary = mandoc_calloc(1, sizeof(struct buf)); -} - -const char * -mparse_getkeep(const struct mparse *p) -{ - - assert(p->secondary); - return p->secondary->sz ? p->secondary->buf : NULL; + for (buf = p->secondary; buf != NULL; buf = buf->next) + puts(buf->buf); }