version 1.209, 2018/12/30 00:49:55 |
version 1.220, 2021/06/27 17:57:54 |
|
|
/* $Id$ */ |
/* $Id$ */ |
/* |
/* |
|
* Copyright (c) 2010-2020 Ingo Schwarze <schwarze@openbsd.org> |
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2010-2018 Ingo Schwarze <schwarze@openbsd.org> |
|
* Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> |
* Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> |
* |
* |
* Permission to use, copy, modify, and distribute this software for any |
* Permission to use, copy, modify, and distribute this software for any |
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
|
* |
|
* Top-level functions of the mandoc(3) parser: |
|
* Parser and input encoding selection, decompression, |
|
* handling of input bytes, characters, lines, and files, |
|
* handling of roff(7) loops and file inclusion, |
|
* and steering of the various parsers. |
*/ |
*/ |
#include "config.h" |
#include "config.h" |
|
|
|
|
#include "mandoc_parse.h" |
#include "mandoc_parse.h" |
#include "libmandoc.h" |
#include "libmandoc.h" |
#include "roff_int.h" |
#include "roff_int.h" |
|
#include "tag.h" |
|
|
#define REPARSE_LIMIT 1000 |
#define REPARSE_LIMIT 1000 |
|
|
Line 147 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
Line 154 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
struct buf *firstln, *lastln, *thisln, *loop; |
struct buf *firstln, *lastln, *thisln, *loop; |
char *cp; |
char *cp; |
size_t pos; /* byte number in the ln buffer */ |
size_t pos; /* byte number in the ln buffer */ |
|
size_t spos; /* at the start of the current line parse */ |
int line_result, result; |
int line_result, result; |
int of; |
int of; |
int lnn; /* line number in the real file */ |
int lnn; /* line number in the real file */ |
Line 157 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
Line 165 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
ln.sz = 256; |
ln.sz = 256; |
ln.buf = mandoc_malloc(ln.sz); |
ln.buf = mandoc_malloc(ln.sz); |
ln.next = NULL; |
ln.next = NULL; |
firstln = loop = NULL; |
firstln = lastln = loop = NULL; |
lnn = curp->line; |
lnn = curp->line; |
pos = 0; |
pos = 0; |
inloop = 0; |
inloop = 0; |
Line 173 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
Line 181 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
curp->filenc & MPARSE_LATIN1) |
curp->filenc & MPARSE_LATIN1) |
curp->filenc = preconv_cue(&blk, i); |
curp->filenc = preconv_cue(&blk, i); |
} |
} |
|
spos = pos; |
|
|
while (i < blk.sz && (start || blk.buf[i] != '\0')) { |
while (i < blk.sz && (start || blk.buf[i] != '\0')) { |
|
|
Line 255 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
Line 264 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
/* XXX Ugly hack to mark the end of the input. */ |
/* XXX Ugly hack to mark the end of the input. */ |
|
|
if (i == blk.sz || blk.buf[i] == '\0') { |
if (i == blk.sz || blk.buf[i] == '\0') { |
|
if (pos + 2 > ln.sz) |
|
resize_buf(&ln, 256); |
ln.buf[pos++] = '\n'; |
ln.buf[pos++] = '\n'; |
ln.buf[pos] = '\0'; |
ln.buf[pos] = '\0'; |
} |
} |
Line 270 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
Line 281 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
|
|
of = 0; |
of = 0; |
rerun: |
rerun: |
line_result = roff_parseln(curp->roff, curp->line, &ln, &of); |
line_result = roff_parseln(curp->roff, curp->line, |
|
&ln, &of, start && spos == 0 ? pos : 0); |
|
|
/* Process options. */ |
/* Process options. */ |
|
|
Line 429 read_whole_file(struct mparse *curp, int fd, struct bu |
|
Line 441 read_whole_file(struct mparse *curp, int fd, struct bu |
|
int gzerrnum, retval; |
int gzerrnum, retval; |
|
|
if (fstat(fd, &st) == -1) { |
if (fstat(fd, &st) == -1) { |
mandoc_msg(MANDOCERR_FILE, 0, 0, |
mandoc_msg(MANDOCERR_FSTAT, 0, 0, "%s", strerror(errno)); |
"fstat: %s", strerror(errno)); |
return -1; |
return 0; |
|
} |
} |
|
|
/* |
/* |
Line 444 read_whole_file(struct mparse *curp, int fd, struct bu |
|
Line 455 read_whole_file(struct mparse *curp, int fd, struct bu |
|
if (curp->gzip == 0 && S_ISREG(st.st_mode)) { |
if (curp->gzip == 0 && S_ISREG(st.st_mode)) { |
if (st.st_size > 0x7fffffff) { |
if (st.st_size > 0x7fffffff) { |
mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL); |
mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL); |
return 0; |
return -1; |
} |
} |
*with_mmap = 1; |
*with_mmap = 1; |
fb->sz = (size_t)st.st_size; |
fb->sz = (size_t)st.st_size; |
fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); |
fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); |
if (fb->buf != MAP_FAILED) |
if (fb->buf != MAP_FAILED) |
return 1; |
return 0; |
} |
} |
|
|
if (curp->gzip) { |
if (curp->gzip) { |
Line 462 read_whole_file(struct mparse *curp, int fd, struct bu |
|
Line 473 read_whole_file(struct mparse *curp, int fd, struct bu |
|
* which this function must not do. |
* which this function must not do. |
*/ |
*/ |
if ((fd = dup(fd)) == -1) { |
if ((fd = dup(fd)) == -1) { |
mandoc_msg(MANDOCERR_FILE, 0, 0, |
mandoc_msg(MANDOCERR_DUP, 0, 0, |
"dup: %s", strerror(errno)); |
"%s", strerror(errno)); |
return 0; |
return -1; |
} |
} |
if ((gz = gzdopen(fd, "rb")) == NULL) { |
if ((gz = gzdopen(fd, "rb")) == NULL) { |
mandoc_msg(MANDOCERR_FILE, 0, 0, |
mandoc_msg(MANDOCERR_GZDOPEN, 0, 0, |
"gzdopen: %s", strerror(errno)); |
"%s", strerror(errno)); |
close(fd); |
close(fd); |
return 0; |
return -1; |
} |
} |
} else |
} else |
gz = NULL; |
gz = NULL; |
Line 482 read_whole_file(struct mparse *curp, int fd, struct bu |
|
Line 493 read_whole_file(struct mparse *curp, int fd, struct bu |
|
|
|
*with_mmap = 0; |
*with_mmap = 0; |
off = 0; |
off = 0; |
retval = 0; |
retval = -1; |
fb->sz = 0; |
fb->sz = 0; |
fb->buf = NULL; |
fb->buf = NULL; |
for (;;) { |
for (;;) { |
Line 498 read_whole_file(struct mparse *curp, int fd, struct bu |
|
Line 509 read_whole_file(struct mparse *curp, int fd, struct bu |
|
read(fd, fb->buf + (int)off, fb->sz - off); |
read(fd, fb->buf + (int)off, fb->sz - off); |
if (ssz == 0) { |
if (ssz == 0) { |
fb->sz = off; |
fb->sz = off; |
retval = 1; |
retval = 0; |
break; |
break; |
} |
} |
if (ssz == -1) { |
if (ssz == -1) { |
if (curp->gzip) |
if (curp->gzip) |
(void)gzerror(gz, &gzerrnum); |
(void)gzerror(gz, &gzerrnum); |
mandoc_msg(MANDOCERR_FILE, 0, 0, "read: %s", |
mandoc_msg(MANDOCERR_READ, 0, 0, "%s", |
curp->gzip && gzerrnum != Z_ERRNO ? |
curp->gzip && gzerrnum != Z_ERRNO ? |
zError(gzerrnum) : strerror(errno)); |
zError(gzerrnum) : strerror(errno)); |
break; |
break; |
Line 513 read_whole_file(struct mparse *curp, int fd, struct bu |
|
Line 524 read_whole_file(struct mparse *curp, int fd, struct bu |
|
} |
} |
|
|
if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK) |
if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK) |
mandoc_msg(MANDOCERR_FILE, 0, 0, "gzclose: %s", |
mandoc_msg(MANDOCERR_GZCLOSE, 0, 0, "%s", |
gzerrnum == Z_ERRNO ? strerror(errno) : |
gzerrnum == Z_ERRNO ? strerror(errno) : |
zError(gzerrnum)); |
zError(gzerrnum)); |
if (retval == 0) { |
if (retval == -1) { |
free(fb->buf); |
free(fb->buf); |
fb->buf = NULL; |
fb->buf = NULL; |
} |
} |
Line 546 mparse_readfd(struct mparse *curp, int fd, const char |
|
Line 557 mparse_readfd(struct mparse *curp, int fd, const char |
|
|
|
struct buf blk; |
struct buf blk; |
struct buf *save_primary; |
struct buf *save_primary; |
const char *save_filename; |
const char *save_filename, *cp; |
size_t offset; |
size_t offset; |
int save_filenc, save_lineno; |
int save_filenc, save_lineno; |
int with_mmap; |
int with_mmap; |
Line 554 mparse_readfd(struct mparse *curp, int fd, const char |
|
Line 565 mparse_readfd(struct mparse *curp, int fd, const char |
|
if (recursion_depth > 64) { |
if (recursion_depth > 64) { |
mandoc_msg(MANDOCERR_ROFFLOOP, curp->line, 0, NULL); |
mandoc_msg(MANDOCERR_ROFFLOOP, curp->line, 0, NULL); |
return; |
return; |
} |
} else if (recursion_depth == 0 && |
if (read_whole_file(curp, fd, &blk, &with_mmap) == 0) |
(cp = strrchr(filename, '.')) != NULL && |
|
cp[1] >= '1' && cp[1] <= '9') |
|
curp->man->filesec = cp[1]; |
|
else |
|
curp->man->filesec = '\0'; |
|
|
|
if (read_whole_file(curp, fd, &blk, &with_mmap) == -1) |
return; |
return; |
|
|
/* |
/* |
|
|
mparse_open(struct mparse *curp, const char *file) |
mparse_open(struct mparse *curp, const char *file) |
{ |
{ |
char *cp; |
char *cp; |
int fd; |
int fd, save_errno; |
|
|
cp = strrchr(file, '.'); |
cp = strrchr(file, '.'); |
curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz")); |
curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz")); |
Line 623 mparse_open(struct mparse *curp, const char *file) |
|
Line 640 mparse_open(struct mparse *curp, const char *file) |
|
*/ |
*/ |
|
|
if ( ! curp->gzip) { |
if ( ! curp->gzip) { |
|
save_errno = errno; |
mandoc_asprintf(&cp, "%s.gz", file); |
mandoc_asprintf(&cp, "%s.gz", file); |
fd = open(cp, O_RDONLY); |
fd = open(cp, O_RDONLY); |
free(cp); |
free(cp); |
|
errno = save_errno; |
if (fd != -1) { |
if (fd != -1) { |
curp->gzip = 1; |
curp->gzip = 1; |
return fd; |
return fd; |
Line 661 mparse_alloc(int options, enum mandoc_os os_e, const c |
|
Line 680 mparse_alloc(int options, enum mandoc_os os_e, const c |
|
} |
} |
curp->man->meta.first->tok = TOKEN_NONE; |
curp->man->meta.first->tok = TOKEN_NONE; |
curp->man->meta.os_e = os_e; |
curp->man->meta.os_e = os_e; |
|
tag_alloc(); |
return curp; |
return curp; |
} |
} |
|
|
void |
void |
mparse_reset(struct mparse *curp) |
mparse_reset(struct mparse *curp) |
{ |
{ |
|
tag_free(); |
roff_reset(curp->roff); |
roff_reset(curp->roff); |
roff_man_reset(curp->man); |
roff_man_reset(curp->man); |
free_buf_list(curp->secondary); |
free_buf_list(curp->secondary); |
curp->secondary = NULL; |
curp->secondary = NULL; |
curp->gzip = 0; |
curp->gzip = 0; |
|
tag_alloc(); |
} |
} |
|
|
void |
void |
mparse_free(struct mparse *curp) |
mparse_free(struct mparse *curp) |
{ |
{ |
|
tag_free(); |
roffhash_free(curp->man->mdocmac); |
roffhash_free(curp->man->mdocmac); |
roffhash_free(curp->man->manmac); |
roffhash_free(curp->man->manmac); |
roff_man_free(curp->man); |
roff_man_free(curp->man); |
Line 688 mparse_free(struct mparse *curp) |
|
Line 711 mparse_free(struct mparse *curp) |
|
struct roff_meta * |
struct roff_meta * |
mparse_result(struct mparse *curp) |
mparse_result(struct mparse *curp) |
{ |
{ |
|
roff_state_reset(curp->man); |
if (curp->options & MPARSE_VALIDATE) { |
if (curp->options & MPARSE_VALIDATE) { |
if (curp->man->meta.macroset == MACROSET_MDOC) |
if (curp->man->meta.macroset == MACROSET_MDOC) |
mdoc_validate(curp->man); |
mdoc_validate(curp->man); |
else |
else |
man_validate(curp->man); |
man_validate(curp->man); |
|
tag_postprocess(curp->man, curp->man->meta.first); |
} |
} |
return &curp->man->meta; |
return &curp->man->meta; |
} |
} |