version 1.92, 2014/10/20 19:04:45 |
version 1.93, 2014/10/25 01:03:52 |
|
|
|
|
#define REPARSE_LIMIT 1000 |
#define REPARSE_LIMIT 1000 |
|
|
struct buf { |
|
char *buf; /* binary input buffer */ |
|
size_t sz; /* size of binary buffer */ |
|
}; |
|
|
|
struct mparse { |
struct mparse { |
struct man *pman; /* persistent man parser */ |
struct man *pman; /* persistent man parser */ |
struct mdoc *pmdoc; /* persistent mdoc parser */ |
struct mdoc *pmdoc; /* persistent mdoc parser */ |
|
|
enum mandoclevel file_status; /* status of current parse */ |
enum mandoclevel file_status; /* status of current parse */ |
enum mandoclevel wlevel; /* ignore messages below this */ |
enum mandoclevel wlevel; /* ignore messages below this */ |
int options; /* parser options */ |
int options; /* parser options */ |
|
int filenc; /* encoding of the current file */ |
int reparse_count; /* finite interp. stack */ |
int reparse_count; /* finite interp. stack */ |
int line; /* line number in the file */ |
int line; /* line number in the file */ |
}; |
}; |
Line 326 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
Line 322 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
lnn = curp->line; |
lnn = curp->line; |
pos = 0; |
pos = 0; |
|
|
for (i = 0; i < (int)blk.sz; ) { |
for (i = blk.offs; i < (int)blk.sz; ) { |
if (0 == pos && '\0' == blk.buf[i]) |
if (0 == pos && '\0' == blk.buf[i]) |
break; |
break; |
|
|
if (start) { |
if (start) { |
curp->line = lnn; |
curp->line = lnn; |
curp->reparse_count = 0; |
curp->reparse_count = 0; |
|
|
|
if (lnn < 3 && |
|
curp->filenc & MPARSE_UTF8 && |
|
curp->filenc & MPARSE_LATIN1) { |
|
blk.offs = i; |
|
curp->filenc = preconv_cue(&blk); |
|
} |
} |
} |
|
|
while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) { |
while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) { |
Line 353 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
Line 356 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
} |
} |
|
|
/* |
/* |
* Make sure we have space for at least |
* Make sure we have space for the worst |
* one backslash and one other character |
* case of 11 bytes: "\\[u10ffff]\0" |
* and the trailing NUL byte. |
|
*/ |
*/ |
|
|
if (pos + 2 >= (int)ln.sz) |
if (pos + 11 > (int)ln.sz) |
resize_buf(&ln, 256); |
resize_buf(&ln, 256); |
|
|
/* |
/* |
* Warn about bogus characters. If you're using |
* Encode 8-bit input. |
* non-ASCII encoding, you're screwing your |
|
* readers. Since I'd rather this not happen, |
|
* I'll be helpful and replace these characters |
|
* with "?", so we don't display gibberish. |
|
* Note to manual writers: use special characters. |
|
*/ |
*/ |
|
|
c = (unsigned char) blk.buf[i]; |
c = blk.buf[i]; |
|
if (c & 0x80) { |
|
blk.offs = i; |
|
ln.offs = pos; |
|
if (curp->filenc && preconv_encode( |
|
&blk, &ln, &curp->filenc)) { |
|
pos = ln.offs; |
|
i = blk.offs; |
|
} else { |
|
mandoc_vmsg(MANDOCERR_BADCHAR, |
|
curp, curp->line, pos, |
|
"0x%x", c); |
|
ln.buf[pos++] = '?'; |
|
i++; |
|
} |
|
continue; |
|
} |
|
|
if ( ! (isascii(c) && |
/* |
(isgraph(c) || isblank(c)))) { |
* Exclude control characters. |
|
*/ |
|
|
|
if (c == 0x7f || (c < 0x20 && c != 0x09)) { |
mandoc_vmsg(MANDOCERR_BADCHAR, curp, |
mandoc_vmsg(MANDOCERR_BADCHAR, curp, |
curp->line, pos, "0x%x", c); |
curp->line, pos, "0x%x", c); |
i++; |
i++; |
Line 633 read_whole_file(struct mparse *curp, const char *file, |
|
Line 649 read_whole_file(struct mparse *curp, const char *file, |
|
return(0); |
return(0); |
} |
} |
*with_mmap = 1; |
*with_mmap = 1; |
|
fb->offs = 0; |
fb->sz = (size_t)st.st_size; |
fb->sz = (size_t)st.st_size; |
fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); |
fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); |
if (fb->buf != MAP_FAILED) |
if (fb->buf != MAP_FAILED) |
Line 664 read_whole_file(struct mparse *curp, const char *file, |
|
Line 681 read_whole_file(struct mparse *curp, const char *file, |
|
ssz = read(fd, fb->buf + (int)off, fb->sz - off); |
ssz = read(fd, fb->buf + (int)off, fb->sz - off); |
if (ssz == 0) { |
if (ssz == 0) { |
fb->sz = off; |
fb->sz = off; |
|
fb->offs = 0; |
return(1); |
return(1); |
} |
} |
if (ssz == -1) { |
if (ssz == -1) { |
Line 735 mparse_parse_buffer(struct mparse *curp, struct buf bl |
|
Line 753 mparse_parse_buffer(struct mparse *curp, struct buf bl |
|
curp->line = 1; |
curp->line = 1; |
recursion_depth++; |
recursion_depth++; |
|
|
|
/* Skip an UTF-8 byte order mark. */ |
|
if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && |
|
(unsigned char)blk.buf[0] == 0xef && |
|
(unsigned char)blk.buf[1] == 0xbb && |
|
(unsigned char)blk.buf[2] == 0xbf) { |
|
blk.offs = 3; |
|
curp->filenc &= ~MPARSE_LATIN1; |
|
} |
|
|
mparse_buf_r(curp, blk, 1); |
mparse_buf_r(curp, blk, 1); |
|
|
if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status) |
if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status) |
Line 752 mparse_readmem(struct mparse *curp, const void *buf, s |
|
Line 779 mparse_readmem(struct mparse *curp, const void *buf, s |
|
|
|
blk.buf = UNCONST(buf); |
blk.buf = UNCONST(buf); |
blk.sz = len; |
blk.sz = len; |
|
blk.offs = 0; |
|
|
mparse_parse_buffer(curp, blk, file); |
mparse_parse_buffer(curp, blk, file); |
return(curp->file_status); |
return(curp->file_status); |
Line 762 mparse_readfd(struct mparse *curp, int fd, const char |
|
Line 790 mparse_readfd(struct mparse *curp, int fd, const char |
|
{ |
{ |
struct buf blk; |
struct buf blk; |
int with_mmap; |
int with_mmap; |
|
int save_filenc; |
|
|
if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) { |
if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) { |
curp->file_status = MANDOCLEVEL_SYSERR; |
curp->file_status = MANDOCLEVEL_SYSERR; |
Line 780 mparse_readfd(struct mparse *curp, int fd, const char |
|
Line 809 mparse_readfd(struct mparse *curp, int fd, const char |
|
*/ |
*/ |
|
|
if (read_whole_file(curp, file, fd, &blk, &with_mmap)) { |
if (read_whole_file(curp, file, fd, &blk, &with_mmap)) { |
|
save_filenc = curp->filenc; |
|
curp->filenc = curp->options & |
|
(MPARSE_UTF8 | MPARSE_LATIN1); |
mparse_parse_buffer(curp, blk, file); |
mparse_parse_buffer(curp, blk, file); |
|
curp->filenc = save_filenc; |
#if HAVE_MMAP |
#if HAVE_MMAP |
if (with_mmap) |
if (with_mmap) |
munmap(blk.buf, blk.sz); |
munmap(blk.buf, blk.sz); |