version 1.89, 2014/10/11 21:14:16 |
version 1.94, 2014/10/28 17:36:19 |
|
|
|
|
#define REPARSE_LIMIT 1000 |
#define REPARSE_LIMIT 1000 |
|
|
struct buf { |
|
char *buf; /* binary input buffer */ |
|
size_t sz; /* size of binary buffer */ |
|
}; |
|
|
|
struct mparse { |
struct mparse { |
struct man *pman; /* persistent man parser */ |
struct man *pman; /* persistent man parser */ |
struct mdoc *pmdoc; /* persistent mdoc parser */ |
struct mdoc *pmdoc; /* persistent mdoc parser */ |
struct man *man; /* man parser */ |
struct man *man; /* man parser */ |
struct mdoc *mdoc; /* mdoc parser */ |
struct mdoc *mdoc; /* mdoc parser */ |
struct roff *roff; /* roff parser (!NULL) */ |
struct roff *roff; /* roff parser (!NULL) */ |
|
const struct mchars *mchars; /* character table */ |
char *sodest; /* filename pointed to by .so */ |
char *sodest; /* filename pointed to by .so */ |
const char *file; /* filename of current input file */ |
const char *file; /* filename of current input file */ |
struct buf *primary; /* buffer currently being parsed */ |
struct buf *primary; /* buffer currently being parsed */ |
|
|
enum mandoclevel file_status; /* status of current parse */ |
enum mandoclevel file_status; /* status of current parse */ |
enum mandoclevel wlevel; /* ignore messages below this */ |
enum mandoclevel wlevel; /* ignore messages below this */ |
int options; /* parser options */ |
int options; /* parser options */ |
|
int filenc; /* encoding of the current file */ |
int reparse_count; /* finite interp. stack */ |
int reparse_count; /* finite interp. stack */ |
int line; /* line number in the file */ |
int line; /* line number in the file */ |
}; |
}; |
Line 149 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 146 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
"missing font type, using \\fR", |
"missing font type, using \\fR", |
"unknown font type, using \\fR", |
"unknown font type, using \\fR", |
"missing -std argument, adding it", |
"missing -std argument, adding it", |
|
"missing eqn box, using \"\"", |
|
|
/* related to bad macro arguments */ |
/* related to bad macro arguments */ |
"unterminated quoted argument", |
"unterminated quoted argument", |
Line 179 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 177 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
"equation scope open on exit", |
"equation scope open on exit", |
"overlapping equation scopes", |
"overlapping equation scopes", |
"unexpected end of equation", |
"unexpected end of equation", |
"equation syntax error", |
|
|
|
/* related to tables */ |
/* related to tables */ |
"bad table syntax", |
"bad table syntax", |
Line 211 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 208 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
"skipping request without numeric argument", |
"skipping request without numeric argument", |
"skipping all arguments", |
"skipping all arguments", |
"skipping excess arguments", |
"skipping excess arguments", |
|
"divide by zero", |
|
|
"generic fatal error", |
"generic fatal error", |
|
|
Line 325 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
Line 323 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
lnn = curp->line; |
lnn = curp->line; |
pos = 0; |
pos = 0; |
|
|
for (i = 0; i < (int)blk.sz; ) { |
for (i = blk.offs; i < (int)blk.sz; ) { |
if (0 == pos && '\0' == blk.buf[i]) |
if (0 == pos && '\0' == blk.buf[i]) |
break; |
break; |
|
|
if (start) { |
if (start) { |
curp->line = lnn; |
curp->line = lnn; |
curp->reparse_count = 0; |
curp->reparse_count = 0; |
|
|
|
if (lnn < 3 && |
|
curp->filenc & MPARSE_UTF8 && |
|
curp->filenc & MPARSE_LATIN1) { |
|
blk.offs = i; |
|
curp->filenc = preconv_cue(&blk); |
|
} |
} |
} |
|
|
while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) { |
while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) { |
Line 352 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
Line 357 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
} |
} |
|
|
/* |
/* |
* Make sure we have space for at least |
* Make sure we have space for the worst |
* one backslash and one other character |
* case of 11 bytes: "\\[u10ffff]\0" |
* and the trailing NUL byte. |
|
*/ |
*/ |
|
|
if (pos + 2 >= (int)ln.sz) |
if (pos + 11 > (int)ln.sz) |
resize_buf(&ln, 256); |
resize_buf(&ln, 256); |
|
|
/* |
/* |
* Warn about bogus characters. If you're using |
* Encode 8-bit input. |
* non-ASCII encoding, you're screwing your |
|
* readers. Since I'd rather this not happen, |
|
* I'll be helpful and replace these characters |
|
* with "?", so we don't display gibberish. |
|
* Note to manual writers: use special characters. |
|
*/ |
*/ |
|
|
c = (unsigned char) blk.buf[i]; |
c = blk.buf[i]; |
|
if (c & 0x80) { |
|
blk.offs = i; |
|
ln.offs = pos; |
|
if (curp->filenc && preconv_encode( |
|
&blk, &ln, &curp->filenc)) { |
|
pos = ln.offs; |
|
i = blk.offs; |
|
} else { |
|
mandoc_vmsg(MANDOCERR_BADCHAR, |
|
curp, curp->line, pos, |
|
"0x%x", c); |
|
ln.buf[pos++] = '?'; |
|
i++; |
|
} |
|
continue; |
|
} |
|
|
if ( ! (isascii(c) && |
/* |
(isgraph(c) || isblank(c)))) { |
* Exclude control characters. |
|
*/ |
|
|
|
if (c == 0x7f || (c < 0x20 && c != 0x09)) { |
mandoc_vmsg(MANDOCERR_BADCHAR, curp, |
mandoc_vmsg(MANDOCERR_BADCHAR, curp, |
curp->line, pos, "0x%x", c); |
curp->line, pos, "0x%x", c); |
i++; |
i++; |
Line 632 read_whole_file(struct mparse *curp, const char *file, |
|
Line 650 read_whole_file(struct mparse *curp, const char *file, |
|
return(0); |
return(0); |
} |
} |
*with_mmap = 1; |
*with_mmap = 1; |
|
fb->offs = 0; |
fb->sz = (size_t)st.st_size; |
fb->sz = (size_t)st.st_size; |
fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); |
fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); |
if (fb->buf != MAP_FAILED) |
if (fb->buf != MAP_FAILED) |
Line 663 read_whole_file(struct mparse *curp, const char *file, |
|
Line 682 read_whole_file(struct mparse *curp, const char *file, |
|
ssz = read(fd, fb->buf + (int)off, fb->sz - off); |
ssz = read(fd, fb->buf + (int)off, fb->sz - off); |
if (ssz == 0) { |
if (ssz == 0) { |
fb->sz = off; |
fb->sz = off; |
|
fb->offs = 0; |
return(1); |
return(1); |
} |
} |
if (ssz == -1) { |
if (ssz == -1) { |
Line 734 mparse_parse_buffer(struct mparse *curp, struct buf bl |
|
Line 754 mparse_parse_buffer(struct mparse *curp, struct buf bl |
|
curp->line = 1; |
curp->line = 1; |
recursion_depth++; |
recursion_depth++; |
|
|
|
/* Skip an UTF-8 byte order mark. */ |
|
if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && |
|
(unsigned char)blk.buf[0] == 0xef && |
|
(unsigned char)blk.buf[1] == 0xbb && |
|
(unsigned char)blk.buf[2] == 0xbf) { |
|
blk.offs = 3; |
|
curp->filenc &= ~MPARSE_LATIN1; |
|
} |
|
|
mparse_buf_r(curp, blk, 1); |
mparse_buf_r(curp, blk, 1); |
|
|
if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status) |
if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status) |
Line 751 mparse_readmem(struct mparse *curp, const void *buf, s |
|
Line 780 mparse_readmem(struct mparse *curp, const void *buf, s |
|
|
|
blk.buf = UNCONST(buf); |
blk.buf = UNCONST(buf); |
blk.sz = len; |
blk.sz = len; |
|
blk.offs = 0; |
|
|
mparse_parse_buffer(curp, blk, file); |
mparse_parse_buffer(curp, blk, file); |
return(curp->file_status); |
return(curp->file_status); |
Line 761 mparse_readfd(struct mparse *curp, int fd, const char |
|
Line 791 mparse_readfd(struct mparse *curp, int fd, const char |
|
{ |
{ |
struct buf blk; |
struct buf blk; |
int with_mmap; |
int with_mmap; |
|
int save_filenc; |
|
|
if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) { |
if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) { |
curp->file_status = MANDOCLEVEL_SYSERR; |
curp->file_status = MANDOCLEVEL_SYSERR; |
Line 768 mparse_readfd(struct mparse *curp, int fd, const char |
|
Line 799 mparse_readfd(struct mparse *curp, int fd, const char |
|
(*curp->mmsg)(MANDOCERR_SYSOPEN, |
(*curp->mmsg)(MANDOCERR_SYSOPEN, |
curp->file_status, |
curp->file_status, |
file, 0, 0, strerror(errno)); |
file, 0, 0, strerror(errno)); |
goto out; |
return(curp->file_status); |
} |
} |
|
|
/* |
/* |
Line 778 mparse_readfd(struct mparse *curp, int fd, const char |
|
Line 809 mparse_readfd(struct mparse *curp, int fd, const char |
|
* the parse phase for the file. |
* the parse phase for the file. |
*/ |
*/ |
|
|
if ( ! read_whole_file(curp, file, fd, &blk, &with_mmap)) |
if (read_whole_file(curp, file, fd, &blk, &with_mmap)) { |
goto out; |
save_filenc = curp->filenc; |
|
curp->filenc = curp->options & |
mparse_parse_buffer(curp, blk, file); |
(MPARSE_UTF8 | MPARSE_LATIN1); |
|
mparse_parse_buffer(curp, blk, file); |
|
curp->filenc = save_filenc; |
#if HAVE_MMAP |
#if HAVE_MMAP |
if (with_mmap) |
if (with_mmap) |
munmap(blk.buf, blk.sz); |
munmap(blk.buf, blk.sz); |
else |
else |
#endif |
#endif |
free(blk.buf); |
free(blk.buf); |
|
} |
|
|
if (STDIN_FILENO != fd && -1 == close(fd)) |
if (STDIN_FILENO != fd && -1 == close(fd)) |
perror(file); |
perror(file); |
out: |
|
return(curp->file_status); |
return(curp->file_status); |
} |
} |
|
|
Line 882 mparse_wait(struct mparse *curp, pid_t child_pid) |
|
Line 915 mparse_wait(struct mparse *curp, pid_t child_pid) |
|
} |
} |
|
|
struct mparse * |
struct mparse * |
mparse_alloc(int options, enum mandoclevel wlevel, |
mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg, |
mandocmsg mmsg, const char *defos) |
const struct mchars *mchars, const char *defos) |
{ |
{ |
struct mparse *curp; |
struct mparse *curp; |
|
|
Line 896 mparse_alloc(int options, enum mandoclevel wlevel, |
|
Line 929 mparse_alloc(int options, enum mandoclevel wlevel, |
|
curp->mmsg = mmsg; |
curp->mmsg = mmsg; |
curp->defos = defos; |
curp->defos = defos; |
|
|
curp->roff = roff_alloc(curp, options); |
curp->mchars = mchars; |
|
curp->roff = roff_alloc(curp, curp->mchars, options); |
if (curp->options & MPARSE_MDOC) |
if (curp->options & MPARSE_MDOC) |
curp->pmdoc = mdoc_alloc( |
curp->pmdoc = mdoc_alloc( |
curp->roff, curp, curp->defos, |
curp->roff, curp, curp->defos, |