version 1.83, 2014/09/06 22:39:36 |
version 1.93, 2014/10/25 01:03:52 |
|
|
|
|
#define REPARSE_LIMIT 1000 |
#define REPARSE_LIMIT 1000 |
|
|
struct buf { |
|
char *buf; /* binary input buffer */ |
|
size_t sz; /* size of binary buffer */ |
|
}; |
|
|
|
struct mparse { |
struct mparse { |
struct man *pman; /* persistent man parser */ |
struct man *pman; /* persistent man parser */ |
struct mdoc *pmdoc; /* persistent mdoc parser */ |
struct mdoc *pmdoc; /* persistent mdoc parser */ |
|
|
enum mandoclevel file_status; /* status of current parse */ |
enum mandoclevel file_status; /* status of current parse */ |
enum mandoclevel wlevel; /* ignore messages below this */ |
enum mandoclevel wlevel; /* ignore messages below this */ |
int options; /* parser options */ |
int options; /* parser options */ |
|
int filenc; /* encoding of the current file */ |
int reparse_count; /* finite interp. stack */ |
int reparse_count; /* finite interp. stack */ |
int line; /* line number in the file */ |
int line; /* line number in the file */ |
}; |
}; |
|
|
|
static void choose_parser(struct mparse *); |
static void resize_buf(struct buf *, size_t); |
static void resize_buf(struct buf *, size_t); |
static void mparse_buf_r(struct mparse *, struct buf, int); |
static void mparse_buf_r(struct mparse *, struct buf, int); |
static void pset(const char *, int, struct mparse *); |
|
static int read_whole_file(struct mparse *, const char *, int, |
static int read_whole_file(struct mparse *, const char *, int, |
struct buf *, int *); |
struct buf *, int *); |
static void mparse_end(struct mparse *); |
static void mparse_end(struct mparse *); |
Line 117 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 113 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
"sections out of conventional order", |
"sections out of conventional order", |
"duplicate section title", |
"duplicate section title", |
"unexpected section", |
"unexpected section", |
|
"unusual Xr order", |
|
"unusual Xr punctuation", |
|
"AUTHORS section without An macro", |
|
|
/* related to macros and nesting */ |
/* related to macros and nesting */ |
"obsolete macro", |
"obsolete macro", |
Line 146 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 145 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
"missing font type, using \\fR", |
"missing font type, using \\fR", |
"unknown font type, using \\fR", |
"unknown font type, using \\fR", |
"missing -std argument, adding it", |
"missing -std argument, adding it", |
|
"missing eqn box, using \"\"", |
|
|
/* related to bad macro arguments */ |
/* related to bad macro arguments */ |
"unterminated quoted argument", |
"unterminated quoted argument", |
Line 155 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 155 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
"skipping duplicate list type", |
"skipping duplicate list type", |
"skipping -width argument", |
"skipping -width argument", |
"unknown AT&T UNIX version", |
"unknown AT&T UNIX version", |
|
"comma in function argument", |
|
"parenthesis in function name", |
"invalid content in Rs block", |
"invalid content in Rs block", |
"invalid Boolean argument", |
"invalid Boolean argument", |
"unknown font, skipping request", |
"unknown font, skipping request", |
Line 174 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 176 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
"equation scope open on exit", |
"equation scope open on exit", |
"overlapping equation scopes", |
"overlapping equation scopes", |
"unexpected end of equation", |
"unexpected end of equation", |
"equation syntax error", |
|
|
|
/* related to tables */ |
/* related to tables */ |
"bad table syntax", |
"bad table syntax", |
Line 206 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 207 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
"skipping request without numeric argument", |
"skipping request without numeric argument", |
"skipping all arguments", |
"skipping all arguments", |
"skipping excess arguments", |
"skipping excess arguments", |
|
"divide by zero", |
|
|
"generic fatal error", |
"generic fatal error", |
|
|
Line 247 resize_buf(struct buf *buf, size_t initial) |
|
Line 249 resize_buf(struct buf *buf, size_t initial) |
|
} |
} |
|
|
static void |
static void |
pset(const char *buf, int pos, struct mparse *curp) |
choose_parser(struct mparse *curp) |
{ |
{ |
char *cp, *ep; |
char *cp, *ep; |
int format; |
int format; |
int i; |
|
|
|
if ('.' == buf[0] || '\'' == buf[0]) { |
|
for (i = 1; buf[i]; i++) |
|
if (' ' != buf[i] && '\t' != buf[i]) |
|
break; |
|
if ('\0' == buf[i]) |
|
return; |
|
} |
|
|
|
/* |
/* |
* If neither command line arguments -mdoc or -man select |
* If neither command line arguments -mdoc or -man select |
* a parser nor the roff parser found a .Dd or .TH macro |
* a parser nor the roff parser found a .Dd or .TH macro |
Line 271 pset(const char *buf, int pos, struct mparse *curp) |
|
Line 264 pset(const char *buf, int pos, struct mparse *curp) |
|
cp = curp->primary->buf; |
cp = curp->primary->buf; |
ep = cp + curp->primary->sz; |
ep = cp + curp->primary->sz; |
while (cp < ep) { |
while (cp < ep) { |
if (*cp == '.' || *cp == '\'') { |
if (*cp == '.' || *cp == '\'') { |
cp++; |
cp++; |
if (cp[0] == 'D' && cp[1] == 'd') { |
if (cp[0] == 'D' && cp[1] == 'd') { |
format = MPARSE_MDOC; |
format = MPARSE_MDOC; |
Line 329 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
Line 322 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
lnn = curp->line; |
lnn = curp->line; |
pos = 0; |
pos = 0; |
|
|
for (i = 0; i < (int)blk.sz; ) { |
for (i = blk.offs; i < (int)blk.sz; ) { |
if (0 == pos && '\0' == blk.buf[i]) |
if (0 == pos && '\0' == blk.buf[i]) |
break; |
break; |
|
|
if (start) { |
if (start) { |
curp->line = lnn; |
curp->line = lnn; |
curp->reparse_count = 0; |
curp->reparse_count = 0; |
|
|
|
if (lnn < 3 && |
|
curp->filenc & MPARSE_UTF8 && |
|
curp->filenc & MPARSE_LATIN1) { |
|
blk.offs = i; |
|
curp->filenc = preconv_cue(&blk); |
|
} |
} |
} |
|
|
while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) { |
while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) { |
Line 356 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
Line 356 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
} |
} |
|
|
/* |
/* |
* Make sure we have space for at least |
* Make sure we have space for the worst |
* one backslash and one other character |
* case of 11 bytes: "\\[u10ffff]\0" |
* and the trailing NUL byte. |
|
*/ |
*/ |
|
|
if (pos + 2 >= (int)ln.sz) |
if (pos + 11 > (int)ln.sz) |
resize_buf(&ln, 256); |
resize_buf(&ln, 256); |
|
|
/* |
/* |
* Warn about bogus characters. If you're using |
* Encode 8-bit input. |
* non-ASCII encoding, you're screwing your |
|
* readers. Since I'd rather this not happen, |
|
* I'll be helpful and replace these characters |
|
* with "?", so we don't display gibberish. |
|
* Note to manual writers: use special characters. |
|
*/ |
*/ |
|
|
c = (unsigned char) blk.buf[i]; |
c = blk.buf[i]; |
|
if (c & 0x80) { |
|
blk.offs = i; |
|
ln.offs = pos; |
|
if (curp->filenc && preconv_encode( |
|
&blk, &ln, &curp->filenc)) { |
|
pos = ln.offs; |
|
i = blk.offs; |
|
} else { |
|
mandoc_vmsg(MANDOCERR_BADCHAR, |
|
curp, curp->line, pos, |
|
"0x%x", c); |
|
ln.buf[pos++] = '?'; |
|
i++; |
|
} |
|
continue; |
|
} |
|
|
if ( ! (isascii(c) && |
/* |
(isgraph(c) || isblank(c)))) { |
* Exclude control characters. |
|
*/ |
|
|
|
if (c == 0x7f || (c < 0x20 && c != 0x09)) { |
mandoc_vmsg(MANDOCERR_BADCHAR, curp, |
mandoc_vmsg(MANDOCERR_BADCHAR, curp, |
curp->line, pos, "0x%x", c); |
curp->line, pos, "0x%x", c); |
i++; |
i++; |
|
|
*/ |
*/ |
|
|
if ( ! (curp->man || curp->mdoc)) |
if ( ! (curp->man || curp->mdoc)) |
pset(ln.buf + of, pos - of, curp); |
choose_parser(curp); |
|
|
/* |
/* |
* Lastly, push down into the parsers themselves. One |
* Lastly, push down into the parsers themselves. |
* of these will have already been set in the pset() |
|
* routine. |
|
* If libroff returns ROFF_TBL, then add it to the |
* If libroff returns ROFF_TBL, then add it to the |
* currently open parse. Since we only get here if |
* currently open parse. Since we only get here if |
* there does exist data (see tbl_data.c), we're |
* there does exist data (see tbl_data.c), we're |
Line 638 read_whole_file(struct mparse *curp, const char *file, |
|
Line 649 read_whole_file(struct mparse *curp, const char *file, |
|
return(0); |
return(0); |
} |
} |
*with_mmap = 1; |
*with_mmap = 1; |
|
fb->offs = 0; |
fb->sz = (size_t)st.st_size; |
fb->sz = (size_t)st.st_size; |
fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); |
fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); |
if (fb->buf != MAP_FAILED) |
if (fb->buf != MAP_FAILED) |
Line 669 read_whole_file(struct mparse *curp, const char *file, |
|
Line 681 read_whole_file(struct mparse *curp, const char *file, |
|
ssz = read(fd, fb->buf + (int)off, fb->sz - off); |
ssz = read(fd, fb->buf + (int)off, fb->sz - off); |
if (ssz == 0) { |
if (ssz == 0) { |
fb->sz = off; |
fb->sz = off; |
|
fb->offs = 0; |
return(1); |
return(1); |
} |
} |
if (ssz == -1) { |
if (ssz == -1) { |
Line 723 mparse_end(struct mparse *curp) |
|
Line 736 mparse_end(struct mparse *curp) |
|
static void |
static void |
mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file) |
mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file) |
{ |
{ |
|
struct buf *svprimary; |
const char *svfile; |
const char *svfile; |
static int recursion_depth; |
static int recursion_depth; |
|
|
Line 734 mparse_parse_buffer(struct mparse *curp, struct buf bl |
|
Line 748 mparse_parse_buffer(struct mparse *curp, struct buf bl |
|
/* Line number is per-file. */ |
/* Line number is per-file. */ |
svfile = curp->file; |
svfile = curp->file; |
curp->file = file; |
curp->file = file; |
|
svprimary = curp->primary; |
curp->primary = &blk; |
curp->primary = &blk; |
curp->line = 1; |
curp->line = 1; |
recursion_depth++; |
recursion_depth++; |
|
|
|
/* Skip an UTF-8 byte order mark. */ |
|
if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && |
|
(unsigned char)blk.buf[0] == 0xef && |
|
(unsigned char)blk.buf[1] == 0xbb && |
|
(unsigned char)blk.buf[2] == 0xbf) { |
|
blk.offs = 3; |
|
curp->filenc &= ~MPARSE_LATIN1; |
|
} |
|
|
mparse_buf_r(curp, blk, 1); |
mparse_buf_r(curp, blk, 1); |
|
|
if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status) |
if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status) |
mparse_end(curp); |
mparse_end(curp); |
|
|
|
curp->primary = svprimary; |
curp->file = svfile; |
curp->file = svfile; |
} |
} |
|
|
Line 754 mparse_readmem(struct mparse *curp, const void *buf, s |
|
Line 779 mparse_readmem(struct mparse *curp, const void *buf, s |
|
|
|
blk.buf = UNCONST(buf); |
blk.buf = UNCONST(buf); |
blk.sz = len; |
blk.sz = len; |
|
blk.offs = 0; |
|
|
mparse_parse_buffer(curp, blk, file); |
mparse_parse_buffer(curp, blk, file); |
return(curp->file_status); |
return(curp->file_status); |
Line 764 mparse_readfd(struct mparse *curp, int fd, const char |
|
Line 790 mparse_readfd(struct mparse *curp, int fd, const char |
|
{ |
{ |
struct buf blk; |
struct buf blk; |
int with_mmap; |
int with_mmap; |
|
int save_filenc; |
|
|
if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) { |
if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) { |
curp->file_status = MANDOCLEVEL_SYSERR; |
curp->file_status = MANDOCLEVEL_SYSERR; |
Line 771 mparse_readfd(struct mparse *curp, int fd, const char |
|
Line 798 mparse_readfd(struct mparse *curp, int fd, const char |
|
(*curp->mmsg)(MANDOCERR_SYSOPEN, |
(*curp->mmsg)(MANDOCERR_SYSOPEN, |
curp->file_status, |
curp->file_status, |
file, 0, 0, strerror(errno)); |
file, 0, 0, strerror(errno)); |
goto out; |
return(curp->file_status); |
} |
} |
|
|
/* |
/* |
Line 781 mparse_readfd(struct mparse *curp, int fd, const char |
|
Line 808 mparse_readfd(struct mparse *curp, int fd, const char |
|
* the parse phase for the file. |
* the parse phase for the file. |
*/ |
*/ |
|
|
if ( ! read_whole_file(curp, file, fd, &blk, &with_mmap)) |
if (read_whole_file(curp, file, fd, &blk, &with_mmap)) { |
goto out; |
save_filenc = curp->filenc; |
|
curp->filenc = curp->options & |
mparse_parse_buffer(curp, blk, file); |
(MPARSE_UTF8 | MPARSE_LATIN1); |
|
mparse_parse_buffer(curp, blk, file); |
|
curp->filenc = save_filenc; |
#if HAVE_MMAP |
#if HAVE_MMAP |
if (with_mmap) |
if (with_mmap) |
munmap(blk.buf, blk.sz); |
munmap(blk.buf, blk.sz); |
else |
else |
#endif |
#endif |
free(blk.buf); |
free(blk.buf); |
|
} |
|
|
if (STDIN_FILENO != fd && -1 == close(fd)) |
if (STDIN_FILENO != fd && -1 == close(fd)) |
perror(file); |
perror(file); |
out: |
|
return(curp->file_status); |
return(curp->file_status); |
} |
} |
|
|