version 1.74, 2014/07/30 23:38:52 |
version 1.125, 2015/02/06 11:54:36 |
|
|
/* $Id$ */ |
/* $Id$ */ |
/* |
/* |
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org> |
* Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org> |
* Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> |
* Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> |
* |
* |
* Permission to use, copy, modify, and distribute this software for any |
* Permission to use, copy, modify, and distribute this software for any |
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
*/ |
*/ |
#ifdef HAVE_CONFIG_H |
|
#include "config.h" |
#include "config.h" |
#endif |
|
|
|
#ifdef HAVE_MMAP |
#include <sys/types.h> |
# include <sys/stat.h> |
#if HAVE_MMAP |
# include <sys/mman.h> |
#include <sys/mman.h> |
|
#include <sys/stat.h> |
#endif |
#endif |
|
#include <sys/wait.h> |
|
|
#include <assert.h> |
#include <assert.h> |
#include <ctype.h> |
#include <ctype.h> |
|
|
#include "libmandoc.h" |
#include "libmandoc.h" |
#include "mdoc.h" |
#include "mdoc.h" |
#include "man.h" |
#include "man.h" |
#include "main.h" |
|
|
|
#define REPARSE_LIMIT 1000 |
#define REPARSE_LIMIT 1000 |
|
|
struct buf { |
|
char *buf; /* binary input buffer */ |
|
size_t sz; /* size of binary buffer */ |
|
}; |
|
|
|
struct mparse { |
struct mparse { |
enum mandoclevel file_status; /* status of current parse */ |
|
enum mandoclevel wlevel; /* ignore messages below this */ |
|
int line; /* line number in the file */ |
|
int options; /* parser options */ |
|
struct man *pman; /* persistent man parser */ |
struct man *pman; /* persistent man parser */ |
struct mdoc *pmdoc; /* persistent mdoc parser */ |
struct mdoc *pmdoc; /* persistent mdoc parser */ |
struct man *man; /* man parser */ |
struct man *man; /* man parser */ |
struct mdoc *mdoc; /* mdoc parser */ |
struct mdoc *mdoc; /* mdoc parser */ |
struct roff *roff; /* roff parser (!NULL) */ |
struct roff *roff; /* roff parser (!NULL) */ |
|
const struct mchars *mchars; /* character table */ |
char *sodest; /* filename pointed to by .so */ |
char *sodest; /* filename pointed to by .so */ |
int reparse_count; /* finite interp. stack */ |
const char *file; /* filename of current input file */ |
mandocmsg mmsg; /* warning/error message handler */ |
struct buf *primary; /* buffer currently being parsed */ |
const char *file; |
struct buf *secondary; /* preprocessed copy of input */ |
struct buf *secondary; |
|
const char *defos; /* default operating system */ |
const char *defos; /* default operating system */ |
|
mandocmsg mmsg; /* warning/error message handler */ |
|
enum mandoclevel file_status; /* status of current parse */ |
|
enum mandoclevel wlevel; /* ignore messages below this */ |
|
int options; /* parser options */ |
|
int filenc; /* encoding of the current file */ |
|
int reparse_count; /* finite interp. stack */ |
|
int line; /* line number in the file */ |
|
pid_t child; /* the gunzip(1) process */ |
}; |
}; |
|
|
|
static void choose_parser(struct mparse *); |
static void resize_buf(struct buf *, size_t); |
static void resize_buf(struct buf *, size_t); |
static void mparse_buf_r(struct mparse *, struct buf, int); |
static void mparse_buf_r(struct mparse *, struct buf, size_t, int); |
static void pset(const char *, int, struct mparse *); |
|
static int read_whole_file(struct mparse *, const char *, int, |
static int read_whole_file(struct mparse *, const char *, int, |
struct buf *, int *); |
struct buf *, int *); |
static void mparse_end(struct mparse *); |
static void mparse_end(struct mparse *); |
Line 82 static const enum mandocerr mandoclimits[MANDOCLEVEL_M |
|
Line 80 static const enum mandocerr mandoclimits[MANDOCLEVEL_M |
|
MANDOCERR_WARNING, |
MANDOCERR_WARNING, |
MANDOCERR_WARNING, |
MANDOCERR_WARNING, |
MANDOCERR_ERROR, |
MANDOCERR_ERROR, |
MANDOCERR_FATAL, |
MANDOCERR_UNSUPP, |
MANDOCERR_MAX, |
MANDOCERR_MAX, |
MANDOCERR_MAX |
MANDOCERR_MAX |
}; |
}; |
Line 93 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 91 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
"generic warning", |
"generic warning", |
|
|
/* related to the prologue */ |
/* related to the prologue */ |
"missing .TH macro, using \"unknown 1\"", |
"missing manual title, using UNTITLED", |
|
"missing manual title, using \"\"", |
"lower case character in document title", |
"lower case character in document title", |
|
"missing manual section, using \"\"", |
"unknown manual section", |
"unknown manual section", |
"unknown manual volume or arch", |
|
"missing date, using today's date", |
"missing date, using today's date", |
"cannot parse date, using it verbatim", |
"cannot parse date, using it verbatim", |
"prologue macros out of order", |
"missing Os macro, using \"\"", |
"duplicate prologue macro", |
"duplicate prologue macro", |
"incomplete prologue, terminated by", |
"late prologue macro", |
"skipping prologue macro in body", |
"skipping late title macro", |
|
"prologue macros out of order", |
|
|
/* related to document structure */ |
/* related to document structure */ |
".so is fragile, better use ln(1)", |
".so is fragile, better use ln(1)", |
Line 110 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 110 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
"content before first section header", |
"content before first section header", |
"first section is not \"NAME\"", |
"first section is not \"NAME\"", |
"bad NAME section contents", |
"bad NAME section contents", |
|
"missing description line, using \"\"", |
"sections out of conventional order", |
"sections out of conventional order", |
"duplicate section title", |
"duplicate section title", |
"unexpected section", |
"unexpected section", |
|
"unusual Xr order", |
|
"unusual Xr punctuation", |
|
"AUTHORS section without An macro", |
|
|
/* related to macros and nesting */ |
/* related to macros and nesting */ |
"obsolete macro", |
"obsolete macro", |
|
"macro neither callable nor escaped", |
"skipping paragraph macro", |
"skipping paragraph macro", |
"moving paragraph macro out of list", |
"moving paragraph macro out of list", |
"skipping no-space macro", |
"skipping no-space macro", |
Line 123 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 128 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
"nested displays are not portable", |
"nested displays are not portable", |
"moving content out of list", |
"moving content out of list", |
".Vt block has child macro", |
".Vt block has child macro", |
"fill mode already enabled, skipping .fi", |
"fill mode already enabled, skipping", |
"fill mode already disabled, skipping .nf", |
"fill mode already disabled, skipping", |
"line scope broken", |
"line scope broken", |
|
|
/* related to missing macro arguments */ |
/* related to missing macro arguments */ |
"skipping empty request", |
"skipping empty request", |
"conditional request controls empty scope", |
"conditional request controls empty scope", |
"skipping empty macro", |
"skipping empty macro", |
|
"empty block", |
"empty argument, using 0n", |
"empty argument, using 0n", |
"argument count wrong", |
|
"missing display type, using -ragged", |
"missing display type, using -ragged", |
"list type is not the first argument", |
"list type is not the first argument", |
"missing -width in -tag list, using 8n", |
"missing -width in -tag list, using 8n", |
"missing name for .Ex, using \"\"", |
"missing utility name, using \"\"", |
|
"missing function name, using \"\"", |
"empty head in list item", |
"empty head in list item", |
"empty list item", |
"empty list item", |
"missing font type, using \\fR", |
"missing font type, using \\fR", |
"unknown font type, using \\fR", |
"unknown font type, using \\fR", |
|
"nothing follows prefix", |
|
"empty reference block", |
"missing -std argument, adding it", |
"missing -std argument, adding it", |
|
"missing option string, using \"\"", |
|
"missing resource identifier, using \"\"", |
|
"missing eqn box, using \"\"", |
|
|
/* related to bad macro arguments */ |
/* related to bad macro arguments */ |
"skipping argument", |
|
"unterminated quoted argument", |
"unterminated quoted argument", |
"duplicate argument", |
"duplicate argument", |
|
"skipping duplicate argument", |
"skipping duplicate display type", |
"skipping duplicate display type", |
"skipping duplicate list type", |
"skipping duplicate list type", |
|
"skipping -width argument", |
"unknown AT&T UNIX version", |
"unknown AT&T UNIX version", |
|
"comma in function argument", |
|
"parenthesis in function name", |
"invalid content in Rs block", |
"invalid content in Rs block", |
"invalid Boolean argument", |
"invalid Boolean argument", |
"unknown font, skipping request", |
"unknown font, skipping request", |
Line 162 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 176 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
"invalid escape sequence", |
"invalid escape sequence", |
"undefined string, using \"\"", |
"undefined string, using \"\"", |
|
|
|
/* related to tables */ |
|
"tbl line starts with span", |
|
"tbl column starts with span", |
|
"skipping vertical bar in tbl layout", |
|
|
"generic error", |
"generic error", |
|
|
/* related to equations */ |
|
"unexpected equation scope closure", |
|
"equation scope open on exit", |
|
"overlapping equation scopes", |
|
"unexpected end of equation", |
|
"equation syntax error", |
|
|
|
/* related to tables */ |
/* related to tables */ |
"bad table syntax", |
"non-alphabetic character in tbl options", |
"bad table option", |
"skipping unknown tbl option", |
"bad table layout", |
"missing tbl option argument", |
"no table layout cells specified", |
"wrong tbl option argument size", |
"no table data cells specified", |
"empty tbl layout", |
"ignore data in cell", |
"invalid character in tbl layout", |
"data block still open", |
"unmatched parenthesis in tbl layout", |
"ignoring extra data cells", |
"tbl without any data cells", |
|
"ignoring data in spanned tbl cell", |
|
"ignoring extra tbl data cells", |
|
"data block open at end of tbl", |
|
|
/* related to document structure and macros */ |
/* related to document structure and macros */ |
|
NULL, |
"input stack limit exceeded, infinite loop?", |
"input stack limit exceeded, infinite loop?", |
"skipping bad character", |
"skipping bad character", |
"skipping unknown macro", |
"skipping unknown macro", |
|
"skipping insecure request", |
"skipping item outside list", |
"skipping item outside list", |
"skipping column outside column list", |
"skipping column outside column list", |
"skipping end of block that is not open", |
"skipping end of block that is not open", |
|
"fewer RS blocks open, skipping", |
"inserting missing end of block", |
"inserting missing end of block", |
"appending missing end of block", |
"appending missing end of block", |
|
|
/* related to request and macro arguments */ |
/* related to request and macro arguments */ |
"escaped character not allowed in a name", |
"escaped character not allowed in a name", |
"argument count wrong", |
"argument count wrong", |
|
"NOT IMPLEMENTED: Bd -file", |
"missing list type, using -item", |
"missing list type, using -item", |
"missing manual name, using \"\"", |
"missing manual name, using \"\"", |
"uname(3) system call failed, using UNKNOWN", |
"uname(3) system call failed, using UNKNOWN", |
"unknown standard specifier", |
"unknown standard specifier", |
"skipping request without numeric argument", |
"skipping request without numeric argument", |
|
"NOT IMPLEMENTED: .so with absolute path or \"..\"", |
|
".so request failed", |
"skipping all arguments", |
"skipping all arguments", |
"skipping excess arguments", |
"skipping excess arguments", |
|
"divide by zero", |
|
|
"generic fatal error", |
"unsupported feature", |
|
|
"input too large", |
"input too large", |
"column syntax is inconsistent", |
"unsupported control character", |
"NOT IMPLEMENTED: .Bd -file", |
"unsupported roff request", |
"NOT IMPLEMENTED: .so with absolute path or \"..\"", |
"eqn delim option in tbl", |
".so request failed", |
"unsupported tbl layout modifier", |
"static buffer exhausted", |
"ignoring macro in table", |
|
|
/* system errors */ |
|
NULL, |
|
"cannot stat file", |
|
"cannot read file", |
|
}; |
}; |
|
|
static const char * const mandoclevels[MANDOCLEVEL_MAX] = { |
static const char * const mandoclevels[MANDOCLEVEL_MAX] = { |
Line 222 static const char * const mandoclevels[MANDOCLEVEL_MAX |
|
Line 238 static const char * const mandoclevels[MANDOCLEVEL_MAX |
|
"RESERVED", |
"RESERVED", |
"WARNING", |
"WARNING", |
"ERROR", |
"ERROR", |
"FATAL", |
"UNSUPP", |
"BADARG", |
"BADARG", |
"SYSERR" |
"SYSERR" |
}; |
}; |
Line 237 resize_buf(struct buf *buf, size_t initial) |
|
Line 253 resize_buf(struct buf *buf, size_t initial) |
|
} |
} |
|
|
static void |
static void |
pset(const char *buf, int pos, struct mparse *curp) |
choose_parser(struct mparse *curp) |
{ |
{ |
int i; |
char *cp, *ep; |
|
int format; |
|
|
/* |
/* |
* Try to intuit which kind of manual parser should be used. If |
* If neither command line arguments -mdoc or -man select |
* passed in by command-line (-man, -mdoc), then use that |
* a parser nor the roff parser found a .Dd or .TH macro |
* explicitly. If passed as -mandoc, then try to guess from the |
* yet, look ahead in the main input buffer. |
* line: either skip dot-lines, use -mdoc when finding `.Dt', or |
|
* default to -man, which is more lenient. |
|
* |
|
* Separate out pmdoc/pman from mdoc/man: the first persists |
|
* through all parsers, while the latter is used per-parse. |
|
*/ |
*/ |
|
|
if ('.' == buf[0] || '\'' == buf[0]) { |
if ((format = roff_getformat(curp->roff)) == 0) { |
for (i = 1; buf[i]; i++) |
cp = curp->primary->buf; |
if (' ' != buf[i] && '\t' != buf[i]) |
ep = cp + curp->primary->sz; |
|
while (cp < ep) { |
|
if (*cp == '.' || *cp == '\'') { |
|
cp++; |
|
if (cp[0] == 'D' && cp[1] == 'd') { |
|
format = MPARSE_MDOC; |
|
break; |
|
} |
|
if (cp[0] == 'T' && cp[1] == 'H') { |
|
format = MPARSE_MAN; |
|
break; |
|
} |
|
} |
|
cp = memchr(cp, '\n', ep - cp); |
|
if (cp == NULL) |
break; |
break; |
if ('\0' == buf[i]) |
cp++; |
return; |
} |
} |
} |
|
|
if (MPARSE_MDOC & curp->options) { |
if (format == MPARSE_MDOC) { |
curp->mdoc = curp->pmdoc; |
|
return; |
|
} else if (MPARSE_MAN & curp->options) { |
|
curp->man = curp->pman; |
|
return; |
|
} |
|
|
|
if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) { |
|
if (NULL == curp->pmdoc) |
if (NULL == curp->pmdoc) |
curp->pmdoc = mdoc_alloc( |
curp->pmdoc = mdoc_alloc( |
curp->roff, curp, curp->defos, |
curp->roff, curp, curp->defos, |
Line 278 pset(const char *buf, int pos, struct mparse *curp) |
|
Line 296 pset(const char *buf, int pos, struct mparse *curp) |
|
return; |
return; |
} |
} |
|
|
|
/* Fall back to man(7) as a last resort. */ |
|
|
if (NULL == curp->pman) |
if (NULL == curp->pman) |
curp->pman = man_alloc(curp->roff, curp, |
curp->pman = man_alloc( |
|
curp->roff, curp, curp->defos, |
MPARSE_QUICK & curp->options ? 1 : 0); |
MPARSE_QUICK & curp->options ? 1 : 0); |
assert(curp->pman); |
assert(curp->pman); |
curp->man = curp->pman; |
curp->man = curp->pman; |
} |
} |
|
|
/* |
/* |
* Main parse routine for an opened file. This is called for each |
* Main parse routine for a buffer. |
* opened file and simply loops around the full input file, possibly |
* It assumes encoding and line numbering are already set up. |
* nesting (i.e., with `so'). |
* It can recurse directly (for invocations of user-defined |
|
* macros, inline equations, and input line traps) |
|
* and indirectly (for .so file inclusion). |
*/ |
*/ |
static void |
static void |
mparse_buf_r(struct mparse *curp, struct buf blk, int start) |
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) |
{ |
{ |
const struct tbl_span *span; |
const struct tbl_span *span; |
struct buf ln; |
struct buf ln; |
|
const char *save_file; |
|
char *cp; |
|
size_t pos; /* byte number in the ln buffer */ |
enum rofferr rr; |
enum rofferr rr; |
int i, of, rc; |
int of; |
int pos; /* byte number in the ln buffer */ |
|
int lnn; /* line number in the real file */ |
int lnn; /* line number in the real file */ |
|
int fd; |
|
pid_t save_child; |
unsigned char c; |
unsigned char c; |
|
|
memset(&ln, 0, sizeof(struct buf)); |
memset(&ln, 0, sizeof(ln)); |
|
|
lnn = curp->line; |
lnn = curp->line; |
pos = 0; |
pos = 0; |
|
|
for (i = 0; i < (int)blk.sz; ) { |
while (i < blk.sz) { |
if (0 == pos && '\0' == blk.buf[i]) |
if (0 == pos && '\0' == blk.buf[i]) |
break; |
break; |
|
|
if (start) { |
if (start) { |
curp->line = lnn; |
curp->line = lnn; |
curp->reparse_count = 0; |
curp->reparse_count = 0; |
|
|
|
if (lnn < 3 && |
|
curp->filenc & MPARSE_UTF8 && |
|
curp->filenc & MPARSE_LATIN1) |
|
curp->filenc = preconv_cue(&blk, i); |
} |
} |
|
|
while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) { |
while (i < blk.sz && (start || blk.buf[i] != '\0')) { |
|
|
/* |
/* |
* When finding an unescaped newline character, |
* When finding an unescaped newline character, |
Line 323 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
Line 355 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
* Skip a preceding carriage return, if any. |
* Skip a preceding carriage return, if any. |
*/ |
*/ |
|
|
if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz && |
if ('\r' == blk.buf[i] && i + 1 < blk.sz && |
'\n' == blk.buf[i + 1]) |
'\n' == blk.buf[i + 1]) |
++i; |
++i; |
if ('\n' == blk.buf[i]) { |
if ('\n' == blk.buf[i]) { |
Line 333 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
Line 365 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
} |
} |
|
|
/* |
/* |
* Make sure we have space for at least |
* Make sure we have space for the worst |
* one backslash and one other character |
* case of 11 bytes: "\\[u10ffff]\0" |
* and the trailing NUL byte. |
|
*/ |
*/ |
|
|
if (pos + 2 >= (int)ln.sz) |
if (pos + 11 > ln.sz) |
resize_buf(&ln, 256); |
resize_buf(&ln, 256); |
|
|
/* |
/* |
* Warn about bogus characters. If you're using |
* Encode 8-bit input. |
* non-ASCII encoding, you're screwing your |
|
* readers. Since I'd rather this not happen, |
|
* I'll be helpful and replace these characters |
|
* with "?", so we don't display gibberish. |
|
* Note to manual writers: use special characters. |
|
*/ |
*/ |
|
|
c = (unsigned char) blk.buf[i]; |
c = blk.buf[i]; |
|
if (c & 0x80) { |
|
if ( ! (curp->filenc && preconv_encode( |
|
&blk, &i, &ln, &pos, &curp->filenc))) { |
|
mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, |
|
curp->line, pos, "0x%x", c); |
|
ln.buf[pos++] = '?'; |
|
i++; |
|
} |
|
continue; |
|
} |
|
|
if ( ! (isascii(c) && |
/* |
(isgraph(c) || isblank(c)))) { |
* Exclude control characters. |
mandoc_msg(MANDOCERR_BADCHAR, curp, |
*/ |
curp->line, pos, NULL); |
|
|
if (c == 0x7f || (c < 0x20 && c != 0x09)) { |
|
mandoc_vmsg(c == 0x00 || c == 0x04 || |
|
c > 0x0a ? MANDOCERR_CHAR_BAD : |
|
MANDOCERR_CHAR_UNSUPP, |
|
curp, curp->line, pos, "0x%x", c); |
i++; |
i++; |
ln.buf[pos++] = '?'; |
ln.buf[pos++] = '?'; |
continue; |
continue; |
Line 363 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
Line 404 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
|
|
/* Trailing backslash = a plain char. */ |
/* Trailing backslash = a plain char. */ |
|
|
if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) { |
if (blk.buf[i] != '\\' || i + 1 == blk.sz) { |
ln.buf[pos++] = blk.buf[i++]; |
ln.buf[pos++] = blk.buf[i++]; |
continue; |
continue; |
} |
} |
Line 375 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
Line 416 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
* skip that one as well. |
* skip that one as well. |
*/ |
*/ |
|
|
if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz && |
if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz && |
'\n' == blk.buf[i + 2]) |
'\n' == blk.buf[i + 2]) |
++i; |
++i; |
if ('\n' == blk.buf[i + 1]) { |
if ('\n' == blk.buf[i + 1]) { |
Line 387 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
Line 428 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) { |
if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) { |
i += 2; |
i += 2; |
/* Comment, skip to end of line */ |
/* Comment, skip to end of line */ |
for (; i < (int)blk.sz; ++i) { |
for (; i < blk.sz; ++i) { |
if ('\n' == blk.buf[i]) { |
if ('\n' == blk.buf[i]) { |
++i; |
++i; |
++lnn; |
++lnn; |
Line 411 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
Line 452 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
|
|
if ( ! (isascii(c) && |
if ( ! (isascii(c) && |
(isgraph(c) || isblank(c)))) { |
(isgraph(c) || isblank(c)))) { |
mandoc_msg(MANDOCERR_BADCHAR, curp, |
mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, |
curp->line, pos, NULL); |
curp->line, pos, "0x%x", c); |
i += 2; |
i += 2; |
ln.buf[pos++] = '?'; |
ln.buf[pos++] = '?'; |
continue; |
continue; |
Line 424 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
Line 465 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
ln.buf[pos++] = blk.buf[i++]; |
ln.buf[pos++] = blk.buf[i++]; |
} |
} |
|
|
if (pos >= (int)ln.sz) |
if (pos >= ln.sz) |
resize_buf(&ln, 256); |
resize_buf(&ln, 256); |
|
|
ln.buf[pos] = '\0'; |
ln.buf[pos] = '\0'; |
Line 461 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
Line 502 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
[curp->secondary->sz] = '\0'; |
[curp->secondary->sz] = '\0'; |
} |
} |
rerun: |
rerun: |
rr = roff_parseln(curp->roff, curp->line, |
rr = roff_parseln(curp->roff, curp->line, &ln, &of); |
&ln.buf, &ln.sz, of, &of); |
|
|
|
switch (rr) { |
switch (rr) { |
case ROFF_REPARSE: |
case ROFF_REPARSE: |
if (REPARSE_LIMIT >= ++curp->reparse_count) |
if (REPARSE_LIMIT >= ++curp->reparse_count) |
mparse_buf_r(curp, ln, 0); |
mparse_buf_r(curp, ln, of, 0); |
else |
else |
mandoc_msg(MANDOCERR_ROFFLOOP, curp, |
mandoc_msg(MANDOCERR_ROFFLOOP, curp, |
curp->line, pos, NULL); |
curp->line, pos, NULL); |
pos = 0; |
pos = 0; |
continue; |
continue; |
case ROFF_APPEND: |
case ROFF_APPEND: |
pos = (int)strlen(ln.buf); |
pos = strlen(ln.buf); |
continue; |
continue; |
case ROFF_RERUN: |
case ROFF_RERUN: |
goto rerun; |
goto rerun; |
case ROFF_IGN: |
case ROFF_IGN: |
pos = 0; |
pos = 0; |
continue; |
continue; |
case ROFF_ERR: |
|
assert(MANDOCLEVEL_FATAL <= curp->file_status); |
|
break; |
|
case ROFF_SO: |
case ROFF_SO: |
if (0 == (MPARSE_SO & curp->options) && |
if ( ! (curp->options & MPARSE_SO) && |
(i >= (int)blk.sz || '\0' == blk.buf[i])) { |
(i >= blk.sz || blk.buf[i] == '\0')) { |
curp->sodest = mandoc_strdup(ln.buf + of); |
curp->sodest = mandoc_strdup(ln.buf + of); |
free(ln.buf); |
free(ln.buf); |
return; |
return; |
|
|
*/ |
*/ |
if (curp->secondary) |
if (curp->secondary) |
curp->secondary->sz -= pos + 1; |
curp->secondary->sz -= pos + 1; |
mparse_readfd(curp, -1, ln.buf + of); |
save_file = curp->file; |
if (MANDOCLEVEL_FATAL <= curp->file_status) { |
save_child = curp->child; |
|
if (mparse_open(curp, &fd, ln.buf + of) == |
|
MANDOCLEVEL_OK) { |
|
mparse_readfd(curp, fd, ln.buf + of); |
|
curp->file = save_file; |
|
} else { |
|
curp->file = save_file; |
mandoc_vmsg(MANDOCERR_SO_FAIL, |
mandoc_vmsg(MANDOCERR_SO_FAIL, |
curp, curp->line, pos, |
curp, curp->line, pos, |
".so %s", ln.buf + of); |
".so %s", ln.buf + of); |
break; |
ln.sz = mandoc_asprintf(&cp, |
|
".sp\nSee the file %s.\n.sp", |
|
ln.buf + of); |
|
free(ln.buf); |
|
ln.buf = cp; |
|
of = 0; |
|
mparse_buf_r(curp, ln, of, 0); |
} |
} |
|
curp->child = save_child; |
pos = 0; |
pos = 0; |
continue; |
continue; |
default: |
default: |
|
|
} |
} |
|
|
/* |
/* |
* If we encounter errors in the recursive parse, make |
|
* sure we don't continue parsing. |
|
*/ |
|
|
|
if (MANDOCLEVEL_FATAL <= curp->file_status) |
|
break; |
|
|
|
/* |
|
* If input parsers have not been allocated, do so now. |
* If input parsers have not been allocated, do so now. |
* We keep these instanced between parsers, but set them |
* We keep these instanced between parsers, but set them |
* locally per parse routine since we can use different |
* locally per parse routine since we can use different |
|
|
*/ |
*/ |
|
|
if ( ! (curp->man || curp->mdoc)) |
if ( ! (curp->man || curp->mdoc)) |
pset(ln.buf + of, pos - of, curp); |
choose_parser(curp); |
|
|
/* |
/* |
* Lastly, push down into the parsers themselves. One |
* Lastly, push down into the parsers themselves. |
* of these will have already been set in the pset() |
|
* routine. |
|
* If libroff returns ROFF_TBL, then add it to the |
* If libroff returns ROFF_TBL, then add it to the |
* currently open parse. Since we only get here if |
* currently open parse. Since we only get here if |
* there does exist data (see tbl_data.c), we're |
* there does exist data (see tbl_data.c), we're |
|
|
* Do the same for ROFF_EQN. |
* Do the same for ROFF_EQN. |
*/ |
*/ |
|
|
rc = -1; |
if (rr == ROFF_TBL) { |
|
while ((span = roff_span(curp->roff)) != NULL) |
|
if (curp->man == NULL) |
|
mdoc_addspan(curp->mdoc, span); |
|
else |
|
man_addspan(curp->man, span); |
|
} else if (rr == ROFF_EQN) { |
|
if (curp->man == NULL) |
|
mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff)); |
|
else |
|
man_addeqn(curp->man, roff_eqn(curp->roff)); |
|
} else if ((curp->man == NULL ? |
|
mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) : |
|
man_parseln(curp->man, curp->line, ln.buf, of)) == 2) |
|
break; |
|
|
if (ROFF_TBL == rr) |
|
while (NULL != (span = roff_span(curp->roff))) { |
|
rc = curp->man ? |
|
man_addspan(curp->man, span) : |
|
mdoc_addspan(curp->mdoc, span); |
|
if (0 == rc) |
|
break; |
|
} |
|
else if (ROFF_EQN == rr) |
|
rc = curp->mdoc ? |
|
mdoc_addeqn(curp->mdoc, |
|
roff_eqn(curp->roff)) : |
|
man_addeqn(curp->man, |
|
roff_eqn(curp->roff)); |
|
else if (curp->man || curp->mdoc) |
|
rc = curp->man ? |
|
man_parseln(curp->man, |
|
curp->line, ln.buf, of) : |
|
mdoc_parseln(curp->mdoc, |
|
curp->line, ln.buf, of); |
|
|
|
if (0 == rc) { |
|
assert(MANDOCLEVEL_FATAL <= curp->file_status); |
|
break; |
|
} else if (2 == rc) |
|
break; |
|
|
|
/* Temporary buffers typically are not full. */ |
/* Temporary buffers typically are not full. */ |
|
|
if (0 == start && '\0' == blk.buf[i]) |
if (0 == start && '\0' == blk.buf[i]) |
Line 589 read_whole_file(struct mparse *curp, const char *file, |
|
Line 616 read_whole_file(struct mparse *curp, const char *file, |
|
size_t off; |
size_t off; |
ssize_t ssz; |
ssize_t ssz; |
|
|
#ifdef HAVE_MMAP |
#if HAVE_MMAP |
struct stat st; |
struct stat st; |
if (-1 == fstat(fd, &st)) { |
if (-1 == fstat(fd, &st)) { |
curp->file_status = MANDOCLEVEL_SYSERR; |
perror(file); |
if (curp->mmsg) |
exit((int)MANDOCLEVEL_SYSERR); |
(*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status, |
|
file, 0, 0, strerror(errno)); |
|
return(0); |
|
} |
} |
|
|
/* |
/* |
Line 608 read_whole_file(struct mparse *curp, const char *file, |
|
Line 632 read_whole_file(struct mparse *curp, const char *file, |
|
|
|
if (S_ISREG(st.st_mode)) { |
if (S_ISREG(st.st_mode)) { |
if (st.st_size >= (1U << 31)) { |
if (st.st_size >= (1U << 31)) { |
curp->file_status = MANDOCLEVEL_FATAL; |
mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL); |
if (curp->mmsg) |
|
(*curp->mmsg)(MANDOCERR_TOOLARGE, |
|
curp->file_status, file, 0, 0, NULL); |
|
return(0); |
return(0); |
} |
} |
*with_mmap = 1; |
*with_mmap = 1; |
Line 634 read_whole_file(struct mparse *curp, const char *file, |
|
Line 655 read_whole_file(struct mparse *curp, const char *file, |
|
for (;;) { |
for (;;) { |
if (off == fb->sz) { |
if (off == fb->sz) { |
if (fb->sz == (1U << 31)) { |
if (fb->sz == (1U << 31)) { |
curp->file_status = MANDOCLEVEL_FATAL; |
mandoc_msg(MANDOCERR_TOOLARGE, curp, |
if (curp->mmsg) |
0, 0, NULL); |
(*curp->mmsg)(MANDOCERR_TOOLARGE, |
|
curp->file_status, |
|
file, 0, 0, NULL); |
|
break; |
break; |
} |
} |
resize_buf(fb, 65536); |
resize_buf(fb, 65536); |
Line 649 read_whole_file(struct mparse *curp, const char *file, |
|
Line 667 read_whole_file(struct mparse *curp, const char *file, |
|
return(1); |
return(1); |
} |
} |
if (ssz == -1) { |
if (ssz == -1) { |
curp->file_status = MANDOCLEVEL_SYSERR; |
perror(file); |
if (curp->mmsg) |
exit((int)MANDOCLEVEL_SYSERR); |
(*curp->mmsg)(MANDOCERR_SYSREAD, |
|
curp->file_status, file, 0, 0, |
|
strerror(errno)); |
|
break; |
|
} |
} |
off += (size_t)ssz; |
off += (size_t)ssz; |
} |
} |
|
|
mparse_end(struct mparse *curp) |
mparse_end(struct mparse *curp) |
{ |
{ |
|
|
if (MANDOCLEVEL_FATAL <= curp->file_status) |
|
return; |
|
|
|
if (curp->mdoc == NULL && |
if (curp->mdoc == NULL && |
curp->man == NULL && |
curp->man == NULL && |
curp->sodest == NULL) { |
curp->sodest == NULL) { |
Line 678 mparse_end(struct mparse *curp) |
|
Line 689 mparse_end(struct mparse *curp) |
|
curp->mdoc = curp->pmdoc; |
curp->mdoc = curp->pmdoc; |
else { |
else { |
if (curp->pman == NULL) |
if (curp->pman == NULL) |
curp->pman = man_alloc(curp->roff, curp, |
curp->pman = man_alloc( |
|
curp->roff, curp, curp->defos, |
curp->options & MPARSE_QUICK ? 1 : 0); |
curp->options & MPARSE_QUICK ? 1 : 0); |
curp->man = curp->pman; |
curp->man = curp->pman; |
} |
} |
} |
} |
|
if (curp->mdoc) |
if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) { |
mdoc_endparse(curp->mdoc); |
assert(MANDOCLEVEL_FATAL <= curp->file_status); |
if (curp->man) |
return; |
man_endparse(curp->man); |
} |
|
|
|
if (curp->man && ! man_endparse(curp->man)) { |
|
assert(MANDOCLEVEL_FATAL <= curp->file_status); |
|
return; |
|
} |
|
|
|
roff_endparse(curp->roff); |
roff_endparse(curp->roff); |
} |
} |
|
|
static void |
static void |
mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file) |
mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file) |
{ |
{ |
|
struct buf *svprimary; |
const char *svfile; |
const char *svfile; |
|
size_t offset; |
static int recursion_depth; |
static int recursion_depth; |
|
|
if (64 < recursion_depth) { |
if (64 < recursion_depth) { |
Line 711 mparse_parse_buffer(struct mparse *curp, struct buf bl |
|
Line 718 mparse_parse_buffer(struct mparse *curp, struct buf bl |
|
/* Line number is per-file. */ |
/* Line number is per-file. */ |
svfile = curp->file; |
svfile = curp->file; |
curp->file = file; |
curp->file = file; |
|
svprimary = curp->primary; |
|
curp->primary = &blk; |
curp->line = 1; |
curp->line = 1; |
recursion_depth++; |
recursion_depth++; |
|
|
mparse_buf_r(curp, blk, 1); |
/* Skip an UTF-8 byte order mark. */ |
|
if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && |
|
(unsigned char)blk.buf[0] == 0xef && |
|
(unsigned char)blk.buf[1] == 0xbb && |
|
(unsigned char)blk.buf[2] == 0xbf) { |
|
offset = 3; |
|
curp->filenc &= ~MPARSE_LATIN1; |
|
} else |
|
offset = 0; |
|
|
if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status) |
mparse_buf_r(curp, blk, offset, 1); |
|
|
|
if (--recursion_depth == 0) |
mparse_end(curp); |
mparse_end(curp); |
|
|
|
curp->primary = svprimary; |
curp->file = svfile; |
curp->file = svfile; |
} |
} |
|
|
enum mandoclevel |
enum mandoclevel |
mparse_readmem(struct mparse *curp, const void *buf, size_t len, |
mparse_readmem(struct mparse *curp, void *buf, size_t len, |
const char *file) |
const char *file) |
{ |
{ |
struct buf blk; |
struct buf blk; |
|
|
blk.buf = UNCONST(buf); |
blk.buf = buf; |
blk.sz = len; |
blk.sz = len; |
|
|
mparse_parse_buffer(curp, blk, file); |
mparse_parse_buffer(curp, blk, file); |
return(curp->file_status); |
return(curp->file_status); |
} |
} |
|
|
|
/* |
|
* Read the whole file into memory and call the parsers. |
|
* Called recursively when an .so request is encountered. |
|
*/ |
enum mandoclevel |
enum mandoclevel |
mparse_readfd(struct mparse *curp, int fd, const char *file) |
mparse_readfd(struct mparse *curp, int fd, const char *file) |
{ |
{ |
struct buf blk; |
struct buf blk; |
int with_mmap; |
int with_mmap; |
|
int save_filenc; |
|
|
if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) { |
if (read_whole_file(curp, file, fd, &blk, &with_mmap)) { |
curp->file_status = MANDOCLEVEL_SYSERR; |
save_filenc = curp->filenc; |
if (curp->mmsg) |
curp->filenc = curp->options & |
(*curp->mmsg)(MANDOCERR_SYSOPEN, |
(MPARSE_UTF8 | MPARSE_LATIN1); |
curp->file_status, |
mparse_parse_buffer(curp, blk, file); |
file, 0, 0, strerror(errno)); |
curp->filenc = save_filenc; |
goto out; |
#if HAVE_MMAP |
|
if (with_mmap) |
|
munmap(blk.buf, blk.sz); |
|
else |
|
#endif |
|
free(blk.buf); |
} |
} |
|
|
/* |
if (fd != STDIN_FILENO && close(fd) == -1) |
* Run for each opened file; may be called more than once for |
perror(file); |
* each full parse sequence if the opened file is nested (i.e., |
|
* from `so'). Simply sucks in the whole file and moves into |
|
* the parse phase for the file. |
|
*/ |
|
|
|
if ( ! read_whole_file(curp, file, fd, &blk, &with_mmap)) |
mparse_wait(curp); |
goto out; |
return(curp->file_status); |
|
} |
|
|
mparse_parse_buffer(curp, blk, file); |
enum mandoclevel |
|
mparse_open(struct mparse *curp, int *fd, const char *file) |
|
{ |
|
int pfd[2]; |
|
int save_errno; |
|
char *cp; |
|
|
#ifdef HAVE_MMAP |
curp->file = file; |
if (with_mmap) |
|
munmap(blk.buf, blk.sz); |
|
else |
|
#endif |
|
free(blk.buf); |
|
|
|
if (STDIN_FILENO != fd && -1 == close(fd)) |
/* Unless zipped, try to just open the file. */ |
perror(file); |
|
out: |
if ((cp = strrchr(file, '.')) == NULL || |
return(curp->file_status); |
strcmp(cp + 1, "gz")) { |
|
curp->child = 0; |
|
if ((*fd = open(file, O_RDONLY)) != -1) |
|
return(MANDOCLEVEL_OK); |
|
|
|
/* Open failed; try to append ".gz". */ |
|
|
|
mandoc_asprintf(&cp, "%s.gz", file); |
|
file = cp; |
|
} else |
|
cp = NULL; |
|
|
|
/* Before forking, make sure the file can be read. */ |
|
|
|
save_errno = errno; |
|
if (access(file, R_OK) == -1) { |
|
if (cp != NULL) |
|
errno = save_errno; |
|
free(cp); |
|
*fd = -1; |
|
curp->child = 0; |
|
mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno)); |
|
return(MANDOCLEVEL_ERROR); |
|
} |
|
|
|
/* Run gunzip(1). */ |
|
|
|
if (pipe(pfd) == -1) { |
|
perror("pipe"); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
|
|
switch (curp->child = fork()) { |
|
case -1: |
|
perror("fork"); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
case 0: |
|
close(pfd[0]); |
|
if (dup2(pfd[1], STDOUT_FILENO) == -1) { |
|
perror("dup"); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
execlp("gunzip", "gunzip", "-c", file, NULL); |
|
perror("exec"); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
default: |
|
close(pfd[1]); |
|
*fd = pfd[0]; |
|
return(MANDOCLEVEL_OK); |
|
} |
} |
} |
|
|
|
enum mandoclevel |
|
mparse_wait(struct mparse *curp) |
|
{ |
|
int status; |
|
|
|
if (curp->child == 0) |
|
return(MANDOCLEVEL_OK); |
|
|
|
if (waitpid(curp->child, &status, 0) == -1) { |
|
perror("wait"); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
if (WIFSIGNALED(status)) { |
|
mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, |
|
"gunzip died from signal %d", WTERMSIG(status)); |
|
return(MANDOCLEVEL_ERROR); |
|
} |
|
if (WEXITSTATUS(status)) { |
|
mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, |
|
"gunzip failed with code %d", WEXITSTATUS(status)); |
|
return(MANDOCLEVEL_ERROR); |
|
} |
|
return(MANDOCLEVEL_OK); |
|
} |
|
|
struct mparse * |
struct mparse * |
mparse_alloc(int options, enum mandoclevel wlevel, |
mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg, |
mandocmsg mmsg, const char *defos) |
const struct mchars *mchars, const char *defos) |
{ |
{ |
struct mparse *curp; |
struct mparse *curp; |
|
|
assert(wlevel <= MANDOCLEVEL_FATAL); |
|
|
|
curp = mandoc_calloc(1, sizeof(struct mparse)); |
curp = mandoc_calloc(1, sizeof(struct mparse)); |
|
|
curp->options = options; |
curp->options = options; |
Line 790 mparse_alloc(int options, enum mandoclevel wlevel, |
|
Line 889 mparse_alloc(int options, enum mandoclevel wlevel, |
|
curp->mmsg = mmsg; |
curp->mmsg = mmsg; |
curp->defos = defos; |
curp->defos = defos; |
|
|
curp->roff = roff_alloc(curp, options); |
curp->mchars = mchars; |
|
curp->roff = roff_alloc(curp, curp->mchars, options); |
if (curp->options & MPARSE_MDOC) |
if (curp->options & MPARSE_MDOC) |
curp->pmdoc = mdoc_alloc( |
curp->pmdoc = mdoc_alloc( |
curp->roff, curp, curp->defos, |
curp->roff, curp, curp->defos, |
curp->options & MPARSE_QUICK ? 1 : 0); |
curp->options & MPARSE_QUICK ? 1 : 0); |
if (curp->options & MPARSE_MAN) |
if (curp->options & MPARSE_MAN) |
curp->pman = man_alloc(curp->roff, curp, |
curp->pman = man_alloc( |
|
curp->roff, curp, curp->defos, |
curp->options & MPARSE_QUICK ? 1 : 0); |
curp->options & MPARSE_QUICK ? 1 : 0); |
|
|
return(curp); |
return(curp); |
Line 877 mandoc_msg(enum mandocerr er, struct mparse *m, |
|
Line 978 mandoc_msg(enum mandocerr er, struct mparse *m, |
|
{ |
{ |
enum mandoclevel level; |
enum mandoclevel level; |
|
|
level = MANDOCLEVEL_FATAL; |
level = MANDOCLEVEL_UNSUPP; |
while (er < mandoclimits[level]) |
while (er < mandoclimits[level]) |
level--; |
level--; |
|
|
if (level < m->wlevel) |
if (level < m->wlevel && er != MANDOCERR_FILE) |
return; |
return; |
|
|
if (m->mmsg) |
if (m->mmsg) |