version 1.163, 2017/03/07 20:00:08 |
version 1.206, 2018/12/14 05:18:03 |
|
|
/* $Id$ */ |
/* $Id$ */ |
/* |
/* |
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org> |
* Copyright (c) 2010-2018 Ingo Schwarze <schwarze@openbsd.org> |
* Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> |
* Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> |
* |
* |
* Permission to use, copy, modify, and distribute this software for any |
* Permission to use, copy, modify, and distribute this software for any |
|
|
|
|
#include <assert.h> |
#include <assert.h> |
#include <ctype.h> |
#include <ctype.h> |
#if HAVE_ERR |
|
#include <err.h> |
|
#endif |
|
#include <errno.h> |
#include <errno.h> |
#include <fcntl.h> |
#include <fcntl.h> |
#include <stdarg.h> |
#include <stdarg.h> |
|
|
#include "roff.h" |
#include "roff.h" |
#include "mdoc.h" |
#include "mdoc.h" |
#include "man.h" |
#include "man.h" |
|
#include "mandoc_parse.h" |
#include "libmandoc.h" |
#include "libmandoc.h" |
#include "roff_int.h" |
#include "roff_int.h" |
|
|
|
|
struct roff *roff; /* roff parser (!NULL) */ |
struct roff *roff; /* roff parser (!NULL) */ |
struct roff_man *man; /* man parser */ |
struct roff_man *man; /* man parser */ |
char *sodest; /* filename pointed to by .so */ |
char *sodest; /* filename pointed to by .so */ |
const char *file; /* filename of current input file */ |
|
struct buf *primary; /* buffer currently being parsed */ |
struct buf *primary; /* buffer currently being parsed */ |
struct buf *secondary; /* preprocessed copy of input */ |
struct buf *secondary; /* copy of top level input */ |
const char *defos; /* default operating system */ |
struct buf *loop; /* open .while request line */ |
mandocmsg mmsg; /* warning/error message handler */ |
const char *os_s; /* default operating system */ |
enum mandoclevel file_status; /* status of current parse */ |
|
enum mandoclevel wlevel; /* ignore messages below this */ |
|
int options; /* parser options */ |
int options; /* parser options */ |
int gzip; /* current input file is gzipped */ |
int gzip; /* current input file is gzipped */ |
int filenc; /* encoding of the current file */ |
int filenc; /* encoding of the current file */ |
|
|
}; |
}; |
|
|
static void choose_parser(struct mparse *); |
static void choose_parser(struct mparse *); |
|
static void free_buf_list(struct buf *); |
static void resize_buf(struct buf *, size_t); |
static void resize_buf(struct buf *, size_t); |
static int mparse_buf_r(struct mparse *, struct buf, size_t, int); |
static int mparse_buf_r(struct mparse *, struct buf, size_t, int); |
static int read_whole_file(struct mparse *, const char *, int, |
static int read_whole_file(struct mparse *, int, struct buf *, int *); |
struct buf *, int *); |
|
static void mparse_end(struct mparse *); |
static void mparse_end(struct mparse *); |
static void mparse_parse_buffer(struct mparse *, struct buf, |
|
const char *); |
|
|
|
static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { |
|
MANDOCERR_OK, |
|
MANDOCERR_WARNING, |
|
MANDOCERR_WARNING, |
|
MANDOCERR_ERROR, |
|
MANDOCERR_UNSUPP, |
|
MANDOCERR_MAX, |
|
MANDOCERR_MAX |
|
}; |
|
|
|
static const char * const mandocerrs[MANDOCERR_MAX] = { |
|
"ok", |
|
|
|
"generic warning", |
|
|
|
/* related to the prologue */ |
|
"missing manual title, using UNTITLED", |
|
"missing manual title, using \"\"", |
|
"lower case character in document title", |
|
"missing manual section, using \"\"", |
|
"unknown manual section", |
|
"missing date, using today's date", |
|
"cannot parse date, using it verbatim", |
|
"missing Os macro, using \"\"", |
|
"duplicate prologue macro", |
|
"late prologue macro", |
|
"skipping late title macro", |
|
"prologue macros out of order", |
|
|
|
/* related to document structure */ |
|
".so is fragile, better use ln(1)", |
|
"no document body", |
|
"content before first section header", |
|
"first section is not \"NAME\"", |
|
"NAME section without Nm before Nd", |
|
"NAME section without description", |
|
"description not at the end of NAME", |
|
"bad NAME section content", |
|
"missing comma before name", |
|
"missing description line, using \"\"", |
|
"description line outside NAME section", |
|
"sections out of conventional order", |
|
"duplicate section title", |
|
"unexpected section", |
|
"unusual Xr order", |
|
"unusual Xr punctuation", |
|
"AUTHORS section without An macro", |
|
|
|
/* related to macros and nesting */ |
|
"obsolete macro", |
|
"macro neither callable nor escaped", |
|
"skipping paragraph macro", |
|
"moving paragraph macro out of list", |
|
"skipping no-space macro", |
|
"blocks badly nested", |
|
"nested displays are not portable", |
|
"moving content out of list", |
|
"fill mode already enabled, skipping", |
|
"fill mode already disabled, skipping", |
|
"line scope broken", |
|
|
|
/* related to missing macro arguments */ |
|
"skipping empty request", |
|
"conditional request controls empty scope", |
|
"skipping empty macro", |
|
"empty block", |
|
"empty argument, using 0n", |
|
"missing display type, using -ragged", |
|
"list type is not the first argument", |
|
"missing -width in -tag list, using 6n", |
|
"missing utility name, using \"\"", |
|
"missing function name, using \"\"", |
|
"empty head in list item", |
|
"empty list item", |
|
"missing font type, using \\fR", |
|
"unknown font type, using \\fR", |
|
"nothing follows prefix", |
|
"empty reference block", |
|
"missing section argument", |
|
"missing -std argument, adding it", |
|
"missing option string, using \"\"", |
|
"missing resource identifier, using \"\"", |
|
"missing eqn box, using \"\"", |
|
|
|
/* related to bad macro arguments */ |
|
"unterminated quoted argument", |
|
"duplicate argument", |
|
"skipping duplicate argument", |
|
"skipping duplicate display type", |
|
"skipping duplicate list type", |
|
"skipping -width argument", |
|
"wrong number of cells", |
|
"unknown AT&T UNIX version", |
|
"comma in function argument", |
|
"parenthesis in function name", |
|
"invalid content in Rs block", |
|
"invalid Boolean argument", |
|
"unknown font, skipping request", |
|
"odd number of characters in request", |
|
|
|
/* related to plain text */ |
|
"blank line in fill mode, using .sp", |
|
"tab in filled text", |
|
"whitespace at end of input line", |
|
"new sentence, new line", |
|
"bad comment style", |
|
"invalid escape sequence", |
|
"undefined string, using \"\"", |
|
|
|
/* related to tables */ |
|
"tbl line starts with span", |
|
"tbl column starts with span", |
|
"skipping vertical bar in tbl layout", |
|
|
|
"generic error", |
|
|
|
/* related to tables */ |
|
"non-alphabetic character in tbl options", |
|
"skipping unknown tbl option", |
|
"missing tbl option argument", |
|
"wrong tbl option argument size", |
|
"empty tbl layout", |
|
"invalid character in tbl layout", |
|
"unmatched parenthesis in tbl layout", |
|
"tbl without any data cells", |
|
"ignoring data in spanned tbl cell", |
|
"ignoring extra tbl data cells", |
|
"data block open at end of tbl", |
|
|
|
/* related to document structure and macros */ |
|
NULL, |
|
"input stack limit exceeded, infinite loop?", |
|
"skipping bad character", |
|
"skipping unknown macro", |
|
"skipping insecure request", |
|
"skipping item outside list", |
|
"skipping column outside column list", |
|
"skipping end of block that is not open", |
|
"fewer RS blocks open, skipping", |
|
"inserting missing end of block", |
|
"appending missing end of block", |
|
|
|
/* related to request and macro arguments */ |
|
"escaped character not allowed in a name", |
|
"NOT IMPLEMENTED: Bd -file", |
|
"skipping display without arguments", |
|
"missing list type, using -item", |
|
"missing manual name, using \"\"", |
|
"uname(3) system call failed, using UNKNOWN", |
|
"unknown standard specifier", |
|
"skipping request without numeric argument", |
|
"NOT IMPLEMENTED: .so with absolute path or \"..\"", |
|
".so request failed", |
|
"skipping all arguments", |
|
"skipping excess arguments", |
|
"divide by zero", |
|
|
|
"unsupported feature", |
|
"input too large", |
|
"unsupported control character", |
|
"unsupported roff request", |
|
"eqn delim option in tbl", |
|
"unsupported tbl layout modifier", |
|
"ignoring macro in table", |
|
}; |
|
|
|
static const char * const mandoclevels[MANDOCLEVEL_MAX] = { |
|
"SUCCESS", |
|
"RESERVED", |
|
"WARNING", |
|
"ERROR", |
|
"UNSUPP", |
|
"BADARG", |
|
"SYSERR" |
|
}; |
|
|
|
|
|
static void |
static void |
resize_buf(struct buf *buf, size_t initial) |
resize_buf(struct buf *buf, size_t initial) |
{ |
{ |
Line 259 resize_buf(struct buf *buf, size_t initial) |
|
Line 76 resize_buf(struct buf *buf, size_t initial) |
|
} |
} |
|
|
static void |
static void |
|
free_buf_list(struct buf *buf) |
|
{ |
|
struct buf *tmp; |
|
|
|
while (buf != NULL) { |
|
tmp = buf; |
|
buf = tmp->next; |
|
free(tmp->buf); |
|
free(tmp); |
|
} |
|
} |
|
|
|
static void |
choose_parser(struct mparse *curp) |
choose_parser(struct mparse *curp) |
{ |
{ |
char *cp, *ep; |
char *cp, *ep; |
Line 293 choose_parser(struct mparse *curp) |
|
Line 123 choose_parser(struct mparse *curp) |
|
} |
} |
|
|
if (format == MPARSE_MDOC) { |
if (format == MPARSE_MDOC) { |
mdoc_hash_init(); |
|
curp->man->macroset = MACROSET_MDOC; |
curp->man->macroset = MACROSET_MDOC; |
curp->man->first->tok = TOKEN_NONE; |
if (curp->man->mdocmac == NULL) |
|
curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX); |
} else { |
} else { |
man_hash_init(); |
|
curp->man->macroset = MACROSET_MAN; |
curp->man->macroset = MACROSET_MAN; |
curp->man->first->tok = TOKEN_NONE; |
if (curp->man->manmac == NULL) |
|
curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX); |
} |
} |
|
curp->man->first->tok = TOKEN_NONE; |
} |
} |
|
|
/* |
/* |
Line 313 choose_parser(struct mparse *curp) |
|
Line 144 choose_parser(struct mparse *curp) |
|
static int |
static int |
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) |
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) |
{ |
{ |
const struct tbl_span *span; |
|
struct buf ln; |
struct buf ln; |
const char *save_file; |
struct buf *firstln, *lastln, *thisln, *loop; |
char *cp; |
char *cp; |
size_t pos; /* byte number in the ln buffer */ |
size_t pos; /* byte number in the ln buffer */ |
size_t j; /* auxiliary byte number in the blk buffer */ |
int line_result, result; |
enum rofferr rr; |
|
int of; |
int of; |
int lnn; /* line number in the real file */ |
int lnn; /* line number in the real file */ |
int fd; |
int fd; |
|
int inloop; /* Saw .while on this level. */ |
unsigned char c; |
unsigned char c; |
|
|
memset(&ln, 0, sizeof(ln)); |
ln.sz = 256; |
|
ln.buf = mandoc_malloc(ln.sz); |
|
ln.next = NULL; |
|
firstln = loop = NULL; |
lnn = curp->line; |
lnn = curp->line; |
pos = 0; |
pos = 0; |
|
inloop = 0; |
|
result = ROFF_CONT; |
|
|
while (i < blk.sz) { |
while (i < blk.sz && (blk.buf[i] != '\0' || pos != 0)) { |
if (0 == pos && '\0' == blk.buf[i]) |
|
break; |
|
|
|
if (start) { |
if (start) { |
curp->line = lnn; |
curp->line = lnn; |
curp->reparse_count = 0; |
curp->reparse_count = 0; |
Line 363 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
Line 194 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
|
|
/* |
/* |
* Make sure we have space for the worst |
* Make sure we have space for the worst |
* case of 11 bytes: "\\[u10ffff]\0" |
* case of 12 bytes: "\\[u10ffff]\n\0" |
*/ |
*/ |
|
|
if (pos + 11 > ln.sz) |
if (pos + 12 > ln.sz) |
resize_buf(&ln, 256); |
resize_buf(&ln, 256); |
|
|
/* |
/* |
Line 377 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
Line 208 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
if (c & 0x80) { |
if (c & 0x80) { |
if ( ! (curp->filenc && preconv_encode( |
if ( ! (curp->filenc && preconv_encode( |
&blk, &i, &ln, &pos, &curp->filenc))) { |
&blk, &i, &ln, &pos, &curp->filenc))) { |
mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, |
mandoc_msg(MANDOCERR_CHAR_BAD, |
curp->line, pos, "0x%x", c); |
curp->line, pos, "0x%x", c); |
ln.buf[pos++] = '?'; |
ln.buf[pos++] = '?'; |
i++; |
i++; |
Line 390 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
Line 221 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
*/ |
*/ |
|
|
if (c == 0x7f || (c < 0x20 && c != 0x09)) { |
if (c == 0x7f || (c < 0x20 && c != 0x09)) { |
mandoc_vmsg(c == 0x00 || c == 0x04 || |
mandoc_msg(c == 0x00 || c == 0x04 || |
c > 0x0a ? MANDOCERR_CHAR_BAD : |
c > 0x0a ? MANDOCERR_CHAR_BAD : |
MANDOCERR_CHAR_UNSUPP, |
MANDOCERR_CHAR_UNSUPP, |
curp, curp->line, pos, "0x%x", c); |
curp->line, pos, "0x%x", c); |
i++; |
i++; |
if (c != '\r') |
if (c != '\r') |
ln.buf[pos++] = '?'; |
ln.buf[pos++] = '?'; |
continue; |
continue; |
} |
} |
|
|
/* Trailing backslash = a plain char. */ |
ln.buf[pos++] = blk.buf[i++]; |
|
} |
|
ln.buf[pos] = '\0'; |
|
|
if (blk.buf[i] != '\\' || i + 1 == blk.sz) { |
/* |
ln.buf[pos++] = blk.buf[i++]; |
* Maintain a lookaside buffer of all lines. |
continue; |
* parsed from this input source. |
} |
*/ |
|
|
/* |
thisln = mandoc_malloc(sizeof(*thisln)); |
* Found escape and at least one other character. |
thisln->buf = mandoc_strdup(ln.buf); |
* When it's a newline character, skip it. |
thisln->sz = strlen(ln.buf) + 1; |
* When there is a carriage return in between, |
thisln->next = NULL; |
* skip that one as well. |
if (firstln == NULL) { |
*/ |
firstln = lastln = thisln; |
|
if (curp->secondary == NULL) |
|
curp->secondary = firstln; |
|
} else { |
|
lastln->next = thisln; |
|
lastln = thisln; |
|
} |
|
|
if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz && |
/* XXX Ugly hack to mark the end of the input. */ |
'\n' == blk.buf[i + 2]) |
|
++i; |
|
if ('\n' == blk.buf[i + 1]) { |
|
i += 2; |
|
++lnn; |
|
continue; |
|
} |
|
|
|
if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) { |
if (i == blk.sz || blk.buf[i] == '\0') { |
j = i; |
ln.buf[pos++] = '\n'; |
i += 2; |
ln.buf[pos] = '\0'; |
/* Comment, skip to end of line */ |
|
for (; i < blk.sz; ++i) { |
|
if (blk.buf[i] != '\n') |
|
continue; |
|
if (blk.buf[i - 1] == ' ' || |
|
blk.buf[i - 1] == '\t') |
|
mandoc_msg( |
|
MANDOCERR_SPACE_EOL, |
|
curp, curp->line, |
|
pos + i-1 - j, NULL); |
|
++i; |
|
++lnn; |
|
break; |
|
} |
|
|
|
/* Backout trailing whitespaces */ |
|
for (; pos > 0; --pos) { |
|
if (ln.buf[pos - 1] != ' ') |
|
break; |
|
if (pos > 2 && ln.buf[pos - 2] == '\\') |
|
break; |
|
} |
|
break; |
|
} |
|
|
|
/* Catch escaped bogus characters. */ |
|
|
|
c = (unsigned char) blk.buf[i+1]; |
|
|
|
if ( ! (isascii(c) && |
|
(isgraph(c) || isblank(c)))) { |
|
mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, |
|
curp->line, pos, "0x%x", c); |
|
i += 2; |
|
ln.buf[pos++] = '?'; |
|
continue; |
|
} |
|
|
|
/* Some other escape sequence, copy & cont. */ |
|
|
|
ln.buf[pos++] = blk.buf[i++]; |
|
ln.buf[pos++] = blk.buf[i++]; |
|
} |
} |
|
|
if (pos >= ln.sz) |
|
resize_buf(&ln, 256); |
|
|
|
ln.buf[pos] = '\0'; |
|
|
|
/* |
/* |
* A significant amount of complexity is contained by |
* A significant amount of complexity is contained by |
* the roff preprocessor. It's line-oriented but can be |
* the roff preprocessor. It's line-oriented but can be |
Line 485 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
Line 270 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
*/ |
*/ |
|
|
of = 0; |
of = 0; |
|
rerun: |
|
line_result = roff_parseln(curp->roff, curp->line, &ln, &of); |
|
|
/* |
/* Process options. */ |
* Maintain a lookaside buffer of all parsed lines. We |
|
* only do this if mparse_keep() has been invoked (the |
|
* buffer may be accessed with mparse_getkeep()). |
|
*/ |
|
|
|
if (curp->secondary) { |
if (line_result & ROFF_APPEND) |
curp->secondary->buf = mandoc_realloc( |
assert(line_result == (ROFF_IGN | ROFF_APPEND)); |
curp->secondary->buf, |
|
curp->secondary->sz + pos + 2); |
if (line_result & ROFF_USERCALL) |
memcpy(curp->secondary->buf + |
assert((line_result & ROFF_MASK) == ROFF_REPARSE); |
curp->secondary->sz, |
|
ln.buf, pos); |
if (line_result & ROFF_USERRET) { |
curp->secondary->sz += pos; |
assert(line_result == (ROFF_IGN | ROFF_USERRET)); |
curp->secondary->buf |
if (start == 0) { |
[curp->secondary->sz] = '\n'; |
/* Return from the current macro. */ |
curp->secondary->sz++; |
result = ROFF_USERRET; |
curp->secondary->buf |
goto out; |
[curp->secondary->sz] = '\0'; |
} |
} |
} |
rerun: |
|
rr = roff_parseln(curp->roff, curp->line, &ln, &of); |
|
|
|
switch (rr) { |
switch (line_result & ROFF_LOOPMASK) { |
case ROFF_REPARSE: |
case ROFF_IGN: |
if (++curp->reparse_count > REPARSE_LIMIT) |
break; |
mandoc_msg(MANDOCERR_ROFFLOOP, curp, |
case ROFF_WHILE: |
|
if (curp->loop != NULL) { |
|
if (loop == curp->loop) |
|
break; |
|
mandoc_msg(MANDOCERR_WHILE_NEST, |
curp->line, pos, NULL); |
curp->line, pos, NULL); |
else if (mparse_buf_r(curp, ln, of, 0) == 1 || |
|
start == 1) { |
|
pos = 0; |
|
continue; |
|
} |
} |
free(ln.buf); |
curp->loop = thisln; |
return 0; |
loop = NULL; |
case ROFF_APPEND: |
inloop = 1; |
pos = strlen(ln.buf); |
break; |
continue; |
case ROFF_LOOPCONT: |
|
case ROFF_LOOPEXIT: |
|
if (curp->loop == NULL) { |
|
mandoc_msg(MANDOCERR_WHILE_FAIL, |
|
curp->line, pos, NULL); |
|
break; |
|
} |
|
if (inloop == 0) { |
|
mandoc_msg(MANDOCERR_WHILE_INTO, |
|
curp->line, pos, NULL); |
|
curp->loop = loop = NULL; |
|
break; |
|
} |
|
if (line_result & ROFF_LOOPCONT) |
|
loop = curp->loop; |
|
else { |
|
curp->loop = loop = NULL; |
|
inloop = 0; |
|
} |
|
break; |
|
default: |
|
abort(); |
|
} |
|
|
|
/* Process the main instruction from the roff parser. */ |
|
|
|
switch (line_result & ROFF_MASK) { |
|
case ROFF_IGN: |
|
break; |
|
case ROFF_CONT: |
|
if (curp->man->macroset == MACROSET_NONE) |
|
choose_parser(curp); |
|
if ((curp->man->macroset == MACROSET_MDOC ? |
|
mdoc_parseln(curp->man, curp->line, ln.buf, of) : |
|
man_parseln(curp->man, curp->line, ln.buf, of) |
|
) == 2) |
|
goto out; |
|
break; |
case ROFF_RERUN: |
case ROFF_RERUN: |
goto rerun; |
goto rerun; |
case ROFF_IGN: |
case ROFF_REPARSE: |
pos = 0; |
if (++curp->reparse_count > REPARSE_LIMIT) { |
continue; |
/* Abort and return to the top level. */ |
|
result = ROFF_IGN; |
|
mandoc_msg(MANDOCERR_ROFFLOOP, |
|
curp->line, pos, NULL); |
|
goto out; |
|
} |
|
result = mparse_buf_r(curp, ln, of, 0); |
|
if (line_result & ROFF_USERCALL) { |
|
roff_userret(curp->roff); |
|
/* Continue normally. */ |
|
if (result & ROFF_USERRET) |
|
result = ROFF_CONT; |
|
} |
|
if (start == 0 && result != ROFF_CONT) |
|
goto out; |
|
break; |
case ROFF_SO: |
case ROFF_SO: |
if ( ! (curp->options & MPARSE_SO) && |
if ( ! (curp->options & MPARSE_SO) && |
(i >= blk.sz || blk.buf[i] == '\0')) { |
(i >= blk.sz || blk.buf[i] == '\0')) { |
curp->sodest = mandoc_strdup(ln.buf + of); |
curp->sodest = mandoc_strdup(ln.buf + of); |
free(ln.buf); |
goto out; |
return 1; |
|
} |
} |
/* |
|
* We remove `so' clauses from our lookaside |
|
* buffer because we're going to descend into |
|
* the file recursively. |
|
*/ |
|
if (curp->secondary) |
|
curp->secondary->sz -= pos + 1; |
|
save_file = curp->file; |
|
if ((fd = mparse_open(curp, ln.buf + of)) != -1) { |
if ((fd = mparse_open(curp, ln.buf + of)) != -1) { |
mparse_readfd(curp, fd, ln.buf + of); |
mparse_readfd(curp, fd, ln.buf + of); |
close(fd); |
close(fd); |
curp->file = save_file; |
|
} else { |
} else { |
curp->file = save_file; |
mandoc_msg(MANDOCERR_SO_FAIL, curp->line, |
mandoc_vmsg(MANDOCERR_SO_FAIL, |
pos, ".so %s", ln.buf + of); |
curp, curp->line, pos, |
|
".so %s", ln.buf + of); |
|
ln.sz = mandoc_asprintf(&cp, |
ln.sz = mandoc_asprintf(&cp, |
".sp\nSee the file %s.\n.sp", |
".sp\nSee the file %s.\n.sp", |
ln.buf + of); |
ln.buf + of); |
|
|
of = 0; |
of = 0; |
mparse_buf_r(curp, ln, of, 0); |
mparse_buf_r(curp, ln, of, 0); |
} |
} |
pos = 0; |
|
continue; |
|
default: |
|
break; |
break; |
|
default: |
|
abort(); |
} |
} |
|
|
if (curp->man->macroset == MACROSET_NONE) |
/* Start the next input line. */ |
choose_parser(curp); |
|
|
|
/* |
if (loop != NULL && |
* Lastly, push down into the parsers themselves. |
(line_result & ROFF_LOOPMASK) == ROFF_IGN) |
* If libroff returns ROFF_TBL, then add it to the |
loop = loop->next; |
* currently open parse. Since we only get here if |
|
* there does exist data (see tbl_data.c), we're |
|
* guaranteed that something's been allocated. |
|
* Do the same for ROFF_EQN. |
|
*/ |
|
|
|
if (rr == ROFF_TBL) |
if (loop != NULL) { |
while ((span = roff_span(curp->roff)) != NULL) |
if ((line_result & ROFF_APPEND) == 0) |
roff_addtbl(curp->man, span); |
*ln.buf = '\0'; |
else if (rr == ROFF_EQN) |
if (ln.sz < loop->sz) |
roff_addeqn(curp->man, roff_eqn(curp->roff)); |
resize_buf(&ln, loop->sz); |
else if ((curp->man->macroset == MACROSET_MDOC ? |
(void)strlcat(ln.buf, loop->buf, ln.sz); |
mdoc_parseln(curp->man, curp->line, ln.buf, of) : |
of = 0; |
man_parseln(curp->man, curp->line, ln.buf, of)) == 2) |
goto rerun; |
break; |
} |
|
|
/* Temporary buffers typically are not full. */ |
pos = (line_result & ROFF_APPEND) ? strlen(ln.buf) : 0; |
|
|
if (0 == start && '\0' == blk.buf[i]) |
|
break; |
|
|
|
/* Start the next input line. */ |
|
|
|
pos = 0; |
|
} |
} |
|
out: |
|
if (inloop) { |
|
if (result != ROFF_USERRET) |
|
mandoc_msg(MANDOCERR_WHILE_OUTOF, |
|
curp->line, pos, NULL); |
|
curp->loop = NULL; |
|
} |
free(ln.buf); |
free(ln.buf); |
return 1; |
if (firstln != curp->secondary) |
|
free_buf_list(firstln); |
|
return result; |
} |
} |
|
|
static int |
static int |
read_whole_file(struct mparse *curp, const char *file, int fd, |
read_whole_file(struct mparse *curp, int fd, struct buf *fb, int *with_mmap) |
struct buf *fb, int *with_mmap) |
|
{ |
{ |
struct stat st; |
struct stat st; |
gzFile gz; |
gzFile gz; |
size_t off; |
size_t off; |
ssize_t ssz; |
ssize_t ssz; |
|
int gzerrnum, retval; |
|
|
if (fstat(fd, &st) == -1) |
if (fstat(fd, &st) == -1) { |
err((int)MANDOCLEVEL_SYSERR, "%s", file); |
mandoc_msg(MANDOCERR_FILE, 0, 0, |
|
"fstat: %s", strerror(errno)); |
|
return 0; |
|
} |
|
|
/* |
/* |
* If we're a regular file, try just reading in the whole entry |
* If we're a regular file, try just reading in the whole entry |
Line 624 read_whole_file(struct mparse *curp, const char *file, |
|
Line 442 read_whole_file(struct mparse *curp, const char *file, |
|
|
|
if (curp->gzip == 0 && S_ISREG(st.st_mode)) { |
if (curp->gzip == 0 && S_ISREG(st.st_mode)) { |
if (st.st_size > 0x7fffffff) { |
if (st.st_size > 0x7fffffff) { |
mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL); |
mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL); |
return 0; |
return 0; |
} |
} |
*with_mmap = 1; |
*with_mmap = 1; |
Line 635 read_whole_file(struct mparse *curp, const char *file, |
|
Line 453 read_whole_file(struct mparse *curp, const char *file, |
|
} |
} |
|
|
if (curp->gzip) { |
if (curp->gzip) { |
if ((gz = gzdopen(fd, "rb")) == NULL) |
/* |
err((int)MANDOCLEVEL_SYSERR, "%s", file); |
* Duplicating the file descriptor is required |
|
* because we will have to call gzclose(3) |
|
* to free memory used internally by zlib, |
|
* but that will also close the file descriptor, |
|
* which this function must not do. |
|
*/ |
|
if ((fd = dup(fd)) == -1) { |
|
mandoc_msg(MANDOCERR_FILE, 0, 0, |
|
"dup: %s", strerror(errno)); |
|
return 0; |
|
} |
|
if ((gz = gzdopen(fd, "rb")) == NULL) { |
|
mandoc_msg(MANDOCERR_FILE, 0, 0, |
|
"gzdopen: %s", strerror(errno)); |
|
close(fd); |
|
return 0; |
|
} |
} else |
} else |
gz = NULL; |
gz = NULL; |
|
|
Line 647 read_whole_file(struct mparse *curp, const char *file, |
|
Line 481 read_whole_file(struct mparse *curp, const char *file, |
|
|
|
*with_mmap = 0; |
*with_mmap = 0; |
off = 0; |
off = 0; |
|
retval = 0; |
fb->sz = 0; |
fb->sz = 0; |
fb->buf = NULL; |
fb->buf = NULL; |
for (;;) { |
for (;;) { |
if (off == fb->sz) { |
if (off == fb->sz) { |
if (fb->sz == (1U << 31)) { |
if (fb->sz == (1U << 31)) { |
mandoc_msg(MANDOCERR_TOOLARGE, curp, |
mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL); |
0, 0, NULL); |
|
break; |
break; |
} |
} |
resize_buf(fb, 65536); |
resize_buf(fb, 65536); |
Line 663 read_whole_file(struct mparse *curp, const char *file, |
|
Line 497 read_whole_file(struct mparse *curp, const char *file, |
|
read(fd, fb->buf + (int)off, fb->sz - off); |
read(fd, fb->buf + (int)off, fb->sz - off); |
if (ssz == 0) { |
if (ssz == 0) { |
fb->sz = off; |
fb->sz = off; |
return 1; |
retval = 1; |
|
break; |
} |
} |
if (ssz == -1) |
if (ssz == -1) { |
err((int)MANDOCLEVEL_SYSERR, "%s", file); |
if (curp->gzip) |
|
(void)gzerror(gz, &gzerrnum); |
|
mandoc_msg(MANDOCERR_FILE, 0, 0, "read: %s", |
|
curp->gzip && gzerrnum != Z_ERRNO ? |
|
zError(gzerrnum) : strerror(errno)); |
|
break; |
|
} |
off += (size_t)ssz; |
off += (size_t)ssz; |
} |
} |
|
|
free(fb->buf); |
if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK) |
fb->buf = NULL; |
mandoc_msg(MANDOCERR_FILE, 0, 0, "gzclose: %s", |
return 0; |
gzerrnum == Z_ERRNO ? strerror(errno) : |
|
zError(gzerrnum)); |
|
if (retval == 0) { |
|
free(fb->buf); |
|
fb->buf = NULL; |
|
} |
|
return retval; |
} |
} |
|
|
static void |
static void |
Line 687 mparse_end(struct mparse *curp) |
|
Line 534 mparse_end(struct mparse *curp) |
|
roff_endparse(curp->roff); |
roff_endparse(curp->roff); |
} |
} |
|
|
static void |
/* |
mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file) |
* Read the whole file into memory and call the parsers. |
|
* Called recursively when an .so request is encountered. |
|
*/ |
|
void |
|
mparse_readfd(struct mparse *curp, int fd, const char *filename) |
{ |
{ |
struct buf *svprimary; |
|
const char *svfile; |
|
size_t offset; |
|
static int recursion_depth; |
static int recursion_depth; |
|
|
if (64 < recursion_depth) { |
struct buf blk; |
mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL); |
struct buf *save_primary; |
|
const char *save_filename; |
|
size_t offset; |
|
int save_filenc, save_lineno; |
|
int with_mmap; |
|
|
|
if (recursion_depth > 64) { |
|
mandoc_msg(MANDOCERR_ROFFLOOP, curp->line, 0, NULL); |
return; |
return; |
} |
} |
|
if (read_whole_file(curp, fd, &blk, &with_mmap) == 0) |
|
return; |
|
|
/* Line number is per-file. */ |
/* |
svfile = curp->file; |
* Save some properties of the parent file. |
curp->file = file; |
*/ |
svprimary = curp->primary; |
|
|
save_primary = curp->primary; |
|
save_filenc = curp->filenc; |
|
save_lineno = curp->line; |
|
save_filename = mandoc_msg_getinfilename(); |
|
|
curp->primary = &blk; |
curp->primary = &blk; |
|
curp->filenc = curp->options & (MPARSE_UTF8 | MPARSE_LATIN1); |
curp->line = 1; |
curp->line = 1; |
recursion_depth++; |
mandoc_msg_setinfilename(filename); |
|
|
/* Skip an UTF-8 byte order mark. */ |
/* Skip an UTF-8 byte order mark. */ |
if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && |
if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && |
Line 718 mparse_parse_buffer(struct mparse *curp, struct buf bl |
|
Line 581 mparse_parse_buffer(struct mparse *curp, struct buf bl |
|
} else |
} else |
offset = 0; |
offset = 0; |
|
|
|
recursion_depth++; |
mparse_buf_r(curp, blk, offset, 1); |
mparse_buf_r(curp, blk, offset, 1); |
|
|
if (--recursion_depth == 0) |
if (--recursion_depth == 0) |
mparse_end(curp); |
mparse_end(curp); |
|
|
curp->primary = svprimary; |
/* |
curp->file = svfile; |
* Clean up and restore saved parent properties. |
} |
*/ |
|
|
enum mandoclevel |
if (with_mmap) |
mparse_readmem(struct mparse *curp, void *buf, size_t len, |
munmap(blk.buf, blk.sz); |
const char *file) |
else |
{ |
free(blk.buf); |
struct buf blk; |
|
|
|
blk.buf = buf; |
curp->primary = save_primary; |
blk.sz = len; |
curp->filenc = save_filenc; |
|
curp->line = save_lineno; |
mparse_parse_buffer(curp, blk, file); |
if (save_filename != NULL) |
return curp->file_status; |
mandoc_msg_setinfilename(save_filename); |
} |
} |
|
|
/* |
|
* Read the whole file into memory and call the parsers. |
|
* Called recursively when an .so request is encountered. |
|
*/ |
|
enum mandoclevel |
|
mparse_readfd(struct mparse *curp, int fd, const char *file) |
|
{ |
|
struct buf blk; |
|
int with_mmap; |
|
int save_filenc; |
|
|
|
if (read_whole_file(curp, file, fd, &blk, &with_mmap)) { |
|
save_filenc = curp->filenc; |
|
curp->filenc = curp->options & |
|
(MPARSE_UTF8 | MPARSE_LATIN1); |
|
mparse_parse_buffer(curp, blk, file); |
|
curp->filenc = save_filenc; |
|
if (with_mmap) |
|
munmap(blk.buf, blk.sz); |
|
else |
|
free(blk.buf); |
|
} |
|
return curp->file_status; |
|
} |
|
|
|
int |
int |
mparse_open(struct mparse *curp, const char *file) |
mparse_open(struct mparse *curp, const char *file) |
{ |
{ |
char *cp; |
char *cp; |
int fd; |
int fd; |
|
|
curp->file = file; |
|
cp = strrchr(file, '.'); |
cp = strrchr(file, '.'); |
curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz")); |
curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz")); |
|
|
Line 797 mparse_open(struct mparse *curp, const char *file) |
|
Line 633 mparse_open(struct mparse *curp, const char *file) |
|
|
|
/* Neither worked, give up. */ |
/* Neither worked, give up. */ |
|
|
mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno)); |
mandoc_msg(MANDOCERR_FILE, 0, 0, "%s", strerror(errno)); |
return -1; |
return -1; |
} |
} |
|
|
struct mparse * |
struct mparse * |
mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg, |
mparse_alloc(int options, enum mandoc_os os_e, const char *os_s) |
const char *defos) |
|
{ |
{ |
struct mparse *curp; |
struct mparse *curp; |
|
|
curp = mandoc_calloc(1, sizeof(struct mparse)); |
curp = mandoc_calloc(1, sizeof(struct mparse)); |
|
|
curp->options = options; |
curp->options = options; |
curp->wlevel = wlevel; |
curp->os_s = os_s; |
curp->mmsg = mmsg; |
|
curp->defos = defos; |
|
|
|
curp->roff = roff_alloc(curp, options); |
curp->roff = roff_alloc(curp, options); |
curp->man = roff_man_alloc( curp->roff, curp, curp->defos, |
curp->man = roff_man_alloc(curp->roff, curp, curp->os_s, |
curp->options & MPARSE_QUICK ? 1 : 0); |
curp->options & MPARSE_QUICK ? 1 : 0); |
if (curp->options & MPARSE_MDOC) { |
if (curp->options & MPARSE_MDOC) { |
mdoc_hash_init(); |
|
curp->man->macroset = MACROSET_MDOC; |
curp->man->macroset = MACROSET_MDOC; |
|
if (curp->man->mdocmac == NULL) |
|
curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX); |
} else if (curp->options & MPARSE_MAN) { |
} else if (curp->options & MPARSE_MAN) { |
man_hash_init(); |
|
curp->man->macroset = MACROSET_MAN; |
curp->man->macroset = MACROSET_MAN; |
|
if (curp->man->manmac == NULL) |
|
curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX); |
} |
} |
curp->man->first->tok = TOKEN_NONE; |
curp->man->first->tok = TOKEN_NONE; |
|
curp->man->meta.os_e = os_e; |
return curp; |
return curp; |
} |
} |
|
|
Line 833 mparse_reset(struct mparse *curp) |
|
Line 669 mparse_reset(struct mparse *curp) |
|
{ |
{ |
roff_reset(curp->roff); |
roff_reset(curp->roff); |
roff_man_reset(curp->man); |
roff_man_reset(curp->man); |
|
free_buf_list(curp->secondary); |
|
curp->secondary = NULL; |
free(curp->sodest); |
free(curp->sodest); |
curp->sodest = NULL; |
curp->sodest = NULL; |
|
|
if (curp->secondary) |
|
curp->secondary->sz = 0; |
|
|
|
curp->file_status = MANDOCLEVEL_OK; |
|
curp->gzip = 0; |
curp->gzip = 0; |
} |
} |
|
|
void |
void |
mparse_free(struct mparse *curp) |
mparse_free(struct mparse *curp) |
{ |
{ |
|
roffhash_free(curp->man->mdocmac); |
|
roffhash_free(curp->man->manmac); |
roff_man_free(curp->man); |
roff_man_free(curp->man); |
roff_free(curp->roff); |
roff_free(curp->roff); |
if (curp->secondary) |
free_buf_list(curp->secondary); |
free(curp->secondary->buf); |
|
|
|
free(curp->secondary); |
|
free(curp->sodest); |
free(curp->sodest); |
free(curp); |
free(curp); |
} |
} |
Line 872 mparse_result(struct mparse *curp, struct roff_man **m |
|
Line 702 mparse_result(struct mparse *curp, struct roff_man **m |
|
} |
} |
|
|
void |
void |
mparse_updaterc(struct mparse *curp, enum mandoclevel *rc) |
mparse_copy(const struct mparse *p) |
{ |
{ |
if (curp->file_status > *rc) |
struct buf *buf; |
*rc = curp->file_status; |
|
} |
|
|
|
void |
for (buf = p->secondary; buf != NULL; buf = buf->next) |
mandoc_vmsg(enum mandocerr t, struct mparse *m, |
puts(buf->buf); |
int ln, int pos, const char *fmt, ...) |
|
{ |
|
char buf[256]; |
|
va_list ap; |
|
|
|
va_start(ap, fmt); |
|
(void)vsnprintf(buf, sizeof(buf), fmt, ap); |
|
va_end(ap); |
|
|
|
mandoc_msg(t, m, ln, pos, buf); |
|
} |
|
|
|
void |
|
mandoc_msg(enum mandocerr er, struct mparse *m, |
|
int ln, int col, const char *msg) |
|
{ |
|
enum mandoclevel level; |
|
|
|
level = MANDOCLEVEL_UNSUPP; |
|
while (er < mandoclimits[level]) |
|
level--; |
|
|
|
if (level < m->wlevel && er != MANDOCERR_FILE) |
|
return; |
|
|
|
if (m->mmsg) |
|
(*m->mmsg)(er, level, m->file, ln, col, msg); |
|
|
|
if (m->file_status < level) |
|
m->file_status = level; |
|
} |
|
|
|
const char * |
|
mparse_strerror(enum mandocerr er) |
|
{ |
|
|
|
return mandocerrs[er]; |
|
} |
|
|
|
const char * |
|
mparse_strlevel(enum mandoclevel lvl) |
|
{ |
|
return mandoclevels[lvl]; |
|
} |
|
|
|
void |
|
mparse_keep(struct mparse *p) |
|
{ |
|
|
|
assert(NULL == p->secondary); |
|
p->secondary = mandoc_calloc(1, sizeof(struct buf)); |
|
} |
|
|
|
const char * |
|
mparse_getkeep(const struct mparse *p) |
|
{ |
|
|
|
assert(p->secondary); |
|
return p->secondary->sz ? p->secondary->buf : NULL; |
|
} |
} |