version 1.2, 2011/03/20 11:43:06 |
version 1.220, 2021/06/27 17:57:54 |
|
|
/* $Id$ */ |
/* $Id$ */ |
/* |
/* |
|
* Copyright (c) 2010-2020 Ingo Schwarze <schwarze@openbsd.org> |
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> |
* Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> |
* |
* |
* Permission to use, copy, modify, and distribute this software for any |
* Permission to use, copy, modify, and distribute this software for any |
* purpose with or without fee is hereby granted, provided that the above |
* purpose with or without fee is hereby granted, provided that the above |
* copyright notice and this permission notice appear in all copies. |
* copyright notice and this permission notice appear in all copies. |
* |
* |
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES |
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR |
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
|
* |
|
* Top-level functions of the mandoc(3) parser: |
|
* Parser and input encoding selection, decompression, |
|
* handling of input bytes, characters, lines, and files, |
|
* handling of roff(7) loops and file inclusion, |
|
* and steering of the various parsers. |
*/ |
*/ |
#include <sys/stat.h> |
#include "config.h" |
|
|
|
#include <sys/types.h> |
#include <sys/mman.h> |
#include <sys/mman.h> |
|
#include <sys/stat.h> |
|
|
#include <assert.h> |
#include <assert.h> |
#include <ctype.h> |
#include <ctype.h> |
|
#include <errno.h> |
#include <fcntl.h> |
#include <fcntl.h> |
|
#include <stdarg.h> |
#include <stdio.h> |
#include <stdio.h> |
#include <stdlib.h> |
#include <stdlib.h> |
#include <string.h> |
#include <string.h> |
#include <unistd.h> |
#include <unistd.h> |
|
#include <zlib.h> |
|
|
|
#include "mandoc_aux.h" |
#include "mandoc.h" |
#include "mandoc.h" |
|
#include "roff.h" |
#include "mdoc.h" |
#include "mdoc.h" |
#include "man.h" |
#include "man.h" |
#include "roff.h" |
#include "mandoc_parse.h" |
|
#include "libmandoc.h" |
|
#include "roff_int.h" |
|
#include "tag.h" |
|
|
#ifndef MAP_FILE |
|
#define MAP_FILE 0 |
|
#endif |
|
|
|
#define REPARSE_LIMIT 1000 |
#define REPARSE_LIMIT 1000 |
|
|
struct buf { |
|
char *buf; /* binary input buffer */ |
|
size_t sz; /* size of binary buffer */ |
|
}; |
|
|
|
struct mparse { |
struct mparse { |
enum mandoclevel file_status; /* status of current parse */ |
|
int line; /* line number in the file */ |
|
enum mparset inttype; /* which parser to use */ |
|
struct man *pman; /* persistent man parser */ |
|
struct mdoc *pmdoc; /* persistent mdoc parser */ |
|
struct man *man; /* man parser */ |
|
struct mdoc *mdoc; /* mdoc parser */ |
|
struct roff *roff; /* roff parser (!NULL) */ |
struct roff *roff; /* roff parser (!NULL) */ |
struct regset regs; /* roff registers */ |
struct roff_man *man; /* man parser */ |
|
struct buf *primary; /* buffer currently being parsed */ |
|
struct buf *secondary; /* copy of top level input */ |
|
struct buf *loop; /* open .while request line */ |
|
const char *os_s; /* default operating system */ |
|
int options; /* parser options */ |
|
int gzip; /* current input file is gzipped */ |
|
int filenc; /* encoding of the current file */ |
int reparse_count; /* finite interp. stack */ |
int reparse_count; /* finite interp. stack */ |
mandocmsg mmsg; /* warning/error message handler */ |
int line; /* line number in the file */ |
void *arg; /* argument to mmsg */ |
|
mevt_open evt_open; /* file-open event */ |
|
mevt_close evt_close; /* file-close event */ |
|
const char *svfile; |
|
}; |
}; |
|
|
|
static void choose_parser(struct mparse *); |
|
static void free_buf_list(struct buf *); |
static void resize_buf(struct buf *, size_t); |
static void resize_buf(struct buf *, size_t); |
static void mparse_buf_r(struct mparse *, struct buf, int); |
static int mparse_buf_r(struct mparse *, struct buf, size_t, int); |
static void mparse_readfd_r(struct mparse *, int, const char *, int); |
static int read_whole_file(struct mparse *, int, struct buf *, int *); |
static void pset(const char *, int, struct mparse *); |
|
static void pdesc(struct mparse *, const char *, int); |
|
static int read_whole_file(const char *, int, struct buf *, int *); |
|
static void mparse_end(struct mparse *); |
static void mparse_end(struct mparse *); |
|
|
|
|
static void |
static void |
resize_buf(struct buf *buf, size_t initial) |
resize_buf(struct buf *buf, size_t initial) |
{ |
{ |
Line 77 resize_buf(struct buf *buf, size_t initial) |
|
Line 82 resize_buf(struct buf *buf, size_t initial) |
|
} |
} |
|
|
static void |
static void |
pset(const char *buf, int pos, struct mparse *curp) |
free_buf_list(struct buf *buf) |
{ |
{ |
int i; |
struct buf *tmp; |
|
|
|
while (buf != NULL) { |
|
tmp = buf; |
|
buf = tmp->next; |
|
free(tmp->buf); |
|
free(tmp); |
|
} |
|
} |
|
|
|
static void |
|
choose_parser(struct mparse *curp) |
|
{ |
|
char *cp, *ep; |
|
int format; |
|
|
/* |
/* |
* Try to intuit which kind of manual parser should be used. If |
* If neither command line arguments -mdoc or -man select |
* passed in by command-line (-man, -mdoc), then use that |
* a parser nor the roff parser found a .Dd or .TH macro |
* explicitly. If passed as -mandoc, then try to guess from the |
* yet, look ahead in the main input buffer. |
* line: either skip dot-lines, use -mdoc when finding `.Dt', or |
|
* default to -man, which is more lenient. |
|
* |
|
* Separate out pmdoc/pman from mdoc/man: the first persists |
|
* through all parsers, while the latter is used per-parse. |
|
*/ |
*/ |
|
|
if ('.' == buf[0] || '\'' == buf[0]) { |
if ((format = roff_getformat(curp->roff)) == 0) { |
for (i = 1; buf[i]; i++) |
cp = curp->primary->buf; |
if (' ' != buf[i] && '\t' != buf[i]) |
ep = cp + curp->primary->sz; |
|
while (cp < ep) { |
|
if (*cp == '.' || *cp == '\'') { |
|
cp++; |
|
if (cp[0] == 'D' && cp[1] == 'd') { |
|
format = MPARSE_MDOC; |
|
break; |
|
} |
|
if (cp[0] == 'T' && cp[1] == 'H') { |
|
format = MPARSE_MAN; |
|
break; |
|
} |
|
} |
|
cp = memchr(cp, '\n', ep - cp); |
|
if (cp == NULL) |
break; |
break; |
if ('\0' == buf[i]) |
cp++; |
return; |
} |
} |
} |
|
|
switch (curp->inttype) { |
if (format == MPARSE_MDOC) { |
case (MPARSE_MDOC): |
curp->man->meta.macroset = MACROSET_MDOC; |
if (NULL == curp->pmdoc) |
if (curp->man->mdocmac == NULL) |
curp->pmdoc = mdoc_alloc |
curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX); |
(&curp->regs, curp->arg, curp->mmsg); |
} else { |
assert(curp->pmdoc); |
curp->man->meta.macroset = MACROSET_MAN; |
curp->mdoc = curp->pmdoc; |
if (curp->man->manmac == NULL) |
return; |
curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX); |
case (MPARSE_MAN): |
|
if (NULL == curp->pman) |
|
curp->pman = man_alloc |
|
(&curp->regs, curp->arg, curp->mmsg); |
|
assert(curp->pman); |
|
curp->man = curp->pman; |
|
return; |
|
default: |
|
break; |
|
} |
} |
|
curp->man->meta.first->tok = TOKEN_NONE; |
if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) { |
|
if (NULL == curp->pmdoc) |
|
curp->pmdoc = mdoc_alloc |
|
(&curp->regs, curp->arg, curp->mmsg); |
|
assert(curp->pmdoc); |
|
curp->mdoc = curp->pmdoc; |
|
return; |
|
} |
|
|
|
if (NULL == curp->pman) |
|
curp->pman = man_alloc |
|
(&curp->regs, curp->arg, curp->mmsg); |
|
assert(curp->pman); |
|
curp->man = curp->pman; |
|
} |
} |
|
|
/* |
/* |
* Main parse routine for an opened file. This is called for each |
* Main parse routine for a buffer. |
* opened file and simply loops around the full input file, possibly |
* It assumes encoding and line numbering are already set up. |
* nesting (i.e., with `so'). |
* It can recurse directly (for invocations of user-defined |
|
* macros, inline equations, and input line traps) |
|
* and indirectly (for .so file inclusion). |
*/ |
*/ |
static void |
static int |
mparse_buf_r(struct mparse *curp, struct buf blk, int start) |
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) |
{ |
{ |
const struct tbl_span *span; |
|
struct buf ln; |
struct buf ln; |
enum rofferr rr; |
struct buf *firstln, *lastln, *thisln, *loop; |
int i, of, rc; |
char *cp; |
int pos; /* byte number in the ln buffer */ |
size_t pos; /* byte number in the ln buffer */ |
|
size_t spos; /* at the start of the current line parse */ |
|
int line_result, result; |
|
int of; |
int lnn; /* line number in the real file */ |
int lnn; /* line number in the real file */ |
|
int fd; |
|
int inloop; /* Saw .while on this level. */ |
unsigned char c; |
unsigned char c; |
|
|
memset(&ln, 0, sizeof(struct buf)); |
ln.sz = 256; |
|
ln.buf = mandoc_malloc(ln.sz); |
|
ln.next = NULL; |
|
firstln = lastln = loop = NULL; |
|
lnn = curp->line; |
|
pos = 0; |
|
inloop = 0; |
|
result = ROFF_CONT; |
|
|
lnn = curp->line; |
while (i < blk.sz && (blk.buf[i] != '\0' || pos != 0)) { |
pos = 0; |
|
|
|
for (i = 0; i < (int)blk.sz; ) { |
|
if (0 == pos && '\0' == blk.buf[i]) |
|
break; |
|
|
|
if (start) { |
if (start) { |
curp->line = lnn; |
curp->line = lnn; |
curp->reparse_count = 0; |
curp->reparse_count = 0; |
|
|
|
if (lnn < 3 && |
|
curp->filenc & MPARSE_UTF8 && |
|
curp->filenc & MPARSE_LATIN1) |
|
curp->filenc = preconv_cue(&blk, i); |
} |
} |
|
spos = pos; |
|
|
while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) { |
while (i < blk.sz && (start || blk.buf[i] != '\0')) { |
|
|
/* |
/* |
* When finding an unescaped newline character, |
* When finding an unescaped newline character, |
Line 173 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
Line 191 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
* Skip a preceding carriage return, if any. |
* Skip a preceding carriage return, if any. |
*/ |
*/ |
|
|
if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz && |
if ('\r' == blk.buf[i] && i + 1 < blk.sz && |
'\n' == blk.buf[i + 1]) |
'\n' == blk.buf[i + 1]) |
++i; |
++i; |
if ('\n' == blk.buf[i]) { |
if ('\n' == blk.buf[i]) { |
Line 182 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
Line 200 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
break; |
break; |
} |
} |
|
|
/* |
/* |
* Warn about bogus characters. If you're using |
* Make sure we have space for the worst |
* non-ASCII encoding, you're screwing your |
* case of 12 bytes: "\\[u10ffff]\n\0" |
* readers. Since I'd rather this not happen, |
|
* I'll be helpful and drop these characters so |
|
* we don't display gibberish. Note to manual |
|
* writers: use special characters. |
|
*/ |
*/ |
|
|
c = (unsigned char) blk.buf[i]; |
if (pos + 12 > ln.sz) |
|
resize_buf(&ln, 256); |
|
|
if ( ! (isascii(c) && |
/* |
(isgraph(c) || isblank(c)))) { |
* Encode 8-bit input. |
curp->mmsg(MANDOCERR_BADCHAR, curp->arg, |
*/ |
curp->line, pos, "ignoring byte"); |
|
i++; |
|
continue; |
|
} |
|
|
|
/* Trailing backslash = a plain char. */ |
c = blk.buf[i]; |
|
if (c & 0x80) { |
if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) { |
if ( ! (curp->filenc && preconv_encode( |
if (pos >= (int)ln.sz) |
&blk, &i, &ln, &pos, &curp->filenc))) { |
resize_buf(&ln, 256); |
mandoc_msg(MANDOCERR_CHAR_BAD, |
ln.buf[pos++] = blk.buf[i++]; |
curp->line, pos, "0x%x", c); |
|
ln.buf[pos++] = '?'; |
|
i++; |
|
} |
continue; |
continue; |
} |
} |
|
|
/* |
/* |
* Found escape and at least one other character. |
* Exclude control characters. |
* When it's a newline character, skip it. |
|
* When there is a carriage return in between, |
|
* skip that one as well. |
|
*/ |
*/ |
|
|
if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz && |
if (c == 0x7f || (c < 0x20 && c != 0x09)) { |
'\n' == blk.buf[i + 2]) |
mandoc_msg(c == 0x00 || c == 0x04 || |
++i; |
c > 0x0a ? MANDOCERR_CHAR_BAD : |
if ('\n' == blk.buf[i + 1]) { |
MANDOCERR_CHAR_UNSUPP, |
i += 2; |
curp->line, pos, "0x%x", c); |
++lnn; |
i++; |
|
if (c != '\r') |
|
ln.buf[pos++] = '?'; |
continue; |
continue; |
} |
} |
|
|
if ('"' == blk.buf[i + 1]) { |
ln.buf[pos++] = blk.buf[i++]; |
i += 2; |
} |
/* Comment, skip to end of line */ |
ln.buf[pos] = '\0'; |
for (; i < (int)blk.sz; ++i) { |
|
if ('\n' == blk.buf[i]) { |
|
++i; |
|
++lnn; |
|
break; |
|
} |
|
} |
|
|
|
/* Backout trailing whitespaces */ |
/* |
for (; pos > 0; --pos) { |
* Maintain a lookaside buffer of all lines. |
if (ln.buf[pos - 1] != ' ') |
* parsed from this input source. |
break; |
*/ |
if (pos > 2 && ln.buf[pos - 2] == '\\') |
|
break; |
|
} |
|
break; |
|
} |
|
|
|
/* Some other escape sequence, copy & cont. */ |
thisln = mandoc_malloc(sizeof(*thisln)); |
|
thisln->buf = mandoc_strdup(ln.buf); |
|
thisln->sz = strlen(ln.buf) + 1; |
|
thisln->next = NULL; |
|
if (firstln == NULL) { |
|
firstln = lastln = thisln; |
|
if (curp->secondary == NULL) |
|
curp->secondary = firstln; |
|
} else { |
|
lastln->next = thisln; |
|
lastln = thisln; |
|
} |
|
|
if (pos + 1 >= (int)ln.sz) |
/* XXX Ugly hack to mark the end of the input. */ |
resize_buf(&ln, 256); |
|
|
|
ln.buf[pos++] = blk.buf[i++]; |
if (i == blk.sz || blk.buf[i] == '\0') { |
ln.buf[pos++] = blk.buf[i++]; |
if (pos + 2 > ln.sz) |
|
resize_buf(&ln, 256); |
|
ln.buf[pos++] = '\n'; |
|
ln.buf[pos] = '\0'; |
} |
} |
|
|
if (pos >= (int)ln.sz) |
|
resize_buf(&ln, 256); |
|
|
|
ln.buf[pos] = '\0'; |
|
|
|
/* |
/* |
* A significant amount of complexity is contained by |
* A significant amount of complexity is contained by |
* the roff preprocessor. It's line-oriented but can be |
* the roff preprocessor. It's line-oriented but can be |
Line 271 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
Line 280 mparse_buf_r(struct mparse *curp, struct buf blk, int |
|
*/ |
*/ |
|
|
of = 0; |
of = 0; |
|
|
rerun: |
rerun: |
rr = roff_parseln |
line_result = roff_parseln(curp->roff, curp->line, |
(curp->roff, curp->line, |
&ln, &of, start && spos == 0 ? pos : 0); |
&ln.buf, &ln.sz, of, &of); |
|
|
|
switch (rr) { |
/* Process options. */ |
case (ROFF_REPARSE): |
|
if (REPARSE_LIMIT >= ++curp->reparse_count) |
|
mparse_buf_r(curp, ln, 0); |
|
else |
|
curp->mmsg(MANDOCERR_ROFFLOOP, curp->arg, |
|
curp->line, pos, NULL); |
|
pos = 0; |
|
continue; |
|
case (ROFF_APPEND): |
|
pos = (int)strlen(ln.buf); |
|
continue; |
|
case (ROFF_RERUN): |
|
goto rerun; |
|
case (ROFF_IGN): |
|
pos = 0; |
|
continue; |
|
case (ROFF_ERR): |
|
assert(MANDOCLEVEL_FATAL <= curp->file_status); |
|
break; |
|
case (ROFF_SO): |
|
mparse_readfd_r(curp, -1, ln.buf + of, 1); |
|
if (MANDOCLEVEL_FATAL <= curp->file_status) |
|
break; |
|
pos = 0; |
|
continue; |
|
default: |
|
break; |
|
} |
|
|
|
/* |
if (line_result & ROFF_APPEND) |
* If we encounter errors in the recursive parse, make |
assert(line_result == (ROFF_IGN | ROFF_APPEND)); |
* sure we don't continue parsing. |
|
*/ |
|
|
|
if (MANDOCLEVEL_FATAL <= curp->file_status) |
if (line_result & ROFF_USERCALL) |
break; |
assert((line_result & ROFF_MASK) == ROFF_REPARSE); |
|
|
/* |
if (line_result & ROFF_USERRET) { |
* If input parsers have not been allocated, do so now. |
assert(line_result == (ROFF_IGN | ROFF_USERRET)); |
* We keep these instanced betwen parsers, but set them |
if (start == 0) { |
* locally per parse routine since we can use different |
/* Return from the current macro. */ |
* parsers with each one. |
result = ROFF_USERRET; |
*/ |
goto out; |
|
} |
|
} |
|
|
if ( ! (curp->man || curp->mdoc)) |
switch (line_result & ROFF_LOOPMASK) { |
pset(ln.buf + of, pos - of, curp); |
case ROFF_IGN: |
|
break; |
/* |
case ROFF_WHILE: |
* Lastly, push down into the parsers themselves. One |
if (curp->loop != NULL) { |
* of these will have already been set in the pset() |
if (loop == curp->loop) |
* routine. |
|
* If libroff returns ROFF_TBL, then add it to the |
|
* currently open parse. Since we only get here if |
|
* there does exist data (see tbl_data.c), we're |
|
* guaranteed that something's been allocated. |
|
* Do the same for ROFF_EQN. |
|
*/ |
|
|
|
rc = -1; |
|
|
|
if (ROFF_TBL == rr) |
|
while (NULL != (span = roff_span(curp->roff))) { |
|
rc = curp->man ? |
|
man_addspan(curp->man, span) : |
|
mdoc_addspan(curp->mdoc, span); |
|
if (0 == rc) |
|
break; |
break; |
|
mandoc_msg(MANDOCERR_WHILE_NEST, |
|
curp->line, pos, NULL); |
} |
} |
else if (ROFF_EQN == rr) |
curp->loop = thisln; |
rc = curp->mdoc ? |
loop = NULL; |
mdoc_addeqn(curp->mdoc, |
inloop = 1; |
roff_eqn(curp->roff)) : |
|
man_addeqn(curp->man, |
|
roff_eqn(curp->roff)); |
|
else if (curp->man || curp->mdoc) |
|
rc = curp->man ? |
|
man_parseln(curp->man, |
|
curp->line, ln.buf, of) : |
|
mdoc_parseln(curp->mdoc, |
|
curp->line, ln.buf, of); |
|
|
|
if (0 == rc) { |
|
assert(MANDOCLEVEL_FATAL <= curp->file_status); |
|
break; |
break; |
|
case ROFF_LOOPCONT: |
|
case ROFF_LOOPEXIT: |
|
if (curp->loop == NULL) { |
|
mandoc_msg(MANDOCERR_WHILE_FAIL, |
|
curp->line, pos, NULL); |
|
break; |
|
} |
|
if (inloop == 0) { |
|
mandoc_msg(MANDOCERR_WHILE_INTO, |
|
curp->line, pos, NULL); |
|
curp->loop = loop = NULL; |
|
break; |
|
} |
|
if (line_result & ROFF_LOOPCONT) |
|
loop = curp->loop; |
|
else { |
|
curp->loop = loop = NULL; |
|
inloop = 0; |
|
} |
|
break; |
|
default: |
|
abort(); |
} |
} |
|
|
/* Temporary buffers typically are not full. */ |
/* Process the main instruction from the roff parser. */ |
|
|
if (0 == start && '\0' == blk.buf[i]) |
switch (line_result & ROFF_MASK) { |
|
case ROFF_IGN: |
break; |
break; |
|
case ROFF_CONT: |
|
if (curp->man->meta.macroset == MACROSET_NONE) |
|
choose_parser(curp); |
|
if ((curp->man->meta.macroset == MACROSET_MDOC ? |
|
mdoc_parseln(curp->man, curp->line, ln.buf, of) : |
|
man_parseln(curp->man, curp->line, ln.buf, of) |
|
) == 2) |
|
goto out; |
|
break; |
|
case ROFF_RERUN: |
|
goto rerun; |
|
case ROFF_REPARSE: |
|
if (++curp->reparse_count > REPARSE_LIMIT) { |
|
/* Abort and return to the top level. */ |
|
result = ROFF_IGN; |
|
mandoc_msg(MANDOCERR_ROFFLOOP, |
|
curp->line, pos, NULL); |
|
goto out; |
|
} |
|
result = mparse_buf_r(curp, ln, of, 0); |
|
if (line_result & ROFF_USERCALL) { |
|
roff_userret(curp->roff); |
|
/* Continue normally. */ |
|
if (result & ROFF_USERRET) |
|
result = ROFF_CONT; |
|
} |
|
if (start == 0 && result != ROFF_CONT) |
|
goto out; |
|
break; |
|
case ROFF_SO: |
|
if ( ! (curp->options & MPARSE_SO) && |
|
(i >= blk.sz || blk.buf[i] == '\0')) { |
|
curp->man->meta.sodest = |
|
mandoc_strdup(ln.buf + of); |
|
goto out; |
|
} |
|
if ((fd = mparse_open(curp, ln.buf + of)) != -1) { |
|
mparse_readfd(curp, fd, ln.buf + of); |
|
close(fd); |
|
} else { |
|
mandoc_msg(MANDOCERR_SO_FAIL, |
|
curp->line, of, ".so %s: %s", |
|
ln.buf + of, strerror(errno)); |
|
ln.sz = mandoc_asprintf(&cp, |
|
".sp\nSee the file %s.\n.sp", |
|
ln.buf + of); |
|
free(ln.buf); |
|
ln.buf = cp; |
|
of = 0; |
|
mparse_buf_r(curp, ln, of, 0); |
|
} |
|
break; |
|
default: |
|
abort(); |
|
} |
|
|
/* Start the next input line. */ |
/* Start the next input line. */ |
|
|
pos = 0; |
if (loop != NULL && |
} |
(line_result & ROFF_LOOPMASK) == ROFF_IGN) |
|
loop = loop->next; |
|
|
free(ln.buf); |
if (loop != NULL) { |
} |
if ((line_result & ROFF_APPEND) == 0) |
|
*ln.buf = '\0'; |
|
if (ln.sz < loop->sz) |
|
resize_buf(&ln, loop->sz); |
|
(void)strlcat(ln.buf, loop->buf, ln.sz); |
|
of = 0; |
|
goto rerun; |
|
} |
|
|
static void |
pos = (line_result & ROFF_APPEND) ? strlen(ln.buf) : 0; |
pdesc(struct mparse *curp, const char *file, int fd) |
|
{ |
|
struct buf blk; |
|
int with_mmap; |
|
|
|
/* |
|
* Run for each opened file; may be called more than once for |
|
* each full parse sequence if the opened file is nested (i.e., |
|
* from `so'). Simply sucks in the whole file and moves into |
|
* the parse phase for the file. |
|
*/ |
|
|
|
if ( ! read_whole_file(file, fd, &blk, &with_mmap)) { |
|
curp->file_status = MANDOCLEVEL_SYSERR; |
|
return; |
|
} |
} |
|
out: |
/* Line number is per-file. */ |
if (inloop) { |
|
if (result != ROFF_USERRET) |
curp->line = 1; |
mandoc_msg(MANDOCERR_WHILE_OUTOF, |
|
curp->line, pos, NULL); |
mparse_buf_r(curp, blk, 1); |
curp->loop = NULL; |
|
} |
if (with_mmap) |
free(ln.buf); |
munmap(blk.buf, blk.sz); |
if (firstln != curp->secondary) |
else |
free_buf_list(firstln); |
free(blk.buf); |
return result; |
} |
} |
|
|
static int |
static int |
read_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap) |
read_whole_file(struct mparse *curp, int fd, struct buf *fb, int *with_mmap) |
{ |
{ |
struct stat st; |
struct stat st; |
|
gzFile gz; |
size_t off; |
size_t off; |
ssize_t ssz; |
ssize_t ssz; |
|
int gzerrnum, retval; |
|
|
if (-1 == fstat(fd, &st)) { |
if (fstat(fd, &st) == -1) { |
perror(file); |
mandoc_msg(MANDOCERR_FSTAT, 0, 0, "%s", strerror(errno)); |
return(0); |
return -1; |
} |
} |
|
|
/* |
/* |
Line 426 read_whole_file(const char *file, int fd, struct buf * |
|
Line 452 read_whole_file(const char *file, int fd, struct buf * |
|
* concerned that this is going to tank any machines. |
* concerned that this is going to tank any machines. |
*/ |
*/ |
|
|
if (S_ISREG(st.st_mode)) { |
if (curp->gzip == 0 && S_ISREG(st.st_mode)) { |
if (st.st_size >= (1U << 31)) { |
if (st.st_size > 0x7fffffff) { |
fprintf(stderr, "%s: input too large\n", file); |
mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL); |
return(0); |
return -1; |
} |
} |
*with_mmap = 1; |
*with_mmap = 1; |
fb->sz = (size_t)st.st_size; |
fb->sz = (size_t)st.st_size; |
fb->buf = mmap(NULL, fb->sz, PROT_READ, |
fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); |
MAP_FILE|MAP_SHARED, fd, 0); |
|
if (fb->buf != MAP_FAILED) |
if (fb->buf != MAP_FAILED) |
return(1); |
return 0; |
} |
} |
|
|
|
if (curp->gzip) { |
|
/* |
|
* Duplicating the file descriptor is required |
|
* because we will have to call gzclose(3) |
|
* to free memory used internally by zlib, |
|
* but that will also close the file descriptor, |
|
* which this function must not do. |
|
*/ |
|
if ((fd = dup(fd)) == -1) { |
|
mandoc_msg(MANDOCERR_DUP, 0, 0, |
|
"%s", strerror(errno)); |
|
return -1; |
|
} |
|
if ((gz = gzdopen(fd, "rb")) == NULL) { |
|
mandoc_msg(MANDOCERR_GZDOPEN, 0, 0, |
|
"%s", strerror(errno)); |
|
close(fd); |
|
return -1; |
|
} |
|
} else |
|
gz = NULL; |
|
|
/* |
/* |
* If this isn't a regular file (like, say, stdin), then we must |
* If this isn't a regular file (like, say, stdin), then we must |
* go the old way and just read things in bit by bit. |
* go the old way and just read things in bit by bit. |
Line 446 read_whole_file(const char *file, int fd, struct buf * |
|
Line 493 read_whole_file(const char *file, int fd, struct buf * |
|
|
|
*with_mmap = 0; |
*with_mmap = 0; |
off = 0; |
off = 0; |
|
retval = -1; |
fb->sz = 0; |
fb->sz = 0; |
fb->buf = NULL; |
fb->buf = NULL; |
for (;;) { |
for (;;) { |
if (off == fb->sz) { |
if (off == fb->sz) { |
if (fb->sz == (1U << 31)) { |
if (fb->sz == (1U << 31)) { |
fprintf(stderr, "%s: input too large\n", file); |
mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL); |
break; |
break; |
} |
} |
resize_buf(fb, 65536); |
resize_buf(fb, 65536); |
} |
} |
ssz = read(fd, fb->buf + (int)off, fb->sz - off); |
ssz = curp->gzip ? |
|
gzread(gz, fb->buf + (int)off, fb->sz - off) : |
|
read(fd, fb->buf + (int)off, fb->sz - off); |
if (ssz == 0) { |
if (ssz == 0) { |
fb->sz = off; |
fb->sz = off; |
return(1); |
retval = 0; |
|
break; |
} |
} |
if (ssz == -1) { |
if (ssz == -1) { |
perror(file); |
if (curp->gzip) |
|
(void)gzerror(gz, &gzerrnum); |
|
mandoc_msg(MANDOCERR_READ, 0, 0, "%s", |
|
curp->gzip && gzerrnum != Z_ERRNO ? |
|
zError(gzerrnum) : strerror(errno)); |
break; |
break; |
} |
} |
off += (size_t)ssz; |
off += (size_t)ssz; |
} |
} |
|
|
free(fb->buf); |
if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK) |
fb->buf = NULL; |
mandoc_msg(MANDOCERR_GZCLOSE, 0, 0, "%s", |
return(0); |
gzerrnum == Z_ERRNO ? strerror(errno) : |
|
zError(gzerrnum)); |
|
if (retval == -1) { |
|
free(fb->buf); |
|
fb->buf = NULL; |
|
} |
|
return retval; |
} |
} |
|
|
static void |
static void |
mparse_end(struct mparse *curp) |
mparse_end(struct mparse *curp) |
{ |
{ |
|
if (curp->man->meta.macroset == MACROSET_NONE) |
|
curp->man->meta.macroset = MACROSET_MAN; |
|
if (curp->man->meta.macroset == MACROSET_MDOC) |
|
mdoc_endparse(curp->man); |
|
else |
|
man_endparse(curp->man); |
|
roff_endparse(curp->roff); |
|
} |
|
|
if (MANDOCLEVEL_FATAL <= curp->file_status) |
/* |
return; |
* Read the whole file into memory and call the parsers. |
|
* Called recursively when an .so request is encountered. |
|
*/ |
|
void |
|
mparse_readfd(struct mparse *curp, int fd, const char *filename) |
|
{ |
|
static int recursion_depth; |
|
|
if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) { |
struct buf blk; |
assert(MANDOCLEVEL_FATAL <= curp->file_status); |
struct buf *save_primary; |
|
const char *save_filename, *cp; |
|
size_t offset; |
|
int save_filenc, save_lineno; |
|
int with_mmap; |
|
|
|
if (recursion_depth > 64) { |
|
mandoc_msg(MANDOCERR_ROFFLOOP, curp->line, 0, NULL); |
return; |
return; |
} |
} else if (recursion_depth == 0 && |
|
(cp = strrchr(filename, '.')) != NULL && |
|
cp[1] >= '1' && cp[1] <= '9') |
|
curp->man->filesec = cp[1]; |
|
else |
|
curp->man->filesec = '\0'; |
|
|
if (curp->man && ! man_endparse(curp->man)) { |
if (read_whole_file(curp, fd, &blk, &with_mmap) == -1) |
assert(MANDOCLEVEL_FATAL <= curp->file_status); |
|
return; |
return; |
} |
|
|
|
#if 0 |
/* |
/* NOTE a parser may not have been assigned, yet. */ |
* Save some properties of the parent file. |
|
*/ |
|
|
if ( ! (curp->man || curp->mdoc)) { |
save_primary = curp->primary; |
/* FIXME: make into an mandoc.h error. */ |
save_filenc = curp->filenc; |
fprintf(stderr, "%s: Not a manual\n", curp->file); |
save_lineno = curp->line; |
curp->file_status = MANDOCLEVEL_FATAL; |
save_filename = mandoc_msg_getinfilename(); |
goto cleanup; |
|
} |
|
#endif |
|
|
|
roff_endparse(curp->roff); |
curp->primary = &blk; |
} |
curp->filenc = curp->options & (MPARSE_UTF8 | MPARSE_LATIN1); |
|
curp->line = 1; |
|
mandoc_msg_setinfilename(filename); |
|
|
static void |
/* Skip an UTF-8 byte order mark. */ |
mparse_readfd_r(struct mparse *curp, int fd, const char *file, int re) |
if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && |
{ |
(unsigned char)blk.buf[0] == 0xef && |
const char *svfile; |
(unsigned char)blk.buf[1] == 0xbb && |
|
(unsigned char)blk.buf[2] == 0xbf) { |
|
offset = 3; |
|
curp->filenc &= ~MPARSE_LATIN1; |
|
} else |
|
offset = 0; |
|
|
if ( ! (*curp->evt_open)(curp->arg, file)) { |
recursion_depth++; |
curp->file_status = MANDOCLEVEL_SYSERR; |
mparse_buf_r(curp, blk, offset, 1); |
return; |
if (--recursion_depth == 0) |
} |
mparse_end(curp); |
|
|
if (-1 == fd) |
/* |
if (-1 == (fd = open(file, O_RDONLY, 0))) { |
* Clean up and restore saved parent properties. |
perror(file); |
*/ |
curp->file_status = MANDOCLEVEL_SYSERR; |
|
return; |
|
} |
|
|
|
svfile = curp->svfile; |
if (with_mmap) |
curp->svfile = file; |
munmap(blk.buf, blk.sz); |
|
else |
|
free(blk.buf); |
|
|
pdesc(curp, file, fd); |
curp->primary = save_primary; |
|
curp->filenc = save_filenc; |
|
curp->line = save_lineno; |
|
if (save_filename != NULL) |
|
mandoc_msg_setinfilename(save_filename); |
|
} |
|
|
if (0 == re && MANDOCLEVEL_FATAL > curp->file_status) |
int |
mparse_end(curp); |
mparse_open(struct mparse *curp, const char *file) |
|
{ |
|
char *cp; |
|
int fd, save_errno; |
|
|
if (STDIN_FILENO != fd && -1 == close(fd)) |
cp = strrchr(file, '.'); |
perror(file); |
curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz")); |
|
|
(*curp->evt_close)(curp->arg, svfile); |
/* First try to use the filename as it is. */ |
curp->svfile = svfile; |
|
} |
|
|
|
enum mandoclevel |
if ((fd = open(file, O_RDONLY)) != -1) |
mparse_readfd(struct mparse *curp, int fd, const char *file) |
return fd; |
{ |
|
|
|
mparse_readfd_r(curp, fd, file, 0); |
/* |
return(curp->file_status); |
* If that doesn't work and the filename doesn't |
} |
* already end in .gz, try appending .gz. |
|
*/ |
|
|
void |
if ( ! curp->gzip) { |
mparse_setstatus(struct mparse *curp, enum mandoclevel lvl) |
save_errno = errno; |
{ |
mandoc_asprintf(&cp, "%s.gz", file); |
|
fd = open(cp, O_RDONLY); |
|
free(cp); |
|
errno = save_errno; |
|
if (fd != -1) { |
|
curp->gzip = 1; |
|
return fd; |
|
} |
|
} |
|
|
if (curp->file_status < lvl) |
/* Neither worked, give up. */ |
curp->file_status = lvl; |
|
|
return -1; |
} |
} |
|
|
struct mparse * |
struct mparse * |
mparse_alloc(enum mparset inttype, mevt_open eopen, |
mparse_alloc(int options, enum mandoc_os os_e, const char *os_s) |
mevt_close eclose, mandocmsg mmsg, void *arg) |
|
{ |
{ |
struct mparse *curp; |
struct mparse *curp; |
|
|
curp = mandoc_calloc(1, sizeof(struct mparse)); |
curp = mandoc_calloc(1, sizeof(struct mparse)); |
|
|
curp->mmsg = mmsg; |
curp->options = options; |
curp->arg = arg; |
curp->os_s = os_s; |
curp->inttype = inttype; |
|
curp->evt_open = eopen; |
|
curp->evt_close = eclose; |
|
|
|
curp->roff = roff_alloc(&curp->regs, arg, mmsg); |
curp->roff = roff_alloc(options); |
return(curp); |
curp->man = roff_man_alloc(curp->roff, curp->os_s, |
|
curp->options & MPARSE_QUICK ? 1 : 0); |
|
if (curp->options & MPARSE_MDOC) { |
|
curp->man->meta.macroset = MACROSET_MDOC; |
|
if (curp->man->mdocmac == NULL) |
|
curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX); |
|
} else if (curp->options & MPARSE_MAN) { |
|
curp->man->meta.macroset = MACROSET_MAN; |
|
if (curp->man->manmac == NULL) |
|
curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX); |
|
} |
|
curp->man->meta.first->tok = TOKEN_NONE; |
|
curp->man->meta.os_e = os_e; |
|
tag_alloc(); |
|
return curp; |
} |
} |
|
|
void |
void |
mparse_reset(struct mparse *curp) |
mparse_reset(struct mparse *curp) |
{ |
{ |
|
tag_free(); |
memset(&curp->regs, 0, sizeof(struct regset)); |
|
|
|
roff_reset(curp->roff); |
roff_reset(curp->roff); |
|
roff_man_reset(curp->man); |
if (curp->mdoc) |
free_buf_list(curp->secondary); |
mdoc_reset(curp->mdoc); |
curp->secondary = NULL; |
if (curp->man) |
curp->gzip = 0; |
man_reset(curp->man); |
tag_alloc(); |
|
|
curp->file_status = MANDOCLEVEL_OK; |
|
curp->mdoc = NULL; |
|
curp->man = NULL; |
|
} |
} |
|
|
void |
void |
mparse_free(struct mparse *curp) |
mparse_free(struct mparse *curp) |
{ |
{ |
|
tag_free(); |
if (curp->pmdoc) |
roffhash_free(curp->man->mdocmac); |
mdoc_free(curp->pmdoc); |
roffhash_free(curp->man->manmac); |
if (curp->pman) |
roff_man_free(curp->man); |
man_free(curp->pman); |
roff_free(curp->roff); |
if (curp->roff) |
free_buf_list(curp->secondary); |
roff_free(curp->roff); |
|
|
|
free(curp); |
free(curp); |
} |
} |
|
|
|
struct roff_meta * |
|
mparse_result(struct mparse *curp) |
|
{ |
|
roff_state_reset(curp->man); |
|
if (curp->options & MPARSE_VALIDATE) { |
|
if (curp->man->meta.macroset == MACROSET_MDOC) |
|
mdoc_validate(curp->man); |
|
else |
|
man_validate(curp->man); |
|
tag_postprocess(curp->man, curp->man->meta.first); |
|
} |
|
return &curp->man->meta; |
|
} |
|
|
void |
void |
mparse_result(struct mparse *curp, struct mdoc **mdoc, struct man **man) |
mparse_copy(const struct mparse *p) |
{ |
{ |
|
struct buf *buf; |
|
|
*mdoc = curp->mdoc; |
for (buf = p->secondary; buf != NULL; buf = buf->next) |
*man = curp->man; |
puts(buf->buf); |
} |
} |