version 1.125, 2015/02/06 11:54:36 |
version 1.194, 2018/02/23 21:35:19 |
|
|
/* $Id$ */ |
/* $Id$ */ |
/* |
/* |
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org> |
* Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org> |
* Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> |
* Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> |
* |
* |
* Permission to use, copy, modify, and distribute this software for any |
* Permission to use, copy, modify, and distribute this software for any |
* purpose with or without fee is hereby granted, provided that the above |
* purpose with or without fee is hereby granted, provided that the above |
* copyright notice and this permission notice appear in all copies. |
* copyright notice and this permission notice appear in all copies. |
* |
* |
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES |
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR |
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
|
|
#include "config.h" |
#include "config.h" |
|
|
#include <sys/types.h> |
#include <sys/types.h> |
#if HAVE_MMAP |
|
#include <sys/mman.h> |
#include <sys/mman.h> |
#include <sys/stat.h> |
#include <sys/stat.h> |
#endif |
|
#include <sys/wait.h> |
|
|
|
#include <assert.h> |
#include <assert.h> |
#include <ctype.h> |
#include <ctype.h> |
#include <errno.h> |
#include <errno.h> |
#include <fcntl.h> |
#include <fcntl.h> |
#include <stdarg.h> |
#include <stdarg.h> |
#include <stdint.h> |
|
#include <stdio.h> |
#include <stdio.h> |
#include <stdlib.h> |
#include <stdlib.h> |
#include <string.h> |
#include <string.h> |
#include <unistd.h> |
#include <unistd.h> |
|
#include <zlib.h> |
|
|
#include "mandoc.h" |
|
#include "mandoc_aux.h" |
#include "mandoc_aux.h" |
#include "libmandoc.h" |
#include "mandoc.h" |
|
#include "roff.h" |
#include "mdoc.h" |
#include "mdoc.h" |
#include "man.h" |
#include "man.h" |
|
#include "libmandoc.h" |
|
|
#define REPARSE_LIMIT 1000 |
#define REPARSE_LIMIT 1000 |
|
|
struct mparse { |
struct mparse { |
struct man *pman; /* persistent man parser */ |
|
struct mdoc *pmdoc; /* persistent mdoc parser */ |
|
struct man *man; /* man parser */ |
|
struct mdoc *mdoc; /* mdoc parser */ |
|
struct roff *roff; /* roff parser (!NULL) */ |
struct roff *roff; /* roff parser (!NULL) */ |
const struct mchars *mchars; /* character table */ |
struct roff_man *man; /* man parser */ |
char *sodest; /* filename pointed to by .so */ |
char *sodest; /* filename pointed to by .so */ |
const char *file; /* filename of current input file */ |
const char *file; /* filename of current input file */ |
struct buf *primary; /* buffer currently being parsed */ |
struct buf *primary; /* buffer currently being parsed */ |
struct buf *secondary; /* preprocessed copy of input */ |
struct buf *secondary; /* preprocessed copy of input */ |
const char *defos; /* default operating system */ |
const char *os_s; /* default operating system */ |
mandocmsg mmsg; /* warning/error message handler */ |
mandocmsg mmsg; /* warning/error message handler */ |
enum mandoclevel file_status; /* status of current parse */ |
enum mandoclevel file_status; /* status of current parse */ |
enum mandoclevel wlevel; /* ignore messages below this */ |
enum mandocerr mmin; /* ignore messages below this */ |
int options; /* parser options */ |
int options; /* parser options */ |
|
int gzip; /* current input file is gzipped */ |
int filenc; /* encoding of the current file */ |
int filenc; /* encoding of the current file */ |
int reparse_count; /* finite interp. stack */ |
int reparse_count; /* finite interp. stack */ |
int line; /* line number in the file */ |
int line; /* line number in the file */ |
pid_t child; /* the gunzip(1) process */ |
|
}; |
}; |
|
|
static void choose_parser(struct mparse *); |
static void choose_parser(struct mparse *); |
static void resize_buf(struct buf *, size_t); |
static void resize_buf(struct buf *, size_t); |
static void mparse_buf_r(struct mparse *, struct buf, size_t, int); |
static int mparse_buf_r(struct mparse *, struct buf, size_t, int); |
static int read_whole_file(struct mparse *, const char *, int, |
static int read_whole_file(struct mparse *, const char *, int, |
struct buf *, int *); |
struct buf *, int *); |
static void mparse_end(struct mparse *); |
static void mparse_end(struct mparse *); |
Line 77 static void mparse_parse_buffer(struct mparse *, str |
|
Line 71 static void mparse_parse_buffer(struct mparse *, str |
|
|
|
static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { |
static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { |
MANDOCERR_OK, |
MANDOCERR_OK, |
|
MANDOCERR_OK, |
MANDOCERR_WARNING, |
MANDOCERR_WARNING, |
MANDOCERR_WARNING, |
|
MANDOCERR_ERROR, |
MANDOCERR_ERROR, |
MANDOCERR_UNSUPP, |
MANDOCERR_UNSUPP, |
MANDOCERR_MAX, |
MANDOCERR_MAX, |
Line 88 static const enum mandocerr mandoclimits[MANDOCLEVEL_M |
|
Line 82 static const enum mandocerr mandoclimits[MANDOCLEVEL_M |
|
static const char * const mandocerrs[MANDOCERR_MAX] = { |
static const char * const mandocerrs[MANDOCERR_MAX] = { |
"ok", |
"ok", |
|
|
|
"base system convention", |
|
|
|
"Mdocdate found", |
|
"Mdocdate missing", |
|
"unknown architecture", |
|
"operating system explicitly specified", |
|
"RCS id missing", |
|
"referenced manual not found", |
|
|
|
"generic style suggestion", |
|
|
|
"legacy man(7) date format", |
|
"lower case character in document title", |
|
"duplicate RCS id", |
|
"possible typo in section name", |
|
"unterminated quoted argument", |
|
"useless macro", |
|
"consider using OS macro", |
|
"errnos out of order", |
|
"duplicate errno", |
|
"trailing delimiter", |
|
"no blank before trailing delimiter", |
|
"fill mode already enabled, skipping", |
|
"fill mode already disabled, skipping", |
|
"function name without markup", |
|
"whitespace at end of input line", |
|
"bad comment style", |
|
|
"generic warning", |
"generic warning", |
|
|
/* related to the prologue */ |
/* related to the prologue */ |
"missing manual title, using UNTITLED", |
"missing manual title, using UNTITLED", |
"missing manual title, using \"\"", |
"missing manual title, using \"\"", |
"lower case character in document title", |
|
"missing manual section, using \"\"", |
"missing manual section, using \"\"", |
"unknown manual section", |
"unknown manual section", |
"missing date, using today's date", |
"missing date, using today's date", |
"cannot parse date, using it verbatim", |
"cannot parse date, using it verbatim", |
|
"date in the future, using it anyway", |
"missing Os macro, using \"\"", |
"missing Os macro, using \"\"", |
"duplicate prologue macro", |
|
"late prologue macro", |
"late prologue macro", |
"skipping late title macro", |
|
"prologue macros out of order", |
"prologue macros out of order", |
|
|
/* related to document structure */ |
/* related to document structure */ |
Line 109 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 129 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
"no document body", |
"no document body", |
"content before first section header", |
"content before first section header", |
"first section is not \"NAME\"", |
"first section is not \"NAME\"", |
"bad NAME section contents", |
"NAME section without Nm before Nd", |
|
"NAME section without description", |
|
"description not at the end of NAME", |
|
"bad NAME section content", |
|
"missing comma before name", |
"missing description line, using \"\"", |
"missing description line, using \"\"", |
|
"description line outside NAME section", |
"sections out of conventional order", |
"sections out of conventional order", |
"duplicate section title", |
"duplicate section title", |
"unexpected section", |
"unexpected section", |
|
"cross reference to self", |
"unusual Xr order", |
"unusual Xr order", |
"unusual Xr punctuation", |
"unusual Xr punctuation", |
"AUTHORS section without An macro", |
"AUTHORS section without An macro", |
Line 127 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 153 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
"blocks badly nested", |
"blocks badly nested", |
"nested displays are not portable", |
"nested displays are not portable", |
"moving content out of list", |
"moving content out of list", |
".Vt block has child macro", |
"first macro on line", |
"fill mode already enabled, skipping", |
|
"fill mode already disabled, skipping", |
|
"line scope broken", |
"line scope broken", |
|
"skipping blank line in line scope", |
|
|
/* related to missing macro arguments */ |
/* related to missing macro arguments */ |
"skipping empty request", |
"skipping empty request", |
Line 140 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 165 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
"empty argument, using 0n", |
"empty argument, using 0n", |
"missing display type, using -ragged", |
"missing display type, using -ragged", |
"list type is not the first argument", |
"list type is not the first argument", |
"missing -width in -tag list, using 8n", |
"missing -width in -tag list, using 6n", |
"missing utility name, using \"\"", |
"missing utility name, using \"\"", |
"missing function name, using \"\"", |
"missing function name, using \"\"", |
"empty head in list item", |
"empty head in list item", |
"empty list item", |
"empty list item", |
|
"missing argument, using next line", |
"missing font type, using \\fR", |
"missing font type, using \\fR", |
"unknown font type, using \\fR", |
"unknown font type, using \\fR", |
"nothing follows prefix", |
"nothing follows prefix", |
"empty reference block", |
"empty reference block", |
|
"missing section argument", |
"missing -std argument, adding it", |
"missing -std argument, adding it", |
"missing option string, using \"\"", |
"missing option string, using \"\"", |
"missing resource identifier, using \"\"", |
"missing resource identifier, using \"\"", |
"missing eqn box, using \"\"", |
"missing eqn box, using \"\"", |
|
|
/* related to bad macro arguments */ |
/* related to bad macro arguments */ |
"unterminated quoted argument", |
|
"duplicate argument", |
"duplicate argument", |
"skipping duplicate argument", |
"skipping duplicate argument", |
"skipping duplicate display type", |
"skipping duplicate display type", |
"skipping duplicate list type", |
"skipping duplicate list type", |
"skipping -width argument", |
"skipping -width argument", |
|
"wrong number of cells", |
"unknown AT&T UNIX version", |
"unknown AT&T UNIX version", |
"comma in function argument", |
"comma in function argument", |
"parenthesis in function name", |
"parenthesis in function name", |
|
"unknown library name", |
"invalid content in Rs block", |
"invalid content in Rs block", |
"invalid Boolean argument", |
"invalid Boolean argument", |
"unknown font, skipping request", |
"unknown font, skipping request", |
|
"odd number of characters in request", |
|
|
/* related to plain text */ |
/* related to plain text */ |
"blank line in fill mode, using .sp", |
"blank line in fill mode, using .sp", |
"tab in filled text", |
"tab in filled text", |
"whitespace at end of input line", |
"new sentence, new line", |
"bad comment style", |
|
"invalid escape sequence", |
"invalid escape sequence", |
"undefined string, using \"\"", |
"undefined string, using \"\"", |
|
|
Line 198 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 226 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
|
|
/* related to document structure and macros */ |
/* related to document structure and macros */ |
NULL, |
NULL, |
|
"duplicate prologue macro", |
|
"skipping late title macro", |
"input stack limit exceeded, infinite loop?", |
"input stack limit exceeded, infinite loop?", |
"skipping bad character", |
"skipping bad character", |
"skipping unknown macro", |
"skipping unknown macro", |
Line 211 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 241 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
|
|
/* related to request and macro arguments */ |
/* related to request and macro arguments */ |
"escaped character not allowed in a name", |
"escaped character not allowed in a name", |
"argument count wrong", |
|
"NOT IMPLEMENTED: Bd -file", |
"NOT IMPLEMENTED: Bd -file", |
|
"skipping display without arguments", |
"missing list type, using -item", |
"missing list type, using -item", |
|
"argument is not numeric, using 1", |
"missing manual name, using \"\"", |
"missing manual name, using \"\"", |
"uname(3) system call failed, using UNKNOWN", |
"uname(3) system call failed, using UNKNOWN", |
"unknown standard specifier", |
"unknown standard specifier", |
Line 235 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
Line 266 static const char * const mandocerrs[MANDOCERR_MAX] = |
|
|
|
static const char * const mandoclevels[MANDOCLEVEL_MAX] = { |
static const char * const mandoclevels[MANDOCLEVEL_MAX] = { |
"SUCCESS", |
"SUCCESS", |
"RESERVED", |
"STYLE", |
"WARNING", |
"WARNING", |
"ERROR", |
"ERROR", |
"UNSUPP", |
"UNSUPP", |
Line 287 choose_parser(struct mparse *curp) |
|
Line 318 choose_parser(struct mparse *curp) |
|
} |
} |
|
|
if (format == MPARSE_MDOC) { |
if (format == MPARSE_MDOC) { |
if (NULL == curp->pmdoc) |
curp->man->macroset = MACROSET_MDOC; |
curp->pmdoc = mdoc_alloc( |
if (curp->man->mdocmac == NULL) |
curp->roff, curp, curp->defos, |
curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX); |
MPARSE_QUICK & curp->options ? 1 : 0); |
} else { |
assert(curp->pmdoc); |
curp->man->macroset = MACROSET_MAN; |
curp->mdoc = curp->pmdoc; |
if (curp->man->manmac == NULL) |
return; |
curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX); |
} |
} |
|
curp->man->first->tok = TOKEN_NONE; |
/* Fall back to man(7) as a last resort. */ |
|
|
|
if (NULL == curp->pman) |
|
curp->pman = man_alloc( |
|
curp->roff, curp, curp->defos, |
|
MPARSE_QUICK & curp->options ? 1 : 0); |
|
assert(curp->pman); |
|
curp->man = curp->pman; |
|
} |
} |
|
|
/* |
/* |
Line 313 choose_parser(struct mparse *curp) |
|
Line 336 choose_parser(struct mparse *curp) |
|
* macros, inline equations, and input line traps) |
* macros, inline equations, and input line traps) |
* and indirectly (for .so file inclusion). |
* and indirectly (for .so file inclusion). |
*/ |
*/ |
static void |
static int |
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) |
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) |
{ |
{ |
const struct tbl_span *span; |
|
struct buf ln; |
struct buf ln; |
const char *save_file; |
const char *save_file; |
char *cp; |
char *cp; |
Line 325 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
Line 347 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
int of; |
int of; |
int lnn; /* line number in the real file */ |
int lnn; /* line number in the real file */ |
int fd; |
int fd; |
pid_t save_child; |
|
unsigned char c; |
unsigned char c; |
|
|
memset(&ln, 0, sizeof(ln)); |
memset(&ln, 0, sizeof(ln)); |
Line 398 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
Line 419 mparse_buf_r(struct mparse *curp, struct buf blk, size |
|
MANDOCERR_CHAR_UNSUPP, |
MANDOCERR_CHAR_UNSUPP, |
curp, curp->line, pos, "0x%x", c); |
curp, curp->line, pos, "0x%x", c); |
i++; |
i++; |
ln.buf[pos++] = '?'; |
if (c != '\r') |
|
ln.buf[pos++] = '?'; |
continue; |
continue; |
} |
} |
|
|
/* Trailing backslash = a plain char. */ |
|
|
|
if (blk.buf[i] != '\\' || i + 1 == blk.sz) { |
|
ln.buf[pos++] = blk.buf[i++]; |
|
continue; |
|
} |
|
|
|
/* |
|
* Found escape and at least one other character. |
|
* When it's a newline character, skip it. |
|
* When there is a carriage return in between, |
|
* skip that one as well. |
|
*/ |
|
|
|
if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz && |
|
'\n' == blk.buf[i + 2]) |
|
++i; |
|
if ('\n' == blk.buf[i + 1]) { |
|
i += 2; |
|
++lnn; |
|
continue; |
|
} |
|
|
|
if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) { |
|
i += 2; |
|
/* Comment, skip to end of line */ |
|
for (; i < blk.sz; ++i) { |
|
if ('\n' == blk.buf[i]) { |
|
++i; |
|
++lnn; |
|
break; |
|
} |
|
} |
|
|
|
/* Backout trailing whitespaces */ |
|
for (; pos > 0; --pos) { |
|
if (ln.buf[pos - 1] != ' ') |
|
break; |
|
if (pos > 2 && ln.buf[pos - 2] == '\\') |
|
break; |
|
} |
|
break; |
|
} |
|
|
|
/* Catch escaped bogus characters. */ |
|
|
|
c = (unsigned char) blk.buf[i+1]; |
|
|
|
if ( ! (isascii(c) && |
|
(isgraph(c) || isblank(c)))) { |
|
mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, |
|
curp->line, pos, "0x%x", c); |
|
i += 2; |
|
ln.buf[pos++] = '?'; |
|
continue; |
|
} |
|
|
|
/* Some other escape sequence, copy & cont. */ |
|
|
|
ln.buf[pos++] = blk.buf[i++]; |
ln.buf[pos++] = blk.buf[i++]; |
ln.buf[pos++] = blk.buf[i++]; |
|
} |
} |
|
|
if (pos >= ln.sz) |
if (pos + 1 >= ln.sz) |
resize_buf(&ln, 256); |
resize_buf(&ln, 256); |
|
|
|
if (i == blk.sz || blk.buf[i] == '\0') |
|
ln.buf[pos++] = '\n'; |
ln.buf[pos] = '\0'; |
ln.buf[pos] = '\0'; |
|
|
/* |
/* |
|
|
|
|
switch (rr) { |
switch (rr) { |
case ROFF_REPARSE: |
case ROFF_REPARSE: |
if (REPARSE_LIMIT >= ++curp->reparse_count) |
if (++curp->reparse_count > REPARSE_LIMIT) |
mparse_buf_r(curp, ln, of, 0); |
|
else |
|
mandoc_msg(MANDOCERR_ROFFLOOP, curp, |
mandoc_msg(MANDOCERR_ROFFLOOP, curp, |
curp->line, pos, NULL); |
curp->line, pos, NULL); |
pos = 0; |
else if (mparse_buf_r(curp, ln, of, 0) == 1 || |
continue; |
start == 1) { |
|
pos = 0; |
|
continue; |
|
} |
|
free(ln.buf); |
|
return 0; |
case ROFF_APPEND: |
case ROFF_APPEND: |
pos = strlen(ln.buf); |
pos = strlen(ln.buf); |
continue; |
continue; |
|
|
(i >= blk.sz || blk.buf[i] == '\0')) { |
(i >= blk.sz || blk.buf[i] == '\0')) { |
curp->sodest = mandoc_strdup(ln.buf + of); |
curp->sodest = mandoc_strdup(ln.buf + of); |
free(ln.buf); |
free(ln.buf); |
return; |
return 1; |
} |
} |
/* |
/* |
* We remove `so' clauses from our lookaside |
* We remove `so' clauses from our lookaside |
|
|
if (curp->secondary) |
if (curp->secondary) |
curp->secondary->sz -= pos + 1; |
curp->secondary->sz -= pos + 1; |
save_file = curp->file; |
save_file = curp->file; |
save_child = curp->child; |
if ((fd = mparse_open(curp, ln.buf + of)) != -1) { |
if (mparse_open(curp, &fd, ln.buf + of) == |
|
MANDOCLEVEL_OK) { |
|
mparse_readfd(curp, fd, ln.buf + of); |
mparse_readfd(curp, fd, ln.buf + of); |
|
close(fd); |
curp->file = save_file; |
curp->file = save_file; |
} else { |
} else { |
curp->file = save_file; |
curp->file = save_file; |
|
|
of = 0; |
of = 0; |
mparse_buf_r(curp, ln, of, 0); |
mparse_buf_r(curp, ln, of, 0); |
} |
} |
curp->child = save_child; |
|
pos = 0; |
pos = 0; |
continue; |
continue; |
default: |
default: |
break; |
break; |
} |
} |
|
|
/* |
if (curp->man->macroset == MACROSET_NONE) |
* If input parsers have not been allocated, do so now. |
|
* We keep these instanced between parsers, but set them |
|
* locally per parse routine since we can use different |
|
* parsers with each one. |
|
*/ |
|
|
|
if ( ! (curp->man || curp->mdoc)) |
|
choose_parser(curp); |
choose_parser(curp); |
|
|
/* |
if ((curp->man->macroset == MACROSET_MDOC ? |
* Lastly, push down into the parsers themselves. |
mdoc_parseln(curp->man, curp->line, ln.buf, of) : |
* If libroff returns ROFF_TBL, then add it to the |
|
* currently open parse. Since we only get here if |
|
* there does exist data (see tbl_data.c), we're |
|
* guaranteed that something's been allocated. |
|
* Do the same for ROFF_EQN. |
|
*/ |
|
|
|
if (rr == ROFF_TBL) { |
|
while ((span = roff_span(curp->roff)) != NULL) |
|
if (curp->man == NULL) |
|
mdoc_addspan(curp->mdoc, span); |
|
else |
|
man_addspan(curp->man, span); |
|
} else if (rr == ROFF_EQN) { |
|
if (curp->man == NULL) |
|
mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff)); |
|
else |
|
man_addeqn(curp->man, roff_eqn(curp->roff)); |
|
} else if ((curp->man == NULL ? |
|
mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) : |
|
man_parseln(curp->man, curp->line, ln.buf, of)) == 2) |
man_parseln(curp->man, curp->line, ln.buf, of)) == 2) |
break; |
break; |
|
|
|
|
} |
} |
|
|
free(ln.buf); |
free(ln.buf); |
|
return 1; |
} |
} |
|
|
static int |
static int |
read_whole_file(struct mparse *curp, const char *file, int fd, |
read_whole_file(struct mparse *curp, const char *file, int fd, |
struct buf *fb, int *with_mmap) |
struct buf *fb, int *with_mmap) |
{ |
{ |
|
struct stat st; |
|
gzFile gz; |
size_t off; |
size_t off; |
ssize_t ssz; |
ssize_t ssz; |
|
int gzerrnum, retval; |
|
|
#if HAVE_MMAP |
if (fstat(fd, &st) == -1) { |
struct stat st; |
mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, |
if (-1 == fstat(fd, &st)) { |
"fstat: %s", strerror(errno)); |
perror(file); |
return 0; |
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
} |
|
|
/* |
/* |
Line 630 read_whole_file(struct mparse *curp, const char *file, |
|
Line 571 read_whole_file(struct mparse *curp, const char *file, |
|
* concerned that this is going to tank any machines. |
* concerned that this is going to tank any machines. |
*/ |
*/ |
|
|
if (S_ISREG(st.st_mode)) { |
if (curp->gzip == 0 && S_ISREG(st.st_mode)) { |
if (st.st_size >= (1U << 31)) { |
if (st.st_size > 0x7fffffff) { |
mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL); |
mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL); |
return(0); |
return 0; |
} |
} |
*with_mmap = 1; |
*with_mmap = 1; |
fb->sz = (size_t)st.st_size; |
fb->sz = (size_t)st.st_size; |
fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); |
fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); |
if (fb->buf != MAP_FAILED) |
if (fb->buf != MAP_FAILED) |
return(1); |
return 1; |
} |
} |
#endif |
|
|
|
|
if (curp->gzip) { |
|
/* |
|
* Duplicating the file descriptor is required |
|
* because we will have to call gzclose(3) |
|
* to free memory used internally by zlib, |
|
* but that will also close the file descriptor, |
|
* which this function must not do. |
|
*/ |
|
if ((fd = dup(fd)) == -1) { |
|
mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, |
|
"dup: %s", strerror(errno)); |
|
return 0; |
|
} |
|
if ((gz = gzdopen(fd, "rb")) == NULL) { |
|
mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, |
|
"gzdopen: %s", strerror(errno)); |
|
close(fd); |
|
return 0; |
|
} |
|
} else |
|
gz = NULL; |
|
|
/* |
/* |
* If this isn't a regular file (like, say, stdin), then we must |
* If this isn't a regular file (like, say, stdin), then we must |
* go the old way and just read things in bit by bit. |
* go the old way and just read things in bit by bit. |
Line 650 read_whole_file(struct mparse *curp, const char *file, |
|
Line 612 read_whole_file(struct mparse *curp, const char *file, |
|
|
|
*with_mmap = 0; |
*with_mmap = 0; |
off = 0; |
off = 0; |
|
retval = 0; |
fb->sz = 0; |
fb->sz = 0; |
fb->buf = NULL; |
fb->buf = NULL; |
for (;;) { |
for (;;) { |
Line 661 read_whole_file(struct mparse *curp, const char *file, |
|
Line 624 read_whole_file(struct mparse *curp, const char *file, |
|
} |
} |
resize_buf(fb, 65536); |
resize_buf(fb, 65536); |
} |
} |
ssz = read(fd, fb->buf + (int)off, fb->sz - off); |
ssz = curp->gzip ? |
|
gzread(gz, fb->buf + (int)off, fb->sz - off) : |
|
read(fd, fb->buf + (int)off, fb->sz - off); |
if (ssz == 0) { |
if (ssz == 0) { |
fb->sz = off; |
fb->sz = off; |
return(1); |
retval = 1; |
|
break; |
} |
} |
if (ssz == -1) { |
if (ssz == -1) { |
perror(file); |
if (curp->gzip) |
exit((int)MANDOCLEVEL_SYSERR); |
(void)gzerror(gz, &gzerrnum); |
|
mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "read: %s", |
|
curp->gzip && gzerrnum != Z_ERRNO ? |
|
zError(gzerrnum) : strerror(errno)); |
|
break; |
} |
} |
off += (size_t)ssz; |
off += (size_t)ssz; |
} |
} |
|
|
free(fb->buf); |
if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK) |
fb->buf = NULL; |
mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "gzclose: %s", |
return(0); |
gzerrnum == Z_ERRNO ? strerror(errno) : |
|
zError(gzerrnum)); |
|
if (retval == 0) { |
|
free(fb->buf); |
|
fb->buf = NULL; |
|
} |
|
return retval; |
} |
} |
|
|
static void |
static void |
mparse_end(struct mparse *curp) |
mparse_end(struct mparse *curp) |
{ |
{ |
|
if (curp->man->macroset == MACROSET_NONE) |
if (curp->mdoc == NULL && |
curp->man->macroset = MACROSET_MAN; |
curp->man == NULL && |
if (curp->man->macroset == MACROSET_MDOC) |
curp->sodest == NULL) { |
mdoc_endparse(curp->man); |
if (curp->options & MPARSE_MDOC) |
else |
curp->mdoc = curp->pmdoc; |
|
else { |
|
if (curp->pman == NULL) |
|
curp->pman = man_alloc( |
|
curp->roff, curp, curp->defos, |
|
curp->options & MPARSE_QUICK ? 1 : 0); |
|
curp->man = curp->pman; |
|
} |
|
} |
|
if (curp->mdoc) |
|
mdoc_endparse(curp->mdoc); |
|
if (curp->man) |
|
man_endparse(curp->man); |
man_endparse(curp->man); |
roff_endparse(curp->roff); |
roff_endparse(curp->roff); |
} |
} |
Line 752 mparse_readmem(struct mparse *curp, void *buf, size_t |
|
Line 716 mparse_readmem(struct mparse *curp, void *buf, size_t |
|
blk.sz = len; |
blk.sz = len; |
|
|
mparse_parse_buffer(curp, blk, file); |
mparse_parse_buffer(curp, blk, file); |
return(curp->file_status); |
return curp->file_status; |
} |
} |
|
|
/* |
/* |
Line 772 mparse_readfd(struct mparse *curp, int fd, const char |
|
Line 736 mparse_readfd(struct mparse *curp, int fd, const char |
|
(MPARSE_UTF8 | MPARSE_LATIN1); |
(MPARSE_UTF8 | MPARSE_LATIN1); |
mparse_parse_buffer(curp, blk, file); |
mparse_parse_buffer(curp, blk, file); |
curp->filenc = save_filenc; |
curp->filenc = save_filenc; |
#if HAVE_MMAP |
|
if (with_mmap) |
if (with_mmap) |
munmap(blk.buf, blk.sz); |
munmap(blk.buf, blk.sz); |
else |
else |
#endif |
|
free(blk.buf); |
free(blk.buf); |
} |
} |
|
return curp->file_status; |
if (fd != STDIN_FILENO && close(fd) == -1) |
|
perror(file); |
|
|
|
mparse_wait(curp); |
|
return(curp->file_status); |
|
} |
} |
|
|
enum mandoclevel |
int |
mparse_open(struct mparse *curp, int *fd, const char *file) |
mparse_open(struct mparse *curp, const char *file) |
{ |
{ |
int pfd[2]; |
|
int save_errno; |
|
char *cp; |
char *cp; |
|
int fd; |
|
|
curp->file = file; |
curp->file = file; |
|
cp = strrchr(file, '.'); |
|
curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz")); |
|
|
/* Unless zipped, try to just open the file. */ |
/* First try to use the filename as it is. */ |
|
|
if ((cp = strrchr(file, '.')) == NULL || |
if ((fd = open(file, O_RDONLY)) != -1) |
strcmp(cp + 1, "gz")) { |
return fd; |
curp->child = 0; |
|
if ((*fd = open(file, O_RDONLY)) != -1) |
|
return(MANDOCLEVEL_OK); |
|
|
|
/* Open failed; try to append ".gz". */ |
/* |
|
* If that doesn't work and the filename doesn't |
|
* already end in .gz, try appending .gz. |
|
*/ |
|
|
|
if ( ! curp->gzip) { |
mandoc_asprintf(&cp, "%s.gz", file); |
mandoc_asprintf(&cp, "%s.gz", file); |
file = cp; |
fd = open(cp, O_RDONLY); |
} else |
|
cp = NULL; |
|
|
|
/* Before forking, make sure the file can be read. */ |
|
|
|
save_errno = errno; |
|
if (access(file, R_OK) == -1) { |
|
if (cp != NULL) |
|
errno = save_errno; |
|
free(cp); |
free(cp); |
*fd = -1; |
if (fd != -1) { |
curp->child = 0; |
curp->gzip = 1; |
mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno)); |
return fd; |
return(MANDOCLEVEL_ERROR); |
|
} |
|
|
|
/* Run gunzip(1). */ |
|
|
|
if (pipe(pfd) == -1) { |
|
perror("pipe"); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
|
|
switch (curp->child = fork()) { |
|
case -1: |
|
perror("fork"); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
case 0: |
|
close(pfd[0]); |
|
if (dup2(pfd[1], STDOUT_FILENO) == -1) { |
|
perror("dup"); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
} |
execlp("gunzip", "gunzip", "-c", file, NULL); |
|
perror("exec"); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
default: |
|
close(pfd[1]); |
|
*fd = pfd[0]; |
|
return(MANDOCLEVEL_OK); |
|
} |
} |
} |
|
|
|
enum mandoclevel |
/* Neither worked, give up. */ |
mparse_wait(struct mparse *curp) |
|
{ |
|
int status; |
|
|
|
if (curp->child == 0) |
mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno)); |
return(MANDOCLEVEL_OK); |
return -1; |
|
|
if (waitpid(curp->child, &status, 0) == -1) { |
|
perror("wait"); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
if (WIFSIGNALED(status)) { |
|
mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, |
|
"gunzip died from signal %d", WTERMSIG(status)); |
|
return(MANDOCLEVEL_ERROR); |
|
} |
|
if (WEXITSTATUS(status)) { |
|
mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, |
|
"gunzip failed with code %d", WEXITSTATUS(status)); |
|
return(MANDOCLEVEL_ERROR); |
|
} |
|
return(MANDOCLEVEL_OK); |
|
} |
} |
|
|
struct mparse * |
struct mparse * |
mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg, |
mparse_alloc(int options, enum mandocerr mmin, mandocmsg mmsg, |
const struct mchars *mchars, const char *defos) |
enum mandoc_os os_e, const char *os_s) |
{ |
{ |
struct mparse *curp; |
struct mparse *curp; |
|
|
curp = mandoc_calloc(1, sizeof(struct mparse)); |
curp = mandoc_calloc(1, sizeof(struct mparse)); |
|
|
curp->options = options; |
curp->options = options; |
curp->wlevel = wlevel; |
curp->mmin = mmin; |
curp->mmsg = mmsg; |
curp->mmsg = mmsg; |
curp->defos = defos; |
curp->os_s = os_s; |
|
|
curp->mchars = mchars; |
curp->roff = roff_alloc(curp, options); |
curp->roff = roff_alloc(curp, curp->mchars, options); |
curp->man = roff_man_alloc(curp->roff, curp, curp->os_s, |
if (curp->options & MPARSE_MDOC) |
curp->options & MPARSE_QUICK ? 1 : 0); |
curp->pmdoc = mdoc_alloc( |
if (curp->options & MPARSE_MDOC) { |
curp->roff, curp, curp->defos, |
curp->man->macroset = MACROSET_MDOC; |
curp->options & MPARSE_QUICK ? 1 : 0); |
if (curp->man->mdocmac == NULL) |
if (curp->options & MPARSE_MAN) |
curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX); |
curp->pman = man_alloc( |
} else if (curp->options & MPARSE_MAN) { |
curp->roff, curp, curp->defos, |
curp->man->macroset = MACROSET_MAN; |
curp->options & MPARSE_QUICK ? 1 : 0); |
if (curp->man->manmac == NULL) |
|
curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX); |
return(curp); |
} |
|
curp->man->first->tok = TOKEN_NONE; |
|
curp->man->meta.os_e = os_e; |
|
return curp; |
} |
} |
|
|
void |
void |
mparse_reset(struct mparse *curp) |
mparse_reset(struct mparse *curp) |
{ |
{ |
|
|
roff_reset(curp->roff); |
roff_reset(curp->roff); |
|
roff_man_reset(curp->man); |
|
|
if (curp->mdoc) |
free(curp->sodest); |
mdoc_reset(curp->mdoc); |
curp->sodest = NULL; |
if (curp->man) |
|
man_reset(curp->man); |
|
if (curp->secondary) |
if (curp->secondary) |
curp->secondary->sz = 0; |
curp->secondary->sz = 0; |
|
|
curp->file_status = MANDOCLEVEL_OK; |
curp->file_status = MANDOCLEVEL_OK; |
curp->mdoc = NULL; |
curp->gzip = 0; |
curp->man = NULL; |
|
|
|
free(curp->sodest); |
|
curp->sodest = NULL; |
|
} |
} |
|
|
void |
void |
mparse_free(struct mparse *curp) |
mparse_free(struct mparse *curp) |
{ |
{ |
|
|
if (curp->pmdoc) |
roffhash_free(curp->man->mdocmac); |
mdoc_free(curp->pmdoc); |
roffhash_free(curp->man->manmac); |
if (curp->pman) |
roff_man_free(curp->man); |
man_free(curp->pman); |
roff_free(curp->roff); |
if (curp->roff) |
|
roff_free(curp->roff); |
|
if (curp->secondary) |
if (curp->secondary) |
free(curp->secondary->buf); |
free(curp->secondary->buf); |
|
|
Line 943 mparse_free(struct mparse *curp) |
|
Line 843 mparse_free(struct mparse *curp) |
|
} |
} |
|
|
void |
void |
mparse_result(struct mparse *curp, |
mparse_result(struct mparse *curp, struct roff_man **man, |
struct mdoc **mdoc, struct man **man, char **sodest) |
char **sodest) |
{ |
{ |
|
|
if (sodest && NULL != (*sodest = curp->sodest)) { |
if (sodest && NULL != (*sodest = curp->sodest)) { |
*mdoc = NULL; |
|
*man = NULL; |
*man = NULL; |
return; |
return; |
} |
} |
if (mdoc) |
|
*mdoc = curp->mdoc; |
|
if (man) |
if (man) |
*man = curp->man; |
*man = curp->man; |
} |
} |
|
|
void |
void |
|
mparse_updaterc(struct mparse *curp, enum mandoclevel *rc) |
|
{ |
|
if (curp->file_status > *rc) |
|
*rc = curp->file_status; |
|
} |
|
|
|
void |
mandoc_vmsg(enum mandocerr t, struct mparse *m, |
mandoc_vmsg(enum mandocerr t, struct mparse *m, |
int ln, int pos, const char *fmt, ...) |
int ln, int pos, const char *fmt, ...) |
{ |
{ |
Line 978 mandoc_msg(enum mandocerr er, struct mparse *m, |
|
Line 882 mandoc_msg(enum mandocerr er, struct mparse *m, |
|
{ |
{ |
enum mandoclevel level; |
enum mandoclevel level; |
|
|
|
if (er < m->mmin && er != MANDOCERR_FILE) |
|
return; |
|
|
level = MANDOCLEVEL_UNSUPP; |
level = MANDOCLEVEL_UNSUPP; |
while (er < mandoclimits[level]) |
while (er < mandoclimits[level]) |
level--; |
level--; |
|
|
if (level < m->wlevel && er != MANDOCERR_FILE) |
|
return; |
|
|
|
if (m->mmsg) |
if (m->mmsg) |
(*m->mmsg)(er, level, m->file, ln, col, msg); |
(*m->mmsg)(er, level, m->file, ln, col, msg); |
|
|
|
|
mparse_strerror(enum mandocerr er) |
mparse_strerror(enum mandocerr er) |
{ |
{ |
|
|
return(mandocerrs[er]); |
return mandocerrs[er]; |
} |
} |
|
|
const char * |
const char * |
mparse_strlevel(enum mandoclevel lvl) |
mparse_strlevel(enum mandoclevel lvl) |
{ |
{ |
return(mandoclevels[lvl]); |
return mandoclevels[lvl]; |
} |
} |
|
|
void |
void |
Line 1018 mparse_getkeep(const struct mparse *p) |
|
Line 922 mparse_getkeep(const struct mparse *p) |
|
{ |
{ |
|
|
assert(p->secondary); |
assert(p->secondary); |
return(p->secondary->sz ? p->secondary->buf : NULL); |
return p->secondary->sz ? p->secondary->buf : NULL; |
} |
} |