Return to read.c CVS log | Up to [cvsweb.bsd.lv] / mandoc |
version 1.17, 2011/07/17 14:08:49 | version 1.142, 2015/10/06 18:32:19 | ||
---|---|---|---|
|
|
||
/* $Id$ */ | /* $Id$ */ | ||
/* | /* | ||
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> | * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> | ||
* Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> | * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org> | ||
* Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> | |||
* | * | ||
* Permission to use, copy, modify, and distribute this software for any | * Permission to use, copy, modify, and distribute this software for any | ||
* purpose with or without fee is hereby granted, provided that the above | * purpose with or without fee is hereby granted, provided that the above | ||
* copyright notice and this permission notice appear in all copies. | * copyright notice and this permission notice appear in all copies. | ||
* | * | ||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES | ||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR | ||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
*/ | */ | ||
#ifdef HAVE_CONFIG_H | |||
#include "config.h" | #include "config.h" | ||
#endif | |||
#ifdef HAVE_MMAP | #include <sys/types.h> | ||
# include <sys/stat.h> | #if HAVE_MMAP | ||
# include <sys/mman.h> | #include <sys/mman.h> | ||
#include <sys/stat.h> | |||
#endif | #endif | ||
#include <assert.h> | #include <assert.h> | ||
#include <ctype.h> | #include <ctype.h> | ||
#include <errno.h> | |||
#include <fcntl.h> | #include <fcntl.h> | ||
#include <stdarg.h> | #include <stdarg.h> | ||
#include <stdint.h> | |||
#include <stdio.h> | #include <stdio.h> | ||
#include <stdlib.h> | #include <stdlib.h> | ||
#include <string.h> | #include <string.h> | ||
#include <unistd.h> | #include <unistd.h> | ||
#include <zlib.h> | |||
#include "mandoc_aux.h" | |||
#include "mandoc.h" | #include "mandoc.h" | ||
#include "libmandoc.h" | #include "roff.h" | ||
#include "mdoc.h" | #include "mdoc.h" | ||
#include "man.h" | #include "man.h" | ||
#include "libmandoc.h" | |||
#include "roff_int.h" | |||
#ifndef MAP_FILE | |||
#define MAP_FILE 0 | |||
#endif | |||
#define REPARSE_LIMIT 1000 | #define REPARSE_LIMIT 1000 | ||
struct buf { | |||
char *buf; /* binary input buffer */ | |||
size_t sz; /* size of binary buffer */ | |||
}; | |||
struct mparse { | struct mparse { | ||
struct roff_man *man; /* man parser */ | |||
struct roff *roff; /* roff parser (!NULL) */ | |||
const struct mchars *mchars; /* character table */ | |||
char *sodest; /* filename pointed to by .so */ | |||
const char *file; /* filename of current input file */ | |||
struct buf *primary; /* buffer currently being parsed */ | |||
struct buf *secondary; /* preprocessed copy of input */ | |||
const char *defos; /* default operating system */ | |||
mandocmsg mmsg; /* warning/error message handler */ | |||
enum mandoclevel file_status; /* status of current parse */ | enum mandoclevel file_status; /* status of current parse */ | ||
enum mandoclevel wlevel; /* ignore messages below this */ | enum mandoclevel wlevel; /* ignore messages below this */ | ||
int line; /* line number in the file */ | int options; /* parser options */ | ||
enum mparset inttype; /* which parser to use */ | int gzip; /* current input file is gzipped */ | ||
struct man *pman; /* persistent man parser */ | int filenc; /* encoding of the current file */ | ||
struct mdoc *pmdoc; /* persistent mdoc parser */ | |||
struct man *man; /* man parser */ | |||
struct mdoc *mdoc; /* mdoc parser */ | |||
struct roff *roff; /* roff parser (!NULL) */ | |||
struct regset regs; /* roff registers */ | |||
int reparse_count; /* finite interp. stack */ | int reparse_count; /* finite interp. stack */ | ||
mandocmsg mmsg; /* warning/error message handler */ | int line; /* line number in the file */ | ||
void *arg; /* argument to mmsg */ | |||
const char *file; | |||
}; | }; | ||
static void choose_parser(struct mparse *); | |||
static void resize_buf(struct buf *, size_t); | static void resize_buf(struct buf *, size_t); | ||
static void mparse_buf_r(struct mparse *, struct buf, int); | static void mparse_buf_r(struct mparse *, struct buf, size_t, int); | ||
static void mparse_readfd_r(struct mparse *, int, const char *, int); | static int read_whole_file(struct mparse *, const char *, int, | ||
static void pset(const char *, int, struct mparse *); | struct buf *, int *); | ||
static void pdesc(struct mparse *, const char *, int); | |||
static int read_whole_file(const char *, int, struct buf *, int *); | |||
static void mparse_end(struct mparse *); | static void mparse_end(struct mparse *); | ||
static void mparse_parse_buffer(struct mparse *, struct buf, | |||
const char *); | |||
static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { | static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { | ||
MANDOCERR_OK, | MANDOCERR_OK, | ||
MANDOCERR_WARNING, | MANDOCERR_WARNING, | ||
MANDOCERR_WARNING, | MANDOCERR_WARNING, | ||
MANDOCERR_ERROR, | MANDOCERR_ERROR, | ||
MANDOCERR_FATAL, | MANDOCERR_UNSUPP, | ||
MANDOCERR_MAX, | MANDOCERR_MAX, | ||
MANDOCERR_MAX | MANDOCERR_MAX | ||
}; | }; | ||
|
|
||
"generic warning", | "generic warning", | ||
/* related to the prologue */ | /* related to the prologue */ | ||
"no title in document", | "missing manual title, using UNTITLED", | ||
"document title should be all caps", | "missing manual title, using \"\"", | ||
"lower case character in document title", | |||
"missing manual section, using \"\"", | |||
"unknown manual section", | "unknown manual section", | ||
"date missing, using today's date", | "missing date, using today's date", | ||
"cannot parse date, using it verbatim", | "cannot parse date, using it verbatim", | ||
"prologue macros out of order", | "missing Os macro, using \"\"", | ||
"duplicate prologue macro", | "duplicate prologue macro", | ||
"macro not allowed in prologue", | "late prologue macro", | ||
"macro not allowed in body", | "skipping late title macro", | ||
"prologue macros out of order", | |||
/* related to document structure */ | /* related to document structure */ | ||
".so is fragile, better use ln(1)", | ".so is fragile, better use ln(1)", | ||
"NAME section must come first", | "no document body", | ||
"bad NAME section contents", | "content before first section header", | ||
"manual name not yet set", | "first section is not \"NAME\"", | ||
"NAME section without name", | |||
"NAME section without description", | |||
"description not at the end of NAME", | |||
"bad NAME section content", | |||
"missing description line, using \"\"", | |||
"sections out of conventional order", | "sections out of conventional order", | ||
"duplicate section name", | "duplicate section title", | ||
"section not in conventional manual section", | "unexpected section", | ||
"unusual Xr order", | |||
"unusual Xr punctuation", | |||
"AUTHORS section without An macro", | |||
/* related to macros and nesting */ | /* related to macros and nesting */ | ||
"skipping obsolete macro", | "obsolete macro", | ||
"macro neither callable nor escaped", | |||
"skipping paragraph macro", | "skipping paragraph macro", | ||
"moving paragraph macro out of list", | |||
"skipping no-space macro", | "skipping no-space macro", | ||
"blocks badly nested", | "blocks badly nested", | ||
"child violates parent syntax", | |||
"nested displays are not portable", | "nested displays are not portable", | ||
"already in literal mode", | "moving content out of list", | ||
"fill mode already enabled, skipping", | |||
"fill mode already disabled, skipping", | |||
"line scope broken", | "line scope broken", | ||
/* related to missing macro arguments */ | /* related to missing macro arguments */ | ||
"skipping empty request", | |||
"conditional request controls empty scope", | |||
"skipping empty macro", | "skipping empty macro", | ||
"argument count wrong", | "empty block", | ||
"missing display type", | "empty argument, using 0n", | ||
"list type must come first", | "missing display type, using -ragged", | ||
"tag lists require a width argument", | "list type is not the first argument", | ||
"missing font type", | "missing -width in -tag list, using 8n", | ||
"skipping end of block that is not open", | "missing utility name, using \"\"", | ||
"missing function name, using \"\"", | |||
"empty head in list item", | |||
"empty list item", | |||
"missing font type, using \\fR", | |||
"unknown font type, using \\fR", | |||
"nothing follows prefix", | |||
"empty reference block", | |||
"missing -std argument, adding it", | |||
"missing option string, using \"\"", | |||
"missing resource identifier, using \"\"", | |||
"missing eqn box, using \"\"", | |||
/* related to bad macro arguments */ | /* related to bad macro arguments */ | ||
"skipping argument", | "unterminated quoted argument", | ||
"duplicate argument", | "duplicate argument", | ||
"duplicate display type", | "skipping duplicate argument", | ||
"duplicate list type", | "skipping duplicate display type", | ||
"skipping duplicate list type", | |||
"skipping -width argument", | |||
"wrong number of cells", | |||
"unknown AT&T UNIX version", | "unknown AT&T UNIX version", | ||
"bad Boolean value", | "comma in function argument", | ||
"unknown font", | "parenthesis in function name", | ||
"unknown standard specifier", | "invalid content in Rs block", | ||
"bad width argument", | "invalid Boolean argument", | ||
"unknown font, skipping request", | |||
"odd number of characters in request", | |||
/* related to plain text */ | /* related to plain text */ | ||
"blank line in non-literal context", | "blank line in fill mode, using .sp", | ||
"tab in non-literal context", | "tab in filled text", | ||
"end of line whitespace", | "whitespace at end of input line", | ||
"bad comment style", | "bad comment style", | ||
"bad escape sequence", | "invalid escape sequence", | ||
"unterminated quoted string", | "undefined string, using \"\"", | ||
/* related to equations */ | /* related to tables */ | ||
"unexpected literal in equation", | "tbl line starts with span", | ||
"tbl column starts with span", | |||
"skipping vertical bar in tbl layout", | |||
"generic error", | "generic error", | ||
/* related to equations */ | |||
"bad equation macro syntax", | |||
/* related to tables */ | /* related to tables */ | ||
"bad table syntax", | "non-alphabetic character in tbl options", | ||
"bad table option", | "skipping unknown tbl option", | ||
"bad table layout", | "missing tbl option argument", | ||
"no table layout cells specified", | "wrong tbl option argument size", | ||
"no table data cells specified", | "empty tbl layout", | ||
"ignore data in cell", | "invalid character in tbl layout", | ||
"data block still open", | "unmatched parenthesis in tbl layout", | ||
"ignoring extra data cells", | "tbl without any data cells", | ||
"ignoring data in spanned tbl cell", | |||
"ignoring extra tbl data cells", | |||
"data block open at end of tbl", | |||
/* related to document structure and macros */ | |||
NULL, | |||
"input stack limit exceeded, infinite loop?", | "input stack limit exceeded, infinite loop?", | ||
"skipping bad character", | "skipping bad character", | ||
"escaped character not allowed in a name", | |||
"skipping text before the first section header", | |||
"skipping unknown macro", | "skipping unknown macro", | ||
"NOT IMPLEMENTED, please use groff: skipping request", | "skipping insecure request", | ||
"argument count wrong", | "skipping item outside list", | ||
"skipping column outside column list", | |||
"skipping end of block that is not open", | "skipping end of block that is not open", | ||
"missing end of block", | "fewer RS blocks open, skipping", | ||
"scope open on exit", | "inserting missing end of block", | ||
"uname(3) system call failed", | "appending missing end of block", | ||
"macro requires line argument(s)", | |||
"macro requires body argument(s)", | |||
"macro requires argument(s)", | |||
"missing list type", | |||
"line argument(s) will be lost", | |||
"body argument(s) will be lost", | |||
"generic fatal error", | /* related to request and macro arguments */ | ||
"escaped character not allowed in a name", | |||
"not a manual", | "NOT IMPLEMENTED: Bd -file", | ||
"column syntax is inconsistent", | "missing list type, using -item", | ||
"NOT IMPLEMENTED: .Bd -file", | "missing manual name, using \"\"", | ||
"line scope broken, syntax violated", | "uname(3) system call failed, using UNKNOWN", | ||
"argument count wrong, violates syntax", | "unknown standard specifier", | ||
"child violates parent syntax", | "skipping request without numeric argument", | ||
"argument count wrong, violates syntax", | |||
"NOT IMPLEMENTED: .so with absolute path or \"..\"", | "NOT IMPLEMENTED: .so with absolute path or \"..\"", | ||
"no document body", | ".so request failed", | ||
"no document prologue", | "skipping all arguments", | ||
"static buffer exhausted", | "skipping excess arguments", | ||
"divide by zero", | |||
"unsupported feature", | |||
"input too large", | |||
"unsupported control character", | |||
"unsupported roff request", | |||
"eqn delim option in tbl", | |||
"unsupported tbl layout modifier", | |||
"ignoring macro in table", | |||
}; | }; | ||
static const char * const mandoclevels[MANDOCLEVEL_MAX] = { | static const char * const mandoclevels[MANDOCLEVEL_MAX] = { | ||
|
|
||
"RESERVED", | "RESERVED", | ||
"WARNING", | "WARNING", | ||
"ERROR", | "ERROR", | ||
"FATAL", | "UNSUPP", | ||
"BADARG", | "BADARG", | ||
"SYSERR" | "SYSERR" | ||
}; | }; | ||
static void | static void | ||
resize_buf(struct buf *buf, size_t initial) | resize_buf(struct buf *buf, size_t initial) | ||
{ | { | ||
|
|
||
} | } | ||
static void | static void | ||
pset(const char *buf, int pos, struct mparse *curp) | choose_parser(struct mparse *curp) | ||
{ | { | ||
int i; | char *cp, *ep; | ||
int format; | |||
/* | /* | ||
* Try to intuit which kind of manual parser should be used. If | * If neither command line arguments -mdoc or -man select | ||
* passed in by command-line (-man, -mdoc), then use that | * a parser nor the roff parser found a .Dd or .TH macro | ||
* explicitly. If passed as -mandoc, then try to guess from the | * yet, look ahead in the main input buffer. | ||
* line: either skip dot-lines, use -mdoc when finding `.Dt', or | |||
* default to -man, which is more lenient. | |||
* | |||
* Separate out pmdoc/pman from mdoc/man: the first persists | |||
* through all parsers, while the latter is used per-parse. | |||
*/ | */ | ||
if ('.' == buf[0] || '\'' == buf[0]) { | if ((format = roff_getformat(curp->roff)) == 0) { | ||
for (i = 1; buf[i]; i++) | cp = curp->primary->buf; | ||
if (' ' != buf[i] && '\t' != buf[i]) | ep = cp + curp->primary->sz; | ||
while (cp < ep) { | |||
if (*cp == '.' || *cp == '\'') { | |||
cp++; | |||
if (cp[0] == 'D' && cp[1] == 'd') { | |||
format = MPARSE_MDOC; | |||
break; | |||
} | |||
if (cp[0] == 'T' && cp[1] == 'H') { | |||
format = MPARSE_MAN; | |||
break; | |||
} | |||
} | |||
cp = memchr(cp, '\n', ep - cp); | |||
if (cp == NULL) | |||
break; | break; | ||
if ('\0' == buf[i]) | cp++; | ||
return; | } | ||
} | } | ||
switch (curp->inttype) { | if (curp->man == NULL) { | ||
case (MPARSE_MDOC): | curp->man = roff_man_alloc(curp->roff, curp, curp->defos, | ||
if (NULL == curp->pmdoc) | curp->options & MPARSE_QUICK ? 1 : 0); | ||
curp->pmdoc = mdoc_alloc(&curp->regs, curp); | curp->man->macroset = MACROSET_MAN; | ||
assert(curp->pmdoc); | curp->man->first->tok = TOKEN_NONE; | ||
curp->mdoc = curp->pmdoc; | |||
return; | |||
case (MPARSE_MAN): | |||
if (NULL == curp->pman) | |||
curp->pman = man_alloc(&curp->regs, curp); | |||
assert(curp->pman); | |||
curp->man = curp->pman; | |||
return; | |||
default: | |||
break; | |||
} | } | ||
if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) { | if (format == MPARSE_MDOC) { | ||
if (NULL == curp->pmdoc) | mdoc_hash_init(); | ||
curp->pmdoc = mdoc_alloc(&curp->regs, curp); | curp->man->macroset = MACROSET_MDOC; | ||
assert(curp->pmdoc); | curp->man->first->tok = TOKEN_NONE; | ||
curp->mdoc = curp->pmdoc; | } else { | ||
return; | man_hash_init(); | ||
} | curp->man->macroset = MACROSET_MAN; | ||
curp->man->first->tok = TOKEN_NONE; | |||
if (NULL == curp->pman) | } | ||
curp->pman = man_alloc(&curp->regs, curp); | |||
assert(curp->pman); | |||
curp->man = curp->pman; | |||
} | } | ||
/* | /* | ||
* Main parse routine for an opened file. This is called for each | * Main parse routine for a buffer. | ||
* opened file and simply loops around the full input file, possibly | * It assumes encoding and line numbering are already set up. | ||
* nesting (i.e., with `so'). | * It can recurse directly (for invocations of user-defined | ||
* macros, inline equations, and input line traps) | |||
* and indirectly (for .so file inclusion). | |||
*/ | */ | ||
static void | static void | ||
mparse_buf_r(struct mparse *curp, struct buf blk, int start) | mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) | ||
{ | { | ||
const struct tbl_span *span; | const struct tbl_span *span; | ||
struct buf ln; | struct buf ln; | ||
const char *save_file; | |||
char *cp; | |||
size_t pos; /* byte number in the ln buffer */ | |||
enum rofferr rr; | enum rofferr rr; | ||
int i, of, rc; | int of; | ||
int pos; /* byte number in the ln buffer */ | |||
int lnn; /* line number in the real file */ | int lnn; /* line number in the real file */ | ||
int fd; | |||
unsigned char c; | unsigned char c; | ||
memset(&ln, 0, sizeof(struct buf)); | memset(&ln, 0, sizeof(ln)); | ||
lnn = curp->line; | lnn = curp->line; | ||
pos = 0; | pos = 0; | ||
for (i = 0; i < (int)blk.sz; ) { | while (i < blk.sz) { | ||
if (0 == pos && '\0' == blk.buf[i]) | if (0 == pos && '\0' == blk.buf[i]) | ||
break; | break; | ||
if (start) { | if (start) { | ||
curp->line = lnn; | curp->line = lnn; | ||
curp->reparse_count = 0; | curp->reparse_count = 0; | ||
if (lnn < 3 && | |||
curp->filenc & MPARSE_UTF8 && | |||
curp->filenc & MPARSE_LATIN1) | |||
curp->filenc = preconv_cue(&blk, i); | |||
} | } | ||
while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) { | while (i < blk.sz && (start || blk.buf[i] != '\0')) { | ||
/* | /* | ||
* When finding an unescaped newline character, | * When finding an unescaped newline character, | ||
|
|
||
* Skip a preceding carriage return, if any. | * Skip a preceding carriage return, if any. | ||
*/ | */ | ||
if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz && | if ('\r' == blk.buf[i] && i + 1 < blk.sz && | ||
'\n' == blk.buf[i + 1]) | '\n' == blk.buf[i + 1]) | ||
++i; | ++i; | ||
if ('\n' == blk.buf[i]) { | if ('\n' == blk.buf[i]) { | ||
|
|
||
break; | break; | ||
} | } | ||
/* | /* | ||
* Warn about bogus characters. If you're using | * Make sure we have space for the worst | ||
* non-ASCII encoding, you're screwing your | * case of 11 bytes: "\\[u10ffff]\0" | ||
* readers. Since I'd rather this not happen, | |||
* I'll be helpful and drop these characters so | |||
* we don't display gibberish. Note to manual | |||
* writers: use special characters. | |||
*/ | */ | ||
c = (unsigned char) blk.buf[i]; | if (pos + 11 > ln.sz) | ||
resize_buf(&ln, 256); | |||
if ( ! (isascii(c) && | /* | ||
(isgraph(c) || isblank(c)))) { | * Encode 8-bit input. | ||
mandoc_msg(MANDOCERR_BADCHAR, curp, | */ | ||
curp->line, pos, "ignoring byte"); | |||
c = blk.buf[i]; | |||
if (c & 0x80) { | |||
if ( ! (curp->filenc && preconv_encode( | |||
&blk, &i, &ln, &pos, &curp->filenc))) { | |||
mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, | |||
curp->line, pos, "0x%x", c); | |||
ln.buf[pos++] = '?'; | |||
i++; | |||
} | |||
continue; | |||
} | |||
/* | |||
* Exclude control characters. | |||
*/ | |||
if (c == 0x7f || (c < 0x20 && c != 0x09)) { | |||
mandoc_vmsg(c == 0x00 || c == 0x04 || | |||
c > 0x0a ? MANDOCERR_CHAR_BAD : | |||
MANDOCERR_CHAR_UNSUPP, | |||
curp, curp->line, pos, "0x%x", c); | |||
i++; | i++; | ||
if (c != '\r') | |||
ln.buf[pos++] = '?'; | |||
continue; | continue; | ||
} | } | ||
/* Trailing backslash = a plain char. */ | /* Trailing backslash = a plain char. */ | ||
if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) { | if (blk.buf[i] != '\\' || i + 1 == blk.sz) { | ||
if (pos >= (int)ln.sz) | |||
resize_buf(&ln, 256); | |||
ln.buf[pos++] = blk.buf[i++]; | ln.buf[pos++] = blk.buf[i++]; | ||
continue; | continue; | ||
} | } | ||
|
|
||
* skip that one as well. | * skip that one as well. | ||
*/ | */ | ||
if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz && | if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz && | ||
'\n' == blk.buf[i + 2]) | '\n' == blk.buf[i + 2]) | ||
++i; | ++i; | ||
if ('\n' == blk.buf[i + 1]) { | if ('\n' == blk.buf[i + 1]) { | ||
|
|
||
if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) { | if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) { | ||
i += 2; | i += 2; | ||
/* Comment, skip to end of line */ | /* Comment, skip to end of line */ | ||
for (; i < (int)blk.sz; ++i) { | for (; i < blk.sz; ++i) { | ||
if ('\n' == blk.buf[i]) { | if ('\n' == blk.buf[i]) { | ||
++i; | ++i; | ||
++lnn; | ++lnn; | ||
|
|
||
break; | break; | ||
} | } | ||
/* Some other escape sequence, copy & cont. */ | /* Catch escaped bogus characters. */ | ||
if (pos + 1 >= (int)ln.sz) | c = (unsigned char) blk.buf[i+1]; | ||
resize_buf(&ln, 256); | |||
if ( ! (isascii(c) && | |||
(isgraph(c) || isblank(c)))) { | |||
mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, | |||
curp->line, pos, "0x%x", c); | |||
i += 2; | |||
ln.buf[pos++] = '?'; | |||
continue; | |||
} | |||
/* Some other escape sequence, copy & cont. */ | |||
ln.buf[pos++] = blk.buf[i++]; | ln.buf[pos++] = blk.buf[i++]; | ||
ln.buf[pos++] = blk.buf[i++]; | ln.buf[pos++] = blk.buf[i++]; | ||
} | } | ||
if (pos >= (int)ln.sz) | if (pos >= ln.sz) | ||
resize_buf(&ln, 256); | resize_buf(&ln, 256); | ||
ln.buf[pos] = '\0'; | ln.buf[pos] = '\0'; | ||
|
|
||
of = 0; | of = 0; | ||
/* | |||
* Maintain a lookaside buffer of all parsed lines. We | |||
* only do this if mparse_keep() has been invoked (the | |||
* buffer may be accessed with mparse_getkeep()). | |||
*/ | |||
if (curp->secondary) { | |||
curp->secondary->buf = mandoc_realloc( | |||
curp->secondary->buf, | |||
curp->secondary->sz + pos + 2); | |||
memcpy(curp->secondary->buf + | |||
curp->secondary->sz, | |||
ln.buf, pos); | |||
curp->secondary->sz += pos; | |||
curp->secondary->buf | |||
[curp->secondary->sz] = '\n'; | |||
curp->secondary->sz++; | |||
curp->secondary->buf | |||
[curp->secondary->sz] = '\0'; | |||
} | |||
rerun: | rerun: | ||
rr = roff_parseln | rr = roff_parseln(curp->roff, curp->line, &ln, &of); | ||
(curp->roff, curp->line, | |||
&ln.buf, &ln.sz, of, &of); | |||
switch (rr) { | switch (rr) { | ||
case (ROFF_REPARSE): | case ROFF_REPARSE: | ||
if (REPARSE_LIMIT >= ++curp->reparse_count) | if (REPARSE_LIMIT >= ++curp->reparse_count) | ||
mparse_buf_r(curp, ln, 0); | mparse_buf_r(curp, ln, of, 0); | ||
else | else | ||
mandoc_msg(MANDOCERR_ROFFLOOP, curp, | mandoc_msg(MANDOCERR_ROFFLOOP, curp, | ||
curp->line, pos, NULL); | curp->line, pos, NULL); | ||
pos = 0; | pos = 0; | ||
continue; | continue; | ||
case (ROFF_APPEND): | case ROFF_APPEND: | ||
pos = (int)strlen(ln.buf); | pos = strlen(ln.buf); | ||
continue; | continue; | ||
case (ROFF_RERUN): | case ROFF_RERUN: | ||
goto rerun; | goto rerun; | ||
case (ROFF_IGN): | case ROFF_IGN: | ||
pos = 0; | pos = 0; | ||
continue; | continue; | ||
case (ROFF_ERR): | case ROFF_SO: | ||
assert(MANDOCLEVEL_FATAL <= curp->file_status); | if ( ! (curp->options & MPARSE_SO) && | ||
break; | (i >= blk.sz || blk.buf[i] == '\0')) { | ||
case (ROFF_SO): | curp->sodest = mandoc_strdup(ln.buf + of); | ||
mparse_readfd_r(curp, -1, ln.buf + of, 1); | free(ln.buf); | ||
if (MANDOCLEVEL_FATAL <= curp->file_status) | return; | ||
break; | } | ||
/* | |||
* We remove `so' clauses from our lookaside | |||
* buffer because we're going to descend into | |||
* the file recursively. | |||
*/ | |||
if (curp->secondary) | |||
curp->secondary->sz -= pos + 1; | |||
save_file = curp->file; | |||
if (mparse_open(curp, &fd, ln.buf + of) == | |||
MANDOCLEVEL_OK) { | |||
mparse_readfd(curp, fd, ln.buf + of); | |||
curp->file = save_file; | |||
} else { | |||
curp->file = save_file; | |||
mandoc_vmsg(MANDOCERR_SO_FAIL, | |||
curp, curp->line, pos, | |||
".so %s", ln.buf + of); | |||
ln.sz = mandoc_asprintf(&cp, | |||
".sp\nSee the file %s.\n.sp", | |||
ln.buf + of); | |||
free(ln.buf); | |||
ln.buf = cp; | |||
of = 0; | |||
mparse_buf_r(curp, ln, of, 0); | |||
} | |||
pos = 0; | pos = 0; | ||
continue; | continue; | ||
default: | default: | ||
|
|
||
} | } | ||
/* | /* | ||
* If we encounter errors in the recursive parse, make | |||
* sure we don't continue parsing. | |||
*/ | |||
if (MANDOCLEVEL_FATAL <= curp->file_status) | |||
break; | |||
/* | |||
* If input parsers have not been allocated, do so now. | * If input parsers have not been allocated, do so now. | ||
* We keep these instanced between parsers, but set them | * We keep these instanced between parsers, but set them | ||
* locally per parse routine since we can use different | * locally per parse routine since we can use different | ||
* parsers with each one. | * parsers with each one. | ||
*/ | */ | ||
if ( ! (curp->man || curp->mdoc)) | if (curp->man == NULL || | ||
pset(ln.buf + of, pos - of, curp); | curp->man->macroset == MACROSET_NONE) | ||
choose_parser(curp); | |||
/* | /* | ||
* Lastly, push down into the parsers themselves. One | * Lastly, push down into the parsers themselves. | ||
* of these will have already been set in the pset() | |||
* routine. | |||
* If libroff returns ROFF_TBL, then add it to the | * If libroff returns ROFF_TBL, then add it to the | ||
* currently open parse. Since we only get here if | * currently open parse. Since we only get here if | ||
* there does exist data (see tbl_data.c), we're | * there does exist data (see tbl_data.c), we're | ||
|
|
||
* Do the same for ROFF_EQN. | * Do the same for ROFF_EQN. | ||
*/ | */ | ||
rc = -1; | if (rr == ROFF_TBL) | ||
while ((span = roff_span(curp->roff)) != NULL) | |||
roff_addtbl(curp->man, span); | |||
else if (rr == ROFF_EQN) | |||
roff_addeqn(curp->man, roff_eqn(curp->roff)); | |||
else if ((curp->man->macroset == MACROSET_MDOC ? | |||
mdoc_parseln(curp->man, curp->line, ln.buf, of) : | |||
man_parseln(curp->man, curp->line, ln.buf, of)) == 2) | |||
break; | |||
if (ROFF_TBL == rr) | |||
while (NULL != (span = roff_span(curp->roff))) { | |||
rc = curp->man ? | |||
man_addspan(curp->man, span) : | |||
mdoc_addspan(curp->mdoc, span); | |||
if (0 == rc) | |||
break; | |||
} | |||
else if (ROFF_EQN == rr) | |||
rc = curp->mdoc ? | |||
mdoc_addeqn(curp->mdoc, | |||
roff_eqn(curp->roff)) : | |||
man_addeqn(curp->man, | |||
roff_eqn(curp->roff)); | |||
else if (curp->man || curp->mdoc) | |||
rc = curp->man ? | |||
man_parseln(curp->man, | |||
curp->line, ln.buf, of) : | |||
mdoc_parseln(curp->mdoc, | |||
curp->line, ln.buf, of); | |||
if (0 == rc) { | |||
assert(MANDOCLEVEL_FATAL <= curp->file_status); | |||
break; | |||
} | |||
/* Temporary buffers typically are not full. */ | /* Temporary buffers typically are not full. */ | ||
if (0 == start && '\0' == blk.buf[i]) | if (0 == start && '\0' == blk.buf[i]) | ||
|
|
||
free(ln.buf); | free(ln.buf); | ||
} | } | ||
static void | |||
pdesc(struct mparse *curp, const char *file, int fd) | |||
{ | |||
struct buf blk; | |||
int with_mmap; | |||
/* | |||
* Run for each opened file; may be called more than once for | |||
* each full parse sequence if the opened file is nested (i.e., | |||
* from `so'). Simply sucks in the whole file and moves into | |||
* the parse phase for the file. | |||
*/ | |||
if ( ! read_whole_file(file, fd, &blk, &with_mmap)) { | |||
curp->file_status = MANDOCLEVEL_SYSERR; | |||
return; | |||
} | |||
/* Line number is per-file. */ | |||
curp->line = 1; | |||
mparse_buf_r(curp, blk, 1); | |||
#ifdef HAVE_MMAP | |||
if (with_mmap) | |||
munmap(blk.buf, blk.sz); | |||
else | |||
#endif | |||
free(blk.buf); | |||
} | |||
static int | static int | ||
read_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap) | read_whole_file(struct mparse *curp, const char *file, int fd, | ||
struct buf *fb, int *with_mmap) | |||
{ | { | ||
gzFile gz; | |||
size_t off; | size_t off; | ||
ssize_t ssz; | ssize_t ssz; | ||
#ifdef HAVE_MMAP | #if HAVE_MMAP | ||
struct stat st; | struct stat st; | ||
if (-1 == fstat(fd, &st)) { | if (-1 == fstat(fd, &st)) { | ||
perror(file); | perror(file); | ||
return(0); | exit((int)MANDOCLEVEL_SYSERR); | ||
} | } | ||
/* | /* | ||
|
|
||
* concerned that this is going to tank any machines. | * concerned that this is going to tank any machines. | ||
*/ | */ | ||
if (S_ISREG(st.st_mode)) { | if (curp->gzip == 0 && S_ISREG(st.st_mode)) { | ||
if (st.st_size >= (1U << 31)) { | if (st.st_size > 0x7fffffff) { | ||
fprintf(stderr, "%s: input too large\n", file); | mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL); | ||
return(0); | return 0; | ||
} | } | ||
*with_mmap = 1; | *with_mmap = 1; | ||
fb->sz = (size_t)st.st_size; | fb->sz = (size_t)st.st_size; | ||
fb->buf = mmap(NULL, fb->sz, PROT_READ, | fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); | ||
MAP_FILE|MAP_SHARED, fd, 0); | |||
if (fb->buf != MAP_FAILED) | if (fb->buf != MAP_FAILED) | ||
return(1); | return 1; | ||
} | } | ||
#endif | #endif | ||
if (curp->gzip) { | |||
if ((gz = gzdopen(fd, "rb")) == NULL) { | |||
perror(file); | |||
exit((int)MANDOCLEVEL_SYSERR); | |||
} | |||
} else | |||
gz = NULL; | |||
/* | /* | ||
* If this isn't a regular file (like, say, stdin), then we must | * If this isn't a regular file (like, say, stdin), then we must | ||
* go the old way and just read things in bit by bit. | * go the old way and just read things in bit by bit. | ||
|
|
||
for (;;) { | for (;;) { | ||
if (off == fb->sz) { | if (off == fb->sz) { | ||
if (fb->sz == (1U << 31)) { | if (fb->sz == (1U << 31)) { | ||
fprintf(stderr, "%s: input too large\n", file); | mandoc_msg(MANDOCERR_TOOLARGE, curp, | ||
0, 0, NULL); | |||
break; | break; | ||
} | } | ||
resize_buf(fb, 65536); | resize_buf(fb, 65536); | ||
} | } | ||
ssz = read(fd, fb->buf + (int)off, fb->sz - off); | ssz = curp->gzip ? | ||
gzread(gz, fb->buf + (int)off, fb->sz - off) : | |||
read(fd, fb->buf + (int)off, fb->sz - off); | |||
if (ssz == 0) { | if (ssz == 0) { | ||
fb->sz = off; | fb->sz = off; | ||
return(1); | return 1; | ||
} | } | ||
if (ssz == -1) { | if (ssz == -1) { | ||
perror(file); | perror(file); | ||
break; | exit((int)MANDOCLEVEL_SYSERR); | ||
} | } | ||
off += (size_t)ssz; | off += (size_t)ssz; | ||
} | } | ||
free(fb->buf); | free(fb->buf); | ||
fb->buf = NULL; | fb->buf = NULL; | ||
return(0); | return 0; | ||
} | } | ||
/*
 * mparse_end(): finish a parse once all input has been consumed.
 *
 * These rows are a side-by-side diff: the first cell of each row is
 * revision 1.17, the second cell is revision 1.142, and rows with a
 * single populated cell exist in only one of the two revisions.
 *
 * Revision 1.17 returns early when curp->file_status is already at
 * least MANDOCLEVEL_FATAL or when mdoc_endparse()/man_endparse()
 * fails, and reports MANDOCERR_NOTMANUAL (raising file_status to
 * MANDOCLEVEL_FATAL) when neither curp->mdoc nor curp->man is set.
 *
 * Revision 1.142 allocates curp->man with roff_man_alloc() when both
 * curp->man and curp->sodest are NULL, turns MACROSET_NONE into
 * MACROSET_MAN, then calls mdoc_endparse() for MACROSET_MDOC and
 * man_endparse() otherwise.
 *
 * Both revisions finish with roff_endparse(curp->roff).
 *
 * NOTE(review): in revision 1.142 curp->man is dereferenced even when
 * the allocation was skipped because only curp->sodest was non-NULL;
 * confirm curp->man can never be NULL on that path.
 */
static void | static void | ||
mparse_end(struct mparse *curp) | mparse_end(struct mparse *curp) | ||
{ | { | ||
if (MANDOCLEVEL_FATAL <= curp->file_status) | if (curp->man == NULL && curp->sodest == NULL) | ||
return; | curp->man = roff_man_alloc(curp->roff, curp, curp->defos, | ||
curp->options & MPARSE_QUICK ? 1 : 0); | |||
if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) { | if (curp->man->macroset == MACROSET_NONE) | ||
assert(MANDOCLEVEL_FATAL <= curp->file_status); | curp->man->macroset = MACROSET_MAN; | ||
return; | if (curp->man->macroset == MACROSET_MDOC) | ||
} | mdoc_endparse(curp->man); | ||
else | |||
if (curp->man && ! man_endparse(curp->man)) { | man_endparse(curp->man); | ||
assert(MANDOCLEVEL_FATAL <= curp->file_status); | |||
return; | |||
} | |||
if ( ! (curp->man || curp->mdoc)) { | |||
mandoc_msg(MANDOCERR_NOTMANUAL, curp, 1, 0, NULL); | |||
curp->file_status = MANDOCLEVEL_FATAL; | |||
return; | |||
} | |||
roff_endparse(curp->roff); | roff_endparse(curp->roff); | ||
} | } | ||
/*
 * mparse_readfd_r() (revision 1.17, first cell) versus
 * mparse_parse_buffer() (revision 1.142, second cell); rows with a
 * single populated cell exist in only one of the two revisions.
 *
 * Revision 1.17 opens `file' itself when fd is -1 (setting file_status
 * to MANDOCLEVEL_SYSERR on failure), hands the descriptor to pdesc(),
 * calls mparse_end() only when `re' is 0 and no fatal status has been
 * recorded, and closes every descriptor except STDIN_FILENO.
 *
 * Revision 1.142 works on an in-memory buffer instead:
 *  - it reports MANDOCERR_ROFFLOOP and returns when the static
 *    recursion_depth counter is already above 64;
 *  - it saves curp->file and curp->primary, points them at the new
 *    input, and restores both before returning;
 *  - it restarts curp->line at 1;
 *  - when MPARSE_UTF8 is set in curp->filenc and the buffer starts
 *    with the bytes EF BB BF, parsing starts at offset 3 and
 *    MPARSE_LATIN1 is cleared from curp->filenc;
 *  - mparse_end() runs only when recursion_depth drops back to 0.
 */
static void | static void | ||
mparse_readfd_r(struct mparse *curp, int fd, const char *file, int re) | mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file) | ||
{ | { | ||
struct buf *svprimary; | |||
const char *svfile; | const char *svfile; | ||
size_t offset; | |||
static int recursion_depth; | |||
if (-1 == fd) | if (64 < recursion_depth) { | ||
if (-1 == (fd = open(file, O_RDONLY, 0))) { | mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL); | ||
perror(file); | return; | ||
curp->file_status = MANDOCLEVEL_SYSERR; | } | ||
return; | |||
} | |||
/* Line number is per-file. */ | |||
svfile = curp->file; | svfile = curp->file; | ||
curp->file = file; | curp->file = file; | ||
svprimary = curp->primary; | |||
curp->primary = &blk; | |||
curp->line = 1; | |||
recursion_depth++; | |||
pdesc(curp, file, fd); | /* Skip an UTF-8 byte order mark. */ | ||
if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && | |||
(unsigned char)blk.buf[0] == 0xef && | |||
(unsigned char)blk.buf[1] == 0xbb && | |||
(unsigned char)blk.buf[2] == 0xbf) { | |||
offset = 3; | |||
curp->filenc &= ~MPARSE_LATIN1; | |||
} else | |||
offset = 0; | |||
if (0 == re && MANDOCLEVEL_FATAL > curp->file_status) | mparse_buf_r(curp, blk, offset, 1); | ||
if (--recursion_depth == 0) | |||
mparse_end(curp); | mparse_end(curp); | ||
if (STDIN_FILENO != fd && -1 == close(fd)) | curp->primary = svprimary; | ||
perror(file); | |||
curp->file = svfile; | curp->file = svfile; | ||
} | } | ||
/*
 * mparse_readmem(): parse a caller-supplied memory region.
 *
 * Wraps `buf' and `len' in a struct buf, passes it by value to
 * mparse_parse_buffer() together with `file', and returns the
 * resulting curp->file_status.  The buffer is neither copied nor
 * freed here.
 *
 * Only the second cell of the first row belongs to this function;
 * the first cell appears to be the return type of the older
 * mparse_readfd(), which the side-by-side diff aligned with it.
 */
enum mandoclevel | enum mandoclevel | ||
mparse_readmem(struct mparse *curp, void *buf, size_t len, | |||
const char *file) | |||
{ | |||
struct buf blk; | |||
blk.buf = buf; | |||
blk.sz = len; | |||
mparse_parse_buffer(curp, blk, file); | |||
return curp->file_status; | |||
} | |||
/*
 * mparse_readfd(): parse the input available on descriptor `fd'.
 *
 * Side-by-side diff rows: first cell is revision 1.17, second cell is
 * revision 1.142; single-cell rows exist in only one revision.
 *
 * Revision 1.17 simply delegates to mparse_readfd_r(curp, fd, file, 0).
 *
 * Revision 1.142, when read_whole_file() succeeds:
 *  - saves curp->filenc and replaces it with the MPARSE_UTF8 and
 *    MPARSE_LATIN1 bits of curp->options for the duration of the parse;
 *  - calls mparse_parse_buffer() and then restores curp->filenc;
 *  - releases the buffer with munmap() if with_mmap was set (only when
 *    HAVE_MMAP is enabled), otherwise with free().
 * Whether or not reading succeeded, it closes `fd' unless it is
 * STDIN_FILENO, printing a perror() message if close() fails.
 *
 * Both revisions return curp->file_status.
 */
/* | |||
* Read the whole file into memory and call the parsers. | |||
* Called recursively when an .so request is encountered. | |||
*/ | |||
enum mandoclevel | |||
mparse_readfd(struct mparse *curp, int fd, const char *file) | mparse_readfd(struct mparse *curp, int fd, const char *file) | ||
{ | { | ||
struct buf blk; | |||
int with_mmap; | |||
int save_filenc; | |||
mparse_readfd_r(curp, fd, file, 0); | if (read_whole_file(curp, file, fd, &blk, &with_mmap)) { | ||
return(curp->file_status); | save_filenc = curp->filenc; | ||
curp->filenc = curp->options & | |||
(MPARSE_UTF8 | MPARSE_LATIN1); | |||
mparse_parse_buffer(curp, blk, file); | |||
curp->filenc = save_filenc; | |||
#if HAVE_MMAP | |||
if (with_mmap) | |||
munmap(blk.buf, blk.sz); | |||
else | |||
#endif | |||
free(blk.buf); | |||
} | |||
if (fd != STDIN_FILENO && close(fd) == -1) | |||
perror(file); | |||
return curp->file_status; | |||
} | } | ||
enum mandoclevel | |||
mparse_open(struct mparse *curp, int *fd, const char *file) | |||
{ | |||
char *cp; | |||
curp->file = file; | |||
cp = strrchr(file, '.'); | |||
curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz")); | |||
/* First try to use the filename as it is. */ | |||
if ((*fd = open(file, O_RDONLY)) != -1) | |||
return MANDOCLEVEL_OK; | |||
/* | |||
* If that doesn't work and the filename doesn't | |||
* already end in .gz, try appending .gz. | |||
*/ | |||
if ( ! curp->gzip) { | |||
mandoc_asprintf(&cp, "%s.gz", file); | |||
*fd = open(file, O_RDONLY); | |||
free(cp); | |||
if (*fd != -1) { | |||
curp->gzip = 1; | |||
return MANDOCLEVEL_OK; | |||
} | |||
} | |||
/* Neither worked, give up. */ | |||
mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno)); | |||
return MANDOCLEVEL_ERROR; | |||
} | |||
/*
 * mparse_alloc(): allocate and initialise a parser handle.
 *
 * Side-by-side diff rows: first cell is revision 1.17, second cell is
 * revision 1.142; single-cell rows exist in only one revision.  Note
 * that the two revisions take different parameter lists.
 *
 * Both revisions obtain the handle from mandoc_calloc(), store
 * `wlevel' and `mmsg' in it, create the roff parser with roff_alloc()
 * and return the handle.
 *
 * Revision 1.17 additionally stores `arg' and `inttype'.
 *
 * Revision 1.142 stores `options', `defos' and `mchars', creates
 * curp->man with roff_man_alloc() (passing 1 as the last argument
 * when MPARSE_QUICK is set in options, else 0), and preselects the
 * macro set: MPARSE_MDOC calls mdoc_hash_init() and selects
 * MACROSET_MDOC; otherwise MPARSE_MAN calls man_hash_init() and
 * selects MACROSET_MAN.  It finally sets the token of
 * curp->man->first to TOKEN_NONE.
 */
struct mparse * | struct mparse * | ||
mparse_alloc(enum mparset inttype, enum mandoclevel wlevel, mandocmsg mmsg, void *arg) | mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg, | ||
const struct mchars *mchars, const char *defos) | |||
{ | { | ||
struct mparse *curp; | struct mparse *curp; | ||
assert(wlevel <= MANDOCLEVEL_FATAL); | |||
curp = mandoc_calloc(1, sizeof(struct mparse)); | curp = mandoc_calloc(1, sizeof(struct mparse)); | ||
curp->options = options; | |||
curp->wlevel = wlevel; | curp->wlevel = wlevel; | ||
curp->mmsg = mmsg; | curp->mmsg = mmsg; | ||
curp->arg = arg; | curp->defos = defos; | ||
curp->inttype = inttype; | |||
curp->roff = roff_alloc(&curp->regs, curp); | curp->mchars = mchars; | ||
return(curp); | curp->roff = roff_alloc(curp, curp->mchars, options); | ||
curp->man = roff_man_alloc( curp->roff, curp, curp->defos, | |||
curp->options & MPARSE_QUICK ? 1 : 0); | |||
if (curp->options & MPARSE_MDOC) { | |||
mdoc_hash_init(); | |||
curp->man->macroset = MACROSET_MDOC; | |||
} else if (curp->options & MPARSE_MAN) { | |||
man_hash_init(); | |||
curp->man->macroset = MACROSET_MAN; | |||
} | |||
curp->man->first->tok = TOKEN_NONE; | |||
return curp; | |||
} | } | ||
/*
 * mparse_reset(): return a parser handle to its pre-parse state so it
 * can be used for another input.
 *
 * Side-by-side diff rows: first cell is revision 1.17, second cell is
 * revision 1.142; single-cell rows exist in only one revision.
 *
 * Both revisions call roff_reset() and set curp->file_status back to
 * MANDOCLEVEL_OK.
 *
 * Revision 1.17 also zeroes curp->regs, resets whichever of
 * curp->mdoc / curp->man is set, and then clears both pointers.
 *
 * Revision 1.142 resets curp->man through roff_man_reset() when it is
 * non-NULL, empties the secondary buffer by setting its size to 0
 * (its storage is kept), and frees and clears curp->sodest.
 */
void | void | ||
mparse_reset(struct mparse *curp) | mparse_reset(struct mparse *curp) | ||
{ | { | ||
memset(&curp->regs, 0, sizeof(struct regset)); | |||
roff_reset(curp->roff); | roff_reset(curp->roff); | ||
if (curp->mdoc) | if (curp->man != NULL) | ||
mdoc_reset(curp->mdoc); | roff_man_reset(curp->man); | ||
if (curp->man) | if (curp->secondary) | ||
man_reset(curp->man); | curp->secondary->sz = 0; | ||
curp->file_status = MANDOCLEVEL_OK; | curp->file_status = MANDOCLEVEL_OK; | ||
curp->mdoc = NULL; | |||
curp->man = NULL; | free(curp->sodest); | ||
curp->sodest = NULL; | |||
} | } | ||
/*
 * mparse_free(): release a parser handle and what it holds.
 *
 * Side-by-side diff rows: first cell is revision 1.17, second cell is
 * revision 1.142; single-cell rows exist in only one revision.
 *
 * Both revisions free the roff parser when present and finally free
 * the handle itself, so `curp' must not be used afterwards.
 *
 * Revision 1.17 frees curp->pmdoc and curp->pman when they are set.
 *
 * Revision 1.142 passes curp->man to roff_man_free() unconditionally,
 * frees the secondary buffer's storage before the secondary struct
 * itself (the storage only when the struct exists), and frees
 * curp->sodest.
 */
void | void | ||
mparse_free(struct mparse *curp) | mparse_free(struct mparse *curp) | ||
{ | { | ||
if (curp->pmdoc) | roff_man_free(curp->man); | ||
mdoc_free(curp->pmdoc); | |||
if (curp->pman) | |||
man_free(curp->pman); | |||
if (curp->roff) | if (curp->roff) | ||
roff_free(curp->roff); | roff_free(curp->roff); | ||
if (curp->secondary) | |||
free(curp->secondary->buf); | |||
free(curp->secondary); | |||
free(curp->sodest); | |||
free(curp); | free(curp); | ||
} | } | ||
/*
 * mparse_result(): hand the parse results back to the caller.
 *
 * Side-by-side diff rows: first cell is revision 1.17, second cell is
 * revision 1.142; single-cell rows exist in only one revision.
 *
 * Revision 1.17 copies curp->mdoc and curp->man into whichever of the
 * two output pointers is non-NULL.
 *
 * Revision 1.142 first stores curp->sodest in *sodest when `sodest'
 * is non-NULL; if that value is non-NULL it sets *man to NULL and
 * returns.  Otherwise it stores curp->man in *man when `man' is
 * non-NULL.  The returned pointers are the handle's own, not copies.
 *
 * NOTE(review): on the sodest path revision 1.142 writes *man without
 * the NULL check used a few rows further down; confirm callers always
 * pass a valid `man' together with `sodest'.
 */
void | void | ||
mparse_result(struct mparse *curp, struct mdoc **mdoc, struct man **man) | mparse_result(struct mparse *curp, struct roff_man **man, | ||
char **sodest) | |||
{ | { | ||
if (mdoc) | if (sodest && NULL != (*sodest = curp->sodest)) { | ||
*mdoc = curp->mdoc; | *man = NULL; | ||
return; | |||
} | |||
if (man) | if (man) | ||
*man = curp->man; | *man = curp->man; | ||
} | } | ||
|
|
||
va_list ap; | va_list ap; | ||
va_start(ap, fmt); | va_start(ap, fmt); | ||
vsnprintf(buf, sizeof(buf) - 1, fmt, ap); | (void)vsnprintf(buf, sizeof(buf), fmt, ap); | ||
va_end(ap); | va_end(ap); | ||
mandoc_msg(t, m, ln, pos, buf); | mandoc_msg(t, m, ln, pos, buf); | ||
} | } | ||
void | void | ||
mandoc_msg(enum mandocerr er, struct mparse *m, | mandoc_msg(enum mandocerr er, struct mparse *m, | ||
int ln, int col, const char *msg) | int ln, int col, const char *msg) | ||
{ | { | ||
enum mandoclevel level; | enum mandoclevel level; | ||
level = MANDOCLEVEL_FATAL; | level = MANDOCLEVEL_UNSUPP; | ||
while (er < mandoclimits[level]) | while (er < mandoclimits[level]) | ||
level--; | level--; | ||
if (level < m->wlevel) | if (level < m->wlevel && er != MANDOCERR_FILE) | ||
return; | return; | ||
if (m->mmsg) | if (m->mmsg) | ||
|
|
||
mparse_strerror(enum mandocerr er) | mparse_strerror(enum mandocerr er) | ||
{ | { | ||
return(mandocerrs[er]); | return mandocerrs[er]; | ||
} | } | ||
/*
 * mparse_strlevel(): return the mandoclevels[] table entry for `lvl'.
 * The index is used as given, without a range check.
 *
 * Side-by-side diff rows: first cell is revision 1.17, second cell is
 * revision 1.142; the two differ only in the parentheses around the
 * return expression.  The closing-brace row below has a single cell
 * and appears to belong to revision 1.142 only.
 */
const char * | const char * | ||
mparse_strlevel(enum mandoclevel lvl) | mparse_strlevel(enum mandoclevel lvl) | ||
{ | { | ||
return(mandoclevels[lvl]); | return mandoclevels[lvl]; | ||
} | |||
void | |||
mparse_keep(struct mparse *p) | |||
{ | |||
assert(NULL == p->secondary); | |||
p->secondary = mandoc_calloc(1, sizeof(struct buf)); | |||
} | |||
/*
 * mparse_getkeep(): return the contents of the parser's secondary
 * buffer, or NULL when that buffer's size is 0.
 *
 * The secondary buffer must already exist; this is enforced by
 * assertion.  The returned pointer is the buffer's own storage, not
 * a copy.
 *
 * In this side-by-side diff the function appears only in one column;
 * the second cell of the closing-brace row belongs to it, while the
 * first cell appears to close a function of the other revision.
 */
const char * | |||
mparse_getkeep(const struct mparse *p) | |||
{ | |||
assert(p->secondary); | |||
return p->secondary->sz ? p->secondary->buf : NULL; | |||
} | } | ||