=================================================================== RCS file: /cvs/mandoc/read.c,v retrieving revision 1.111 retrieving revision 1.136 diff -u -p -r1.111 -r1.136 --- mandoc/read.c 2015/01/15 04:26:40 1.111 +++ mandoc/read.c 2015/04/18 17:01:58 1.136 @@ -1,4 +1,4 @@ -/* $Id: read.c,v 1.111 2015/01/15 04:26:40 schwarze Exp $ */ +/* $Id: read.c,v 1.136 2015/04/18 17:01:58 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010-2015 Ingo Schwarze @@ -8,9 +8,9 @@ * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -36,19 +37,17 @@ #include #include -#include "mandoc.h" #include "mandoc_aux.h" -#include "libmandoc.h" +#include "mandoc.h" +#include "roff.h" #include "mdoc.h" #include "man.h" +#include "libmandoc.h" #define REPARSE_LIMIT 1000 struct mparse { - struct man *pman; /* persistent man parser */ - struct mdoc *pmdoc; /* persistent mdoc parser */ - struct man *man; /* man parser */ - struct mdoc *mdoc; /* mdoc parser */ + struct roff_man *man; /* man parser */ struct roff *roff; /* roff parser (!NULL) */ const struct mchars *mchars; /* character table */ char *sodest; /* filename pointed to by .so */ @@ -80,8 +79,8 @@ static const enum mandocerr mandoclimits[MANDOCLEVEL_M MANDOCERR_WARNING, MANDOCERR_WARNING, MANDOCERR_ERROR, + MANDOCERR_UNSUPP, MANDOCERR_MAX, - MANDOCERR_MAX, MANDOCERR_MAX }; @@ -109,7 +108,11 @@ static const char * const mandocerrs[MANDOCERR_MAX] = "no document body", "content before first section header", "first section is not \"NAME\"", - "bad NAME section contents", + "NAME section without name", + "NAME section without description", + "description not at the end of NAME", + "bad NAME section content", + "missing description line, using \"\"", "sections out of conventional order", "duplicate section title", "unexpected section", @@ -135,18 +138,22 @@ static const char * const mandocerrs[MANDOCERR_MAX] = "skipping empty request", "conditional request controls empty scope", "skipping empty macro", + "empty block", "empty argument, using 0n", - "argument count wrong", "missing display type, using -ragged", "list type is not the first argument", "missing -width in -tag list, using 8n", "missing utility name, using \"\"", + "missing function name, using \"\"", "empty head in list item", "empty list item", "missing font type, using \\fR", "unknown font type, using \\fR", "nothing follows prefix", + "empty reference block", "missing -std argument, adding it", + "missing option string, using \"\"", + "missing resource identifier, using \"\"", "missing eqn box, using \"\"", /* related to bad macro arguments */ @@ -156,12 +163,14 @@ static const char * const mandocerrs[MANDOCERR_MAX] = "skipping duplicate display type", "skipping duplicate list type", "skipping -width argument", + "wrong number of cells", "unknown AT&T UNIX version", "comma in function argument", "parenthesis in function name", "invalid content in Rs block", "invalid Boolean argument", "unknown font, skipping request", + "odd number of characters in request", /* related to plain text */ "blank line in fill mode, using .sp", @@ -171,40 +180,41 @@ static const char * const mandocerrs[MANDOCERR_MAX] = "invalid escape sequence", "undefined string, using \"\"", + /* related to tables */ + "tbl line starts with span", + "tbl column starts with span", + "skipping vertical bar in tbl layout", + "generic error", - /* related to equations */ - "unexpected equation scope closure", - "equation scope open on exit", - "overlapping equation scopes", - "unexpected end of equation", - /* related to tables */ - "bad table syntax", - "bad table option", - "bad table layout", - "no table layout cells specified", - "no table data cells specified", - "ignore data in cell", - "data block still open", - "ignoring extra data cells", - "ignoring macro in table", + "non-alphabetic character in tbl options", + "skipping unknown tbl option", + "missing tbl option argument", + "wrong tbl option argument size", + "empty tbl layout", + "invalid character in tbl layout", + "unmatched parenthesis in tbl layout", + "tbl without any data cells", + "ignoring data in spanned tbl cell", + "ignoring extra tbl data cells", + "data block open at end of tbl", /* related to document structure and macros */ NULL, - "input too large", "input stack limit exceeded, infinite loop?", "skipping bad character", "skipping unknown macro", + "skipping insecure request", "skipping item outside list", "skipping column outside column list", "skipping end of block that is not open", + "fewer RS blocks open, skipping", "inserting missing end of block", "appending missing end of block", /* related to request and macro arguments */ "escaped character not allowed in a name", - "argument count wrong", "NOT IMPLEMENTED: Bd -file", "missing list type, using -item", "missing manual name, using \"\"", @@ -216,6 +226,14 @@ static const char * const mandocerrs[MANDOCERR_MAX] = "skipping all arguments", "skipping excess arguments", "divide by zero", + + "unsupported feature", + "input too large", + "unsupported control character", + "unsupported roff request", + "eqn delim option in tbl", + "unsupported tbl layout modifier", + "ignoring macro in table", }; static const char * const mandoclevels[MANDOCLEVEL_MAX] = { @@ -223,7 +241,7 @@ static const char * const mandoclevels[MANDOCLEVEL_MAX "RESERVED", "WARNING", "ERROR", - "FATAL", + "UNSUPP", "BADARG", "SYSERR" }; @@ -272,23 +290,25 @@ choose_parser(struct mparse *curp) } if (format == MPARSE_MDOC) { - if (NULL == curp->pmdoc) - curp->pmdoc = mdoc_alloc( + if (curp->man == NULL) + curp->man = mdoc_alloc( curp->roff, curp, curp->defos, MPARSE_QUICK & curp->options ? 1 : 0); - assert(curp->pmdoc); - curp->mdoc = curp->pmdoc; + else + curp->man->macroset = MACROSET_MDOC; + mdoc_hash_init(); return; } /* Fall back to man(7) as a last resort. */ - if (NULL == curp->pman) - curp->pman = man_alloc( + if (curp->man == NULL) + curp->man = man_alloc( curp->roff, curp, curp->defos, MPARSE_QUICK & curp->options ? 1 : 0); - assert(curp->pman); - curp->man = curp->pman; + else + curp->man->macroset = MACROSET_MAN; + man_hash_init(); } /* @@ -303,6 +323,7 @@ mparse_buf_r(struct mparse *curp, struct buf blk, size { const struct tbl_span *span; struct buf ln; + const char *save_file; char *cp; size_t pos; /* byte number in the ln buffer */ enum rofferr rr; @@ -364,9 +385,8 @@ mparse_buf_r(struct mparse *curp, struct buf blk, size if (c & 0x80) { if ( ! (curp->filenc && preconv_encode( &blk, &i, &ln, &pos, &curp->filenc))) { - mandoc_vmsg(MANDOCERR_BADCHAR, - curp, curp->line, pos, - "0x%x", c); + mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, + curp->line, pos, "0x%x", c); ln.buf[pos++] = '?'; i++; } @@ -378,10 +398,13 @@ mparse_buf_r(struct mparse *curp, struct buf blk, size */ if (c == 0x7f || (c < 0x20 && c != 0x09)) { - mandoc_vmsg(MANDOCERR_BADCHAR, curp, - curp->line, pos, "0x%x", c); + mandoc_vmsg(c == 0x00 || c == 0x04 || + c > 0x0a ? MANDOCERR_CHAR_BAD : + MANDOCERR_CHAR_UNSUPP, + curp, curp->line, pos, "0x%x", c); i++; - ln.buf[pos++] = '?'; + if (c != '\r') + ln.buf[pos++] = '?'; continue; } @@ -435,7 +458,7 @@ mparse_buf_r(struct mparse *curp, struct buf blk, size if ( ! (isascii(c) && (isgraph(c) || isblank(c)))) { - mandoc_vmsg(MANDOCERR_BADCHAR, curp, + mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, curp->line, pos, "0x%x", c); i += 2; ln.buf[pos++] = '?'; @@ -518,11 +541,14 @@ rerun: */ if (curp->secondary) curp->secondary->sz -= pos + 1; + save_file = curp->file; save_child = curp->child; if (mparse_open(curp, &fd, ln.buf + of) == - MANDOCLEVEL_OK) + MANDOCLEVEL_OK) { mparse_readfd(curp, fd, ln.buf + of); - else { + curp->file = save_file; + } else { + curp->file = save_file; mandoc_vmsg(MANDOCERR_SO_FAIL, curp, curp->line, pos, ".so %s", ln.buf + of); @@ -548,7 +574,8 @@ rerun: * parsers with each one. */ - if ( ! (curp->man || curp->mdoc)) + if (curp->man == NULL || + curp->man->macroset == MACROSET_NONE) choose_parser(curp); /* @@ -562,17 +589,17 @@ rerun: if (rr == ROFF_TBL) { while ((span = roff_span(curp->roff)) != NULL) - if (curp->man == NULL) - mdoc_addspan(curp->mdoc, span); + if (curp->man->macroset == MACROSET_MDOC) + mdoc_addspan(curp->man, span); else man_addspan(curp->man, span); } else if (rr == ROFF_EQN) { - if (curp->man == NULL) - mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff)); + if (curp->man->macroset == MACROSET_MDOC) + mdoc_addeqn(curp->man, roff_eqn(curp->roff)); else man_addeqn(curp->man, roff_eqn(curp->roff)); - } else if ((curp->man == NULL ? - mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) : + } else if ((curp->man->macroset == MACROSET_MDOC ? + mdoc_parseln(curp->man, curp->line, ln.buf, of) : man_parseln(curp->man, curp->line, ln.buf, of)) == 2) break; @@ -611,7 +638,7 @@ read_whole_file(struct mparse *curp, const char *file, */ if (S_ISREG(st.st_mode)) { - if (st.st_size >= (1U << 31)) { + if (st.st_size > 0x7fffffff) { mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL); return(0); } @@ -662,22 +689,14 @@ static void mparse_end(struct mparse *curp) { - if (curp->mdoc == NULL && - curp->man == NULL && - curp->sodest == NULL) { - if (curp->options & MPARSE_MDOC) - curp->mdoc = curp->pmdoc; - else { - if (curp->pman == NULL) - curp->pman = man_alloc( - curp->roff, curp, curp->defos, - curp->options & MPARSE_QUICK ? 1 : 0); - curp->man = curp->pman; - } - } - if (curp->mdoc) - mdoc_endparse(curp->mdoc); - if (curp->man) + if (curp->man == NULL && curp->sodest == NULL) + curp->man = man_alloc(curp->roff, curp, curp->defos, + curp->options & MPARSE_QUICK ? 1 : 0); + if (curp->man->macroset == MACROSET_NONE) + curp->man->macroset = MACROSET_MAN; + if (curp->man->macroset == MACROSET_MDOC) + mdoc_endparse(curp->man); + else man_endparse(curp->man); roff_endparse(curp->roff); } @@ -821,6 +840,7 @@ mparse_open(struct mparse *curp, int *fd, const char * perror("dup"); exit((int)MANDOCLEVEL_SYSERR); } + signal(SIGPIPE, SIG_DFL); execlp("gunzip", "gunzip", "-c", file, NULL); perror("exec"); exit((int)MANDOCLEVEL_SYSERR); @@ -843,6 +863,7 @@ mparse_wait(struct mparse *curp) perror("wait"); exit((int)MANDOCLEVEL_SYSERR); } + curp->child = 0; if (WIFSIGNALED(status)) { mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "gunzip died from signal %d", WTERMSIG(status)); @@ -871,14 +892,18 @@ mparse_alloc(int options, enum mandoclevel wlevel, man curp->mchars = mchars; curp->roff = roff_alloc(curp, curp->mchars, options); - if (curp->options & MPARSE_MDOC) - curp->pmdoc = mdoc_alloc( + if (curp->options & MPARSE_MDOC) { + curp->man = mdoc_alloc( curp->roff, curp, curp->defos, curp->options & MPARSE_QUICK ? 1 : 0); - if (curp->options & MPARSE_MAN) - curp->pman = man_alloc( + mdoc_hash_init(); + } + if (curp->options & MPARSE_MAN) { + curp->man = man_alloc( curp->roff, curp, curp->defos, curp->options & MPARSE_QUICK ? 1 : 0); + man_hash_init(); + } return(curp); } @@ -889,16 +914,17 @@ mparse_reset(struct mparse *curp) roff_reset(curp->roff); - if (curp->mdoc) - mdoc_reset(curp->mdoc); - if (curp->man) - man_reset(curp->man); + if (curp->man != NULL) { + if (curp->man->macroset == MACROSET_MDOC) + mdoc_reset(curp->man); + else + man_reset(curp->man); + curp->man->macroset = MACROSET_NONE; + } if (curp->secondary) curp->secondary->sz = 0; curp->file_status = MANDOCLEVEL_OK; - curp->mdoc = NULL; - curp->man = NULL; free(curp->sodest); curp->sodest = NULL; @@ -908,10 +934,10 @@ void mparse_free(struct mparse *curp) { - if (curp->pmdoc) - mdoc_free(curp->pmdoc); - if (curp->pman) - man_free(curp->pman); + if (curp->man->macroset == MACROSET_MDOC) + mdoc_free(curp->man); + if (curp->man->macroset == MACROSET_MAN) + man_free(curp->man); if (curp->roff) roff_free(curp->roff); if (curp->secondary) @@ -923,17 +949,14 @@ mparse_free(struct mparse *curp) } void -mparse_result(struct mparse *curp, - struct mdoc **mdoc, struct man **man, char **sodest) +mparse_result(struct mparse *curp, struct roff_man **man, + char **sodest) { if (sodest && NULL != (*sodest = curp->sodest)) { - *mdoc = NULL; *man = NULL; return; } - if (mdoc) - *mdoc = curp->mdoc; if (man) *man = curp->man; } @@ -958,7 +981,7 @@ mandoc_msg(enum mandocerr er, struct mparse *m, { enum mandoclevel level; - level = MANDOCLEVEL_ERROR; + level = MANDOCLEVEL_UNSUPP; while (er < mandoclimits[level]) level--;