=================================================================== RCS file: /cvs/mandoc/main.c,v retrieving revision 1.79 retrieving revision 1.104 diff -u -p -r1.79 -r1.104 --- mandoc/main.c 2010/05/17 22:11:42 1.79 +++ mandoc/main.c 2010/08/20 08:13:43 1.104 @@ -1,6 +1,7 @@ -/* $Id: main.c,v 1.79 2010/05/17 22:11:42 kristaps Exp $ */ +/* $Id: main.c,v 1.104 2010/08/20 08:13:43 schwarze Exp $ */ /* - * Copyright (c) 2008, 2009 Kristaps Dzonsons + * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons + * Copyright (c) 2010 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -22,6 +23,7 @@ #include #include +#include #include #include #include @@ -30,11 +32,15 @@ #include #include "mandoc.h" +#include "main.h" #include "mdoc.h" #include "man.h" #include "roff.h" -#include "main.h" +#ifndef MAP_FILE +#define MAP_FILE 0 +#endif + #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a)) /* FIXME: Intel's compiler? LLVM? pcc? */ @@ -65,27 +71,21 @@ enum outt { OUTT_TREE, OUTT_HTML, OUTT_XHTML, - OUTT_LINT + OUTT_LINT, + OUTT_PS, + OUTT_PDF }; struct curparse { const char *file; /* Current parse. */ int fd; /* Current parse. */ - int wflags; - /* FIXME: set by max error */ -#define WARN_WALL (1 << 0) /* All-warnings mask. */ -#define WARN_WERR (1 << 2) /* Warnings->errors. */ - int fflags; -#define FL_IGN_SCOPE (1 << 0) /* Ignore scope errors. */ -#define FL_NIGN_ESCAPE (1 << 1) /* Don't ignore bad escapes. */ -#define FL_NIGN_MACRO (1 << 2) /* Don't ignore bad macros. */ -#define FL_IGN_ERRORS (1 << 4) /* Ignore failed parse. */ -#define FL_STRICT FL_NIGN_ESCAPE | \ - FL_NIGN_MACRO /* ignore nothing */ + enum mandoclevel wlevel; /* Ignore messages below this. */ + int wstop; /* Stop after a file with a warning. */ enum intt inttype; /* which parser to use */ struct man *man; /* man parser */ struct mdoc *mdoc; /* mdoc parser */ struct roff *roff; /* roff parser (!NULL) */ + struct regset regs; /* roff registers */ enum outt outtype; /* which output to use */ out_mdoc outmdoc; /* mdoc output ptr */ out_man outman; /* man output ptr */ @@ -94,27 +94,55 @@ struct curparse { char outopts[BUFSIZ]; /* buf of output opts */ }; +static const char * const mandoclevels[MANDOCLEVEL_MAX] = { + "SUCCESS", + "RESERVED", + "WARNING", + "ERROR", + "FATAL", + "BADARG", + "SYSERR" +}; + +static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { + MANDOCERR_OK, + MANDOCERR_WARNING, + MANDOCERR_WARNING, + MANDOCERR_ERROR, + MANDOCERR_FATAL, + MANDOCERR_MAX, + MANDOCERR_MAX +}; + static const char * const mandocerrs[MANDOCERR_MAX] = { "ok", + + "generic warning", + "text should be uppercase", - "sections out of conentional order", + "sections out of conventional order", "section name repeats", "out of order prologue", "repeated prologue entry", "list type must come first", - "column syntax is inconsistent", "bad standard", "bad library", + "tab in non-literal context", "bad escape sequence", "unterminated quoted string", "argument requires the width argument", "superfluous width argument", + "ignoring argument", "bad date argument", "bad width argument", - "unknown manual sction", + "unknown manual section", "section not in conventional manual section", "end of line whitespace", + "blocks badly nested", "scope open on exit", + + "generic error", + "NAME section must come first", "bad Boolean value", "child violates parent syntax", @@ -134,7 +162,6 @@ static const char * const mandocerrs[MANDOCERR_MAX] = "bad comment style", "unknown macro will be lost", "line scope broken", - "scope broken", "argument count wrong", "request scope close w/none open", "scope already open", @@ -142,13 +169,19 @@ static const char * const mandocerrs[MANDOCERR_MAX] = "macro requires body argument(s)", "macro requires argument(s)", "no title in document", + "missing list type", + "missing display type", + "missing font type", "line argument(s) will be lost", "body argument(s) will be lost", - "missing font type", - "missing display type", - "missing list type", + + "generic fatal error", + + "column syntax is inconsistent", "displays may not be nested", - "no scope to rewind: syntax violated", + "unsupported display type", + "blocks badly nested", + "no such block is open", "scope broken, syntax violated", "line scope broken, syntax violated", "argument count wrong, violates syntax", @@ -157,28 +190,23 @@ static const char * const mandocerrs[MANDOCERR_MAX] = "no document body", "no document prologue", "utsname system call failed", - "memory exhausted", + "static buffer exhausted", }; static void fdesc(struct curparse *); static void ffile(const char *, struct curparse *); -static int foptions(int *, char *); -static struct man *man_init(struct curparse *); -static struct mdoc *mdoc_init(struct curparse *); -static struct roff *roff_init(struct curparse *); static int moptions(enum intt *, char *); static int mmsg(enum mandocerr, void *, int, int, const char *); -static int pset(const char *, int, struct curparse *, +static void pset(const char *, int, struct curparse *, struct man **, struct mdoc **); static int toptions(struct curparse *, char *); static void usage(void) __attribute__((noreturn)); static void version(void) __attribute__((noreturn)); -static int woptions(int *, char *); +static int woptions(struct curparse *, char *); static const char *progname; -static int with_error; -static int with_warning; +static enum mandoclevel exit_status = MANDOCLEVEL_OK; int main(int argc, char *argv[]) @@ -196,17 +224,14 @@ main(int argc, char *argv[]) curp.inttype = INTT_AUTO; curp.outtype = OUTT_ASCII; + curp.wlevel = MANDOCLEVEL_FATAL; /* LINTED */ - while (-1 != (c = getopt(argc, argv, "f:m:O:T:VW:"))) + while (-1 != (c = getopt(argc, argv, "m:O:T:VW:"))) switch (c) { - case ('f'): - if ( ! foptions(&curp.fflags, optarg)) - return(EXIT_FAILURE); - break; case ('m'): if ( ! moptions(&curp.inttype, optarg)) - return(EXIT_FAILURE); + return(MANDOCLEVEL_BADARG); break; case ('O'): (void)strlcat(curp.outopts, optarg, BUFSIZ); @@ -214,11 +239,11 @@ main(int argc, char *argv[]) break; case ('T'): if ( ! toptions(&curp, optarg)) - return(EXIT_FAILURE); + return(MANDOCLEVEL_BADARG); break; case ('W'): - if ( ! woptions(&curp.wflags, optarg)) - return(EXIT_FAILURE); + if ( ! woptions(&curp, optarg)) + return(MANDOCLEVEL_BADARG); break; case ('V'): version(); @@ -240,8 +265,7 @@ main(int argc, char *argv[]) while (*argv) { ffile(*argv, &curp); - - if (with_error && !(curp.fflags & FL_IGN_ERRORS)) + if (MANDOCLEVEL_OK != exit_status && curp.wstop) break; ++argv; } @@ -255,8 +279,7 @@ main(int argc, char *argv[]) if (curp.roff) roff_free(curp.roff); - return((with_warning || with_error) ? - EXIT_FAILURE : EXIT_SUCCESS); + return(exit_status); } @@ -265,7 +288,7 @@ version(void) { (void)printf("%s %s\n", progname, VERSION); - exit(EXIT_SUCCESS); + exit(MANDOCLEVEL_OK); } @@ -276,56 +299,10 @@ usage(void) (void)fprintf(stderr, "usage: %s [-V] [-foption] " "[-mformat] [-Ooption] [-Toutput] " "[-Werr] [file...]\n", progname); - exit(EXIT_FAILURE); + exit(MANDOCLEVEL_BADARG); } -static struct man * -man_init(struct curparse *curp) -{ - int pflags; - - /* Defaults from mandoc.1. */ - - pflags = MAN_IGN_MACRO | MAN_IGN_ESCAPE; - - if (curp->fflags & FL_NIGN_MACRO) - pflags &= ~MAN_IGN_MACRO; - if (curp->fflags & FL_NIGN_ESCAPE) - pflags &= ~MAN_IGN_ESCAPE; - - return(man_alloc(curp, pflags, mmsg)); -} - - -static struct roff * -roff_init(struct curparse *curp) -{ - - return(roff_alloc(mmsg, curp)); -} - - -static struct mdoc * -mdoc_init(struct curparse *curp) -{ - int pflags; - - /* Defaults from mandoc.1. */ - - pflags = MDOC_IGN_MACRO | MDOC_IGN_ESCAPE; - - if (curp->fflags & FL_IGN_SCOPE) - pflags |= MDOC_IGN_SCOPE; - if (curp->fflags & FL_NIGN_ESCAPE) - pflags &= ~MDOC_IGN_ESCAPE; - if (curp->fflags & FL_NIGN_MACRO) - pflags &= ~MDOC_IGN_MACRO; - - return(mdoc_alloc(curp, pflags, mmsg)); -} - - static void ffile(const char *file, struct curparse *curp) { @@ -333,7 +310,7 @@ ffile(const char *file, struct curparse *curp) curp->file = file; if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) { perror(curp->file); - with_error = 1; + exit_status = MANDOCLEVEL_SYSERR; return; } @@ -344,24 +321,16 @@ ffile(const char *file, struct curparse *curp) } -static int +static void resize_buf(struct buf *buf, size_t initial) { - void *tmp; - size_t sz; - if (buf->sz == 0) - sz = initial; - else - sz = 2 * buf->sz; - tmp = realloc(buf->buf, sz); - if (NULL == tmp) { + buf->sz = buf->sz ? 2 * buf->sz : initial; + buf->buf = realloc(buf->buf, buf->sz); + if (NULL == buf->buf) { perror(NULL); - return(0); + exit(MANDOCLEVEL_SYSERR); } - buf->buf = tmp; - buf->sz = sz; - return(1); } @@ -374,7 +343,6 @@ read_whole_file(struct curparse *curp, struct buf *fb, if (-1 == fstat(curp->fd, &st)) { perror(curp->file); - with_error = 1; return(0); } @@ -389,13 +357,12 @@ read_whole_file(struct curparse *curp, struct buf *fb, if (st.st_size >= (1U << 31)) { fprintf(stderr, "%s: input too large\n", curp->file); - with_error = 1; return(0); } *with_mmap = 1; fb->sz = (size_t)st.st_size; fb->buf = mmap(NULL, fb->sz, PROT_READ, - MAP_FILE, curp->fd, 0); + MAP_FILE|MAP_SHARED, curp->fd, 0); if (fb->buf != MAP_FAILED) return(1); } @@ -416,8 +383,7 @@ read_whole_file(struct curparse *curp, struct buf *fb, curp->file); break; } - if (! resize_buf(fb, 65536)) - break; + resize_buf(fb, 65536); } ssz = read(curp->fd, fb->buf + (int)off, fb->sz - off); if (ssz == 0) { @@ -433,7 +399,6 @@ read_whole_file(struct curparse *curp, struct buf *fb, free(fb->buf); fb->buf = NULL; - with_error = 1; return(0); } @@ -444,6 +409,7 @@ fdesc(struct curparse *curp) struct buf ln, blk; int i, pos, lnn, lnn_start, with_mmap, of; enum rofferr re; + unsigned char c; struct man *man; struct mdoc *mdoc; struct roff *roff; @@ -451,6 +417,7 @@ fdesc(struct curparse *curp) man = NULL; mdoc = NULL; roff = NULL; + memset(&ln, 0, sizeof(struct buf)); /* @@ -458,13 +425,15 @@ fdesc(struct curparse *curp) * memory mapped. ln is a line buffer and grows on-demand. */ - if ( ! read_whole_file(curp, &blk, &with_mmap)) + if ( ! read_whole_file(curp, &blk, &with_mmap)) { + exit_status = MANDOCLEVEL_SYSERR; return; + } if (NULL == curp->roff) - curp->roff = roff_init(curp); - if (NULL == (roff = curp->roff)) - goto bailout; + curp->roff = roff_alloc(&curp->regs, curp, mmsg); + assert(curp->roff); + roff = curp->roff; for (i = 0, lnn = 1; i < (int)blk.sz;) { pos = 0; @@ -475,11 +444,28 @@ fdesc(struct curparse *curp) ++lnn; break; } + + /* + * Warn about bogus characters. If you're using + * non-ASCII encoding, you're screwing your + * readers. Since I'd rather this not happen, + * I'll be helpful and drop these characters so + * we don't display gibberish. Note to manual + * writers: use special characters. + */ + + c = (unsigned char) blk.buf[i]; + if ( ! (isascii(c) && (isgraph(c) || isblank(c)))) { + mmsg(MANDOCERR_BADCHAR, curp, + lnn_start, pos, "ignoring byte"); + i++; + continue; + } + /* Trailing backslash is like a plain character. */ if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) { if (pos >= (int)ln.sz) - if (! resize_buf(&ln, 256)) - goto bailout; + resize_buf(&ln, 256); ln.buf[pos++] = blk.buf[i++]; continue; } @@ -511,16 +497,14 @@ fdesc(struct curparse *curp) } /* Some other escape sequence, copy and continue. */ if (pos + 1 >= (int)ln.sz) - if (! resize_buf(&ln, 256)) - goto bailout; + resize_buf(&ln, 256); ln.buf[pos++] = blk.buf[i++]; ln.buf[pos++] = blk.buf[i++]; } if (pos >= (int)ln.sz) - if (! resize_buf(&ln, 256)) - goto bailout; + resize_buf(&ln, 256); ln.buf[pos] = '\0'; /* @@ -538,10 +522,12 @@ fdesc(struct curparse *curp) &ln.buf, &ln.sz, of, &of); } while (ROFF_RERUN == re); - if (ROFF_IGN == re) + if (ROFF_IGN == re) { continue; - else if (ROFF_ERR == re) - goto bailout; + } else if (ROFF_ERR == re) { + assert(MANDOCLEVEL_FATAL <= exit_status); + goto cleanup; + } /* * If input parsers have not been allocated, do so now. @@ -551,45 +537,81 @@ fdesc(struct curparse *curp) */ if ( ! (man || mdoc)) - if ( ! pset(ln.buf + of, pos - of, curp, &man, &mdoc)) - goto bailout; + pset(ln.buf + of, pos - of, curp, &man, &mdoc); /* Lastly, push down into the parsers themselves. */ - if (man && ! man_parseln(man, lnn_start, ln.buf, of)) - goto bailout; - if (mdoc && ! mdoc_parseln(mdoc, lnn_start, ln.buf, of)) - goto bailout; + if (man && ! man_parseln(man, lnn_start, ln.buf, of)) { + assert(MANDOCLEVEL_FATAL <= exit_status); + goto cleanup; + } + if (mdoc && ! mdoc_parseln(mdoc, lnn_start, ln.buf, of)) { + assert(MANDOCLEVEL_FATAL <= exit_status); + goto cleanup; + } } /* NOTE a parser may not have been assigned, yet. */ if ( ! (man || mdoc)) { fprintf(stderr, "%s: Not a manual\n", curp->file); - goto bailout; + exit_status = MANDOCLEVEL_FATAL; + goto cleanup; } /* Clean up the parse routine ASTs. */ - if (mdoc && ! mdoc_endparse(mdoc)) - goto bailout; - if (man && ! man_endparse(man)) - goto bailout; - if (roff && ! roff_endparse(roff)) - goto bailout; + if (mdoc && ! mdoc_endparse(mdoc)) { + assert(MANDOCLEVEL_FATAL <= exit_status); + goto cleanup; + } + if (man && ! man_endparse(man)) { + assert(MANDOCLEVEL_FATAL <= exit_status); + goto cleanup; + } + if (roff && ! roff_endparse(roff)) { + assert(MANDOCLEVEL_FATAL <= exit_status); + goto cleanup; + } + /* + * With -Wstop and warnings or errors of at least + * the requested level, do not produce output. + */ + + if (MANDOCLEVEL_OK != exit_status && curp->wstop) + goto cleanup; + /* If unset, allocate output dev now (if applicable). */ if ( ! (curp->outman && curp->outmdoc)) { switch (curp->outtype) { case (OUTT_XHTML): curp->outdata = xhtml_alloc(curp->outopts); - curp->outman = html_man; - curp->outmdoc = html_mdoc; - curp->outfree = html_free; break; case (OUTT_HTML): curp->outdata = html_alloc(curp->outopts); + break; + case (OUTT_ASCII): + curp->outdata = ascii_alloc(curp->outopts); + curp->outfree = ascii_free; + break; + case (OUTT_PDF): + curp->outdata = pdf_alloc(curp->outopts); + curp->outfree = pspdf_free; + break; + case (OUTT_PS): + curp->outdata = ps_alloc(curp->outopts); + curp->outfree = pspdf_free; + break; + default: + break; + } + + switch (curp->outtype) { + case (OUTT_HTML): + /* FALLTHROUGH */ + case (OUTT_XHTML): curp->outman = html_man; curp->outmdoc = html_mdoc; curp->outfree = html_free; @@ -598,14 +620,16 @@ fdesc(struct curparse *curp) curp->outman = tree_man; curp->outmdoc = tree_mdoc; break; - case (OUTT_LINT): - break; - default: - curp->outdata = ascii_alloc(80); + case (OUTT_PDF): + /* FALLTHROUGH */ + case (OUTT_ASCII): + /* FALLTHROUGH */ + case (OUTT_PS): curp->outman = terminal_man; curp->outmdoc = terminal_mdoc; - curp->outfree = terminal_free; break; + default: + break; } } @@ -617,6 +641,7 @@ fdesc(struct curparse *curp) (*curp->outmdoc)(curp->outdata, mdoc); cleanup: + memset(&curp->regs, 0, sizeof(struct regset)); if (mdoc) mdoc_reset(mdoc); if (man) @@ -631,14 +656,10 @@ fdesc(struct curparse *curp) free(blk.buf); return; - - bailout: - with_error = 1; - goto cleanup; } -static int +static void pset(const char *buf, int pos, struct curparse *curp, struct man **man, struct mdoc **mdoc) { @@ -656,40 +677,39 @@ pset(const char *buf, int pos, struct curparse *curp, for (i = 1; buf[i]; i++) if (' ' != buf[i] && '\t' != buf[i]) break; - if (0 == buf[i]) - return(1); + if ('\0' == buf[i]) + return; } switch (curp->inttype) { case (INTT_MDOC): if (NULL == curp->mdoc) - curp->mdoc = mdoc_init(curp); - if (NULL == (*mdoc = curp->mdoc)) - return(0); - return(1); + curp->mdoc = mdoc_alloc(&curp->regs, curp, mmsg); + assert(curp->mdoc); + *mdoc = curp->mdoc; + return; case (INTT_MAN): if (NULL == curp->man) - curp->man = man_init(curp); - if (NULL == (*man = curp->man)) - return(0); - return(1); + curp->man = man_alloc(&curp->regs, curp, mmsg); + assert(curp->man); + *man = curp->man; + return; default: break; } if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) { if (NULL == curp->mdoc) - curp->mdoc = mdoc_init(curp); - if (NULL == (*mdoc = curp->mdoc)) - return(0); - return(1); + curp->mdoc = mdoc_alloc(&curp->regs, curp, mmsg); + assert(curp->mdoc); + *mdoc = curp->mdoc; + return; } if (NULL == curp->man) - curp->man = man_init(curp); - if (NULL == (*man = curp->man)) - return(0); - return(1); + curp->man = man_alloc(&curp->regs, curp, mmsg); + assert(curp->man); + *man = curp->man; } @@ -720,8 +740,7 @@ toptions(struct curparse *curp, char *arg) curp->outtype = OUTT_ASCII; else if (0 == strcmp(arg, "lint")) { curp->outtype = OUTT_LINT; - curp->wflags |= WARN_WALL; - curp->fflags |= FL_STRICT; + curp->wlevel = MANDOCLEVEL_WARNING; } else if (0 == strcmp(arg, "tree")) curp->outtype = OUTT_TREE; @@ -729,6 +748,10 @@ toptions(struct curparse *curp, char *arg) curp->outtype = OUTT_HTML; else if (0 == strcmp(arg, "xhtml")) curp->outtype = OUTT_XHTML; + else if (0 == strcmp(arg, "ps")) + curp->outtype = OUTT_PS; + else if (0 == strcmp(arg, "pdf")) + curp->outtype = OUTT_PDF; else { fprintf(stderr, "%s: Bad argument\n", arg); return(0); @@ -739,42 +762,37 @@ toptions(struct curparse *curp, char *arg) static int -foptions(int *fflags, char *arg) +woptions(struct curparse *curp, char *arg) { char *v, *o; - const char *toks[8]; + const char *toks[6]; - toks[0] = "ign-scope"; - toks[1] = "no-ign-escape"; - toks[2] = "no-ign-macro"; - toks[3] = "ign-errors"; - toks[4] = "strict"; - toks[5] = "ign-escape"; - toks[6] = NULL; + toks[0] = "stop"; + toks[1] = "all"; + toks[2] = "warning"; + toks[3] = "error"; + toks[4] = "fatal"; + toks[5] = NULL; while (*arg) { o = arg; switch (getsubopt(&arg, UNCONST(toks), &v)) { case (0): - *fflags |= FL_IGN_SCOPE; + curp->wstop = 1; break; case (1): - *fflags |= FL_NIGN_ESCAPE; - break; + /* FALLTHROUGH */ case (2): - *fflags |= FL_NIGN_MACRO; + curp->wlevel = MANDOCLEVEL_WARNING; break; case (3): - *fflags |= FL_IGN_ERRORS; + curp->wlevel = MANDOCLEVEL_ERROR; break; case (4): - *fflags |= FL_STRICT; + curp->wlevel = MANDOCLEVEL_FATAL; break; - case (5): - *fflags &= ~FL_NIGN_ESCAPE; - break; default: - fprintf(stderr, "%s: Bad argument\n", o); + fprintf(stderr, "-W%s: Bad argument\n", o); return(0); } } @@ -784,62 +802,27 @@ foptions(int *fflags, char *arg) static int -woptions(int *wflags, char *arg) -{ - char *v, *o; - const char *toks[3]; - - toks[0] = "all"; - toks[1] = "error"; - toks[2] = NULL; - - while (*arg) { - o = arg; - switch (getsubopt(&arg, UNCONST(toks), &v)) { - case (0): - *wflags |= WARN_WALL; - break; - case (1): - *wflags |= WARN_WERR; - break; - default: - fprintf(stderr, "%s: Bad argument\n", o); - return(0); - } - } - - return(1); -} - - -static int mmsg(enum mandocerr t, void *arg, int ln, int col, const char *msg) { struct curparse *cp; + enum mandoclevel level; + level = MANDOCLEVEL_FATAL; + while (t < mandoclimits[level]) + level--; + cp = (struct curparse *)arg; + if (level < cp->wlevel) + return(1); - if (t <= MANDOCERR_ERROR) { - if ( ! (cp->wflags & WARN_WALL)) - return(1); - with_warning = 1; - } else - with_error = 1; - - fprintf(stderr, "%s:%d:%d: %s", cp->file, - ln, col + 1, mandocerrs[t]); - + fprintf(stderr, "%s:%d:%d: %s: %s", + cp->file, ln, col + 1, mandoclevels[level], mandocerrs[t]); if (msg) fprintf(stderr, ": %s", msg); - fputc('\n', stderr); - /* This is superfluous, but whatever. */ - if (t > MANDOCERR_ERROR) - return(0); - if (cp->wflags & WARN_WERR) { - with_error = 1; - return(0); - } - return(1); + if (exit_status < level) + exit_status = level; + + return(level < MANDOCLEVEL_FATAL); }