=================================================================== RCS file: /cvs/mandoc/Attic/mdocml.c,v retrieving revision 1.26 retrieving revision 1.53 diff -u -p -r1.26 -r1.53 --- mandoc/Attic/mdocml.c 2008/12/28 21:25:09 1.26 +++ mandoc/Attic/mdocml.c 2009/02/20 23:35:36 1.53 @@ -1,4 +1,4 @@ -/* $Id: mdocml.c,v 1.26 2008/12/28 21:25:09 kristaps Exp $ */ + /* $Id: mdocml.c,v 1.53 2009/02/20 23:35:36 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -30,86 +30,54 @@ #include "mdoc.h" -#define MD_LINE_SZ (256) +#define MD_LINE_SZ (256) /* Max input line size. */ struct md_parse { - int warn; -#define MD_WARN_ALL (1 << 0) -#define MD_WARN_ERR (1 << 1) - int dbg; - struct mdoc *mdoc; - char *buf; - u_long bufsz; - char *name; - int fd; - int lnn; - char *line; + int warn; /* Warning flags. */ +#define MD_WARN_SYNTAX (1 << 0) /* Show syntax warnings. */ +#define MD_WARN_COMPAT (1 << 1) /* Show compat warnings. */ +#define MD_WARN_ALL (0x03) /* Show all warnings. */ +#define MD_WARN_ERR (1 << 2) /* Make warnings->errors. */ + int dbg; /* Debug level. */ + struct mdoc *mdoc; /* Active parser. */ + char *buf; /* Input buffer. */ + u_long bufsz; /* Input buffer size. */ + char *in; /* Input file name. */ + int fdin; /* Input file desc. */ }; -static void usage(void); +extern char *__progname; -static int parse_begin(struct md_parse *); -static int parse_leave(struct md_parse *, int); -static int io_begin(struct md_parse *); -static int io_leave(struct md_parse *, int); -static int buf_begin(struct md_parse *); -static int buf_leave(struct md_parse *, int); +static void usage(void); -static int msg_err(void *, int, int, enum mdoc_err); -static int msg_warn(void *, int, int, enum mdoc_warn); -static void msg_msg(void *, int, const char *); +static int parse_opts(struct md_parse *, int, char *[]); +static int parse_subopts(struct md_parse *, char *); +static int parse_begin(struct md_parse *); +static int parse_leave(struct md_parse *, int); +static int io_begin(struct md_parse *); +static int io_leave(struct md_parse *, int); +static int buf_begin(struct md_parse *); +static int buf_leave(struct md_parse *, int); + +static void msg_msg(void *, int, int, const char *); +static int msg_err(void *, int, int, const char *); +static int msg_warn(void *, int, int, + enum mdoc_warn, const char *); + #ifdef __linux__ -extern int getsubopt(char **, char *const *, char **); +extern int getsubopt(char **, char *const *, char **); #endif int main(int argc, char *argv[]) { - int c; struct md_parse parser; - char *opts, *v; -#define ALL 0 -#define ERROR 1 - char *toks[] = { "all", "error", NULL }; - extern char *optarg; - extern int optind; - (void)memset(&parser, 0, sizeof(struct md_parse)); - while (-1 != (c = getopt(argc, argv, "vW:"))) - switch (c) { - case ('v'): - parser.dbg++; - break; - case ('W'): - opts = optarg; - while (*opts) - switch (getsubopt(&opts, toks, &v)) { - case (ALL): - parser.warn |= MD_WARN_ALL; - break; - case (ERROR): - parser.warn |= MD_WARN_ERR; - break; - default: - usage(); - return(1); - } - break; - default: - usage(); - return(1); - } - - argv += optind; - argc -= optind; - - parser.name = "-"; - if (1 == argc) - parser.name = *argv++; - + if ( ! parse_opts(&parser, argc, argv)) + return(EXIT_FAILURE); if ( ! io_begin(&parser)) return(EXIT_FAILURE); @@ -121,11 +89,11 @@ static int io_leave(struct md_parse *p, int code) { - if (-1 == p->fd || STDIN_FILENO == p->fd) + if (-1 == p->fdin || STDIN_FILENO == p->fdin) return(code); - if (-1 == close(p->fd)) { - warn("%s", p->name); + if (-1 == close(p->fdin)) { + warn("%s", p->in); code = 0; } return(code); @@ -133,13 +101,82 @@ io_leave(struct md_parse *p, int code) static int +parse_subopts(struct md_parse *p, char *arg) +{ + char *v; + char *toks[] = { "all", "compat", + "syntax", "error", NULL }; + + /* + * Future -Wxxx levels and so on should be here. For now we + * only recognise syntax and compat warnings as categories, + * beyond the usually "all" and "error" (make warn error out). + */ + + while (*arg) + switch (getsubopt(&arg, toks, &v)) { + case (0): + p->warn |= MD_WARN_ALL; + break; + case (1): + p->warn |= MD_WARN_COMPAT; + break; + case (2): + p->warn |= MD_WARN_SYNTAX; + break; + case (3): + p->warn |= MD_WARN_ERR; + break; + default: + usage(); + return(0); + } + + return(1); +} + + +static int +parse_opts(struct md_parse *p, int argc, char *argv[]) +{ + int c; + + extern char *optarg; + extern int optind; + + p->in = "-"; + + while (-1 != (c = getopt(argc, argv, "vW:"))) + switch (c) { + case ('v'): + p->dbg++; + break; + case ('W'): + if ( ! parse_subopts(p, optarg)) + return(0); + break; + default: + usage(); + return(0); + } + + argv += optind; + if (0 == (argc -= optind)) + return(1); + + p->in = *argv++; + return(1); +} + + +static int io_begin(struct md_parse *p) { - p->fd = STDIN_FILENO; - if (0 != strncmp(p->name, "-", 1)) - if (-1 == (p->fd = open(p->name, O_RDONLY, 0))) { - warn("%s", p->name); + p->fdin = STDIN_FILENO; + if (0 != strncmp(p->in, "-", 1)) + if (-1 == (p->fdin = open(p->in, O_RDONLY, 0))) { + warn("%s", p->in); return(io_leave(p, 0)); } @@ -162,11 +199,17 @@ buf_begin(struct md_parse *p) { struct stat st; - if (-1 == fstat(p->fd, &st)) { - warn("%s", p->name); - return(1); + if (-1 == fstat(p->fdin, &st)) { + warn("%s", p->in); + return(0); } + /* + * Try to intuit the fastest way of sucking down buffered data + * by using either the block buffer size or the hard-coded one. + * This is inspired by bin/cat.c. + */ + p->bufsz = MAX(st.st_blksize, BUFSIZ); if (NULL == (p->buf = malloc(p->bufsz))) { @@ -178,88 +221,27 @@ buf_begin(struct md_parse *p) } -static void -print_node(const struct mdoc_node *n, int indent) +static int +parse_leave(struct md_parse *p, int code) { - const char *p, *t; - int i, j; - size_t argc, sz; - char **params; - struct mdoc_arg *argv; + extern int termprint(const struct mdoc_node *, + const struct mdoc_meta *); + /*extern int treeprint(const struct mdoc_node *, + const struct mdoc_meta *);*/ - argv = NULL; - argc = 0; - params = NULL; - sz = 0; + if (NULL == p->mdoc) + return(code); - switch (n->type) { - case (MDOC_TEXT): - assert(NULL == n->child); - p = n->data.text.string; - t = "text"; - break; - case (MDOC_BODY): - p = mdoc_macronames[n->data.body.tok]; - t = "block-body"; - break; - case (MDOC_HEAD): - p = mdoc_macronames[n->data.head.tok]; - t = "block-head"; - params = n->data.head.args; - sz = n->data.head.sz; - break; - case (MDOC_ELEM): - assert(NULL == n->child); - p = mdoc_macronames[n->data.elem.tok]; - t = "element"; - argv = n->data.elem.argv; - argc = n->data.elem.argc; - params = n->data.elem.args; - sz = n->data.elem.sz; - break; - case (MDOC_BLOCK): - p = mdoc_macronames[n->data.block.tok]; - t = "block"; - argv = n->data.block.argv; - argc = n->data.block.argc; - break; - default: - abort(); - /* NOTREACHED */ - } + if ( ! mdoc_endparse(p->mdoc)) + code = 0; - for (i = 0; i < indent; i++) - (void)printf(" "); - (void)printf("%s (%s)", p, t); + /* TODO */ + if (code && ! termprint(mdoc_node(p->mdoc), mdoc_meta(p->mdoc))) + code = 0; + /*if (code && ! treeprint(mdoc_node(p->mdoc), mdoc_meta(p->mdoc))) + code = 0;*/ - for (i = 0; i < (int)argc; i++) { - (void)printf(" -%s", mdoc_argnames[argv[i].arg]); - for (j = 0; j < (int)argv[i].sz; j++) - (void)printf(" \"%s\"", argv[i].value[j]); - } - - for (i = 0; i < (int)sz; i++) - (void)printf(" \"%s\"", params[i]); - - (void)printf("\n"); - - if (n->child) - print_node(n->child, indent + 1); - if (n->next) - print_node(n->next, indent); -} - - -static int -parse_leave(struct md_parse *p, int code) -{ - const struct mdoc_node *n; - - if (p->mdoc) { - if ((n = mdoc_result(p->mdoc))) - print_node(n, 0); - mdoc_free(p->mdoc); - } + mdoc_free(p->mdoc); return(code); } @@ -269,8 +251,9 @@ parse_begin(struct md_parse *p) { ssize_t sz, i; size_t pos; - char line[256], sv[256]; + char line[MD_LINE_SZ]; struct mdoc_cb cb; + int lnn; cb.mdoc_err = msg_err; cb.mdoc_warn = msg_warn; @@ -279,12 +262,19 @@ parse_begin(struct md_parse *p) if (NULL == (p->mdoc = mdoc_alloc(p, &cb))) return(parse_leave(p, 0)); - p->lnn = 1; - p->line = sv; + /* + * This is a little more complicated than fgets. TODO: have + * some benchmarks that show it's faster (note that I want to + * check many, many manuals simultaneously, so speed is + * important). Fill a buffer (sized to the block size) with a + * single read, then parse \n-terminated lines into a line + * buffer, which is passed to the parser. Hard-code the line + * buffer to a particular size -- a reasonable assumption. + */ - for (pos = 0; ; ) { - if (-1 == (sz = read(p->fd, p->buf, p->bufsz))) { - warn("%s", p->name); + for (lnn = 1, pos = 0; ; ) { + if (-1 == (sz = read(p->fdin, p->buf, p->bufsz))) { + warn("%s", p->in); return(parse_leave(p, 0)); } else if (0 == sz) break; @@ -292,21 +282,18 @@ parse_begin(struct md_parse *p) for (i = 0; i < sz; i++) { if ('\n' != p->buf[i]) { if (pos < sizeof(line)) { - sv[(int)pos] = p->buf[(int)i]; - line[(int)pos++] = - p->buf[(int)i]; + line[(int)pos++] = p->buf[(int)i]; continue; } - warnx("%s: line %d too long", - p->name, p->lnn); + warnx("%s: line %d too long", p->in, lnn); return(parse_leave(p, 0)); } - line[(int)pos] = sv[(int)pos] = 0; - if ( ! mdoc_parseln(p->mdoc, line)) + line[(int)pos] = 0; + if ( ! mdoc_parseln(p->mdoc, lnn, line)) return(parse_leave(p, 0)); - p->lnn++; + lnn++; pos = 0; } } @@ -316,218 +303,67 @@ parse_begin(struct md_parse *p) static int -msg_err(void *arg, int tok, int col, enum mdoc_err type) +msg_err(void *arg, int line, int col, const char *msg) { - char *fmt, *lit; struct md_parse *p; - int i; p = (struct md_parse *)arg; - fmt = lit = NULL; - - switch (type) { - case (ERR_SYNTAX_QUOTE): - lit = "syntax: disallowed argument quotation"; - break; - case (ERR_SYNTAX_UNQUOTE): - lit = "syntax: unterminated quotation"; - break; - case (ERR_SYNTAX_WS): - lit = "syntax: whitespace in argument"; - break; - case (ERR_SYNTAX_ARGFORM): - fmt = "syntax: macro `%s' arguments malformed"; - break; - case (ERR_SYNTAX_ARG): - fmt = "syntax: unknown argument for macro `%s'"; - break; - case (ERR_SCOPE_BREAK): - /* Which scope is broken? */ - fmt = "scope: macro `%s' breaks prior explicit scope"; - break; - case (ERR_SCOPE_NOCTX): - fmt = "scope: closure macro `%s' has no context"; - break; - case (ERR_SCOPE_NONEST): - fmt = "scope: macro `%s' may not be nested in the current context"; - break; - case (ERR_MACRO_NOTSUP): - fmt = "macro `%s' not supported"; - break; - case (ERR_MACRO_NOTCALL): - fmt = "macro `%s' not callable"; - break; - case (ERR_SEC_PROLOGUE): - fmt = "macro `%s' cannot be called in the prologue"; - break; - case (ERR_SEC_NPROLOGUE): - fmt = "macro `%s' called outside of prologue"; - break; - case (ERR_ARGS_GE1): - fmt = "macro `%s' expects one or more arguments"; - break; - case (ERR_ARGS_MANY): - fmt = "macro `%s' has too many arguments"; - break; - case (ERR_SEC_PROLOGUE_OO): - fmt = "prologue macro `%s' is out-of-order"; - break; - case (ERR_SEC_PROLOGUE_REP): - fmt = "prologue macro `%s' repeated"; - break; - case (ERR_SEC_NAME): - lit = "`NAME' section must be first"; - break; - case (ERR_SYNTAX_ARGVAL): - lit = "syntax: expected value for macro argument"; - break; - case (ERR_SYNTAX_ARGBAD): - lit = "syntax: invalid value for macro argument"; - break; - case (ERR_SYNTAX_ARGMANY): - lit = "syntax: too many values for macro argument"; - break; - default: - abort(); - /* NOTREACHED */ - } - - if (fmt) { - (void)fprintf(stderr, "%s:%d: error: ", - p->name, p->lnn); - (void)fprintf(stderr, fmt, mdoc_macronames[tok]); - } else - (void)fprintf(stderr, "%s:%d: error: %s", - p->name, p->lnn, lit); - - if (p->dbg < 1) { - if (-1 != col) - (void)fprintf(stderr, " (column %d)\n", col); - return(0); - } else if (-1 == col) { - (void)fprintf(stderr, "\nFrom: %s", p->line); - return(0); - } - - (void)fprintf(stderr, "\nFrom: %s\n ", p->line); - for (i = 0; i < col; i++) - (void)fprintf(stderr, " "); - (void)fprintf(stderr, "^\n"); - + warnx("%s:%d: error: %s (column %d)", + p->in, line, msg, col); return(0); } static void -msg_msg(void *arg, int col, const char *msg) +msg_msg(void *arg, int line, int col, const char *msg) { struct md_parse *p; - int i; p = (struct md_parse *)arg; - if (p->dbg < 2) + if (0 == p->dbg) return; - (void)printf("%s:%d: %s", p->name, p->lnn, msg); - - if (p->dbg < 3) { - if (-1 != col) - (void)printf(" (column %d)\n", col); - return; - } else if (-1 == col) { - (void)printf("\nFrom %s\n", p->line); - return; - } - - (void)printf("\nFrom: %s\n ", p->line); - for (i = 0; i < col; i++) - (void)printf(" "); - (void)printf("^\n"); + warnx("%s:%d: debug: %s (column %d)", + p->in, line, msg, col); } static int -msg_warn(void *arg, int tok, int col, enum mdoc_warn type) +msg_warn(void *arg, int line, int col, + enum mdoc_warn type, const char *msg) { - char *fmt, *lit; struct md_parse *p; - int i; - extern char *__progname; p = (struct md_parse *)arg; - if ( ! (p->warn & MD_WARN_ALL)) - return(1); - - fmt = lit = NULL; - switch (type) { - case (WARN_SYNTAX_WS_EOLN): - lit = "syntax: whitespace at end-of-line"; - break; - case (WARN_SYNTAX_QUOTED): - lit = "syntax: quotation mark starting string"; - break; - case (WARN_SYNTAX_MACLIKE): - lit = "syntax: macro-like argument"; - break; - case (WARN_SYNTAX_ARGLIKE): - lit = "syntax: argument-like value"; - break; - case (WARN_SEC_OO): - lit = "section is out of conventional order"; - break; - case (WARN_ARGS_GE1): - fmt = "macro `%s' suggests one or more arguments"; - break; - case (WARN_ARGS_EQ0): - fmt = "macro `%s' suggests zero arguments"; - break; - case (WARN_IGN_AFTER_BLK): - fmt = "ignore: macro `%s' ignored after block macro"; - break; - case (WARN_IGN_BEFORE_BLK): - fmt = "ignore: macro before block macro `%s' ignored"; - break; - default: - abort(); - /* NOTREACHED */ + case (WARN_COMPAT): + if (p->warn & MD_WARN_COMPAT) + break; + return(1); + case (WARN_SYNTAX): + if (p->warn & MD_WARN_SYNTAX) + break; + return(1); } - if (fmt) { - (void)fprintf(stderr, "%s:%d: warning: ", - p->name, p->lnn); - (void)fprintf(stderr, fmt, mdoc_macronames[tok]); - } else - (void)fprintf(stderr, "%s:%d: warning: %s", - p->name, p->lnn, lit); + warnx("%s:%d: warning: %s (column %d)", + p->in, line, msg, col); - if (p->dbg >= 1) { - (void)fprintf(stderr, "\nFrom: %s\n ", p->line); - for (i = 0; i < col; i++) - (void)fprintf(stderr, " "); - (void)fprintf(stderr, "^\n"); - } else - (void)fprintf(stderr, " (column %d)\n", col); + if ( ! (p->warn & MD_WARN_ERR)) + return(1); - if (p->warn & MD_WARN_ERR) { - (void)fprintf(stderr, "%s: considering warnings as " - "errors\n", __progname); - return(0); - } - - return(1); + warnx("%s: considering warnings as errors", __progname); + return(0); } static void usage(void) { - extern char *__progname; - (void)fprintf(stderr, "usage: %s [-v] [-Wwarn...] [infile]\n", - __progname); + warnx("usage: %s [-v] [-Wwarn...] [infile]", __progname); }