=================================================================== RCS file: /cvs/mandoc/Attic/html4_strict.c,v retrieving revision 1.2 retrieving revision 1.6 diff -u -p -r1.2 -r1.6 --- mandoc/Attic/html4_strict.c 2008/11/23 19:10:03 1.2 +++ mandoc/Attic/html4_strict.c 2008/11/24 08:50:33 1.6 @@ -1,4 +1,4 @@ -/* $Id: html4_strict.c,v 1.2 2008/11/23 19:10:03 kristaps Exp $ */ +/* $Id: html4_strict.c,v 1.6 2008/11/24 08:50:33 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -27,78 +27,85 @@ #include "libmdocml.h" #include "private.h" -enum roffd { - ROFF_ENTER = 0, - ROFF_EXIT +#define ROFF_MAXARG 10 + +enum roffd { + ROFF_ENTER = 0, + ROFF_EXIT }; -enum rofftype { - ROFF_NONE = 0, - ROFF_LAYOUT +enum rofftype { + ROFF_TITLE, + ROFF_COMMENT, + ROFF_TEXT, + ROFF_LAYOUT }; -struct rofftree; +#define ROFFCALL_ARGS \ + const struct md_args *arg, struct md_mbuf *out, \ + const struct md_rbuf *in, const char *argv[], \ + enum roffd type, struct rofftree *tree -#define ROFFCALL_ARGS const struct md_args *arg, \ - struct md_mbuf *out, \ - const struct md_rbuf *in, \ - const char *buf, size_t sz, \ - size_t pos, enum roffd type, \ - struct rofftree *tree -typedef int (*roffcall)(ROFFCALL_ARGS); +struct rofftree; -static int roff_Dd(ROFFCALL_ARGS); -static int roff_Dt(ROFFCALL_ARGS); -static int roff_Os(ROFFCALL_ARGS); -static int roff_Sh(ROFFCALL_ARGS); - struct rofftok { - char id; -#define ROFF___ 0 -#define ROFF_Dd 1 -#define ROFF_Dt 2 -#define ROFF_Os 3 -#define ROFF_Sh 4 -#define ROFF_Max 5 + int id; char name[2]; - roffcall cb; + int (*cb)(ROFFCALL_ARGS); enum rofftype type; int flags; -#define ROFF_NESTED (1 << 0) +#define ROFF_NESTED (1 << 0) +#define ROFF_PARSED (1 << 1) +#define ROFF_CALLABLE (1 << 2) +#define ROFF_QUOTES (1 << 3) }; -static const struct rofftok tokens[ROFF_Max] = { - { ROFF___, "\\\"", NULL, ROFF_NONE, 0 }, - { ROFF_Dd, "Dd", roff_Dd, ROFF_NONE, 0 }, - { ROFF_Dt, "Dt", roff_Dt, ROFF_NONE, 0 }, - { ROFF_Os, "Os", roff_Os, ROFF_LAYOUT, 0 }, - { ROFF_Sh, "Sh", roff_Sh, ROFF_LAYOUT, 0 }, -}; - struct roffnode { int tok; struct roffnode *parent; - /* TODO: line number at acquisition. */ + size_t line; }; -struct rofftree { +struct rofftree { struct roffnode *last; time_t date; char title[256]; char section[256]; char volume[256]; int state; -#define ROFF_PRELUDE_Os (1 << 1) -#define ROFF_PRELUDE_Dt (1 << 2) -#define ROFF_PRELUDE_Dd (1 << 3) +#define ROFF_PRELUDE (1 << 1) +#define ROFF_PRELUDE_Os (1 << 2) +#define ROFF_PRELUDE_Dt (1 << 3) +#define ROFF_PRELUDE_Dd (1 << 4) +#define ROFF_BODY (1 << 5) }; +#define ROFF___ 0 +#define ROFF_Dd 1 +#define ROFF_Dt 2 +#define ROFF_Os 3 +#define ROFF_Sh 4 +#define ROFF_An 5 +#define ROFF_Li 6 +#define ROFF_MAX 7 + +static int roff_Dd(ROFFCALL_ARGS); +static int roff_Dt(ROFFCALL_ARGS); +static int roff_Os(ROFFCALL_ARGS); +static int roff_Sh(ROFFCALL_ARGS); +static int roff_An(ROFFCALL_ARGS); +static int roff_Li(ROFFCALL_ARGS); + +static struct roffnode *roffnode_new(int, size_t, + struct rofftree *); +static void roffnode_free(int, struct rofftree *); + static int rofffind(const char *); +static int roffargs(int, char *, char **); static int roffparse(const struct md_args *, struct md_mbuf *, const struct md_rbuf *, - const char *, size_t, - struct rofftree *); + char *, size_t, struct rofftree *); static int textparse(struct md_mbuf *, const struct md_rbuf *, const char *, size_t, @@ -108,25 +115,44 @@ static void dbg_enter(const struct md_args *, int); static void dbg_leave(const struct md_args *, int); +static const struct rofftok tokens[ROFF_MAX] = +{ +{ ROFF___, "\\\"", NULL, ROFF_COMMENT, 0 }, +{ ROFF_Dd, "Dd", roff_Dd, ROFF_TITLE, 0 }, +{ ROFF_Dt, "Dt", roff_Dt, ROFF_TITLE, 0 }, +{ ROFF_Os, "Os", roff_Os, ROFF_TITLE, 0 }, +{ ROFF_Sh, "Sh", roff_Sh, ROFF_LAYOUT, 0 }, +{ ROFF_An, "An", roff_An, ROFF_TEXT, ROFF_PARSED }, +{ ROFF_Li, "Li", roff_Li, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, +}; + + int md_exit_html4_strict(const struct md_args *args, struct md_mbuf *out, - const struct md_rbuf *in, void *data) + const struct md_rbuf *in, int error, void *data) { struct rofftree *tree; - int error; assert(args); assert(data); tree = (struct rofftree *)data; - error = 0; + if (-1 == error) + out = NULL; + + /* LINTED */ while (tree->last) - if ( ! (*tokens[tree->last->tok].cb) - (args, error ? NULL : out, in, NULL, - 0, 0, ROFF_EXIT, tree)) - error = 1; + if ( ! (*tokens[tree->last->tok].cb)(args, out, in, + NULL, ROFF_EXIT, tree)) + out = NULL; + if (out && (ROFF_PRELUDE & tree->state)) { + warnx("%s: prelude never finished", in->name); + error = 1; + } + free(tree); + return(error ? 0 : 1); } @@ -149,6 +175,8 @@ md_init_html4_strict(const struct md_args *args, struc return(0); } + tree->state = ROFF_PRELUDE; + *data = tree; return(1); } @@ -156,7 +184,7 @@ md_init_html4_strict(const struct md_args *args, struc int md_line_html4_strict(const struct md_args *args, struct md_mbuf *out, - const struct md_rbuf *in, const char *buf, + const struct md_rbuf *in, char *buf, size_t sz, void *data) { struct rofftree *tree; @@ -206,20 +234,46 @@ textparse(struct md_mbuf *out, const struct md_rbuf *i static int +roffargs(int tok, char *buf, char **argv) +{ + int i; + + (void)tok;/* FIXME: quotable strings? */ + + assert(tok >= 0 && tok < ROFF_MAX); + assert('.' == *buf); + + /* LINTED */ + for (i = 0; *buf && i < ROFF_MAXARG; i++) { + argv[i] = buf++; + while (*buf && ! isspace(*buf)) + buf++; + if (NULL == *buf) { + continue; + } + *buf++ = 0; + while (*buf && isspace(*buf)) + buf++; + } + + assert(i > 0); + if (i < ROFF_MAXARG) + argv[i] = NULL; + + return(ROFF_MAXARG > i); +} + + +static int roffparse(const struct md_args *args, struct md_mbuf *out, - const struct md_rbuf *in, const char *buf, - size_t sz, struct rofftree *tree) + const struct md_rbuf *in, char *buf, size_t sz, + struct rofftree *tree) { - int tokid, t; - size_t pos; + int tok, t; struct roffnode *node; + char *argv[ROFF_MAXARG]; - assert(args); - assert(out); - assert(in); - assert(buf); assert(sz > 0); - assert(tree); /* * Extract the token identifier from the buffer. If there's no @@ -229,20 +283,65 @@ roffparse(const struct md_args *args, struct md_mbuf * */ if (3 > sz) { - warnx("%s: malformed input (line %zu, col 1)", + warnx("%s: malformed line (line %zu)", in->name, in->line); return(0); - } else if (ROFF_Max == (tokid = rofffind(buf + 1))) { - warnx("%s: unknown token `%c%c' (line %zu, col 1)", + } else if (ROFF_MAX == (tok = rofffind(buf + 1))) { + warnx("%s: unknown line token `%c%c' (line %zu)", in->name, *(buf + 1), *(buf + 2), in->line); return(0); - } else if (NULL == tokens[tokid].cb) - return(1); /* Skip token. */ + } else if (ROFF_COMMENT == tokens[tok].type) + /* Ignore comment tokens. */ + return(1); + + if ( ! roffargs(tok, buf, argv)) { + warnx("%s: too many arguments to `%s' (line %zu)", + in->name, tokens[tok].name, in->line); + return(0); + } - pos = 3; + /* Domain cross-contamination (and sanity) checks. */ + switch (tokens[tok].type) { + case (ROFF_TITLE): + if (ROFF_PRELUDE & tree->state) { + assert( ! (ROFF_BODY & tree->state)); + break; + } + assert(ROFF_BODY & tree->state); + warnx("%s: prelude token `%s' in body (line %zu)", + in->name, tokens[tok].name, in->line); + return(0); + case (ROFF_LAYOUT): + /* FALLTHROUGH */ + case (ROFF_TEXT): + if (ROFF_BODY & tree->state) { + assert( ! (ROFF_PRELUDE & tree->state)); + break; + } + assert(ROFF_PRELUDE & tree->state); + warnx("%s: body token `%s' in prelude (line %zu)", + in->name, tokens[tok].name, in->line); + return(0); + case (ROFF_COMMENT): + return(1); + default: + abort(); + } + /* + * Text-domain checks. + */ + + if (ROFF_TEXT == tokens[tok].type && + ! (ROFF_PARSED & tokens[tok].flags)) { + warnx("%s: text token `%s' not callable (line %zu)", + in->name, tokens[tok].name, in->line); + return(0); + } + + /* * If this is a non-nestable layout token and we're below a * token of the same type, then recurse upward to the token, * closing out the interim scopes. @@ -254,44 +353,47 @@ roffparse(const struct md_args *args, struct md_mbuf * node = NULL; - if (ROFF_LAYOUT == tokens[tokid].type && - ! (ROFF_NESTED & tokens[tokid].flags)) { + if (ROFF_LAYOUT == tokens[tok].type && + ! (ROFF_NESTED & tokens[tok].flags)) { for (node = tree->last; node; node = node->parent) { - if (node->tok == tokid) + if (node->tok == tok) break; /* Don't break nested scope. */ if ( ! (ROFF_NESTED & tokens[node->tok].flags)) continue; - warnx("%s: scope of %s broken by %s " - "(line %zu, col %zu)", - in->name, tokens[tokid].name, + warnx("%s: scope of %s (line %zu) broken by " + "%s (line %zu)", in->name, + tokens[tok].name, + node->line, tokens[node->tok].name, - in->line, pos); + in->line); return(0); } } + if (node) { - assert(ROFF_LAYOUT == tokens[tokid].type); - assert( ! (ROFF_NESTED & tokens[tokid].flags)); - assert(node->tok == tokid); + assert(ROFF_LAYOUT == tokens[tok].type); + assert( ! (ROFF_NESTED & tokens[tok].flags)); + assert(node->tok == tok); /* Clear up to last scoped token. */ + /* LINTED */ do { t = tree->last->tok; if ( ! (*tokens[tree->last->tok].cb) (args, out, in, NULL, - 0, 0, ROFF_EXIT, tree)) + ROFF_EXIT, tree)) return(0); - } while (t != tokid); + } while (t != tok); } /* Proceed with actual token processing. */ - return((*tokens[tokid].cb)(args, out, in, buf, sz, - pos, ROFF_ENTER, tree)); + return((*tokens[tok].cb)(args, out, in, (const char **)argv, + ROFF_ENTER, tree)); } @@ -302,231 +404,255 @@ rofffind(const char *name) assert(name); /* FIXME: use a table, this is slow but ok for now. */ - for (i = 0; i < ROFF_Max; i++) + + /* LINTED */ + for (i = 0; i < ROFF_MAX; i++) + /* LINTED */ if (0 == strncmp(name, tokens[i].name, 2)) - return(i); + return((int)i); - return(ROFF_Max); + return(ROFF_MAX); } -/* ARGUSED */ +static struct roffnode * +roffnode_new(int tokid, size_t line, struct rofftree *tree) +{ + struct roffnode *p; + + if (NULL == (p = malloc(sizeof(struct roffnode)))) { + warn("malloc"); + return(NULL); + } + + p->line = line; + p->tok = tokid; + p->parent = tree->last; + tree->last = p; + return(p); +} + + +static void +roffnode_free(int tokid, struct rofftree *tree) +{ + struct roffnode *p; + + assert(tree->last); + assert(tree->last->tok == tokid); + + p = tree->last; + tree->last = tree->last->parent; + free(p); +} + + +static int dbg_lvl = 0; + + +static void +dbg_enter(const struct md_args *args, int tokid) +{ + int i; + static char buf[72]; + + assert(args); + if ( ! (args->dbg & MD_DBG_TREE)) + return; + assert(tokid >= 0 && tokid <= ROFF_MAX); + + buf[0] = 0; + + switch (tokens[tokid].type) { + case (ROFF_LAYOUT): + /* FALLTHROUGH */ + case (ROFF_TEXT): + (void)strlcat(buf, "body: ", sizeof(buf)); + break; + case (ROFF_TITLE): + (void)strlcat(buf, "prelude: ", sizeof(buf)); + break; + default: + abort(); + } + + /* LINTED */ + for (i = 0; i < dbg_lvl; i++) + (void)strlcat(buf, " ", sizeof(buf)); + + (void)strlcat(buf, tokens[tokid].name, sizeof(buf)); + + (void)printf("%s\n", buf); + + if (ROFF_LAYOUT == tokens[tokid].type) + dbg_lvl++; +} + + +static void +dbg_leave(const struct md_args *args, int tokid) +{ + assert(args); + if ( ! (args->dbg & MD_DBG_TREE)) + return; + if (ROFF_LAYOUT != tokens[tokid].type) + return; + + assert(tokid >= 0 && tokid <= ROFF_MAX); + assert(dbg_lvl > 0); + dbg_lvl--; +} + + +/* ARGSUSED */ static int roff_Dd(ROFFCALL_ARGS) { - assert(in); - assert(tree); - assert(arg); - assert(out); - assert(buf); - assert(sz > 0); - assert(pos > 0); - assert(type == ROFF_ENTER); + dbg_enter(arg, ROFF_Dd); - if (tree->last) { - warnx("%s: superfluous prelude (line %zu, col %zu)", - in->name, in->line, pos); + assert(ROFF_PRELUDE & tree->state); + if (ROFF_PRELUDE_Dt & tree->state || + ROFF_PRELUDE_Dd & tree->state) { + warnx("%s: prelude `Dd' out-of-order (line %zu)", + in->name, in->line); return(0); } - if (0 != tree->state) { - warnx("%s: bad manual prelude (line %zu, col %zu)", - in->name, in->line, pos); - return(1); - } - - /* TODO: parse date from buffer. */ - - tree->date = time(NULL); + assert(NULL == tree->last); tree->state |= ROFF_PRELUDE_Dd; - (void)printf("Dd\n"); + dbg_leave(arg, ROFF_Dd); return(1); } +/* ARGSUSED */ static int roff_Dt(ROFFCALL_ARGS) { - assert(in); - assert(tree); - assert(arg); - assert(out); - assert(buf); - assert(sz > 0); - assert(pos > 0); - assert(type == ROFF_ENTER); + dbg_enter(arg, ROFF_Dt); - if (tree->last) { - warnx("%s: superfluous prelude (line %zu, col %zu)", - in->name, in->line, pos); - return(0); - } - + assert(ROFF_PRELUDE & tree->state); if ( ! (ROFF_PRELUDE_Dd & tree->state) || - (ROFF_PRELUDE_Os & tree->state) || (ROFF_PRELUDE_Dt & tree->state)) { - warnx("%s: bad manual prelude (line %zu, col %zu)", - in->name, in->line, pos); - return(1); + warnx("%s: prelude `Dt' out-of-order (line %zu)", + in->name, in->line); + return(0); } - /* TODO: parse titles from buffer. */ - + assert(NULL == tree->last); tree->state |= ROFF_PRELUDE_Dt; - (void)printf("Dt\n"); + dbg_leave(arg, ROFF_Dt); return(1); } +/* ARGSUSED */ static int roff_Os(ROFFCALL_ARGS) { - struct roffnode *node; - assert(arg); - assert(tree); - assert(in); - if (ROFF_EXIT == type) { - assert(tree->last); - assert(tree->last->tok == ROFF_Os); - - /* TODO: flush out ML footer. */ - - node = tree->last; - tree->last = node->parent; - free(node); - + roffnode_free(ROFF_Os, tree); dbg_leave(arg, ROFF_Os); - return(1); } - assert(out); - assert(buf); - assert(sz > 0); - assert(pos > 0); + dbg_enter(arg, ROFF_Os); - if (tree->last) { - warnx("%s: superfluous prelude (line %zu, col %zu)", - in->name, in->line, pos); + assert(ROFF_PRELUDE & tree->state); + if ( ! (ROFF_PRELUDE_Dt & tree->state) || + ! (ROFF_PRELUDE_Dd & tree->state)) { + warnx("%s: prelude `Os' out-of-order (line %zu)", + in->name, in->line); return(0); } - if ((ROFF_PRELUDE_Os & tree->state) || - ! (ROFF_PRELUDE_Dt & tree->state) || - ! (ROFF_PRELUDE_Dd & tree->state)) { - warnx("%s: bad manual prelude (line %zu, col %zu)", - in->name, in->line, pos); - return(1); - } - - node = malloc(sizeof(struct roffnode)); - if (NULL == node) { - warn("malloc"); + assert(NULL == tree->last); + if (NULL == roffnode_new(ROFF_Os, in->line, tree)) return(0); - } - node->tok = ROFF_Os; - node->parent = NULL; tree->state |= ROFF_PRELUDE_Os; - tree->last = node; + tree->state &= ~ROFF_PRELUDE; + tree->state |= ROFF_BODY; - dbg_enter(arg, ROFF_Os); - return(1); } -static int +/* ARGSUSED */ +static int roff_Sh(ROFFCALL_ARGS) { - struct roffnode *node; - assert(arg); - assert(tree); - assert(tree->last); - assert(in); - if (ROFF_EXIT == type) { - assert(tree->last->tok == ROFF_Sh); - - node = tree->last; - tree->last = node->parent; - free(node); - + roffnode_free(ROFF_Sh, tree); dbg_leave(arg, ROFF_Sh); - return(1); } - assert(out); - assert(buf); - assert(sz > 0); - assert(pos > 0); + dbg_enter(arg, ROFF_Sh); - node = malloc(sizeof(struct roffnode)); - if (NULL == node) { - warn("malloc"); + if (NULL == roffnode_new(ROFF_Sh, in->line, tree)) return(0); - } - node->tok = ROFF_Sh; - node->parent = tree->last; - tree->last = node; + dbg_leave(arg, ROFF_Li); - dbg_enter(arg, ROFF_Sh); - return(1); } -static int dbg_lvl = 0; /* FIXME: de-globalise. */ +/* ARGSUSED */ +static int +roff_Li(ROFFCALL_ARGS) +{ + dbg_enter(arg, ROFF_Li); + dbg_leave(arg, ROFF_Li); -static void -dbg_enter(const struct md_args *args, int tokid) -{ - int i; + return(1); +} - assert(args); - if ( ! (args->dbg & MD_DBG_TREE)) - return; - assert(tokid >= 0 && tokid <= ROFF_Max); +#if 0 +static int +parse_args(void) +{ + skip_whitespace(); - for (i = 0; i < dbg_lvl; i++) - (void)printf(" "); + while (pos < sz) { - (void)printf("%s\n", tokens[tokid].name); + if (is_arg) { + } else if (parsable) { + if (is_callable_token()) { + } + } - if (ROFF_LAYOUT == tokens[tokid].type) - dbg_lvl++; + skip_whitespace(); + } } +#endif -static void -dbg_leave(const struct md_args *args, int tokid) +/* ARGSUSED */ +static int +roff_An(ROFFCALL_ARGS) { - int i; - assert(args); - if ( ! (args->dbg & MD_DBG_TREE)) - return; + dbg_enter(arg, ROFF_An); - assert(tokid >= 0 && tokid <= ROFF_Max); - assert(dbg_lvl > 0); + /* Do our ML stuff. */ - dbg_lvl--; - for (i = 0; i < dbg_lvl; i++) - (void)printf(" "); + /*parse_args();*/ - (void)printf("%s\n", tokens[tokid].name); -} + /* Do our trailing whitespace stuff. */ + dbg_leave(arg, ROFF_An); + + return(1); +}