=================================================================== RCS file: /cvs/mandoc/mdoc_validate.c,v retrieving revision 1.126 retrieving revision 1.131 diff -u -p -r1.126 -r1.131 --- mandoc/mdoc_validate.c 2010/11/29 14:50:33 1.126 +++ mandoc/mdoc_validate.c 2010/11/30 12:35:10 1.131 @@ -1,4 +1,4 @@ -/* $Id: mdoc_validate.c,v 1.126 2010/11/29 14:50:33 kristaps Exp $ */ +/* $Id: mdoc_validate.c,v 1.131 2010/11/30 12:35:10 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons * @@ -36,6 +36,9 @@ #define PRE_ARGS struct mdoc *mdoc, struct mdoc_node *n #define POST_ARGS struct mdoc *mdoc +#define NUMSIZ 32 +#define DATESIZ 32 + enum check_ineq { CHECK_LT, CHECK_GT, @@ -84,8 +87,12 @@ static int post_an(POST_ARGS); static int post_at(POST_ARGS); static int post_bf(POST_ARGS); static int post_bl(POST_ARGS); +static int post_bl_block(POST_ARGS); +static int post_bl_block_width(POST_ARGS); +static int post_bl_block_tag(POST_ARGS); static int post_bl_head(POST_ARGS); static int post_defaults(POST_ARGS); +static int post_literal(POST_ARGS); static int post_eoln(POST_ARGS); static int post_dt(POST_ARGS); static int post_it(POST_ARGS); @@ -105,6 +112,7 @@ static int pre_dd(PRE_ARGS); static int pre_display(PRE_ARGS); static int pre_dt(PRE_ARGS); static int pre_it(PRE_ARGS); +static int pre_literal(PRE_ARGS); static int pre_os(PRE_ARGS); static int pre_par(PRE_ARGS); static int pre_rv(PRE_ARGS); @@ -113,12 +121,14 @@ static int pre_ss(PRE_ARGS); static v_post posts_an[] = { post_an, NULL }; static v_post posts_at[] = { post_at, post_defaults, NULL }; -static v_post posts_bd_bk[] = { hwarn_eq0, bwarn_ge1, NULL }; +static v_post posts_bd[] = { post_literal, hwarn_eq0, bwarn_ge1, NULL }; static v_post posts_bf[] = { hwarn_le1, post_bf, NULL }; +static v_post posts_bk[] = { hwarn_eq0, bwarn_ge1, NULL }; static v_post posts_bl[] = { bwarn_ge1, post_bl, NULL }; static v_post posts_bool[] = { eerr_eq1, ebool, NULL }; static v_post posts_eoln[] = { post_eoln, NULL }; static v_post posts_defaults[] = { post_defaults, NULL }; +static v_post posts_dl[] = { post_literal, bwarn_ge1, herr_eq0, NULL }; static v_post posts_dt[] = { post_dt, NULL }; static v_post posts_fo[] = { hwarn_eq1, bwarn_ge1, NULL }; static v_post posts_it[] = { post_it, NULL }; @@ -137,9 +147,10 @@ static v_post posts_vt[] = { post_vt, NULL }; static v_post posts_wline[] = { bwarn_ge1, herr_eq0, NULL }; static v_post posts_wtext[] = { ewarn_ge1, NULL }; static v_pre pres_an[] = { pre_an, NULL }; -static v_pre pres_bd[] = { pre_display, pre_bd, pre_par, NULL }; +static v_pre pres_bd[] = { pre_display, pre_bd, pre_literal, pre_par, NULL }; static v_pre pres_bl[] = { pre_bl, pre_par, NULL }; static v_pre pres_d1[] = { pre_display, NULL }; +static v_pre pres_dl[] = { pre_literal, pre_display, NULL }; static v_pre pres_dd[] = { pre_dd, NULL }; static v_pre pres_dt[] = { pre_dt, NULL }; static v_pre pres_er[] = { NULL, NULL }; @@ -161,8 +172,8 @@ const struct valids mdoc_valids[MDOC_MAX] = { { pres_ss, posts_ss }, /* Ss */ { pres_pp, posts_notext }, /* Pp */ { pres_d1, posts_wline }, /* D1 */ - { pres_d1, posts_wline }, /* Dl */ - { pres_bd, posts_bd_bk }, /* Bd */ + { pres_dl, posts_dl }, /* Dl */ + { pres_bd, posts_bd }, /* Bd */ { NULL, NULL }, /* Ed */ { pres_bl, posts_bl }, /* Bl */ { NULL, NULL }, /* El */ @@ -188,7 +199,7 @@ const struct valids mdoc_valids[MDOC_MAX] = { { NULL, posts_nm }, /* Nm */ { NULL, posts_wline }, /* Op */ { NULL, NULL }, /* Ot */ - { NULL, NULL }, /* Pa */ + { NULL, posts_defaults }, /* Pa */ { pres_rv, NULL }, /* Rv */ { NULL, posts_st }, /* St */ { NULL, NULL }, /* Va */ @@ -253,7 +264,7 @@ const struct valids mdoc_valids[MDOC_MAX] = { { NULL, NULL }, /* Fc */ { NULL, NULL }, /* Oo */ { NULL, NULL }, /* Oc */ - { NULL, posts_bd_bk }, /* Bk */ + { NULL, posts_bk }, /* Bk */ { NULL, NULL }, /* Ek */ { NULL, posts_eoln }, /* Bt */ { NULL, NULL }, /* Hf */ @@ -582,6 +593,7 @@ pre_display(PRE_ARGS) if (MDOC_BLOCK == node->type) if (MDOC_Bd == node->tok) break; + if (NULL == node) return(1); @@ -1077,17 +1089,38 @@ post_bf(POST_ARGS) return(1); } - static int post_lb(POST_ARGS) { + const char *p; + char *buf; + size_t sz; - if (mdoc_a2lib(mdoc->last->child->string)) + assert(mdoc->last->child); + assert(MDOC_TEXT == mdoc->last->child->type); + + p = mdoc_a2lib(mdoc->last->child->string); + + /* If lookup ok, replace with table value. */ + + if (p) { + free(mdoc->last->child->string); + mdoc->last->child->string = mandoc_strdup(p); return(1); - return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADLIB)); -} + } + /* If not, use "library ``xxxx''. */ + sz = strlen(mdoc->last->child->string) + + 2 + strlen("\\(lqlibrary\\(rq"); + buf = mandoc_malloc(sz); + snprintf(buf, sz, "library \\(lq%s\\(rq", + mdoc->last->child->string); + free(mdoc->last->child->string); + mdoc->last->child->string = buf; + return(1); +} + static int post_eoln(POST_ARGS) { @@ -1127,23 +1160,69 @@ post_vt(POST_ARGS) static int post_nm(POST_ARGS) { + struct mdoc_node *nn; + char buf[BUFSIZ]; - if (mdoc->last->child) + /* If no child specified, make sure we have the meta name. */ + + if (NULL == mdoc->last->child && NULL == mdoc->meta.name) { + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NONAME); return(1); - if (mdoc->meta.name) + } else if (mdoc->meta.name) return(1); - return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NONAME)); + + /* If no meta name, set it from the child. */ + + buf[0] = '\0'; + + for (nn = mdoc->last->child; nn; nn = nn->next) { + /* XXX - copied from concat(). */ + assert(MDOC_TEXT == nn->type); + + if (strlcat(buf, nn->string, BUFSIZ) >= BUFSIZ) { + mdoc_nmsg(mdoc, nn, MANDOCERR_MEM); + return(0); + } + + if (NULL == nn->next) + continue; + + if (strlcat(buf, " ", BUFSIZ) >= BUFSIZ) { + mdoc_nmsg(mdoc, nn, MANDOCERR_MEM); + return(0); + } + } + + mdoc->meta.name = mandoc_strdup(buf); + return(1); } static int +post_literal(POST_ARGS) +{ + + /* + * The `Dl' (note "el" not "one") and `Bd' macros unset the + * MDOC_LITERAL flag as they leave. Note that `Bd' only sets + * this in literal mode, but it doesn't hurt to just switch it + * off in general since displays can't be nested. + */ + + if (MDOC_BODY == mdoc->last->type) + mdoc->last->flags &= ~MDOC_LITERAL; + + return(1); +} + +static int post_defaults(POST_ARGS) { struct mdoc_node *nn; /* * The `Ar' defaults to "file ..." if no value is provided as an - * argument; the `Mt' macro uses "~"; the `Li' just gets an - * empty string. + * argument; the `Mt' and `Pa' macros use "~"; the `Li' just + * gets an empty string. */ if (mdoc->last->child) @@ -1169,6 +1248,8 @@ post_defaults(POST_ARGS) if ( ! mdoc_word_alloc(mdoc, nn->line, nn->pos, "")) return(0); break; + case (MDOC_Pa): + /* FALLTHROUGH */ case (MDOC_Mt): if ( ! mdoc_word_alloc(mdoc, nn->line, nn->pos, "~")) return(0); @@ -1323,28 +1404,220 @@ post_it(POST_ARGS) return(1); } - static int -post_bl_head(POST_ARGS) +post_bl_block(POST_ARGS) { struct mdoc_node *n; - assert(mdoc->last->parent); - n = mdoc->last->parent; + /* + * These are fairly complicated, so we've broken them into two + * functions. post_bl_block_tag() is called when a -tag is + * specified, but no -width (it must be guessed). The second + * when a -width is specified (macro indicators must be + * rewritten into real lengths). + */ - if (LIST_column == n->data.Bl->type) { - if (n->data.Bl->ncols && mdoc->last->nchild) { - mdoc_nmsg(mdoc, n, MANDOCERR_COLUMNS); + n = mdoc->last; + + if (LIST_tag == n->data.Bl->type && + NULL == n->data.Bl->width) { + if ( ! post_bl_block_tag(mdoc)) return(0); + } else if (NULL != n->data.Bl->width) { + if ( ! post_bl_block_width(mdoc)) + return(0); + } else + return(1); + + assert(n->data.Bl->width); + return(1); +} + +static int +post_bl_block_width(POST_ARGS) +{ + size_t width; + int i; + enum mdoct tok; + struct mdoc_node *n; + char buf[NUMSIZ]; + + n = mdoc->last; + + /* + * Calculate the real width of a list from the -width string, + * which may contain a macro (with a known default width), a + * literal string, or a scaling width. + * + * If the value to -width is a macro, then we re-write it to be + * the macro's width as set in share/tmac/mdoc/doc-common. + */ + + if (0 == strcmp(n->data.Bl->width, "Ds")) + width = 6; + else if (MDOC_MAX == (tok = mdoc_hash_find(n->data.Bl->width))) + return(1); + else if (0 == (width = mdoc_macro2len(tok))) + return(mdoc_nmsg(mdoc, n, MANDOCERR_BADWIDTH)); + + /* The value already exists: free and reallocate it. */ + + assert(n->args); + + for (i = 0; i < (int)n->args->argc; i++) + if (MDOC_Width == n->args->argv[i].arg) + break; + + assert(i < (int)n->args->argc); + + snprintf(buf, NUMSIZ, "%zun", width); + free(n->args->argv[i].value[0]); + n->args->argv[i].value[0] = mandoc_strdup(buf); + + /* Set our width! */ + n->data.Bl->width = n->args->argv[i].value[0]; + return(1); +} + +static int +post_bl_block_tag(POST_ARGS) +{ + struct mdoc_node *n, *nn; + size_t sz, ssz; + int i; + char buf[NUMSIZ]; + + /* + * Calculate the -width for a `Bl -tag' list if it hasn't been + * provided. Uses the first head macro. NOTE AGAIN: this is + * ONLY if the -width argument has NOT been provided. See + * post_bl_block_width() for converting the -width string. + */ + + sz = 10; + n = mdoc->last; + + for (nn = n->body->child; nn; nn = nn->next) { + if (MDOC_It != nn->tok) + continue; + + assert(MDOC_BLOCK == nn->type); + nn = nn->head->child; + + if (nn == NULL) { + /* No -width for .Bl and first .It is emtpy */ + if ( ! mdoc_nmsg(mdoc, n, MANDOCERR_NOWIDTHARG)) + return(0); + break; } + + if (MDOC_TEXT == nn->type) { + sz = strlen(nn->string) + 1; + break; + } + + if (0 != (ssz = mdoc_macro2len(nn->tok))) + sz = ssz; + else if ( ! mdoc_nmsg(mdoc, n, MANDOCERR_NOWIDTHARG)) + return(0); + + break; + } + + /* Defaults to ten ens. */ + + snprintf(buf, NUMSIZ, "%zun", sz); + + /* + * We have to dynamically add this to the macro's argument list. + * We're guaranteed that a MDOC_Width doesn't already exist. + */ + + assert(n->args); + i = (int)(n->args->argc)++; + + n->args->argv = mandoc_realloc(n->args->argv, + n->args->argc * sizeof(struct mdoc_argv)); + + n->args->argv[i].arg = MDOC_Width; + n->args->argv[i].line = n->line; + n->args->argv[i].pos = n->pos; + n->args->argv[i].sz = 1; + n->args->argv[i].value = mandoc_malloc(sizeof(char *)); + n->args->argv[i].value[0] = mandoc_strdup(buf); + + /* Set our width! */ + n->data.Bl->width = n->args->argv[i].value[0]; + return(1); +} + + +static int +post_bl_head(POST_ARGS) +{ + struct mdoc_node *np, *nn, *nnp; + int i, j; + + if (LIST_column != mdoc->last->data.Bl->type) + /* FIXME: this should be ERROR class... */ + return(hwarn_eq0(mdoc)); + + /* + * Convert old-style lists, where the column width specifiers + * trail as macro parameters, to the new-style ("normal-form") + * lists where they're argument values following -column. + */ + + /* First, disallow both types and allow normal-form. */ + + /* + * TODO: technically, we can accept both and just merge the two + * lists, but I'll leave that for another day. + */ + + if (mdoc->last->data.Bl->ncols && mdoc->last->nchild) { + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_COLUMNS); + return(0); + } else if (NULL == mdoc->last->child) return(1); + + np = mdoc->last->parent; + assert(np->args); + + for (j = 0; j < (int)np->args->argc; j++) + if (MDOC_Column == np->args->argv[j].arg) + break; + + assert(j < (int)np->args->argc); + assert(0 == np->args->argv[j].sz); + + /* + * Accomodate for new-style groff column syntax. Shuffle the + * child nodes, all of which must be TEXT, as arguments for the + * column field. Then, delete the head children. + */ + + np->args->argv[j].sz = (size_t)mdoc->last->nchild; + np->args->argv[j].value = mandoc_malloc + ((size_t)mdoc->last->nchild * sizeof(char *)); + + mdoc->last->data.Bl->ncols = np->args->argv[j].sz; + mdoc->last->data.Bl->cols = (const char **)np->args->argv[j].value; + + for (i = 0, nn = mdoc->last->child; nn; i++) { + np->args->argv[j].value[i] = nn->string; + nn->string = NULL; + nnp = nn; + nn = nn->next; + mdoc_node_delete(NULL, nnp); } - /* FIXME: should be ERROR class. */ - return(hwarn_eq0(mdoc)); + mdoc->last->nchild = 0; + mdoc->last->child = NULL; + + return(1); } - static int post_bl(POST_ARGS) { @@ -1352,6 +1625,8 @@ post_bl(POST_ARGS) if (MDOC_HEAD == mdoc->last->type) return(post_bl_head(mdoc)); + if (MDOC_BLOCK == mdoc->last->type) + return(post_bl_block(mdoc)); if (MDOC_BODY != mdoc->last->type) return(1); if (NULL == mdoc->last->child) @@ -1417,17 +1692,26 @@ post_root(POST_ARGS) return(0); } - static int post_st(POST_ARGS) { + const char *p; - if (mdoc_a2st(mdoc->last->child->string)) - return(1); - return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADSTANDARD)); -} + assert(MDOC_TEXT == mdoc->last->child->type); + p = mdoc_a2st(mdoc->last->child->string); + if (p == NULL) { + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADSTANDARD); + mdoc_node_delete(mdoc, mdoc->last); + } else { + free(mdoc->last->child->string); + mdoc->last->child->string = mandoc_strdup(p); + } + + return(1); +} + static int post_rs(POST_ARGS) { @@ -1692,5 +1976,36 @@ pre_par(PRE_ARGS) mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_IGNPAR); mdoc_node_delete(mdoc, mdoc->last); + return(1); +} + +static int +pre_literal(PRE_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + + /* + * The `Dl' (note "el" not "one") and `Bd -literal' and `Bd + * -unfilled' macros set MDOC_LITERAL on entrance to the body. + */ + + switch (n->tok) { + case (MDOC_Dl): + mdoc->flags |= MDOC_LITERAL; + break; + case (MDOC_Bd): + assert(n->data.Bd); + if (DISP_literal == n->data.Bd->type) + mdoc->flags |= MDOC_LITERAL; + if (DISP_unfilled == n->data.Bd->type) + mdoc->flags |= MDOC_LITERAL; + break; + default: + abort(); + /* NOTREACHED */ + } + return(1); }