=================================================================== RCS file: /cvs/mandoc/mdoc_validate.c,v retrieving revision 1.121 retrieving revision 1.129 diff -u -p -r1.121 -r1.129 --- mandoc/mdoc_validate.c 2010/10/11 15:46:19 1.121 +++ mandoc/mdoc_validate.c 2010/11/29 16:06:46 1.129 @@ -1,4 +1,4 @@ -/* $Id: mdoc_validate.c,v 1.121 2010/10/11 15:46:19 kristaps Exp $ */ +/* $Id: mdoc_validate.c,v 1.129 2010/11/29 16:06:46 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons * @@ -85,6 +85,9 @@ static int post_at(POST_ARGS); static int post_bf(POST_ARGS); static int post_bl(POST_ARGS); static int post_bl_head(POST_ARGS); +static int post_defaults(POST_ARGS); +static int post_literal(POST_ARGS); +static int post_eoln(POST_ARGS); static int post_dt(POST_ARGS); static int post_it(POST_ARGS); static int post_lb(POST_ARGS); @@ -95,7 +98,6 @@ static int post_sh(POST_ARGS); static int post_sh_body(POST_ARGS); static int post_sh_head(POST_ARGS); static int post_st(POST_ARGS); -static int post_eoln(POST_ARGS); static int post_vt(POST_ARGS); static int pre_an(PRE_ARGS); static int pre_bd(PRE_ARGS); @@ -104,19 +106,23 @@ static int pre_dd(PRE_ARGS); static int pre_display(PRE_ARGS); static int pre_dt(PRE_ARGS); static int pre_it(PRE_ARGS); +static int pre_literal(PRE_ARGS); static int pre_os(PRE_ARGS); -static int pre_pp(PRE_ARGS); +static int pre_par(PRE_ARGS); static int pre_rv(PRE_ARGS); static int pre_sh(PRE_ARGS); static int pre_ss(PRE_ARGS); static v_post posts_an[] = { post_an, NULL }; -static v_post posts_at[] = { post_at, NULL }; -static v_post posts_bd_bk[] = { hwarn_eq0, bwarn_ge1, NULL }; +static v_post posts_at[] = { post_at, post_defaults, NULL }; +static v_post posts_bd[] = { post_literal, hwarn_eq0, bwarn_ge1, NULL }; static v_post posts_bf[] = { hwarn_le1, post_bf, NULL }; +static v_post posts_bk[] = { hwarn_eq0, bwarn_ge1, NULL }; static v_post posts_bl[] = { bwarn_ge1, post_bl, NULL }; static v_post posts_bool[] = { eerr_eq1, ebool, NULL }; static v_post posts_eoln[] = { post_eoln, NULL }; +static v_post posts_defaults[] = { post_defaults, NULL }; +static v_post posts_dl[] = { post_literal, bwarn_ge1, herr_eq0, NULL }; static v_post posts_dt[] = { post_dt, NULL }; static v_post posts_fo[] = { hwarn_eq1, bwarn_ge1, NULL }; static v_post posts_it[] = { post_it, NULL }; @@ -135,9 +141,10 @@ static v_post posts_vt[] = { post_vt, NULL }; static v_post posts_wline[] = { bwarn_ge1, herr_eq0, NULL }; static v_post posts_wtext[] = { ewarn_ge1, NULL }; static v_pre pres_an[] = { pre_an, NULL }; -static v_pre pres_bd[] = { pre_display, pre_bd, pre_pp, NULL }; -static v_pre pres_bl[] = { pre_bl, pre_pp, NULL }; +static v_pre pres_bd[] = { pre_display, pre_bd, pre_literal, pre_par, NULL }; +static v_pre pres_bl[] = { pre_bl, pre_par, NULL }; static v_pre pres_d1[] = { pre_display, NULL }; +static v_pre pres_dl[] = { pre_literal, pre_display, NULL }; static v_pre pres_dd[] = { pre_dd, NULL }; static v_pre pres_dt[] = { pre_dt, NULL }; static v_pre pres_er[] = { NULL, NULL }; @@ -145,7 +152,7 @@ static v_pre pres_ex[] = { NULL, NULL }; static v_pre pres_fd[] = { NULL, NULL }; static v_pre pres_it[] = { pre_it, NULL }; static v_pre pres_os[] = { pre_os, NULL }; -static v_pre pres_pp[] = { pre_pp, NULL }; +static v_pre pres_pp[] = { pre_par, NULL }; static v_pre pres_rv[] = { pre_rv, NULL }; static v_pre pres_sh[] = { pre_sh, NULL }; static v_pre pres_ss[] = { pre_ss, NULL }; @@ -159,15 +166,15 @@ const struct valids mdoc_valids[MDOC_MAX] = { { pres_ss, posts_ss }, /* Ss */ { pres_pp, posts_notext }, /* Pp */ { pres_d1, posts_wline }, /* D1 */ - { pres_d1, posts_wline }, /* Dl */ - { pres_bd, posts_bd_bk }, /* Bd */ + { pres_dl, posts_dl }, /* Dl */ + { pres_bd, posts_bd }, /* Bd */ { NULL, NULL }, /* Ed */ { pres_bl, posts_bl }, /* Bl */ { NULL, NULL }, /* El */ { pres_it, posts_it }, /* It */ { NULL, posts_text }, /* Ad */ { pres_an, posts_an }, /* An */ - { NULL, NULL }, /* Ar */ + { NULL, posts_defaults }, /* Ar */ { NULL, posts_text }, /* Cd */ { NULL, NULL }, /* Cm */ { NULL, NULL }, /* Dv */ @@ -181,12 +188,12 @@ const struct valids mdoc_valids[MDOC_MAX] = { { NULL, posts_wtext }, /* Ft */ { NULL, posts_text }, /* Ic */ { NULL, posts_text1 }, /* In */ - { NULL, NULL }, /* Li */ + { NULL, posts_defaults }, /* Li */ { NULL, posts_nd }, /* Nd */ { NULL, posts_nm }, /* Nm */ { NULL, posts_wline }, /* Op */ { NULL, NULL }, /* Ot */ - { NULL, NULL }, /* Pa */ + { NULL, posts_defaults }, /* Pa */ { pres_rv, NULL }, /* Rv */ { NULL, posts_st }, /* St */ { NULL, NULL }, /* Va */ @@ -251,7 +258,7 @@ const struct valids mdoc_valids[MDOC_MAX] = { { NULL, NULL }, /* Fc */ { NULL, NULL }, /* Oo */ { NULL, NULL }, /* Oc */ - { NULL, posts_bd_bk }, /* Bk */ + { NULL, posts_bk }, /* Bk */ { NULL, NULL }, /* Ek */ { NULL, posts_eoln }, /* Bt */ { NULL, NULL }, /* Hf */ @@ -260,7 +267,7 @@ const struct valids mdoc_valids[MDOC_MAX] = { { NULL, posts_lb }, /* Lb */ { NULL, posts_notext }, /* Lp */ { NULL, posts_text }, /* Lk */ - { NULL, posts_text }, /* Mt */ + { NULL, posts_defaults }, /* Mt */ { NULL, posts_wline }, /* Brq */ { NULL, NULL }, /* Bro */ { NULL, NULL }, /* Brc */ @@ -275,7 +282,26 @@ const struct valids mdoc_valids[MDOC_MAX] = { { NULL, NULL }, /* Ta */ }; +#define RSORD_MAX 14 /* Number of `Rs' blocks. */ +static const enum mdoct rsord[RSORD_MAX] = { + MDOC__A, + MDOC__T, + MDOC__B, + MDOC__I, + MDOC__J, + MDOC__R, + MDOC__N, + MDOC__V, + MDOC__P, + MDOC__Q, + MDOC__D, + MDOC__O, + MDOC__C, + MDOC__U +}; + + int mdoc_valid_pre(struct mdoc *mdoc, struct mdoc_node *n) { @@ -561,6 +587,7 @@ pre_display(PRE_ARGS) if (MDOC_BLOCK == node->type) if (MDOC_Bd == node->tok) break; + if (NULL == node) return(1); @@ -1056,17 +1083,38 @@ post_bf(POST_ARGS) return(1); } - static int post_lb(POST_ARGS) { + const char *p; + char *buf; + size_t sz; - if (mdoc_a2lib(mdoc->last->child->string)) + assert(mdoc->last->child); + assert(MDOC_TEXT == mdoc->last->child->type); + + p = mdoc_a2lib(mdoc->last->child->string); + + /* If lookup ok, replace with table value. */ + + if (p) { + free(mdoc->last->child->string); + mdoc->last->child->string = mandoc_strdup(p); return(1); - return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADLIB)); -} + } + /* If not, use "library ``xxxx''. */ + sz = strlen(mdoc->last->child->string) + + 2 + strlen("\\(lqlibrary\\(rq"); + buf = mandoc_malloc(sz); + snprintf(buf, sz, "library \\(lq%s\\(rq", + mdoc->last->child->string); + free(mdoc->last->child->string); + mdoc->last->child->string = buf; + return(1); +} + static int post_eoln(POST_ARGS) { @@ -1106,28 +1154,146 @@ post_vt(POST_ARGS) static int post_nm(POST_ARGS) { + struct mdoc_node *nn; + char buf[BUFSIZ]; - if (mdoc->last->child) + /* If no child specified, make sure we have the meta name. */ + + if (NULL == mdoc->last->child && NULL == mdoc->meta.name) { + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NONAME); return(1); - if (mdoc->meta.name) + } else if (mdoc->meta.name) return(1); - return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NONAME)); + + /* If no meta name, set it from the child. */ + + buf[0] = '\0'; + + for (nn = mdoc->last->child; nn; nn = nn->next) { + /* XXX - copied from concat(). */ + assert(MDOC_TEXT == nn->type); + + if (strlcat(buf, nn->string, BUFSIZ) >= BUFSIZ) { + mdoc_nmsg(mdoc, nn, MANDOCERR_MEM); + return(0); + } + + if (NULL == nn->next) + continue; + + if (strlcat(buf, " ", BUFSIZ) >= BUFSIZ) { + mdoc_nmsg(mdoc, nn, MANDOCERR_MEM); + return(0); + } + } + + mdoc->meta.name = mandoc_strdup(buf); + return(1); } +static int +post_literal(POST_ARGS) +{ + + /* + * The `Dl' (note "el" not "one") and `Bd' macros unset the + * MDOC_LITERAL flag as they leave. Note that `Bd' only sets + * this in literal mode, but it doesn't hurt to just switch it + * off in general since displays can't be nested. + */ + if (MDOC_BODY == mdoc->last->type) + mdoc->last->flags &= ~MDOC_LITERAL; + + return(1); +} + static int +post_defaults(POST_ARGS) +{ + struct mdoc_node *nn; + + /* + * The `Ar' defaults to "file ..." if no value is provided as an + * argument; the `Mt' and `Pa' macros use "~"; the `Li' just + * gets an empty string. + */ + + if (mdoc->last->child) + return(1); + + nn = mdoc->last; + mdoc->next = MDOC_NEXT_CHILD; + + switch (nn->tok) { + case (MDOC_Ar): + if ( ! mdoc_word_alloc(mdoc, nn->line, nn->pos, "file")) + return(0); + if ( ! mdoc_word_alloc(mdoc, nn->line, nn->pos, "...")) + return(0); + break; + case (MDOC_At): + if ( ! mdoc_word_alloc(mdoc, nn->line, nn->pos, "AT&T")) + return(0); + if ( ! mdoc_word_alloc(mdoc, nn->line, nn->pos, "UNIX")) + return(0); + break; + case (MDOC_Li): + if ( ! mdoc_word_alloc(mdoc, nn->line, nn->pos, "")) + return(0); + break; + case (MDOC_Pa): + /* FALLTHROUGH */ + case (MDOC_Mt): + if ( ! mdoc_word_alloc(mdoc, nn->line, nn->pos, "~")) + return(0); + break; + default: + abort(); + /* NOTREACHED */ + } + + mdoc->last = nn; + return(1); +} + +static int post_at(POST_ARGS) { + const char *p, *q; + char *buf; + size_t sz; + /* + * If we have a child, look it up in the standard keys. If a + * key exist, use that instead of the child; if it doesn't, + * prefix "AT&T UNIX " to the existing data. + */ + if (NULL == mdoc->last->child) return(1); + assert(MDOC_TEXT == mdoc->last->child->type); - if (mdoc_a2att(mdoc->last->child->string)) - return(1); - return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADATT)); -} + p = mdoc_a2att(mdoc->last->child->string); + if (p) { + free(mdoc->last->child->string); + mdoc->last->child->string = mandoc_strdup(p); + } else { + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADATT); + p = "AT&T UNIX "; + q = mdoc->last->child->string; + sz = strlen(p) + strlen(q) + 1; + buf = mandoc_malloc(sz); + strlcpy(buf, p, sz); + strlcat(buf, q, sz); + free(mdoc->last->child->string); + mdoc->last->child->string = buf; + } + return(1); +} + static int post_an(POST_ARGS) { @@ -1326,60 +1492,119 @@ post_root(POST_ARGS) return(0); } - static int post_st(POST_ARGS) { + const char *p; - if (mdoc_a2st(mdoc->last->child->string)) - return(1); - return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADSTANDARD)); -} + assert(MDOC_TEXT == mdoc->last->child->type); + p = mdoc_a2st(mdoc->last->child->string); + if (p == NULL) { + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADSTANDARD); + mdoc_node_delete(mdoc, mdoc->last); + } else { + free(mdoc->last->child->string); + mdoc->last->child->string = mandoc_strdup(p); + } + + return(1); +} + static int post_rs(POST_ARGS) { - struct mdoc_node *nn; + struct mdoc_node *nn, *next, *prev; + int i, j; if (MDOC_BODY != mdoc->last->type) return(1); - for (nn = mdoc->last->child; nn; nn = nn->next) - switch (nn->tok) { - case(MDOC__U): - /* FALLTHROUGH */ - case(MDOC__Q): - /* FALLTHROUGH */ - case(MDOC__C): - /* FALLTHROUGH */ - case(MDOC__A): - /* FALLTHROUGH */ - case(MDOC__B): - /* FALLTHROUGH */ - case(MDOC__D): - /* FALLTHROUGH */ - case(MDOC__I): - /* FALLTHROUGH */ - case(MDOC__J): - /* FALLTHROUGH */ - case(MDOC__N): - /* FALLTHROUGH */ - case(MDOC__O): - /* FALLTHROUGH */ - case(MDOC__P): - /* FALLTHROUGH */ - case(MDOC__R): - /* FALLTHROUGH */ - case(MDOC__T): - /* FALLTHROUGH */ - case(MDOC__V): - break; - default: - mdoc_nmsg(mdoc, nn, MANDOCERR_SYNTCHILD); - return(0); + /* + * Make sure only certain types of nodes are allowed within the + * the `Rs' body. Delete offending nodes and raise a warning. + * Do this before re-ordering for the sake of clarity. + */ + + next = NULL; + for (nn = mdoc->last->child; nn; nn = next) { + for (i = 0; i < RSORD_MAX; i++) + if (nn->tok == rsord[i]) + break; + + if (i < RSORD_MAX) { + next = nn->next; + continue; } + next = nn->next; + mdoc_nmsg(mdoc, nn, MANDOCERR_CHILD); + mdoc_node_delete(mdoc, nn); + } + + /* + * The full `Rs' block needs special handling to order the + * sub-elements according to `rsord'. Pick through each element + * and correctly order it. This is a insertion sort. + */ + + next = NULL; + for (nn = mdoc->last->child->next; nn; nn = next) { + /* Determine order of `nn'. */ + for (i = 0; i < RSORD_MAX; i++) + if (rsord[i] == nn->tok) + break; + + /* + * Remove `nn' from the chain. This somewhat + * repeats mdoc_node_unlink(), but since we're + * just re-ordering, there's no need for the + * full unlink process. + */ + + if (NULL != (next = nn->next)) + next->prev = nn->prev; + + if (NULL != (prev = nn->prev)) + prev->next = nn->next; + + nn->prev = nn->next = NULL; + + /* + * Scan back until we reach a node that's + * ordered before `nn'. + */ + + for ( ; prev ; prev = prev->prev) { + /* Determine order of `prev'. */ + for (j = 0; j < RSORD_MAX; j++) + if (rsord[j] == prev->tok) + break; + + if (j <= i) + break; + } + + /* + * Set `nn' back into its correct place in front + * of the `prev' node. + */ + + nn->prev = prev; + + if (prev) { + if (prev->next) + prev->next->prev = nn; + nn->next = prev->next; + prev->next = nn; + } else { + mdoc->last->child->prev = nn; + nn->next = mdoc->last->child; + mdoc->last->child = nn; + } + } + return(1); } @@ -1433,34 +1658,33 @@ post_sh_body(POST_ARGS) static int post_sh_head(POST_ARGS) { - char buf[BUFSIZ]; - enum mdoc_sec sec; - const struct mdoc_node *n; + char buf[BUFSIZ]; + enum mdoc_sec sec; + struct mdoc_node *n; /* * Process a new section. Sections are either "named" or - * "custom"; custom sections are user-defined, while named ones - * usually follow a conventional order and may only appear in - * certain manual sections. + * "custom". Custom sections are user-defined, while named ones + * follow a conventional order and may only appear in certain + * manual sections. */ buf[0] = '\0'; - /* - * FIXME: yes, these can use a dynamic buffer, but I don't do so - * in the interests of simplicity. - */ + /* FIXME: use dynamic buffer... */ for (n = mdoc->last->child; n; n = n->next) { - /* XXX - copied from compact(). */ + /* XXX - copied from concat(). */ assert(MDOC_TEXT == n->type); if (strlcat(buf, n->string, BUFSIZ) >= BUFSIZ) { mdoc_nmsg(mdoc, n, MANDOCERR_MEM); return(0); } + if (NULL == n->next) continue; + if (strlcat(buf, " ", BUFSIZ) >= BUFSIZ) { mdoc_nmsg(mdoc, n, MANDOCERR_MEM); return(0); @@ -1469,41 +1693,60 @@ post_sh_head(POST_ARGS) sec = mdoc_str2sec(buf); - /* - * Check: NAME should always be first, CUSTOM has no roles, - * non-CUSTOM has a conventional order to be followed. - */ + /* The NAME should be first. */ if (SEC_NAME != sec && SEC_NONE == mdoc->lastnamed) - if ( ! mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NAMESECFIRST)) - return(0); + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NAMESECFIRST); + /* The SYNOPSIS gets special attention in other areas. */ + + if (SEC_SYNOPSIS == sec) + mdoc->flags |= MDOC_SYNOPSIS; + else + mdoc->flags &= ~MDOC_SYNOPSIS; + + /* Mark our last section. */ + + mdoc->lastsec = sec; + + /* We don't care about custom sections after this. */ + if (SEC_CUSTOM == sec) return(1); + /* + * Check whether our non-custom section is being repeated or is + * out of order. + */ + if (sec == mdoc->lastnamed) - if ( ! mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_SECREP)) - return(0); + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_SECREP); if (sec < mdoc->lastnamed) - if ( ! mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_SECOOO)) - return(0); + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_SECOOO); - /* - * Check particular section/manual conventions. LIBRARY can - * only occur in manual section 2, 3, and 9. - */ + /* Mark the last named section. */ + mdoc->lastnamed = sec; + + /* Check particular section/manual conventions. */ + + assert(mdoc->meta.msec); + switch (sec) { + case (SEC_RETURN_VALUES): + /* FALLTHROUGH */ + case (SEC_ERRORS): + /* FALLTHROUGH */ case (SEC_LIBRARY): - assert(mdoc->meta.msec); if (*mdoc->meta.msec == '2') break; if (*mdoc->meta.msec == '3') break; if (*mdoc->meta.msec == '9') break; - return(mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_SECMSEC)); + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_SECMSEC); + break; default: break; } @@ -1511,15 +1754,17 @@ post_sh_head(POST_ARGS) return(1); } - static int -pre_pp(PRE_ARGS) +pre_par(PRE_ARGS) { if (NULL == mdoc->last) return(1); - /* Don't allow prior `Lp' or `Pp'. */ + /* + * Don't allow prior `Lp' or `Pp' prior to a paragraph-type + * block: `Lp', `Pp', or non-compact `Bd' or `Bl'. + */ if (MDOC_Pp != mdoc->last->tok && MDOC_Lp != mdoc->last->tok) return(1); @@ -1531,5 +1776,36 @@ pre_pp(PRE_ARGS) mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_IGNPAR); mdoc_node_delete(mdoc, mdoc->last); + return(1); +} + +static int +pre_literal(PRE_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + + /* + * The `Dl' (note "el" not "one") and `Bd -literal' and `Bd + * -unfilled' macros set MDOC_LITERAL on entrance to the body. + */ + + switch (n->tok) { + case (MDOC_Dl): + mdoc->flags |= MDOC_LITERAL; + break; + case (MDOC_Bd): + assert(n->data.Bd); + if (DISP_literal == n->data.Bd->type) + mdoc->flags |= MDOC_LITERAL; + if (DISP_unfilled == n->data.Bd->type) + mdoc->flags |= MDOC_LITERAL; + break; + default: + abort(); + /* NOTREACHED */ + } + return(1); }