=================================================================== RCS file: /cvs/mandoc/mdoc_validate.c,v retrieving revision 1.156 retrieving revision 1.178 diff -u -p -r1.156 -r1.178 --- mandoc/mdoc_validate.c 2011/02/06 21:44:36 1.156 +++ mandoc/mdoc_validate.c 2011/11/18 14:58:24 1.178 @@ -1,4 +1,4 @@ -/* $Id: mdoc_validate.c,v 1.156 2011/02/06 21:44:36 kristaps Exp $ */ +/* $Id: mdoc_validate.c,v 1.178 2011/11/18 14:58:24 joerg Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010, 2011 Ingo Schwarze @@ -33,6 +33,7 @@ #include #include +#include "mdoc.h" #include "mandoc.h" #include "libmdoc.h" #include "libmandoc.h" @@ -71,10 +72,10 @@ static void check_text(struct mdoc *, int, int, char static void check_argv(struct mdoc *, struct mdoc_node *, struct mdoc_argv *); static void check_args(struct mdoc *, struct mdoc_node *); +static int concat(char *, const struct mdoc_node *, size_t); +static enum mdoc_sec a2sec(const char *); +static size_t macro2len(enum mdoct); -static int concat(struct mdoc *, char *, - const struct mdoc_node *, size_t); - static int ebool(POST_ARGS); static int berr_ge1(POST_ARGS); static int bwarn_ge1(POST_ARGS); @@ -140,7 +141,7 @@ static v_post posts_bx[] = { post_bx, NULL }; static v_post posts_bool[] = { ebool, NULL }; static v_post posts_eoln[] = { post_eoln, NULL }; static v_post posts_defaults[] = { post_defaults, NULL }; -static v_post posts_dd[] = { ewarn_ge1, post_dd, post_prol, NULL }; +static v_post posts_dd[] = { post_dd, post_prol, NULL }; static v_post posts_dl[] = { post_literal, bwarn_ge1, NULL }; static v_post posts_dt[] = { post_dt, post_prol, NULL }; static v_post posts_fo[] = { hwarn_eq1, bwarn_ge1, NULL }; @@ -152,9 +153,9 @@ static v_post posts_notext[] = { ewarn_eq0, NULL }; static v_post posts_ns[] = { post_ns, NULL }; static v_post posts_os[] = { post_os, post_prol, NULL }; static v_post posts_rs[] = { post_rs, NULL }; -static v_post posts_sh[] = { post_ignpar, hwarn_ge1, bwarn_ge1, post_sh, NULL }; +static v_post posts_sh[] = { post_ignpar, hwarn_ge1, post_sh, NULL }; static v_post posts_sp[] = { ewarn_le1, NULL }; -static v_post posts_ss[] = { post_ignpar, hwarn_ge1, bwarn_ge1, NULL }; +static v_post posts_ss[] = { post_ignpar, hwarn_ge1, NULL }; static v_post posts_st[] = { post_st, NULL }; static v_post posts_std[] = { post_std, NULL }; static v_post posts_text[] = { ewarn_ge1, NULL }; @@ -177,7 +178,7 @@ static v_pre pres_sh[] = { pre_sh, NULL }; static v_pre pres_ss[] = { pre_ss, NULL }; static v_pre pres_std[] = { pre_std, NULL }; -const struct valids mdoc_valids[MDOC_MAX] = { +static const struct valids mdoc_valids[MDOC_MAX] = { { NULL, NULL }, /* Ap */ { pres_dd, posts_dd }, /* Dd */ { pres_dt, posts_dt }, /* Dt */ @@ -221,7 +222,7 @@ const struct valids mdoc_valids[MDOC_MAX] = { { NULL, posts_text }, /* Xr */ { NULL, posts_text }, /* %A */ { NULL, posts_text }, /* %B */ /* FIXME: can be used outside Rs/Re. */ - { NULL, posts_text }, /* %D */ /* FIXME: check date with mandoc_a2time(). */ + { NULL, posts_text }, /* %D */ { NULL, posts_text }, /* %I */ { NULL, posts_text }, /* %J */ { NULL, posts_text }, /* %N */ @@ -321,6 +322,30 @@ static const enum mdoct rsord[RSORD_MAX] = { MDOC__U }; +static const char * const secnames[SEC__MAX] = { + NULL, + "NAME", + "LIBRARY", + "SYNOPSIS", + "DESCRIPTION", + "IMPLEMENTATION NOTES", + "RETURN VALUES", + "ENVIRONMENT", + "FILES", + "EXIT STATUS", + "EXAMPLES", + "DIAGNOSTICS", + "COMPATIBILITY", + "ERRORS", + "SEE ALSO", + "STANDARDS", + "HISTORY", + "AUTHORS", + "CAVEATS", + "BUGS", + "SECURITY CONSIDERATIONS", + NULL +}; int mdoc_valid_pre(struct mdoc *mdoc, struct mdoc_node *n) @@ -369,6 +394,8 @@ mdoc_valid_post(struct mdoc *mdoc) switch (mdoc->last->type) { case (MDOC_TEXT): /* FALLTHROUGH */ + case (MDOC_EQN): + /* FALLTHROUGH */ case (MDOC_TBL): return(1); case (MDOC_ROOT): @@ -418,10 +445,10 @@ check_count(struct mdoc *m, enum mdoc_type type, } t = lvl == CHECK_WARN ? MANDOCERR_ARGCWARN : MANDOCERR_ARGCOUNT; - - return(mdoc_vmsg(m, t, m->last->line, m->last->pos, + mandoc_vmsg(t, m->parse, m->last->line, m->last->pos, "want %s%d children (have %d)", - p, val, m->last->nchild)); + p, val, m->last->nchild); + return(1); } static int @@ -516,32 +543,13 @@ check_argv(struct mdoc *m, struct mdoc_node *n, struct static void check_text(struct mdoc *m, int ln, int pos, char *p) { - int c; - size_t sz; + char *cp; - for ( ; *p; p++, pos++) { - sz = strcspn(p, "\t\\"); - p += (int)sz; + if (MDOC_LITERAL & m->flags) + return; - if ('\0' == *p) - break; - - pos += (int)sz; - - if ('\t' == *p) { - if ( ! (MDOC_LITERAL & m->flags)) - mdoc_pmsg(m, ln, pos, MANDOCERR_BADTAB); - continue; - } - - if (0 == (c = mandoc_special(p))) { - mdoc_pmsg(m, ln, pos, MANDOCERR_BADESCAPE); - continue; - } - - p += c - 1; - pos += c - 1; - } + for (cp = p; NULL != (p = strchr(p, '\t')); p++) + mdoc_pmsg(m, ln, pos + (int)(p - cp), MANDOCERR_BADTAB); } static int @@ -553,10 +561,9 @@ check_parent(PRE_ARGS, enum mdoct tok, enum mdoc_type (t == n->parent->type)) return(1); - mdoc_vmsg(mdoc, MANDOCERR_SYNTCHILD, - n->line, n->pos, "want parent %s", - MDOC_ROOT == t ? "" : - mdoc_macronames[tok]); + mandoc_vmsg(MANDOCERR_SYNTCHILD, mdoc->parse, n->line, + n->pos, "want parent %s", MDOC_ROOT == t ? + "" : mdoc_macronames[tok]); return(0); } @@ -695,7 +702,7 @@ pre_bl(PRE_ARGS) if (LIST_column == lt) { n->norm->Bl.ncols = n->args->argv[i].sz; - n->norm->Bl.cols = (const char **) + n->norm->Bl.cols = (void *) n->args->argv[i].value; } } @@ -863,7 +870,7 @@ pre_sh(PRE_ARGS) if (MDOC_BLOCK != n->type) return(1); - mdoc->regs->regs[(int)REG_nS].set = 0; + roff_regunset(mdoc->roff, REG_nS); return(check_parent(mdoc, n, MDOC_MAX, MDOC_ROOT)); } @@ -917,7 +924,7 @@ static int pre_dt(PRE_ARGS) { - if (0 == mdoc->meta.date || mdoc->meta.os) + if (NULL == mdoc->meta.date || mdoc->meta.os) mdoc_nmsg(mdoc, n, MANDOCERR_PROLOGOOO); if (mdoc->meta.title) @@ -930,7 +937,7 @@ static int pre_os(PRE_ARGS) { - if (NULL == mdoc->meta.title || 0 == mdoc->meta.date) + if (NULL == mdoc->meta.title || NULL == mdoc->meta.date) mdoc_nmsg(mdoc, n, MANDOCERR_PROLOGOOO); if (mdoc->meta.os) @@ -1097,6 +1104,7 @@ static int post_nm(POST_ARGS) { char buf[BUFSIZ]; + int c; /* If no child specified, make sure we have the meta name. */ @@ -1108,11 +1116,14 @@ post_nm(POST_ARGS) /* If no meta name, set it from the child. */ - if ( ! concat(mdoc, buf, mdoc->last->child, BUFSIZ)) + buf[0] = '\0'; + if (-1 == (c = concat(buf, mdoc->last->child, BUFSIZ))) { + mdoc_nmsg(mdoc, mdoc->last->child, MANDOCERR_MEM); return(0); + } + assert(c); mdoc->meta.name = mandoc_strdup(buf); - return(1); } @@ -1238,7 +1249,7 @@ post_an(POST_ARGS) static int post_it(POST_ARGS) { - int i, cols, rc; + int i, cols; enum mdoc_list lt; struct mdoc_node *n, *c; enum mandocerr er; @@ -1304,10 +1315,10 @@ post_it(POST_ARGS) else er = MANDOCERR_SYNTARGCOUNT; - rc = mdoc_vmsg(mdoc, er, - mdoc->last->line, mdoc->last->pos, + mandoc_vmsg(er, mdoc->parse, mdoc->last->line, + mdoc->last->pos, "columns == %d (have %d)", cols, i); - return(rc); + return(MANDOCERR_ARGCOUNT == er); default: break; } @@ -1368,7 +1379,7 @@ post_bl_block_width(POST_ARGS) width = 6; else if (MDOC_MAX == (tok = mdoc_hash_find(n->norm->Bl.width))) return(1); - else if (0 == (width = mdoc_macro2len(tok))) { + else if (0 == (width = macro2len(tok))) { mdoc_nmsg(mdoc, n, MANDOCERR_BADWIDTH); return(1); } @@ -1383,7 +1394,7 @@ post_bl_block_width(POST_ARGS) assert(i < (int)n->args->argc); - snprintf(buf, NUMSIZ, "%zun", width); + snprintf(buf, NUMSIZ, "%un", (unsigned int)width); free(n->args->argv[i].value[0]); n->args->argv[i].value[0] = mandoc_strdup(buf); @@ -1425,7 +1436,7 @@ post_bl_block_tag(POST_ARGS) break; } - if (0 != (ssz = mdoc_macro2len(nn->tok))) + if (0 != (ssz = macro2len(nn->tok))) sz = ssz; break; @@ -1433,7 +1444,7 @@ post_bl_block_tag(POST_ARGS) /* Defaults to ten ens. */ - snprintf(buf, NUMSIZ, "%zun", sz); + snprintf(buf, NUMSIZ, "%un", (unsigned int)sz); /* * We have to dynamically add this to the macro's argument list. @@ -1499,7 +1510,7 @@ post_bl_head(POST_ARGS) assert(0 == np->args->argv[j].sz); /* - * Accomodate for new-style groff column syntax. Shuffle the + * Accommodate for new-style groff column syntax. Shuffle the * child nodes, all of which must be TEXT, as arguments for the * column field. Then, delete the head children. */ @@ -1509,7 +1520,7 @@ post_bl_head(POST_ARGS) ((size_t)mdoc->last->nchild * sizeof(char *)); mdoc->last->norm->Bl.ncols = np->args->argv[j].sz; - mdoc->last->norm->Bl.cols = (const char **)np->args->argv[j].value; + mdoc->last->norm->Bl.cols = (void *)np->args->argv[j].value; for (i = 0, nn = mdoc->last->child; nn; i++) { np->args->argv[j].value[i] = nn->string; @@ -1682,6 +1693,10 @@ post_rs(POST_ARGS) mdoc_node_delete(mdoc, nn); } + /* Bail out early if a plain text node is found inside .Rs. */ + if (NULL == mdoc->last->child) + return(1); + /* * The full `Rs' block needs special handling to order the * sub-elements according to `rsord'. Pick through each element @@ -1808,6 +1823,7 @@ post_sh_head(POST_ARGS) { char buf[BUFSIZ]; enum mdoc_sec sec; + int c; /* * Process a new section. Sections are either "named" or @@ -1816,11 +1832,14 @@ post_sh_head(POST_ARGS) * manual sections. */ - if ( ! concat(mdoc, buf, mdoc->last->child, BUFSIZ)) + sec = SEC_CUSTOM; + buf[0] = '\0'; + if (-1 == (c = concat(buf, mdoc->last->child, BUFSIZ))) { + mdoc_nmsg(mdoc, mdoc->last->child, MANDOCERR_MEM); return(0); + } else if (1 == c) + sec = a2sec(buf); - sec = mdoc_str2sec(buf); - /* The NAME should be first. */ if (SEC_NAME != sec && SEC_NONE == mdoc->lastnamed) @@ -1968,25 +1987,28 @@ post_dd(POST_ARGS) { char buf[DATESIZE]; struct mdoc_node *n; + int c; - n = mdoc->last; + if (mdoc->meta.date) + free(mdoc->meta.date); - if (NULL == n->child) { - mdoc->meta.date = time(NULL); + n = mdoc->last; + if (NULL == n->child || '\0' == n->child->string[0]) { + mdoc->meta.date = mandoc_normdate + (mdoc->parse, NULL, n->line, n->pos); return(1); } - if ( ! concat(mdoc, buf, n->child, DATESIZE)) + buf[0] = '\0'; + if (-1 == (c = concat(buf, n->child, DATESIZE))) { + mdoc_nmsg(mdoc, n->child, MANDOCERR_MEM); return(0); - - mdoc->meta.date = mandoc_a2time - (MTIME_MDOCDATE | MTIME_CANONICAL, buf); - - if (0 == mdoc->meta.date) { - mdoc_nmsg(mdoc, n, MANDOCERR_BADDATE); - mdoc->meta.date = time(NULL); } + assert(c); + mdoc->meta.date = mandoc_normdate + (mdoc->parse, buf, n->line, n->pos); + return(1); } @@ -2012,7 +2034,7 @@ post_dt(POST_ARGS) if (NULL != (nn = n->child)) for (p = nn->string; *p; p++) { - if (toupper((u_char)*p) == *p) + if (toupper((unsigned char)*p) == *p) continue; /* @@ -2127,7 +2149,8 @@ post_bx(POST_ARGS) n = mdoc->last->child; if (n && NULL != (n = n->next)) - *n->string = toupper((unsigned char)*n->string); + *n->string = (char)toupper + ((unsigned char)*n->string); return(1); } @@ -2137,6 +2160,7 @@ post_os(POST_ARGS) { struct mdoc_node *n; char buf[BUFSIZ]; + int c; #ifndef OSNAME struct utsname utsname; #endif @@ -2153,9 +2177,14 @@ post_os(POST_ARGS) if (mdoc->meta.os) free(mdoc->meta.os); - if ( ! concat(mdoc, buf, n->child, BUFSIZ)) + buf[0] = '\0'; + if (-1 == (c = concat(buf, n->child, BUFSIZ))) { + mdoc_nmsg(mdoc, n->child, MANDOCERR_MEM); return(0); + } + assert(c); + /* XXX: yes, these can all be dynamically-adjusted buffers, but * it's really not worth the extra hackery. */ @@ -2167,7 +2196,7 @@ post_os(POST_ARGS) return(0); } #else /*!OSNAME */ - if (uname(&utsname)) { + if (-1 == uname(&utsname)) { mdoc_nmsg(mdoc, n, MANDOCERR_UNAME); mdoc->meta.os = mandoc_strdup("UNKNOWN"); return(post_prol(mdoc)); @@ -2221,36 +2250,130 @@ post_std(POST_ARGS) return(1); } +/* + * Concatenate a node, stopping at the first non-text. + * Concatenation is separated by a single whitespace. + * Returns -1 on fatal (string overrun) error, 0 if child nodes were + * encountered, 1 otherwise. + */ static int -concat(struct mdoc *m, char *p, const struct mdoc_node *n, size_t sz) +concat(char *p, const struct mdoc_node *n, size_t sz) { - p[0] = '\0'; + for ( ; NULL != n; n = n->next) { + if (MDOC_TEXT != n->type) + return(0); + if ('\0' != p[0] && strlcat(p, " ", sz) >= sz) + return(-1); + if (strlcat(p, n->string, sz) >= sz) + return(-1); + concat(p, n->child, sz); + } - /* - * Concatenate sibling nodes together. All siblings must be of - * type MDOC_TEXT or an assertion is raised. Concatenation is - * separated by a single whitespace. Returns 0 on fatal (string - * overrun) error. - */ + return(1); +} - for ( ; n; n = n->next) { - assert(MDOC_TEXT == n->type); +static enum mdoc_sec +a2sec(const char *p) +{ + int i; - if (strlcat(p, n->string, sz) >= sz) { - mdoc_nmsg(m, n, MANDOCERR_MEM); - return(0); - } + for (i = 0; i < (int)SEC__MAX; i++) + if (secnames[i] && 0 == strcmp(p, secnames[i])) + return((enum mdoc_sec)i); - if (NULL == n->next) - continue; + return(SEC_CUSTOM); +} - if (strlcat(p, " ", sz) >= sz) { - mdoc_nmsg(m, n, MANDOCERR_MEM); - return(0); - } - } +static size_t +macro2len(enum mdoct macro) +{ - return(1); + switch (macro) { + case(MDOC_Ad): + return(12); + case(MDOC_Ao): + return(12); + case(MDOC_An): + return(12); + case(MDOC_Aq): + return(12); + case(MDOC_Ar): + return(12); + case(MDOC_Bo): + return(12); + case(MDOC_Bq): + return(12); + case(MDOC_Cd): + return(12); + case(MDOC_Cm): + return(10); + case(MDOC_Do): + return(10); + case(MDOC_Dq): + return(12); + case(MDOC_Dv): + return(12); + case(MDOC_Eo): + return(12); + case(MDOC_Em): + return(10); + case(MDOC_Er): + return(17); + case(MDOC_Ev): + return(15); + case(MDOC_Fa): + return(12); + case(MDOC_Fl): + return(10); + case(MDOC_Fo): + return(16); + case(MDOC_Fn): + return(16); + case(MDOC_Ic): + return(10); + case(MDOC_Li): + return(16); + case(MDOC_Ms): + return(6); + case(MDOC_Nm): + return(10); + case(MDOC_No): + return(12); + case(MDOC_Oo): + return(10); + case(MDOC_Op): + return(14); + case(MDOC_Pa): + return(32); + case(MDOC_Pf): + return(12); + case(MDOC_Po): + return(12); + case(MDOC_Pq): + return(12); + case(MDOC_Ql): + return(16); + case(MDOC_Qo): + return(12); + case(MDOC_So): + return(12); + case(MDOC_Sq): + return(12); + case(MDOC_Sy): + return(6); + case(MDOC_Sx): + return(16); + case(MDOC_Tn): + return(10); + case(MDOC_Va): + return(12); + case(MDOC_Vt): + return(12); + case(MDOC_Xr): + return(10); + default: + break; + }; + return(0); } -