=================================================================== RCS file: /cvs/mandoc/mdoc_validate.c,v retrieving revision 1.164 retrieving revision 1.185 diff -u -p -r1.164 -r1.185 --- mandoc/mdoc_validate.c 2011/03/20 16:02:05 1.164 +++ mandoc/mdoc_validate.c 2012/07/10 14:38:51 1.185 @@ -1,7 +1,7 @@ -/* $Id: mdoc_validate.c,v 1.164 2011/03/20 16:02:05 kristaps Exp $ */ +/* $Id: mdoc_validate.c,v 1.185 2012/07/10 14:38:51 schwarze Exp $ */ /* - * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2010, 2011 Ingo Schwarze + * Copyright (c) 2008-2012 Kristaps Dzonsons + * Copyright (c) 2010, 2011, 2012 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -33,6 +33,7 @@ #include #include +#include "mdoc.h" #include "mandoc.h" #include "libmdoc.h" #include "libmandoc.h" @@ -71,9 +72,7 @@ static void check_text(struct mdoc *, int, int, char static void check_argv(struct mdoc *, struct mdoc_node *, struct mdoc_argv *); static void check_args(struct mdoc *, struct mdoc_node *); - -static int concat(struct mdoc *, char *, - const struct mdoc_node *, size_t); +static int concat(char *, const struct mdoc_node *, size_t); static enum mdoc_sec a2sec(const char *); static size_t macro2len(enum mdoct); @@ -154,9 +153,9 @@ static v_post posts_notext[] = { ewarn_eq0, NULL }; static v_post posts_ns[] = { post_ns, NULL }; static v_post posts_os[] = { post_os, post_prol, NULL }; static v_post posts_rs[] = { post_rs, NULL }; -static v_post posts_sh[] = { post_ignpar, hwarn_ge1, bwarn_ge1, post_sh, NULL }; +static v_post posts_sh[] = { post_ignpar, hwarn_ge1, post_sh, NULL }; static v_post posts_sp[] = { ewarn_le1, NULL }; -static v_post posts_ss[] = { post_ignpar, hwarn_ge1, bwarn_ge1, NULL }; +static v_post posts_ss[] = { post_ignpar, hwarn_ge1, NULL }; static v_post posts_st[] = { post_st, NULL }; static v_post posts_std[] = { post_std, NULL }; static v_post posts_text[] = { ewarn_ge1, NULL }; @@ -544,32 +543,13 @@ check_argv(struct mdoc *m, struct mdoc_node *n, struct static void check_text(struct mdoc *m, int ln, int pos, char *p) { - int c; - size_t sz; + char *cp; - for ( ; *p; p++, pos++) { - sz = strcspn(p, "\t\\"); - p += (int)sz; + if (MDOC_LITERAL & m->flags) + return; - if ('\0' == *p) - break; - - pos += (int)sz; - - if ('\t' == *p) { - if ( ! (MDOC_LITERAL & m->flags)) - mdoc_pmsg(m, ln, pos, MANDOCERR_BADTAB); - continue; - } - - if (0 == (c = mandoc_special(p))) { - mdoc_pmsg(m, ln, pos, MANDOCERR_BADESCAPE); - continue; - } - - p += c - 1; - pos += c - 1; - } + for (cp = p; NULL != (p = strchr(p, '\t')); p++) + mdoc_pmsg(m, ln, pos + (int)(p - cp), MANDOCERR_BADTAB); } static int @@ -681,8 +661,13 @@ pre_bl(PRE_ARGS) comp = 1; break; case (MDOC_Width): - dup = (NULL != n->norm->Bl.width); - width = n->args->argv[i].value[0]; + /* NB: this can be empty! */ + if (n->args->argv[i].sz) { + width = n->args->argv[i].value[0]; + dup = (NULL != n->norm->Bl.width); + break; + } + mdoc_nmsg(mdoc, n, MANDOCERR_IGNARGV); break; case (MDOC_Offset): /* NB: this can be empty! */ @@ -722,7 +707,7 @@ pre_bl(PRE_ARGS) if (LIST_column == lt) { n->norm->Bl.ncols = n->args->argv[i].sz; - n->norm->Bl.cols = (const char **) + n->norm->Bl.cols = (void *) n->args->argv[i].value; } } @@ -748,14 +733,14 @@ pre_bl(PRE_ARGS) /* * Validate the width field. Some list types don't need width * types and should be warned about them. Others should have it - * and must also be warned. + * and must also be warned. Yet others have a default and need + * no warning. */ switch (n->norm->Bl.type) { case (LIST_tag): - if (n->norm->Bl.width) - break; - mdoc_nmsg(mdoc, n, MANDOCERR_NOWIDTHARG); + if (NULL == n->norm->Bl.width) + mdoc_nmsg(mdoc, n, MANDOCERR_NOWIDTHARG); break; case (LIST_column): /* FALLTHROUGH */ @@ -769,6 +754,18 @@ pre_bl(PRE_ARGS) if (n->norm->Bl.width) mdoc_nmsg(mdoc, n, MANDOCERR_IGNARGV); break; + case (LIST_bullet): + /* FALLTHROUGH */ + case (LIST_dash): + /* FALLTHROUGH */ + case (LIST_hyphen): + if (NULL == n->norm->Bl.width) + n->norm->Bl.width = "2n"; + break; + case (LIST_enum): + if (NULL == n->norm->Bl.width) + n->norm->Bl.width = "3n"; + break; default: break; } @@ -890,7 +887,7 @@ pre_sh(PRE_ARGS) if (MDOC_BLOCK != n->type) return(1); - mdoc->regs->regs[(int)REG_nS].set = 0; + roff_regunset(mdoc->roff, REG_nS); return(check_parent(mdoc, n, MDOC_MAX, MDOC_ROOT)); } @@ -1124,6 +1121,7 @@ static int post_nm(POST_ARGS) { char buf[BUFSIZ]; + int c; /* If no child specified, make sure we have the meta name. */ @@ -1135,11 +1133,14 @@ post_nm(POST_ARGS) /* If no meta name, set it from the child. */ - if ( ! concat(mdoc, buf, mdoc->last->child, BUFSIZ)) + buf[0] = '\0'; + if (-1 == (c = concat(buf, mdoc->last->child, BUFSIZ))) { + mdoc_nmsg(mdoc, mdoc->last->child, MANDOCERR_MEM); return(0); + } + assert(c); mdoc->meta.name = mandoc_strdup(buf); - return(1); } @@ -1410,7 +1411,7 @@ post_bl_block_width(POST_ARGS) assert(i < (int)n->args->argc); - snprintf(buf, NUMSIZ, "%zun", width); + snprintf(buf, NUMSIZ, "%un", (unsigned int)width); free(n->args->argv[i].value[0]); n->args->argv[i].value[0] = mandoc_strdup(buf); @@ -1460,7 +1461,7 @@ post_bl_block_tag(POST_ARGS) /* Defaults to ten ens. */ - snprintf(buf, NUMSIZ, "%zun", sz); + snprintf(buf, NUMSIZ, "%un", (unsigned int)sz); /* * We have to dynamically add this to the macro's argument list. @@ -1526,7 +1527,7 @@ post_bl_head(POST_ARGS) assert(0 == np->args->argv[j].sz); /* - * Accomodate for new-style groff column syntax. Shuffle the + * Accommodate for new-style groff column syntax. Shuffle the * child nodes, all of which must be TEXT, as arguments for the * column field. Then, delete the head children. */ @@ -1536,7 +1537,7 @@ post_bl_head(POST_ARGS) ((size_t)mdoc->last->nchild * sizeof(char *)); mdoc->last->norm->Bl.ncols = np->args->argv[j].sz; - mdoc->last->norm->Bl.cols = (const char **)np->args->argv[j].value; + mdoc->last->norm->Bl.cols = (void *)np->args->argv[j].value; for (i = 0, nn = mdoc->last->child; nn; i++) { np->args->argv[j].value[i] = nn->string; @@ -1710,6 +1711,14 @@ post_rs(POST_ARGS) } /* + * Nothing to sort if only invalid nodes were found + * inside the `Rs' body. + */ + + if (NULL == mdoc->last->child) + return(1); + + /* * The full `Rs' block needs special handling to order the * sub-elements according to `rsord'. Pick through each element * and correctly order it. This is a insertion sort. @@ -1834,7 +1843,9 @@ static int post_sh_head(POST_ARGS) { char buf[BUFSIZ]; + struct mdoc_node *n; enum mdoc_sec sec; + int c; /* * Process a new section. Sections are either "named" or @@ -1843,11 +1854,14 @@ post_sh_head(POST_ARGS) * manual sections. */ - if ( ! concat(mdoc, buf, mdoc->last->child, BUFSIZ)) + sec = SEC_CUSTOM; + buf[0] = '\0'; + if (-1 == (c = concat(buf, mdoc->last->child, BUFSIZ))) { + mdoc_nmsg(mdoc, mdoc->last->child, MANDOCERR_MEM); return(0); + } else if (1 == c) + sec = a2sec(buf); - sec = a2sec(buf); - /* The NAME should be first. */ if (SEC_NAME != sec && SEC_NONE == mdoc->lastnamed) @@ -1864,6 +1878,20 @@ post_sh_head(POST_ARGS) mdoc->lastsec = sec; + /* + * Set the section attribute for the current HEAD, for its + * parent BLOCK, and for the HEAD children; the latter can + * only be TEXT nodes, so no recursion is needed. + * For other blocks and elements, including .Sh BODY, this is + * done when allocating the node data structures, but for .Sh + * BLOCK and HEAD, the section is still unknown at that time. + */ + + mdoc->last->parent->sec = sec; + mdoc->last->sec = sec; + for (n = mdoc->last->child; n; n = n->next) + n->sec = sec; + /* We don't care about custom sections after this. */ if (SEC_CUSTOM == sec) @@ -1995,6 +2023,7 @@ post_dd(POST_ARGS) { char buf[DATESIZE]; struct mdoc_node *n; + int c; if (mdoc->meta.date) free(mdoc->meta.date); @@ -2006,9 +2035,13 @@ post_dd(POST_ARGS) return(1); } - if ( ! concat(mdoc, buf, n->child, DATESIZE)) + buf[0] = '\0'; + if (-1 == (c = concat(buf, n->child, DATESIZE))) { + mdoc_nmsg(mdoc, n->child, MANDOCERR_MEM); return(0); + } + assert(c); mdoc->meta.date = mandoc_normdate (mdoc->parse, buf, n->line, n->pos); @@ -2037,7 +2070,7 @@ post_dt(POST_ARGS) if (NULL != (nn = n->child)) for (p = nn->string; *p; p++) { - if (toupper((u_char)*p) == *p) + if (toupper((unsigned char)*p) == *p) continue; /* @@ -2083,7 +2116,7 @@ post_dt(POST_ARGS) * arch = NULL */ - cp = mdoc_a2msec(nn->string); + cp = mandoc_a2msec(nn->string); if (cp) { mdoc->meta.vol = mandoc_strdup(cp); mdoc->meta.msec = mandoc_strdup(nn->string); @@ -2163,6 +2196,7 @@ post_os(POST_ARGS) { struct mdoc_node *n; char buf[BUFSIZ]; + int c; #ifndef OSNAME struct utsname utsname; #endif @@ -2170,30 +2204,36 @@ post_os(POST_ARGS) n = mdoc->last; /* - * Set the operating system by way of the `Os' macro. Note that - * if an argument isn't provided and -DOSNAME="\"foo\"" is - * provided during compilation, this value will be used instead - * of filling in "sysname release" from uname(). + * Set the operating system by way of the `Os' macro. + * The order of precedence is: + * 1. the argument of the `Os' macro, unless empty + * 2. the -Ios=foo command line argument, if provided + * 3. -DOSNAME="\"foo\"", if provided during compilation + * 4. "sysname release" from uname(3) */ - if (mdoc->meta.os) - free(mdoc->meta.os); + free(mdoc->meta.os); - if ( ! concat(mdoc, buf, n->child, BUFSIZ)) + buf[0] = '\0'; + if (-1 == (c = concat(buf, n->child, BUFSIZ))) { + mdoc_nmsg(mdoc, n->child, MANDOCERR_MEM); return(0); + } - /* XXX: yes, these can all be dynamically-adjusted buffers, but - * it's really not worth the extra hackery. - */ + assert(c); if ('\0' == buf[0]) { + if (mdoc->defos) { + mdoc->meta.os = mandoc_strdup(mdoc->defos); + return(1); + } #ifdef OSNAME if (strlcat(buf, OSNAME, BUFSIZ) >= BUFSIZ) { mdoc_nmsg(mdoc, n, MANDOCERR_MEM); return(0); } #else /*!OSNAME */ - if (uname(&utsname)) { + if (-1 == uname(&utsname)) { mdoc_nmsg(mdoc, n, MANDOCERR_UNAME); mdoc->meta.os = mandoc_strdup("UNKNOWN"); return(post_prol(mdoc)); @@ -2247,34 +2287,24 @@ post_std(POST_ARGS) return(1); } +/* + * Concatenate a node, stopping at the first non-text. + * Concatenation is separated by a single whitespace. + * Returns -1 on fatal (string overrun) error, 0 if child nodes were + * encountered, 1 otherwise. + */ static int -concat(struct mdoc *m, char *p, const struct mdoc_node *n, size_t sz) +concat(char *p, const struct mdoc_node *n, size_t sz) { - p[0] = '\0'; - - /* - * Concatenate sibling nodes together. All siblings must be of - * type MDOC_TEXT or an assertion is raised. Concatenation is - * separated by a single whitespace. Returns 0 on fatal (string - * overrun) error. - */ - - for ( ; n; n = n->next) { - assert(MDOC_TEXT == n->type); - - if (strlcat(p, n->string, sz) >= sz) { - mdoc_nmsg(m, n, MANDOCERR_MEM); + for ( ; NULL != n; n = n->next) { + if (MDOC_TEXT != n->type) return(0); - } - - if (NULL == n->next) - continue; - - if (strlcat(p, " ", sz) >= sz) { - mdoc_nmsg(m, n, MANDOCERR_MEM); - return(0); - } + if ('\0' != p[0] && strlcat(p, " ", sz) >= sz) + return(-1); + if (strlcat(p, n->string, sz) >= sz) + return(-1); + concat(p, n->child, sz); } return(1);