=================================================================== RCS file: /cvs/mandoc/mdoc_validate.c,v retrieving revision 1.320 retrieving revision 1.335 diff -u -p -r1.320 -r1.335 --- mandoc/mdoc_validate.c 2017/04/24 23:06:18 1.320 +++ mandoc/mdoc_validate.c 2017/06/11 14:24:55 1.335 @@ -1,4 +1,4 @@ -/* $Id: mdoc_validate.c,v 1.320 2017/04/24 23:06:18 schwarze Exp $ */ +/* $Id: mdoc_validate.c,v 1.335 2017/06/11 14:24:55 schwarze Exp $ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons * Copyright (c) 2010-2017 Ingo Schwarze @@ -53,12 +53,13 @@ typedef void (*v_post)(POST_ARGS); static int build_list(struct roff_man *, int); static void check_text(struct roff_man *, int, int, char *); +static void check_bsd(struct roff_man *, int, int, char *); static void check_argv(struct roff_man *, struct roff_node *, struct mdoc_argv *); static void check_args(struct roff_man *, struct roff_node *); static int child_an(const struct roff_node *); static size_t macro2len(enum roff_tok); -static void rewrite_macro2len(char **); +static void rewrite_macro2len(struct roff_man *, char **); static void post_an(POST_ARGS); static void post_an_norm(POST_ARGS); @@ -74,6 +75,7 @@ static void post_bx(POST_ARGS); static void post_defaults(POST_ARGS); static void post_display(POST_ARGS); static void post_dd(POST_ARGS); +static void post_delim(POST_ARGS); static void post_dt(POST_ARGS); static void post_en(POST_ARGS); static void post_es(POST_ARGS); @@ -105,6 +107,7 @@ static void post_sh_authors(POST_ARGS); static void post_sm(POST_ARGS); static void post_st(POST_ARGS); static void post_std(POST_ARGS); +static void post_useless(POST_ARGS); static void post_xr(POST_ARGS); static void post_xx(POST_ARGS); @@ -122,33 +125,33 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] post_bl, /* Bl */ NULL, /* El */ post_it, /* It */ - NULL, /* Ad */ + post_delim, /* Ad */ post_an, /* An */ NULL, /* Ap */ post_defaults, /* Ar */ NULL, /* Cd */ - NULL, /* Cm */ - NULL, /* Dv */ - NULL, /* Er */ - NULL, /* Ev */ + post_delim, /* Cm */ + post_delim, /* Dv */ + post_delim, /* Er */ + post_delim, /* Ev */ post_ex, /* Ex */ post_fa, /* Fa */ NULL, /* Fd */ - NULL, /* Fl */ + post_delim, /* Fl */ post_fn, /* Fn */ - NULL, /* Ft */ - NULL, /* Ic */ - NULL, /* In */ + post_delim, /* Ft */ + post_delim, /* Ic */ + post_delim, /* In */ post_defaults, /* Li */ post_nd, /* Nd */ post_nm, /* Nm */ - NULL, /* Op */ + post_delim, /* Op */ post_obsolete, /* Ot */ post_defaults, /* Pa */ post_rv, /* Rv */ post_st, /* St */ - NULL, /* Va */ - NULL, /* Vt */ + post_delim, /* Va */ + post_delim, /* Vt */ post_xr, /* Xr */ NULL, /* %A */ post_hyph, /* %B */ /* FIXME: can be used outside Rs/Re. */ @@ -162,12 +165,12 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] post_hyph, /* %T */ /* FIXME: can be used outside Rs/Re. */ NULL, /* %V */ NULL, /* Ac */ - NULL, /* Ao */ - NULL, /* Aq */ + post_delim, /* Ao */ + post_delim, /* Aq */ post_at, /* At */ NULL, /* Bc */ post_bf, /* Bf */ - NULL, /* Bo */ + post_delim, /* Bo */ NULL, /* Bq */ post_xx, /* Bsx */ post_bx, /* Bx */ @@ -177,61 +180,58 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] NULL, /* Dq */ NULL, /* Ec */ NULL, /* Ef */ - NULL, /* Em */ + post_delim, /* Em */ NULL, /* Eo */ post_xx, /* Fx */ - NULL, /* Ms */ + post_delim, /* Ms */ NULL, /* No */ post_ns, /* Ns */ post_xx, /* Nx */ post_xx, /* Ox */ NULL, /* Pc */ NULL, /* Pf */ - NULL, /* Po */ - NULL, /* Pq */ + post_delim, /* Po */ + post_delim, /* Pq */ NULL, /* Qc */ - NULL, /* Ql */ - NULL, /* Qo */ - NULL, /* Qq */ + post_delim, /* Ql */ + post_delim, /* Qo */ + post_delim, /* Qq */ NULL, /* Re */ post_rs, /* Rs */ NULL, /* Sc */ - NULL, /* So */ - NULL, /* Sq */ + post_delim, /* So */ + post_delim, /* Sq */ post_sm, /* Sm */ post_hyph, /* Sx */ - NULL, /* Sy */ - NULL, /* Tn */ + post_delim, /* Sy */ + post_useless, /* Tn */ post_xx, /* Ux */ NULL, /* Xc */ NULL, /* Xo */ post_fo, /* Fo */ NULL, /* Fc */ - NULL, /* Oo */ + post_delim, /* Oo */ NULL, /* Oc */ post_bk, /* Bk */ NULL, /* Ek */ post_eoln, /* Bt */ - NULL, /* Hf */ + post_obsolete, /* Hf */ post_obsolete, /* Fr */ post_eoln, /* Ud */ post_lb, /* Lb */ post_par, /* Lp */ - NULL, /* Lk */ + post_delim, /* Lk */ post_defaults, /* Mt */ - NULL, /* Brq */ - NULL, /* Bro */ + post_delim, /* Brq */ + post_delim, /* Bro */ NULL, /* Brc */ NULL, /* %C */ post_es, /* Es */ post_en, /* En */ post_xx, /* Dx */ NULL, /* %Q */ - post_par, /* br */ - post_par, /* sp */ NULL, /* %U */ NULL, /* Ta */ - NULL, /* ll */ }; static const v_post *const mdoc_valids = __mdoc_valids - MDOC_Dd; @@ -304,6 +304,10 @@ mdoc_node_validate(struct roff_man *mdoc) if (n->sec != SEC_SYNOPSIS || (n->parent->tok != MDOC_Cd && n->parent->tok != MDOC_Fd)) check_text(mdoc, n->line, n->pos, n->string); + if (n->parent->tok == MDOC_Sh || + n->parent->tok == MDOC_Ss || + n->parent->tok == MDOC_It) + check_bsd(mdoc, n->line, n->pos, n->string); break; case ROFFT_EQN: case ROFFT_TBL: @@ -327,6 +331,20 @@ mdoc_node_validate(struct roff_man *mdoc) /* Call the macro's postprocessor. */ + if (n->tok < ROFF_MAX) { + switch(n->tok) { + case ROFF_br: + case ROFF_sp: + post_par(mdoc); + break; + default: + roff_validate(mdoc); + break; + } + break; + } + + assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); p = mdoc_valids + n->tok; if (*p) (*p)(mdoc); @@ -372,6 +390,120 @@ check_text(struct roff_man *mdoc, int ln, int pos, cha } static void +check_bsd(struct roff_man *mdoc, int ln, int pos, char *p) +{ + const char *cp; + + if ((cp = strstr(p, "OpenBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + ln, pos + (cp - p), "Ox"); + if ((cp = strstr(p, "NetBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + ln, pos + (cp - p), "Nx"); + if ((cp = strstr(p, "FreeBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + ln, pos + (cp - p), "Fx"); + if ((cp = strstr(p, "DragonFly")) != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + ln, pos + (cp - p), "Dx"); +} + +static void +post_delim(POST_ARGS) +{ + const struct roff_node *nch; + const char *lc, *cp; + int nw; + enum mdelim delim; + enum roff_tok tok; + + /* + * Find candidates: at least two bytes, + * the last one a closing or middle delimiter. + */ + + tok = mdoc->last->tok; + nch = mdoc->last->last; + if (nch == NULL || nch->type != ROFFT_TEXT) + return; + lc = strchr(nch->string, '\0') - 1; + if (lc <= nch->string) + return; + delim = mdoc_isdelim(lc); + if (delim == DELIM_NONE || delim == DELIM_OPEN) + return; + + /* + * Reduce false positives by allowing various cases. + */ + + /* Escaped delimiters. */ + if (lc > nch->string + 1 && lc[-2] == '\\' && + (lc[-1] == '&' || lc[-1] == 'e')) + return; + + /* Specific byte sequences. */ + switch (*lc) { + case ')': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '(') + return; + break; + case '.': + if (lc > nch->string + 1 && lc[-2] == '.' && lc[-1] == '.') + return; + if (lc[-1] == '.') + return; + break; + case ';': + if (tok == MDOC_Vt) + return; + break; + case '?': + if (lc[-1] == '?') + return; + break; + case ']': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '[') + return; + break; + case '|': + if (lc == nch->string + 1 && lc[-1] == '|') + return; + default: + break; + } + + /* Exactly two non-alphanumeric bytes. */ + if (lc == nch->string + 1 && !isalnum((unsigned char)lc[-1])) + return; + + /* At least three alphabetic words with a sentence ending. */ + if (strchr("!.:?", *lc) != NULL && (tok == MDOC_Em || + tok == MDOC_Li || tok == MDOC_Po || tok == MDOC_Pq || + tok == MDOC_Sy)) { + nw = 0; + for (cp = lc - 1; cp >= nch->string; cp--) { + if (*cp == ' ') { + nw++; + if (cp > nch->string && cp[-1] == ',') + cp--; + } else if (isalpha((unsigned int)*cp)) { + if (nw > 1) + return; + } else + break; + } + } + + mandoc_vmsg(MANDOCERR_DELIM, mdoc->parse, + nch->line, nch->pos + (lc - nch->string), + "%s%s %s", roff_name[tok], + nch == mdoc->last->child ? "" : " ...", nch->string); +} + +static void post_bl_norm(POST_ARGS) { struct roff_node *n; @@ -451,7 +583,7 @@ post_bl_norm(POST_ARGS) mdoc->parse, argv->line, argv->pos, "Bl -width %s", argv->value[0]); - rewrite_macro2len(argv->value); + rewrite_macro2len(mdoc, argv->value); n->norm->Bl.width = argv->value[0]; break; case MDOC_Offset: @@ -466,7 +598,7 @@ post_bl_norm(POST_ARGS) mdoc->parse, argv->line, argv->pos, "Bl -offset %s", argv->value[0]); - rewrite_macro2len(argv->value); + rewrite_macro2len(mdoc, argv->value); n->norm->Bl.offs = argv->value[0]; break; default: @@ -593,7 +725,7 @@ post_bd(POST_ARGS) mdoc->parse, argv->line, argv->pos, "Bd -offset %s", argv->value[0]); - rewrite_macro2len(argv->value); + rewrite_macro2len(mdoc, argv->value); n->norm->Bd.offs = argv->value[0]; break; case MDOC_Compact: @@ -660,6 +792,7 @@ post_eoln(POST_ARGS) { struct roff_node *n; + post_useless(mdoc); n = mdoc->last; if (n->child != NULL) mandoc_vmsg(MANDOCERR_ARG_SKIP, mdoc->parse, n->line, @@ -745,6 +878,8 @@ post_lb(POST_ARGS) struct roff_node *n; const char *p; + post_delim(mdoc); + n = mdoc->last; assert(n->child->type == ROFFT_TEXT); mdoc->next = ROFF_NEXT_CHILD; @@ -757,6 +892,9 @@ post_lb(POST_ARGS) return; } + mandoc_vmsg(MANDOCERR_LB_BAD, mdoc->parse, n->child->line, + n->child->pos, "Lb %s", n->child->string); + roff_word_alloc(mdoc, n->line, n->pos, "library"); mdoc->last->flags = NODE_NOSRC; roff_word_alloc(mdoc, n->line, n->pos, "\\(Lq"); @@ -855,6 +993,16 @@ post_obsolete(POST_ARGS) n->line, n->pos, roff_name[n->tok]); } +static void +post_useless(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + mandoc_msg(MANDOCERR_MACRO_USELESS, mdoc->parse, + n->line, n->pos, roff_name[n->tok]); +} + /* * Block macros. */ @@ -992,6 +1140,7 @@ post_fa(POST_ARGS) break; } } + post_delim(mdoc); } static void @@ -1014,6 +1163,9 @@ post_nm(POST_ARGS) mandoc_msg(MANDOCERR_NM_NONAME, mdoc->parse, n->line, n->pos, "Nm"); + if (n->type == ROFFT_ELEM) + post_delim(mdoc); + if ((n->type != ROFFT_ELEM && n->type != ROFFT_HEAD) || (n->child != NULL && n->child->type == ROFFT_TEXT) || mdoc->meta.name == NULL) @@ -1029,6 +1181,7 @@ static void post_nd(POST_ARGS) { struct roff_node *n; + size_t sz; n = mdoc->last; @@ -1042,6 +1195,11 @@ post_nd(POST_ARGS) if (n->child == NULL) mandoc_msg(MANDOCERR_ND_EMPTY, mdoc->parse, n->line, n->pos, "Nd"); + else if (n->last->type == ROFFT_TEXT && + (sz = strlen(n->last->string)) != 0 && + n->last->string[sz - 1] == '.') + mandoc_msg(MANDOCERR_ND_DOT, mdoc->parse, + n->last->line, n->last->pos + sz - 1, NULL); post_hyph(mdoc); } @@ -1098,17 +1256,18 @@ post_defaults(POST_ARGS) { struct roff_node *nn; + if (mdoc->last->child != NULL) { + post_delim(mdoc); + return; + } + /* * The `Ar' defaults to "file ..." if no value is provided as an * argument; the `Mt' and `Pa' macros use "~"; the `Li' just * gets an empty string. */ - if (mdoc->last->child != NULL) - return; - nn = mdoc->last; - switch (nn->tok) { case MDOC_Ar: mdoc->next = ROFF_NEXT_CHILD; @@ -1172,6 +1331,8 @@ post_an(POST_ARGS) if (nch == NULL) mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, np->line, np->pos, "An"); + else + post_delim(mdoc); } else if (nch != NULL) mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, nch->line, nch->pos, "An ... %s", nch->string); @@ -1200,6 +1361,8 @@ post_xx(POST_ARGS) struct roff_node *n; const char *os; + post_delim(mdoc); + n = mdoc->last; switch (n->tok) { case MDOC_Bsx: @@ -1310,7 +1473,7 @@ post_bl_block(POST_ARGS) switch (nc->tok) { case MDOC_Pp: case MDOC_Lp: - case MDOC_br: + case ROFF_br: break; default: nc = NULL; @@ -1338,8 +1501,8 @@ post_bl_block(POST_ARGS) * If the argument of -offset or -width is a macro, * replace it with the associated default width. */ -void -rewrite_macro2len(char **arg) +static void +rewrite_macro2len(struct roff_man *mdoc, char **arg) { size_t width; enum roff_tok tok; @@ -1348,7 +1511,7 @@ rewrite_macro2len(char **arg) return; else if ( ! strcmp(*arg, "Ds")) width = 6; - else if ((tok = mdoc_hash_find(*arg)) == TOKEN_NONE) + else if ((tok = roffhash_find(mdoc->mdocmac, *arg, 0)) == TOKEN_NONE) return; else width = macro2len(tok); @@ -1426,6 +1589,8 @@ post_bl(POST_ARGS) struct roff_node *nparent, *nprev; /* of the Bl block */ struct roff_node *nblock, *nbody; /* of the Bl */ struct roff_node *nchild, *nnext; /* of the Bl body */ + const char *prev_Er; + int order; nbody = mdoc->last; switch (nbody->type) { @@ -1526,6 +1691,34 @@ post_bl(POST_ARGS) nchild = nnext; } + + if (mdoc->meta.os_e != MDOC_OS_NETBSD) + return; + + prev_Er = NULL; + for (nchild = nbody->child; nchild != NULL; nchild = nchild->next) { + if (nchild->tok != MDOC_It) + continue; + if ((nnext = nchild->head->child) == NULL) + continue; + if (nnext->type == ROFFT_BLOCK) + nnext = nnext->body->child; + if (nnext == NULL || nnext->tok != MDOC_Er) + continue; + nnext = nnext->child; + if (prev_Er != NULL) { + order = strcmp(prev_Er, nnext->string); + if (order > 0) + mandoc_vmsg(MANDOCERR_ER_ORDER, + mdoc->parse, nnext->line, nnext->pos, + "Er %s %s", prev_Er, nnext->string); + else if (order == 0) + mandoc_vmsg(MANDOCERR_ER_REP, + mdoc->parse, nnext->line, nnext->pos, + "Er %s", prev_Er); + } + prev_Er = nnext->string; + } } static void @@ -1868,7 +2061,7 @@ post_sh_see_also(POST_ARGS) if (isalpha((const unsigned char)*name)) return; lastpunct = n->string; - if (n->next == NULL) + if (n->next == NULL || n->next->tok == MDOC_Rs) mandoc_vmsg(MANDOCERR_XR_PUNCT, mdoc->parse, n->line, n->pos, "%s after %s(%s)", lastpunct, lastname, lastsec); @@ -2004,9 +2197,9 @@ post_xr(POST_ARGS) if (nch->next == NULL) { mandoc_vmsg(MANDOCERR_XR_NOSEC, mdoc->parse, n->line, n->pos, "Xr %s", nch->string); - return; - } - assert(nch->next == n->last); + } else + assert(nch->next == n->last); + post_delim(mdoc); } static void @@ -2015,6 +2208,9 @@ post_ignpar(POST_ARGS) struct roff_node *np; switch (mdoc->last->type) { + case ROFFT_BLOCK: + post_prevpar(mdoc); + return; case ROFFT_HEAD: post_hyph(mdoc); return; @@ -2061,7 +2257,7 @@ post_prevpar(POST_ARGS) if (n->prev->tok != MDOC_Pp && n->prev->tok != MDOC_Lp && - n->prev->tok != MDOC_br) + n->prev->tok != ROFF_br) return; if (n->tok == MDOC_Bl && n->norm->Bl.comp) return; @@ -2082,10 +2278,10 @@ post_par(POST_ARGS) struct roff_node *np; np = mdoc->last; - if (np->tok != MDOC_br && np->tok != MDOC_sp) + if (np->tok != ROFF_br && np->tok != ROFF_sp) post_prevpar(mdoc); - if (np->tok == MDOC_sp) { + if (np->tok == ROFF_sp) { if (np->child != NULL && np->child->next != NULL) mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, np->child->next->line, np->child->next->pos, @@ -2100,8 +2296,8 @@ post_par(POST_ARGS) if (np->tok != MDOC_Sh && np->tok != MDOC_Ss) return; } else if (np->tok != MDOC_Pp && np->tok != MDOC_Lp && - (mdoc->last->tok != MDOC_br || - (np->tok != MDOC_sp && np->tok != MDOC_br))) + (mdoc->last->tok != ROFF_br || + (np->tok != ROFF_sp && np->tok != ROFF_br))) return; mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, @@ -2250,11 +2446,21 @@ static void post_bx(POST_ARGS) { struct roff_node *n, *nch; + const char *macro; + post_delim(mdoc); + n = mdoc->last; nch = n->child; if (nch != NULL) { + macro = !strcmp(nch->string, "Open") ? "Ox" : + !strcmp(nch->string, "Net") ? "Nx" : + !strcmp(nch->string, "Free") ? "Fx" : + !strcmp(nch->string, "DragonFly") ? "Dx" : NULL; + if (macro != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + n->line, n->pos, macro); mdoc->last = nch; nch = nch->next; mdoc->next = ROFF_NEXT_SIBLING; @@ -2321,11 +2527,11 @@ post_os(POST_ARGS) mdoc->meta.os = NULL; deroff(&mdoc->meta.os, n); if (mdoc->meta.os) - return; + goto out; if (mdoc->defos) { mdoc->meta.os = mandoc_strdup(mdoc->defos); - return; + goto out; } #ifdef OSNAME @@ -2342,6 +2548,10 @@ post_os(POST_ARGS) } mdoc->meta.os = mandoc_strdup(defbuf); #endif /*!OSNAME*/ + +out: mdoc->meta.os_e = strstr(mdoc->meta.os, "OpenBSD") != NULL ? + MDOC_OS_OPENBSD : strstr(mdoc->meta.os, "NetBSD") != NULL ? + MDOC_OS_NETBSD : MDOC_OS_OTHER; } enum roff_sec