=================================================================== RCS file: /cvs/mandoc/mdoc_validate.c,v retrieving revision 1.323 retrieving revision 1.334 diff -u -p -r1.323 -r1.334 --- mandoc/mdoc_validate.c 2017/05/04 17:48:29 1.323 +++ mandoc/mdoc_validate.c 2017/06/10 16:54:16 1.334 @@ -1,4 +1,4 @@ -/* $Id: mdoc_validate.c,v 1.323 2017/05/04 17:48:29 schwarze Exp $ */ +/* $Id: mdoc_validate.c,v 1.334 2017/06/10 16:54:16 schwarze Exp $ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons * Copyright (c) 2010-2017 Ingo Schwarze @@ -53,6 +53,7 @@ typedef void (*v_post)(POST_ARGS); static int build_list(struct roff_man *, int); static void check_text(struct roff_man *, int, int, char *); +static void check_bsd(struct roff_man *, int, int, char *); static void check_argv(struct roff_man *, struct roff_node *, struct mdoc_argv *); static void check_args(struct roff_man *, struct roff_node *); @@ -74,6 +75,7 @@ static void post_bx(POST_ARGS); static void post_defaults(POST_ARGS); static void post_display(POST_ARGS); static void post_dd(POST_ARGS); +static void post_delim(POST_ARGS); static void post_dt(POST_ARGS); static void post_en(POST_ARGS); static void post_es(POST_ARGS); @@ -105,6 +107,7 @@ static void post_sh_authors(POST_ARGS); static void post_sm(POST_ARGS); static void post_st(POST_ARGS); static void post_std(POST_ARGS); +static void post_useless(POST_ARGS); static void post_xr(POST_ARGS); static void post_xx(POST_ARGS); @@ -122,33 +125,33 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] post_bl, /* Bl */ NULL, /* El */ post_it, /* It */ - NULL, /* Ad */ + post_delim, /* Ad */ post_an, /* An */ NULL, /* Ap */ post_defaults, /* Ar */ NULL, /* Cd */ - NULL, /* Cm */ - NULL, /* Dv */ - NULL, /* Er */ - NULL, /* Ev */ + post_delim, /* Cm */ + post_delim, /* Dv */ + post_delim, /* Er */ + post_delim, /* Ev */ post_ex, /* Ex */ post_fa, /* Fa */ NULL, /* Fd */ - NULL, /* Fl */ + post_delim, /* Fl */ post_fn, /* Fn */ - NULL, /* Ft */ - NULL, /* Ic */ - NULL, /* In */ + post_delim, /* Ft */ + post_delim, /* Ic */ + post_delim, /* In */ post_defaults, /* Li */ post_nd, /* Nd */ post_nm, /* Nm */ - NULL, /* Op */ + post_delim, /* Op */ post_obsolete, /* Ot */ post_defaults, /* Pa */ post_rv, /* Rv */ post_st, /* St */ - NULL, /* Va */ - NULL, /* Vt */ + post_delim, /* Va */ + post_delim, /* Vt */ post_xr, /* Xr */ NULL, /* %A */ post_hyph, /* %B */ /* FIXME: can be used outside Rs/Re. */ @@ -162,12 +165,12 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] post_hyph, /* %T */ /* FIXME: can be used outside Rs/Re. */ NULL, /* %V */ NULL, /* Ac */ - NULL, /* Ao */ - NULL, /* Aq */ + post_delim, /* Ao */ + post_delim, /* Aq */ post_at, /* At */ NULL, /* Bc */ post_bf, /* Bf */ - NULL, /* Bo */ + post_delim, /* Bo */ NULL, /* Bq */ post_xx, /* Bsx */ post_bx, /* Bx */ @@ -177,60 +180,58 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] NULL, /* Dq */ NULL, /* Ec */ NULL, /* Ef */ - NULL, /* Em */ + post_delim, /* Em */ NULL, /* Eo */ post_xx, /* Fx */ - NULL, /* Ms */ - NULL, /* No */ + post_delim, /* Ms */ + post_delim, /* No */ post_ns, /* Ns */ post_xx, /* Nx */ post_xx, /* Ox */ NULL, /* Pc */ NULL, /* Pf */ - NULL, /* Po */ - NULL, /* Pq */ + post_delim, /* Po */ + post_delim, /* Pq */ NULL, /* Qc */ - NULL, /* Ql */ - NULL, /* Qo */ - NULL, /* Qq */ + post_delim, /* Ql */ + post_delim, /* Qo */ + post_delim, /* Qq */ NULL, /* Re */ post_rs, /* Rs */ NULL, /* Sc */ - NULL, /* So */ - NULL, /* Sq */ + post_delim, /* So */ + post_delim, /* Sq */ post_sm, /* Sm */ post_hyph, /* Sx */ - NULL, /* Sy */ - NULL, /* Tn */ + post_delim, /* Sy */ + post_useless, /* Tn */ post_xx, /* Ux */ NULL, /* Xc */ NULL, /* Xo */ post_fo, /* Fo */ NULL, /* Fc */ - NULL, /* Oo */ + post_delim, /* Oo */ NULL, /* Oc */ post_bk, /* Bk */ NULL, /* Ek */ post_eoln, /* Bt */ - NULL, /* Hf */ + post_obsolete, /* Hf */ post_obsolete, /* Fr */ post_eoln, /* Ud */ post_lb, /* Lb */ post_par, /* Lp */ - NULL, /* Lk */ + post_delim, /* Lk */ post_defaults, /* Mt */ - NULL, /* Brq */ - NULL, /* Bro */ + post_delim, /* Brq */ + post_delim, /* Bro */ NULL, /* Brc */ NULL, /* %C */ post_es, /* Es */ post_en, /* En */ post_xx, /* Dx */ NULL, /* %Q */ - post_par, /* sp */ NULL, /* %U */ NULL, /* Ta */ - NULL, /* ll */ }; static const v_post *const mdoc_valids = __mdoc_valids - MDOC_Dd; @@ -303,6 +304,10 @@ mdoc_node_validate(struct roff_man *mdoc) if (n->sec != SEC_SYNOPSIS || (n->parent->tok != MDOC_Cd && n->parent->tok != MDOC_Fd)) check_text(mdoc, n->line, n->pos, n->string); + if (n->parent->tok == MDOC_Sh || + n->parent->tok == MDOC_Ss || + n->parent->tok == MDOC_It) + check_bsd(mdoc, n->line, n->pos, n->string); break; case ROFFT_EQN: case ROFFT_TBL: @@ -329,10 +334,12 @@ mdoc_node_validate(struct roff_man *mdoc) if (n->tok < ROFF_MAX) { switch(n->tok) { case ROFF_br: + case ROFF_sp: post_par(mdoc); break; default: - abort(); + roff_validate(mdoc); + break; } break; } @@ -383,6 +390,120 @@ check_text(struct roff_man *mdoc, int ln, int pos, cha } static void +check_bsd(struct roff_man *mdoc, int ln, int pos, char *p) +{ + const char *cp; + + if ((cp = strstr(p, "OpenBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + ln, pos + (cp - p), "Ox"); + if ((cp = strstr(p, "NetBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + ln, pos + (cp - p), "Nx"); + if ((cp = strstr(p, "FreeBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + ln, pos + (cp - p), "Fx"); + if ((cp = strstr(p, "DragonFly")) != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + ln, pos + (cp - p), "Dx"); +} + +static void +post_delim(POST_ARGS) +{ + const struct roff_node *nch; + const char *lc, *cp; + int nw; + enum mdelim delim; + enum roff_tok tok; + + /* + * Find candidates: at least two bytes, + * the last one a closing or middle delimiter. + */ + + tok = mdoc->last->tok; + nch = mdoc->last->last; + if (nch == NULL || nch->type != ROFFT_TEXT) + return; + lc = strchr(nch->string, '\0') - 1; + if (lc <= nch->string) + return; + delim = mdoc_isdelim(lc); + if (delim == DELIM_NONE || delim == DELIM_OPEN) + return; + + /* + * Reduce false positives by allowing various cases. + */ + + /* Escaped delimiters. */ + if (lc > nch->string + 1 && lc[-2] == '\\' && + (lc[-1] == '&' || lc[-1] == 'e')) + return; + + /* Specific byte sequences. */ + switch (*lc) { + case ')': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '(') + return; + break; + case '.': + if (lc > nch->string + 1 && lc[-2] == '.' && lc[-1] == '.') + return; + if (lc[-1] == '.') + return; + break; + case ';': + if (tok == MDOC_Vt) + return; + break; + case '?': + if (lc[-1] == '?') + return; + break; + case ']': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '[') + return; + break; + case '|': + if (lc == nch->string + 1 && lc[-1] == '|') + return; + default: + break; + } + + /* Exactly two non-alphanumeric bytes. */ + if (lc == nch->string + 1 && !isalnum((unsigned char)lc[-1])) + return; + + /* At least three alphabetic words with a sentence ending. */ + if (strchr("!.:?", *lc) != NULL && (tok == MDOC_Em || + tok == MDOC_Li || tok == MDOC_No || tok == MDOC_Po || + tok == MDOC_Pq || tok == MDOC_Sy)) { + nw = 0; + for (cp = lc - 1; cp >= nch->string; cp--) { + if (*cp == ' ') { + nw++; + if (cp > nch->string && cp[-1] == ',') + cp--; + } else if (isalpha((unsigned int)*cp)) { + if (nw > 1) + return; + } else + break; + } + } + + mandoc_vmsg(MANDOCERR_DELIM, mdoc->parse, + nch->line, nch->pos + (lc - nch->string), + "%s%s %s", roff_name[tok], + nch == mdoc->last->child ? "" : " ...", nch->string); +} + +static void post_bl_norm(POST_ARGS) { struct roff_node *n; @@ -671,6 +792,7 @@ post_eoln(POST_ARGS) { struct roff_node *n; + post_useless(mdoc); n = mdoc->last; if (n->child != NULL) mandoc_vmsg(MANDOCERR_ARG_SKIP, mdoc->parse, n->line, @@ -756,6 +878,8 @@ post_lb(POST_ARGS) struct roff_node *n; const char *p; + post_delim(mdoc); + n = mdoc->last; assert(n->child->type == ROFFT_TEXT); mdoc->next = ROFF_NEXT_CHILD; @@ -768,6 +892,9 @@ post_lb(POST_ARGS) return; } + mandoc_vmsg(MANDOCERR_LB_BAD, mdoc->parse, n->child->line, + n->child->pos, "Lb %s", n->child->string); + roff_word_alloc(mdoc, n->line, n->pos, "library"); mdoc->last->flags = NODE_NOSRC; roff_word_alloc(mdoc, n->line, n->pos, "\\(Lq"); @@ -866,6 +993,16 @@ post_obsolete(POST_ARGS) n->line, n->pos, roff_name[n->tok]); } +static void +post_useless(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + mandoc_msg(MANDOCERR_MACRO_USELESS, mdoc->parse, + n->line, n->pos, roff_name[n->tok]); +} + /* * Block macros. */ @@ -1003,6 +1140,7 @@ post_fa(POST_ARGS) break; } } + post_delim(mdoc); } static void @@ -1025,6 +1163,9 @@ post_nm(POST_ARGS) mandoc_msg(MANDOCERR_NM_NONAME, mdoc->parse, n->line, n->pos, "Nm"); + if (n->type == ROFFT_ELEM) + post_delim(mdoc); + if ((n->type != ROFFT_ELEM && n->type != ROFFT_HEAD) || (n->child != NULL && n->child->type == ROFFT_TEXT) || mdoc->meta.name == NULL) @@ -1040,6 +1181,7 @@ static void post_nd(POST_ARGS) { struct roff_node *n; + size_t sz; n = mdoc->last; @@ -1053,6 +1195,11 @@ post_nd(POST_ARGS) if (n->child == NULL) mandoc_msg(MANDOCERR_ND_EMPTY, mdoc->parse, n->line, n->pos, "Nd"); + else if (n->last->type == ROFFT_TEXT && + (sz = strlen(n->last->string)) != 0 && + n->last->string[sz - 1] == '.') + mandoc_msg(MANDOCERR_ND_DOT, mdoc->parse, + n->last->line, n->last->pos + sz - 1, NULL); post_hyph(mdoc); } @@ -1109,17 +1256,18 @@ post_defaults(POST_ARGS) { struct roff_node *nn; + if (mdoc->last->child != NULL) { + post_delim(mdoc); + return; + } + /* * The `Ar' defaults to "file ..." if no value is provided as an * argument; the `Mt' and `Pa' macros use "~"; the `Li' just * gets an empty string. */ - if (mdoc->last->child != NULL) - return; - nn = mdoc->last; - switch (nn->tok) { case MDOC_Ar: mdoc->next = ROFF_NEXT_CHILD; @@ -1183,6 +1331,8 @@ post_an(POST_ARGS) if (nch == NULL) mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, np->line, np->pos, "An"); + else + post_delim(mdoc); } else if (nch != NULL) mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, nch->line, nch->pos, "An ... %s", nch->string); @@ -1211,6 +1361,8 @@ post_xx(POST_ARGS) struct roff_node *n; const char *os; + post_delim(mdoc); + n = mdoc->last; switch (n->tok) { case MDOC_Bsx: @@ -1437,6 +1589,8 @@ post_bl(POST_ARGS) struct roff_node *nparent, *nprev; /* of the Bl block */ struct roff_node *nblock, *nbody; /* of the Bl */ struct roff_node *nchild, *nnext; /* of the Bl body */ + const char *prev_Er; + int order; nbody = mdoc->last; switch (nbody->type) { @@ -1537,6 +1691,34 @@ post_bl(POST_ARGS) nchild = nnext; } + + if (mdoc->meta.os_e != MDOC_OS_NETBSD) + return; + + prev_Er = NULL; + for (nchild = nbody->child; nchild != NULL; nchild = nchild->next) { + if (nchild->tok != MDOC_It) + continue; + if ((nnext = nchild->head->child) == NULL) + continue; + if (nnext->type == ROFFT_BLOCK) + nnext = nnext->body->child; + if (nnext == NULL || nnext->tok != MDOC_Er) + continue; + nnext = nnext->child; + if (prev_Er != NULL) { + order = strcmp(prev_Er, nnext->string); + if (order > 0) + mandoc_vmsg(MANDOCERR_ER_ORDER, + mdoc->parse, nnext->line, nnext->pos, + "Er %s %s", prev_Er, nnext->string); + else if (order == 0) + mandoc_vmsg(MANDOCERR_ER_REP, + mdoc->parse, nnext->line, nnext->pos, + "Er %s", prev_Er); + } + prev_Er = nnext->string; + } } static void @@ -1879,7 +2061,7 @@ post_sh_see_also(POST_ARGS) if (isalpha((const unsigned char)*name)) return; lastpunct = n->string; - if (n->next == NULL) + if (n->next == NULL || n->next->tok == MDOC_Rs) mandoc_vmsg(MANDOCERR_XR_PUNCT, mdoc->parse, n->line, n->pos, "%s after %s(%s)", lastpunct, lastname, lastsec); @@ -2015,9 +2197,9 @@ post_xr(POST_ARGS) if (nch->next == NULL) { mandoc_vmsg(MANDOCERR_XR_NOSEC, mdoc->parse, n->line, n->pos, "Xr %s", nch->string); - return; - } - assert(nch->next == n->last); + } else + assert(nch->next == n->last); + post_delim(mdoc); } static void @@ -2096,10 +2278,10 @@ post_par(POST_ARGS) struct roff_node *np; np = mdoc->last; - if (np->tok != ROFF_br && np->tok != MDOC_sp) + if (np->tok != ROFF_br && np->tok != ROFF_sp) post_prevpar(mdoc); - if (np->tok == MDOC_sp) { + if (np->tok == ROFF_sp) { if (np->child != NULL && np->child->next != NULL) mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, np->child->next->line, np->child->next->pos, @@ -2115,7 +2297,7 @@ post_par(POST_ARGS) return; } else if (np->tok != MDOC_Pp && np->tok != MDOC_Lp && (mdoc->last->tok != ROFF_br || - (np->tok != MDOC_sp && np->tok != ROFF_br))) + (np->tok != ROFF_sp && np->tok != ROFF_br))) return; mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, @@ -2264,11 +2446,21 @@ static void post_bx(POST_ARGS) { struct roff_node *n, *nch; + const char *macro; + post_delim(mdoc); + n = mdoc->last; nch = n->child; if (nch != NULL) { + macro = !strcmp(nch->string, "Open") ? "Ox" : + !strcmp(nch->string, "Net") ? "Nx" : + !strcmp(nch->string, "Free") ? "Fx" : + !strcmp(nch->string, "DragonFly") ? "Dx" : NULL; + if (macro != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + n->line, n->pos, macro); mdoc->last = nch; nch = nch->next; mdoc->next = ROFF_NEXT_SIBLING; @@ -2335,11 +2527,11 @@ post_os(POST_ARGS) mdoc->meta.os = NULL; deroff(&mdoc->meta.os, n); if (mdoc->meta.os) - return; + goto out; if (mdoc->defos) { mdoc->meta.os = mandoc_strdup(mdoc->defos); - return; + goto out; } #ifdef OSNAME @@ -2356,6 +2548,10 @@ post_os(POST_ARGS) } mdoc->meta.os = mandoc_strdup(defbuf); #endif /*!OSNAME*/ + +out: mdoc->meta.os_e = strstr(mdoc->meta.os, "OpenBSD") != NULL ? + MDOC_OS_OPENBSD : strstr(mdoc->meta.os, "NetBSD") != NULL ? + MDOC_OS_NETBSD : MDOC_OS_OTHER; } enum roff_sec