=================================================================== RCS file: /cvs/mandoc/mdoc_validate.c,v retrieving revision 1.325 retrieving revision 1.338 diff -u -p -r1.325 -r1.338 --- mandoc/mdoc_validate.c 2017/05/05 13:17:55 1.325 +++ mandoc/mdoc_validate.c 2017/06/11 20:03:02 1.338 @@ -1,4 +1,4 @@ -/* $Id: mdoc_validate.c,v 1.325 2017/05/05 13:17:55 schwarze Exp $ */ +/* $Id: mdoc_validate.c,v 1.338 2017/06/11 20:03:02 schwarze Exp $ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons * Copyright (c) 2010-2017 Ingo Schwarze @@ -56,6 +56,7 @@ static void check_text(struct roff_man *, int, int, c static void check_argv(struct roff_man *, struct roff_node *, struct mdoc_argv *); static void check_args(struct roff_man *, struct roff_node *); +static void check_toptext(struct roff_man *, int, int, const char *); static int child_an(const struct roff_node *); static size_t macro2len(enum roff_tok); static void rewrite_macro2len(struct roff_man *, char **); @@ -74,6 +75,7 @@ static void post_bx(POST_ARGS); static void post_defaults(POST_ARGS); static void post_display(POST_ARGS); static void post_dd(POST_ARGS); +static void post_delim(POST_ARGS); static void post_dt(POST_ARGS); static void post_en(POST_ARGS); static void post_es(POST_ARGS); @@ -105,6 +107,7 @@ static void post_sh_authors(POST_ARGS); static void post_sm(POST_ARGS); static void post_st(POST_ARGS); static void post_std(POST_ARGS); +static void post_useless(POST_ARGS); static void post_xr(POST_ARGS); static void post_xx(POST_ARGS); @@ -122,33 +125,33 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] post_bl, /* Bl */ NULL, /* El */ post_it, /* It */ - NULL, /* Ad */ + post_delim, /* Ad */ post_an, /* An */ NULL, /* Ap */ post_defaults, /* Ar */ NULL, /* Cd */ - NULL, /* Cm */ - NULL, /* Dv */ - NULL, /* Er */ - NULL, /* Ev */ + post_delim, /* Cm */ + post_delim, /* Dv */ + post_delim, /* Er */ + post_delim, /* Ev */ post_ex, /* Ex */ post_fa, /* Fa */ NULL, /* Fd */ - NULL, /* Fl */ + post_delim, /* Fl */ post_fn, /* Fn */ - NULL, /* Ft */ - NULL, /* Ic */ - NULL, /* In */ + post_delim, /* Ft */ + post_delim, /* Ic */ + post_delim, /* In */ post_defaults, /* Li */ post_nd, /* Nd */ post_nm, /* Nm */ - NULL, /* Op */ + post_delim, /* Op */ post_obsolete, /* Ot */ post_defaults, /* Pa */ post_rv, /* Rv */ post_st, /* St */ - NULL, /* Va */ - NULL, /* Vt */ + post_delim, /* Va */ + post_delim, /* Vt */ post_xr, /* Xr */ NULL, /* %A */ post_hyph, /* %B */ /* FIXME: can be used outside Rs/Re. */ @@ -162,12 +165,12 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] post_hyph, /* %T */ /* FIXME: can be used outside Rs/Re. */ NULL, /* %V */ NULL, /* Ac */ - NULL, /* Ao */ - NULL, /* Aq */ + post_delim, /* Ao */ + post_delim, /* Aq */ post_at, /* At */ NULL, /* Bc */ post_bf, /* Bf */ - NULL, /* Bo */ + post_delim, /* Bo */ NULL, /* Bq */ post_xx, /* Bsx */ post_bx, /* Bx */ @@ -177,57 +180,56 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] NULL, /* Dq */ NULL, /* Ec */ NULL, /* Ef */ - NULL, /* Em */ + post_delim, /* Em */ NULL, /* Eo */ post_xx, /* Fx */ - NULL, /* Ms */ + post_delim, /* Ms */ NULL, /* No */ post_ns, /* Ns */ post_xx, /* Nx */ post_xx, /* Ox */ NULL, /* Pc */ NULL, /* Pf */ - NULL, /* Po */ - NULL, /* Pq */ + post_delim, /* Po */ + post_delim, /* Pq */ NULL, /* Qc */ - NULL, /* Ql */ - NULL, /* Qo */ - NULL, /* Qq */ + post_delim, /* Ql */ + post_delim, /* Qo */ + post_delim, /* Qq */ NULL, /* Re */ post_rs, /* Rs */ NULL, /* Sc */ - NULL, /* So */ - NULL, /* Sq */ + post_delim, /* So */ + post_delim, /* Sq */ post_sm, /* Sm */ post_hyph, /* Sx */ - NULL, /* Sy */ - NULL, /* Tn */ + post_delim, /* Sy */ + post_useless, /* Tn */ post_xx, /* Ux */ NULL, /* Xc */ NULL, /* Xo */ post_fo, /* Fo */ NULL, /* Fc */ - NULL, /* Oo */ + post_delim, /* Oo */ NULL, /* Oc */ post_bk, /* Bk */ NULL, /* Ek */ post_eoln, /* Bt */ - NULL, /* Hf */ + post_obsolete, /* Hf */ post_obsolete, /* Fr */ post_eoln, /* Ud */ post_lb, /* Lb */ post_par, /* Lp */ - NULL, /* Lk */ + post_delim, /* Lk */ post_defaults, /* Mt */ - NULL, /* Brq */ - NULL, /* Bro */ + post_delim, /* Brq */ + post_delim, /* Bro */ NULL, /* Brc */ NULL, /* %C */ post_es, /* Es */ post_en, /* En */ post_xx, /* Dx */ NULL, /* %Q */ - post_par, /* sp */ NULL, /* %U */ NULL, /* Ta */ }; @@ -302,6 +304,11 @@ mdoc_node_validate(struct roff_man *mdoc) if (n->sec != SEC_SYNOPSIS || (n->parent->tok != MDOC_Cd && n->parent->tok != MDOC_Fd)) check_text(mdoc, n->line, n->pos, n->string); + if (n->parent->tok == MDOC_It || + (n->parent->type == ROFFT_BODY && + (n->parent->tok == MDOC_Sh || + n->parent->tok == MDOC_Ss))) + check_toptext(mdoc, n->line, n->pos, n->string); break; case ROFFT_EQN: case ROFFT_TBL: @@ -328,6 +335,7 @@ mdoc_node_validate(struct roff_man *mdoc) if (n->tok < ROFF_MAX) { switch(n->tok) { case ROFF_br: + case ROFF_sp: post_par(mdoc); break; default: @@ -383,6 +391,136 @@ check_text(struct roff_man *mdoc, int ln, int pos, cha } static void +check_toptext(struct roff_man *mdoc, int ln, int pos, const char *p) +{ + const char *cp, *cpr; + + if (*p == '\0') + return; + + if ((cp = strstr(p, "OpenBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + ln, pos + (cp - p), "Ox"); + if ((cp = strstr(p, "NetBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + ln, pos + (cp - p), "Nx"); + if ((cp = strstr(p, "FreeBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + ln, pos + (cp - p), "Fx"); + if ((cp = strstr(p, "DragonFly")) != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + ln, pos + (cp - p), "Dx"); + + cp = p; + while ((cp = strstr(cp + 1, "()")) != NULL) { + for (cpr = cp - 1; cpr >= p; cpr--) + if (*cpr != '_' && !isalnum((unsigned char)*cpr)) + break; + if ((cpr < p || *cpr == ' ') && cpr + 1 < cp) { + cpr++; + mandoc_vmsg(MANDOCERR_FUNC, mdoc->parse, + ln, pos + (cpr - p), + "%.*s()", (int)(cp - cpr), cpr); + } + } +} + +static void +post_delim(POST_ARGS) +{ + const struct roff_node *nch; + const char *lc, *cp; + int nw; + enum mdelim delim; + enum roff_tok tok; + + /* + * Find candidates: at least two bytes, + * the last one a closing or middle delimiter. + */ + + tok = mdoc->last->tok; + nch = mdoc->last->last; + if (nch == NULL || nch->type != ROFFT_TEXT) + return; + lc = strchr(nch->string, '\0') - 1; + if (lc <= nch->string) + return; + delim = mdoc_isdelim(lc); + if (delim == DELIM_NONE || delim == DELIM_OPEN) + return; + + /* + * Reduce false positives by allowing various cases. + */ + + /* Escaped delimiters. */ + if (lc > nch->string + 1 && lc[-2] == '\\' && + (lc[-1] == '&' || lc[-1] == 'e')) + return; + + /* Specific byte sequences. */ + switch (*lc) { + case ')': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '(') + return; + break; + case '.': + if (lc > nch->string + 1 && lc[-2] == '.' && lc[-1] == '.') + return; + if (lc[-1] == '.') + return; + break; + case ';': + if (tok == MDOC_Vt) + return; + break; + case '?': + if (lc[-1] == '?') + return; + break; + case ']': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '[') + return; + break; + case '|': + if (lc == nch->string + 1 && lc[-1] == '|') + return; + default: + break; + } + + /* Exactly two non-alphanumeric bytes. */ + if (lc == nch->string + 1 && !isalnum((unsigned char)lc[-1])) + return; + + /* At least three alphabetic words with a sentence ending. */ + if (strchr("!.:?", *lc) != NULL && (tok == MDOC_Em || + tok == MDOC_Li || tok == MDOC_Po || tok == MDOC_Pq || + tok == MDOC_Sy)) { + nw = 0; + for (cp = lc - 1; cp >= nch->string; cp--) { + if (*cp == ' ') { + nw++; + if (cp > nch->string && cp[-1] == ',') + cp--; + } else if (isalpha((unsigned int)*cp)) { + if (nw > 1) + return; + } else + break; + } + } + + mandoc_vmsg(MANDOCERR_DELIM, mdoc->parse, + nch->line, nch->pos + (lc - nch->string), + "%s%s %s", roff_name[tok], + nch == mdoc->last->child ? "" : " ...", nch->string); +} + +static void post_bl_norm(POST_ARGS) { struct roff_node *n; @@ -671,6 +809,7 @@ post_eoln(POST_ARGS) { struct roff_node *n; + post_useless(mdoc); n = mdoc->last; if (n->child != NULL) mandoc_vmsg(MANDOCERR_ARG_SKIP, mdoc->parse, n->line, @@ -756,6 +895,8 @@ post_lb(POST_ARGS) struct roff_node *n; const char *p; + post_delim(mdoc); + n = mdoc->last; assert(n->child->type == ROFFT_TEXT); mdoc->next = ROFF_NEXT_CHILD; @@ -768,6 +909,9 @@ post_lb(POST_ARGS) return; } + mandoc_vmsg(MANDOCERR_LB_BAD, mdoc->parse, n->child->line, + n->child->pos, "Lb %s", n->child->string); + roff_word_alloc(mdoc, n->line, n->pos, "library"); mdoc->last->flags = NODE_NOSRC; roff_word_alloc(mdoc, n->line, n->pos, "\\(Lq"); @@ -866,6 +1010,16 @@ post_obsolete(POST_ARGS) n->line, n->pos, roff_name[n->tok]); } +static void +post_useless(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + mandoc_msg(MANDOCERR_MACRO_USELESS, mdoc->parse, + n->line, n->pos, roff_name[n->tok]); +} + /* * Block macros. */ @@ -1003,6 +1157,7 @@ post_fa(POST_ARGS) break; } } + post_delim(mdoc); } static void @@ -1025,6 +1180,9 @@ post_nm(POST_ARGS) mandoc_msg(MANDOCERR_NM_NONAME, mdoc->parse, n->line, n->pos, "Nm"); + if (n->type == ROFFT_ELEM) + post_delim(mdoc); + if ((n->type != ROFFT_ELEM && n->type != ROFFT_HEAD) || (n->child != NULL && n->child->type == ROFFT_TEXT) || mdoc->meta.name == NULL) @@ -1040,6 +1198,7 @@ static void post_nd(POST_ARGS) { struct roff_node *n; + size_t sz; n = mdoc->last; @@ -1053,6 +1212,11 @@ post_nd(POST_ARGS) if (n->child == NULL) mandoc_msg(MANDOCERR_ND_EMPTY, mdoc->parse, n->line, n->pos, "Nd"); + else if (n->last->type == ROFFT_TEXT && + (sz = strlen(n->last->string)) != 0 && + n->last->string[sz - 1] == '.') + mandoc_msg(MANDOCERR_ND_DOT, mdoc->parse, + n->last->line, n->last->pos + sz - 1, NULL); post_hyph(mdoc); } @@ -1109,17 +1273,18 @@ post_defaults(POST_ARGS) { struct roff_node *nn; + if (mdoc->last->child != NULL) { + post_delim(mdoc); + return; + } + /* * The `Ar' defaults to "file ..." if no value is provided as an * argument; the `Mt' and `Pa' macros use "~"; the `Li' just * gets an empty string. */ - if (mdoc->last->child != NULL) - return; - nn = mdoc->last; - switch (nn->tok) { case MDOC_Ar: mdoc->next = ROFF_NEXT_CHILD; @@ -1183,6 +1348,8 @@ post_an(POST_ARGS) if (nch == NULL) mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, np->line, np->pos, "An"); + else + post_delim(mdoc); } else if (nch != NULL) mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, nch->line, nch->pos, "An ... %s", nch->string); @@ -1211,6 +1378,8 @@ post_xx(POST_ARGS) struct roff_node *n; const char *os; + post_delim(mdoc); + n = mdoc->last; switch (n->tok) { case MDOC_Bsx: @@ -1437,6 +1606,8 @@ post_bl(POST_ARGS) struct roff_node *nparent, *nprev; /* of the Bl block */ struct roff_node *nblock, *nbody; /* of the Bl */ struct roff_node *nchild, *nnext; /* of the Bl body */ + const char *prev_Er; + int order; nbody = mdoc->last; switch (nbody->type) { @@ -1537,6 +1708,34 @@ post_bl(POST_ARGS) nchild = nnext; } + + if (mdoc->meta.os_e != MDOC_OS_NETBSD) + return; + + prev_Er = NULL; + for (nchild = nbody->child; nchild != NULL; nchild = nchild->next) { + if (nchild->tok != MDOC_It) + continue; + if ((nnext = nchild->head->child) == NULL) + continue; + if (nnext->type == ROFFT_BLOCK) + nnext = nnext->body->child; + if (nnext == NULL || nnext->tok != MDOC_Er) + continue; + nnext = nnext->child; + if (prev_Er != NULL) { + order = strcmp(prev_Er, nnext->string); + if (order > 0) + mandoc_vmsg(MANDOCERR_ER_ORDER, + mdoc->parse, nnext->line, nnext->pos, + "Er %s %s", prev_Er, nnext->string); + else if (order == 0) + mandoc_vmsg(MANDOCERR_ER_REP, + mdoc->parse, nnext->line, nnext->pos, + "Er %s", prev_Er); + } + prev_Er = nnext->string; + } } static void @@ -1591,9 +1790,8 @@ post_root(POST_ARGS) /* Add missing prologue data. */ if (mdoc->meta.date == NULL) - mdoc->meta.date = mdoc->quick ? - mandoc_strdup("") : - mandoc_normdate(mdoc->parse, NULL, 0, 0); + mdoc->meta.date = mdoc->quick ? mandoc_strdup("") : + mandoc_normdate(mdoc, NULL, 0, 0); if (mdoc->meta.title == NULL) { mandoc_msg(MANDOCERR_DT_NOTITLE, @@ -1879,7 +2077,7 @@ post_sh_see_also(POST_ARGS) if (isalpha((const unsigned char)*name)) return; lastpunct = n->string; - if (n->next == NULL) + if (n->next == NULL || n->next->tok == MDOC_Rs) mandoc_vmsg(MANDOCERR_XR_PUNCT, mdoc->parse, n->line, n->pos, "%s after %s(%s)", lastpunct, lastname, lastsec); @@ -2015,9 +2213,9 @@ post_xr(POST_ARGS) if (nch->next == NULL) { mandoc_vmsg(MANDOCERR_XR_NOSEC, mdoc->parse, n->line, n->pos, "Xr %s", nch->string); - return; - } - assert(nch->next == n->last); + } else + assert(nch->next == n->last); + post_delim(mdoc); } static void @@ -2096,10 +2294,10 @@ post_par(POST_ARGS) struct roff_node *np; np = mdoc->last; - if (np->tok != ROFF_br && np->tok != MDOC_sp) + if (np->tok != ROFF_br && np->tok != ROFF_sp) post_prevpar(mdoc); - if (np->tok == MDOC_sp) { + if (np->tok == ROFF_sp) { if (np->child != NULL && np->child->next != NULL) mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, np->child->next->line, np->child->next->pos, @@ -2115,7 +2313,7 @@ post_par(POST_ARGS) return; } else if (np->tok != MDOC_Pp && np->tok != MDOC_Lp && (mdoc->last->tok != ROFF_br || - (np->tok != MDOC_sp && np->tok != ROFF_br))) + (np->tok != ROFF_sp && np->tok != ROFF_br))) return; mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, @@ -2149,7 +2347,7 @@ post_dd(POST_ARGS) if (n->child == NULL || n->child->string[0] == '\0') { mdoc->meta.date = mdoc->quick ? mandoc_strdup("") : - mandoc_normdate(mdoc->parse, NULL, n->line, n->pos); + mandoc_normdate(mdoc, NULL, n->line, n->pos); return; } @@ -2158,7 +2356,7 @@ post_dd(POST_ARGS) if (mdoc->quick) mdoc->meta.date = datestr; else { - mdoc->meta.date = mandoc_normdate(mdoc->parse, + mdoc->meta.date = mandoc_normdate(mdoc, datestr, n->line, n->pos); free(datestr); } @@ -2264,11 +2462,21 @@ static void post_bx(POST_ARGS) { struct roff_node *n, *nch; + const char *macro; + post_delim(mdoc); + n = mdoc->last; nch = n->child; if (nch != NULL) { + macro = !strcmp(nch->string, "Open") ? "Ox" : + !strcmp(nch->string, "Net") ? "Nx" : + !strcmp(nch->string, "Free") ? "Fx" : + !strcmp(nch->string, "DragonFly") ? "Dx" : NULL; + if (macro != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + n->line, n->pos, macro); mdoc->last = nch; nch = nch->next; mdoc->next = ROFF_NEXT_SIBLING; @@ -2335,11 +2543,11 @@ post_os(POST_ARGS) mdoc->meta.os = NULL; deroff(&mdoc->meta.os, n); if (mdoc->meta.os) - return; + goto out; if (mdoc->defos) { mdoc->meta.os = mandoc_strdup(mdoc->defos); - return; + goto out; } #ifdef OSNAME @@ -2356,6 +2564,33 @@ post_os(POST_ARGS) } mdoc->meta.os = mandoc_strdup(defbuf); #endif /*!OSNAME*/ + +out: mdoc->meta.os_e = strstr(mdoc->meta.os, "OpenBSD") != NULL ? + MDOC_OS_OPENBSD : strstr(mdoc->meta.os, "NetBSD") != NULL ? + MDOC_OS_NETBSD : MDOC_OS_OTHER; + + /* + * This is the earliest point where we can check + * Mdocdate conventions because we don't know + * the operating system earlier. + */ + + while (n->tok != MDOC_Dd) + if ((n = n->prev) == NULL) + return; + if ((n = n->child) == NULL) + return; + if (strncmp(n->string, "$" "Mdocdate", 9)) { + if (mdoc->meta.os_e == MDOC_OS_OPENBSD) + mandoc_vmsg(MANDOCERR_MDOCDATE_MISSING, + mdoc->parse, n->line, n->pos, + "Dd %s", n->string); + } else { + if (mdoc->meta.os_e == MDOC_OS_NETBSD) + mandoc_vmsg(MANDOCERR_MDOCDATE, + mdoc->parse, n->line, n->pos, + "Dd %s", n->string); + } } enum roff_sec