=================================================================== RCS file: /cvs/mandoc/mdoc_validate.c,v retrieving revision 1.333 retrieving revision 1.338 diff -u -p -r1.333 -r1.338 --- mandoc/mdoc_validate.c 2017/06/10 01:48:53 1.333 +++ mandoc/mdoc_validate.c 2017/06/11 20:03:02 1.338 @@ -1,4 +1,4 @@ -/* $Id: mdoc_validate.c,v 1.333 2017/06/10 01:48:53 schwarze Exp $ */ +/* $Id: mdoc_validate.c,v 1.338 2017/06/11 20:03:02 schwarze Exp $ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons * Copyright (c) 2010-2017 Ingo Schwarze @@ -53,10 +53,10 @@ typedef void (*v_post)(POST_ARGS); static int build_list(struct roff_man *, int); static void check_text(struct roff_man *, int, int, char *); -static void check_bsd(struct roff_man *, int, int, char *); static void check_argv(struct roff_man *, struct roff_node *, struct mdoc_argv *); static void check_args(struct roff_man *, struct roff_node *); +static void check_toptext(struct roff_man *, int, int, const char *); static int child_an(const struct roff_node *); static size_t macro2len(enum roff_tok); static void rewrite_macro2len(struct roff_man *, char **); @@ -184,7 +184,7 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] NULL, /* Eo */ post_xx, /* Fx */ post_delim, /* Ms */ - post_delim, /* No */ + NULL, /* No */ post_ns, /* Ns */ post_xx, /* Nx */ post_xx, /* Ox */ @@ -304,10 +304,11 @@ mdoc_node_validate(struct roff_man *mdoc) if (n->sec != SEC_SYNOPSIS || (n->parent->tok != MDOC_Cd && n->parent->tok != MDOC_Fd)) check_text(mdoc, n->line, n->pos, n->string); - if (n->parent->tok == MDOC_Sh || - n->parent->tok == MDOC_Ss || - n->parent->tok == MDOC_It) - check_bsd(mdoc, n->line, n->pos, n->string); + if (n->parent->tok == MDOC_It || + (n->parent->type == ROFFT_BODY && + (n->parent->tok == MDOC_Sh || + n->parent->tok == MDOC_Ss))) + check_toptext(mdoc, n->line, n->pos, n->string); break; case ROFFT_EQN: case ROFFT_TBL: @@ -390,10 +391,13 @@ check_text(struct roff_man *mdoc, int ln, int pos, cha } static void -check_bsd(struct roff_man *mdoc, int ln, int pos, char *p) +check_toptext(struct roff_man *mdoc, int ln, int pos, const char *p) { - const char *cp; + const char *cp, *cpr; + if (*p == '\0') + return; + if ((cp = strstr(p, "OpenBSD")) != NULL) mandoc_msg(MANDOCERR_BX, mdoc->parse, ln, pos + (cp - p), "Ox"); @@ -406,15 +410,36 @@ check_bsd(struct roff_man *mdoc, int ln, int pos, char if ((cp = strstr(p, "DragonFly")) != NULL) mandoc_msg(MANDOCERR_BX, mdoc->parse, ln, pos + (cp - p), "Dx"); + + cp = p; + while ((cp = strstr(cp + 1, "()")) != NULL) { + for (cpr = cp - 1; cpr >= p; cpr--) + if (*cpr != '_' && !isalnum((unsigned char)*cpr)) + break; + if ((cpr < p || *cpr == ' ') && cpr + 1 < cp) { + cpr++; + mandoc_vmsg(MANDOCERR_FUNC, mdoc->parse, + ln, pos + (cpr - p), + "%.*s()", (int)(cp - cpr), cpr); + } + } } static void post_delim(POST_ARGS) { const struct roff_node *nch; - const char *lc; + const char *lc, *cp; + int nw; enum mdelim delim; + enum roff_tok tok; + /* + * Find candidates: at least two bytes, + * the last one a closing or middle delimiter. + */ + + tok = mdoc->last->tok; nch = mdoc->last->last; if (nch == NULL || nch->type != ROFFT_TEXT) return; @@ -424,9 +449,74 @@ post_delim(POST_ARGS) delim = mdoc_isdelim(lc); if (delim == DELIM_NONE || delim == DELIM_OPEN) return; + + /* + * Reduce false positives by allowing various cases. + */ + + /* Escaped delimiters. */ + if (lc > nch->string + 1 && lc[-2] == '\\' && + (lc[-1] == '&' || lc[-1] == 'e')) + return; + + /* Specific byte sequences. */ + switch (*lc) { + case ')': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '(') + return; + break; + case '.': + if (lc > nch->string + 1 && lc[-2] == '.' && lc[-1] == '.') + return; + if (lc[-1] == '.') + return; + break; + case ';': + if (tok == MDOC_Vt) + return; + break; + case '?': + if (lc[-1] == '?') + return; + break; + case ']': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '[') + return; + break; + case '|': + if (lc == nch->string + 1 && lc[-1] == '|') + return; + default: + break; + } + + /* Exactly two non-alphanumeric bytes. */ + if (lc == nch->string + 1 && !isalnum((unsigned char)lc[-1])) + return; + + /* At least three alphabetic words with a sentence ending. */ + if (strchr("!.:?", *lc) != NULL && (tok == MDOC_Em || + tok == MDOC_Li || tok == MDOC_Po || tok == MDOC_Pq || + tok == MDOC_Sy)) { + nw = 0; + for (cp = lc - 1; cp >= nch->string; cp--) { + if (*cp == ' ') { + nw++; + if (cp > nch->string && cp[-1] == ',') + cp--; + } else if (isalpha((unsigned int)*cp)) { + if (nw > 1) + return; + } else + break; + } + } + mandoc_vmsg(MANDOCERR_DELIM, mdoc->parse, nch->line, nch->pos + (lc - nch->string), - "%s%s %s", roff_name[mdoc->last->tok], + "%s%s %s", roff_name[tok], nch == mdoc->last->child ? "" : " ...", nch->string); } @@ -1700,9 +1790,8 @@ post_root(POST_ARGS) /* Add missing prologue data. */ if (mdoc->meta.date == NULL) - mdoc->meta.date = mdoc->quick ? - mandoc_strdup("") : - mandoc_normdate(mdoc->parse, NULL, 0, 0); + mdoc->meta.date = mdoc->quick ? mandoc_strdup("") : + mandoc_normdate(mdoc, NULL, 0, 0); if (mdoc->meta.title == NULL) { mandoc_msg(MANDOCERR_DT_NOTITLE, @@ -2258,7 +2347,7 @@ post_dd(POST_ARGS) if (n->child == NULL || n->child->string[0] == '\0') { mdoc->meta.date = mdoc->quick ? mandoc_strdup("") : - mandoc_normdate(mdoc->parse, NULL, n->line, n->pos); + mandoc_normdate(mdoc, NULL, n->line, n->pos); return; } @@ -2267,7 +2356,7 @@ post_dd(POST_ARGS) if (mdoc->quick) mdoc->meta.date = datestr; else { - mdoc->meta.date = mandoc_normdate(mdoc->parse, + mdoc->meta.date = mandoc_normdate(mdoc, datestr, n->line, n->pos); free(datestr); } @@ -2479,6 +2568,29 @@ post_os(POST_ARGS) out: mdoc->meta.os_e = strstr(mdoc->meta.os, "OpenBSD") != NULL ? MDOC_OS_OPENBSD : strstr(mdoc->meta.os, "NetBSD") != NULL ? MDOC_OS_NETBSD : MDOC_OS_OTHER; + + /* + * This is the earliest point where we can check + * Mdocdate conventions because we don't know + * the operating system earlier. + */ + + while (n->tok != MDOC_Dd) + if ((n = n->prev) == NULL) + return; + if ((n = n->child) == NULL) + return; + if (strncmp(n->string, "$" "Mdocdate", 9)) { + if (mdoc->meta.os_e == MDOC_OS_OPENBSD) + mandoc_vmsg(MANDOCERR_MDOCDATE_MISSING, + mdoc->parse, n->line, n->pos, + "Dd %s", n->string); + } else { + if (mdoc->meta.os_e == MDOC_OS_NETBSD) + mandoc_vmsg(MANDOCERR_MDOCDATE, + mdoc->parse, n->line, n->pos, + "Dd %s", n->string); + } } enum roff_sec