=================================================================== RCS file: /cvs/mandoc/mdoc_validate.c,v retrieving revision 1.336 retrieving revision 1.345 diff -u -p -r1.336 -r1.345 --- mandoc/mdoc_validate.c 2017/06/11 17:16:51 1.336 +++ mandoc/mdoc_validate.c 2017/06/29 15:22:17 1.345 @@ -1,4 +1,4 @@ -/* $Id: mdoc_validate.c,v 1.336 2017/06/11 17:16:51 schwarze Exp $ */ +/* $Id: mdoc_validate.c,v 1.345 2017/06/29 15:22:17 schwarze Exp $ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons * Copyright (c) 2010-2017 Ingo Schwarze @@ -60,6 +60,7 @@ static void check_toptext(struct roff_man *, int, int static int child_an(const struct roff_node *); static size_t macro2len(enum roff_tok); static void rewrite_macro2len(struct roff_man *, char **); +static int similar(const char *, const char *); static void post_an(POST_ARGS); static void post_an_norm(POST_ARGS); @@ -1459,15 +1460,30 @@ post_it(POST_ARGS) assert(nit->head->child == NULL); - i = 0; - for (nch = nit->child; nch != NULL; nch = nch->next) - if (nch->type == ROFFT_BODY) - i++; + if (nit->head->next->child == NULL && + nit->head->next->next == NULL) { + mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, + nit->line, nit->pos, "It"); + roff_node_delete(mdoc, nit); + break; + } + i = 0; + for (nch = nit->child; nch != NULL; nch = nch->next) { + if (nch->type != ROFFT_BODY) + continue; + if (i++ && nch->flags & NODE_LINE) + mandoc_msg(MANDOCERR_TA_LINE, mdoc->parse, + nch->line, nch->pos, "Ta"); + } if (i < cols || i > cols + 1) mandoc_vmsg(MANDOCERR_BL_COL, mdoc->parse, nit->line, nit->pos, "%d columns, %d cells", cols, i); + else if (nit->head->next->child != NULL && + nit->head->next->child->line > nit->line) + mandoc_msg(MANDOCERR_IT_NOARG, mdoc->parse, + nit->line, nit->pos, "Bl -column It"); break; default: abort(); @@ -1709,7 +1725,7 @@ post_bl(POST_ARGS) nchild = nnext; } - if (mdoc->meta.os_e != MDOC_OS_NETBSD) + if (mdoc->meta.os_e != MANDOC_OS_NETBSD) return; prev_Er = NULL; @@ -1728,11 +1744,12 @@ post_bl(POST_ARGS) if (order > 0) mandoc_vmsg(MANDOCERR_ER_ORDER, mdoc->parse, nnext->line, nnext->pos, - "Er %s %s", prev_Er, nnext->string); + "Er %s %s (NetBSD)", + prev_Er, nnext->string); else if (order == 0) mandoc_vmsg(MANDOCERR_ER_REP, mdoc->parse, nnext->line, nnext->pos, - "Er %s", prev_Er); + "Er %s (NetBSD)", prev_Er); } prev_Er = nnext->string; } @@ -1785,14 +1802,35 @@ post_sm(POST_ARGS) static void post_root(POST_ARGS) { + const char *openbsd_arch[] = { + "alpha", "amd64", "arm64", "armv7", "hppa", "i386", + "landisk", "loongson", "luna88k", "macppc", "mips64", + "octeon", "sgi", "socppc", "sparc64", NULL + }; + const char *netbsd_arch[] = { + "acorn26", "acorn32", "algor", "alpha", "amiga", + "arc", "atari", + "bebox", "cats", "cesfic", "cobalt", "dreamcast", + "emips", "evbarm", "evbmips", "evbppc", "evbsh3", "evbsh5", + "hp300", "hpcarm", "hpcmips", "hpcsh", "hppa", + "i386", "ibmnws", "luna68k", + "mac68k", "macppc", "mipsco", "mmeye", "mvme68k", "mvmeppc", + "netwinder", "news68k", "newsmips", "next68k", + "pc532", "playstation2", "pmax", "pmppc", "prep", + "sandpoint", "sbmips", "sgimips", "shark", + "sparc", "sparc64", "sun2", "sun3", + "vax", "walnut", "x68k", "x86", "x86_64", "xen", NULL + }; + const char **arches[] = { NULL, netbsd_arch, openbsd_arch }; + struct roff_node *n; + const char **arch; /* Add missing prologue data. */ if (mdoc->meta.date == NULL) - mdoc->meta.date = mdoc->quick ? - mandoc_strdup("") : - mandoc_normdate(mdoc->parse, NULL, 0, 0); + mdoc->meta.date = mdoc->quick ? mandoc_strdup("") : + mandoc_normdate(mdoc, NULL, 0, 0); if (mdoc->meta.title == NULL) { mandoc_msg(MANDOCERR_DT_NOTITLE, @@ -1807,6 +1845,27 @@ post_root(POST_ARGS) mandoc_msg(MANDOCERR_OS_MISSING, mdoc->parse, 0, 0, NULL); mdoc->meta.os = mandoc_strdup(""); + } else if (mdoc->meta.os_e && + (mdoc->meta.rcsids & (1 << mdoc->meta.os_e)) == 0) + mandoc_msg(MANDOCERR_RCS_MISSING, mdoc->parse, 0, 0, + mdoc->meta.os_e == MANDOC_OS_OPENBSD ? + "(OpenBSD)" : "(NetBSD)"); + + if (mdoc->meta.arch != NULL && + (arch = arches[mdoc->meta.os_e]) != NULL) { + while (*arch != NULL && strcmp(*arch, mdoc->meta.arch)) + arch++; + if (*arch == NULL) { + n = mdoc->first->child; + while (n->tok != MDOC_Dt) + n = n->next; + n = n->child->next->next; + mandoc_vmsg(MANDOCERR_ARCH_BAD, + mdoc->parse, n->line, n->pos, + "Dt ... %s %s", mdoc->meta.arch, + mdoc->meta.os_e == MANDOC_OS_OPENBSD ? + "(OpenBSD)" : "(NetBSD)"); + } } /* Check that we begin with a proper `Sh'. */ @@ -1939,10 +1998,13 @@ post_hyph(POST_ARGS) static void post_ns(POST_ARGS) { + struct roff_node *n; - if (mdoc->last->flags & NODE_LINE) + n = mdoc->last; + if (n->flags & NODE_LINE || + (n->next != NULL && n->next->flags & NODE_DELIMC)) mandoc_msg(MANDOCERR_NS_SKIP, mdoc->parse, - mdoc->last->line, mdoc->last->pos, NULL); + n->line, n->pos, NULL); } static void @@ -2105,11 +2167,54 @@ post_sh_authors(POST_ARGS) mdoc->last->line, mdoc->last->pos, NULL); } +/* + * Return an upper bound for the string distance (allowing + * transpositions). Not a full Levenshtein implementation + * because Levenshtein is quadratic in the string length + * and this function is called for every standard name, + * so the check for each custom name would be cubic. + * The following crude heuristics is linear, resulting + * in quadratic behaviour for checking one custom name, + * which does not cause measurable slowdown. + */ +static int +similar(const char *s1, const char *s2) +{ + const int maxdist = 3; + int dist = 0; + + while (s1[0] != '\0' && s2[0] != '\0') { + if (s1[0] == s2[0]) { + s1++; + s2++; + continue; + } + if (++dist > maxdist) + return INT_MAX; + if (s1[1] == s2[1]) { /* replacement */ + s1++; + s2++; + } else if (s1[0] == s2[1] && s1[1] == s2[0]) { + s1 += 2; /* transposition */ + s2 += 2; + } else if (s1[0] == s2[1]) /* insertion */ + s2++; + else if (s1[1] == s2[0]) /* deletion */ + s1++; + else + return INT_MAX; + } + dist += strlen(s1) + strlen(s2); + return dist > maxdist ? INT_MAX : dist; +} + static void post_sh_head(POST_ARGS) { struct roff_node *nch; const char *goodsec; + const char *const *testsec; + int dist, mindist; enum roff_sec sec; /* @@ -2147,8 +2252,25 @@ post_sh_head(POST_ARGS) /* We don't care about custom sections after this. */ - if (sec == SEC_CUSTOM) + if (sec == SEC_CUSTOM) { + if ((nch = mdoc->last->child) == NULL || + nch->type != ROFFT_TEXT || nch->next != NULL) + return; + goodsec = NULL; + mindist = INT_MAX; + for (testsec = secnames + 1; *testsec != NULL; testsec++) { + dist = similar(nch->string, *testsec); + if (dist < mindist) { + goodsec = *testsec; + mindist = dist; + } + } + if (goodsec != NULL) + mandoc_vmsg(MANDOCERR_SEC_TYPO, mdoc->parse, + nch->line, nch->pos, "Sh %s instead of %s", + nch->string, goodsec); return; + } /* * Check whether our non-custom section is being repeated or is @@ -2348,7 +2470,7 @@ post_dd(POST_ARGS) if (n->child == NULL || n->child->string[0] == '\0') { mdoc->meta.date = mdoc->quick ? mandoc_strdup("") : - mandoc_normdate(mdoc->parse, NULL, n->line, n->pos); + mandoc_normdate(mdoc, NULL, n->line, n->pos); return; } @@ -2357,7 +2479,7 @@ post_dd(POST_ARGS) if (mdoc->quick) mdoc->meta.date = datestr; else { - mdoc->meta.date = mandoc_normdate(mdoc->parse, + mdoc->meta.date = mandoc_normdate(mdoc, datestr, n->line, n->pos); free(datestr); } @@ -2546,8 +2668,8 @@ post_os(POST_ARGS) if (mdoc->meta.os) goto out; - if (mdoc->defos) { - mdoc->meta.os = mandoc_strdup(mdoc->defos); + if (mdoc->os_s != NULL) { + mdoc->meta.os = mandoc_strdup(mdoc->os_s); goto out; } @@ -2566,9 +2688,43 @@ post_os(POST_ARGS) mdoc->meta.os = mandoc_strdup(defbuf); #endif /*!OSNAME*/ -out: mdoc->meta.os_e = strstr(mdoc->meta.os, "OpenBSD") != NULL ? - MDOC_OS_OPENBSD : strstr(mdoc->meta.os, "NetBSD") != NULL ? - MDOC_OS_NETBSD : MDOC_OS_OTHER; +out: + if (mdoc->meta.os_e == MANDOC_OS_OTHER) { + if (strstr(mdoc->meta.os, "OpenBSD") != NULL) + mdoc->meta.os_e = MANDOC_OS_OPENBSD; + else if (strstr(mdoc->meta.os, "NetBSD") != NULL) + mdoc->meta.os_e = MANDOC_OS_NETBSD; + } + + /* + * This is the earliest point where we can check + * Mdocdate conventions because we don't know + * the operating system earlier. + */ + + if (n->child != NULL) + mandoc_vmsg(MANDOCERR_OS_ARG, mdoc->parse, + n->child->line, n->child->pos, + "Os %s (%s)", n->child->string, + mdoc->meta.os_e == MANDOC_OS_OPENBSD ? + "OpenBSD" : "NetBSD"); + + while (n->tok != MDOC_Dd) + if ((n = n->prev) == NULL) + return; + if ((n = n->child) == NULL) + return; + if (strncmp(n->string, "$" "Mdocdate", 9)) { + if (mdoc->meta.os_e == MANDOC_OS_OPENBSD) + mandoc_vmsg(MANDOCERR_MDOCDATE_MISSING, + mdoc->parse, n->line, n->pos, + "Dd %s (OpenBSD)", n->string); + } else { + if (mdoc->meta.os_e == MANDOC_OS_NETBSD) + mandoc_vmsg(MANDOCERR_MDOCDATE, + mdoc->parse, n->line, n->pos, + "Dd %s (NetBSD)", n->string); + } } enum roff_sec