=================================================================== RCS file: /cvs/mandoc/mdoc_validate.c,v retrieving revision 1.340 retrieving revision 1.343 diff -u -p -r1.340 -r1.343 --- mandoc/mdoc_validate.c 2017/06/24 14:38:33 1.340 +++ mandoc/mdoc_validate.c 2017/06/25 17:43:45 1.343 @@ -1,4 +1,4 @@ -/* $Id: mdoc_validate.c,v 1.340 2017/06/24 14:38:33 schwarze Exp $ */ +/* $Id: mdoc_validate.c,v 1.343 2017/06/25 17:43:45 schwarze Exp $ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons * Copyright (c) 2010-2017 Ingo Schwarze @@ -60,6 +60,7 @@ static void check_toptext(struct roff_man *, int, int static int child_an(const struct roff_node *); static size_t macro2len(enum roff_tok); static void rewrite_macro2len(struct roff_man *, char **); +static int similar(const char *, const char *); static void post_an(POST_ARGS); static void post_an_norm(POST_ARGS); @@ -1786,7 +1787,29 @@ post_sm(POST_ARGS) static void post_root(POST_ARGS) { + const char *openbsd_arch[] = { + "alpha", "amd64", "arm64", "armv7", "hppa", "i386", + "landisk", "loongson", "luna88k", "macppc", "mips64", + "octeon", "sgi", "socppc", "sparc64", NULL + }; + const char *netbsd_arch[] = { + "acorn26", "acorn32", "algor", "alpha", "amiga", + "arc", "atari", + "bebox", "cats", "cesfic", "cobalt", "dreamcast", + "emips", "evbarm", "evbmips", "evbppc", "evbsh3", "evbsh5", + "hp300", "hpcarm", "hpcmips", "hpcsh", "hppa", + "i386", "ibmnws", "luna68k", + "mac68k", "macppc", "mipsco", "mmeye", "mvme68k", "mvmeppc", + "netwinder", "news68k", "newsmips", "next68k", + "pc532", "playstation2", "pmax", "pmppc", "prep", + "sandpoint", "sbmips", "sgimips", "shark", + "sparc", "sparc64", "sun2", "sun3", + "vax", "walnut", "x68k", "x86", "x86_64", "xen", NULL + }; + const char **arches[] = { NULL, netbsd_arch, openbsd_arch }; + struct roff_node *n; + const char **arch; /* Add missing prologue data. */ @@ -1813,6 +1836,23 @@ post_root(POST_ARGS) mdoc->meta.os_e == MANDOC_OS_OPENBSD ? 
"(OpenBSD)" : "(NetBSD)"); + if (mdoc->meta.arch != NULL && + (arch = arches[mdoc->meta.os_e]) != NULL) { + while (*arch != NULL && strcmp(*arch, mdoc->meta.arch)) + arch++; + if (*arch == NULL) { + n = mdoc->first->child; + while (n->tok != MDOC_Dt) + n = n->next; + n = n->child->next->next; + mandoc_vmsg(MANDOCERR_ARCH_BAD, + mdoc->parse, n->line, n->pos, + "Dt ... %s %s", mdoc->meta.arch, + mdoc->meta.os_e == MANDOC_OS_OPENBSD ? + "(OpenBSD)" : "(NetBSD)"); + } + } + /* Check that we begin with a proper `Sh'. */ n = mdoc->first->child; @@ -2109,11 +2149,54 @@ post_sh_authors(POST_ARGS) mdoc->last->line, mdoc->last->pos, NULL); } +/* + * Return an upper bound for the string distance (allowing + * transpositions), or INT_MAX if the bound exceeds maxdist or a + * mismatch fits none of the single-edit cases below. Not a full + * Levenshtein implementation because Levenshtein is quadratic in + * the string length and this function is called for every standard + * name, so the check for each custom name would be cubic. The + * following crude heuristic is linear, resulting in quadratic + * behaviour for checking one custom name, without measurable slowdown. + */ +static int +similar(const char *s1, const char *s2) +{ + const int maxdist = 3; + int dist = 0; + + while (s1[0] != '\0' && s2[0] != '\0') { + if (s1[0] == s2[0]) { + s1++; + s2++; + continue; + } + if (++dist > maxdist) + return INT_MAX; + if (s1[1] == s2[1]) { /* replacement: skip one char in each string */ + s1++; + s2++; + } else if (s1[0] == s2[1] && s1[1] == s2[0]) { + s1 += 2; /* transposition: skip the swapped pair */ + s2 += 2; + } else if (s1[0] == s2[1]) /* insertion: extra char in s2 */ + s2++; + else if (s1[1] == s2[0]) /* deletion: extra char in s1 */ + s1++; + else + return INT_MAX; + } + dist += strlen(s1) + strlen(s2); + return dist > maxdist ? INT_MAX : dist; +} + static void post_sh_head(POST_ARGS) { struct roff_node *nch; const char *goodsec; + const char *const *testsec; + int dist, mindist; enum roff_sec sec; /* @@ -2151,8 +2234,25 @@ post_sh_head(POST_ARGS) /* We don't care about custom sections after this. 
*/ - if (sec == SEC_CUSTOM) + if (sec == SEC_CUSTOM) { + if ((nch = mdoc->last->child) == NULL || + nch->type != ROFFT_TEXT || nch->next != NULL) + return; + goodsec = NULL; + mindist = INT_MAX; + for (testsec = secnames + 1; *testsec != NULL; testsec++) { + dist = similar(nch->string, *testsec); + if (dist < mindist) { + goodsec = *testsec; + mindist = dist; + } + } + if (goodsec != NULL) + mandoc_vmsg(MANDOCERR_SEC_TYPO, mdoc->parse, + nch->line, nch->pos, "Sh %s instead of %s", + nch->string, goodsec); return; + } /* * Check whether our non-custom section is being repeated or is @@ -2583,6 +2683,13 @@ out: * Mdocdate conventions because we don't know * the operating system earlier. */ + + if (n->child != NULL) + mandoc_vmsg(MANDOCERR_OS_ARG, mdoc->parse, + n->child->line, n->child->pos, + "Os %s (%s)", n->child->string, + mdoc->meta.os_e == MANDOC_OS_OPENBSD ? + "OpenBSD" : "NetBSD"); while (n->tok != MDOC_Dd) if ((n = n->prev) == NULL)