=================================================================== RCS file: /cvs/mandoc/mdoc_validate.c,v retrieving revision 1.354 retrieving revision 1.359 diff -u -p -r1.354 -r1.359 --- mandoc/mdoc_validate.c 2018/02/06 16:29:57 1.354 +++ mandoc/mdoc_validate.c 2018/08/01 13:46:15 1.359 @@ -1,7 +1,7 @@ -/* $Id: mdoc_validate.c,v 1.354 2018/02/06 16:29:57 schwarze Exp $ */ +/* $Id: mdoc_validate.c,v 1.359 2018/08/01 13:46:15 schwarze Exp $ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons - * Copyright (c) 2010-2017 Ingo Schwarze + * Copyright (c) 2010-2018 Ingo Schwarze * Copyright (c) 2010 Joerg Sonnenberger * * Permission to use, copy, modify, and distribute this software for any @@ -53,10 +53,11 @@ enum check_ineq { typedef void (*v_post)(POST_ARGS); static int build_list(struct roff_man *, int); -static void check_text(struct roff_man *, int, int, char *); static void check_argv(struct roff_man *, struct roff_node *, struct mdoc_argv *); static void check_args(struct roff_man *, struct roff_node *); +static void check_text(struct roff_man *, int, int, char *); +static void check_text_em(struct roff_man *, int, int, char *); static void check_toptext(struct roff_man *, int, int, const char *); static int child_an(const struct roff_node *); static size_t macro2len(enum roff_tok); @@ -288,7 +289,7 @@ static const char * const secnames[SEC__MAX] = { void mdoc_node_validate(struct roff_man *mdoc) { - struct roff_node *n; + struct roff_node *n, *np; const v_post *p; n = mdoc->last; @@ -305,15 +306,21 @@ mdoc_node_validate(struct roff_man *mdoc) mdoc->next = ROFF_NEXT_SIBLING; switch (n->type) { case ROFFT_TEXT: + np = n->parent; if (n->sec != SEC_SYNOPSIS || - (n->parent->tok != MDOC_Cd && n->parent->tok != MDOC_Fd)) + (np->tok != MDOC_Cd && np->tok != MDOC_Fd)) check_text(mdoc, n->line, n->pos, n->string); - if (n->parent->tok == MDOC_It || - (n->parent->type == ROFFT_BODY && - (n->parent->tok == MDOC_Sh || - n->parent->tok == MDOC_Ss))) + if (np->tok != MDOC_Ql && np->tok != MDOC_Dl && + (np->tok != MDOC_Bd || + (mdoc->flags & MDOC_LITERAL) == 0) && + (np->tok != MDOC_It || np->type != ROFFT_HEAD || + np->parent->parent->norm->Bl.type != LIST_diag)) + check_text_em(mdoc, n->line, n->pos, n->string); + if (np->tok == MDOC_It || (np->type == ROFFT_BODY && + (np->tok == MDOC_Sh || np->tok == MDOC_Ss))) check_toptext(mdoc, n->line, n->pos, n->string); break; + case ROFFT_COMMENT: case ROFFT_EQN: case ROFFT_TBL: break; @@ -395,6 +402,58 @@ check_text(struct roff_man *mdoc, int ln, int pos, cha } static void +check_text_em(struct roff_man *mdoc, int ln, int pos, char *p) +{ + const struct roff_node *np, *nn; + char *cp; + + np = mdoc->last->prev; + nn = mdoc->last->next; + + /* Look for em-dashes wrongly encoded as "--". */ + + for (cp = p; *cp != '\0'; cp++) { + if (cp[0] != '-' || cp[1] != '-') + continue; + cp++; + + /* Skip input sequences of more than two '-'. */ + + if (cp[1] == '-') { + while (cp[1] == '-') + cp++; + continue; + } + + /* Skip "--" directly attached to something else. */ + + if ((cp - p > 1 && cp[-2] != ' ') || + (cp[1] != '\0' && cp[1] != ' ')) + continue; + + /* Require a letter right before or right afterwards. */ + + if ((cp - p > 2 ? + isalpha((unsigned char)cp[-3]) : + np != NULL && + np->type == ROFFT_TEXT && + np->string != '\0' && + isalpha((unsigned char)np->string[ + strlen(np->string) - 1])) || + (cp[1] != '\0' && cp[2] != '\0' ? + isalpha((unsigned char)cp[2]) : + nn != NULL && + nn->type == ROFFT_TEXT && + nn->string != '\0' && + isalpha((unsigned char)*nn->string))) { + mandoc_msg(MANDOCERR_DASHDASH, mdoc->parse, + ln, pos + (int)(cp - p) - 1, NULL); + break; + } + } +} + +static void check_toptext(struct roff_man *mdoc, int ln, int pos, const char *p) { const char *cp, *cpr; @@ -946,10 +1005,10 @@ post_lb(POST_ARGS) roff_word_alloc(mdoc, n->line, n->pos, "library"); mdoc->last->flags = NODE_NOSRC; - roff_word_alloc(mdoc, n->line, n->pos, "\\(Lq"); + roff_word_alloc(mdoc, n->line, n->pos, "\\(lq"); mdoc->last->flags = NODE_DELIMO | NODE_NOSRC; mdoc->last = mdoc->last->next; - roff_word_alloc(mdoc, n->line, n->pos, "\\(Rq"); + roff_word_alloc(mdoc, n->line, n->pos, "\\(rq"); mdoc->last->flags = NODE_DELIMC | NODE_NOSRC; mdoc->last = n; } @@ -1930,8 +1989,10 @@ post_root(POST_ARGS) /* Check that we begin with a proper `Sh'. */ n = mdoc->first->child; - while (n != NULL && n->tok >= MDOC_Dd && - mdoc_macros[n->tok].flags & MDOC_PROLOGUE) + while (n != NULL && + (n->type == ROFFT_COMMENT || + (n->tok >= MDOC_Dd && + mdoc_macros[n->tok].flags & MDOC_PROLOGUE))) n = n->next; if (n == NULL)