=================================================================== RCS file: /cvs/mandoc/man_validate.c,v retrieving revision 1.139 retrieving revision 1.151 diff -u -p -r1.139 -r1.151 --- mandoc/man_validate.c 2018/12/04 02:53:51 1.139 +++ mandoc/man_validate.c 2020/03/13 15:32:28 1.151 @@ -1,7 +1,7 @@ -/* $OpenBSD: man_validate.c,v 1.139 2018/12/04 02:53:51 schwarze Exp $ */ +/* $Id: man_validate.c,v 1.151 2020/03/13 15:32:28 schwarze Exp $ */ /* + * Copyright (c) 2010, 2012-2020 Ingo Schwarze * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2010, 2012-2018 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -14,6 +14,8 @@ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Validation module for man(7) syntax trees used by mandoc(1). */ #include "config.h" @@ -24,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +34,7 @@ #include "mandoc_aux.h" #include "mandoc.h" #include "roff.h" +#include "tag.h" #include "man.h" #include "libmandoc.h" #include "roff_int.h" @@ -40,17 +44,21 @@ typedef void (*v_check)(CHKARGS); -static void check_abort(CHKARGS); +static void check_abort(CHKARGS) __attribute__((__noreturn__)); static void check_par(CHKARGS); static void check_part(CHKARGS); static void check_root(CHKARGS); +static void check_tag(struct roff_node *, struct roff_node *); static void check_text(CHKARGS); static void post_AT(CHKARGS); +static void post_EE(CHKARGS); +static void post_EX(CHKARGS); static void post_IP(CHKARGS); static void post_OP(CHKARGS); static void post_SH(CHKARGS); static void post_TH(CHKARGS); +static void post_TP(CHKARGS); static void post_UC(CHKARGS); static void post_UR(CHKARGS); static void post_in(CHKARGS); @@ -59,8 +67,8 @@ static const v_check man_valids[MAN_MAX - MAN_TH] = { post_TH, /* TH */ post_SH, /* SH */ post_SH, /* SS */ - NULL, /* TP */ - NULL, /* TQ */ + post_TP, /* TP */ + post_TP, /* TQ */ check_abort,/* LP */ check_par, /* PP */ check_abort,/* P */ @@ -77,8 +85,6 @@ static const v_check man_valids[MAN_MAX - MAN_TH] = { NULL, /* I */ NULL, /* IR */ NULL, /* RI */ - NULL, /* nf */ - NULL, /* fi */ NULL, /* RE */ check_part, /* RS */ NULL, /* DT */ @@ -89,8 +95,8 @@ static const v_check man_valids[MAN_MAX - MAN_TH] = { NULL, /* SY */ NULL, /* YS */ post_OP, /* OP */ - NULL, /* EX */ - NULL, /* EE */ + post_EX, /* EX */ + post_EE, /* EE */ post_UR, /* UR */ NULL, /* UE */ post_UR, /* MT */ @@ -100,7 +106,7 @@ static const v_check man_valids[MAN_MAX - MAN_TH] = { /* Validate the subtree rooted at man->last. */ void -man_node_validate(struct roff_man *man) +man_validate(struct roff_man *man) { struct roff_node *n; const v_check *cp; @@ -127,7 +133,7 @@ man_node_validate(struct roff_man *man) man->last = man->last->child; while (man->last != NULL) { - man_node_validate(man); + man_validate(man); if (man->last == n) man->last = man->last->child; else @@ -159,7 +165,7 @@ man_node_validate(struct roff_man *man) if (*cp) (*cp)(man, n); if (man->last == n) - man_state(man, n); + n->flags |= NODE_VALID; break; } } @@ -170,14 +176,12 @@ check_root(CHKARGS) assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0); if (n->last == NULL || n->last->type == ROFFT_COMMENT) - mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse, - n->line, n->pos, NULL); + mandoc_msg(MANDOCERR_DOC_EMPTY, n->line, n->pos, NULL); else man->meta.hasbody = 1; if (NULL == man->meta.title) { - mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse, - n->line, n->pos, NULL); + mandoc_msg(MANDOCERR_TH_NOTITLE, n->line, n->pos, NULL); /* * If a title hasn't been set, do so now (by @@ -186,13 +190,12 @@ check_root(CHKARGS) man->meta.title = mandoc_strdup(""); man->meta.msec = mandoc_strdup(""); - man->meta.date = man->quick ? mandoc_strdup("") : - mandoc_normdate(man, NULL, n->line, n->pos); + man->meta.date = mandoc_normdate(NULL, NULL); } if (man->meta.os_e && (man->meta.rcsids & (1 << man->meta.os_e)) == 0) - mandoc_msg(MANDOCERR_RCS_MISSING, man->parse, 0, 0, + mandoc_msg(MANDOCERR_RCS_MISSING, 0, 0, man->meta.os_e == MANDOC_OS_OPENBSD ? "(OpenBSD)" : "(NetBSD)"); } @@ -203,30 +206,103 @@ check_abort(CHKARGS) abort(); } +/* + * Skip leading whitespace, dashes, backslashes, and font escapes, + * then create a tag if the first following byte is a letter. + * Priority is high unless whitespace is present. + */ static void +check_tag(struct roff_node *n, struct roff_node *nt) +{ + const char *cp, *arg; + int prio, sz; + + if (nt == NULL || nt->type != ROFFT_TEXT) + return; + + cp = nt->string; + prio = TAG_STRONG; + for (;;) { + switch (*cp) { + case ' ': + case '\t': + prio = TAG_WEAK; + /* FALLTHROUGH */ + case '-': + cp++; + break; + case '\\': + cp++; + switch (mandoc_escape(&cp, &arg, &sz)) { + case ESCAPE_FONT: + case ESCAPE_FONTBOLD: + case ESCAPE_FONTITALIC: + case ESCAPE_FONTBI: + case ESCAPE_FONTROMAN: + case ESCAPE_FONTCW: + case ESCAPE_FONTPREV: + case ESCAPE_IGNORE: + break; + case ESCAPE_SPECIAL: + if (sz != 1) + return; + switch (*arg) { + case '-': + case 'e': + break; + default: + return; + } + break; + default: + return; + } + break; + default: + if (isalpha((unsigned char)*cp)) + tag_put(cp, prio, n); + return; + } + } +} + +static void check_text(CHKARGS) { char *cp, *p; - if (MAN_LITERAL & man->flags) + if (n->flags & NODE_NOFILL) return; cp = n->string; for (p = cp; NULL != (p = strchr(p, '\t')); p++) - mandoc_msg(MANDOCERR_FI_TAB, man->parse, - n->line, n->pos + (p - cp), NULL); + mandoc_msg(MANDOCERR_FI_TAB, + n->line, n->pos + (int)(p - cp), NULL); } static void +post_EE(CHKARGS) +{ + if ((n->flags & NODE_NOFILL) == 0) + mandoc_msg(MANDOCERR_FI_SKIP, n->line, n->pos, "EE"); +} + +static void +post_EX(CHKARGS) +{ + if (n->flags & NODE_NOFILL) + mandoc_msg(MANDOCERR_NF_SKIP, n->line, n->pos, "EX"); +} + +static void post_OP(CHKARGS) { if (n->child == NULL) - mandoc_msg(MANDOCERR_OP_EMPTY, man->parse, - n->line, n->pos, "OP"); + mandoc_msg(MANDOCERR_OP_EMPTY, n->line, n->pos, "OP"); else if (n->child->next != NULL && n->child->next->next != NULL) { n = n->child->next->next; - mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse, + mandoc_msg(MANDOCERR_ARG_EXCESS, n->line, n->pos, "OP ... %s", n->string); } } @@ -248,9 +324,8 @@ post_SH(CHKARGS) } if (nc->tok == MAN_PP || nc->tok == ROFF_sp || nc->tok == ROFF_br) { - mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, - nc->line, nc->pos, "%s after %s", - roff_name[nc->tok], roff_name[n->tok]); + mandoc_msg(MANDOCERR_PAR_SKIP, nc->line, nc->pos, + "%s after %s", roff_name[nc->tok], roff_name[n->tok]); roff_node_delete(man, nc); } @@ -260,7 +335,7 @@ post_SH(CHKARGS) */ if ((nc = n->last) != NULL && nc->tok == ROFF_br) { - mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, + mandoc_msg(MANDOCERR_PAR_SKIP, nc->line, nc->pos, "%s at the end of %s", roff_name[nc->tok], roff_name[n->tok]); roff_node_delete(man, nc); @@ -271,8 +346,8 @@ static void post_UR(CHKARGS) { if (n->type == ROFFT_HEAD && n->child == NULL) - mandoc_msg(MANDOCERR_UR_NOHEAD, man->parse, - n->line, n->pos, roff_name[n->tok]); + mandoc_msg(MANDOCERR_UR_NOHEAD, n->line, n->pos, + "%s", roff_name[n->tok]); check_part(man, n); } @@ -281,8 +356,8 @@ check_part(CHKARGS) { if (n->type == ROFFT_BODY && n->child == NULL) - mandoc_msg(MANDOCERR_BLK_EMPTY, man->parse, - n->line, n->pos, roff_name[n->tok]); + mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos, + "%s", roff_name[n->tok]); } static void @@ -297,21 +372,20 @@ check_par(CHKARGS) case ROFFT_BODY: if (n->child != NULL && (n->child->tok == ROFF_sp || n->child->tok == ROFF_br)) { - mandoc_vmsg(MANDOCERR_PAR_SKIP, - man->parse, n->child->line, n->child->pos, + mandoc_msg(MANDOCERR_PAR_SKIP, + n->child->line, n->child->pos, "%s after %s", roff_name[n->child->tok], roff_name[n->tok]); roff_node_delete(man, n->child); } if (n->child == NULL) - mandoc_vmsg(MANDOCERR_PAR_SKIP, - man->parse, n->line, n->pos, + mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, "%s empty", roff_name[n->tok]); break; case ROFFT_HEAD: if (n->child != NULL) - mandoc_vmsg(MANDOCERR_ARG_SKIP, - man->parse, n->line, n->pos, "%s %s%s", + mandoc_msg(MANDOCERR_ARG_SKIP, + n->line, n->pos, "%s %s%s", roff_name[n->tok], n->child->string, n->child->next != NULL ? " ..." : ""); break; @@ -323,16 +397,17 @@ check_par(CHKARGS) static void post_IP(CHKARGS) { - switch (n->type) { case ROFFT_BLOCK: if (n->head->child == NULL && n->body->child == NULL) roff_node_delete(man, n); break; + case ROFFT_HEAD: + check_tag(n, n->child); + break; case ROFFT_BODY: if (n->parent->head->child == NULL && n->child == NULL) - mandoc_vmsg(MANDOCERR_PAR_SKIP, - man->parse, n->line, n->pos, + mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, "%s empty", roff_name[n->tok]); break; default: @@ -340,7 +415,38 @@ post_IP(CHKARGS) } } +/* + * The first next-line element in the head is the tag. + * If that's a font macro, use its first child instead. + */ static void +post_TP(CHKARGS) +{ + struct roff_node *nt; + + if (n->type != ROFFT_HEAD || (nt = n->child) == NULL) + return; + + while ((nt->flags & NODE_LINE) == 0) + if ((nt = nt->next) == NULL) + return; + + switch (nt->tok) { + case MAN_B: + case MAN_BI: + case MAN_BR: + case MAN_I: + case MAN_IB: + case MAN_IR: + nt = nt->child; + break; + default: + break; + } + check_tag(n, nt); +} + +static void post_TH(CHKARGS) { struct roff_node *nb; @@ -360,14 +466,13 @@ post_TH(CHKARGS) /* ->TITLE<- MSEC DATE OS VOL */ n = n->child; - if (n && n->string) { - for (p = n->string; '\0' != *p; p++) { + if (n != NULL && n->string != NULL) { + for (p = n->string; *p != '\0'; p++) { /* Only warn about this once... */ if (isalpha((unsigned char)*p) && ! isupper((unsigned char)*p)) { - mandoc_vmsg(MANDOCERR_TITLE_CASE, - man->parse, n->line, - n->pos + (p - n->string), + mandoc_msg(MANDOCERR_TITLE_CASE, n->line, + n->pos + (int)(p - n->string), "TH %s", n->string); break; } @@ -375,36 +480,29 @@ post_TH(CHKARGS) man->meta.title = mandoc_strdup(n->string); } else { man->meta.title = mandoc_strdup(""); - mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse, - nb->line, nb->pos, "TH"); + mandoc_msg(MANDOCERR_TH_NOTITLE, nb->line, nb->pos, "TH"); } /* TITLE ->MSEC<- DATE OS VOL */ - if (n) + if (n != NULL) n = n->next; - if (n && n->string) + if (n != NULL && n->string != NULL) man->meta.msec = mandoc_strdup(n->string); else { man->meta.msec = mandoc_strdup(""); - mandoc_vmsg(MANDOCERR_MSEC_MISSING, man->parse, + mandoc_msg(MANDOCERR_MSEC_MISSING, nb->line, nb->pos, "TH %s", man->meta.title); } /* TITLE MSEC ->DATE<- OS VOL */ - if (n) + if (n != NULL) n = n->next; - if (n && n->string && '\0' != n->string[0]) { - man->meta.date = man->quick ? - mandoc_strdup(n->string) : - mandoc_normdate(man, n->string, n->line, n->pos); - } else { + if (man->quick && n != NULL) man->meta.date = mandoc_strdup(""); - mandoc_msg(MANDOCERR_DATE_MISSING, man->parse, - n ? n->line : nb->line, - n ? n->pos : nb->pos, "TH"); - } + else + man->meta.date = mandoc_normdate(n, nb); /* TITLE MSEC DATE ->OS<- VOL */ @@ -429,7 +527,7 @@ post_TH(CHKARGS) man->meta.vol = mandoc_strdup(p); if (n != NULL && (n = n->next) != NULL) - mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse, + mandoc_msg(MANDOCERR_ARG_EXCESS, n->line, n->pos, "TH ... %s", n->string); /*