=================================================================== RCS file: /cvs/mandoc/man_validate.c,v retrieving revision 1.145 retrieving revision 1.159 diff -u -p -r1.145 -r1.159 --- mandoc/man_validate.c 2018/12/31 08:38:21 1.145 +++ mandoc/man_validate.c 2023/10/24 20:53:12 1.159 @@ -1,7 +1,7 @@ -/* $Id: man_validate.c,v 1.145 2018/12/31 08:38:21 schwarze Exp $ */ +/* $Id: man_validate.c,v 1.159 2023/10/24 20:53:12 schwarze Exp $ */ /* + * Copyright (c) 2010, 2012-2020, 2023 Ingo Schwarze * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2010, 2012-2018 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -14,6 +14,8 @@ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Validation module for man(7) syntax trees used by mandoc(1). */ #include "config.h" @@ -31,29 +33,33 @@ #include "mandoc_aux.h" #include "mandoc.h" +#include "mandoc_xr.h" #include "roff.h" #include "man.h" #include "libmandoc.h" #include "roff_int.h" #include "libman.h" +#include "tag.h" #define CHKARGS struct roff_man *man, struct roff_node *n typedef void (*v_check)(CHKARGS); -static void check_abort(CHKARGS); static void check_par(CHKARGS); static void check_part(CHKARGS); static void check_root(CHKARGS); +static void check_tag(struct roff_node *, struct roff_node *); static void check_text(CHKARGS); static void post_AT(CHKARGS); static void post_EE(CHKARGS); static void post_EX(CHKARGS); static void post_IP(CHKARGS); +static void post_MR(CHKARGS); static void post_OP(CHKARGS); static void post_SH(CHKARGS); static void post_TH(CHKARGS); +static void post_TP(CHKARGS); static void post_UC(CHKARGS); static void post_UR(CHKARGS); static void post_in(CHKARGS); @@ -62,11 +68,11 @@ static const v_check man_valids[MAN_MAX - MAN_TH] = { post_TH, /* TH */ post_SH, /* SH */ post_SH, /* SS */ - NULL, /* TP */ - NULL, /* TQ */ - check_abort,/* LP */ + post_TP, /* TP */ + post_TP, /* TQ */ + check_par, /* LP */ check_par, /* PP */ - check_abort,/* P */ + check_par, /* P */ post_IP, /* IP */ NULL, /* HP */ NULL, /* SM */ @@ -96,6 +102,7 @@ static const v_check man_valids[MAN_MAX - MAN_TH] = { NULL, /* UE */ post_UR, /* MT */ NULL, /* ME */ + post_MR, /* MR */ }; @@ -107,25 +114,11 @@ man_validate(struct roff_man *man) const v_check *cp; /* - * Translate obsolete macros such that later code - * does not need to look for them. - */ - - n = man->last; - switch (n->tok) { - case MAN_LP: - case MAN_P: - n->tok = MAN_PP; - break; - default: - break; - } - - /* * Iterate over all children, recursing into each one * in turn, depth-first. */ + n = man->last; man->last = man->last->child; while (man->last != NULL) { man_validate(man); @@ -153,7 +146,6 @@ man_validate(struct roff_man *man) default: if (n->tok < ROFF_MAX) { roff_validate(man); - man_state(man, n); break; } assert(n->tok >= MAN_TH && n->tok < MAN_MAX); @@ -161,7 +153,7 @@ man_validate(struct roff_man *man) if (*cp) (*cp)(man, n); if (man->last == n) - man_state(man, n); + n->flags |= NODE_VALID; break; } } @@ -186,8 +178,7 @@ check_root(CHKARGS) man->meta.title = mandoc_strdup(""); man->meta.msec = mandoc_strdup(""); - man->meta.date = man->quick ? mandoc_strdup("") : - mandoc_normdate(man, NULL, n->line, n->pos); + man->meta.date = mandoc_normdate(NULL, NULL); } if (man->meta.os_e && @@ -197,10 +188,66 @@ check_root(CHKARGS) "(OpenBSD)" : "(NetBSD)"); } +/* + * Skip leading whitespace, dashes, backslashes, and font escapes, + * then create a tag if the first following byte is a letter. + * Priority is high unless whitespace is present. + */ static void -check_abort(CHKARGS) +check_tag(struct roff_node *n, struct roff_node *nt) { - abort(); + const char *cp, *arg; + int prio, sz; + + if (nt == NULL || nt->type != ROFFT_TEXT) + return; + + cp = nt->string; + prio = TAG_STRONG; + for (;;) { + switch (*cp) { + case ' ': + case '\t': + prio = TAG_WEAK; + /* FALLTHROUGH */ + case '-': + cp++; + break; + case '\\': + cp++; + switch (mandoc_escape(&cp, &arg, &sz)) { + case ESCAPE_FONT: + case ESCAPE_FONTBOLD: + case ESCAPE_FONTITALIC: + case ESCAPE_FONTBI: + case ESCAPE_FONTROMAN: + case ESCAPE_FONTCR: + case ESCAPE_FONTCB: + case ESCAPE_FONTCI: + case ESCAPE_FONTPREV: + case ESCAPE_IGNORE: + break; + case ESCAPE_SPECIAL: + if (sz != 1) + return; + switch (*arg) { + case '-': + case 'e': + break; + default: + return; + } + break; + default: + return; + } + break; + default: + if (isalpha((unsigned char)*cp)) + tag_put(cp, prio, n); + return; + } + } } static void @@ -248,11 +295,35 @@ static void post_SH(CHKARGS) { struct roff_node *nc; + char *cp, *tag; - if (n->type != ROFFT_BODY || (nc = n->child) == NULL) + nc = n->child; + switch (n->type) { + case ROFFT_HEAD: + tag = NULL; + deroff(&tag, n); + if (tag != NULL) { + for (cp = tag; *cp != '\0'; cp++) + if (*cp == ' ') + *cp = '_'; + if (nc != NULL && nc->type == ROFFT_TEXT && + strcmp(nc->string, tag) == 0) + tag_put(NULL, TAG_STRONG, n); + else + tag_put(tag, TAG_FALLBACK, n); + free(tag); + } return; + case ROFFT_BODY: + if (nc != NULL) + break; + return; + default: + return; + } - if (nc->tok == MAN_PP && nc->body->child != NULL) { + if ((nc->tok == MAN_LP || nc->tok == MAN_PP || nc->tok == MAN_P) && + nc->body->child != NULL) { while (nc->body->last != NULL) { man->next = ROFF_NEXT_CHILD; roff_node_relink(man, nc->body->last); @@ -260,7 +331,8 @@ post_SH(CHKARGS) } } - if (nc->tok == MAN_PP || nc->tok == ROFF_sp || nc->tok == ROFF_br) { + if (nc->tok == MAN_LP || nc->tok == MAN_PP || nc->tok == MAN_P || + nc->tok == ROFF_sp || nc->tok == ROFF_br) { mandoc_msg(MANDOCERR_PAR_SKIP, nc->line, nc->pos, "%s after %s", roff_name[nc->tok], roff_name[n->tok]); roff_node_delete(man, nc); @@ -285,13 +357,11 @@ post_UR(CHKARGS) if (n->type == ROFFT_HEAD && n->child == NULL) mandoc_msg(MANDOCERR_UR_NOHEAD, n->line, n->pos, "%s", roff_name[n->tok]); - check_part(man, n); } static void check_part(CHKARGS) { - if (n->type == ROFFT_BODY && n->child == NULL) mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos, "%s", roff_name[n->tok]); @@ -334,12 +404,14 @@ check_par(CHKARGS) static void post_IP(CHKARGS) { - switch (n->type) { case ROFFT_BLOCK: if (n->head->child == NULL && n->body->child == NULL) roff_node_delete(man, n); break; + case ROFFT_HEAD: + check_tag(n, n->child); + break; case ROFFT_BODY: if (n->parent->head->child == NULL && n->child == NULL) mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, @@ -350,7 +422,38 @@ post_IP(CHKARGS) } } +/* + * The first next-line element in the head is the tag. + * If that's a font macro, use its first child instead. + */ static void +post_TP(CHKARGS) +{ + struct roff_node *nt; + + if (n->type != ROFFT_HEAD || (nt = n->child) == NULL) + return; + + while ((nt->flags & NODE_LINE) == 0) + if ((nt = nt->next) == NULL) + return; + + switch (nt->tok) { + case MAN_B: + case MAN_BI: + case MAN_BR: + case MAN_I: + case MAN_IB: + case MAN_IR: + nt = nt->child; + break; + default: + break; + } + check_tag(n, nt); +} + +static void post_TH(CHKARGS) { struct roff_node *nb; @@ -370,8 +473,8 @@ post_TH(CHKARGS) /* ->TITLE<- MSEC DATE OS VOL */ n = n->child; - if (n && n->string) { - for (p = n->string; '\0' != *p; p++) { + if (n != NULL && n->string != NULL) { + for (p = n->string; *p != '\0'; p++) { /* Only warn about this once... */ if (isalpha((unsigned char)*p) && ! isupper((unsigned char)*p)) { @@ -389,11 +492,16 @@ post_TH(CHKARGS) /* TITLE ->MSEC<- DATE OS VOL */ - if (n) + if (n != NULL) n = n->next; - if (n && n->string) + if (n != NULL && n->string != NULL) { man->meta.msec = mandoc_strdup(n->string); - else { + if (man->filesec != '\0' && + man->filesec != *n->string && + *n->string >= '1' && *n->string <= '9') + mandoc_msg(MANDOCERR_MSEC_FILE, n->line, n->pos, + "*.%c vs TH ... %c", man->filesec, *n->string); + } else { man->meta.msec = mandoc_strdup(""); mandoc_msg(MANDOCERR_MSEC_MISSING, nb->line, nb->pos, "TH %s", man->meta.title); @@ -401,18 +509,12 @@ post_TH(CHKARGS) /* TITLE MSEC ->DATE<- OS VOL */ - if (n) + if (n != NULL) n = n->next; - if (n && n->string && '\0' != n->string[0]) { - man->meta.date = man->quick ? - mandoc_strdup(n->string) : - mandoc_normdate(man, n->string, n->line, n->pos); - } else { + if (man->quick && n != NULL) man->meta.date = mandoc_strdup(""); - mandoc_msg(MANDOCERR_DATE_MISSING, - n ? n->line : nb->line, - n ? n->pos : nb->pos, "TH"); - } + else + man->meta.date = mandoc_normdate(n, nb); /* TITLE MSEC DATE ->OS<- VOL */ @@ -445,6 +547,32 @@ post_TH(CHKARGS) * meta-data. */ roff_node_delete(man, man->last); +} + +static void +post_MR(CHKARGS) +{ + struct roff_node *nch; + + if ((nch = n->child) == NULL) { + mandoc_msg(MANDOCERR_NM_NONAME, n->line, n->pos, "MR"); + return; + } + if (nch->next == NULL) { + mandoc_msg(MANDOCERR_XR_NOSEC, + n->line, n->pos, "MR %s", nch->string); + return; + } + if (mandoc_xr_add(nch->next->string, nch->string, nch->line, nch->pos)) + mandoc_msg(MANDOCERR_XR_SELF, nch->line, nch->pos, + "MR %s %s", nch->string, nch->next->string); + if ((nch = nch->next->next) == NULL || nch->next == NULL) + return; + + mandoc_msg(MANDOCERR_ARG_EXCESS, nch->next->line, nch->next->pos, + "MR ... %s", nch->next->string); + while (nch->next != NULL) + roff_node_delete(man, nch->next); } static void