=================================================================== RCS file: /cvs/mandoc/mdoc_validate.c,v retrieving revision 1.300 retrieving revision 1.387 diff -u -p -r1.300 -r1.387 --- mandoc/mdoc_validate.c 2015/10/30 19:04:16 1.300 +++ mandoc/mdoc_validate.c 2020/04/26 21:41:07 1.387 @@ -1,7 +1,7 @@ -/* $Id: mdoc_validate.c,v 1.300 2015/10/30 19:04:16 schwarze Exp $ */ +/* $Id: mdoc_validate.c,v 1.387 2020/04/26 21:41:07 schwarze Exp $ */ /* + * Copyright (c) 2010-2020 Ingo Schwarze * Copyright (c) 2008-2012 Kristaps Dzonsons - * Copyright (c) 2010-2015 Ingo Schwarze * Copyright (c) 2010 Joerg Sonnenberger * * Permission to use, copy, modify, and distribute this software for any @@ -15,6 +15,8 @@ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Validation module for mdoc(7) syntax trees used by mandoc(1). */ #include "config.h" @@ -33,11 +35,13 @@ #include "mandoc_aux.h" #include "mandoc.h" +#include "mandoc_xr.h" #include "roff.h" #include "mdoc.h" #include "libmandoc.h" #include "roff_int.h" #include "libmdoc.h" +#include "tag.h" /* FIXME: .Bl -diag can't have non-text children in HEAD. */ @@ -51,14 +55,19 @@ enum check_ineq { typedef void (*v_post)(POST_ARGS); -static void check_text(struct roff_man *, int, int, char *); +static int build_list(struct roff_man *, int); static void check_argv(struct roff_man *, struct roff_node *, struct mdoc_argv *); static void check_args(struct roff_man *, struct roff_node *); +static void check_text(struct roff_man *, int, int, char *); +static void check_text_em(struct roff_man *, int, int, char *); +static void check_toptext(struct roff_man *, int, int, const char *); static int child_an(const struct roff_node *); -static size_t macro2len(int); -static void rewrite_macro2len(char **); +static size_t macro2len(enum roff_tok); +static void rewrite_macro2len(struct roff_man *, char **); +static int similar(const char *, const char *); +static void post_abort(POST_ARGS) __attribute__((__noreturn__)); static void post_an(POST_ARGS); static void post_an_norm(POST_ARGS); static void post_at(POST_ARGS); @@ -67,24 +76,27 @@ static void post_bf(POST_ARGS); static void post_bk(POST_ARGS); static void post_bl(POST_ARGS); static void post_bl_block(POST_ARGS); -static void post_bl_block_tag(POST_ARGS); static void post_bl_head(POST_ARGS); static void post_bl_norm(POST_ARGS); static void post_bx(POST_ARGS); static void post_defaults(POST_ARGS); static void post_display(POST_ARGS); static void post_dd(POST_ARGS); +static void post_delim(POST_ARGS); +static void post_delim_nb(POST_ARGS); static void post_dt(POST_ARGS); +static void post_em(POST_ARGS); static void post_en(POST_ARGS); +static void post_er(POST_ARGS); static void post_es(POST_ARGS); static void post_eoln(POST_ARGS); static void post_ex(POST_ARGS); static void post_fa(POST_ARGS); +static void post_fl(POST_ARGS); static void post_fn(POST_ARGS); static void post_fname(POST_ARGS); static void post_fo(POST_ARGS); static void post_hyph(POST_ARGS); -static void post_ignpar(POST_ARGS); static void post_it(POST_ARGS); static void post_lb(POST_ARGS); static void post_nd(POST_ARGS); @@ -96,6 +108,8 @@ static void post_par(POST_ARGS); static void post_prevpar(POST_ARGS); static void post_root(POST_ARGS); static void post_rs(POST_ARGS); +static void post_rv(POST_ARGS); +static void post_section(POST_ARGS); static void post_sh(POST_ARGS); static void post_sh_head(POST_ARGS); static void post_sh_name(POST_ARGS); @@ -104,14 +118,19 @@ static void post_sh_authors(POST_ARGS); static void post_sm(POST_ARGS); static void post_st(POST_ARGS); static void post_std(POST_ARGS); +static void post_sx(POST_ARGS); +static void post_tag(POST_ARGS); +static void post_tg(POST_ARGS); +static void post_useless(POST_ARGS); +static void post_xr(POST_ARGS); +static void post_xx(POST_ARGS); -static v_post mdoc_valids[MDOC_MAX] = { - NULL, /* Ap */ +static const v_post mdoc_valids[MDOC_MAX - MDOC_Dd] = { post_dd, /* Dd */ post_dt, /* Dt */ post_os, /* Os */ post_sh, /* Sh */ - post_ignpar, /* Ss */ + post_section, /* Ss */ post_par, /* Pp */ post_display, /* D1 */ post_display, /* Dl */ @@ -120,33 +139,34 @@ static v_post mdoc_valids[MDOC_MAX] = { post_bl, /* Bl */ NULL, /* El */ post_it, /* It */ - NULL, /* Ad */ + post_delim_nb, /* Ad */ post_an, /* An */ + NULL, /* Ap */ post_defaults, /* Ar */ NULL, /* Cd */ - NULL, /* Cm */ - NULL, /* Dv */ - NULL, /* Er */ - NULL, /* Ev */ + post_tag, /* Cm */ + post_tag, /* Dv */ + post_er, /* Er */ + post_tag, /* Ev */ post_ex, /* Ex */ post_fa, /* Fa */ NULL, /* Fd */ - NULL, /* Fl */ + post_fl, /* Fl */ post_fn, /* Fn */ - NULL, /* Ft */ - NULL, /* Ic */ - NULL, /* In */ - post_defaults, /* Li */ + post_delim_nb, /* Ft */ + post_tag, /* Ic */ + post_delim_nb, /* In */ + post_tag, /* Li */ post_nd, /* Nd */ post_nm, /* Nm */ - NULL, /* Op */ - post_obsolete, /* Ot */ + post_delim_nb, /* Op */ + post_abort, /* Ot */ post_defaults, /* Pa */ - post_std, /* Rv */ + post_rv, /* Rv */ post_st, /* St */ - NULL, /* Va */ - NULL, /* Vt */ - NULL, /* Xr */ + post_delim_nb, /* Va */ + post_delim_nb, /* Vt */ + post_xr, /* Xr */ NULL, /* %A */ post_hyph, /* %B */ /* FIXME: can be used outside Rs/Re. */ NULL, /* %D */ @@ -160,13 +180,13 @@ static v_post mdoc_valids[MDOC_MAX] = { NULL, /* %V */ NULL, /* Ac */ NULL, /* Ao */ - NULL, /* Aq */ + post_delim_nb, /* Aq */ post_at, /* At */ NULL, /* Bc */ post_bf, /* Bf */ NULL, /* Bo */ NULL, /* Bq */ - NULL, /* Bsx */ + post_xx, /* Bsx */ post_bx, /* Bx */ post_obsolete, /* Db */ NULL, /* Dc */ @@ -174,32 +194,32 @@ static v_post mdoc_valids[MDOC_MAX] = { NULL, /* Dq */ NULL, /* Ec */ NULL, /* Ef */ - NULL, /* Em */ + post_em, /* Em */ NULL, /* Eo */ - NULL, /* Fx */ - NULL, /* Ms */ - NULL, /* No */ + post_xx, /* Fx */ + post_tag, /* Ms */ + post_tag, /* No */ post_ns, /* Ns */ - NULL, /* Nx */ - NULL, /* Ox */ + post_xx, /* Nx */ + post_xx, /* Ox */ NULL, /* Pc */ NULL, /* Pf */ NULL, /* Po */ - NULL, /* Pq */ + post_delim_nb, /* Pq */ NULL, /* Qc */ - NULL, /* Ql */ + post_delim_nb, /* Ql */ NULL, /* Qo */ - NULL, /* Qq */ + post_delim_nb, /* Qq */ NULL, /* Re */ post_rs, /* Rs */ NULL, /* Sc */ NULL, /* So */ - NULL, /* Sq */ + post_delim_nb, /* Sq */ post_sm, /* Sm */ - post_hyph, /* Sx */ - NULL, /* Sy */ - NULL, /* Tn */ - NULL, /* Ux */ + post_sx, /* Sx */ + post_em, /* Sy */ + post_useless, /* Tn */ + post_xx, /* Ux */ NULL, /* Xc */ NULL, /* Xo */ post_fo, /* Fo */ @@ -209,31 +229,29 @@ static v_post mdoc_valids[MDOC_MAX] = { post_bk, /* Bk */ NULL, /* Ek */ post_eoln, /* Bt */ - NULL, /* Hf */ + post_obsolete, /* Hf */ post_obsolete, /* Fr */ post_eoln, /* Ud */ post_lb, /* Lb */ - post_par, /* Lp */ - NULL, /* Lk */ + post_abort, /* Lp */ + post_delim_nb, /* Lk */ post_defaults, /* Mt */ - NULL, /* Brq */ + post_delim_nb, /* Brq */ NULL, /* Bro */ NULL, /* Brc */ NULL, /* %C */ post_es, /* Es */ post_en, /* En */ - NULL, /* Dx */ + post_xx, /* Dx */ NULL, /* %Q */ - post_par, /* br */ - post_par, /* sp */ NULL, /* %U */ NULL, /* Ta */ - NULL, /* ll */ + post_tg, /* Tg */ }; #define RSORD_MAX 14 /* Number of `Rs' blocks. */ -static const int rsord[RSORD_MAX] = { +static const enum roff_tok rsord[RSORD_MAX] = { MDOC__A, MDOC__T, MDOC__B, @@ -276,30 +294,68 @@ static const char * const secnames[SEC__MAX] = { NULL }; +static int fn_prio = TAG_STRONG; + +/* Validate the subtree rooted at mdoc->last. */ void -mdoc_node_validate(struct roff_man *mdoc) +mdoc_validate(struct roff_man *mdoc) { - struct roff_node *n; - v_post *p; + struct roff_node *n, *np; + const v_post *p; + /* + * Translate obsolete macros to modern macros first + * such that later code does not need to look + * for the obsolete versions. + */ + n = mdoc->last; + switch (n->tok) { + case MDOC_Lp: + n->tok = MDOC_Pp; + break; + case MDOC_Ot: + post_obsolete(mdoc); + n->tok = MDOC_Ft; + break; + default: + break; + } + + /* + * Iterate over all children, recursing into each one + * in turn, depth-first. + */ + mdoc->last = mdoc->last->child; while (mdoc->last != NULL) { - mdoc_node_validate(mdoc); + mdoc_validate(mdoc); if (mdoc->last == n) mdoc->last = mdoc->last->child; else mdoc->last = mdoc->last->next; } + /* Finally validate the macro itself. */ + mdoc->last = n; mdoc->next = ROFF_NEXT_SIBLING; switch (n->type) { case ROFFT_TEXT: - if (n->sec != SEC_SYNOPSIS || n->parent->tok != MDOC_Fd) + np = n->parent; + if (n->sec != SEC_SYNOPSIS || + (np->tok != MDOC_Cd && np->tok != MDOC_Fd)) check_text(mdoc, n->line, n->pos, n->string); + if ((n->flags & NODE_NOFILL) == 0 && + (np->tok != MDOC_It || np->type != ROFFT_HEAD || + np->parent->parent->norm->Bl.type != LIST_diag)) + check_text_em(mdoc, n->line, n->pos, n->string); + if (np->tok == MDOC_It || (np->type == ROFFT_BODY && + (np->tok == MDOC_Sh || np->tok == MDOC_Ss))) + check_toptext(mdoc, n->line, n->pos, n->string); break; + case ROFFT_COMMENT: case ROFFT_EQN: case ROFFT_TBL: break; @@ -316,13 +372,19 @@ mdoc_node_validate(struct roff_man *mdoc) */ if (n->child != NULL) - n->child->flags &= ~MDOC_DELIMC; + n->child->flags &= ~NODE_DELIMC; if (n->last != NULL) - n->last->flags &= ~MDOC_DELIMO; + n->last->flags &= ~NODE_DELIMO; /* Call the macro's postprocessor. */ - p = mdoc_valids + n->tok; + if (n->tok < ROFF_MAX) { + roff_validate(mdoc); + break; + } + + assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); + p = mdoc_valids + (n->tok - MDOC_Dd); if (*p) (*p)(mdoc); if (mdoc->last == n) @@ -358,15 +420,221 @@ check_text(struct roff_man *mdoc, int ln, int pos, cha { char *cp; - if (MDOC_LITERAL & mdoc->flags) + if (mdoc->last->flags & NODE_NOFILL) return; for (cp = p; NULL != (p = strchr(p, '\t')); p++) - mandoc_msg(MANDOCERR_FI_TAB, mdoc->parse, - ln, pos + (int)(p - cp), NULL); + mandoc_msg(MANDOCERR_FI_TAB, ln, pos + (int)(p - cp), NULL); } static void +check_text_em(struct roff_man *mdoc, int ln, int pos, char *p) +{ + const struct roff_node *np, *nn; + char *cp; + + np = mdoc->last->prev; + nn = mdoc->last->next; + + /* Look for em-dashes wrongly encoded as "--". */ + + for (cp = p; *cp != '\0'; cp++) { + if (cp[0] != '-' || cp[1] != '-') + continue; + cp++; + + /* Skip input sequences of more than two '-'. */ + + if (cp[1] == '-') { + while (cp[1] == '-') + cp++; + continue; + } + + /* Skip "--" directly attached to something else. */ + + if ((cp - p > 1 && cp[-2] != ' ') || + (cp[1] != '\0' && cp[1] != ' ')) + continue; + + /* Require a letter right before or right afterwards. */ + + if ((cp - p > 2 ? + isalpha((unsigned char)cp[-3]) : + np != NULL && + np->type == ROFFT_TEXT && + *np->string != '\0' && + isalpha((unsigned char)np->string[ + strlen(np->string) - 1])) || + (cp[1] != '\0' && cp[2] != '\0' ? + isalpha((unsigned char)cp[2]) : + nn != NULL && + nn->type == ROFFT_TEXT && + isalpha((unsigned char)*nn->string))) { + mandoc_msg(MANDOCERR_DASHDASH, + ln, pos + (int)(cp - p) - 1, NULL); + break; + } + } +} + +static void +check_toptext(struct roff_man *mdoc, int ln, int pos, const char *p) +{ + const char *cp, *cpr; + + if (*p == '\0') + return; + + if ((cp = strstr(p, "OpenBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, ln, pos + (int)(cp - p), "Ox"); + if ((cp = strstr(p, "NetBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, ln, pos + (int)(cp - p), "Nx"); + if ((cp = strstr(p, "FreeBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, ln, pos + (int)(cp - p), "Fx"); + if ((cp = strstr(p, "DragonFly")) != NULL) + mandoc_msg(MANDOCERR_BX, ln, pos + (int)(cp - p), "Dx"); + + cp = p; + while ((cp = strstr(cp + 1, "()")) != NULL) { + for (cpr = cp - 1; cpr >= p; cpr--) + if (*cpr != '_' && !isalnum((unsigned char)*cpr)) + break; + if ((cpr < p || *cpr == ' ') && cpr + 1 < cp) { + cpr++; + mandoc_msg(MANDOCERR_FUNC, ln, pos + (int)(cpr - p), + "%.*s()", (int)(cp - cpr), cpr); + } + } +} + +static void +post_abort(POST_ARGS) +{ + abort(); +} + +static void +post_delim(POST_ARGS) +{ + const struct roff_node *nch; + const char *lc; + enum mdelim delim; + enum roff_tok tok; + + tok = mdoc->last->tok; + nch = mdoc->last->last; + if (nch == NULL || nch->type != ROFFT_TEXT) + return; + lc = strchr(nch->string, '\0') - 1; + if (lc < nch->string) + return; + delim = mdoc_isdelim(lc); + if (delim == DELIM_NONE || delim == DELIM_OPEN) + return; + if (*lc == ')' && (tok == MDOC_Nd || tok == MDOC_Sh || + tok == MDOC_Ss || tok == MDOC_Fo)) + return; + + mandoc_msg(MANDOCERR_DELIM, nch->line, + nch->pos + (int)(lc - nch->string), "%s%s %s", roff_name[tok], + nch == mdoc->last->child ? "" : " ...", nch->string); +} + +static void +post_delim_nb(POST_ARGS) +{ + const struct roff_node *nch; + const char *lc, *cp; + int nw; + enum mdelim delim; + enum roff_tok tok; + + /* + * Find candidates: at least two bytes, + * the last one a closing or middle delimiter. + */ + + tok = mdoc->last->tok; + nch = mdoc->last->last; + if (nch == NULL || nch->type != ROFFT_TEXT) + return; + lc = strchr(nch->string, '\0') - 1; + if (lc <= nch->string) + return; + delim = mdoc_isdelim(lc); + if (delim == DELIM_NONE || delim == DELIM_OPEN) + return; + + /* + * Reduce false positives by allowing various cases. + */ + + /* Escaped delimiters. */ + if (lc > nch->string + 1 && lc[-2] == '\\' && + (lc[-1] == '&' || lc[-1] == 'e')) + return; + + /* Specific byte sequences. */ + switch (*lc) { + case ')': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '(') + return; + break; + case '.': + if (lc > nch->string + 1 && lc[-2] == '.' && lc[-1] == '.') + return; + if (lc[-1] == '.') + return; + break; + case ';': + if (tok == MDOC_Vt) + return; + break; + case '?': + if (lc[-1] == '?') + return; + break; + case ']': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '[') + return; + break; + case '|': + if (lc == nch->string + 1 && lc[-1] == '|') + return; + default: + break; + } + + /* Exactly two non-alphanumeric bytes. */ + if (lc == nch->string + 1 && !isalnum((unsigned char)lc[-1])) + return; + + /* At least three alphabetic words with a sentence ending. */ + if (strchr("!.:?", *lc) != NULL && (tok == MDOC_Em || + tok == MDOC_Li || tok == MDOC_Pq || tok == MDOC_Sy)) { + nw = 0; + for (cp = lc - 1; cp >= nch->string; cp--) { + if (*cp == ' ') { + nw++; + if (cp > nch->string && cp[-1] == ',') + cp--; + } else if (isalpha((unsigned int)*cp)) { + if (nw > 1) + return; + } else + break; + } + } + + mandoc_msg(MANDOCERR_DELIM_NB, nch->line, + nch->pos + (int)(lc - nch->string), "%s%s %s", roff_name[tok], + nch == mdoc->last->child ? "" : " ...", nch->string); +} + +static void post_bl_norm(POST_ARGS) { struct roff_node *n; @@ -428,40 +696,35 @@ post_bl_norm(POST_ARGS) case MDOC_Compact: if (n->norm->Bl.comp) mandoc_msg(MANDOCERR_ARG_REP, - mdoc->parse, argv->line, - argv->pos, "Bl -compact"); + argv->line, argv->pos, "Bl -compact"); n->norm->Bl.comp = 1; break; case MDOC_Width: wa = argv; if (0 == argv->sz) { mandoc_msg(MANDOCERR_ARG_EMPTY, - mdoc->parse, argv->line, - argv->pos, "Bl -width"); + argv->line, argv->pos, "Bl -width"); n->norm->Bl.width = "0n"; break; } if (NULL != n->norm->Bl.width) - mandoc_vmsg(MANDOCERR_ARG_REP, - mdoc->parse, argv->line, - argv->pos, "Bl -width %s", - argv->value[0]); - rewrite_macro2len(argv->value); + mandoc_msg(MANDOCERR_ARG_REP, + argv->line, argv->pos, + "Bl -width %s", argv->value[0]); + rewrite_macro2len(mdoc, argv->value); n->norm->Bl.width = argv->value[0]; break; case MDOC_Offset: if (0 == argv->sz) { mandoc_msg(MANDOCERR_ARG_EMPTY, - mdoc->parse, argv->line, - argv->pos, "Bl -offset"); + argv->line, argv->pos, "Bl -offset"); break; } if (NULL != n->norm->Bl.offs) - mandoc_vmsg(MANDOCERR_ARG_REP, - mdoc->parse, argv->line, - argv->pos, "Bl -offset %s", - argv->value[0]); - rewrite_macro2len(argv->value); + mandoc_msg(MANDOCERR_ARG_REP, + argv->line, argv->pos, + "Bl -offset %s", argv->value[0]); + rewrite_macro2len(mdoc, argv->value); n->norm->Bl.offs = argv->value[0]; break; default: @@ -474,8 +737,7 @@ post_bl_norm(POST_ARGS) /* Check: multiple list types. */ if (LIST__NONE != n->norm->Bl.type) { - mandoc_vmsg(MANDOCERR_BL_REP, - mdoc->parse, n->line, n->pos, + mandoc_msg(MANDOCERR_BL_REP, n->line, n->pos, "Bl -%s", mdoc_argnames[argv->arg]); continue; } @@ -485,8 +747,8 @@ post_bl_norm(POST_ARGS) if (n->norm->Bl.width || n->norm->Bl.offs || n->norm->Bl.comp) - mandoc_vmsg(MANDOCERR_BL_LATETYPE, - mdoc->parse, n->line, n->pos, "Bl -%s", + mandoc_msg(MANDOCERR_BL_LATETYPE, + n->line, n->pos, "Bl -%s", mdoc_argnames[n->args->argv[0].arg]); n->norm->Bl.type = lt; @@ -499,9 +761,9 @@ post_bl_norm(POST_ARGS) /* Allow lists to default to LIST_item. */ if (LIST__NONE == n->norm->Bl.type) { - mandoc_msg(MANDOCERR_BL_NOTYPE, mdoc->parse, - n->line, n->pos, "Bl"); + mandoc_msg(MANDOCERR_BL_NOTYPE, n->line, n->pos, "Bl"); n->norm->Bl.type = LIST_item; + mdoclt = MDOC_Item; } /* @@ -513,8 +775,8 @@ post_bl_norm(POST_ARGS) switch (n->norm->Bl.type) { case LIST_tag: - if (NULL == n->norm->Bl.width) - mandoc_msg(MANDOCERR_BL_NOWIDTH, mdoc->parse, + if (n->norm->Bl.width == NULL) + mandoc_msg(MANDOCERR_BL_NOWIDTH, n->line, n->pos, "Bl -tag"); break; case LIST_column: @@ -522,19 +784,19 @@ post_bl_norm(POST_ARGS) case LIST_ohang: case LIST_inset: case LIST_item: - if (n->norm->Bl.width) - mandoc_vmsg(MANDOCERR_BL_SKIPW, mdoc->parse, - wa->line, wa->pos, "Bl -%s", - mdoc_argnames[mdoclt]); + if (n->norm->Bl.width != NULL) + mandoc_msg(MANDOCERR_BL_SKIPW, wa->line, wa->pos, + "Bl -%s", mdoc_argnames[mdoclt]); + n->norm->Bl.width = NULL; break; case LIST_bullet: case LIST_dash: case LIST_hyphen: - if (NULL == n->norm->Bl.width) + if (n->norm->Bl.width == NULL) n->norm->Bl.width = "2n"; break; case LIST_enum: - if (NULL == n->norm->Bl.width) + if (n->norm->Bl.width == NULL) n->norm->Bl.width = "3n"; break; default: @@ -572,29 +834,25 @@ post_bd(POST_ARGS) dt = DISP_literal; break; case MDOC_File: - mandoc_msg(MANDOCERR_BD_FILE, mdoc->parse, - n->line, n->pos, NULL); + mandoc_msg(MANDOCERR_BD_FILE, n->line, n->pos, NULL); break; case MDOC_Offset: if (0 == argv->sz) { mandoc_msg(MANDOCERR_ARG_EMPTY, - mdoc->parse, argv->line, - argv->pos, "Bd -offset"); + argv->line, argv->pos, "Bd -offset"); break; } if (NULL != n->norm->Bd.offs) - mandoc_vmsg(MANDOCERR_ARG_REP, - mdoc->parse, argv->line, - argv->pos, "Bd -offset %s", - argv->value[0]); - rewrite_macro2len(argv->value); + mandoc_msg(MANDOCERR_ARG_REP, + argv->line, argv->pos, + "Bd -offset %s", argv->value[0]); + rewrite_macro2len(mdoc, argv->value); n->norm->Bd.offs = argv->value[0]; break; case MDOC_Compact: if (n->norm->Bd.comp) mandoc_msg(MANDOCERR_ARG_REP, - mdoc->parse, argv->line, - argv->pos, "Bd -compact"); + argv->line, argv->pos, "Bd -compact"); n->norm->Bd.comp = 1; break; default: @@ -606,18 +864,20 @@ post_bd(POST_ARGS) if (DISP__NONE == n->norm->Bd.type) n->norm->Bd.type = dt; else - mandoc_vmsg(MANDOCERR_BD_REP, - mdoc->parse, n->line, n->pos, + mandoc_msg(MANDOCERR_BD_REP, n->line, n->pos, "Bd -%s", mdoc_argnames[argv->arg]); } if (DISP__NONE == n->norm->Bd.type) { - mandoc_msg(MANDOCERR_BD_NOTYPE, mdoc->parse, - n->line, n->pos, "Bd"); + mandoc_msg(MANDOCERR_BD_NOTYPE, n->line, n->pos, "Bd"); n->norm->Bd.type = DISP_ragged; } } +/* + * Stand-alone line macros. + */ + static void post_an_norm(POST_ARGS) { @@ -631,8 +891,7 @@ post_an_norm(POST_ARGS) for (i = 1; i < n->args->argc; i++) { argv = n->args->argv + i; - mandoc_vmsg(MANDOCERR_AN_REP, - mdoc->parse, argv->line, argv->pos, + mandoc_msg(MANDOCERR_AN_REP, argv->line, argv->pos, "An -%s", mdoc_argnames[argv->arg]); } @@ -646,31 +905,346 @@ post_an_norm(POST_ARGS) } static void +post_eoln(POST_ARGS) +{ + struct roff_node *n; + + post_useless(mdoc); + n = mdoc->last; + if (n->child != NULL) + mandoc_msg(MANDOCERR_ARG_SKIP, n->line, + n->pos, "%s %s", roff_name[n->tok], n->child->string); + + while (n->child != NULL) + roff_node_delete(mdoc, n->child); + + roff_word_alloc(mdoc, n->line, n->pos, n->tok == MDOC_Bt ? + "is currently in beta test." : "currently under development."); + mdoc->last->flags |= NODE_EOS | NODE_NOSRC; + mdoc->last = n; +} + +static int +build_list(struct roff_man *mdoc, int tok) +{ + struct roff_node *n; + int ic; + + n = mdoc->last->next; + for (ic = 1;; ic++) { + roff_elem_alloc(mdoc, n->line, n->pos, tok); + mdoc->last->flags |= NODE_NOSRC; + roff_node_relink(mdoc, n); + n = mdoc->last = mdoc->last->parent; + mdoc->next = ROFF_NEXT_SIBLING; + if (n->next == NULL) + return ic; + if (ic > 1 || n->next->next != NULL) { + roff_word_alloc(mdoc, n->line, n->pos, ","); + mdoc->last->flags |= NODE_DELIMC | NODE_NOSRC; + } + n = mdoc->last->next; + if (n->next == NULL) { + roff_word_alloc(mdoc, n->line, n->pos, "and"); + mdoc->last->flags |= NODE_NOSRC; + } + } +} + +static void +post_ex(POST_ARGS) +{ + struct roff_node *n; + int ic; + + post_std(mdoc); + + n = mdoc->last; + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, n->line, n->pos, "The"); + mdoc->last->flags |= NODE_NOSRC; + + if (mdoc->last->next != NULL) + ic = build_list(mdoc, MDOC_Nm); + else if (mdoc->meta.name != NULL) { + roff_elem_alloc(mdoc, n->line, n->pos, MDOC_Nm); + mdoc->last->flags |= NODE_NOSRC; + roff_word_alloc(mdoc, n->line, n->pos, mdoc->meta.name); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = mdoc->last->parent; + mdoc->next = ROFF_NEXT_SIBLING; + ic = 1; + } else { + mandoc_msg(MANDOCERR_EX_NONAME, n->line, n->pos, "Ex"); + ic = 0; + } + + roff_word_alloc(mdoc, n->line, n->pos, + ic > 1 ? "utilities exit\\~0" : "utility exits\\~0"); + mdoc->last->flags |= NODE_NOSRC; + roff_word_alloc(mdoc, n->line, n->pos, + "on success, and\\~>0 if an error occurs."); + mdoc->last->flags |= NODE_EOS | NODE_NOSRC; + mdoc->last = n; +} + +static void +post_lb(POST_ARGS) +{ + struct roff_node *n; + const char *p; + + post_delim_nb(mdoc); + + n = mdoc->last; + assert(n->child->type == ROFFT_TEXT); + mdoc->next = ROFF_NEXT_CHILD; + + if ((p = mdoc_a2lib(n->child->string)) != NULL) { + n->child->flags |= NODE_NOPRT; + roff_word_alloc(mdoc, n->line, n->pos, p); + mdoc->last->flags = NODE_NOSRC; + mdoc->last = n; + return; + } + + mandoc_msg(MANDOCERR_LB_BAD, n->child->line, + n->child->pos, "Lb %s", n->child->string); + + roff_word_alloc(mdoc, n->line, n->pos, "library"); + mdoc->last->flags = NODE_NOSRC; + roff_word_alloc(mdoc, n->line, n->pos, "\\(lq"); + mdoc->last->flags = NODE_DELIMO | NODE_NOSRC; + mdoc->last = mdoc->last->next; + roff_word_alloc(mdoc, n->line, n->pos, "\\(rq"); + mdoc->last->flags = NODE_DELIMC | NODE_NOSRC; + mdoc->last = n; +} + +static void +post_rv(POST_ARGS) +{ + struct roff_node *n; + int ic; + + post_std(mdoc); + + n = mdoc->last; + mdoc->next = ROFF_NEXT_CHILD; + if (n->child != NULL) { + roff_word_alloc(mdoc, n->line, n->pos, "The"); + mdoc->last->flags |= NODE_NOSRC; + ic = build_list(mdoc, MDOC_Fn); + roff_word_alloc(mdoc, n->line, n->pos, + ic > 1 ? "functions return" : "function returns"); + mdoc->last->flags |= NODE_NOSRC; + roff_word_alloc(mdoc, n->line, n->pos, + "the value\\~0 if successful;"); + } else + roff_word_alloc(mdoc, n->line, n->pos, "Upon successful " + "completion, the value\\~0 is returned;"); + mdoc->last->flags |= NODE_NOSRC; + + roff_word_alloc(mdoc, n->line, n->pos, "otherwise " + "the value\\~\\-1 is returned and the global variable"); + mdoc->last->flags |= NODE_NOSRC; + roff_elem_alloc(mdoc, n->line, n->pos, MDOC_Va); + mdoc->last->flags |= NODE_NOSRC; + roff_word_alloc(mdoc, n->line, n->pos, "errno"); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = mdoc->last->parent; + mdoc->next = ROFF_NEXT_SIBLING; + roff_word_alloc(mdoc, n->line, n->pos, + "is set to indicate the error."); + mdoc->last->flags |= NODE_EOS | NODE_NOSRC; + mdoc->last = n; +} + +static void post_std(POST_ARGS) { struct roff_node *n; + post_delim(mdoc); + n = mdoc->last; if (n->args && n->args->argc == 1) if (n->args->argv[0].arg == MDOC_Std) return; - mandoc_msg(MANDOCERR_ARG_STD, mdoc->parse, - n->line, n->pos, mdoc_macronames[n->tok]); + mandoc_msg(MANDOCERR_ARG_STD, n->line, n->pos, + "%s", roff_name[n->tok]); } static void +post_st(POST_ARGS) +{ + struct roff_node *n, *nch; + const char *p; + + n = mdoc->last; + nch = n->child; + assert(nch->type == ROFFT_TEXT); + + if ((p = mdoc_a2st(nch->string)) == NULL) { + mandoc_msg(MANDOCERR_ST_BAD, + nch->line, nch->pos, "St %s", nch->string); + roff_node_delete(mdoc, n); + return; + } + + nch->flags |= NODE_NOPRT; + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, nch->line, nch->pos, p); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last= n; +} + +static void +post_tg(POST_ARGS) +{ + struct roff_node *n; /* The .Tg node. */ + struct roff_node *nch; /* The first child of the .Tg node. */ + struct roff_node *nn; /* The next node after the .Tg node. */ + struct roff_node *np; /* The parent of the next node. */ + struct roff_node *nt; /* The TEXT node containing the tag. */ + size_t len; /* The number of bytes in the tag. */ + + /* Find the next node. */ + n = mdoc->last; + for (nn = n; nn != NULL; nn = nn->parent) { + if (nn->next != NULL) { + nn = nn->next; + break; + } + } + + /* Find the tag. */ + nt = nch = n->child; + if (nch == NULL && nn != NULL && nn->child != NULL && + nn->child->type == ROFFT_TEXT) + nt = nn->child; + + /* Validate the tag. */ + if (nt == NULL || *nt->string == '\0') + mandoc_msg(MANDOCERR_MACRO_EMPTY, n->line, n->pos, "Tg"); + if (nt == NULL) { + roff_node_delete(mdoc, n); + return; + } + len = strcspn(nt->string, " \t\\"); + if (nt->string[len] != '\0') + mandoc_msg(MANDOCERR_TG_SPC, nt->line, + nt->pos + len, "Tg %s", nt->string); + + /* Keep only the first argument. */ + if (nch != NULL && nch->next != NULL) { + mandoc_msg(MANDOCERR_ARG_EXCESS, nch->next->line, + nch->next->pos, "Tg ... %s", nch->next->string); + while (nch->next != NULL) + roff_node_delete(mdoc, nch->next); + } + + /* Drop the macro if the first argument is invalid. */ + if (len == 0 || nt->string[len] != '\0') { + roff_node_delete(mdoc, n); + return; + } + + /* By default, tag the .Tg node itself. */ + if (nn == NULL || nn->flags & NODE_ID) + nn = n; + + /* Explicit tagging of specific macros. */ + switch (nn->tok) { + case MDOC_Sh: + case MDOC_Ss: + case MDOC_Fo: + nn = nn->head->child == NULL ? n : nn->head; + break; + case MDOC_It: + np = nn->parent; + while (np->tok != MDOC_Bl) + np = np->parent; + switch (np->norm->Bl.type) { + case LIST_column: + break; + case LIST_diag: + case LIST_hang: + case LIST_inset: + case LIST_ohang: + case LIST_tag: + nn = nn->head; + break; + case LIST_bullet: + case LIST_dash: + case LIST_enum: + case LIST_hyphen: + case LIST_item: + nn = nn->body->child == NULL ? n : nn->body; + break; + default: + abort(); + } + break; + case MDOC_Bd: + case MDOC_Bl: + case MDOC_D1: + case MDOC_Dl: + nn = nn->body->child == NULL ? n : nn->body; + break; + case MDOC_Pp: + break; + case MDOC_Cm: + case MDOC_Dv: + case MDOC_Em: + case MDOC_Er: + case MDOC_Ev: + case MDOC_Fl: + case MDOC_Fn: + case MDOC_Ic: + case MDOC_Li: + case MDOC_Ms: + case MDOC_No: + case MDOC_Sy: + if (nn->child == NULL) + nn = n; + break; + default: + nn = n; + break; + } + tag_put(nt->string, TAG_MANUAL, nn); + if (nn != n) + n->flags |= NODE_NOPRT; +} + +static void post_obsolete(POST_ARGS) { struct roff_node *n; n = mdoc->last; if (n->type == ROFFT_ELEM || n->type == ROFFT_BLOCK) - mandoc_msg(MANDOCERR_MACRO_OBS, mdoc->parse, - n->line, n->pos, mdoc_macronames[n->tok]); + mandoc_msg(MANDOCERR_MACRO_OBS, n->line, n->pos, + "%s", roff_name[n->tok]); } static void +post_useless(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + mandoc_msg(MANDOCERR_MACRO_USELESS, n->line, n->pos, + "%s", roff_name[n->tok]); +} + +/* + * Block macros. + */ + +static void post_bf(POST_ARGS) { struct roff_node *np, *nch; @@ -692,14 +1266,14 @@ post_bf(POST_ARGS) nch = np->child; if (np->parent->args == NULL) { if (nch == NULL) { - mandoc_msg(MANDOCERR_BF_NOFONT, mdoc->parse, + mandoc_msg(MANDOCERR_BF_NOFONT, np->line, np->pos, "Bf"); return; } nch = nch->next; } if (nch != NULL) - mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, + mandoc_msg(MANDOCERR_ARG_EXCESS, nch->line, nch->pos, "Bf ... %s", nch->string); /* Extract argument into data. */ @@ -730,63 +1304,39 @@ post_bf(POST_ARGS) else if ( ! strcmp(np->child->string, "Sy")) np->norm->Bf.font = FONT_Sy; else - mandoc_vmsg(MANDOCERR_BF_BADFONT, mdoc->parse, - np->child->line, np->child->pos, - "Bf %s", np->child->string); + mandoc_msg(MANDOCERR_BF_BADFONT, np->child->line, + np->child->pos, "Bf %s", np->child->string); } static void -post_lb(POST_ARGS) -{ - struct roff_node *n; - const char *stdlibname; - char *libname; - - n = mdoc->last->child; - assert(n->type == ROFFT_TEXT); - - if (NULL == (stdlibname = mdoc_a2lib(n->string))) - mandoc_asprintf(&libname, - "library \\(Lq%s\\(Rq", n->string); - else - libname = mandoc_strdup(stdlibname); - - free(n->string); - n->string = libname; -} - -static void -post_eoln(POST_ARGS) -{ - const struct roff_node *n; - - n = mdoc->last; - if (n->child != NULL) - mandoc_vmsg(MANDOCERR_ARG_SKIP, - mdoc->parse, n->line, n->pos, - "%s %s", mdoc_macronames[n->tok], - n->child->string); -} - -static void post_fname(POST_ARGS) { - const struct roff_node *n; + struct roff_node *n, *nch; const char *cp; size_t pos; - n = mdoc->last->child; - pos = strcspn(n->string, "()"); - cp = n->string + pos; - if ( ! (cp[0] == '\0' || (cp[0] == '(' && cp[1] == '*'))) - mandoc_msg(MANDOCERR_FN_PAREN, mdoc->parse, - n->line, n->pos + pos, n->string); + n = mdoc->last; + nch = n->child; + cp = nch->string; + if (*cp == '(') { + if (cp[strlen(cp + 1)] == ')') + return; + pos = 0; + } else { + pos = strcspn(cp, "()"); + if (cp[pos] == '\0') { + if (n->sec == SEC_DESCRIPTION || + n->sec == SEC_CUSTOM) + tag_put(NULL, fn_prio++, n); + return; + } + } + mandoc_msg(MANDOCERR_FN_PAREN, nch->line, nch->pos + pos, "%s", cp); } static void post_fn(POST_ARGS) { - post_fname(mdoc); post_fa(mdoc); } @@ -802,17 +1352,17 @@ post_fo(POST_ARGS) return; if (n->child == NULL) { - mandoc_msg(MANDOCERR_FO_NOHEAD, mdoc->parse, - n->line, n->pos, "Fo"); + mandoc_msg(MANDOCERR_FO_NOHEAD, n->line, n->pos, "Fo"); return; } if (n->child != n->last) { - mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, + mandoc_msg(MANDOCERR_ARG_EXCESS, n->child->next->line, n->child->next->pos, "Fo ... %s", n->child->next->string); while (n->child != n->last) roff_node_delete(mdoc, n->last); - } + } else + post_delim(mdoc); post_fname(mdoc); } @@ -830,12 +1380,12 @@ post_fa(POST_ARGS) break; if (*cp != ',') continue; - mandoc_msg(MANDOCERR_FA_COMMA, mdoc->parse, - n->line, n->pos + (cp - n->string), - n->string); + mandoc_msg(MANDOCERR_FA_COMMA, n->line, + n->pos + (int)(cp - n->string), "%s", n->string); break; } } + post_delim_nb(mdoc); } static void @@ -845,19 +1395,39 @@ post_nm(POST_ARGS) n = mdoc->last; - if (n->last != NULL && - (n->last->tok == MDOC_Pp || - n->last->tok == MDOC_Lp)) - mdoc_node_relink(mdoc, n->last); + if (n->sec == SEC_NAME && n->child != NULL && + n->child->type == ROFFT_TEXT && mdoc->meta.msec != NULL) + mandoc_xr_add(mdoc->meta.msec, n->child->string, -1, -1); - if (mdoc->meta.name != NULL) + if (n->last != NULL && n->last->tok == MDOC_Pp) + roff_node_relink(mdoc, n->last); + + if (mdoc->meta.name == NULL) + deroff(&mdoc->meta.name, n); + + if (mdoc->meta.name == NULL || + (mdoc->lastsec == SEC_NAME && n->child == NULL)) + mandoc_msg(MANDOCERR_NM_NONAME, n->line, n->pos, "Nm"); + + switch (n->type) { + case ROFFT_ELEM: + post_delim_nb(mdoc); + break; + case ROFFT_HEAD: + post_delim(mdoc); + break; + default: return; + } - deroff(&mdoc->meta.name, n); + if ((n->child != NULL && n->child->type == ROFFT_TEXT) || + mdoc->meta.name == NULL) + return; - if (mdoc->meta.name == NULL) - mandoc_msg(MANDOCERR_NM_NONAME, mdoc->parse, - n->line, n->pos, "Nm"); + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, n->line, n->pos, mdoc->meta.name); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = n; } static void @@ -870,9 +1440,13 @@ post_nd(POST_ARGS) if (n->type != ROFFT_BODY) return; + if (n->sec != SEC_NAME) + mandoc_msg(MANDOCERR_ND_LATE, n->line, n->pos, "Nd"); + if (n->child == NULL) - mandoc_msg(MANDOCERR_ND_EMPTY, mdoc->parse, - n->line, n->pos, "Nd"); + mandoc_msg(MANDOCERR_ND_EMPTY, n->line, n->pos, "Nd"); + else + post_delim(mdoc); post_hyph(mdoc); } @@ -885,11 +1459,13 @@ post_display(POST_ARGS) n = mdoc->last; switch (n->type) { case ROFFT_BODY: - if (n->end != ENDBODY_NOT) - break; - if (n->child == NULL) - mandoc_msg(MANDOCERR_BLK_EMPTY, mdoc->parse, - n->line, n->pos, mdoc_macronames[n->tok]); + if (n->end != ENDBODY_NOT) { + if (n->tok == MDOC_Bd && + n->body->parent->args == NULL) + roff_node_delete(mdoc, n); + } else if (n->child == NULL) + mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos, + "%s", roff_name[n->tok]); else if (n->tok == MDOC_D1) post_hyph(mdoc); break; @@ -897,10 +1473,10 @@ post_display(POST_ARGS) if (n->tok == MDOC_Bd) { if (n->args == NULL) { mandoc_msg(MANDOCERR_BD_NOARG, - mdoc->parse, n->line, n->pos, "Bd"); + n->line, n->pos, "Bd"); mdoc->next = ROFF_NEXT_SIBLING; while (n->body->child != NULL) - mdoc_node_relink(mdoc, + roff_node_relink(mdoc, n->body->child); roff_node_delete(mdoc, n); break; @@ -910,9 +1486,8 @@ post_display(POST_ARGS) } for (np = n->parent; np != NULL; np = np->parent) { if (np->type == ROFFT_BLOCK && np->tok == MDOC_Bd) { - mandoc_vmsg(MANDOCERR_BD_NEST, - mdoc->parse, n->line, n->pos, - "%s in Bd", mdoc_macronames[n->tok]); + mandoc_msg(MANDOCERR_BD_NEST, n->line, + n->pos, "%s in Bd", roff_name[n->tok]); break; } } @@ -925,50 +1500,39 @@ post_display(POST_ARGS) static void post_defaults(POST_ARGS) { - struct roff_node *nn; + struct roff_node *n; - /* - * The `Ar' defaults to "file ..." if no value is provided as an - * argument; the `Mt' and `Pa' macros use "~"; the `Li' just - * gets an empty string. - */ - - if (mdoc->last->child != NULL) + n = mdoc->last; + if (n->child != NULL) { + post_delim_nb(mdoc); return; - - nn = mdoc->last; - - switch (nn->tok) { + } + mdoc->next = ROFF_NEXT_CHILD; + switch (n->tok) { case MDOC_Ar: - mdoc->next = ROFF_NEXT_CHILD; - roff_word_alloc(mdoc, nn->line, nn->pos, "file"); - roff_word_alloc(mdoc, nn->line, nn->pos, "..."); + roff_word_alloc(mdoc, n->line, n->pos, "file"); + mdoc->last->flags |= NODE_NOSRC; + roff_word_alloc(mdoc, n->line, n->pos, "..."); break; case MDOC_Pa: case MDOC_Mt: - mdoc->next = ROFF_NEXT_CHILD; - roff_word_alloc(mdoc, nn->line, nn->pos, "~"); + roff_word_alloc(mdoc, n->line, n->pos, "~"); break; default: abort(); } - mdoc->last = nn; + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = n; } static void post_at(POST_ARGS) { - struct roff_node *n; - const char *std_att; - char *att; + struct roff_node *n, *nch; + const char *att; n = mdoc->last; - if (n->child == NULL) { - mdoc->next = ROFF_NEXT_CHILD; - roff_word_alloc(mdoc, n->line, n->pos, "AT&T UNIX"); - mdoc->last = n; - return; - } + nch = n->child; /* * If we have a child, look it up in the standard keys. If a @@ -976,17 +1540,19 @@ post_at(POST_ARGS) * prefix "AT&T UNIX " to the existing data. */ - n = n->child; - assert(n->type == ROFFT_TEXT); - if ((std_att = mdoc_a2att(n->string)) == NULL) { - mandoc_vmsg(MANDOCERR_AT_BAD, mdoc->parse, - n->line, n->pos, "At %s", n->string); - mandoc_asprintf(&att, "AT&T UNIX %s", n->string); - } else - att = mandoc_strdup(std_att); + att = NULL; + if (nch != NULL && ((att = mdoc_a2att(nch->string)) == NULL)) + mandoc_msg(MANDOCERR_AT_BAD, + nch->line, nch->pos, "At %s", nch->string); - free(n->string); - n->string = att; + mdoc->next = ROFF_NEXT_CHILD; + if (att != NULL) { + roff_word_alloc(mdoc, nch->line, nch->pos, att); + nch->flags |= NODE_NOPRT; + } else + roff_word_alloc(mdoc, n->line, n->pos, "AT&T UNIX"); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = n; } static void @@ -1000,31 +1566,144 @@ post_an(POST_ARGS) nch = np->child; if (np->norm->An.auth == AUTH__NONE) { if (nch == NULL) - mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, + mandoc_msg(MANDOCERR_MACRO_EMPTY, np->line, np->pos, "An"); + else + post_delim_nb(mdoc); } else if (nch != NULL) - mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, + mandoc_msg(MANDOCERR_ARG_EXCESS, nch->line, nch->pos, "An ... %s", nch->string); } static void -post_en(POST_ARGS) +post_em(POST_ARGS) { + post_tag(mdoc); + tag_put(NULL, TAG_FALLBACK, mdoc->last); +} +static void +post_en(POST_ARGS) +{ post_obsolete(mdoc); if (mdoc->last->type == ROFFT_BLOCK) mdoc->last->norm->Es = mdoc->last_es; } static void -post_es(POST_ARGS) +post_er(POST_ARGS) { + struct roff_node *n; + n = mdoc->last; + if (n->sec == SEC_ERRORS && + (n->parent->tok == MDOC_It || + (n->parent->tok == MDOC_Bq && + n->parent->parent->parent->tok == MDOC_It))) + tag_put(NULL, TAG_STRONG, n); + post_delim_nb(mdoc); +} + +static void +post_tag(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + if ((n->prev == NULL || + (n->prev->type == ROFFT_TEXT && + strcmp(n->prev->string, "|") == 0)) && + (n->parent->tok == MDOC_It || + (n->parent->tok == MDOC_Xo && + n->parent->parent->prev == NULL && + n->parent->parent->parent->tok == MDOC_It))) + tag_put(NULL, TAG_STRONG, n); + post_delim_nb(mdoc); +} + +static void +post_es(POST_ARGS) +{ post_obsolete(mdoc); mdoc->last_es = mdoc->last; } static void +post_fl(POST_ARGS) +{ + struct roff_node *n; + char *cp; + + /* + * Transform ".Fl Fl long" to ".Fl \-long", + * resulting for example in better HTML output. + */ + + n = mdoc->last; + if (n->prev != NULL && n->prev->tok == MDOC_Fl && + n->prev->child == NULL && n->child != NULL && + (n->flags & NODE_LINE) == 0) { + mandoc_asprintf(&cp, "\\-%s", n->child->string); + free(n->child->string); + n->child->string = cp; + roff_node_delete(mdoc, n->prev); + } + post_tag(mdoc); +} + +static void +post_xx(POST_ARGS) +{ + struct roff_node *n; + const char *os; + char *v; + + post_delim_nb(mdoc); + + n = mdoc->last; + switch (n->tok) { + case MDOC_Bsx: + os = "BSD/OS"; + break; + case MDOC_Dx: + os = "DragonFly"; + break; + case MDOC_Fx: + os = "FreeBSD"; + break; + case MDOC_Nx: + os = "NetBSD"; + if (n->child == NULL) + break; + v = n->child->string; + if ((v[0] != '0' && v[0] != '1') || v[1] != '.' || + v[2] < '0' || v[2] > '9' || + v[3] < 'a' || v[3] > 'z' || v[4] != '\0') + break; + n->child->flags |= NODE_NOPRT; + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, n->child->line, n->child->pos, v); + v = mdoc->last->string; + v[3] = toupper((unsigned char)v[3]); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = n; + break; + case MDOC_Ox: + os = "OpenBSD"; + break; + case MDOC_Ux: + os = "UNIX"; + break; + default: + abort(); + } + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, n->line, n->pos, os); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = n; +} + +static void post_it(POST_ARGS) { struct roff_node *nbl, *nit, *nch; @@ -1047,9 +1726,8 @@ post_it(POST_ARGS) case LIST_inset: case LIST_diag: if (nit->head->child == NULL) - mandoc_vmsg(MANDOCERR_IT_NOHEAD, - mdoc->parse, nit->line, nit->pos, - "Bl -%s It", + mandoc_msg(MANDOCERR_IT_NOHEAD, + nit->line, nit->pos, "Bl -%s It", mdoc_argnames[nbl->args->argv[0].arg]); break; case LIST_bullet: @@ -1057,31 +1735,45 @@ post_it(POST_ARGS) case LIST_enum: case LIST_hyphen: if (nit->body == NULL || nit->body->child == NULL) - mandoc_vmsg(MANDOCERR_IT_NOBODY, - mdoc->parse, nit->line, nit->pos, - "Bl -%s It", + mandoc_msg(MANDOCERR_IT_NOBODY, + nit->line, nit->pos, "Bl -%s It", mdoc_argnames[nbl->args->argv[0].arg]); /* FALLTHROUGH */ case LIST_item: - if (nit->head->child != NULL) - mandoc_vmsg(MANDOCERR_ARG_SKIP, - mdoc->parse, nit->line, nit->pos, - "It %s", nit->head->child->string); + if ((nch = nit->head->child) != NULL) + mandoc_msg(MANDOCERR_ARG_SKIP, + nit->line, nit->pos, "It %s", + nch->type == ROFFT_TEXT ? nch->string : + roff_name[nch->tok]); break; case LIST_column: cols = (int)nbl->norm->Bl.ncols; assert(nit->head->child == NULL); - i = 0; - for (nch = nit->child; nch != NULL; nch = nch->next) - if (nch->type == ROFFT_BODY) - i++; + if (nit->head->next->child == NULL && + nit->head->next->next == NULL) { + mandoc_msg(MANDOCERR_MACRO_EMPTY, + nit->line, nit->pos, "It"); + roff_node_delete(mdoc, nit); + break; + } + i = 0; + for (nch = nit->child; nch != NULL; nch = nch->next) { + if (nch->type != ROFFT_BODY) + continue; + if (i++ && nch->flags & NODE_LINE) + mandoc_msg(MANDOCERR_TA_LINE, + nch->line, nch->pos, "Ta"); + } if (i < cols || i > cols + 1) - mandoc_vmsg(MANDOCERR_BL_COL, - mdoc->parse, nit->line, nit->pos, + mandoc_msg(MANDOCERR_BL_COL, nit->line, nit->pos, "%d columns, %d cells", cols, i); + else if (nit->head->next->child != NULL && + nit->head->next->child->flags & NODE_LINE) + mandoc_msg(MANDOCERR_IT_NOARG, + nit->line, nit->pos, "Bl -column It"); break; default: abort(); @@ -1095,22 +1787,7 @@ post_bl_block(POST_ARGS) post_prevpar(mdoc); - /* - * These are fairly complicated, so we've broken them into two - * functions. post_bl_block_tag() is called when a -tag is - * specified, but no -width (it must be guessed). The second - * when a -width is specified (macro indicators must be - * rewritten into real lengths). - */ - n = mdoc->last; - - if (n->norm->Bl.type == LIST_tag && - n->norm->Bl.width == NULL) { - post_bl_block_tag(mdoc); - assert(n->norm->Bl.width != NULL); - } - for (ni = n->body->child; ni != NULL; ni = ni->next) { if (ni->body == NULL) continue; @@ -1118,24 +1795,21 @@ post_bl_block(POST_ARGS) while (nc != NULL) { switch (nc->tok) { case MDOC_Pp: - case MDOC_Lp: - case MDOC_br: + case ROFF_br: break; default: nc = NULL; continue; } if (ni->next == NULL) { - mandoc_msg(MANDOCERR_PAR_MOVE, - mdoc->parse, nc->line, nc->pos, - mdoc_macronames[nc->tok]); - mdoc_node_relink(mdoc, nc); + mandoc_msg(MANDOCERR_PAR_MOVE, nc->line, + nc->pos, "%s", roff_name[nc->tok]); + roff_node_relink(mdoc, nc); } else if (n->norm->Bl.comp == 0 && n->norm->Bl.type != LIST_column) { - mandoc_vmsg(MANDOCERR_PAR_SKIP, - mdoc->parse, nc->line, nc->pos, - "%s before It", - mdoc_macronames[nc->tok]); + mandoc_msg(MANDOCERR_PAR_SKIP, + nc->line, nc->pos, + "%s before It", roff_name[nc->tok]); roff_node_delete(mdoc, nc); } else break; @@ -1148,17 +1822,17 @@ post_bl_block(POST_ARGS) * If the argument of -offset or -width is a macro, * replace it with the associated default width. */ -void -rewrite_macro2len(char **arg) +static void +rewrite_macro2len(struct roff_man *mdoc, char **arg) { size_t width; - int tok; + enum roff_tok tok; if (*arg == NULL) return; else if ( ! strcmp(*arg, "Ds")) width = 6; - else if ((tok = mdoc_hash_find(*arg)) == TOKEN_NONE) + else if ((tok = roffhash_find(mdoc->mdocmac, *arg, 0)) == TOKEN_NONE) return; else width = macro2len(tok); @@ -1168,71 +1842,6 @@ rewrite_macro2len(char **arg) } static void -post_bl_block_tag(POST_ARGS) -{ - struct roff_node *n, *nn; - size_t sz, ssz; - int i; - char buf[24]; - - /* - * Calculate the -width for a `Bl -tag' list if it hasn't been - * provided. Uses the first head macro. NOTE AGAIN: this is - * ONLY if the -width argument has NOT been provided. See - * rewrite_macro2len() for converting the -width string. - */ - - sz = 10; - n = mdoc->last; - - for (nn = n->body->child; nn != NULL; nn = nn->next) { - if (nn->tok != MDOC_It) - continue; - - assert(nn->type == ROFFT_BLOCK); - nn = nn->head->child; - - if (nn == NULL) - break; - - if (nn->type == ROFFT_TEXT) { - sz = strlen(nn->string) + 1; - break; - } - - if (0 != (ssz = macro2len(nn->tok))) - sz = ssz; - - break; - } - - /* Defaults to ten ens. */ - - (void)snprintf(buf, sizeof(buf), "%un", (unsigned int)sz); - - /* - * We have to dynamically add this to the macro's argument list. - * We're guaranteed that a MDOC_Width doesn't already exist. - */ - - assert(n->args != NULL); - i = (int)(n->args->argc)++; - - n->args->argv = mandoc_reallocarray(n->args->argv, - n->args->argc, sizeof(struct mdoc_argv)); - - n->args->argv[i].arg = MDOC_Width; - n->args->argv[i].line = n->line; - n->args->argv[i].pos = n->pos; - n->args->argv[i].sz = 1; - n->args->argv[i].value = mandoc_malloc(sizeof(char *)); - n->args->argv[i].value[0] = mandoc_strdup(buf); - - /* Set our width! */ - n->norm->Bl.width = n->args->argv[i].value[0]; -} - -static void post_bl_head(POST_ARGS) { struct roff_node *nbl, *nh, *nch, *nnext; @@ -1245,7 +1854,7 @@ post_bl_head(POST_ARGS) if (nh->norm->Bl.type != LIST_column) { if ((nch = nh->child) == NULL) return; - mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, + mandoc_msg(MANDOCERR_ARG_EXCESS, nch->line, nch->pos, "Bl ... %s", nch->string); while (nch != NULL) { roff_node_delete(mdoc, nch); @@ -1278,7 +1887,8 @@ post_bl_head(POST_ARGS) argv = nbl->args->argv + j; i = argv->sz; - argv->sz += nh->nchild; + for (nch = nh->child; nch != NULL; nch = nch->next) + argv->sz++; argv->value = mandoc_reallocarray(argv->value, argv->sz, sizeof(char *)); @@ -1291,16 +1901,16 @@ post_bl_head(POST_ARGS) nnext = nch->next; roff_node_delete(NULL, nch); } - nh->nchild = 0; nh->child = NULL; } static void post_bl(POST_ARGS) { - struct roff_node *nparent, *nprev; /* of the Bl block */ - struct roff_node *nblock, *nbody; /* of the Bl */ + struct roff_node *nbody; /* of the Bl */ struct roff_node *nchild, *nnext; /* of the Bl body */ + const char *prev_Er; + int order; nbody = mdoc->last; switch (nbody->type) { @@ -1318,65 +1928,102 @@ post_bl(POST_ARGS) if (nbody->end != ENDBODY_NOT) return; - nchild = nbody->child; - if (nchild == NULL) { - mandoc_msg(MANDOCERR_BLK_EMPTY, mdoc->parse, - nbody->line, nbody->pos, "Bl"); - return; + /* + * Up to the first item, move nodes before the list, + * but leave transparent nodes where they are + * if they precede an item. + * The next non-transparent node is kept in nchild. + * It only needs to be updated after a non-transparent + * node was moved out, and at the very beginning + * when no node at all was moved yet. + */ + + nchild = mdoc->last; + for (;;) { + if (nchild == mdoc->last) + nchild = roff_node_child(nbody); + if (nchild == NULL) { + mdoc->last = nbody; + mandoc_msg(MANDOCERR_BLK_EMPTY, + nbody->line, nbody->pos, "Bl"); + return; + } + if (nchild->tok == MDOC_It) { + mdoc->last = nbody; + break; + } + mandoc_msg(MANDOCERR_BL_MOVE, nbody->child->line, + nbody->child->pos, "%s", roff_name[nbody->child->tok]); + if (nbody->parent->prev == NULL) { + mdoc->last = nbody->parent->parent; + mdoc->next = ROFF_NEXT_CHILD; + } else { + mdoc->last = nbody->parent->prev; + mdoc->next = ROFF_NEXT_SIBLING; + } + roff_node_relink(mdoc, nbody->child); } + + /* + * We have reached the first item, + * so moving nodes out is no longer possible. + * But in .Bl -column, the first rows may be implicit, + * that is, they may not start with .It macros. + * Such rows may be followed by nodes generated on the + * roff level, for example .TS. + * Wrap such roff nodes into an implicit row. + */ + while (nchild != NULL) { - if (nchild->tok == MDOC_It || - (nchild->tok == MDOC_Sm && - nchild->next != NULL && - nchild->next->tok == MDOC_It)) { - nchild = nchild->next; + if (nchild->tok == MDOC_It) { + nchild = roff_node_next(nchild); continue; } - - mandoc_msg(MANDOCERR_BL_MOVE, mdoc->parse, - nchild->line, nchild->pos, - mdoc_macronames[nchild->tok]); - - /* - * Move the node out of the Bl block. - * First, collect all required node pointers. - */ - - nblock = nbody->parent; - nprev = nblock->prev; - nparent = nblock->parent; - nnext = nchild->next; - - /* - * Unlink this child. - */ - - assert(nchild->prev == NULL); - if (--nbody->nchild == 0) { - nbody->child = NULL; - nbody->last = NULL; - assert(nnext == NULL); - } else { - nbody->child = nnext; - nnext->prev = NULL; + nnext = nchild->next; + mdoc->last = nchild->prev; + mdoc->next = ROFF_NEXT_SIBLING; + roff_block_alloc(mdoc, nchild->line, nchild->pos, MDOC_It); + roff_head_alloc(mdoc, nchild->line, nchild->pos, MDOC_It); + mdoc->next = ROFF_NEXT_SIBLING; + roff_body_alloc(mdoc, nchild->line, nchild->pos, MDOC_It); + while (nchild->tok != MDOC_It) { + roff_node_relink(mdoc, nchild); + if (nnext == NULL) + break; + nchild = nnext; + nnext = nchild->next; + mdoc->next = ROFF_NEXT_SIBLING; } + mdoc->last = nbody; + } - /* - * Relink this child. - */ + if (mdoc->meta.os_e != MANDOC_OS_NETBSD) + return; - nchild->parent = nparent; - nchild->prev = nprev; - nchild->next = nblock; - - nblock->prev = nchild; - nparent->nchild++; - if (nprev == NULL) - nparent->child = nchild; - else - nprev->next = nchild; - - nchild = nnext; + prev_Er = NULL; + for (nchild = nbody->child; nchild != NULL; nchild = nchild->next) { + if (nchild->tok != MDOC_It) + continue; + if ((nnext = nchild->head->child) == NULL) + continue; + if (nnext->type == ROFFT_BLOCK) + nnext = nnext->body->child; + if (nnext == NULL || nnext->tok != MDOC_Er) + continue; + nnext = nnext->child; + if (prev_Er != NULL) { + order = strcmp(prev_Er, nnext->string); + if (order > 0) + mandoc_msg(MANDOCERR_ER_ORDER, + nnext->line, nnext->pos, + "Er %s %s (NetBSD)", + prev_Er, nnext->string); + else if (order == 0) + mandoc_msg(MANDOCERR_ER_REP, + nnext->line, nnext->pos, + "Er %s (NetBSD)", prev_Er); + } + prev_Er = nnext->string; } } @@ -1388,8 +2035,7 @@ post_bk(POST_ARGS) n = mdoc->last; if (n->type == ROFFT_BLOCK && n->body->child == NULL) { - mandoc_msg(MANDOCERR_BLK_EMPTY, - mdoc->parse, n->line, n->pos, "Bk"); + mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos, "Bk"); roff_node_delete(mdoc, n); } } @@ -1417,10 +2063,9 @@ post_sm(POST_ARGS) return; } - mandoc_vmsg(MANDOCERR_SM_BAD, - mdoc->parse, nch->line, nch->pos, - "%s %s", mdoc_macronames[mdoc->last->tok], nch->string); - mdoc_node_relink(mdoc, nch); + mandoc_msg(MANDOCERR_SM_BAD, nch->line, nch->pos, + "%s %s", roff_name[mdoc->last->tok], nch->string); + roff_node_relink(mdoc, nch); return; } @@ -1432,13 +2077,10 @@ post_root(POST_ARGS) /* Add missing prologue data. */ if (mdoc->meta.date == NULL) - mdoc->meta.date = mdoc->quick ? - mandoc_strdup("") : - mandoc_normdate(mdoc->parse, NULL, 0, 0); + mdoc->meta.date = mandoc_normdate(NULL, NULL); if (mdoc->meta.title == NULL) { - mandoc_msg(MANDOCERR_DT_NOTITLE, - mdoc->parse, 0, 0, "EOF"); + mandoc_msg(MANDOCERR_DT_NOTITLE, 0, 0, "EOF"); mdoc->meta.title = mandoc_strdup("UNTITLED"); } @@ -1446,47 +2088,46 @@ post_root(POST_ARGS) mdoc->meta.vol = mandoc_strdup("LOCAL"); if (mdoc->meta.os == NULL) { - mandoc_msg(MANDOCERR_OS_MISSING, - mdoc->parse, 0, 0, NULL); + mandoc_msg(MANDOCERR_OS_MISSING, 0, 0, NULL); mdoc->meta.os = mandoc_strdup(""); + } else if (mdoc->meta.os_e && + (mdoc->meta.rcsids & (1 << mdoc->meta.os_e)) == 0) + mandoc_msg(MANDOCERR_RCS_MISSING, 0, 0, + mdoc->meta.os_e == MANDOC_OS_OPENBSD ? + "(OpenBSD)" : "(NetBSD)"); + + if (mdoc->meta.arch != NULL && + arch_valid(mdoc->meta.arch, mdoc->meta.os_e) == 0) { + n = mdoc->meta.first->child; + while (n->tok != MDOC_Dt || + n->child == NULL || + n->child->next == NULL || + n->child->next->next == NULL) + n = n->next; + n = n->child->next->next; + mandoc_msg(MANDOCERR_ARCH_BAD, n->line, n->pos, + "Dt ... %s %s", mdoc->meta.arch, + mdoc->meta.os_e == MANDOC_OS_OPENBSD ? + "(OpenBSD)" : "(NetBSD)"); } /* Check that we begin with a proper `Sh'. */ - n = mdoc->first->child; - while (n != NULL && n->tok != TOKEN_NONE && - mdoc_macros[n->tok].flags & MDOC_PROLOGUE) + n = mdoc->meta.first->child; + while (n != NULL && + (n->type == ROFFT_COMMENT || + (n->tok >= MDOC_Dd && + mdoc_macro(n->tok)->flags & MDOC_PROLOGUE))) n = n->next; if (n == NULL) - mandoc_msg(MANDOCERR_DOC_EMPTY, mdoc->parse, 0, 0, NULL); + mandoc_msg(MANDOCERR_DOC_EMPTY, 0, 0, NULL); else if (n->tok != MDOC_Sh) - mandoc_msg(MANDOCERR_SEC_BEFORE, mdoc->parse, - n->line, n->pos, mdoc_macronames[n->tok]); + mandoc_msg(MANDOCERR_SEC_BEFORE, n->line, n->pos, + "%s", roff_name[n->tok]); } static void -post_st(POST_ARGS) -{ - struct roff_node *n, *nch; - const char *p; - - n = mdoc->last; - nch = n->child; - - assert(nch->type == ROFFT_TEXT); - - if ((p = mdoc_a2st(nch->string)) == NULL) { - mandoc_vmsg(MANDOCERR_ST_BAD, mdoc->parse, - nch->line, nch->pos, "St %s", nch->string); - roff_node_delete(mdoc, n); - } else { - free(nch->string); - nch->string = mandoc_strdup(p); - } -} - -static void post_rs(POST_ARGS) { struct roff_node *np, *nch, *next, *prev; @@ -1498,8 +2139,7 @@ post_rs(POST_ARGS) return; if (np->child == NULL) { - mandoc_msg(MANDOCERR_RS_EMPTY, mdoc->parse, - np->line, np->pos, "Rs"); + mandoc_msg(MANDOCERR_RS_EMPTY, np->line, np->pos, "Rs"); return; } @@ -1517,9 +2157,8 @@ post_rs(POST_ARGS) break; if (i == RSORD_MAX) { - mandoc_msg(MANDOCERR_RS_BAD, - mdoc->parse, nch->line, nch->pos, - mdoc_macronames[nch->tok]); + mandoc_msg(MANDOCERR_RS_BAD, nch->line, nch->pos, + "%s", roff_name[nch->tok]); i = -1; } else if (nch->tok == MDOC__J || nch->tok == MDOC__B) np->norm->Rs.quote_T++; @@ -1583,10 +2222,11 @@ post_rs(POST_ARGS) static void post_hyph(POST_ARGS) { - struct roff_node *nch; + struct roff_node *n, *nch; char *cp; - for (nch = mdoc->last->child; nch != NULL; nch = nch->next) { + n = mdoc->last; + for (nch = n->child; nch != NULL; nch = nch->next) { if (nch->type != ROFFT_TEXT) continue; cp = nch->string; @@ -1595,26 +2235,37 @@ post_hyph(POST_ARGS) while (*(++cp) != '\0') if (*cp == '-' && isalpha((unsigned char)cp[-1]) && - isalpha((unsigned char)cp[1])) + isalpha((unsigned char)cp[1])) { + if (n->tag == NULL && n->flags & NODE_ID) + n->tag = mandoc_strdup(nch->string); *cp = ASCII_HYPH; + } } } static void post_ns(POST_ARGS) { + struct roff_node *n; - if (mdoc->last->flags & MDOC_LINE) - mandoc_msg(MANDOCERR_NS_SKIP, mdoc->parse, - mdoc->last->line, mdoc->last->pos, NULL); + n = mdoc->last; + if (n->flags & NODE_LINE || + (n->next != NULL && n->next->flags & NODE_DELIMC)) + mandoc_msg(MANDOCERR_NS_SKIP, n->line, n->pos, NULL); } static void +post_sx(POST_ARGS) +{ + post_delim(mdoc); + post_hyph(mdoc); +} + +static void post_sh(POST_ARGS) { + post_section(mdoc); - post_ignpar(mdoc); - switch (mdoc->last->type) { case ROFFT_HEAD: post_sh_head(mdoc); @@ -1650,30 +2301,39 @@ post_sh_name(POST_ARGS) for (n = mdoc->last->child; n != NULL; n = n->next) { switch (n->tok) { case MDOC_Nm: + if (hasnm && n->child != NULL) + mandoc_msg(MANDOCERR_NAMESEC_PUNCT, + n->line, n->pos, + "Nm %s", n->child->string); hasnm = 1; - break; + continue; case MDOC_Nd: hasnd = 1; if (n->next != NULL) mandoc_msg(MANDOCERR_NAMESEC_ND, - mdoc->parse, n->line, n->pos, NULL); + n->line, n->pos, NULL); break; case TOKEN_NONE: - if (hasnm) - break; + if (n->type == ROFFT_TEXT && + n->string[0] == ',' && n->string[1] == '\0' && + n->next != NULL && n->next->tok == MDOC_Nm) { + n = n->next; + continue; + } /* FALLTHROUGH */ default: - mandoc_msg(MANDOCERR_NAMESEC_BAD, mdoc->parse, - n->line, n->pos, mdoc_macronames[n->tok]); - break; + mandoc_msg(MANDOCERR_NAMESEC_BAD, + n->line, n->pos, "%s", roff_name[n->tok]); + continue; } + break; } if ( ! hasnm) - mandoc_msg(MANDOCERR_NAMESEC_NONM, mdoc->parse, + mandoc_msg(MANDOCERR_NAMESEC_NONM, mdoc->last->line, mdoc->last->pos, NULL); if ( ! hasnd) - mandoc_msg(MANDOCERR_NAMESEC_NOND, mdoc->parse, + mandoc_msg(MANDOCERR_NAMESEC_NOND, mdoc->last->line, mdoc->last->pos, NULL); } @@ -1688,7 +2348,9 @@ post_sh_see_also(POST_ARGS) n = mdoc->last->child; lastname = lastsec = lastpunct = NULL; while (n != NULL) { - if (n->tok != MDOC_Xr || n->nchild < 2) + if (n->tok != MDOC_Xr || + n->child == NULL || + n->child->next == NULL) break; /* Process one .Xr node. */ @@ -1697,21 +2359,18 @@ post_sh_see_also(POST_ARGS) sec = n->child->next->string; if (lastsec != NULL) { if (lastpunct[0] != ',' || lastpunct[1] != '\0') - mandoc_vmsg(MANDOCERR_XR_PUNCT, - mdoc->parse, n->line, n->pos, - "%s before %s(%s)", lastpunct, - name, sec); + mandoc_msg(MANDOCERR_XR_PUNCT, n->line, + n->pos, "%s before %s(%s)", + lastpunct, name, sec); cmp = strcmp(lastsec, sec); if (cmp > 0) - mandoc_vmsg(MANDOCERR_XR_ORDER, - mdoc->parse, n->line, n->pos, - "%s(%s) after %s(%s)", name, - sec, lastname, lastsec); + mandoc_msg(MANDOCERR_XR_ORDER, n->line, + n->pos, "%s(%s) after %s(%s)", + name, sec, lastname, lastsec); else if (cmp == 0 && strcasecmp(lastname, name) > 0) - mandoc_vmsg(MANDOCERR_XR_ORDER, - mdoc->parse, n->line, n->pos, - "%s after %s", name, lastname); + mandoc_msg(MANDOCERR_XR_ORDER, n->line, + n->pos, "%s after %s", name, lastname); } lastname = name; lastsec = sec; @@ -1731,9 +2390,9 @@ post_sh_see_also(POST_ARGS) if (isalpha((const unsigned char)*name)) return; lastpunct = n->string; - if (n->next == NULL) - mandoc_vmsg(MANDOCERR_XR_PUNCT, mdoc->parse, - n->line, n->pos, "%s after %s(%s)", + if (n->next == NULL || n->next->tok == MDOC_Rs) + mandoc_msg(MANDOCERR_XR_PUNCT, n->line, + n->pos, "%s after %s(%s)", lastpunct, lastname, lastsec); n = n->next; } @@ -1744,7 +2403,7 @@ child_an(const struct roff_node *n) { for (n = n->child; n != NULL; n = n->next) - if ((n->tok == MDOC_An && n->nchild) || child_an(n)) + if ((n->tok == MDOC_An && n->child != NULL) || child_an(n)) return 1; return 0; } @@ -1754,15 +2413,59 @@ post_sh_authors(POST_ARGS) { if ( ! child_an(mdoc->last)) - mandoc_msg(MANDOCERR_AN_MISSING, mdoc->parse, + mandoc_msg(MANDOCERR_AN_MISSING, mdoc->last->line, mdoc->last->pos, NULL); } +/* + * Return an upper bound for the string distance (allowing + * transpositions). Not a full Levenshtein implementation + * because Levenshtein is quadratic in the string length + * and this function is called for every standard name, + * so the check for each custom name would be cubic. + * The following crude heuristics is linear, resulting + * in quadratic behaviour for checking one custom name, + * which does not cause measurable slowdown. + */ +static int +similar(const char *s1, const char *s2) +{ + const int maxdist = 3; + int dist = 0; + + while (s1[0] != '\0' && s2[0] != '\0') { + if (s1[0] == s2[0]) { + s1++; + s2++; + continue; + } + if (++dist > maxdist) + return INT_MAX; + if (s1[1] == s2[1]) { /* replacement */ + s1++; + s2++; + } else if (s1[0] == s2[1] && s1[1] == s2[0]) { + s1 += 2; /* transposition */ + s2 += 2; + } else if (s1[0] == s2[1]) /* insertion */ + s2++; + else if (s1[1] == s2[0]) /* deletion */ + s1++; + else + return INT_MAX; + } + dist += strlen(s1) + strlen(s2); + return dist > maxdist ? INT_MAX : dist; +} + static void post_sh_head(POST_ARGS) { - const char *goodsec; - enum roff_sec sec; + struct roff_node *nch; + const char *goodsec; + const char *const *testsec; + int dist, mindist; + enum roff_sec sec; /* * Process a new section. Sections are either "named" or @@ -1775,10 +2478,13 @@ post_sh_head(POST_ARGS) /* The NAME should be first. */ - if (SEC_NAME != sec && SEC_NONE == mdoc->lastnamed) - mandoc_vmsg(MANDOCERR_NAMESEC_FIRST, mdoc->parse, - mdoc->last->line, mdoc->last->pos, - "Sh %s", secnames[sec]); + if (sec != SEC_NAME && mdoc->lastnamed == SEC_NONE) + mandoc_msg(MANDOCERR_NAMESEC_FIRST, + mdoc->last->line, mdoc->last->pos, "Sh %s", + sec != SEC_CUSTOM ? secnames[sec] : + (nch = mdoc->last->child) == NULL ? "" : + nch->type == ROFFT_TEXT ? nch->string : + roff_name[nch->tok]); /* The SYNOPSIS gets special attention in other areas. */ @@ -1789,6 +2495,8 @@ post_sh_head(POST_ARGS) roff_setreg(mdoc->roff, "nS", 0, '='); mdoc->flags &= ~MDOC_SYNOPSIS; } + if (sec == SEC_DESCRIPTION) + fn_prio = TAG_STRONG; /* Mark our last section. */ @@ -1796,8 +2504,24 @@ post_sh_head(POST_ARGS) /* We don't care about custom sections after this. */ - if (sec == SEC_CUSTOM) + if (sec == SEC_CUSTOM) { + if ((nch = mdoc->last->child) == NULL || + nch->type != ROFFT_TEXT || nch->next != NULL) + return; + goodsec = NULL; + mindist = INT_MAX; + for (testsec = secnames + 1; *testsec != NULL; testsec++) { + dist = similar(nch->string, *testsec); + if (dist < mindist) { + goodsec = *testsec; + mindist = dist; + } + } + if (goodsec != NULL) + mandoc_msg(MANDOCERR_SEC_TYPO, nch->line, nch->pos, + "Sh %s instead of %s", nch->string, goodsec); return; + } /* * Check whether our non-custom section is being repeated or is @@ -1805,14 +2529,12 @@ post_sh_head(POST_ARGS) */ if (sec == mdoc->lastnamed) - mandoc_vmsg(MANDOCERR_SEC_REP, mdoc->parse, - mdoc->last->line, mdoc->last->pos, - "Sh %s", secnames[sec]); + mandoc_msg(MANDOCERR_SEC_REP, mdoc->last->line, + mdoc->last->pos, "Sh %s", secnames[sec]); if (sec < mdoc->lastnamed) - mandoc_vmsg(MANDOCERR_SEC_ORDER, mdoc->parse, - mdoc->last->line, mdoc->last->pos, - "Sh %s", secnames[sec]); + mandoc_msg(MANDOCERR_SEC_ORDER, mdoc->last->line, + mdoc->last->pos, "Sh %s", secnames[sec]); /* Mark the last named section. */ @@ -1844,7 +2566,7 @@ post_sh_head(POST_ARGS) break; if (NULL == goodsec) goodsec = "9"; - mandoc_vmsg(MANDOCERR_SEC_MSEC, mdoc->parse, + mandoc_msg(MANDOCERR_SEC_MSEC, mdoc->last->line, mdoc->last->pos, "Sh %s for %s only", secnames[sec], goodsec); break; @@ -1854,12 +2576,53 @@ post_sh_head(POST_ARGS) } static void -post_ignpar(POST_ARGS) +post_xr(POST_ARGS) { - struct roff_node *np; + struct roff_node *n, *nch; - switch (mdoc->last->type) { + n = mdoc->last; + nch = n->child; + if (nch->next == NULL) { + mandoc_msg(MANDOCERR_XR_NOSEC, + n->line, n->pos, "Xr %s", nch->string); + } else { + assert(nch->next == n->last); + if(mandoc_xr_add(nch->next->string, nch->string, + nch->line, nch->pos)) + mandoc_msg(MANDOCERR_XR_SELF, + nch->line, nch->pos, "Xr %s %s", + nch->string, nch->next->string); + } + post_delim_nb(mdoc); +} + +static void +post_section(POST_ARGS) +{ + struct roff_node *n, *nch; + char *cp, *tag; + + n = mdoc->last; + switch (n->type) { + case ROFFT_BLOCK: + post_prevpar(mdoc); + return; case ROFFT_HEAD: + tag = NULL; + deroff(&tag, n); + if (tag != NULL) { + for (cp = tag; *cp != '\0'; cp++) + if (*cp == ' ') + *cp = '_'; + if ((nch = n->child) != NULL && + nch->type == ROFFT_TEXT && + strcmp(nch->string, tag) == 0) + tag_put(NULL, TAG_WEAK, n); + else + tag_put(tag, TAG_FALLBACK, n); + free(tag); + } + post_delim(mdoc); post_hyph(mdoc); return; case ROFFT_BODY: @@ -1867,45 +2630,40 @@ post_ignpar(POST_ARGS) default: return; } - - if ((np = mdoc->last->child) != NULL) - if (np->tok == MDOC_Pp || np->tok == MDOC_Lp) { - mandoc_vmsg(MANDOCERR_PAR_SKIP, - mdoc->parse, np->line, np->pos, - "%s after %s", mdoc_macronames[np->tok], - mdoc_macronames[mdoc->last->tok]); - roff_node_delete(mdoc, np); - } - - if ((np = mdoc->last->last) != NULL) - if (np->tok == MDOC_Pp || np->tok == MDOC_Lp) { - mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, - np->line, np->pos, "%s at the end of %s", - mdoc_macronames[np->tok], - mdoc_macronames[mdoc->last->tok]); - roff_node_delete(mdoc, np); - } + if ((nch = n->child) != NULL && + (nch->tok == MDOC_Pp || nch->tok == ROFF_br || + nch->tok == ROFF_sp)) { + mandoc_msg(MANDOCERR_PAR_SKIP, nch->line, nch->pos, + "%s after %s", roff_name[nch->tok], + roff_name[n->tok]); + roff_node_delete(mdoc, nch); + } + if ((nch = n->last) != NULL && + (nch->tok == MDOC_Pp || nch->tok == ROFF_br)) { + mandoc_msg(MANDOCERR_PAR_SKIP, nch->line, nch->pos, + "%s at the end of %s", roff_name[nch->tok], + roff_name[n->tok]); + roff_node_delete(mdoc, nch); + } } static void post_prevpar(POST_ARGS) { - struct roff_node *n; + struct roff_node *n, *np; n = mdoc->last; - if (NULL == n->prev) - return; if (n->type != ROFFT_ELEM && n->type != ROFFT_BLOCK) return; + if ((np = roff_node_prev(n)) == NULL) + return; /* - * Don't allow prior `Lp' or `Pp' prior to a paragraph-type - * block: `Lp', `Pp', or non-compact `Bd' or `Bl'. + * Don't allow `Pp' prior to a paragraph-type + * block: `Pp' or non-compact `Bd' or `Bl'. */ - if (n->prev->tok != MDOC_Pp && - n->prev->tok != MDOC_Lp && - n->prev->tok != MDOC_br) + if (np->tok != MDOC_Pp && np->tok != ROFF_br) return; if (n->tok == MDOC_Bl && n->norm->Bl.comp) return; @@ -1914,11 +2672,9 @@ post_prevpar(POST_ARGS) if (n->tok == MDOC_It && n->parent->norm->Bl.comp) return; - mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, - n->prev->line, n->prev->pos, - "%s before %s", mdoc_macronames[n->prev->tok], - mdoc_macronames[n->tok]); - roff_node_delete(mdoc, n->prev); + mandoc_msg(MANDOCERR_PAR_SKIP, np->line, np->pos, + "%s before %s", roff_name[np->tok], roff_name[n->tok]); + roff_node_delete(mdoc, np); } static void @@ -1926,74 +2682,39 @@ post_par(POST_ARGS) { struct roff_node *np; - np = mdoc->last; - if (np->tok != MDOC_br && np->tok != MDOC_sp) - post_prevpar(mdoc); + fn_prio = TAG_STRONG; + post_prevpar(mdoc); - if (np->tok == MDOC_sp) { - if (np->nchild > 1) - mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, - np->child->next->line, np->child->next->pos, - "sp ... %s", np->child->next->string); - } else if (np->child != NULL) - mandoc_vmsg(MANDOCERR_ARG_SKIP, - mdoc->parse, np->line, np->pos, "%s %s", - mdoc_macronames[np->tok], np->child->string); - - if ((np = mdoc->last->prev) == NULL) { - np = mdoc->last->parent; - if (np->tok != MDOC_Sh && np->tok != MDOC_Ss) - return; - } else if (np->tok != MDOC_Pp && np->tok != MDOC_Lp && - (mdoc->last->tok != MDOC_br || - (np->tok != MDOC_sp && np->tok != MDOC_br))) - return; - - mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, - mdoc->last->line, mdoc->last->pos, - "%s after %s", mdoc_macronames[mdoc->last->tok], - mdoc_macronames[np->tok]); - roff_node_delete(mdoc, mdoc->last); + np = mdoc->last; + if (np->child != NULL) + mandoc_msg(MANDOCERR_ARG_SKIP, np->line, np->pos, + "%s %s", roff_name[np->tok], np->child->string); } static void post_dd(POST_ARGS) { struct roff_node *n; - char *datestr; n = mdoc->last; + n->flags |= NODE_NOPRT; + if (mdoc->meta.date != NULL) { - mandoc_msg(MANDOCERR_PROLOG_REP, mdoc->parse, - n->line, n->pos, "Dd"); + mandoc_msg(MANDOCERR_PROLOG_REP, n->line, n->pos, "Dd"); free(mdoc->meta.date); } else if (mdoc->flags & MDOC_PBODY) - mandoc_msg(MANDOCERR_PROLOG_LATE, mdoc->parse, - n->line, n->pos, "Dd"); + mandoc_msg(MANDOCERR_PROLOG_LATE, n->line, n->pos, "Dd"); else if (mdoc->meta.title != NULL) - mandoc_msg(MANDOCERR_PROLOG_ORDER, mdoc->parse, + mandoc_msg(MANDOCERR_PROLOG_ORDER, n->line, n->pos, "Dd after Dt"); else if (mdoc->meta.os != NULL) - mandoc_msg(MANDOCERR_PROLOG_ORDER, mdoc->parse, + mandoc_msg(MANDOCERR_PROLOG_ORDER, n->line, n->pos, "Dd after Os"); - if (n->child == NULL || n->child->string[0] == '\0') { - mdoc->meta.date = mdoc->quick ? mandoc_strdup("") : - mandoc_normdate(mdoc->parse, NULL, n->line, n->pos); - goto out; - } - - datestr = NULL; - deroff(&datestr, n); - if (mdoc->quick) - mdoc->meta.date = datestr; - else { - mdoc->meta.date = mandoc_normdate(mdoc->parse, - datestr, n->line, n->pos); - free(datestr); - } -out: - roff_node_delete(mdoc, n); + if (mdoc->quick && n != NULL) + mdoc->meta.date = mandoc_strdup(""); + else + mdoc->meta.date = mandoc_normdate(n->child, n); } static void @@ -2004,17 +2725,17 @@ post_dt(POST_ARGS) char *p; n = mdoc->last; + n->flags |= NODE_NOPRT; + if (mdoc->flags & MDOC_PBODY) { - mandoc_msg(MANDOCERR_DT_LATE, mdoc->parse, - n->line, n->pos, "Dt"); - goto out; + mandoc_msg(MANDOCERR_DT_LATE, n->line, n->pos, "Dt"); + return; } if (mdoc->meta.title != NULL) - mandoc_msg(MANDOCERR_PROLOG_REP, mdoc->parse, - n->line, n->pos, "Dt"); + mandoc_msg(MANDOCERR_PROLOG_REP, n->line, n->pos, "Dt"); else if (mdoc->meta.os != NULL) - mandoc_msg(MANDOCERR_PROLOG_ORDER, mdoc->parse, + mandoc_msg(MANDOCERR_PROLOG_ORDER, n->line, n->pos, "Dt after Os"); free(mdoc->meta.title); @@ -2031,8 +2752,7 @@ post_dt(POST_ARGS) nn = n->child; if (nn == NULL || *nn->string == '\0') { - mandoc_msg(MANDOCERR_DT_NOTITLE, - mdoc->parse, n->line, n->pos, "Dt"); + mandoc_msg(MANDOCERR_DT_NOTITLE, n->line, n->pos, "Dt"); mdoc->meta.title = mandoc_strdup("UNTITLED"); } else { mdoc->meta.title = mandoc_strdup(nn->string); @@ -2041,25 +2761,23 @@ post_dt(POST_ARGS) for (p = nn->string; *p != '\0'; p++) if (islower((unsigned char)*p)) { - mandoc_vmsg(MANDOCERR_TITLE_CASE, - mdoc->parse, nn->line, - nn->pos + (p - nn->string), + mandoc_msg(MANDOCERR_TITLE_CASE, nn->line, + nn->pos + (int)(p - nn->string), "Dt %s", nn->string); break; } } - /* Mandatory second argument: section. */ + /* Mandatory second argument: section. */ if (nn != NULL) nn = nn->next; if (nn == NULL) { - mandoc_vmsg(MANDOCERR_MSEC_MISSING, - mdoc->parse, n->line, n->pos, + mandoc_msg(MANDOCERR_MSEC_MISSING, n->line, n->pos, "Dt %s", mdoc->meta.title); mdoc->meta.vol = mandoc_strdup("LOCAL"); - goto out; /* msec and arch remain NULL. */ + return; /* msec and arch remain NULL. */ } mdoc->meta.msec = mandoc_strdup(nn->string); @@ -2068,16 +2786,22 @@ post_dt(POST_ARGS) cp = mandoc_a2msec(nn->string); if (cp == NULL) { - mandoc_vmsg(MANDOCERR_MSEC_BAD, mdoc->parse, + mandoc_msg(MANDOCERR_MSEC_BAD, nn->line, nn->pos, "Dt ... %s", nn->string); mdoc->meta.vol = mandoc_strdup(nn->string); - } else + } else { mdoc->meta.vol = mandoc_strdup(cp); + if (mdoc->filesec != '\0' && + mdoc->filesec != *nn->string && + *nn->string >= '1' && *nn->string <= '9') + mandoc_msg(MANDOCERR_MSEC_FILE, nn->line, nn->pos, + "*.%c vs Dt ... %c", mdoc->filesec, *nn->string); + } /* Optional third argument: architecture. */ if ((nn = nn->next) == NULL) - goto out; + return; for (p = nn->string; *p != '\0'; p++) *p = tolower((unsigned char)*p); @@ -2086,26 +2810,61 @@ post_dt(POST_ARGS) /* Ignore fourth and later arguments. */ if ((nn = nn->next) != NULL) - mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, + mandoc_msg(MANDOCERR_ARG_EXCESS, nn->line, nn->pos, "Dt ... %s", nn->string); - -out: - roff_node_delete(mdoc, n); } static void post_bx(POST_ARGS) { - struct roff_node *n; + struct roff_node *n, *nch; + const char *macro; + post_delim_nb(mdoc); + + n = mdoc->last; + nch = n->child; + + if (nch != NULL) { + macro = !strcmp(nch->string, "Open") ? "Ox" : + !strcmp(nch->string, "Net") ? "Nx" : + !strcmp(nch->string, "Free") ? "Fx" : + !strcmp(nch->string, "DragonFly") ? "Dx" : NULL; + if (macro != NULL) + mandoc_msg(MANDOCERR_BX, + n->line, n->pos, "%s", macro); + mdoc->last = nch; + nch = nch->next; + mdoc->next = ROFF_NEXT_SIBLING; + roff_elem_alloc(mdoc, n->line, n->pos, MDOC_Ns); + mdoc->last->flags |= NODE_NOSRC; + mdoc->next = ROFF_NEXT_SIBLING; + } else + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, n->line, n->pos, "BSD"); + mdoc->last->flags |= NODE_NOSRC; + + if (nch == NULL) { + mdoc->last = n; + return; + } + + roff_elem_alloc(mdoc, n->line, n->pos, MDOC_Ns); + mdoc->last->flags |= NODE_NOSRC; + mdoc->next = ROFF_NEXT_SIBLING; + roff_word_alloc(mdoc, n->line, n->pos, "-"); + mdoc->last->flags |= NODE_NOSRC; + roff_elem_alloc(mdoc, n->line, n->pos, MDOC_Ns); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = n; + /* * Make `Bx's second argument always start with an uppercase * letter. Groff checks if it's an "accepted" term, but we just * uppercase blindly. */ - if ((n = mdoc->last->child) != NULL && (n = n->next) != NULL) - *n->string = (char)toupper((unsigned char)*n->string); + *nch->string = (char)toupper((unsigned char)*nch->string); } static void @@ -2118,13 +2877,15 @@ post_os(POST_ARGS) struct roff_node *n; n = mdoc->last; + n->flags |= NODE_NOPRT; + if (mdoc->meta.os != NULL) - mandoc_msg(MANDOCERR_PROLOG_REP, mdoc->parse, - n->line, n->pos, "Os"); + mandoc_msg(MANDOCERR_PROLOG_REP, n->line, n->pos, "Os"); else if (mdoc->flags & MDOC_PBODY) - mandoc_msg(MANDOCERR_PROLOG_LATE, mdoc->parse, - n->line, n->pos, "Os"); + mandoc_msg(MANDOCERR_PROLOG_LATE, n->line, n->pos, "Os"); + post_delim(mdoc); + /* * Set the operating system by way of the `Os' macro. * The order of precedence is: @@ -2140,8 +2901,8 @@ post_os(POST_ARGS) if (mdoc->meta.os) goto out; - if (mdoc->defos) { - mdoc->meta.os = mandoc_strdup(mdoc->defos); + if (mdoc->os_s != NULL) { + mdoc->meta.os = mandoc_strdup(mdoc->os_s); goto out; } @@ -2150,8 +2911,7 @@ post_os(POST_ARGS) #else /*!OSNAME */ if (defbuf == NULL) { if (uname(&utsname) == -1) { - mandoc_msg(MANDOCERR_OS_UNAME, mdoc->parse, - n->line, n->pos, "Os"); + mandoc_msg(MANDOCERR_OS_UNAME, n->line, n->pos, "Os"); defbuf = mandoc_strdup("UNKNOWN"); } else mandoc_asprintf(&defbuf, "%s %s", @@ -2161,33 +2921,39 @@ post_os(POST_ARGS) #endif /*!OSNAME*/ out: - roff_node_delete(mdoc, n); -} + if (mdoc->meta.os_e == MANDOC_OS_OTHER) { + if (strstr(mdoc->meta.os, "OpenBSD") != NULL) + mdoc->meta.os_e = MANDOC_OS_OPENBSD; + else if (strstr(mdoc->meta.os, "NetBSD") != NULL) + mdoc->meta.os_e = MANDOC_OS_NETBSD; + } -/* - * If no argument is provided, - * fill in the name of the current manual page. - */ -static void -post_ex(POST_ARGS) -{ - struct roff_node *n; + /* + * This is the earliest point where we can check + * Mdocdate conventions because we don't know + * the operating system earlier. + */ - post_std(mdoc); - - n = mdoc->last; if (n->child != NULL) - return; + mandoc_msg(MANDOCERR_OS_ARG, n->child->line, n->child->pos, + "Os %s (%s)", n->child->string, + mdoc->meta.os_e == MANDOC_OS_OPENBSD ? + "OpenBSD" : "NetBSD"); - if (mdoc->meta.name == NULL) { - mandoc_msg(MANDOCERR_EX_NONAME, mdoc->parse, - n->line, n->pos, "Ex"); + while (n->tok != MDOC_Dd) + if ((n = n->prev) == NULL) + return; + if ((n = n->child) == NULL) return; + if (strncmp(n->string, "$" "Mdocdate", 9)) { + if (mdoc->meta.os_e == MANDOC_OS_OPENBSD) + mandoc_msg(MANDOCERR_MDOCDATE_MISSING, n->line, + n->pos, "Dd %s (OpenBSD)", n->string); + } else { + if (mdoc->meta.os_e == MANDOC_OS_NETBSD) + mandoc_msg(MANDOCERR_MDOCDATE, n->line, + n->pos, "Dd %s (NetBSD)", n->string); } - - mdoc->next = ROFF_NEXT_CHILD; - roff_word_alloc(mdoc, n->line, n->pos, mdoc->meta.name); - mdoc->last = n; } enum roff_sec @@ -2203,7 +2969,7 @@ mdoc_a2sec(const char *p) } static size_t -macro2len(int macro) +macro2len(enum roff_tok macro) { switch (macro) {