[BACK]Return to man_validate.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

File: [cvsweb.bsd.lv] / mandoc / man_validate.c (download)

Revision 1.139, Tue Dec 4 02:53:51 2018 UTC (5 years, 4 months ago) by schwarze
Branch: MAIN
Changes since 1.138: +48 -39 lines

Clean up the validation of .Pp, .PP, .sp, and .br.  Make sure all
combinations are handled, and are handled in a systematic manner.
This resolves some erratic duplicate handling, handles a number of
missing cases, and improves diagnostics in various respects.

Move validation of .br and .sp to the roff validation module
rather than doing that twice in the mdoc and man validation modules.
Move the node relinking function to the roff library where it belongs.

In validation functions, only look at the node itself, at previous
nodes, and at descendants, not at following nodes or ancestors,
such that only nodes are inspected which are already validated.

/*	$OpenBSD$ */
/*
 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2010, 2012-2018 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include "config.h"

#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "mandoc_aux.h"
#include "mandoc.h"
#include "roff.h"
#include "man.h"
#include "libmandoc.h"
#include "roff_int.h"
#include "libman.h"

#define	CHKARGS	  struct roff_man *man, struct roff_node *n

typedef	void	(*v_check)(CHKARGS);

static	void	  check_abort(CHKARGS);
static	void	  check_par(CHKARGS);
static	void	  check_part(CHKARGS);
static	void	  check_root(CHKARGS);
static	void	  check_text(CHKARGS);

static	void	  post_AT(CHKARGS);
static	void	  post_IP(CHKARGS);
static	void	  post_OP(CHKARGS);
static	void	  post_SH(CHKARGS);
static	void	  post_TH(CHKARGS);
static	void	  post_UC(CHKARGS);
static	void	  post_UR(CHKARGS);
static	void	  post_in(CHKARGS);

static	const v_check man_valids[MAN_MAX - MAN_TH] = {
	post_TH,    /* TH */
	post_SH,    /* SH */
	post_SH,    /* SS */
	NULL,       /* TP */
	NULL,       /* TQ */
	check_abort,/* LP */
	check_par,  /* PP */
	check_abort,/* P */
	post_IP,    /* IP */
	NULL,       /* HP */
	NULL,       /* SM */
	NULL,       /* SB */
	NULL,       /* BI */
	NULL,       /* IB */
	NULL,       /* BR */
	NULL,       /* RB */
	NULL,       /* R */
	NULL,       /* B */
	NULL,       /* I */
	NULL,       /* IR */
	NULL,       /* RI */
	NULL,       /* nf */
	NULL,       /* fi */
	NULL,       /* RE */
	check_part, /* RS */
	NULL,       /* DT */
	post_UC,    /* UC */
	NULL,       /* PD */
	post_AT,    /* AT */
	post_in,    /* in */
	NULL,       /* SY */
	NULL,       /* YS */
	post_OP,    /* OP */
	NULL,       /* EX */
	NULL,       /* EE */
	post_UR,    /* UR */
	NULL,       /* UE */
	post_UR,    /* MT */
	NULL,       /* ME */
};


/* Validate the subtree rooted at man->last. */
void
man_node_validate(struct roff_man *man)
{
	struct roff_node *n;
	const v_check	 *cp;

	/*
	 * Translate obsolete macros such that later code
	 * does not need to look for them.
	 */

	n = man->last;
	switch (n->tok) {
	case MAN_LP:
	case MAN_P:
		n->tok = MAN_PP;
		break;
	default:
		break;
	}

	/*
	 * Iterate over all children, recursing into each one
	 * in turn, depth-first.
	 */

	man->last = man->last->child;
	while (man->last != NULL) {
		man_node_validate(man);
		if (man->last == n)
			man->last = man->last->child;
		else
			man->last = man->last->next;
	}

	/* Finally validate the macro itself. */

	man->last = n;
	man->next = ROFF_NEXT_SIBLING;
	switch (n->type) {
	case ROFFT_TEXT:
		check_text(man, n);
		break;
	case ROFFT_ROOT:
		check_root(man, n);
		break;
	case ROFFT_COMMENT:
	case ROFFT_EQN:
	case ROFFT_TBL:
		break;
	default:
		if (n->tok < ROFF_MAX) {
			roff_validate(man);
			break;
		}
		assert(n->tok >= MAN_TH && n->tok < MAN_MAX);
		cp = man_valids + (n->tok - MAN_TH);
		if (*cp)
			(*cp)(man, n);
		if (man->last == n)
			man_state(man, n);
		break;
	}
}

static void
check_root(CHKARGS)
{
	assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0);

	if (n->last == NULL || n->last->type == ROFFT_COMMENT)
		mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse,
		    n->line, n->pos, NULL);
	else
		man->meta.hasbody = 1;

	if (NULL == man->meta.title) {
		mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
		    n->line, n->pos, NULL);

		/*
		 * If a title hasn't been set, do so now (by
		 * implication, date and section also aren't set).
		 */

		man->meta.title = mandoc_strdup("");
		man->meta.msec = mandoc_strdup("");
		man->meta.date = man->quick ? mandoc_strdup("") :
		    mandoc_normdate(man, NULL, n->line, n->pos);
	}

	if (man->meta.os_e &&
	    (man->meta.rcsids & (1 << man->meta.os_e)) == 0)
		mandoc_msg(MANDOCERR_RCS_MISSING, man->parse, 0, 0,
		    man->meta.os_e == MANDOC_OS_OPENBSD ?
		    "(OpenBSD)" : "(NetBSD)");
}

static void
check_abort(CHKARGS)
{
	abort();
}

static void
check_text(CHKARGS)
{
	char		*cp, *p;

	if (MAN_LITERAL & man->flags)
		return;

	cp = n->string;
	for (p = cp; NULL != (p = strchr(p, '\t')); p++)
		mandoc_msg(MANDOCERR_FI_TAB, man->parse,
		    n->line, n->pos + (p - cp), NULL);
}

static void
post_OP(CHKARGS)
{

	if (n->child == NULL)
		mandoc_msg(MANDOCERR_OP_EMPTY, man->parse,
		    n->line, n->pos, "OP");
	else if (n->child->next != NULL && n->child->next->next != NULL) {
		n = n->child->next->next;
		mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse,
		    n->line, n->pos, "OP ... %s", n->string);
	}
}

static void
post_SH(CHKARGS)
{
	struct roff_node	*nc;

	if (n->type != ROFFT_BODY || (nc = n->child) == NULL)
		return;

	if (nc->tok == MAN_PP && nc->body->child != NULL) {
		while (nc->body->last != NULL) {
			man->next = ROFF_NEXT_CHILD;
			roff_node_relink(man, nc->body->last);
			man->last = n;
		}
	}

	if (nc->tok == MAN_PP || nc->tok == ROFF_sp || nc->tok == ROFF_br) {
		mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse,
		    nc->line, nc->pos, "%s after %s",
		    roff_name[nc->tok], roff_name[n->tok]);
		roff_node_delete(man, nc);
	}

	/*
	 * Trailing PP is empty, so it is deleted by check_par().
	 * Trailing sp is significant.
	 */

	if ((nc = n->last) != NULL && nc->tok == ROFF_br) {
		mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse,
		    nc->line, nc->pos, "%s at the end of %s",
		    roff_name[nc->tok], roff_name[n->tok]);
		roff_node_delete(man, nc);
	}
}

static void
post_UR(CHKARGS)
{
	if (n->type == ROFFT_HEAD && n->child == NULL)
		mandoc_msg(MANDOCERR_UR_NOHEAD, man->parse,
		    n->line, n->pos, roff_name[n->tok]);
	check_part(man, n);
}

static void
check_part(CHKARGS)
{

	if (n->type == ROFFT_BODY && n->child == NULL)
		mandoc_msg(MANDOCERR_BLK_EMPTY, man->parse,
		    n->line, n->pos, roff_name[n->tok]);
}

static void
check_par(CHKARGS)
{

	switch (n->type) {
	case ROFFT_BLOCK:
		if (n->body->child == NULL)
			roff_node_delete(man, n);
		break;
	case ROFFT_BODY:
		if (n->child != NULL &&
		    (n->child->tok == ROFF_sp || n->child->tok == ROFF_br)) {
			mandoc_vmsg(MANDOCERR_PAR_SKIP,
			    man->parse, n->child->line, n->child->pos,
			    "%s after %s", roff_name[n->child->tok],
			    roff_name[n->tok]);
			roff_node_delete(man, n->child);
		}
		if (n->child == NULL)
			mandoc_vmsg(MANDOCERR_PAR_SKIP,
			    man->parse, n->line, n->pos,
			    "%s empty", roff_name[n->tok]);
		break;
	case ROFFT_HEAD:
		if (n->child != NULL)
			mandoc_vmsg(MANDOCERR_ARG_SKIP,
			    man->parse, n->line, n->pos, "%s %s%s",
			    roff_name[n->tok], n->child->string,
			    n->child->next != NULL ? " ..." : "");
		break;
	default:
		break;
	}
}

static void
post_IP(CHKARGS)
{

	switch (n->type) {
	case ROFFT_BLOCK:
		if (n->head->child == NULL && n->body->child == NULL)
			roff_node_delete(man, n);
		break;
	case ROFFT_BODY:
		if (n->parent->head->child == NULL && n->child == NULL)
			mandoc_vmsg(MANDOCERR_PAR_SKIP,
			    man->parse, n->line, n->pos,
			    "%s empty", roff_name[n->tok]);
		break;
	default:
		break;
	}
}

static void
post_TH(CHKARGS)
{
	struct roff_node *nb;
	const char	*p;

	free(man->meta.title);
	free(man->meta.vol);
	free(man->meta.os);
	free(man->meta.msec);
	free(man->meta.date);

	man->meta.title = man->meta.vol = man->meta.date =
	    man->meta.msec = man->meta.os = NULL;

	nb = n;

	/* ->TITLE<- MSEC DATE OS VOL */

	n = n->child;
	if (n && n->string) {
		for (p = n->string; '\0' != *p; p++) {
			/* Only warn about this once... */
			if (isalpha((unsigned char)*p) &&
			    ! isupper((unsigned char)*p)) {
				mandoc_vmsg(MANDOCERR_TITLE_CASE,
				    man->parse, n->line,
				    n->pos + (p - n->string),
				    "TH %s", n->string);
				break;
			}
		}
		man->meta.title = mandoc_strdup(n->string);
	} else {
		man->meta.title = mandoc_strdup("");
		mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
		    nb->line, nb->pos, "TH");
	}

	/* TITLE ->MSEC<- DATE OS VOL */

	if (n)
		n = n->next;
	if (n && n->string)
		man->meta.msec = mandoc_strdup(n->string);
	else {
		man->meta.msec = mandoc_strdup("");
		mandoc_vmsg(MANDOCERR_MSEC_MISSING, man->parse,
		    nb->line, nb->pos, "TH %s", man->meta.title);
	}

	/* TITLE MSEC ->DATE<- OS VOL */

	if (n)
		n = n->next;
	if (n && n->string && '\0' != n->string[0]) {
		man->meta.date = man->quick ?
		    mandoc_strdup(n->string) :
		    mandoc_normdate(man, n->string, n->line, n->pos);
	} else {
		man->meta.date = mandoc_strdup("");
		mandoc_msg(MANDOCERR_DATE_MISSING, man->parse,
		    n ? n->line : nb->line,
		    n ? n->pos : nb->pos, "TH");
	}

	/* TITLE MSEC DATE ->OS<- VOL */

	if (n && (n = n->next))
		man->meta.os = mandoc_strdup(n->string);
	else if (man->os_s != NULL)
		man->meta.os = mandoc_strdup(man->os_s);
	if (man->meta.os_e == MANDOC_OS_OTHER && man->meta.os != NULL) {
		if (strstr(man->meta.os, "OpenBSD") != NULL)
			man->meta.os_e = MANDOC_OS_OPENBSD;
		else if (strstr(man->meta.os, "NetBSD") != NULL)
			man->meta.os_e = MANDOC_OS_NETBSD;
	}

	/* TITLE MSEC DATE OS ->VOL<- */
	/* If missing, use the default VOL name for MSEC. */

	if (n && (n = n->next))
		man->meta.vol = mandoc_strdup(n->string);
	else if ('\0' != man->meta.msec[0] &&
	    (NULL != (p = mandoc_a2msec(man->meta.msec))))
		man->meta.vol = mandoc_strdup(p);

	if (n != NULL && (n = n->next) != NULL)
		mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse,
		    n->line, n->pos, "TH ... %s", n->string);

	/*
	 * Remove the `TH' node after we've processed it for our
	 * meta-data.
	 */
	roff_node_delete(man, man->last);
}

static void
post_UC(CHKARGS)
{
	static const char * const bsd_versions[] = {
	    "3rd Berkeley Distribution",
	    "4th Berkeley Distribution",
	    "4.2 Berkeley Distribution",
	    "4.3 Berkeley Distribution",
	    "4.4 Berkeley Distribution",
	};

	const char	*p, *s;

	n = n->child;

	if (n == NULL || n->type != ROFFT_TEXT)
		p = bsd_versions[0];
	else {
		s = n->string;
		if (0 == strcmp(s, "3"))
			p = bsd_versions[0];
		else if (0 == strcmp(s, "4"))
			p = bsd_versions[1];
		else if (0 == strcmp(s, "5"))
			p = bsd_versions[2];
		else if (0 == strcmp(s, "6"))
			p = bsd_versions[3];
		else if (0 == strcmp(s, "7"))
			p = bsd_versions[4];
		else
			p = bsd_versions[0];
	}

	free(man->meta.os);
	man->meta.os = mandoc_strdup(p);
}

static void
post_AT(CHKARGS)
{
	static const char * const unix_versions[] = {
	    "7th Edition",
	    "System III",
	    "System V",
	    "System V Release 2",
	};

	struct roff_node *nn;
	const char	*p, *s;

	n = n->child;

	if (n == NULL || n->type != ROFFT_TEXT)
		p = unix_versions[0];
	else {
		s = n->string;
		if (0 == strcmp(s, "3"))
			p = unix_versions[0];
		else if (0 == strcmp(s, "4"))
			p = unix_versions[1];
		else if (0 == strcmp(s, "5")) {
			nn = n->next;
			if (nn != NULL &&
			    nn->type == ROFFT_TEXT &&
			    nn->string[0] != '\0')
				p = unix_versions[3];
			else
				p = unix_versions[2];
		} else
			p = unix_versions[0];
	}

	free(man->meta.os);
	man->meta.os = mandoc_strdup(p);
}

static void
post_in(CHKARGS)
{
	char	*s;

	if (n->parent->tok != MAN_TP ||
	    n->parent->type != ROFFT_HEAD ||
	    n->child == NULL ||
	    *n->child->string == '+' ||
	    *n->child->string == '-')
		return;
	mandoc_asprintf(&s, "+%s", n->child->string);
	free(n->child->string);
	n->child->string = s;
}