[BACK]Return to mansearch.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

File: [cvsweb.bsd.lv] / mandoc / mansearch.c (download)

Revision 1.75, Wed Jul 19 14:05:13 2017 UTC (6 years, 8 months ago) by schwarze
Branch: MAIN
CVS Tags: VERSION_1_14_2
Changes since 1.74: +19 -3 lines

Prefer arch-dependent over arch-independent pages if the name priority,
the section number, and all names match.  Changes little on installed
systems except the ordering of apropos(1) results, because we install
base and Xenocara manuals in different trees, but fixes lookup of pages
like apm(4) vs. apm(4/amd64) in man.cgi(8).
Issue discovered by martian67 on freenode and reported via tj@.

/*	$OpenBSD: mansearch.c,v 1.50 2016/07/09 15:23:36 schwarze Exp $ */
/*
 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2013-2017 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include "config.h"

#include <sys/mman.h>
#include <sys/types.h>

#include <assert.h>
#if HAVE_ERR
#include <err.h>
#endif
#include <errno.h>
#include <fcntl.h>
#include <glob.h>
#include <limits.h>
#include <regex.h>
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "mandoc.h"
#include "mandoc_aux.h"
#include "mandoc_ohash.h"
#include "manconf.h"
#include "mansearch.h"
#include "dbm.h"

struct	expr {
	/* Used for terms: */
	struct dbm_match match;   /* Match type and expression. */
	uint64_t	 bits;    /* Type mask. */
	/* Used for OR and AND groups: */
	struct expr	*next;    /* Next child in the parent group. */
	struct expr	*child;   /* First child in this group. */
	enum { EXPR_TERM, EXPR_OR, EXPR_AND } type;
};

const char *const mansearch_keynames[KEY_MAX] = {
	"arch",	"sec",	"Xr",	"Ar",	"Fa",	"Fl",	"Dv",	"Fn",
	"Ic",	"Pa",	"Cm",	"Li",	"Em",	"Cd",	"Va",	"Ft",
	"Tn",	"Er",	"Ev",	"Sy",	"Sh",	"In",	"Ss",	"Ox",
	"An",	"Mt",	"St",	"Bx",	"At",	"Nx",	"Fx",	"Lk",
	"Ms",	"Bsx",	"Dx",	"Rs",	"Vt",	"Lb",	"Nm",	"Nd"
};


static	struct ohash	*manmerge(struct expr *, struct ohash *);
static	struct ohash	*manmerge_term(struct expr *, struct ohash *);
static	struct ohash	*manmerge_or(struct expr *, struct ohash *);
static	struct ohash	*manmerge_and(struct expr *, struct ohash *);
static	char		*buildnames(const struct dbm_page *);
static	char		*buildoutput(size_t, struct dbm_page *);
static	size_t		 lstlen(const char *, size_t);
static	void		 lstcat(char *, size_t *, const char *, const char *);
static	int		 lstmatch(const char *, const char *);
static	struct expr	*exprcomp(const struct mansearch *,
				int, char *[], int *);
static	struct expr	*expr_and(const struct mansearch *,
				int, char *[], int *);
static	struct expr	*exprterm(const struct mansearch *,
				int, char *[], int *);
static	void		 exprfree(struct expr *);
static	int		 manpage_compare(const void *, const void *);


int
mansearch(const struct mansearch *search,
		const struct manpaths *paths,
		int argc, char *argv[],
		struct manpage **res, size_t *sz)
{
	char		 buf[PATH_MAX];
	struct dbm_res	*rp;
	struct expr	*e;
	struct dbm_page	*page;
	struct manpage	*mpage;
	struct ohash	*htab;
	size_t		 cur, i, maxres, outkey;
	unsigned int	 slot;
	int		 argi, chdir_status, getcwd_status, im;

	argi = 0;
	if ((e = exprcomp(search, argc, argv, &argi)) == NULL) {
		*sz = 0;
		return 0;
	}

	cur = maxres = 0;
	if (res != NULL)
		*res = NULL;

	outkey = KEY_Nd;
	if (search->outkey != NULL)
		for (im = 0; im < KEY_MAX; im++)
			if (0 == strcasecmp(search->outkey,
			    mansearch_keynames[im])) {
				outkey = im;
				break;
			}

	/*
	 * Remember the original working directory, if possible.
	 * This will be needed if the second or a later directory
	 * is given as a relative path.
	 * Do not error out if the current directory is not
	 * searchable: Maybe it won't be needed after all.
	 */

	if (getcwd(buf, PATH_MAX) == NULL) {
		getcwd_status = 0;
		(void)strlcpy(buf, strerror(errno), sizeof(buf));
	} else
		getcwd_status = 1;

	/*
	 * Loop over the directories (containing databases) for us to
	 * search.
	 * Don't let missing/bad databases/directories phase us.
	 * In each, try to open the resident database and, if it opens,
	 * scan it for our match expression.
	 */

	chdir_status = 0;
	for (i = 0; i < paths->sz; i++) {
		if (chdir_status && paths->paths[i][0] != '/') {
			if ( ! getcwd_status) {
				warnx("%s: getcwd: %s", paths->paths[i], buf);
				continue;
			} else if (chdir(buf) == -1) {
				warn("%s", buf);
				continue;
			}
		}
		if (chdir(paths->paths[i]) == -1) {
			warn("%s", paths->paths[i]);
			continue;
		}
		chdir_status = 1;

		if (dbm_open(MANDOC_DB) == -1) {
			if (errno != ENOENT)
				warn("%s/%s", paths->paths[i], MANDOC_DB);
			continue;
		}

		if ((htab = manmerge(e, NULL)) == NULL) {
			dbm_close();
			continue;
		}

		for (rp = ohash_first(htab, &slot); rp != NULL;
		    rp = ohash_next(htab, &slot)) {
			page = dbm_page_get(rp->page);

			if (lstmatch(search->sec, page->sect) == 0 ||
			    lstmatch(search->arch, page->arch) == 0)
				continue;

			if (res == NULL) {
				cur = 1;
				break;
			}
			if (cur + 1 > maxres) {
				maxres += 1024;
				*res = mandoc_reallocarray(*res,
				    maxres, sizeof(**res));
			}
			mpage = *res + cur;
			mandoc_asprintf(&mpage->file, "%s/%s",
			    paths->paths[i], page->file + 1);
			mpage->names = buildnames(page);
			mpage->output = buildoutput(outkey, page);
			mpage->ipath = i;
			mpage->bits = rp->bits;
			mpage->sec = *page->sect - '0';
			if (mpage->sec < 0 || mpage->sec > 9)
				mpage->sec = 10;
			mpage->form = *page->file;
			free(rp);
			cur++;
		}
		ohash_delete(htab);
		free(htab);
		dbm_close();

		/*
		 * In man(1) mode, prefer matches in earlier trees
		 * over matches in later trees.
		 */

		if (cur && search->firstmatch)
			break;
	}
	if (res != NULL)
		qsort(*res, cur, sizeof(struct manpage), manpage_compare);
	if (chdir_status && getcwd_status && chdir(buf) == -1)
		warn("%s", buf);
	exprfree(e);
	*sz = cur;
	return res != NULL || cur;
}

/*
 * Merge the results for the expression tree rooted at e
 * into the the result list htab.
 */
static struct ohash *
manmerge(struct expr *e, struct ohash *htab)
{
	switch (e->type) {
	case EXPR_TERM:
		return manmerge_term(e, htab);
	case EXPR_OR:
		return manmerge_or(e->child, htab);
	case EXPR_AND:
		return manmerge_and(e->child, htab);
	default:
		abort();
	}
}

static struct ohash *
manmerge_term(struct expr *e, struct ohash *htab)
{
	struct dbm_res	 res, *rp;
	uint64_t	 ib;
	unsigned int	 slot;
	int		 im;

	if (htab == NULL) {
		htab = mandoc_malloc(sizeof(*htab));
		mandoc_ohash_init(htab, 4, offsetof(struct dbm_res, page));
	}

	for (im = 0, ib = 1; im < KEY_MAX; im++, ib <<= 1) {
		if ((e->bits & ib) == 0)
			continue;

		switch (ib) {
		case TYPE_arch:
			dbm_page_byarch(&e->match);
			break;
		case TYPE_sec:
			dbm_page_bysect(&e->match);
			break;
		case TYPE_Nm:
			dbm_page_byname(&e->match);
			break;
		case TYPE_Nd:
			dbm_page_bydesc(&e->match);
			break;
		default:
			dbm_page_bymacro(im - 2, &e->match);
			break;
		}

		/*
		 * When hashing for deduplication, use the unique
		 * page ID itself instead of a hash function;
		 * that is quite efficient.
		 */

		for (;;) {
			res = dbm_page_next();
			if (res.page == -1)
				break;
			slot = ohash_lookup_memory(htab,
			    (char *)&res, sizeof(res.page), res.page);
			if ((rp = ohash_find(htab, slot)) != NULL) {
				rp->bits |= res.bits;
				continue;
			}
			rp = mandoc_malloc(sizeof(*rp));
			*rp = res;
			ohash_insert(htab, slot, rp);
		}
	}
	return htab;
}

static struct ohash *
manmerge_or(struct expr *e, struct ohash *htab)
{
	while (e != NULL) {
		htab = manmerge(e, htab);
		e = e->next;
	}
	return htab;
}

static struct ohash *
manmerge_and(struct expr *e, struct ohash *htab)
{
	struct ohash	*hand, *h1, *h2;
	struct dbm_res	*res;
	unsigned int	 slot1, slot2;

	/* Evaluate the first term of the AND clause. */

	hand = manmerge(e, NULL);

	while ((e = e->next) != NULL) {

		/* Evaluate the next term and prepare for ANDing. */

		h2 = manmerge(e, NULL);
		if (ohash_entries(h2) < ohash_entries(hand)) {
			h1 = h2;
			h2 = hand;
		} else
			h1 = hand;
		hand = mandoc_malloc(sizeof(*hand));
		mandoc_ohash_init(hand, 4, offsetof(struct dbm_res, page));

		/* Keep all pages that are in both result sets. */

		for (res = ohash_first(h1, &slot1); res != NULL;
		    res = ohash_next(h1, &slot1)) {
			if (ohash_find(h2, ohash_lookup_memory(h2,
			    (char *)res, sizeof(res->page),
			    res->page)) == NULL)
				free(res);
			else
				ohash_insert(hand, ohash_lookup_memory(hand,
				    (char *)res, sizeof(res->page),
				    res->page), res);
		}

		/* Discard the merged results. */

		for (res = ohash_first(h2, &slot2); res != NULL;
		    res = ohash_next(h2, &slot2))
			free(res);
		ohash_delete(h2);
		free(h2);
		ohash_delete(h1);
		free(h1);
	}

	/* Merge the result of the AND into htab. */

	if (htab == NULL)
		return hand;

	for (res = ohash_first(hand, &slot1); res != NULL;
	    res = ohash_next(hand, &slot1)) {
		slot2 = ohash_lookup_memory(htab,
		    (char *)res, sizeof(res->page), res->page);
		if (ohash_find(htab, slot2) == NULL)
			ohash_insert(htab, slot2, res);
		else
			free(res);
	}

	/* Discard the merged result. */

	ohash_delete(hand);
	free(hand);
	return htab;
}

void
mansearch_free(struct manpage *res, size_t sz)
{
	size_t	 i;

	for (i = 0; i < sz; i++) {
		free(res[i].file);
		free(res[i].names);
		free(res[i].output);
	}
	free(res);
}

static int
manpage_compare(const void *vp1, const void *vp2)
{
	const struct manpage	*mp1, *mp2;
	const char		*cp1, *cp2;
	size_t			 sz1, sz2;
	int			 diff;

	mp1 = vp1;
	mp2 = vp2;
	if ((diff = mp2->bits - mp1->bits) ||
	    (diff = mp1->sec - mp2->sec))
		return diff;

	/* Fall back to alphabetic ordering of names. */
	sz1 = strcspn(mp1->names, "(");
	sz2 = strcspn(mp2->names, "(");
	if (sz1 < sz2)
		sz1 = sz2;
	if ((diff = strncasecmp(mp1->names, mp2->names, sz1)))
		return diff;

	/* For identical names and sections, prefer arch-dependent. */
	cp1 = strchr(mp1->names + sz1, '/');
	cp2 = strchr(mp2->names + sz2, '/');
	return cp1 != NULL && cp2 != NULL ? strcasecmp(cp1, cp2) :
	    cp1 != NULL ? -1 : cp2 != NULL ? 1 : 0;
}

static char *
buildnames(const struct dbm_page *page)
{
	char	*buf;
	size_t	 i, sz;

	sz = lstlen(page->name, 2) + 1 + lstlen(page->sect, 2) +
	    (page->arch == NULL ? 0 : 1 + lstlen(page->arch, 2)) + 2;
	buf = mandoc_malloc(sz);
	i = 0;
	lstcat(buf, &i, page->name, ", ");
	buf[i++] = '(';
	lstcat(buf, &i, page->sect, ", ");
	if (page->arch != NULL) {
		buf[i++] = '/';
		lstcat(buf, &i, page->arch, ", ");
	}
	buf[i++] = ')';
	buf[i++] = '\0';
	assert(i == sz);
	return buf;
}

/*
 * Count the buffer space needed to print the NUL-terminated
 * list of NUL-terminated strings, when printing sep separator
 * characters between strings.
 */
static size_t
lstlen(const char *cp, size_t sep)
{
	size_t	 sz;

	for (sz = 0;; sz++) {
		if (cp[0] == '\0') {
			if (cp[1] == '\0')
				break;
			sz += sep - 1;
		} else if (cp[0] < ' ')
			sz--;
		cp++;
	}
	return sz;
}

/*
 * Print the NUL-terminated list of NUL-terminated strings
 * into the buffer, seperating strings with sep.
 */
static void
lstcat(char *buf, size_t *i, const char *cp, const char *sep)
{
	const char *s;

	for (;;) {
		if (cp[0] == '\0') {
			if (cp[1] == '\0')
				break;
			s = sep;
			while (*s != '\0')
				buf[(*i)++] = *s++;
		} else if (cp[0] >= ' ')
			buf[(*i)++] = cp[0];
		cp++;
	}
}

/*
 * Return 1 if the string *want occurs in any of the strings
 * in the NUL-terminated string list *have, or 0 otherwise.
 * If either argument is NULL or empty, assume no filtering
 * is desired and return 1.
 */
static int
lstmatch(const char *want, const char *have)
{
        if (want == NULL || have == NULL || *have == '\0')
                return 1;
        while (*have != '\0') {
                if (strcasestr(have, want) != NULL)
                        return 1;
                have = strchr(have, '\0') + 1;
        }
        return 0;
}

/*
 * Build a list of values taken by the macro im in the manual page.
 */
static char *
buildoutput(size_t im, struct dbm_page *page)
{
	const char	*oldoutput, *sep, *input;
	char		*output, *newoutput, *value;
	size_t		 sz, i;

	switch (im) {
	case KEY_Nd:
		return mandoc_strdup(page->desc);
	case KEY_Nm:
		input = page->name;
		break;
	case KEY_sec:
		input = page->sect;
		break;
	case KEY_arch:
		input = page->arch;
		if (input == NULL)
			input = "all\0";
		break;
	default:
		input = NULL;
		break;
	}

	if (input != NULL) {
		sz = lstlen(input, 3) + 1;
		output = mandoc_malloc(sz);
		i = 0;
		lstcat(output, &i, input, " # ");
		output[i++] = '\0';
		assert(i == sz);
		return output;
	}

	output = NULL;
	dbm_macro_bypage(im - 2, page->addr);
	while ((value = dbm_macro_next()) != NULL) {
		if (output == NULL) {
			oldoutput = "";
			sep = "";
		} else {
			oldoutput = output;
			sep = " # ";
		}
		mandoc_asprintf(&newoutput, "%s%s%s", oldoutput, sep, value);
		free(output);
		output = newoutput;
	}
	return output;
}

/*
 * Compile a set of string tokens into an expression.
 * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
 * "(", "foo=bar", etc.).
 */
static struct expr *
exprcomp(const struct mansearch *search, int argc, char *argv[], int *argi)
{
	struct expr	*parent, *child;
	int		 needterm, nested;

	if ((nested = *argi) == argc)
		return NULL;
	needterm = 1;
	parent = child = NULL;
	while (*argi < argc) {
		if (strcmp(")", argv[*argi]) == 0) {
			if (needterm)
				warnx("missing term "
				    "before closing parenthesis");
			needterm = 0;
			if (nested)
				break;
			warnx("ignoring unmatched right parenthesis");
			++*argi;
			continue;
		}
		if (strcmp("-o", argv[*argi]) == 0) {
			if (needterm) {
				if (*argi > 0)
					warnx("ignoring -o after %s",
					    argv[*argi - 1]);
				else
					warnx("ignoring initial -o");
			}
			needterm = 1;
			++*argi;
			continue;
		}
		needterm = 0;
		if (child == NULL) {
			child = expr_and(search, argc, argv, argi);
			continue;
		}
		if (parent == NULL) {
			parent = mandoc_calloc(1, sizeof(*parent));
			parent->type = EXPR_OR;
			parent->next = NULL;
			parent->child = child;
		}
		child->next = expr_and(search, argc, argv, argi);
		child = child->next;
	}
	if (needterm && *argi)
		warnx("ignoring trailing %s", argv[*argi - 1]);
	return parent == NULL ? child : parent;
}

static struct expr *
expr_and(const struct mansearch *search, int argc, char *argv[], int *argi)
{
	struct expr	*parent, *child;
	int		 needterm;

	needterm = 1;
	parent = child = NULL;
	while (*argi < argc) {
		if (strcmp(")", argv[*argi]) == 0) {
			if (needterm)
				warnx("missing term "
				    "before closing parenthesis");
			needterm = 0;
			break;
		}
		if (strcmp("-o", argv[*argi]) == 0)
			break;
		if (strcmp("-a", argv[*argi]) == 0) {
			if (needterm) {
				if (*argi > 0)
					warnx("ignoring -a after %s",
					    argv[*argi - 1]);
				else
					warnx("ignoring initial -a");
			}
			needterm = 1;
			++*argi;
			continue;
		}
		if (needterm == 0)
			break;
		if (child == NULL) {
			child = exprterm(search, argc, argv, argi);
			if (child != NULL)
				needterm = 0;
			continue;
		}
		needterm = 0;
		if (parent == NULL) {
			parent = mandoc_calloc(1, sizeof(*parent));
			parent->type = EXPR_AND;
			parent->next = NULL;
			parent->child = child;
		}
		child->next = exprterm(search, argc, argv, argi);
		if (child->next != NULL) {
			child = child->next;
			needterm = 0;
		}
	}
	if (needterm && *argi)
		warnx("ignoring trailing %s", argv[*argi - 1]);
	return parent == NULL ? child : parent;
}

static struct expr *
exprterm(const struct mansearch *search, int argc, char *argv[], int *argi)
{
	char		 errbuf[BUFSIZ];
	struct expr	*e;
	char		*key, *val;
	uint64_t	 iterbit;
	int		 cs, i, irc;

	if (strcmp("(", argv[*argi]) == 0) {
		++*argi;
		e = exprcomp(search, argc, argv, argi);
		if (*argi < argc) {
			assert(strcmp(")", argv[*argi]) == 0);
			++*argi;
		} else
			warnx("unclosed parenthesis");
		return e;
	}

	if (strcmp("-i", argv[*argi]) == 0 && *argi + 1 < argc) {
		cs = 0;
		++*argi;
	} else
		cs = 1;

	e = mandoc_calloc(1, sizeof(*e));
	e->type = EXPR_TERM;
	e->bits = 0;
	e->next = NULL;
	e->child = NULL;

	if (search->argmode == ARG_NAME) {
		e->bits = TYPE_Nm;
		e->match.type = DBM_EXACT;
		e->match.str = argv[(*argi)++];
		return e;
	}

	/*
	 * Separate macro keys from search string.
	 * If needed, request regular expression handling.
	 */

	if (search->argmode == ARG_WORD) {
		e->bits = TYPE_Nm;
		e->match.type = DBM_REGEX;
#if HAVE_REWB_BSD
		mandoc_asprintf(&val, "[[:<:]]%s[[:>:]]", argv[*argi]);
#elif HAVE_REWB_SYSV
		mandoc_asprintf(&val, "\\<%s\\>", argv[*argi]);
#else
		mandoc_asprintf(&val,
		    "(^|[^a-zA-Z01-9_])%s([^a-zA-Z01-9_]|$)", argv[*argi]);
#endif
		cs = 0;
	} else if ((val = strpbrk(argv[*argi], "=~")) == NULL) {
		e->bits = TYPE_Nm | TYPE_Nd;
		e->match.type = DBM_SUB;
		e->match.str = argv[*argi];
	} else {
		if (val == argv[*argi])
			e->bits = TYPE_Nm | TYPE_Nd;
		if (*val == '=') {
			e->match.type = DBM_SUB;
			e->match.str = val + 1;
		} else
			e->match.type = DBM_REGEX;
		*val++ = '\0';
		if (strstr(argv[*argi], "arch") != NULL)
			cs = 0;
	}

	/* Compile regular expressions. */

	if (e->match.type == DBM_REGEX) {
		e->match.re = mandoc_malloc(sizeof(*e->match.re));
		irc = regcomp(e->match.re, val,
		    REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE));
		if (irc) {
			regerror(irc, e->match.re, errbuf, sizeof(errbuf));
			warnx("regcomp /%s/: %s", val, errbuf);
		}
		if (search->argmode == ARG_WORD)
			free(val);
		if (irc) {
			free(e->match.re);
			free(e);
			++*argi;
			return NULL;
		}
	}

	if (e->bits) {
		++*argi;
		return e;
	}

	/*
	 * Parse out all possible fields.
	 * If the field doesn't resolve, bail.
	 */

	while (NULL != (key = strsep(&argv[*argi], ","))) {
		if ('\0' == *key)
			continue;
		for (i = 0, iterbit = 1; i < KEY_MAX; i++, iterbit <<= 1) {
			if (0 == strcasecmp(key, mansearch_keynames[i])) {
				e->bits |= iterbit;
				break;
			}
		}
		if (i == KEY_MAX) {
			if (strcasecmp(key, "any"))
				warnx("treating unknown key "
				    "\"%s\" as \"any\"", key);
			e->bits |= ~0ULL;
		}
	}

	++*argi;
	return e;
}

static void
exprfree(struct expr *e)
{
	if (e->next != NULL)
		exprfree(e->next);
	if (e->child != NULL)
		exprfree(e->child);
	free(e);
}