[BACK]Return to dbm.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

File: [cvsweb.bsd.lv] / mandoc / dbm.c (download)

Revision 1.6, Mon Nov 19 19:22:07 2018 UTC (5 years, 4 months ago) by schwarze
Branch: MAIN
CVS Tags: VERSION_1_14_5
Changes since 1.5: +4 -4 lines

Correctly construct empty lists in dbm_page_get().

Original commit message by the author of this bugfix patch, bluhm@:

lstmatch() expects a list of strings separated by \0 and terminated
with \0\0.  In the NULL case dbm_page_get() returned only simple
strings so correct processing was depending on data layout.  Use
an additional \0 to terminate the single string lists.  Found by
mandoc regress since llvm linker on amd64 arranges strings differently.

/*	$Id: dbm.c,v 1.6 2018/11/19 19:22:07 schwarze Exp $ */
/*
 * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Map-based version of the mandoc database, for read-only access.
 * The interface is defined in "dbm.h".
 */
#include "config.h"

#include <assert.h>
#if HAVE_ENDIAN
#include <endian.h>
#elif HAVE_SYS_ENDIAN
#include <sys/endian.h>
#elif HAVE_NTOHL
#include <arpa/inet.h>
#endif
#if HAVE_ERR
#include <err.h>
#endif
#include <errno.h>
#include <regex.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "mansearch.h"
#include "dbm_map.h"
#include "dbm.h"

struct macro {
	int32_t	value;
	int32_t	pages;
};

struct page {
	int32_t	name;
	int32_t	sect;
	int32_t	arch;
	int32_t	desc;
	int32_t	file;
};

enum iter {
	ITER_NONE = 0,
	ITER_NAME,
	ITER_SECT,
	ITER_ARCH,
	ITER_DESC,
	ITER_MACRO
};

static struct macro	*macros[MACRO_MAX];
static int32_t		 nvals[MACRO_MAX];
static struct page	*pages;
static int32_t		 npages;
static enum iter	 iteration;

static struct dbm_res	 page_bytitle(enum iter, const struct dbm_match *);
static struct dbm_res	 page_byarch(const struct dbm_match *);
static struct dbm_res	 page_bymacro(int32_t, const struct dbm_match *);
static char		*macro_bypage(int32_t, int32_t);


/*** top level functions **********************************************/

/*
 * Open a disk-based mandoc database for read-only access.
 * Map the pages and macros[] arrays.
 * Return 0 on success.  Return -1 and set errno on failure.
 */
int
dbm_open(const char *fname)
{
	const int32_t	*mp, *ep;
	int32_t		 im;

	if (dbm_map(fname) == -1)
		return -1;

	if ((npages = be32toh(*dbm_getint(4))) < 0) {
		warnx("dbm_open(%s): Invalid number of pages: %d",
		    fname, npages);
		goto fail;
	}
	pages = (struct page *)dbm_getint(5);

	if ((mp = dbm_get(*dbm_getint(2))) == NULL) {
		warnx("dbm_open(%s): Invalid offset of macros array", fname);
		goto fail;
	}
	if (be32toh(*mp) != MACRO_MAX) {
		warnx("dbm_open(%s): Invalid number of macros: %d",
		    fname, be32toh(*mp));
		goto fail;
	}
	for (im = 0; im < MACRO_MAX; im++) {
		if ((ep = dbm_get(*++mp)) == NULL) {
			warnx("dbm_open(%s): Invalid offset of macro %d",
			    fname, im);
			goto fail;
		}
		nvals[im] = be32toh(*ep);
		macros[im] = (struct macro *)++ep;
	}
	return 0;

fail:
	dbm_unmap();
	errno = EFTYPE;
	return -1;
}

void
dbm_close(void)
{
	dbm_unmap();
}


/*** functions for handling pages *************************************/

int32_t
dbm_page_count(void)
{
	return npages;
}

/*
 * Give the caller pointers to the data for one manual page.
 */
struct dbm_page *
dbm_page_get(int32_t ip)
{
	static struct dbm_page	 res;

	assert(ip >= 0);
	assert(ip < npages);
	res.name = dbm_get(pages[ip].name);
	if (res.name == NULL)
		res.name = "(NULL)\0";
	res.sect = dbm_get(pages[ip].sect);
	if (res.sect == NULL)
		res.sect = "(NULL)\0";
	res.arch = pages[ip].arch ? dbm_get(pages[ip].arch) : NULL;
	res.desc = dbm_get(pages[ip].desc);
	if (res.desc == NULL)
		res.desc = "(NULL)";
	res.file = dbm_get(pages[ip].file);
	if (res.file == NULL)
		res.file = " (NULL)\0";
	res.addr = dbm_addr(pages + ip);
	return &res;
}

/*
 * Functions to start filtered iterations over manual pages.
 */
void
dbm_page_byname(const struct dbm_match *match)
{
	assert(match != NULL);
	page_bytitle(ITER_NAME, match);
}

void
dbm_page_bysect(const struct dbm_match *match)
{
	assert(match != NULL);
	page_bytitle(ITER_SECT, match);
}

void
dbm_page_byarch(const struct dbm_match *match)
{
	assert(match != NULL);
	page_byarch(match);
}

void
dbm_page_bydesc(const struct dbm_match *match)
{
	assert(match != NULL);
	page_bytitle(ITER_DESC, match);
}

void
dbm_page_bymacro(int32_t im, const struct dbm_match *match)
{
	assert(im >= 0);
	assert(im < MACRO_MAX);
	assert(match != NULL);
	page_bymacro(im, match);
}

/*
 * Return the number of the next manual page in the current iteration.
 */
struct dbm_res
dbm_page_next(void)
{
	struct dbm_res			 res = {-1, 0};

	switch(iteration) {
	case ITER_NONE:
		return res;
	case ITER_ARCH:
		return page_byarch(NULL);
	case ITER_MACRO:
		return page_bymacro(0, NULL);
	default:
		return page_bytitle(iteration, NULL);
	}
}

/*
 * Functions implementing the iteration over manual pages.
 */
static struct dbm_res
page_bytitle(enum iter arg_iter, const struct dbm_match *arg_match)
{
	static const struct dbm_match	*match;
	static const char		*cp;	
	static int32_t			 ip;
	struct dbm_res			 res = {-1, 0};

	assert(arg_iter == ITER_NAME || arg_iter == ITER_DESC ||
	    arg_iter == ITER_SECT);

	/* Initialize for a new iteration. */

	if (arg_match != NULL) {
		iteration = arg_iter;
		match = arg_match;
		switch (iteration) {
		case ITER_NAME:
			cp = dbm_get(pages[0].name);
			break;
		case ITER_SECT:
			cp = dbm_get(pages[0].sect);
			break;
		case ITER_DESC:
			cp = dbm_get(pages[0].desc);
			break;
		default:
			abort();
		}
		if (cp == NULL) {
			iteration = ITER_NONE;
			match = NULL;
			cp = NULL;
			ip = npages;
		} else
			ip = 0;
		return res;
	}

	/* Search for a name. */

	while (ip < npages) {
		if (iteration == ITER_NAME)
			cp++;
		if (dbm_match(match, cp))
			break;
		cp = strchr(cp, '\0') + 1;
		if (iteration == ITER_DESC)
			ip++;
		else if (*cp == '\0') {
			cp++;
			ip++;
		}
	}

	/* Reached the end without a match. */

	if (ip == npages) {
		iteration = ITER_NONE;
		match = NULL;
		cp = NULL;
		return res;
	}

	/* Found a match; save the quality for later retrieval. */

	res.page = ip;
	res.bits = iteration == ITER_NAME ? cp[-1] : 0;

	/* Skip the remaining names of this page. */

	if (++ip < npages) {
		do {
			cp++;
		} while (cp[-1] != '\0' ||
		    (iteration != ITER_DESC && cp[-2] != '\0'));
	}
	return res;
}

static struct dbm_res
page_byarch(const struct dbm_match *arg_match)
{
	static const struct dbm_match	*match;
	struct dbm_res			 res = {-1, 0};
	static int32_t			 ip;
	const char			*cp;	

	/* Initialize for a new iteration. */

	if (arg_match != NULL) {
		iteration = ITER_ARCH;
		match = arg_match;
		ip = 0;
		return res;
	}

	/* Search for an architecture. */

	for ( ; ip < npages; ip++)
		if (pages[ip].arch)
			for (cp = dbm_get(pages[ip].arch);
			    *cp != '\0';
			    cp = strchr(cp, '\0') + 1)
				if (dbm_match(match, cp)) {
					res.page = ip++;
					return res;
				}

	/* Reached the end without a match. */

	iteration = ITER_NONE;
	match = NULL;
	return res;
}

static struct dbm_res
page_bymacro(int32_t arg_im, const struct dbm_match *arg_match)
{
	static const struct dbm_match	*match;
	static const int32_t		*pp;
	static const char		*cp;
	static int32_t			 im, iv;
	struct dbm_res			 res = {-1, 0};

	assert(im >= 0);
	assert(im < MACRO_MAX);

	/* Initialize for a new iteration. */

	if (arg_match != NULL) {
		iteration = ITER_MACRO;
		match = arg_match;
		im = arg_im;
		cp = nvals[im] ? dbm_get(macros[im]->value) : NULL;
		pp = NULL;
		iv = -1;
		return res;
	}
	if (iteration != ITER_MACRO)
		return res;

	/* Find the next matching macro value. */

	while (pp == NULL || *pp == 0) {
		if (++iv == nvals[im]) {
			iteration = ITER_NONE;
			return res;
		}
		if (iv)
			cp = strchr(cp, '\0') + 1;
		if (dbm_match(match, cp))
			pp = dbm_get(macros[im][iv].pages);
	}

	/* Found a matching page. */

	res.page = (struct page *)dbm_get(*pp++) - pages;
	return res;
}


/*** functions for handling macros ************************************/

int32_t
dbm_macro_count(int32_t im)
{
	assert(im >= 0);
	assert(im < MACRO_MAX);
	return nvals[im];
}

struct dbm_macro *
dbm_macro_get(int32_t im, int32_t iv)
{
	static struct dbm_macro macro;

	assert(im >= 0);
	assert(im < MACRO_MAX);
	assert(iv >= 0);
	assert(iv < nvals[im]);
	macro.value = dbm_get(macros[im][iv].value);
	macro.pp = dbm_get(macros[im][iv].pages);
	return &macro;
}

/*
 * Filtered iteration over macro entries.
 */
void
dbm_macro_bypage(int32_t im, int32_t ip)
{
	assert(im >= 0);
	assert(im < MACRO_MAX);
	assert(ip != 0);
	macro_bypage(im, ip);
}

char *
dbm_macro_next(void)
{
	return macro_bypage(MACRO_MAX, 0);
}

static char *
macro_bypage(int32_t arg_im, int32_t arg_ip)
{
	static const int32_t	*pp;
	static int32_t		 im, ip, iv;

	/* Initialize for a new iteration. */

	if (arg_im < MACRO_MAX && arg_ip != 0) {
		im = arg_im;
		ip = arg_ip;
		pp = dbm_get(macros[im]->pages);
		iv = 0;
		return NULL;
	}
	if (im >= MACRO_MAX)
		return NULL;

	/* Search for the next value. */

	while (iv < nvals[im]) {
		if (*pp == ip)
			break;
		if (*pp == 0)
			iv++;
		pp++;
	}

	/* Reached the end without a match. */

	if (iv == nvals[im]) {
		im = MACRO_MAX;
		ip = 0;
		pp = NULL;
		return NULL;
	}

	/* Found a match; skip the remaining pages of this entry. */

	if (++iv < nvals[im])
		while (*pp++ != 0)
			continue;

	return dbm_get(macros[im][iv - 1].value);
}