[BACK]Return to mandoc-db.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

File: [cvsweb.bsd.lv] / mandoc / Attic / mandoc-db.c (download)

Revision 1.1, Sat Apr 2 15:40:40 2011 UTC (13 years ago) by kristaps
Branch: MAIN

Add an initial keyword-indexer as an alternative front-end to libmandoc.a.
This will be used during BSDCan-2011, paired with a web front-end, as a
demonstrandum of semantic annotations (mdoc) versus those of man.  This
isn't linked to the build in any way but for cleaning created files.  This
code has been heavily tested under valgrind and is known to work on
GNU/Linux (needs -ldb library), all BSDs, and Mac OSX.

Please don't repost this, for the time being, as I don't want to spoil the
fun for the conference.  This utility is still constantly under development
(e.g., it will also generate a recno database of filenames and `Nd' output
so that paths needn't be hard-coded) but is mature enough to warrant being
checked in.

/*	$Id: mandoc-db.c,v 1.1 2011/04/02 15:40:40 kristaps Exp $ */
/*
 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/param.h>

#include <assert.h>
#ifdef __linux__
# include <db_185.h>
#else
# include <db.h>
#endif
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "man.h"
#include "mdoc.h"
#include "mandoc.h"

#define	MANDOC_DB	 "mandoc.db"
#define	MANDOC_BUFSZ	  10

/*
 * Keyword classification stored in the leading four bytes of every
 * database value.  pmdoc() seeds the value with MANDOC_NONE and the
 * per-macro handlers overwrite it with one of the other constants.
 */
enum	type {
	MANDOC_NONE	= 0, /* placeholder written by pmdoc() */
	MANDOC_NAME	= 1, /* Nm in the NAME section */
	MANDOC_FUNCTION	= 2, /* Fn or Fo in the SYNOPSIS section */
	MANDOC_UTILITY	= 3, /* Nm head in the SYNOPSIS section */
	MANDOC_INCLUDES	= 4, /* In, or Fd "#include", in the SYNOPSIS */
	MANDOC_VARIABLE	= 5  /* Va or Vt in the SYNOPSIS section */
};

/*
 * Common parameter list shared by pmdoc_node() and every per-macro
 * handler:
 *   db   - the open database that keys are written into;
 *   dbn  - database file name, used when reporting a failed put;
 *   key  - shared key buffer, reset before each handler runs;
 *   ksz  - allocated capacity of the key buffer;
 *   val  - value buffer whose first four bytes hold the enum type flag;
 *   n    - the node being examined.
 */
#define	MDOC_ARGS	  DB *db, \
			  const char *dbn, \
			  DBT *key, size_t *ksz, \
			  DBT *val, \
			  const struct mdoc_node *n

/* Growable DBT buffer helpers. */
static	void		  dbt_append(DBT *, size_t *, const char *);
static	void		  dbt_appendb(DBT *, size_t *, 
				const void *, size_t);
static	void		  dbt_init(DBT *, size_t *);
/* Command-line diagnostics. */
static	void		  version(void);
static	void		  usage(void);
/* Per-manual entry point and recursive tree walker. */
static	void		  pmdoc(DB *, const char *, 
				DBT *, size_t *, 
				DBT *, size_t *, 
				const char *, struct mdoc *);
static	void		  pmdoc_node(MDOC_ARGS);
/* Per-macro keyword extractors, dispatched through mdocs[]. */
static	void		  pmdoc_Fd(MDOC_ARGS);
static	void		  pmdoc_In(MDOC_ARGS);
static	void		  pmdoc_Fn(MDOC_ARGS);
static	void		  pmdoc_Fo(MDOC_ARGS);
static	void		  pmdoc_Nm(MDOC_ARGS);
static	void		  pmdoc_Vt(MDOC_ARGS);

/* Signature of a per-macro keyword extractor. */
typedef	void		(*pmdoc_nf)(MDOC_ARGS);

/* Basename of argv[0]; set in main() and used in diagnostics. */
static	const char	 *progname;

/*
 * Dispatch table consulted by pmdoc_node() as mdocs[n->tok].  A NULL
 * slot means the macro contributes no keywords.  The trailing comment
 * on each line names the macro the slot is meant for; the order is
 * positional and presumably mirrors the token enumeration in mdoc.h
 * (not visible here), so entries must not be reordered.
 */
static	const pmdoc_nf	  mdocs[MDOC_MAX] = {
	NULL, /* Ap */
	NULL, /* Dd */
	NULL, /* Dt */
	NULL, /* Os */
	NULL, /* Sh */ 
	NULL, /* Ss */ 
	NULL, /* Pp */ 
	NULL, /* D1 */
	NULL, /* Dl */
	NULL, /* Bd */
	NULL, /* Ed */
	NULL, /* Bl */ 
	NULL, /* El */
	NULL, /* It */
	NULL, /* Ad */ 
	NULL, /* An */ 
	NULL, /* Ar */
	NULL, /* Cd */ 
	NULL, /* Cm */
	NULL, /* Dv */ 
	NULL, /* Er */ 
	NULL, /* Ev */ 
	NULL, /* Ex */ 
	NULL, /* Fa */ 
	pmdoc_Fd, /* Fd */
	NULL, /* Fl */
	pmdoc_Fn, /* Fn */ 
	NULL, /* Ft */ 
	NULL, /* Ic */ 
	pmdoc_In, /* In */ 
	NULL, /* Li */
	NULL, /* Nd */
	pmdoc_Nm, /* Nm */
	NULL, /* Op */
	NULL, /* Ot */
	NULL, /* Pa */
	NULL, /* Rv */
	NULL, /* St */ 
	pmdoc_Vt, /* Va */
	pmdoc_Vt, /* Vt */ 
	NULL, /* Xr */ 
	NULL, /* %A */
	NULL, /* %B */
	NULL, /* %D */
	NULL, /* %I */
	NULL, /* %J */
	NULL, /* %N */
	NULL, /* %O */
	NULL, /* %P */
	NULL, /* %R */
	NULL, /* %T */
	NULL, /* %V */
	NULL, /* Ac */
	NULL, /* Ao */
	NULL, /* Aq */
	NULL, /* At */ 
	NULL, /* Bc */
	NULL, /* Bf */
	NULL, /* Bo */
	NULL, /* Bq */
	NULL, /* Bsx */
	NULL, /* Bx */
	NULL, /* Db */
	NULL, /* Dc */
	NULL, /* Do */
	NULL, /* Dq */
	NULL, /* Ec */
	NULL, /* Ef */ 
	NULL, /* Em */ 
	NULL, /* Eo */
	NULL, /* Fx */
	NULL, /* Ms */ 
	NULL, /* No */
	NULL, /* Ns */
	NULL, /* Nx */
	NULL, /* Ox */
	NULL, /* Pc */
	NULL, /* Pf */
	NULL, /* Po */
	NULL, /* Pq */
	NULL, /* Qc */
	NULL, /* Ql */
	NULL, /* Qo */
	NULL, /* Qq */
	NULL, /* Re */
	NULL, /* Rs */
	NULL, /* Sc */
	NULL, /* So */
	NULL, /* Sq */
	NULL, /* Sm */ 
	NULL, /* Sx */
	NULL, /* Sy */
	NULL, /* Tn */
	NULL, /* Ux */
	NULL, /* Xc */
	NULL, /* Xo */
	pmdoc_Fo, /* Fo */ 
	NULL, /* Fc */ 
	NULL, /* Oo */
	NULL, /* Oc */
	NULL, /* Bk */
	NULL, /* Ek */
	NULL, /* Bt */
	NULL, /* Hf */
	NULL, /* Fr */
	NULL, /* Ud */
	NULL, /* Lb */
	NULL, /* Lp */ 
	NULL, /* Lk */ 
	NULL, /* Mt */ 
	NULL, /* Brq */ 
	NULL, /* Bro */ 
	NULL, /* Brc */ 
	NULL, /* %C */
	NULL, /* Es */
	NULL, /* En */
	NULL, /* Dx */
	NULL, /* %Q */
	NULL, /* br */
	NULL, /* sp */
	NULL, /* %U */
	NULL, /* Ta */
};

/*
 * Build a keyword database from the manuals named on the command line.
 *
 * Options:
 *   -f path   write the database to `path' instead of MANDOC_DB;
 *   -V        print the version and exit.
 *
 * All records are first written to "<path>.bak", which is renamed over
 * the real database only once it has been closed successfully.  Files
 * that fail to parse are skipped; only mdoc results are indexed.
 *
 * Returns MANDOCLEVEL_OK on success, MANDOCLEVEL_BADARG on a bad
 * option and MANDOCLEVEL_SYSERR when the database cannot be created,
 * closed or moved into place.
 */
int
main(int argc, char *argv[])
{
	struct mparse	*mp;
	struct mdoc	*mdoc;
	struct man	*man;
	const char	*f, *fn;
	size_t		 sz;
	char		 fbuf[MAXPATHLEN];
	int		 c, rc;
	DB		*db;
	DBT		 key, val;
	size_t		 ksz, vsz;
	BTREEINFO	 info;
	extern int	 optind;
	extern char	*optarg;

	f = MANDOC_DB;

	progname = strrchr(argv[0], '/');
	if (progname == NULL)
		progname = argv[0];
	else
		++progname;

	while (-1 != (c = getopt(argc, argv, "f:V")))
		switch (c) {
		case ('f'):
			f = optarg;
			break;
		case ('V'):
			version();
			return((int)MANDOCLEVEL_OK);
		default:
			usage();
			return((int)MANDOCLEVEL_BADARG);
		}

	argc -= optind;
	argv += optind;

	/*
	 * Set up a temporary file-name into which we're going to write
	 * all of our data.  This is renamed to the real file-name after
	 * we've written all of our data.
	 */

	if (0 == (sz = strlen(f)) || sz + 5 >= MAXPATHLEN) {
		fprintf(stderr, "%s: Bad filename\n", progname);
		exit((int)MANDOCLEVEL_SYSERR);
	}

	memcpy(fbuf, f, sz);
	memcpy(fbuf + sz, ".bak", 4);
	fbuf[sz + 4] = '\0';

	/*
	 * Open a BTREE database that allows duplicates.  If the
	 * database already exists (it's a backup anyway), then blow it
	 * away with O_TRUNC.
	 */

	memset(&info, 0, sizeof(BTREEINFO));
	info.flags = R_DUP;

	db = dbopen(fbuf, O_CREAT|O_TRUNC|O_RDWR, 
			0644, DB_BTREE, &info);

	if (NULL == db) {
		/* Report the file we actually tried to open. */
		perror(fbuf);
		exit((int)MANDOCLEVEL_SYSERR);
	}

	/* Use the auto-parser and don't report any errors. */

	mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);

	/*
	 * Try parsing the manuals given on the command line.  If we
	 * totally fail, then just keep on going.  Take resulting trees
	 * and push them down into the database code.
	 */

	memset(&key, 0, sizeof(DBT));
	memset(&val, 0, sizeof(DBT));
	ksz = vsz = 0;

	while (NULL != (fn = *argv++)) {
		printf("Trying: %s\n", fn);
		mparse_reset(mp);
		if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL)
			continue;
		mparse_result(mp, &mdoc, &man);
		if (mdoc)
			pmdoc(db, fbuf, &key, &ksz, 
				&val, &vsz, fn, mdoc);
	}

	/*
	 * Closing flushes the tree to disk.  If that fails the temporary
	 * file may be incomplete, so it must not replace the real one.
	 * Report immediately, before anything else can clobber errno.
	 */

	rc = (int)MANDOCLEVEL_OK;
	if (-1 == (*db->close)(db)) {
		perror(fbuf);
		rc = (int)MANDOCLEVEL_SYSERR;
	}

	mparse_free(mp);

	free(key.data);
	free(val.data);

	/* Atomically replace the file with our temporary one. */

	if ((int)MANDOCLEVEL_OK == rc && -1 == rename(fbuf, f)) {
		perror(f);
		rc = (int)MANDOCLEVEL_SYSERR;
	}

	return(rc);
}

/*
 * Prepare a database entry for (re)use.  The data buffer is shared
 * between uses: it is allocated, MANDOC_BUFSZ bytes large, only while
 * the tracked capacity is still zero.  Every call resets the logical
 * size so that subsequent appends start at the front of the buffer.
 */
static void
dbt_init(DBT *key, size_t *ksz)
{

	if (0 != *ksz) {
		/* Buffer already exists; just rewind it. */
		key->size = 0;
		return;
	}

	/* First use: nothing may have been stored or allocated yet. */

	assert(0 == key->size);
	assert(NULL == key->data);

	*ksz = MANDOC_BUFSZ;
	key->data = mandoc_malloc(MANDOC_BUFSZ);
	key->size = 0;
}

/*
 * Append `sz' raw bytes from `cp' to a database entry.  This can be
 * invoked multiple times; the buffer is automatically resized and
 * `*ksz' is updated to the new capacity.  The entry must already have
 * been set up with dbt_init().
 */
static void
dbt_appendb(DBT *key, size_t *ksz, const void *cp, size_t sz)
{

	assert(key->data);

	/*
	 * Overshoot by MANDOC_BUFSZ and then bump the capacity to the
	 * next multiple of four.  One resize always suffices, as the new
	 * capacity is strictly larger than what is needed.
	 */

	if (key->size + sz >= *ksz) {
		*ksz = key->size + sz + MANDOC_BUFSZ;
		*ksz = *ksz + (4 - (*ksz % 4));
		key->data = mandoc_realloc(key->data, *ksz);
	}

	/* Arithmetic on void pointers is not ISO C: go through char. */

	memcpy((char *)key->data + key->size, cp, sz);
	key->size += sz;
}

/*
 * Append a nil-terminated string to the database entry.  This can be
 * invoked multiple times.  The database entry will be nil-terminated as
 * well; if invoked multiple times, a space is put between strings.
 * An empty string leaves the entry untouched.
 *
 * The separator is written over the last byte already stored, so any
 * existing content must itself end in a nil terminator.
 */
static void
dbt_append(DBT *key, size_t *ksz, const char *cp)
{
	size_t		 sz;
	char		*buf;

	assert(key->data);
	assert(key->size <= *ksz);

	if (0 == (sz = strlen(cp)))
		return;

	/*
	 * Overshoot by MANDOC_BUFSZ (and nil terminator), then bump the
	 * capacity to the next multiple of four.  One resize suffices.
	 */

	if (key->size + sz + 1 >= *ksz) {
		*ksz = key->size + sz + 1 + MANDOC_BUFSZ;
		*ksz = *ksz + (4 - (*ksz % 4));
		key->data = mandoc_realloc(key->data, *ksz);
	}

	/* Arithmetic on void pointers is not ISO C: go through char. */

	buf = key->data;

	/* Space-separate appended tokens. */

	if (key->size)
		buf[key->size - 1] = ' ';

	memcpy(buf + key->size, cp, sz + 1);
	key->size += sz + 1;
}

/*
 * Handle `.Fd #include <file.h>' in the SYNOPSIS: store the header
 * name, without the surrounding angle brackets, as a nil-terminated
 * key of type MANDOC_INCLUDES.  Anything else, including an include
 * whose name is empty once the brackets are removed, sets no key.
 */
/* ARGSUSED */
static void
pmdoc_Fd(MDOC_ARGS)
{
	uint32_t	 fl;
	const char	*start;
	size_t		 sz;
	char		 nil;
	
	if (SEC_SYNOPSIS != n->sec)
		return;
	if (NULL == (n = n->child) || MDOC_TEXT != n->type)
		return;
	if (strcmp("#include", n->string))
		return;
	if (NULL == (n = n->next) || MDOC_TEXT != n->type)
		return;

	start = n->string;
	if ('<' == *start)
		start++;

	/*
	 * Work with a length instead of an end pointer so that a lone
	 * ">" can never move a pointer in front of the string.
	 */

	sz = strlen(start);
	if (sz && '>' == start[sz - 1])
		sz--;

	if (0 == sz)
		return;

	nil = '\0';
	dbt_appendb(key, ksz, start, sz);
	dbt_appendb(key, ksz, &nil, 1);

	fl = MANDOC_INCLUDES;
	memcpy(val->data, &fl, 4);
}

/* ARGSUSED */
static void
pmdoc_In(MDOC_ARGS)
{
	uint32_t	 fl;
	
	if (SEC_SYNOPSIS != n->sec)
		return;
	if (NULL == n->child || MDOC_TEXT != n->child->type)
		return;

	dbt_append(key, ksz, n->child->string);
	fl = MANDOC_INCLUDES;
	memcpy(val->data, &fl, 4);
}

/*
 * Handle `.Fn' in the SYNOPSIS: the function name becomes a key of
 * type MANDOC_FUNCTION.  The first argument may carry a return type,
 * as in `.Fn "struct type *name" "arg"', so only the last
 * blank-separated word is used and leading '*' are dropped from it.
 */
/* ARGSUSED */
static void
pmdoc_Fn(MDOC_ARGS)
{
	uint32_t	 fl;
	const char	*cp;
	
	if (SEC_SYNOPSIS != n->sec)
		return;
	if (NULL == n->child || MDOC_TEXT != n->child->type)
		return;

	/*
	 * strrchr() returns the blank itself: step over it, otherwise
	 * the pointer stripping below never triggers and the key would
	 * start with a space.
	 */

	cp = strrchr(n->child->string, ' ');
	if (NULL == cp)
		cp = n->child->string;
	else
		cp++;

	/* Ignore pointers. */

	while ('*' == *cp)
		cp++;

	dbt_append(key, ksz, cp);
	fl = MANDOC_FUNCTION;
	memcpy(val->data, &fl, 4);
}

/*
 * Handle `.Va' and `.Vt' in the SYNOPSIS: the last text argument,
 * stripped of leading '*' and trailing ';', becomes a nil-terminated
 * key of type MANDOC_VARIABLE.  For Vt only the body node is used.
 */
/* ARGSUSED */
static void
pmdoc_Vt(MDOC_ARGS)
{
	uint32_t	 fl;
	const char	*start;
	size_t		 sz;
	char		 nil;
	
	if (SEC_SYNOPSIS != n->sec)
		return;
	if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
		return;
	if (NULL == n->child || MDOC_TEXT != n->child->type)
		return;

	/* The name is read from the last child: it must be text, too. */

	if (NULL == n->last || MDOC_TEXT != n->last->type)
		return;

	/*
	 * Strip away leading '*' and trailing ';'.
	 */

	start = n->last->string;

	while ('*' == *start)
		start++;

	/*
	 * Track the remaining length rather than comparing pointers, so
	 * that one-character names such as "x" or "x;" are kept.
	 */

	sz = strlen(start);
	while (sz && ';' == start[sz - 1])
		sz--;

	if (0 == sz)
		return;

	nil = '\0';
	dbt_appendb(key, ksz, start, sz);
	dbt_appendb(key, ksz, &nil, 1);
	fl = MANDOC_VARIABLE;
	memcpy(val->data, &fl, 4);
}

/* ARGSUSED */
static void
pmdoc_Fo(MDOC_ARGS)
{
	uint32_t	 fl;
	
	if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
		return;
	if (NULL == n->child || MDOC_TEXT != n->child->type)
		return;

	dbt_append(key, ksz, n->child->string);
	fl = MANDOC_FUNCTION;
	memcpy(val->data, &fl, 4);
}

/* ARGSUSED */
static void
pmdoc_Nm(MDOC_ARGS)
{
	uint32_t	 fl;
	
	if (SEC_NAME == n->sec) {
		for (n = n->child; n; n = n->next) {
			if (MDOC_TEXT != n->type)
				continue;
			dbt_append(key, ksz, n->string);
		}
		fl = MANDOC_NAME;
		memcpy(val->data, &fl, 4);
		return;
	} else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
		return;

	for (n = n->child; n; n = n->next) {
		if (MDOC_TEXT != n->type)
			continue;
		dbt_append(key, ksz, n->string);
	}

	fl = MANDOC_UTILITY;
	memcpy(val->data, &fl, 4);
}

/*
 * Walk a node, its descendants and its following siblings in document
 * order.  For every macro node that has a handler in mdocs[], reset the
 * shared key, run the handler and, if it produced a key, store the
 * key/value pair.  A failed store is fatal.
 */
static void
pmdoc_node(MDOC_ARGS)
{

	/* Siblings are iterated; only children are recursed into. */

	for ( ; NULL != n; n = n->next) {
		switch (n->type) {
		case (MDOC_HEAD):
			/* FALLTHROUGH */
		case (MDOC_BODY):
			/* FALLTHROUGH */
		case (MDOC_TAIL):
			/* FALLTHROUGH */
		case (MDOC_BLOCK):
			/* FALLTHROUGH */
		case (MDOC_ELEM):
			if (NULL != mdocs[n->tok]) {
				dbt_init(key, ksz);
				(*mdocs[n->tok])(db, dbn, key, ksz, val, n);

				/* An empty key means "nothing to index". */

				if (key->size &&
				    (*db->put)(db, key, val, 0)) {
					perror(dbn);
					exit((int)MANDOCLEVEL_SYSERR);
					/* NOTREACHED */
				}
			}
			break;
		default:
			break;
		}

		pmdoc_node(db, dbn, key, ksz, val, n->child);
	}
}

/*
 * Index one parsed mdoc manual.  Builds the value shared by all of the
 * manual's keys, then walks the parse tree so that the per-macro
 * handlers can emit keys into `db'.
 *
 *   db, dbn     - open database and its file name (for diagnostics);
 *   key, ksz    - shared key buffer and its capacity;
 *   val, valsz  - shared value buffer and its capacity;
 *   path        - path of the manual, stored in every value;
 *   m           - parse result to walk.
 */
static void
pmdoc(DB *db, const char *dbn, 
		DBT *key, size_t *ksz, 
		DBT *val, size_t *valsz,
		const char *path, struct mdoc *m)
{
	uint32_t	 flag;

	flag = MANDOC_NONE;

	/* 
	 * Database values are a 4-byte bit-field followed by the
	 * nil-terminated path of the manual.  Allocate all the space
	 * we'll need now; we change the bit-field depending on the key
	 * type.
	 *
	 * The path is appended as raw bytes: dbt_append() would treat
	 * the bit-field as an earlier token and overwrite its last byte
	 * with a separating space.
	 */

	dbt_init(val, valsz);
	dbt_appendb(val, valsz, &flag, 4);
	dbt_appendb(val, valsz, path, strlen(path) + 1);

	pmdoc_node(db, dbn, key, ksz, val, mdoc_node(m));
}

/*
 * Print the program name and version on standard output.
 */
static void
version(void)
{

	(void)fprintf(stdout, "%s %s\n", progname, VERSION);
}

/*
 * Print a one-line synopsis of the command line on standard error.
 */
static void
usage(void)
{

	(void)fprintf(stderr,
	    "usage: %s [-V] [-f path] [file...]\n", progname);
}