version 1.5, 2011/11/18 07:02:19 |
version 1.11, 2011/11/23 09:55:28 |
|
|
#include <fcntl.h> |
#include <fcntl.h> |
#include <regex.h> |
#include <regex.h> |
#include <stdarg.h> |
#include <stdarg.h> |
|
#include <stdint.h> |
#include <stdlib.h> |
#include <stdlib.h> |
#include <string.h> |
#include <string.h> |
|
#include <unistd.h> |
|
|
#ifdef __linux__ |
#ifdef __linux__ |
# include <db_185.h> |
# include <db_185.h> |
|
|
struct expr { |
struct expr { |
int regex; /* is regex? */ |
int regex; /* is regex? */ |
int index; /* index in match array */ |
int index; /* index in match array */ |
int mask; /* type-mask */ |
uint64_t mask; /* type-mask */ |
int cs; /* is case-sensitive? */ |
int cs; /* is case-sensitive? */ |
int and; /* is rhs of logical AND? */ |
int and; /* is rhs of logical AND? */ |
char *v; /* search value */ |
char *v; /* search value */ |
|
|
}; |
}; |
|
|
struct type { |
struct type { |
int mask; |
uint64_t mask; |
const char *name; |
const char *name; |
}; |
}; |
|
|
|
struct rectree { |
|
struct rec *node; /* record array for dir tree */ |
|
int len; /* length of record array */ |
|
}; |
|
|
static const struct type types[] = { |
static const struct type types[] = { |
{ TYPE_An, "An" }, |
{ TYPE_An, "An" }, |
|
{ TYPE_Ar, "Ar" }, |
|
{ TYPE_At, "At" }, |
|
{ TYPE_Bsx, "Bsx" }, |
|
{ TYPE_Bx, "Bx" }, |
{ TYPE_Cd, "Cd" }, |
{ TYPE_Cd, "Cd" }, |
|
{ TYPE_Cm, "Cm" }, |
|
{ TYPE_Dv, "Dv" }, |
|
{ TYPE_Dx, "Dx" }, |
|
{ TYPE_Em, "Em" }, |
{ TYPE_Er, "Er" }, |
{ TYPE_Er, "Er" }, |
{ TYPE_Ev, "Ev" }, |
{ TYPE_Ev, "Ev" }, |
|
{ TYPE_Fa, "Fa" }, |
|
{ TYPE_Fl, "Fl" }, |
{ TYPE_Fn, "Fn" }, |
{ TYPE_Fn, "Fn" }, |
{ TYPE_Fn, "Fo" }, |
{ TYPE_Fn, "Fo" }, |
|
{ TYPE_Ft, "Ft" }, |
|
{ TYPE_Fx, "Fx" }, |
|
{ TYPE_Ic, "Ic" }, |
{ TYPE_In, "In" }, |
{ TYPE_In, "In" }, |
|
{ TYPE_Lb, "Lb" }, |
|
{ TYPE_Li, "Li" }, |
|
{ TYPE_Lk, "Lk" }, |
|
{ TYPE_Ms, "Ms" }, |
|
{ TYPE_Mt, "Mt" }, |
{ TYPE_Nd, "Nd" }, |
{ TYPE_Nd, "Nd" }, |
{ TYPE_Nm, "Nm" }, |
{ TYPE_Nm, "Nm" }, |
|
{ TYPE_Nx, "Nx" }, |
|
{ TYPE_Ox, "Ox" }, |
{ TYPE_Pa, "Pa" }, |
{ TYPE_Pa, "Pa" }, |
|
{ TYPE_Rs, "Rs" }, |
|
{ TYPE_Sh, "Sh" }, |
|
{ TYPE_Ss, "Ss" }, |
{ TYPE_St, "St" }, |
{ TYPE_St, "St" }, |
|
{ TYPE_Sy, "Sy" }, |
|
{ TYPE_Tn, "Tn" }, |
{ TYPE_Va, "Va" }, |
{ TYPE_Va, "Va" }, |
{ TYPE_Va, "Vt" }, |
{ TYPE_Va, "Vt" }, |
{ TYPE_Xr, "Xr" }, |
{ TYPE_Xr, "Xr" }, |
Line 87 static int btree_read(const DBT *, |
|
Line 119 static int btree_read(const DBT *, |
|
const struct mchars *, char **); |
const struct mchars *, char **); |
static int expreval(const struct expr *, int *); |
static int expreval(const struct expr *, int *); |
static void exprexec(const struct expr *, |
static void exprexec(const struct expr *, |
const char *, int, struct rec *); |
const char *, uint64_t, struct rec *); |
static int exprmark(const struct expr *, |
static int exprmark(const struct expr *, |
const char *, int, int *); |
const char *, uint64_t, int *); |
static struct expr *exprexpr(int, char *[], int *, int *, size_t *); |
static struct expr *exprexpr(int, char *[], int *, int *, size_t *); |
static struct expr *exprterm(char *, int); |
static struct expr *exprterm(char *, int); |
static DB *index_open(void); |
static DB *index_open(void); |
static int index_read(const DBT *, const DBT *, |
static int index_read(const DBT *, const DBT *, int, |
const struct mchars *, struct rec *); |
const struct mchars *, struct rec *); |
static void norm_string(const char *, |
static void norm_string(const char *, |
const struct mchars *, char **); |
const struct mchars *, char **); |
static size_t norm_utf8(unsigned int, char[7]); |
static size_t norm_utf8(unsigned int, char[7]); |
static void recfree(struct rec *); |
static void recfree(struct rec *); |
|
static int single_search(struct rectree *, const struct opts *, |
|
const struct expr *, size_t terms, |
|
struct mchars *, int); |
|
|
/* |
/* |
* Open the keyword mandoc-db database. |
* Open the keyword mandoc-db database. |
Line 130 btree_read(const DBT *v, const struct mchars *mc, char |
|
Line 165 btree_read(const DBT *v, const struct mchars *mc, char |
|
/* Sanity: are we NUL-terminated? */ |
/* Sanity: are we NUL-terminated? */ |
|
|
assert(v->size > 0); |
assert(v->size > 0); |
|
|
if ('\0' != ((char *)v->data)[(int)v->size - 1]) |
if ('\0' != ((char *)v->data)[(int)v->size - 1]) |
return(0); |
return(0); |
|
|
Line 309 index_open(void) |
|
Line 345 index_open(void) |
|
* Returns 1 if an entry was unpacked, 0 if the database is insane. |
* Returns 1 if an entry was unpacked, 0 if the database is insane. |
*/ |
*/ |
static int |
static int |
index_read(const DBT *key, const DBT *val, |
index_read(const DBT *key, const DBT *val, int index, |
const struct mchars *mc, struct rec *rec) |
const struct mchars *mc, struct rec *rec) |
{ |
{ |
size_t left; |
size_t left; |
Line 328 index_read(const DBT *key, const DBT *val, |
|
Line 364 index_read(const DBT *key, const DBT *val, |
|
cp = (char *)val->data; |
cp = (char *)val->data; |
|
|
rec->res.rec = *(recno_t *)key->data; |
rec->res.rec = *(recno_t *)key->data; |
|
rec->res.volume = index; |
|
|
INDEX_BREAD(rec->res.file); |
INDEX_BREAD(rec->res.file); |
INDEX_BREAD(rec->res.cat); |
INDEX_BREAD(rec->res.cat); |
Line 338 index_read(const DBT *key, const DBT *val, |
|
Line 375 index_read(const DBT *key, const DBT *val, |
|
} |
} |
|
|
/* |
/* |
* Search the mandocdb database for the expression "expr". |
* Search mandocdb databases in paths for expression "expr". |
* Filter out by "opts". |
* Filter out by "opts". |
* Call "res" with the results, which may be zero. |
* Call "res" with the results, which may be zero. |
* Return 0 if there was a database error, else return 1. |
* Return 0 if there was a database error, else return 1. |
*/ |
*/ |
int |
int |
apropos_search(const struct opts *opts, const struct expr *expr, |
apropos_search(int pathsz, char **paths, const struct opts *opts, |
size_t terms, void *arg, |
const struct expr *expr, size_t terms, void *arg, |
void (*res)(struct res *, size_t, void *)) |
void (*res)(struct res *, size_t, void *)) |
{ |
{ |
int i, rsz, root, leaf, mask, mlen, rc, ch; |
struct rectree tree; |
|
struct mchars *mc; |
|
struct res *ress; |
|
int i, mlen, rc; |
|
|
|
memset(&tree, 0, sizeof(struct rectree)); |
|
|
|
rc = 0; |
|
mc = mchars_alloc(); |
|
|
|
/* |
|
* Main loop. Change into the directory containing manpage |
|
* databases. Run our expression over each database in the set. |
|
*/ |
|
|
|
for (i = 0; i < pathsz; i++) { |
|
if (chdir(paths[i])) |
|
continue; |
|
if ( ! single_search(&tree, opts, expr, terms, mc, i)) |
|
goto out; |
|
} |
|
|
|
/* |
|
* Count matching files, transfer to a "clean" array, then feed |
|
* them to the output handler. |
|
*/ |
|
|
|
for (mlen = i = 0; i < tree.len; i++) |
|
if (tree.node[i].matched) |
|
mlen++; |
|
|
|
ress = mandoc_malloc(mlen * sizeof(struct res)); |
|
|
|
for (mlen = i = 0; i < tree.len; i++) |
|
if (tree.node[i].matched) |
|
memcpy(&ress[mlen++], &tree.node[i].res, |
|
sizeof(struct res)); |
|
|
|
(*res)(ress, mlen, arg); |
|
free(ress); |
|
|
|
rc = 1; |
|
out: |
|
for (i = 0; i < tree.len; i++) |
|
recfree(&tree.node[i]); |
|
|
|
free(tree.node); |
|
mchars_free(mc); |
|
return(rc); |
|
} |
|
|
|
static int |
|
single_search(struct rectree *tree, const struct opts *opts, |
|
const struct expr *expr, size_t terms, |
|
struct mchars *mc, int vol) |
|
{ |
|
int root, leaf, ch; |
|
uint64_t mask; |
DBT key, val; |
DBT key, val; |
DB *btree, *idx; |
DB *btree, *idx; |
struct mchars *mc; |
|
char *buf; |
char *buf; |
recno_t rec; |
recno_t rec; |
struct rec *rs; |
struct rec *rs; |
struct res *ress; |
|
struct rec r; |
struct rec r; |
|
struct db_val *vbuf; |
|
|
rc = 0; |
|
root = -1; |
root = -1; |
leaf = -1; |
leaf = -1; |
btree = NULL; |
btree = NULL; |
idx = NULL; |
idx = NULL; |
mc = NULL; |
|
buf = NULL; |
buf = NULL; |
rs = NULL; |
rs = tree->node; |
rsz = 0; |
|
|
|
memset(&r, 0, sizeof(struct rec)); |
memset(&r, 0, sizeof(struct rec)); |
|
|
mc = mchars_alloc(); |
|
|
|
if (NULL == (btree = btree_open())) |
if (NULL == (btree = btree_open())) |
goto out; |
return(1); |
if (NULL == (idx = index_open())) |
|
goto out; |
|
|
|
|
if (NULL == (idx = index_open())) { |
|
(*btree->close)(btree); |
|
return(1); |
|
} |
|
|
while (0 == (ch = (*btree->seq)(btree, &key, &val, R_NEXT))) { |
while (0 == (ch = (*btree->seq)(btree, &key, &val, R_NEXT))) { |
/* |
if (key.size < 2 || sizeof(struct db_val) != val.size) |
* Low-water mark for key and value. |
break; |
* The key must have something in it, and the value must |
|
* have the correct tags/recno mix. |
|
*/ |
|
if (key.size < 2 || 8 != val.size) |
|
break; |
|
if ( ! btree_read(&key, mc, &buf)) |
if ( ! btree_read(&key, mc, &buf)) |
break; |
break; |
|
|
mask = *(int *)val.data; |
vbuf = val.data; |
|
rec = vbuf->rec; |
|
mask = vbuf->mask; |
|
|
/* |
/* |
* See if this keyword record matches any of the |
* See if this keyword record matches any of the |
Line 397 apropos_search(const struct opts *opts, const struct e |
|
Line 485 apropos_search(const struct opts *opts, const struct e |
|
if ( ! exprmark(expr, buf, mask, NULL)) |
if ( ! exprmark(expr, buf, mask, NULL)) |
continue; |
continue; |
|
|
memcpy(&rec, val.data + 4, sizeof(recno_t)); |
|
|
|
/* |
/* |
* O(log n) scan for prior records. Since a record |
* O(log n) scan for prior records. Since a record |
* number is unbounded, this has decent performance over |
* number is unbounded, this has decent performance over |
Line 440 apropos_search(const struct opts *opts, const struct e |
|
Line 526 apropos_search(const struct opts *opts, const struct e |
|
break; |
break; |
|
|
r.lhs = r.rhs = -1; |
r.lhs = r.rhs = -1; |
if ( ! index_read(&key, &val, mc, &r)) |
if ( ! index_read(&key, &val, vol, mc, &r)) |
break; |
break; |
|
|
/* XXX: this should be elsewhere, I guess? */ |
/* XXX: this should be elsewhere, I guess? */ |
Line 450 apropos_search(const struct opts *opts, const struct e |
|
Line 536 apropos_search(const struct opts *opts, const struct e |
|
if (opts->arch && strcasecmp(opts->arch, r.res.arch)) |
if (opts->arch && strcasecmp(opts->arch, r.res.arch)) |
continue; |
continue; |
|
|
rs = mandoc_realloc |
tree->node = rs = mandoc_realloc |
(rs, (rsz + 1) * sizeof(struct rec)); |
(rs, (tree->len + 1) * sizeof(struct rec)); |
|
|
memcpy(&rs[rsz], &r, sizeof(struct rec)); |
memcpy(&rs[tree->len], &r, sizeof(struct rec)); |
rs[rsz].matches = mandoc_calloc(terms, sizeof(int)); |
rs[tree->len].matches = |
|
mandoc_calloc(terms, sizeof(int)); |
|
|
exprexec(expr, buf, mask, &rs[rsz]); |
exprexec(expr, buf, mask, &rs[tree->len]); |
/* Append to our tree. */ |
/* Append to our tree. */ |
|
|
if (leaf >= 0) { |
if (leaf >= 0) { |
if (rec > rs[leaf].res.rec) |
if (rec > rs[leaf].res.rec) |
rs[leaf].rhs = rsz; |
rs[leaf].rhs = tree->len; |
else |
else |
rs[leaf].lhs = rsz; |
rs[leaf].lhs = tree->len; |
} else |
} else |
root = rsz; |
root = tree->len; |
|
|
memset(&r, 0, sizeof(struct rec)); |
memset(&r, 0, sizeof(struct rec)); |
rsz++; |
tree->len++; |
} |
} |
|
|
/* |
(*btree->close)(btree); |
* If we haven't encountered any database errors, then construct |
(*idx->close)(idx); |
* an array of results and push them to the caller. |
|
*/ |
|
|
|
if (1 == ch) { |
|
for (mlen = i = 0; i < rsz; i++) |
|
if (rs[i].matched) |
|
mlen++; |
|
ress = mandoc_malloc(mlen * sizeof(struct res)); |
|
for (mlen = i = 0; i < rsz; i++) |
|
if (rs[i].matched) |
|
memcpy(&ress[mlen++], &rs[i].res, |
|
sizeof(struct res)); |
|
(*res)(ress, mlen, arg); |
|
free(ress); |
|
rc = 1; |
|
} |
|
|
|
out: |
|
for (i = 0; i < rsz; i++) |
|
recfree(&rs[i]); |
|
|
|
recfree(&r); |
|
|
|
if (mc) |
|
mchars_free(mc); |
|
if (btree) |
|
(*btree->close)(btree); |
|
if (idx) |
|
(*idx->close)(idx); |
|
|
|
free(buf); |
free(buf); |
free(rs); |
return(1 == ch); |
return(rc); |
|
} |
} |
|
|
static void |
static void |
Line 701 exprfree(struct expr *p) |
|
Line 758 exprfree(struct expr *p) |
|
} |
} |
|
|
static int |
static int |
exprmark(const struct expr *p, const char *cp, int mask, int *ms) |
exprmark(const struct expr *p, const char *cp, |
|
uint64_t mask, int *ms) |
{ |
{ |
|
|
for ( ; p; p = p->next) { |
for ( ; p; p = p->next) { |
Line 772 expreval(const struct expr *p, int *ms) |
|
Line 830 expreval(const struct expr *p, int *ms) |
|
* If this evaluates to true, mark the expression as satisfied. |
* If this evaluates to true, mark the expression as satisfied. |
*/ |
*/ |
static void |
static void |
exprexec(const struct expr *p, const char *cp, int mask, struct rec *r) |
exprexec(const struct expr *p, const char *cp, |
|
uint64_t mask, struct rec *r) |
{ |
{ |
|
|
assert(0 == r->matched); |
assert(0 == r->matched); |