version 1.8, 2011/11/20 15:43:14 |
version 1.14, 2011/11/27 23:11:37 |
Line 115 static const struct type types[] = { |
|
Line 115 static const struct type types[] = { |
|
}; |
}; |
|
|
static DB *btree_open(void); |
static DB *btree_open(void); |
static int btree_read(const DBT *, |
static int btree_read(const DBT *, |
const struct mchars *, char **); |
const struct mchars *, char **); |
static int expreval(const struct expr *, int *); |
static int expreval(const struct expr *, int *); |
static void exprexec(const struct expr *, |
static void exprexec(const struct expr *, |
const char *, uint64_t, struct rec *); |
const char *, uint64_t, struct rec *); |
static int exprmark(const struct expr *, |
static int exprmark(const struct expr *, |
const char *, uint64_t, int *); |
const char *, uint64_t, int *); |
static struct expr *exprexpr(int, char *[], int *, int *, size_t *); |
static struct expr *exprexpr(int, char *[], int *, int *, size_t *); |
static struct expr *exprterm(char *, int); |
static struct expr *exprterm(char *, int); |
static DB *index_open(void); |
static DB *index_open(void); |
static int index_read(const DBT *, const DBT *, |
static int index_read(const DBT *, const DBT *, int, |
const struct mchars *, struct rec *); |
const struct mchars *, struct rec *); |
static void norm_string(const char *, |
static void norm_string(const char *, |
const struct mchars *, char **); |
const struct mchars *, char **); |
Line 133 static size_t norm_utf8(unsigned int, char[7]); |
|
Line 133 static size_t norm_utf8(unsigned int, char[7]); |
|
static void recfree(struct rec *); |
static void recfree(struct rec *); |
static int single_search(struct rectree *, const struct opts *, |
static int single_search(struct rectree *, const struct opts *, |
const struct expr *, size_t terms, |
const struct expr *, size_t terms, |
struct mchars *); |
struct mchars *, int); |
|
|
/* |
/* |
* Open the keyword mandoc-db database. |
* Open the keyword mandoc-db database. |
Line 148 btree_open(void) |
|
Line 148 btree_open(void) |
|
info.flags = R_DUP; |
info.flags = R_DUP; |
|
|
db = dbopen(MANDOC_DB, O_RDONLY, 0, DB_BTREE, &info); |
db = dbopen(MANDOC_DB, O_RDONLY, 0, DB_BTREE, &info); |
if (NULL != db) |
if (NULL != db) |
return(db); |
return(db); |
|
|
return(NULL); |
return(NULL); |
Line 176 btree_read(const DBT *v, const struct mchars *mc, char |
|
Line 176 btree_read(const DBT *v, const struct mchars *mc, char |
|
/* |
/* |
* Take a Unicode codepoint and produce its UTF-8 encoding. |
* Take a Unicode codepoint and produce its UTF-8 encoding. |
* This isn't the best way to do this, but it works. |
* This isn't the best way to do this, but it works. |
* The magic numbers are from the UTF-8 packaging. |
* The magic numbers are from the UTF-8 packaging. |
* They're not as scary as they seem: read the UTF-8 spec for details. |
* They're not as scary as they seem: read the UTF-8 spec for details. |
*/ |
*/ |
static size_t |
static size_t |
Line 241 norm_string(const char *val, const struct mchars *mc, |
|
Line 241 norm_string(const char *val, const struct mchars *mc, |
|
const char *seq, *cpp; |
const char *seq, *cpp; |
int len, u, pos; |
int len, u, pos; |
enum mandoc_esc esc; |
enum mandoc_esc esc; |
static const char res[] = { '\\', '\t', |
static const char res[] = { '\\', '\t', |
ASCII_NBRSP, ASCII_HYPH, '\0' }; |
ASCII_NBRSP, ASCII_HYPH, '\0' }; |
|
|
/* Pre-allocate by the length of the input */ |
/* Pre-allocate by the length of the input */ |
Line 287 norm_string(const char *val, const struct mchars *mc, |
|
Line 287 norm_string(const char *val, const struct mchars *mc, |
|
if (ESCAPE_ERROR == esc) |
if (ESCAPE_ERROR == esc) |
break; |
break; |
|
|
/* |
/* |
* XXX - this just does UTF-8, but we need to know |
* XXX - this just does UTF-8, but we need to know |
* beforehand whether we should do text substitution. |
* beforehand whether we should do text substitution. |
*/ |
*/ |
Line 345 index_open(void) |
|
Line 345 index_open(void) |
|
* Returns 1 if an entry was unpacked, 0 if the database is insane. |
* Returns 1 if an entry was unpacked, 0 if the database is insane. |
*/ |
*/ |
static int |
static int |
index_read(const DBT *key, const DBT *val, |
index_read(const DBT *key, const DBT *val, int index, |
const struct mchars *mc, struct rec *rec) |
const struct mchars *mc, struct rec *rec) |
{ |
{ |
size_t left; |
size_t left; |
Line 364 index_read(const DBT *key, const DBT *val, |
|
Line 364 index_read(const DBT *key, const DBT *val, |
|
cp = (char *)val->data; |
cp = (char *)val->data; |
|
|
rec->res.rec = *(recno_t *)key->data; |
rec->res.rec = *(recno_t *)key->data; |
|
rec->res.volume = index; |
|
|
|
INDEX_BREAD(rec->res.type); |
INDEX_BREAD(rec->res.file); |
INDEX_BREAD(rec->res.file); |
INDEX_BREAD(rec->res.cat); |
INDEX_BREAD(rec->res.cat); |
INDEX_BREAD(rec->res.title); |
INDEX_BREAD(rec->res.title); |
Line 374 index_read(const DBT *key, const DBT *val, |
|
Line 376 index_read(const DBT *key, const DBT *val, |
|
} |
} |
|
|
/* |
/* |
* Search mandocdb databases in argv (size argc) for the expression |
* Search mandocdb databases in paths for expression "expr". |
* "expr". |
|
* Filter out by "opts". |
* Filter out by "opts". |
* Call "res" with the results, which may be zero. |
* Call "res" with the results, which may be zero. |
* Return 0 if there was a database error, else return 1. |
* Return 0 if there was a database error, else return 1. |
*/ |
*/ |
int |
int |
apropos_search(int argc, char *argv[], const struct opts *opts, |
apropos_search(int pathsz, char **paths, const struct opts *opts, |
const struct expr *expr, size_t terms, void *arg, |
const struct expr *expr, size_t terms, void *arg, |
void (*res)(struct res *, size_t, void *)) |
void (*res)(struct res *, size_t, void *)) |
{ |
{ |
struct rectree tree; |
struct rectree tree; |
Line 392 apropos_search(int argc, char *argv[], const struct op |
|
Line 393 apropos_search(int argc, char *argv[], const struct op |
|
|
|
memset(&tree, 0, sizeof(struct rectree)); |
memset(&tree, 0, sizeof(struct rectree)); |
|
|
|
rc = 0; |
mc = mchars_alloc(); |
mc = mchars_alloc(); |
|
|
for (rc = 1, i = 0; rc && i < argc; i++) { |
/* |
/* FIXME: ugly warning: we shouldn't get here! */ |
* Main loop. Change into the directory containing manpage |
if (chdir(argv[i])) |
* databases. Run our expression over each database in the set. |
|
*/ |
|
|
|
for (i = 0; i < pathsz; i++) { |
|
if (chdir(paths[i])) |
continue; |
continue; |
rc = single_search(&tree, opts, expr, terms, mc); |
if ( ! single_search(&tree, opts, expr, terms, mc, i)) |
/* FIXME: warn and continue... ? */ |
goto out; |
} |
} |
|
|
/* |
/* |
* Count the matching files |
* Count matching files, transfer to a "clean" array, then feed |
* and feed them to the output handler. |
* them to the output handler. |
*/ |
*/ |
|
|
for (mlen = i = 0; i < tree.len; i++) |
for (mlen = i = 0; i < tree.len; i++) |
Line 415 apropos_search(int argc, char *argv[], const struct op |
|
Line 421 apropos_search(int argc, char *argv[], const struct op |
|
|
|
for (mlen = i = 0; i < tree.len; i++) |
for (mlen = i = 0; i < tree.len; i++) |
if (tree.node[i].matched) |
if (tree.node[i].matched) |
memcpy(&ress[mlen++], &tree.node[i].res, |
memcpy(&ress[mlen++], &tree.node[i].res, |
sizeof(struct res)); |
sizeof(struct res)); |
|
|
(*res)(ress, mlen, arg); |
(*res)(ress, mlen, arg); |
free(ress); |
free(ress); |
|
|
|
rc = 1; |
|
out: |
for (i = 0; i < tree.len; i++) |
for (i = 0; i < tree.len; i++) |
recfree(&tree.node[i]); |
recfree(&tree.node[i]); |
|
|
Line 432 apropos_search(int argc, char *argv[], const struct op |
|
Line 440 apropos_search(int argc, char *argv[], const struct op |
|
static int |
static int |
single_search(struct rectree *tree, const struct opts *opts, |
single_search(struct rectree *tree, const struct opts *opts, |
const struct expr *expr, size_t terms, |
const struct expr *expr, size_t terms, |
struct mchars *mc) |
struct mchars *mc, int vol) |
{ |
{ |
int root, leaf, ch; |
int root, leaf, ch; |
uint64_t mask; |
uint64_t mask; |
Line 453 single_search(struct rectree *tree, const struct opts |
|
Line 461 single_search(struct rectree *tree, const struct opts |
|
|
|
memset(&r, 0, sizeof(struct rec)); |
memset(&r, 0, sizeof(struct rec)); |
|
|
if (NULL == (btree = btree_open())) |
if (NULL == (btree = btree_open())) |
return(0); |
return(1); |
|
|
if (NULL == (idx = index_open())) { |
if (NULL == (idx = index_open())) { |
(*btree->close)(btree); |
(*btree->close)(btree); |
return(0); |
return(1); |
} |
} |
|
|
while (0 == (ch = (*btree->seq)(btree, &key, &val, R_NEXT))) { |
while (0 == (ch = (*btree->seq)(btree, &key, &val, R_NEXT))) { |
if (key.size < 2 || sizeof(struct db_val) != val.size) |
if (key.size < 2 || sizeof(struct db_val) != val.size) |
break; |
break; |
if ( ! btree_read(&key, mc, &buf)) |
if ( ! btree_read(&key, mc, &buf)) |
break; |
break; |
|
|
Line 485 single_search(struct rectree *tree, const struct opts |
|
Line 493 single_search(struct rectree *tree, const struct opts |
|
*/ |
*/ |
|
|
for (leaf = root; leaf >= 0; ) |
for (leaf = root; leaf >= 0; ) |
if (rec > rs[leaf].res.rec && |
if (rec > rs[leaf].res.rec && |
rs[leaf].rhs >= 0) |
rs[leaf].rhs >= 0) |
leaf = rs[leaf].rhs; |
leaf = rs[leaf].rhs; |
else if (rec < rs[leaf].res.rec && |
else if (rec < rs[leaf].res.rec && |
rs[leaf].lhs >= 0) |
rs[leaf].lhs >= 0) |
leaf = rs[leaf].lhs; |
leaf = rs[leaf].lhs; |
else |
else |
break; |
break; |
|
|
/* |
/* |
Line 519 single_search(struct rectree *tree, const struct opts |
|
Line 527 single_search(struct rectree *tree, const struct opts |
|
break; |
break; |
|
|
r.lhs = r.rhs = -1; |
r.lhs = r.rhs = -1; |
if ( ! index_read(&key, &val, mc, &r)) |
if ( ! index_read(&key, &val, vol, mc, &r)) |
break; |
break; |
|
|
/* XXX: this should be elsewhere, I guess? */ |
/* XXX: this should be elsewhere, I guess? */ |
Line 533 single_search(struct rectree *tree, const struct opts |
|
Line 541 single_search(struct rectree *tree, const struct opts |
|
(rs, (tree->len + 1) * sizeof(struct rec)); |
(rs, (tree->len + 1) * sizeof(struct rec)); |
|
|
memcpy(&rs[tree->len], &r, sizeof(struct rec)); |
memcpy(&rs[tree->len], &r, sizeof(struct rec)); |
rs[tree->len].matches = |
rs[tree->len].matches = |
mandoc_calloc(terms, sizeof(int)); |
mandoc_calloc(terms, sizeof(int)); |
|
|
exprexec(expr, buf, mask, &rs[tree->len]); |
exprexec(expr, buf, mask, &rs[tree->len]); |
|
|
/* Append to our tree. */ |
/* Append to our tree. */ |
|
|
if (leaf >= 0) { |
if (leaf >= 0) { |
Line 546 single_search(struct rectree *tree, const struct opts |
|
Line 555 single_search(struct rectree *tree, const struct opts |
|
rs[leaf].lhs = tree->len; |
rs[leaf].lhs = tree->len; |
} else |
} else |
root = tree->len; |
root = tree->len; |
|
|
memset(&r, 0, sizeof(struct rec)); |
memset(&r, 0, sizeof(struct rec)); |
tree->len++; |
tree->len++; |
} |
} |
|
|
(*btree->close)(btree); |
(*btree->close)(btree); |
(*idx->close)(idx); |
(*idx->close)(idx); |
|
|
Line 571 recfree(struct rec *rec) |
|
Line 580 recfree(struct rec *rec) |
|
free(rec->matches); |
free(rec->matches); |
} |
} |
|
|
|
/* |
|
* Compile a list of straight-up terms. |
|
* The arguments are re-written into ~[[:<:]]term[[:>:]], or "term" |
|
* surrounded by word boundaries, then pumped through exprterm(). |
|
* Terms are case-insensitive. |
|
* This emulates whatis(1) behaviour. |
|
*/ |
struct expr * |
struct expr * |
|
termcomp(int argc, char *argv[], size_t *tt) |
|
{ |
|
char *buf; |
|
int pos; |
|
struct expr *e, *next; |
|
size_t sz; |
|
|
|
buf = NULL; |
|
e = NULL; |
|
*tt = 0; |
|
|
|
for (pos = 0; pos < argc; pos++) { |
|
sz = strlen(argv[pos]) + 16; |
|
buf = mandoc_realloc(buf, sz); |
|
strlcpy(buf, "~[[:<:]]", sz); |
|
strlcat(buf, argv[pos], sz); |
|
strlcat(buf, "[[:>:]]", sz); |
|
if (NULL == (next = exprterm(buf, 0))) { |
|
free(buf); |
|
exprfree(e); |
|
return(NULL); |
|
} |
|
if (NULL != e) |
|
e->next = next; |
|
e = next; |
|
(*tt)++; |
|
} |
|
|
|
free(buf); |
|
return(e); |
|
} |
|
|
|
/* |
|
* Compile a sequence of logical expressions. |
|
* See apropos.1 for a grammar of this sequence. |
|
*/ |
|
struct expr * |
exprcomp(int argc, char *argv[], size_t *tt) |
exprcomp(int argc, char *argv[], size_t *tt) |
{ |
{ |
int pos, lvl; |
int pos, lvl; |
Line 596 exprcomp(int argc, char *argv[], size_t *tt) |
|
Line 649 exprcomp(int argc, char *argv[], size_t *tt) |
|
* Return the root of the expression sequence if alright. |
* Return the root of the expression sequence if alright. |
*/ |
*/ |
static struct expr * |
static struct expr * |
exprexpr(int argc, char **argv, int *pos, int *lvl, size_t *tt) |
exprexpr(int argc, char *argv[], int *pos, int *lvl, size_t *tt) |
{ |
{ |
struct expr *e, *first, *next; |
struct expr *e, *first, *next; |
int log; |
int log; |
Line 625 exprexpr(int argc, char **argv, int *pos, int *lvl, si |
|
Line 678 exprexpr(int argc, char **argv, int *pos, int *lvl, si |
|
log = 0; |
log = 0; |
|
|
if (NULL != e && 0 == strcmp("-a", argv[*pos])) |
if (NULL != e && 0 == strcmp("-a", argv[*pos])) |
log = 1; |
log = 1; |
else if (NULL != e && 0 == strcmp("-o", argv[*pos])) |
else if (NULL != e && 0 == strcmp("-o", argv[*pos])) |
log = 2; |
log = 2; |
|
|
Line 721 exprterm(char *buf, int cs) |
|
Line 774 exprterm(char *buf, int cs) |
|
e.mask = TYPE_Nm | TYPE_Nd; |
e.mask = TYPE_Nm | TYPE_Nd; |
|
|
if (e.regex) { |
if (e.regex) { |
i = REG_EXTENDED | REG_NOSUB | cs ? 0 : REG_ICASE; |
i = REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE); |
if (regcomp(&e.re, e.v, i)) |
if (regcomp(&e.re, e.v, i)) |
return(NULL); |
return(NULL); |
} |
} |
|
|
exprfree(struct expr *p) |
exprfree(struct expr *p) |
{ |
{ |
struct expr *pp; |
struct expr *pp; |
|
|
while (NULL != p) { |
while (NULL != p) { |
if (p->subexpr) |
if (p->subexpr) |
exprfree(p->subexpr); |
exprfree(p->subexpr); |
Line 751 exprfree(struct expr *p) |
|
Line 804 exprfree(struct expr *p) |
|
} |
} |
|
|
static int |
static int |
exprmark(const struct expr *p, const char *cp, |
exprmark(const struct expr *p, const char *cp, |
uint64_t mask, int *ms) |
uint64_t mask, int *ms) |
{ |
{ |
|
|
Line 806 expreval(const struct expr *p, int *ms) |
|
Line 859 expreval(const struct expr *p, int *ms) |
|
for ( ; p->next && p->next->and; p = p->next) { |
for ( ; p->next && p->next->and; p = p->next) { |
/* Evaluate a subexpression, if applicable. */ |
/* Evaluate a subexpression, if applicable. */ |
if (p->next->subexpr && ! ms[p->next->index]) |
if (p->next->subexpr && ! ms[p->next->index]) |
ms[p->next->index] = |
ms[p->next->index] = |
expreval(p->next->subexpr, ms); |
expreval(p->next->subexpr, ms); |
match = match && ms[p->next->index]; |
match = match && ms[p->next->index]; |
} |
} |
Line 823 expreval(const struct expr *p, int *ms) |
|
Line 876 expreval(const struct expr *p, int *ms) |
|
* If this evaluates to true, mark the expression as satisfied. |
* If this evaluates to true, mark the expression as satisfied. |
*/ |
*/ |
static void |
static void |
exprexec(const struct expr *p, const char *cp, |
exprexec(const struct expr *e, const char *cp, |
uint64_t mask, struct rec *r) |
uint64_t mask, struct rec *r) |
{ |
{ |
|
|
assert(0 == r->matched); |
assert(0 == r->matched); |
exprmark(p, cp, mask, r->matches); |
exprmark(e, cp, mask, r->matches); |
r->matched = expreval(p, r->matches); |
r->matched = expreval(e, r->matches); |
} |
} |