Annotation of mandoc/mansearch.c, Revision 1.1
1.1 ! kristaps 1: /* $Id: mandocdb.c,v 1.46 2012/03/23 06:52:17 kristaps Exp $ */
! 2: /*
! 3: * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
! 4: *
! 5: * Permission to use, copy, modify, and distribute this software for any
! 6: * purpose with or without fee is hereby granted, provided that the above
! 7: * copyright notice and this permission notice appear in all copies.
! 8: *
! 9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
! 10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
! 11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
! 12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
! 13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
! 14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
! 15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
! 16: */
! 17: #ifdef HAVE_CONFIG_H
! 18: #include "config.h"
! 19: #endif
! 20:
! 21: #include <sys/param.h>
! 22:
! 23: #include <assert.h>
! 24: #include <fcntl.h>
! 25: #include <getopt.h>
! 26: #include <stdio.h>
! 27: #include <stdint.h>
! 28: #include <stddef.h>
! 29: #include <stdlib.h>
! 30: #include <string.h>
! 31: #include <unistd.h>
! 32:
! 33: #include <ohash.h>
! 34: #include <sqlite3.h>
! 35:
! 36: #include "mandoc.h"
! 37: #include "manpath.h"
! 38: #include "mandocdb.h"
! 39: #include "mansearch.h"
! 40:
/*
 * One term of a compiled search expression.
 * Terms are chained through "next" into a singly linked list.
 */
struct	expr {
	int		 glob;	/* non-zero: match "v" with GLOB, else with "=" */
	uint64_t	 bits;	/* mask of TYPE_* bits the term may match */
	const char	*v;	/* value to search for (not owned by the term) */
	struct expr	*next;	/* following term, or NULL at the end */
};
! 47:
/*
 * One manual found in a database; collected in a hash table keyed on
 * the "id" member while a result set is being read.
 */
struct	match {
	uint64_t	 id;	/* document identifier in the database */
	char		*file;	/* path of the manual, relative to its tree */
	char		*desc;	/* one-line description of the manual */
	int		 form;	/* 0 means the manual is a catpage */
};
! 54:
/*
 * Associates a search-field name, as written in an expression, with
 * the mask of bits selecting that field.
 */
struct	type {
	uint64_t	 bits;	/* field mask */
	const char	*name;	/* field name */
};
! 59:
! 60: static const struct type types[] = {
! 61: { TYPE_An, "An" },
! 62: { TYPE_Ar, "Ar" },
! 63: { TYPE_At, "At" },
! 64: { TYPE_Bsx, "Bsx" },
! 65: { TYPE_Bx, "Bx" },
! 66: { TYPE_Cd, "Cd" },
! 67: { TYPE_Cm, "Cm" },
! 68: { TYPE_Dv, "Dv" },
! 69: { TYPE_Dx, "Dx" },
! 70: { TYPE_Em, "Em" },
! 71: { TYPE_Er, "Er" },
! 72: { TYPE_Ev, "Ev" },
! 73: { TYPE_Fa, "Fa" },
! 74: { TYPE_Fl, "Fl" },
! 75: { TYPE_Fn, "Fn" },
! 76: { TYPE_Fn, "Fo" },
! 77: { TYPE_Ft, "Ft" },
! 78: { TYPE_Fx, "Fx" },
! 79: { TYPE_Ic, "Ic" },
! 80: { TYPE_In, "In" },
! 81: { TYPE_Lb, "Lb" },
! 82: { TYPE_Li, "Li" },
! 83: { TYPE_Lk, "Lk" },
! 84: { TYPE_Ms, "Ms" },
! 85: { TYPE_Mt, "Mt" },
! 86: { TYPE_Nd, "Nd" },
! 87: { TYPE_Nm, "Nm" },
! 88: { TYPE_Nx, "Nx" },
! 89: { TYPE_Ox, "Ox" },
! 90: { TYPE_Pa, "Pa" },
! 91: { TYPE_Rs, "Rs" },
! 92: { TYPE_Sh, "Sh" },
! 93: { TYPE_Ss, "Ss" },
! 94: { TYPE_St, "St" },
! 95: { TYPE_Sy, "Sy" },
! 96: { TYPE_Tn, "Tn" },
! 97: { TYPE_Va, "Va" },
! 98: { TYPE_Va, "Vt" },
! 99: { TYPE_Xr, "Xr" },
! 100: { ~0ULL, "any" },
! 101: { 0ULL, NULL }
! 102: };
! 103:
/* Expression handling. */
static	struct expr	*exprcomp(int argc, char *argv[]);
static	struct expr	*exprterm(char *buf);
static	void		 exprfree(struct expr *p);
static	char		*sql_statement(const struct expr *e,
				const char *arch, const char *sec);

/* Allocation callbacks handed to ohash. */
static	void		*hash_alloc(size_t sz, void *arg);
static	void		*hash_halloc(size_t sz, void *arg);
static	void		 hash_free(void *p, size_t sz, void *arg);
! 112:
! 113: int
! 114: mansearch(const struct manpaths *paths,
! 115: const char *arch, const char *sec,
! 116: int argc, char *argv[],
! 117: struct manpage **res, size_t *sz)
! 118: {
! 119: int fd, rc;
! 120: int64_t id;
! 121: char buf[MAXPATHLEN];
! 122: char *sql;
! 123: struct expr *e, *ep;
! 124: sqlite3 *db;
! 125: sqlite3_stmt *s;
! 126: struct match *mp;
! 127: struct ohash_info info;
! 128: struct ohash htab;
! 129: unsigned int idx;
! 130: size_t i, j, cur, maxres;
! 131:
! 132: memset(&info, 0, sizeof(struct ohash_info));
! 133:
! 134: info.halloc = hash_halloc;
! 135: info.alloc = hash_alloc;
! 136: info.hfree = hash_free;
! 137: info.key_offset = offsetof(struct match, id);
! 138:
! 139: *sz = 0;
! 140: sql = NULL;
! 141: *res = NULL;
! 142: fd = -1;
! 143: e = NULL;
! 144: cur = maxres = 0;
! 145:
! 146: if (0 == argc)
! 147: goto out;
! 148: if (NULL == (e = exprcomp(argc, argv)))
! 149: goto out;
! 150:
! 151: /*
! 152: * Save a descriptor to the current working directory.
! 153: * Since pathnames in the "paths" variable might be relative,
! 154: * and we'll be chdir()ing into them, we need to keep a handle
! 155: * on our current directory from which to start the chdir().
! 156: */
! 157:
! 158: if (NULL == getcwd(buf, MAXPATHLEN)) {
! 159: perror(NULL);
! 160: goto out;
! 161: } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
! 162: perror(buf);
! 163: goto out;
! 164: }
! 165:
! 166: sql = sql_statement(e, arch, sec);
! 167:
! 168: /*
! 169: * Loop over the directories (containing databases) for us to
! 170: * search.
! 171: * Don't let missing/bad databases/directories phase us.
! 172: * In each, try to open the resident database and, if it opens,
! 173: * scan it for our match expression.
! 174: */
! 175:
! 176: for (i = 0; i < paths->sz; i++) {
! 177: if (-1 == fchdir(fd)) {
! 178: /* FIXME: will return success */
! 179: perror(buf);
! 180: free(*res);
! 181: break;
! 182: } else if (-1 == chdir(paths->paths[i])) {
! 183: perror(paths->paths[i]);
! 184: continue;
! 185: }
! 186:
! 187: rc = sqlite3_open_v2
! 188: (MANDOC_DB, &db, SQLITE_OPEN_READONLY, NULL);
! 189:
! 190: if (SQLITE_OK != rc) {
! 191: perror(MANDOC_DB);
! 192: sqlite3_close(db);
! 193: continue;
! 194: }
! 195:
! 196: j = 1;
! 197: sqlite3_prepare_v2(db, sql, -1, &s, NULL);
! 198:
! 199: if (NULL != arch)
! 200: sqlite3_bind_text
! 201: (s, j++, arch, -1, SQLITE_STATIC);
! 202: if (NULL != sec)
! 203: sqlite3_bind_text
! 204: (s, j++, sec, -1, SQLITE_STATIC);
! 205:
! 206: for (ep = e; NULL != ep; ep = ep->next) {
! 207: sqlite3_bind_text
! 208: (s, j++, ep->v, -1, SQLITE_STATIC);
! 209: sqlite3_bind_int64
! 210: (s, j++, ep->bits);
! 211: }
! 212:
! 213: memset(&htab, 0, sizeof(struct ohash));
! 214: ohash_init(&htab, 4, &info);
! 215:
! 216: /*
! 217: * Hash each entry on its [unique] document identifier.
! 218: * This is a uint64_t.
! 219: * Instead of using a hash function, simply convert the
! 220: * uint64_t to a uint32_t, the hash value's type.
! 221: * This gives good performance and preserves the
! 222: * distribution of buckets in the table.
! 223: */
! 224: while (SQLITE_ROW == sqlite3_step(s)) {
! 225: id = sqlite3_column_int64(s, 0);
! 226: idx = ohash_lookup_memory
! 227: (&htab, (char *)&id,
! 228: sizeof(uint64_t), (uint32_t)id);
! 229:
! 230: if (NULL != ohash_find(&htab, idx))
! 231: continue;
! 232:
! 233: mp = mandoc_calloc(1, sizeof(struct match));
! 234: mp->id = id;
! 235: mp->file = mandoc_strdup
! 236: ((char *)sqlite3_column_text(s, 3));
! 237: mp->desc = mandoc_strdup
! 238: ((char *)sqlite3_column_text(s, 4));
! 239: mp->form = sqlite3_column_int(s, 5);
! 240: ohash_insert(&htab, idx, mp);
! 241: }
! 242:
! 243: sqlite3_finalize(s);
! 244: sqlite3_close(db);
! 245:
! 246: for (mp = ohash_first(&htab, &idx);
! 247: NULL != mp;
! 248: mp = ohash_next(&htab, &idx)) {
! 249: if (cur + 1 > maxres) {
! 250: maxres += 1024;
! 251: *res = mandoc_realloc
! 252: (*res, maxres * sizeof(struct manpage));
! 253: }
! 254: strlcpy((*res)[cur].file,
! 255: paths->paths[i], MAXPATHLEN);
! 256: strlcat((*res)[cur].file, "/", MAXPATHLEN);
! 257: strlcat((*res)[cur].file, mp->file, MAXPATHLEN);
! 258: (*res)[cur].desc = mp->desc;
! 259: (*res)[cur].form = mp->form;
! 260: free(mp->file);
! 261: free(mp);
! 262: cur++;
! 263: }
! 264: ohash_delete(&htab);
! 265: }
! 266: out:
! 267: exprfree(e);
! 268: if (-1 != fd)
! 269: close(fd);
! 270: free(sql);
! 271: *sz = cur;
! 272: return(1);
! 273: }
! 274:
! 275: /*
! 276: * Prepare the search SQL statement.
! 277: * We search for any of the words specified in our match expression.
! 278: * We filter the per-doc AND expressions when collecting results.
! 279: */
! 280: static char *
! 281: sql_statement(const struct expr *e, const char *arch, const char *sec)
! 282: {
! 283: char *sql;
! 284: const char *glob = "(key GLOB ? AND bits & ?)";
! 285: const char *eq = "(key = ? AND bits & ?)";
! 286: const char *andarch = "arch = ? AND ";
! 287: const char *andsec = "sec = ? AND ";
! 288: const size_t globsz = 27;
! 289: const size_t eqsz = 22;
! 290: size_t sz;
! 291:
! 292: sql = mandoc_strdup
! 293: ("SELECT docid,bits,key,file,desc,form,sec,arch "
! 294: "FROM keys "
! 295: "INNER JOIN docs ON docs.id=keys.docid "
! 296: "WHERE ");
! 297: sz = strlen(sql);
! 298:
! 299: if (NULL != arch) {
! 300: sz += strlen(andarch) + 1;
! 301: sql = mandoc_realloc(sql, sz);
! 302: strlcat(sql, andarch, sz);
! 303: }
! 304: if (NULL != sec) {
! 305: sz += strlen(andsec) + 1;
! 306: sql = mandoc_realloc(sql, sz);
! 307: strlcat(sql, andsec, sz);
! 308: }
! 309:
! 310: sz += 2;
! 311: sql = mandoc_realloc(sql, sz);
! 312: strlcat(sql, "(", sz);
! 313:
! 314: for ( ; NULL != e; e = e->next) {
! 315: sz += (e->glob ? globsz : eqsz) +
! 316: (NULL == e->next ? 3 : 5);
! 317: sql = mandoc_realloc(sql, sz);
! 318: strlcat(sql, e->glob ? glob : eq, sz);
! 319: strlcat(sql, NULL == e->next ? ");" : " OR ", sz);
! 320: }
! 321:
! 322: return(sql);
! 323: }
! 324:
! 325: /*
! 326: * Compile a set of string tokens into an expression.
! 327: * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
! 328: * "(", "foo=bar", etc.).
! 329: */
! 330: static struct expr *
! 331: exprcomp(int argc, char *argv[])
! 332: {
! 333: int i;
! 334: struct expr *first, *next, *cur;
! 335:
! 336: first = cur = NULL;
! 337:
! 338: for (i = 0; i < argc; i++) {
! 339: next = exprterm(argv[i]);
! 340: if (NULL == next) {
! 341: exprfree(first);
! 342: return(NULL);
! 343: }
! 344: if (NULL != first) {
! 345: cur->next = next;
! 346: cur = next;
! 347: } else
! 348: cur = first = next;
! 349: }
! 350:
! 351: return(first);
! 352: }
! 353:
! 354: static struct expr *
! 355: exprterm(char *buf)
! 356: {
! 357: struct expr *e;
! 358: char *key, *v;
! 359: size_t i;
! 360:
! 361: if ('\0' == *buf)
! 362: return(NULL);
! 363:
! 364: e = mandoc_calloc(1, sizeof(struct expr));
! 365:
! 366: /*
! 367: * If no =~ is specified, search with equality over names and
! 368: * descriptions.
! 369: * If =~ begins the phrase, use name and description fields.
! 370: */
! 371:
! 372: if (NULL == (v = strpbrk(buf, "=~"))) {
! 373: e->v = buf;
! 374: e->bits = TYPE_Nm | TYPE_Nd;
! 375: return(e);
! 376: } else if (v == buf)
! 377: e->bits = TYPE_Nm | TYPE_Nd;
! 378:
! 379: e->glob = '~' == *v;
! 380: *v++ = '\0';
! 381: e->v = v;
! 382:
! 383: /*
! 384: * Parse out all possible fields.
! 385: * If the field doesn't resolve, bail.
! 386: */
! 387:
! 388: while (NULL != (key = strsep(&buf, ","))) {
! 389: if ('\0' == *key)
! 390: continue;
! 391: i = 0;
! 392: while (types[i].bits &&
! 393: strcasecmp(types[i].name, key))
! 394: i++;
! 395: if (0 == types[i].bits) {
! 396: free(e);
! 397: return(NULL);
! 398: }
! 399: e->bits |= types[i].bits;
! 400: }
! 401:
! 402: return(e);
! 403: }
! 404:
! 405: static void
! 406: exprfree(struct expr *p)
! 407: {
! 408: struct expr *pp;
! 409:
! 410: while (NULL != p) {
! 411: pp = p->next;
! 412: free(p);
! 413: p = pp;
! 414: }
! 415: }
! 416:
/*
 * "halloc" callback for ohash: hand out "sz" bytes of zeroed memory.
 * The callback argument is not used.
 */
static void *
hash_halloc(size_t sz, void *arg)
{
	void	*p;

	(void)arg;
	p = mandoc_calloc(sz, 1);
	return(p);
}
! 423:
/*
 * "alloc" callback for ohash: hand out "sz" bytes of memory.
 * The callback argument is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void	*p;

	(void)arg;
	p = mandoc_malloc(sz);
	return(p);
}
! 430:
/*
 * "hfree" callback for ohash: release "p" with free().
 * The size and the callback argument are not used.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{

	(void)sz;
	(void)arg;
	free(p);
}
CVSweb