Annotation of mandoc/mansearch.c, Revision 1.7
1.7 ! schwarze 1: /* $Id: mansearch.c,v 1.6 2013/06/05 02:00:26 schwarze Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
1.7 ! schwarze 4: * Copyright (c) 2013 Ingo Schwarze <schwarze@openbsd.org>
1.1 kristaps 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #ifdef HAVE_CONFIG_H
19: #include "config.h"
20: #endif
21:
22: #include <assert.h>
23: #include <fcntl.h>
24: #include <getopt.h>
1.6 schwarze 25: #include <limits.h>
1.1 kristaps 26: #include <stdio.h>
27: #include <stdint.h>
28: #include <stddef.h>
29: #include <stdlib.h>
30: #include <string.h>
31: #include <unistd.h>
32:
1.4 kristaps 33: #ifdef HAVE_OHASH
1.1 kristaps 34: #include <ohash.h>
1.4 kristaps 35: #else
36: #include "compat_ohash.h"
37: #endif
1.1 kristaps 38: #include <sqlite3.h>
39:
40: #include "mandoc.h"
41: #include "manpath.h"
42: #include "mansearch.h"
43:
/*
 * Bind a value to the next parameter of a prepared statement.
 *
 *   _db  database handle, used only to fetch the error message
 *   _s   prepared statement to bind into
 *   _i   lvalue holding the parameter index; it is post-incremented
 *        exactly once, whether or not the bind succeeds
 *   _v   value to bind
 *
 * A failed bind is reported on stderr and otherwise ignored.
 * Text is bound with SQLITE_STATIC, i.e. it is not copied: the string
 * must stay valid for as long as the statement may use the binding.
 *
 * Each macro is wrapped in do { } while (0) so that it behaves as a
 * single statement; a bare "if" would capture a following "else" at
 * the call site.
 */
#define	SQL_BIND_TEXT(_db, _s, _i, _v) \
	do { \
		if (SQLITE_OK != sqlite3_bind_text \
		    ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_INT64(_db, _s, _i, _v) \
	do { \
		if (SQLITE_OK != sqlite3_bind_int64 \
		    ((_s), (_i)++, (_v))) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
52:
/*
 * One term of a compiled search expression.
 * Terms are chained through "next" in command line order.
 */
struct	expr {
	int		 glob;	/* non-zero: match with GLOB, else substring */
	uint64_t	 bits;	/* mask of TYPE_* fields to search in */
	const char	*v;	/* value to search for (not owned) */
	struct expr	*next;	/* following term, or NULL */
};
59:
/*
 * One database row that matched, kept in a hash table keyed on "id"
 * so that each document is reported only once per database.
 */
struct	match {
	uint64_t	 id;	/* document identifier in the database */
	char		*file;	/* file path relative to the manpath */
	char		*desc;	/* one-line description of the page */
	int		 form;	/* 0 means the page is a catpage */
};
66:
/*
 * Maps a search field name, as typed by the user, to its bit mask.
 */
struct	type {
	uint64_t	 bits;	/* TYPE_* mask; 0 terminates a table */
	const char	*name;	/* field name, compared without case */
};
71:
72: static const struct type types[] = {
73: { TYPE_An, "An" },
74: { TYPE_Ar, "Ar" },
75: { TYPE_At, "At" },
76: { TYPE_Bsx, "Bsx" },
77: { TYPE_Bx, "Bx" },
78: { TYPE_Cd, "Cd" },
79: { TYPE_Cm, "Cm" },
80: { TYPE_Dv, "Dv" },
81: { TYPE_Dx, "Dx" },
82: { TYPE_Em, "Em" },
83: { TYPE_Er, "Er" },
84: { TYPE_Ev, "Ev" },
85: { TYPE_Fa, "Fa" },
86: { TYPE_Fl, "Fl" },
87: { TYPE_Fn, "Fn" },
88: { TYPE_Fn, "Fo" },
89: { TYPE_Ft, "Ft" },
90: { TYPE_Fx, "Fx" },
91: { TYPE_Ic, "Ic" },
92: { TYPE_In, "In" },
93: { TYPE_Lb, "Lb" },
94: { TYPE_Li, "Li" },
95: { TYPE_Lk, "Lk" },
96: { TYPE_Ms, "Ms" },
97: { TYPE_Mt, "Mt" },
98: { TYPE_Nd, "Nd" },
99: { TYPE_Nm, "Nm" },
100: { TYPE_Nx, "Nx" },
101: { TYPE_Ox, "Ox" },
102: { TYPE_Pa, "Pa" },
103: { TYPE_Rs, "Rs" },
104: { TYPE_Sh, "Sh" },
105: { TYPE_Ss, "Ss" },
106: { TYPE_St, "St" },
107: { TYPE_Sy, "Sy" },
108: { TYPE_Tn, "Tn" },
109: { TYPE_Va, "Va" },
110: { TYPE_Va, "Vt" },
111: { TYPE_Xr, "Xr" },
112: { ~0ULL, "any" },
113: { 0ULL, NULL }
114: };
115:
116: static void *hash_alloc(size_t, void *);
117: static void hash_free(void *, size_t, void *);
118: static void *hash_halloc(size_t, void *);
1.5 kristaps 119: static struct expr *exprcomp(const struct mansearch *,
120: int, char *[]);
1.1 kristaps 121: static void exprfree(struct expr *);
1.5 kristaps 122: static struct expr *exprterm(const struct mansearch *, char *);
1.7 ! schwarze 123: static void sql_match(sqlite3_context *context,
! 124: int argc, sqlite3_value **argv);
1.1 kristaps 125: static char *sql_statement(const struct expr *,
126: const char *, const char *);
127:
128: int
1.5 kristaps 129: mansearch(const struct mansearch *search,
130: const struct manpaths *paths,
1.1 kristaps 131: int argc, char *argv[],
132: struct manpage **res, size_t *sz)
133: {
1.2 kristaps 134: int fd, rc, c;
1.1 kristaps 135: int64_t id;
1.6 schwarze 136: char buf[PATH_MAX];
1.1 kristaps 137: char *sql;
138: struct expr *e, *ep;
139: sqlite3 *db;
140: sqlite3_stmt *s;
141: struct match *mp;
142: struct ohash_info info;
143: struct ohash htab;
144: unsigned int idx;
145: size_t i, j, cur, maxres;
146:
147: memset(&info, 0, sizeof(struct ohash_info));
148:
149: info.halloc = hash_halloc;
150: info.alloc = hash_alloc;
151: info.hfree = hash_free;
152: info.key_offset = offsetof(struct match, id);
153:
1.2 kristaps 154: *sz = cur = maxres = 0;
1.1 kristaps 155: sql = NULL;
156: *res = NULL;
157: fd = -1;
158: e = NULL;
1.2 kristaps 159: rc = 0;
1.1 kristaps 160:
161: if (0 == argc)
162: goto out;
1.5 kristaps 163: if (NULL == (e = exprcomp(search, argc, argv)))
1.1 kristaps 164: goto out;
165:
166: /*
167: * Save a descriptor to the current working directory.
168: * Since pathnames in the "paths" variable might be relative,
169: * and we'll be chdir()ing into them, we need to keep a handle
170: * on our current directory from which to start the chdir().
171: */
172:
1.6 schwarze 173: if (NULL == getcwd(buf, PATH_MAX)) {
1.1 kristaps 174: perror(NULL);
175: goto out;
176: } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
177: perror(buf);
178: goto out;
179: }
180:
1.5 kristaps 181: sql = sql_statement(e, search->arch, search->sec);
1.1 kristaps 182:
183: /*
184: * Loop over the directories (containing databases) for us to
185: * search.
186: * Don't let missing/bad databases/directories phase us.
187: * In each, try to open the resident database and, if it opens,
188: * scan it for our match expression.
189: */
190:
191: for (i = 0; i < paths->sz; i++) {
192: if (-1 == fchdir(fd)) {
193: perror(buf);
194: free(*res);
195: break;
196: } else if (-1 == chdir(paths->paths[i])) {
197: perror(paths->paths[i]);
198: continue;
199: }
200:
1.2 kristaps 201: c = sqlite3_open_v2
202: (MANDOC_DB, &db,
203: SQLITE_OPEN_READONLY, NULL);
1.1 kristaps 204:
1.2 kristaps 205: if (SQLITE_OK != c) {
1.1 kristaps 206: perror(MANDOC_DB);
207: sqlite3_close(db);
208: continue;
209: }
210:
1.7 ! schwarze 211: /* Define the SQL function for substring matching. */
! 212:
! 213: c = sqlite3_create_function(db, "match", 2,
! 214: SQLITE_ANY, NULL, sql_match, NULL, NULL);
! 215: if (SQLITE_OK != c) {
! 216: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
! 217: break;
! 218: }
! 219:
1.1 kristaps 220: j = 1;
1.2 kristaps 221: c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
222: if (SQLITE_OK != c)
223: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
1.1 kristaps 224:
1.5 kristaps 225: if (NULL != search->arch)
226: SQL_BIND_TEXT(db, s, j, search->arch);
227: if (NULL != search->sec)
228: SQL_BIND_TEXT(db, s, j, search->sec);
1.1 kristaps 229:
230: for (ep = e; NULL != ep; ep = ep->next) {
1.3 kristaps 231: SQL_BIND_TEXT(db, s, j, ep->v);
232: SQL_BIND_INT64(db, s, j, ep->bits);
1.1 kristaps 233: }
234:
235: memset(&htab, 0, sizeof(struct ohash));
236: ohash_init(&htab, 4, &info);
237:
238: /*
239: * Hash each entry on its [unique] document identifier.
240: * This is a uint64_t.
241: * Instead of using a hash function, simply convert the
242: * uint64_t to a uint32_t, the hash value's type.
243: * This gives good performance and preserves the
244: * distribution of buckets in the table.
245: */
1.2 kristaps 246: while (SQLITE_ROW == (c = sqlite3_step(s))) {
1.1 kristaps 247: id = sqlite3_column_int64(s, 0);
248: idx = ohash_lookup_memory
249: (&htab, (char *)&id,
250: sizeof(uint64_t), (uint32_t)id);
251:
252: if (NULL != ohash_find(&htab, idx))
253: continue;
254:
255: mp = mandoc_calloc(1, sizeof(struct match));
256: mp->id = id;
257: mp->file = mandoc_strdup
258: ((char *)sqlite3_column_text(s, 3));
259: mp->desc = mandoc_strdup
260: ((char *)sqlite3_column_text(s, 4));
261: mp->form = sqlite3_column_int(s, 5);
262: ohash_insert(&htab, idx, mp);
263: }
264:
1.2 kristaps 265: if (SQLITE_DONE != c)
266: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
267:
1.1 kristaps 268: sqlite3_finalize(s);
269: sqlite3_close(db);
270:
271: for (mp = ohash_first(&htab, &idx);
272: NULL != mp;
273: mp = ohash_next(&htab, &idx)) {
274: if (cur + 1 > maxres) {
275: maxres += 1024;
276: *res = mandoc_realloc
277: (*res, maxres * sizeof(struct manpage));
278: }
279: strlcpy((*res)[cur].file,
1.6 schwarze 280: paths->paths[i], PATH_MAX);
281: strlcat((*res)[cur].file, "/", PATH_MAX);
282: strlcat((*res)[cur].file, mp->file, PATH_MAX);
1.1 kristaps 283: (*res)[cur].desc = mp->desc;
284: (*res)[cur].form = mp->form;
285: free(mp->file);
286: free(mp);
287: cur++;
288: }
289: ohash_delete(&htab);
290: }
1.2 kristaps 291: rc = 1;
1.1 kristaps 292: out:
293: exprfree(e);
294: if (-1 != fd)
295: close(fd);
296: free(sql);
297: *sz = cur;
1.2 kristaps 298: return(rc);
1.1 kristaps 299: }
300:
301: /*
1.7 ! schwarze 302: * Implement substring match as an application-defined SQL function.
! 303: * Using the SQL LIKE or GLOB operators instead would be a bad idea
! 304: * because that would require escaping metacharacters in the string
! 305: * being searched for.
! 306: */
! 307: static void
! 308: sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
! 309: {
! 310:
! 311: assert(2 == argc);
! 312: sqlite3_result_int(context, NULL != strcasestr(
! 313: (const char *)sqlite3_value_text(argv[1]),
! 314: (const char *)sqlite3_value_text(argv[0])));
! 315: }
! 316:
! 317: /*
1.1 kristaps 318: * Prepare the search SQL statement.
319: * We search for any of the words specified in our match expression.
320: * We filter the per-doc AND expressions when collecting results.
321: */
322: static char *
323: sql_statement(const struct expr *e, const char *arch, const char *sec)
324: {
325: char *sql;
1.7 ! schwarze 326: const char *substr = "(key MATCH ? AND bits & ?)";
1.1 kristaps 327: const char *glob = "(key GLOB ? AND bits & ?)";
328: const char *andarch = "arch = ? AND ";
329: const char *andsec = "sec = ? AND ";
1.7 ! schwarze 330: size_t substrsz;
1.2 kristaps 331: size_t globsz;
1.1 kristaps 332: size_t sz;
333:
334: sql = mandoc_strdup
335: ("SELECT docid,bits,key,file,desc,form,sec,arch "
336: "FROM keys "
337: "INNER JOIN docs ON docs.id=keys.docid "
338: "WHERE ");
339: sz = strlen(sql);
1.7 ! schwarze 340: substrsz = strlen(substr);
1.2 kristaps 341: globsz = strlen(glob);
1.1 kristaps 342:
343: if (NULL != arch) {
344: sz += strlen(andarch) + 1;
345: sql = mandoc_realloc(sql, sz);
346: strlcat(sql, andarch, sz);
347: }
1.2 kristaps 348:
1.1 kristaps 349: if (NULL != sec) {
350: sz += strlen(andsec) + 1;
351: sql = mandoc_realloc(sql, sz);
352: strlcat(sql, andsec, sz);
353: }
354:
355: sz += 2;
356: sql = mandoc_realloc(sql, sz);
357: strlcat(sql, "(", sz);
358:
359: for ( ; NULL != e; e = e->next) {
1.7 ! schwarze 360: sz += (e->glob ? globsz : substrsz) +
1.1 kristaps 361: (NULL == e->next ? 3 : 5);
362: sql = mandoc_realloc(sql, sz);
1.7 ! schwarze 363: strlcat(sql, e->glob ? glob : substr, sz);
1.1 kristaps 364: strlcat(sql, NULL == e->next ? ");" : " OR ", sz);
365: }
366:
367: return(sql);
368: }
369:
370: /*
371: * Compile a set of string tokens into an expression.
372: * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
373: * "(", "foo=bar", etc.).
374: */
375: static struct expr *
1.5 kristaps 376: exprcomp(const struct mansearch *search, int argc, char *argv[])
1.1 kristaps 377: {
378: int i;
379: struct expr *first, *next, *cur;
380:
381: first = cur = NULL;
382:
383: for (i = 0; i < argc; i++) {
1.5 kristaps 384: next = exprterm(search, argv[i]);
1.1 kristaps 385: if (NULL == next) {
386: exprfree(first);
387: return(NULL);
388: }
389: if (NULL != first) {
390: cur->next = next;
391: cur = next;
392: } else
393: cur = first = next;
394: }
395:
396: return(first);
397: }
398:
399: static struct expr *
1.5 kristaps 400: exprterm(const struct mansearch *search, char *buf)
1.1 kristaps 401: {
402: struct expr *e;
403: char *key, *v;
404: size_t i;
405:
406: if ('\0' == *buf)
407: return(NULL);
408:
409: e = mandoc_calloc(1, sizeof(struct expr));
410:
1.5 kristaps 411: /*"whatis" mode uses an opaque string and default fields. */
412:
413: if (MANSEARCH_WHATIS & search->flags) {
414: e->v = buf;
415: e->bits = search->deftype;
416: return(e);
417: }
418:
1.1 kristaps 419: /*
420: * If no =~ is specified, search with equality over names and
421: * descriptions.
422: * If =~ begins the phrase, use name and description fields.
423: */
424:
425: if (NULL == (v = strpbrk(buf, "=~"))) {
426: e->v = buf;
1.5 kristaps 427: e->bits = search->deftype;
1.1 kristaps 428: return(e);
429: } else if (v == buf)
1.5 kristaps 430: e->bits = search->deftype;
1.1 kristaps 431:
432: e->glob = '~' == *v;
433: *v++ = '\0';
434: e->v = v;
435:
436: /*
437: * Parse out all possible fields.
438: * If the field doesn't resolve, bail.
439: */
440:
441: while (NULL != (key = strsep(&buf, ","))) {
442: if ('\0' == *key)
443: continue;
444: i = 0;
445: while (types[i].bits &&
446: strcasecmp(types[i].name, key))
447: i++;
448: if (0 == types[i].bits) {
449: free(e);
450: return(NULL);
451: }
452: e->bits |= types[i].bits;
453: }
454:
455: return(e);
456: }
457:
458: static void
459: exprfree(struct expr *p)
460: {
461: struct expr *pp;
462:
463: while (NULL != p) {
464: pp = p->next;
465: free(p);
466: p = pp;
467: }
468: }
469:
/*
 * Hash table allocation callback returning "sz" zeroed bytes.
 * The callback argument is not used.
 */
static void *
hash_halloc(size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_calloc(sz, 1);
	return(mem);
}
476:
/*
 * Hash table allocation callback returning "sz" bytes that are not
 * initialized.  The callback argument is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_malloc(sz);
	return(mem);
}
483:
/*
 * Hash table callback releasing memory obtained from one of the
 * allocation callbacks.  The size and the callback argument are not
 * needed for that and are ignored.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{

	(void)sz;
	(void)arg;
	free(p);
}
CVSweb