Annotation of mandoc/mansearch.c, Revision 1.2
1.2 ! kristaps 1: /* $Id: mansearch.c,v 1.1 2012/06/08 10:36:23 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #ifdef HAVE_CONFIG_H
18: #include "config.h"
19: #endif
20:
21: #include <sys/param.h>
22:
23: #include <assert.h>
24: #include <fcntl.h>
25: #include <getopt.h>
26: #include <stdio.h>
27: #include <stdint.h>
28: #include <stddef.h>
29: #include <stdlib.h>
30: #include <string.h>
31: #include <unistd.h>
32:
33: #include <ohash.h>
34: #include <sqlite3.h>
35:
36: #include "mandoc.h"
37: #include "manpath.h"
38: #include "mandocdb.h"
39: #include "mansearch.h"
40:
/*
 * Bind a value to parameter slot "_i" of the prepared statement "_s"
 * and advance "_i" to the next slot.
 * A failed bind is reported on stderr using the error message of the
 * database handle "_db"; it is not otherwise acted upon.
 * Text is bound with SQLITE_STATIC, i.e., SQLite does not copy it, so
 * the string must stay valid for as long as the statement uses it.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to a
 * single statement: a bare "if" expansion would capture the "else" of
 * an enclosing if/else at the call site.
 * Note that "_i" is modified, so it must be an lvalue.
 */
#define	BIND_TEXT(_db, _s, _i, _v) \
	do { \
		if (SQLITE_OK != sqlite3_bind_text \
		    ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	BIND_INT64(_db, _s, _i, _v) \
	do { \
		if (SQLITE_OK != sqlite3_bind_int64 \
		    ((_s), (_i)++, (_v))) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
! 49:
/*
 * One term of a compiled search expression.
 * Terms are chained into a singly-linked list through "next".
 */
struct	expr {
	/* Non-zero if "v" is matched with GLOB instead of equality. */
	int		 glob;
	/* Mask of the TYPE_* key classes this term may match. */
	uint64_t	 bits;
	/* Value searched for; points into the caller's argument. */
	const char	*v;
	/* Following term, or NULL at the end of the list. */
	struct expr	*next;
};
56:
/*
 * A single matching document collected from one database.
 * Entries are kept in a hash table keyed on "id" while a database is
 * being scanned.
 */
struct	match {
	/* Document identifier in the database; the hash key. */
	uint64_t	 id;
	/* File path of the manual, relative to its manpath. */
	char		*file;
	/* One-line description of the manual. */
	char		*desc;
	/* Page format: zero means a preformatted (cat) page. */
	int		 form;
};
63:
/*
 * Associates the textual name of a search key class, as typed by the
 * user, with its bit-mask.
 */
struct	type {
	/* Mask value; zero marks the end of a table of these. */
	uint64_t	 bits;
	/* Name compared (ignoring case) against user input. */
	const char	*name;
};
68:
69: static const struct type types[] = {
70: { TYPE_An, "An" },
71: { TYPE_Ar, "Ar" },
72: { TYPE_At, "At" },
73: { TYPE_Bsx, "Bsx" },
74: { TYPE_Bx, "Bx" },
75: { TYPE_Cd, "Cd" },
76: { TYPE_Cm, "Cm" },
77: { TYPE_Dv, "Dv" },
78: { TYPE_Dx, "Dx" },
79: { TYPE_Em, "Em" },
80: { TYPE_Er, "Er" },
81: { TYPE_Ev, "Ev" },
82: { TYPE_Fa, "Fa" },
83: { TYPE_Fl, "Fl" },
84: { TYPE_Fn, "Fn" },
85: { TYPE_Fn, "Fo" },
86: { TYPE_Ft, "Ft" },
87: { TYPE_Fx, "Fx" },
88: { TYPE_Ic, "Ic" },
89: { TYPE_In, "In" },
90: { TYPE_Lb, "Lb" },
91: { TYPE_Li, "Li" },
92: { TYPE_Lk, "Lk" },
93: { TYPE_Ms, "Ms" },
94: { TYPE_Mt, "Mt" },
95: { TYPE_Nd, "Nd" },
96: { TYPE_Nm, "Nm" },
97: { TYPE_Nx, "Nx" },
98: { TYPE_Ox, "Ox" },
99: { TYPE_Pa, "Pa" },
100: { TYPE_Rs, "Rs" },
101: { TYPE_Sh, "Sh" },
102: { TYPE_Ss, "Ss" },
103: { TYPE_St, "St" },
104: { TYPE_Sy, "Sy" },
105: { TYPE_Tn, "Tn" },
106: { TYPE_Va, "Va" },
107: { TYPE_Va, "Vt" },
108: { TYPE_Xr, "Xr" },
109: { ~0ULL, "any" },
110: { 0ULL, NULL }
111: };
112:
/* Allocation callbacks handed to the ohash table. */
static	void		*hash_alloc(size_t sz, void *arg);
static	void		 hash_free(void *p, size_t sz, void *arg);
static	void		*hash_halloc(size_t sz, void *arg);
/* Search expression handling. */
static	struct expr	*exprcomp(int argc, char *argv[]);
static	void		 exprfree(struct expr *p);
static	struct expr	*exprterm(char *buf);
/* Construction of the SQL query text. */
static	char		*sql_statement(const struct expr *e,
				const char *arch, const char *sec);
121:
122: int
123: mansearch(const struct manpaths *paths,
124: const char *arch, const char *sec,
125: int argc, char *argv[],
126: struct manpage **res, size_t *sz)
127: {
1.2 ! kristaps 128: int fd, rc, c;
1.1 kristaps 129: int64_t id;
130: char buf[MAXPATHLEN];
131: char *sql;
132: struct expr *e, *ep;
133: sqlite3 *db;
134: sqlite3_stmt *s;
135: struct match *mp;
136: struct ohash_info info;
137: struct ohash htab;
138: unsigned int idx;
139: size_t i, j, cur, maxres;
140:
141: memset(&info, 0, sizeof(struct ohash_info));
142:
143: info.halloc = hash_halloc;
144: info.alloc = hash_alloc;
145: info.hfree = hash_free;
146: info.key_offset = offsetof(struct match, id);
147:
1.2 ! kristaps 148: *sz = cur = maxres = 0;
1.1 kristaps 149: sql = NULL;
150: *res = NULL;
151: fd = -1;
152: e = NULL;
1.2 ! kristaps 153: rc = 0;
1.1 kristaps 154:
155: if (0 == argc)
156: goto out;
157: if (NULL == (e = exprcomp(argc, argv)))
158: goto out;
159:
160: /*
161: * Save a descriptor to the current working directory.
162: * Since pathnames in the "paths" variable might be relative,
163: * and we'll be chdir()ing into them, we need to keep a handle
164: * on our current directory from which to start the chdir().
165: */
166:
167: if (NULL == getcwd(buf, MAXPATHLEN)) {
168: perror(NULL);
169: goto out;
170: } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
171: perror(buf);
172: goto out;
173: }
174:
175: sql = sql_statement(e, arch, sec);
176:
177: /*
178: * Loop over the directories (containing databases) for us to
179: * search.
180: * Don't let missing/bad databases/directories phase us.
181: * In each, try to open the resident database and, if it opens,
182: * scan it for our match expression.
183: */
184:
185: for (i = 0; i < paths->sz; i++) {
186: if (-1 == fchdir(fd)) {
187: perror(buf);
188: free(*res);
189: break;
190: } else if (-1 == chdir(paths->paths[i])) {
191: perror(paths->paths[i]);
192: continue;
193: }
194:
1.2 ! kristaps 195: c = sqlite3_open_v2
! 196: (MANDOC_DB, &db,
! 197: SQLITE_OPEN_READONLY, NULL);
1.1 kristaps 198:
1.2 ! kristaps 199: if (SQLITE_OK != c) {
1.1 kristaps 200: perror(MANDOC_DB);
201: sqlite3_close(db);
202: continue;
203: }
204:
205: j = 1;
1.2 ! kristaps 206: c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
! 207: if (SQLITE_OK != c)
! 208: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
1.1 kristaps 209:
210: if (NULL != arch)
1.2 ! kristaps 211: BIND_TEXT(db, s, j, arch);
1.1 kristaps 212: if (NULL != sec)
1.2 ! kristaps 213: BIND_TEXT(db, s, j, arch);
1.1 kristaps 214:
215: for (ep = e; NULL != ep; ep = ep->next) {
1.2 ! kristaps 216: BIND_TEXT(db, s, j, ep->v);
! 217: BIND_INT64(db, s, j, ep->bits);
1.1 kristaps 218: }
219:
220: memset(&htab, 0, sizeof(struct ohash));
221: ohash_init(&htab, 4, &info);
222:
223: /*
224: * Hash each entry on its [unique] document identifier.
225: * This is a uint64_t.
226: * Instead of using a hash function, simply convert the
227: * uint64_t to a uint32_t, the hash value's type.
228: * This gives good performance and preserves the
229: * distribution of buckets in the table.
230: */
1.2 ! kristaps 231: while (SQLITE_ROW == (c = sqlite3_step(s))) {
1.1 kristaps 232: id = sqlite3_column_int64(s, 0);
233: idx = ohash_lookup_memory
234: (&htab, (char *)&id,
235: sizeof(uint64_t), (uint32_t)id);
236:
237: if (NULL != ohash_find(&htab, idx))
238: continue;
239:
240: mp = mandoc_calloc(1, sizeof(struct match));
241: mp->id = id;
242: mp->file = mandoc_strdup
243: ((char *)sqlite3_column_text(s, 3));
244: mp->desc = mandoc_strdup
245: ((char *)sqlite3_column_text(s, 4));
246: mp->form = sqlite3_column_int(s, 5);
247: ohash_insert(&htab, idx, mp);
248: }
249:
1.2 ! kristaps 250: if (SQLITE_DONE != c)
! 251: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
! 252:
1.1 kristaps 253: sqlite3_finalize(s);
254: sqlite3_close(db);
255:
256: for (mp = ohash_first(&htab, &idx);
257: NULL != mp;
258: mp = ohash_next(&htab, &idx)) {
259: if (cur + 1 > maxres) {
260: maxres += 1024;
261: *res = mandoc_realloc
262: (*res, maxres * sizeof(struct manpage));
263: }
264: strlcpy((*res)[cur].file,
265: paths->paths[i], MAXPATHLEN);
266: strlcat((*res)[cur].file, "/", MAXPATHLEN);
267: strlcat((*res)[cur].file, mp->file, MAXPATHLEN);
268: (*res)[cur].desc = mp->desc;
269: (*res)[cur].form = mp->form;
270: free(mp->file);
271: free(mp);
272: cur++;
273: }
274: ohash_delete(&htab);
275: }
1.2 ! kristaps 276: rc = 1;
1.1 kristaps 277: out:
278: exprfree(e);
279: if (-1 != fd)
280: close(fd);
281: free(sql);
282: *sz = cur;
1.2 ! kristaps 283: return(rc);
1.1 kristaps 284: }
285:
286: /*
287: * Prepare the search SQL statement.
288: * We search for any of the words specified in our match expression.
289: * We filter the per-doc AND expressions when collecting results.
290: */
291: static char *
292: sql_statement(const struct expr *e, const char *arch, const char *sec)
293: {
294: char *sql;
295: const char *glob = "(key GLOB ? AND bits & ?)";
296: const char *eq = "(key = ? AND bits & ?)";
297: const char *andarch = "arch = ? AND ";
298: const char *andsec = "sec = ? AND ";
1.2 ! kristaps 299: size_t globsz;
! 300: size_t eqsz;
1.1 kristaps 301: size_t sz;
302:
303: sql = mandoc_strdup
304: ("SELECT docid,bits,key,file,desc,form,sec,arch "
305: "FROM keys "
306: "INNER JOIN docs ON docs.id=keys.docid "
307: "WHERE ");
308: sz = strlen(sql);
1.2 ! kristaps 309: globsz = strlen(glob);
! 310: eqsz = strlen(eq);
1.1 kristaps 311:
312: if (NULL != arch) {
313: sz += strlen(andarch) + 1;
314: sql = mandoc_realloc(sql, sz);
315: strlcat(sql, andarch, sz);
316: }
1.2 ! kristaps 317:
1.1 kristaps 318: if (NULL != sec) {
319: sz += strlen(andsec) + 1;
320: sql = mandoc_realloc(sql, sz);
321: strlcat(sql, andsec, sz);
322: }
323:
324: sz += 2;
325: sql = mandoc_realloc(sql, sz);
326: strlcat(sql, "(", sz);
327:
328: for ( ; NULL != e; e = e->next) {
329: sz += (e->glob ? globsz : eqsz) +
330: (NULL == e->next ? 3 : 5);
331: sql = mandoc_realloc(sql, sz);
332: strlcat(sql, e->glob ? glob : eq, sz);
333: strlcat(sql, NULL == e->next ? ");" : " OR ", sz);
334: }
335:
336: return(sql);
337: }
338:
339: /*
340: * Compile a set of string tokens into an expression.
341: * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
342: * "(", "foo=bar", etc.).
343: */
344: static struct expr *
345: exprcomp(int argc, char *argv[])
346: {
347: int i;
348: struct expr *first, *next, *cur;
349:
350: first = cur = NULL;
351:
352: for (i = 0; i < argc; i++) {
353: next = exprterm(argv[i]);
354: if (NULL == next) {
355: exprfree(first);
356: return(NULL);
357: }
358: if (NULL != first) {
359: cur->next = next;
360: cur = next;
361: } else
362: cur = first = next;
363: }
364:
365: return(first);
366: }
367:
368: static struct expr *
369: exprterm(char *buf)
370: {
371: struct expr *e;
372: char *key, *v;
373: size_t i;
374:
375: if ('\0' == *buf)
376: return(NULL);
377:
378: e = mandoc_calloc(1, sizeof(struct expr));
379:
380: /*
381: * If no =~ is specified, search with equality over names and
382: * descriptions.
383: * If =~ begins the phrase, use name and description fields.
384: */
385:
386: if (NULL == (v = strpbrk(buf, "=~"))) {
387: e->v = buf;
388: e->bits = TYPE_Nm | TYPE_Nd;
389: return(e);
390: } else if (v == buf)
391: e->bits = TYPE_Nm | TYPE_Nd;
392:
393: e->glob = '~' == *v;
394: *v++ = '\0';
395: e->v = v;
396:
397: /*
398: * Parse out all possible fields.
399: * If the field doesn't resolve, bail.
400: */
401:
402: while (NULL != (key = strsep(&buf, ","))) {
403: if ('\0' == *key)
404: continue;
405: i = 0;
406: while (types[i].bits &&
407: strcasecmp(types[i].name, key))
408: i++;
409: if (0 == types[i].bits) {
410: free(e);
411: return(NULL);
412: }
413: e->bits |= types[i].bits;
414: }
415:
416: return(e);
417: }
418:
419: static void
420: exprfree(struct expr *p)
421: {
422: struct expr *pp;
423:
424: while (NULL != p) {
425: pp = p->next;
426: free(p);
427: p = pp;
428: }
429: }
430:
/*
 * Callback installed as the "halloc" member of the ohash_info:
 * return "sz" bytes of zeroed memory.
 * The "arg" pointer is not used.
 */
static void *
hash_halloc(size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_calloc(sz, 1);
	return(mem);
}
437:
/*
 * Callback installed as the "alloc" member of the ohash_info:
 * return "sz" bytes of uninitialised memory.
 * The "arg" pointer is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_malloc(sz);
	return(mem);
}
444:
/*
 * Callback installed as the "hfree" member of the ohash_info:
 * release the memory at "p".
 * Neither the size "sz" nor the "arg" pointer is used.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{

	(void)sz;
	(void)arg;
	free(p);
}
CVSweb