Annotation of mandoc/mansearch.c, Revision 1.6
1.6 ! schwarze 1: /* $Id: mansearch.c,v 1.5 2012/06/09 14:11:16 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #ifdef HAVE_CONFIG_H
18: #include "config.h"
19: #endif
20:
21: #include <assert.h>
22: #include <fcntl.h>
23: #include <getopt.h>
1.6 ! schwarze 24: #include <limits.h>
1.1 kristaps 25: #include <stdio.h>
26: #include <stdint.h>
27: #include <stddef.h>
28: #include <stdlib.h>
29: #include <string.h>
30: #include <unistd.h>
31:
1.4 kristaps 32: #ifdef HAVE_OHASH
1.1 kristaps 33: #include <ohash.h>
1.4 kristaps 34: #else
35: #include "compat_ohash.h"
36: #endif
1.1 kristaps 37: #include <sqlite3.h>
38:
39: #include "mandoc.h"
40: #include "manpath.h"
41: #include "mansearch.h"
42:
/*
 * Bind a text or 64-bit integer value to parameter "_i" of the prepared
 * statement "_s", then advance "_i" so consecutive invocations fill
 * consecutive parameters.  "_i" must therefore be a modifiable lvalue.
 * A failed bind is reported on stderr using the message from "_db";
 * it is not otherwise acted upon.
 *
 * The bodies are wrapped in do { } while (0) so that each invocation is
 * a single statement: a bare "if" here would capture an "else" that
 * follows the macro at the call site.
 */
#define SQL_BIND_TEXT(_db, _s, _i, _v) \
	do { \
		if (SQLITE_OK != sqlite3_bind_text \
		    ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define SQL_BIND_INT64(_db, _s, _i, _v) \
	do { \
		if (SQLITE_OK != sqlite3_bind_int64 \
		    ((_s), (_i)++, (_v))) \
			fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
51:
/*
 * One term of a search expression, as produced by exprterm().
 * Terms are chained through "next" into a singly linked list by
 * exprcomp() and released with exprfree().
 */
struct expr {
	int glob; /* is glob? (set when the term used '~') */
	uint64_t bits; /* type-mask */
	const char *v; /* search value; points into the caller's string, not owned */
	struct expr *next; /* next in sequence */
};
58:
/*
 * One matching document collected from a database in mansearch().
 * Entries live in an ohash table keyed on "id" (the table is set up
 * with key_offset = offsetof(struct match, id)).
 */
struct match {
	uint64_t id; /* identifier in database */
	char *file; /* relative filepath of manpage (heap copy) */
	char *desc; /* description of manpage (heap copy) */
	int form; /* 0 == catpage */
};
65:
/*
 * Associates a field name accepted in search expressions with the
 * bitmask that is OR'ed into struct expr's "bits" for that field.
 */
struct type {
	uint64_t bits; /* mask for this field; 0 marks the end of a table */
	const char *name; /* field name, compared case-insensitively */
};
70:
/*
 * Field names recognised by exprterm().
 * "Fo" shares the mask of "Fn" and "Vt" shares the mask of "Va".
 * "any" selects every bit.
 * The final entry (bits == 0, name == NULL) terminates the table:
 * the lookup loop in exprterm() stops when it sees bits == 0.
 */
static const struct type types[] = {
	{ TYPE_An, "An" },
	{ TYPE_Ar, "Ar" },
	{ TYPE_At, "At" },
	{ TYPE_Bsx, "Bsx" },
	{ TYPE_Bx, "Bx" },
	{ TYPE_Cd, "Cd" },
	{ TYPE_Cm, "Cm" },
	{ TYPE_Dv, "Dv" },
	{ TYPE_Dx, "Dx" },
	{ TYPE_Em, "Em" },
	{ TYPE_Er, "Er" },
	{ TYPE_Ev, "Ev" },
	{ TYPE_Fa, "Fa" },
	{ TYPE_Fl, "Fl" },
	{ TYPE_Fn, "Fn" },
	{ TYPE_Fn, "Fo" },
	{ TYPE_Ft, "Ft" },
	{ TYPE_Fx, "Fx" },
	{ TYPE_Ic, "Ic" },
	{ TYPE_In, "In" },
	{ TYPE_Lb, "Lb" },
	{ TYPE_Li, "Li" },
	{ TYPE_Lk, "Lk" },
	{ TYPE_Ms, "Ms" },
	{ TYPE_Mt, "Mt" },
	{ TYPE_Nd, "Nd" },
	{ TYPE_Nm, "Nm" },
	{ TYPE_Nx, "Nx" },
	{ TYPE_Ox, "Ox" },
	{ TYPE_Pa, "Pa" },
	{ TYPE_Rs, "Rs" },
	{ TYPE_Sh, "Sh" },
	{ TYPE_Ss, "Ss" },
	{ TYPE_St, "St" },
	{ TYPE_Sy, "Sy" },
	{ TYPE_Tn, "Tn" },
	{ TYPE_Va, "Va" },
	{ TYPE_Va, "Vt" },
	{ TYPE_Xr, "Xr" },
	{ ~0ULL, "any" },
	{ 0ULL, NULL }
};
114:
/* Allocation callbacks installed into the ohash_info in mansearch(). */
static void *hash_alloc(size_t, void *);
static void hash_free(void *, size_t, void *);
static void *hash_halloc(size_t, void *);
/* Search-expression parsing and disposal. */
static struct expr *exprcomp(const struct mansearch *,
		int, char *[]);
static void exprfree(struct expr *);
static struct expr *exprterm(const struct mansearch *, char *);
/* Construction of the SQL query text from a parsed expression. */
static char *sql_statement(const struct expr *,
		const char *, const char *);
124:
125: int
1.5 kristaps 126: mansearch(const struct mansearch *search,
127: const struct manpaths *paths,
1.1 kristaps 128: int argc, char *argv[],
129: struct manpage **res, size_t *sz)
130: {
1.2 kristaps 131: int fd, rc, c;
1.1 kristaps 132: int64_t id;
1.6 ! schwarze 133: char buf[PATH_MAX];
1.1 kristaps 134: char *sql;
135: struct expr *e, *ep;
136: sqlite3 *db;
137: sqlite3_stmt *s;
138: struct match *mp;
139: struct ohash_info info;
140: struct ohash htab;
141: unsigned int idx;
142: size_t i, j, cur, maxres;
143:
144: memset(&info, 0, sizeof(struct ohash_info));
145:
146: info.halloc = hash_halloc;
147: info.alloc = hash_alloc;
148: info.hfree = hash_free;
149: info.key_offset = offsetof(struct match, id);
150:
1.2 kristaps 151: *sz = cur = maxres = 0;
1.1 kristaps 152: sql = NULL;
153: *res = NULL;
154: fd = -1;
155: e = NULL;
1.2 kristaps 156: rc = 0;
1.1 kristaps 157:
158: if (0 == argc)
159: goto out;
1.5 kristaps 160: if (NULL == (e = exprcomp(search, argc, argv)))
1.1 kristaps 161: goto out;
162:
163: /*
164: * Save a descriptor to the current working directory.
165: * Since pathnames in the "paths" variable might be relative,
166: * and we'll be chdir()ing into them, we need to keep a handle
167: * on our current directory from which to start the chdir().
168: */
169:
1.6 ! schwarze 170: if (NULL == getcwd(buf, PATH_MAX)) {
1.1 kristaps 171: perror(NULL);
172: goto out;
173: } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
174: perror(buf);
175: goto out;
176: }
177:
1.5 kristaps 178: sql = sql_statement(e, search->arch, search->sec);
1.1 kristaps 179:
180: /*
181: * Loop over the directories (containing databases) for us to
182: * search.
183: * Don't let missing/bad databases/directories phase us.
184: * In each, try to open the resident database and, if it opens,
185: * scan it for our match expression.
186: */
187:
188: for (i = 0; i < paths->sz; i++) {
189: if (-1 == fchdir(fd)) {
190: perror(buf);
191: free(*res);
192: break;
193: } else if (-1 == chdir(paths->paths[i])) {
194: perror(paths->paths[i]);
195: continue;
196: }
197:
1.2 kristaps 198: c = sqlite3_open_v2
199: (MANDOC_DB, &db,
200: SQLITE_OPEN_READONLY, NULL);
1.1 kristaps 201:
1.2 kristaps 202: if (SQLITE_OK != c) {
1.1 kristaps 203: perror(MANDOC_DB);
204: sqlite3_close(db);
205: continue;
206: }
207:
208: j = 1;
1.2 kristaps 209: c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
210: if (SQLITE_OK != c)
211: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
1.1 kristaps 212:
1.5 kristaps 213: if (NULL != search->arch)
214: SQL_BIND_TEXT(db, s, j, search->arch);
215: if (NULL != search->sec)
216: SQL_BIND_TEXT(db, s, j, search->sec);
1.1 kristaps 217:
218: for (ep = e; NULL != ep; ep = ep->next) {
1.3 kristaps 219: SQL_BIND_TEXT(db, s, j, ep->v);
220: SQL_BIND_INT64(db, s, j, ep->bits);
1.1 kristaps 221: }
222:
223: memset(&htab, 0, sizeof(struct ohash));
224: ohash_init(&htab, 4, &info);
225:
226: /*
227: * Hash each entry on its [unique] document identifier.
228: * This is a uint64_t.
229: * Instead of using a hash function, simply convert the
230: * uint64_t to a uint32_t, the hash value's type.
231: * This gives good performance and preserves the
232: * distribution of buckets in the table.
233: */
1.2 kristaps 234: while (SQLITE_ROW == (c = sqlite3_step(s))) {
1.1 kristaps 235: id = sqlite3_column_int64(s, 0);
236: idx = ohash_lookup_memory
237: (&htab, (char *)&id,
238: sizeof(uint64_t), (uint32_t)id);
239:
240: if (NULL != ohash_find(&htab, idx))
241: continue;
242:
243: mp = mandoc_calloc(1, sizeof(struct match));
244: mp->id = id;
245: mp->file = mandoc_strdup
246: ((char *)sqlite3_column_text(s, 3));
247: mp->desc = mandoc_strdup
248: ((char *)sqlite3_column_text(s, 4));
249: mp->form = sqlite3_column_int(s, 5);
250: ohash_insert(&htab, idx, mp);
251: }
252:
1.2 kristaps 253: if (SQLITE_DONE != c)
254: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
255:
1.1 kristaps 256: sqlite3_finalize(s);
257: sqlite3_close(db);
258:
259: for (mp = ohash_first(&htab, &idx);
260: NULL != mp;
261: mp = ohash_next(&htab, &idx)) {
262: if (cur + 1 > maxres) {
263: maxres += 1024;
264: *res = mandoc_realloc
265: (*res, maxres * sizeof(struct manpage));
266: }
267: strlcpy((*res)[cur].file,
1.6 ! schwarze 268: paths->paths[i], PATH_MAX);
! 269: strlcat((*res)[cur].file, "/", PATH_MAX);
! 270: strlcat((*res)[cur].file, mp->file, PATH_MAX);
1.1 kristaps 271: (*res)[cur].desc = mp->desc;
272: (*res)[cur].form = mp->form;
273: free(mp->file);
274: free(mp);
275: cur++;
276: }
277: ohash_delete(&htab);
278: }
1.2 kristaps 279: rc = 1;
1.1 kristaps 280: out:
281: exprfree(e);
282: if (-1 != fd)
283: close(fd);
284: free(sql);
285: *sz = cur;
1.2 kristaps 286: return(rc);
1.1 kristaps 287: }
288:
289: /*
290: * Prepare the search SQL statement.
291: * We search for any of the words specified in our match expression.
292: * We filter the per-doc AND expressions when collecting results.
293: */
294: static char *
295: sql_statement(const struct expr *e, const char *arch, const char *sec)
296: {
297: char *sql;
298: const char *glob = "(key GLOB ? AND bits & ?)";
299: const char *eq = "(key = ? AND bits & ?)";
300: const char *andarch = "arch = ? AND ";
301: const char *andsec = "sec = ? AND ";
1.2 kristaps 302: size_t globsz;
303: size_t eqsz;
1.1 kristaps 304: size_t sz;
305:
306: sql = mandoc_strdup
307: ("SELECT docid,bits,key,file,desc,form,sec,arch "
308: "FROM keys "
309: "INNER JOIN docs ON docs.id=keys.docid "
310: "WHERE ");
311: sz = strlen(sql);
1.2 kristaps 312: globsz = strlen(glob);
313: eqsz = strlen(eq);
1.1 kristaps 314:
315: if (NULL != arch) {
316: sz += strlen(andarch) + 1;
317: sql = mandoc_realloc(sql, sz);
318: strlcat(sql, andarch, sz);
319: }
1.2 kristaps 320:
1.1 kristaps 321: if (NULL != sec) {
322: sz += strlen(andsec) + 1;
323: sql = mandoc_realloc(sql, sz);
324: strlcat(sql, andsec, sz);
325: }
326:
327: sz += 2;
328: sql = mandoc_realloc(sql, sz);
329: strlcat(sql, "(", sz);
330:
331: for ( ; NULL != e; e = e->next) {
332: sz += (e->glob ? globsz : eqsz) +
333: (NULL == e->next ? 3 : 5);
334: sql = mandoc_realloc(sql, sz);
335: strlcat(sql, e->glob ? glob : eq, sz);
336: strlcat(sql, NULL == e->next ? ");" : " OR ", sz);
337: }
338:
339: return(sql);
340: }
341:
342: /*
343: * Compile a set of string tokens into an expression.
344: * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
345: * "(", "foo=bar", etc.).
346: */
347: static struct expr *
1.5 kristaps 348: exprcomp(const struct mansearch *search, int argc, char *argv[])
1.1 kristaps 349: {
350: int i;
351: struct expr *first, *next, *cur;
352:
353: first = cur = NULL;
354:
355: for (i = 0; i < argc; i++) {
1.5 kristaps 356: next = exprterm(search, argv[i]);
1.1 kristaps 357: if (NULL == next) {
358: exprfree(first);
359: return(NULL);
360: }
361: if (NULL != first) {
362: cur->next = next;
363: cur = next;
364: } else
365: cur = first = next;
366: }
367:
368: return(first);
369: }
370:
371: static struct expr *
1.5 kristaps 372: exprterm(const struct mansearch *search, char *buf)
1.1 kristaps 373: {
374: struct expr *e;
375: char *key, *v;
376: size_t i;
377:
378: if ('\0' == *buf)
379: return(NULL);
380:
381: e = mandoc_calloc(1, sizeof(struct expr));
382:
1.5 kristaps 383: /*"whatis" mode uses an opaque string and default fields. */
384:
385: if (MANSEARCH_WHATIS & search->flags) {
386: e->v = buf;
387: e->bits = search->deftype;
388: return(e);
389: }
390:
1.1 kristaps 391: /*
392: * If no =~ is specified, search with equality over names and
393: * descriptions.
394: * If =~ begins the phrase, use name and description fields.
395: */
396:
397: if (NULL == (v = strpbrk(buf, "=~"))) {
398: e->v = buf;
1.5 kristaps 399: e->bits = search->deftype;
1.1 kristaps 400: return(e);
401: } else if (v == buf)
1.5 kristaps 402: e->bits = search->deftype;
1.1 kristaps 403:
404: e->glob = '~' == *v;
405: *v++ = '\0';
406: e->v = v;
407:
408: /*
409: * Parse out all possible fields.
410: * If the field doesn't resolve, bail.
411: */
412:
413: while (NULL != (key = strsep(&buf, ","))) {
414: if ('\0' == *key)
415: continue;
416: i = 0;
417: while (types[i].bits &&
418: strcasecmp(types[i].name, key))
419: i++;
420: if (0 == types[i].bits) {
421: free(e);
422: return(NULL);
423: }
424: e->bits |= types[i].bits;
425: }
426:
427: return(e);
428: }
429:
430: static void
431: exprfree(struct expr *p)
432: {
433: struct expr *pp;
434:
435: while (NULL != p) {
436: pp = p->next;
437: free(p);
438: p = pp;
439: }
440: }
441:
/*
 * "halloc" callback installed in the ohash_info by mansearch():
 * obtains "sz" bytes through mandoc_calloc().
 * The callback argument is not used.
 */
static void *
hash_halloc(size_t sz, void *arg)
{
	void *mem;

	(void)arg;
	mem = mandoc_calloc(sz, 1);
	return(mem);
}
448:
/*
 * "alloc" callback installed in the ohash_info by mansearch():
 * obtains "sz" bytes through mandoc_malloc().
 * The callback argument is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void *mem;

	(void)arg;
	mem = mandoc_malloc(sz);
	return(mem);
}
455:
/*
 * "hfree" callback installed in the ohash_info by mansearch():
 * releases "p" with free().
 * The size and the callback argument are not used.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{
	(void)sz;
	(void)arg;
	free(p);
}
CVSweb