Annotation of mandoc/mansearch.c, Revision 1.11
1.11 ! schwarze 1: /* $Id: mansearch.c,v 1.10 2013/12/27 18:51:25 schwarze Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
1.7 schwarze 4: * Copyright (c) 2013 Ingo Schwarze <schwarze@openbsd.org>
1.1 kristaps 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #ifdef HAVE_CONFIG_H
19: #include "config.h"
20: #endif
21:
22: #include <assert.h>
23: #include <fcntl.h>
24: #include <getopt.h>
1.6 schwarze 25: #include <limits.h>
1.8 schwarze 26: #include <regex.h>
1.1 kristaps 27: #include <stdio.h>
28: #include <stdint.h>
29: #include <stddef.h>
30: #include <stdlib.h>
31: #include <string.h>
32: #include <unistd.h>
33:
1.4 kristaps 34: #ifdef HAVE_OHASH
1.1 kristaps 35: #include <ohash.h>
1.4 kristaps 36: #else
37: #include "compat_ohash.h"
38: #endif
1.1 kristaps 39: #include <sqlite3.h>
40:
41: #include "mandoc.h"
42: #include "manpath.h"
43: #include "mansearch.h"
44:
/*
 * Bind one value to a prepared statement and advance the parameter index.
 *
 *   _db  database handle, used only to fetch the error message
 *   _s   prepared statement to bind to
 *   _i   lvalue holding the parameter index; it is post-incremented,
 *        so it must be a plain variable and is evaluated for its
 *        side effect
 *   _v   value to bind
 *
 * All bindings use SQLITE_STATIC, so the bound storage has to stay
 * valid for as long as the statement uses it.
 * A failed bind is reported on stderr and otherwise ignored.
 */
#define	SQL_BIND_TEXT(_db, _s, _i, _v) \
	do { if (SQLITE_OK != sqlite3_bind_text \
		((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_INT64(_db, _s, _i, _v) \
	do { if (SQLITE_OK != sqlite3_bind_int64 \
		((_s), (_i)++, (_v))) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
/* Binds the object _v itself (address and size), so _v must be an lvalue. */
#define	SQL_BIND_BLOB(_db, _s, _i, _v) \
	do { if (SQLITE_OK != sqlite3_bind_blob \
		((_s), (_i)++, &(_v), sizeof(_v), SQLITE_STATIC)) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
1.2 kristaps 60:
/*
 * One term of a search expression.
 * Terms form a singly linked list.
 * A term is either a substring search (substr is set) or a regular
 * expression search (substr is NULL and regexp has been compiled).
 */
struct	expr {
	uint64_t	 bits;		/* mask of the TYPE_* fields to search */
	const char	*substr;	/* substring to look for, or NULL */
	regex_t		 regexp;	/* compiled pattern when substr is NULL */
	struct expr	*next;		/* following term, or NULL at the end */
};
67:
/*
 * One page found by the key query.
 * Entries are kept in a hash table keyed on the id member.
 */
struct	match {
	uint64_t	 id;	/* page identifier in the database (hash key) */
	char		*file;	/* file path relative to the manpath */
	char		*desc;	/* one-line description of the page */
	int		 form;	/* 0 == catpage */
};
74:
/*
 * Association of a search field name, as typed by the user,
 * with the bit(s) it selects.
 */
struct	type {
	uint64_t	 bits;	/* TYPE_* mask selected by this name */
	const char	*name;	/* field name, compared case-insensitively */
};
79:
80: static const struct type types[] = {
81: { TYPE_An, "An" },
82: { TYPE_Ar, "Ar" },
83: { TYPE_At, "At" },
84: { TYPE_Bsx, "Bsx" },
85: { TYPE_Bx, "Bx" },
86: { TYPE_Cd, "Cd" },
87: { TYPE_Cm, "Cm" },
88: { TYPE_Dv, "Dv" },
89: { TYPE_Dx, "Dx" },
90: { TYPE_Em, "Em" },
91: { TYPE_Er, "Er" },
92: { TYPE_Ev, "Ev" },
93: { TYPE_Fa, "Fa" },
94: { TYPE_Fl, "Fl" },
95: { TYPE_Fn, "Fn" },
96: { TYPE_Fn, "Fo" },
97: { TYPE_Ft, "Ft" },
98: { TYPE_Fx, "Fx" },
99: { TYPE_Ic, "Ic" },
100: { TYPE_In, "In" },
101: { TYPE_Lb, "Lb" },
102: { TYPE_Li, "Li" },
103: { TYPE_Lk, "Lk" },
104: { TYPE_Ms, "Ms" },
105: { TYPE_Mt, "Mt" },
106: { TYPE_Nd, "Nd" },
107: { TYPE_Nm, "Nm" },
108: { TYPE_Nx, "Nx" },
109: { TYPE_Ox, "Ox" },
110: { TYPE_Pa, "Pa" },
111: { TYPE_Rs, "Rs" },
112: { TYPE_Sh, "Sh" },
113: { TYPE_Ss, "Ss" },
114: { TYPE_St, "St" },
115: { TYPE_Sy, "Sy" },
116: { TYPE_Tn, "Tn" },
117: { TYPE_Va, "Va" },
118: { TYPE_Va, "Vt" },
119: { TYPE_Xr, "Xr" },
120: { ~0ULL, "any" },
121: { 0ULL, NULL }
122: };
123:
1.11 ! schwarze 124: static char *buildnames(sqlite3 *, sqlite3_stmt *, uint64_t);
1.1 kristaps 125: static void *hash_alloc(size_t, void *);
126: static void hash_free(void *, size_t, void *);
127: static void *hash_halloc(size_t, void *);
1.5 kristaps 128: static struct expr *exprcomp(const struct mansearch *,
129: int, char *[]);
1.1 kristaps 130: static void exprfree(struct expr *);
1.8 schwarze 131: static struct expr *exprterm(const struct mansearch *, char *, int);
1.7 schwarze 132: static void sql_match(sqlite3_context *context,
133: int argc, sqlite3_value **argv);
1.8 schwarze 134: static void sql_regexp(sqlite3_context *context,
135: int argc, sqlite3_value **argv);
1.1 kristaps 136: static char *sql_statement(const struct expr *,
137: const char *, const char *);
138:
139: int
1.5 kristaps 140: mansearch(const struct mansearch *search,
141: const struct manpaths *paths,
1.1 kristaps 142: int argc, char *argv[],
143: struct manpage **res, size_t *sz)
144: {
1.2 kristaps 145: int fd, rc, c;
1.1 kristaps 146: int64_t id;
1.6 schwarze 147: char buf[PATH_MAX];
1.11 ! schwarze 148: char *sql;
1.10 schwarze 149: struct manpage *mpage;
1.1 kristaps 150: struct expr *e, *ep;
151: sqlite3 *db;
152: sqlite3_stmt *s;
153: struct match *mp;
154: struct ohash_info info;
155: struct ohash htab;
156: unsigned int idx;
157: size_t i, j, cur, maxres;
158:
159: memset(&info, 0, sizeof(struct ohash_info));
160:
161: info.halloc = hash_halloc;
162: info.alloc = hash_alloc;
163: info.hfree = hash_free;
164: info.key_offset = offsetof(struct match, id);
165:
1.2 kristaps 166: *sz = cur = maxres = 0;
1.1 kristaps 167: sql = NULL;
168: *res = NULL;
169: fd = -1;
170: e = NULL;
1.2 kristaps 171: rc = 0;
1.1 kristaps 172:
173: if (0 == argc)
174: goto out;
1.5 kristaps 175: if (NULL == (e = exprcomp(search, argc, argv)))
1.1 kristaps 176: goto out;
177:
178: /*
179: * Save a descriptor to the current working directory.
180: * Since pathnames in the "paths" variable might be relative,
181: * and we'll be chdir()ing into them, we need to keep a handle
182: * on our current directory from which to start the chdir().
183: */
184:
1.6 schwarze 185: if (NULL == getcwd(buf, PATH_MAX)) {
1.1 kristaps 186: perror(NULL);
187: goto out;
188: } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
189: perror(buf);
190: goto out;
191: }
192:
1.5 kristaps 193: sql = sql_statement(e, search->arch, search->sec);
1.1 kristaps 194:
195: /*
196: * Loop over the directories (containing databases) for us to
197: * search.
198: * Don't let missing/bad databases/directories phase us.
199: * In each, try to open the resident database and, if it opens,
200: * scan it for our match expression.
201: */
202:
203: for (i = 0; i < paths->sz; i++) {
204: if (-1 == fchdir(fd)) {
205: perror(buf);
206: free(*res);
207: break;
208: } else if (-1 == chdir(paths->paths[i])) {
209: perror(paths->paths[i]);
210: continue;
211: }
212:
1.2 kristaps 213: c = sqlite3_open_v2
214: (MANDOC_DB, &db,
215: SQLITE_OPEN_READONLY, NULL);
1.1 kristaps 216:
1.2 kristaps 217: if (SQLITE_OK != c) {
1.1 kristaps 218: perror(MANDOC_DB);
219: sqlite3_close(db);
220: continue;
221: }
222:
1.8 schwarze 223: /*
224: * Define the SQL functions for substring
225: * and regular expression matching.
226: */
1.7 schwarze 227:
228: c = sqlite3_create_function(db, "match", 2,
229: SQLITE_ANY, NULL, sql_match, NULL, NULL);
1.8 schwarze 230: assert(SQLITE_OK == c);
231: c = sqlite3_create_function(db, "regexp", 2,
232: SQLITE_ANY, NULL, sql_regexp, NULL, NULL);
233: assert(SQLITE_OK == c);
1.7 schwarze 234:
1.1 kristaps 235: j = 1;
1.2 kristaps 236: c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
237: if (SQLITE_OK != c)
238: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
1.1 kristaps 239:
1.5 kristaps 240: if (NULL != search->arch)
241: SQL_BIND_TEXT(db, s, j, search->arch);
242: if (NULL != search->sec)
243: SQL_BIND_TEXT(db, s, j, search->sec);
1.1 kristaps 244:
245: for (ep = e; NULL != ep; ep = ep->next) {
1.8 schwarze 246: if (NULL == ep->substr) {
247: SQL_BIND_BLOB(db, s, j, ep->regexp);
248: } else
249: SQL_BIND_TEXT(db, s, j, ep->substr);
1.3 kristaps 250: SQL_BIND_INT64(db, s, j, ep->bits);
1.1 kristaps 251: }
252:
253: memset(&htab, 0, sizeof(struct ohash));
254: ohash_init(&htab, 4, &info);
255:
256: /*
257: * Hash each entry on its [unique] document identifier.
258: * This is a uint64_t.
259: * Instead of using a hash function, simply convert the
260: * uint64_t to a uint32_t, the hash value's type.
261: * This gives good performance and preserves the
262: * distribution of buckets in the table.
263: */
1.2 kristaps 264: while (SQLITE_ROW == (c = sqlite3_step(s))) {
1.1 kristaps 265: id = sqlite3_column_int64(s, 0);
266: idx = ohash_lookup_memory
267: (&htab, (char *)&id,
268: sizeof(uint64_t), (uint32_t)id);
269:
270: if (NULL != ohash_find(&htab, idx))
271: continue;
272:
273: mp = mandoc_calloc(1, sizeof(struct match));
274: mp->id = id;
275: mp->file = mandoc_strdup
276: ((char *)sqlite3_column_text(s, 3));
277: mp->desc = mandoc_strdup
278: ((char *)sqlite3_column_text(s, 4));
279: mp->form = sqlite3_column_int(s, 5);
280: ohash_insert(&htab, idx, mp);
281: }
282:
1.2 kristaps 283: if (SQLITE_DONE != c)
284: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
285:
1.1 kristaps 286: sqlite3_finalize(s);
1.10 schwarze 287:
288: c = sqlite3_prepare_v2(db,
289: "SELECT * FROM mlinks WHERE pageid=?",
290: -1, &s, NULL);
291: if (SQLITE_OK != c)
292: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
1.1 kristaps 293:
294: for (mp = ohash_first(&htab, &idx);
295: NULL != mp;
296: mp = ohash_next(&htab, &idx)) {
297: if (cur + 1 > maxres) {
298: maxres += 1024;
299: *res = mandoc_realloc
300: (*res, maxres * sizeof(struct manpage));
301: }
1.10 schwarze 302: mpage = *res + cur;
303: if (-1 == asprintf(&mpage->file, "%s/%s",
304: paths->paths[i], mp->file)) {
305: perror(0);
306: exit((int)MANDOCLEVEL_SYSERR);
307: }
308: mpage->desc = mp->desc;
309: mpage->form = mp->form;
1.11 ! schwarze 310: mpage->names = buildnames(db, s, mp->id);
1.10 schwarze 311:
1.1 kristaps 312: free(mp->file);
313: free(mp);
314: cur++;
315: }
1.10 schwarze 316:
317: sqlite3_finalize(s);
318: sqlite3_close(db);
1.1 kristaps 319: ohash_delete(&htab);
320: }
1.2 kristaps 321: rc = 1;
1.1 kristaps 322: out:
323: exprfree(e);
324: if (-1 != fd)
325: close(fd);
326: free(sql);
327: *sz = cur;
1.2 kristaps 328: return(rc);
1.11 ! schwarze 329: }
! 330:
! 331: static char *
! 332: buildnames(sqlite3 *db, sqlite3_stmt *s, uint64_t id)
! 333: {
! 334: char *names, *newnames;
! 335: const char *oldnames, *sep1, *name, *sec, *sep2, *arch;
! 336: size_t i;
! 337: int c;
! 338:
! 339: names = NULL;
! 340: i = 1;
! 341: SQL_BIND_INT64(db, s, i, id);
! 342: while (SQLITE_ROW == (c = sqlite3_step(s))) {
! 343: if (NULL == names) {
! 344: oldnames = "";
! 345: sep1 = "";
! 346: } else {
! 347: oldnames = names;
! 348: sep1 = ", ";
! 349: }
! 350: sec = sqlite3_column_text(s, 1);
! 351: arch = sqlite3_column_text(s, 2);
! 352: name = sqlite3_column_text(s, 3);
! 353: sep2 = '\0' == *arch ? "" : "/";
! 354: if (-1 == asprintf(&newnames, "%s%s%s(%s%s%s)",
! 355: oldnames, sep1, name, sec, sep2, arch)) {
! 356: perror(0);
! 357: exit((int)MANDOCLEVEL_SYSERR);
! 358: }
! 359: free(names);
! 360: names = newnames;
! 361: }
! 362: if (SQLITE_DONE != c)
! 363: fprintf(stderr, "%s\n", sqlite3_errmsg(db));
! 364: sqlite3_reset(s);
! 365: return(names);
1.1 kristaps 366: }
367:
368: /*
1.7 schwarze 369: * Implement substring match as an application-defined SQL function.
370: * Using the SQL LIKE or GLOB operators instead would be a bad idea
371: * because that would require escaping metacharacters in the string
372: * being searched for.
373: */
374: static void
375: sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
376: {
377:
378: assert(2 == argc);
379: sqlite3_result_int(context, NULL != strcasestr(
380: (const char *)sqlite3_value_text(argv[1]),
381: (const char *)sqlite3_value_text(argv[0])));
382: }
383:
384: /*
1.8 schwarze 385: * Implement regular expression match
386: * as an application-defined SQL function.
387: */
388: static void
389: sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
390: {
391:
392: assert(2 == argc);
393: sqlite3_result_int(context, !regexec(
394: (regex_t *)sqlite3_value_blob(argv[0]),
395: (const char *)sqlite3_value_text(argv[1]),
396: 0, NULL, 0));
397: }
398:
399: /*
1.1 kristaps 400: * Prepare the search SQL statement.
401: * We search for any of the words specified in our match expression.
402: * We filter the per-doc AND expressions when collecting results.
403: */
404: static char *
405: sql_statement(const struct expr *e, const char *arch, const char *sec)
406: {
407: char *sql;
1.7 schwarze 408: const char *substr = "(key MATCH ? AND bits & ?)";
1.8 schwarze 409: const char *regexp = "(key REGEXP ? AND bits & ?)";
1.1 kristaps 410: const char *andarch = "arch = ? AND ";
411: const char *andsec = "sec = ? AND ";
1.7 schwarze 412: size_t substrsz;
1.8 schwarze 413: size_t regexpsz;
1.1 kristaps 414: size_t sz;
415:
416: sql = mandoc_strdup
1.9 schwarze 417: ("SELECT pageid,bits,key,file,desc,form,sec,arch "
1.1 kristaps 418: "FROM keys "
1.9 schwarze 419: "INNER JOIN mpages ON mpages.id=keys.pageid "
1.1 kristaps 420: "WHERE ");
421: sz = strlen(sql);
1.7 schwarze 422: substrsz = strlen(substr);
1.8 schwarze 423: regexpsz = strlen(regexp);
1.1 kristaps 424:
425: if (NULL != arch) {
426: sz += strlen(andarch) + 1;
427: sql = mandoc_realloc(sql, sz);
428: strlcat(sql, andarch, sz);
429: }
1.2 kristaps 430:
1.1 kristaps 431: if (NULL != sec) {
432: sz += strlen(andsec) + 1;
433: sql = mandoc_realloc(sql, sz);
434: strlcat(sql, andsec, sz);
435: }
436:
437: sz += 2;
438: sql = mandoc_realloc(sql, sz);
439: strlcat(sql, "(", sz);
440:
441: for ( ; NULL != e; e = e->next) {
1.8 schwarze 442: sz += (NULL == e->substr ? regexpsz : substrsz) +
1.1 kristaps 443: (NULL == e->next ? 3 : 5);
444: sql = mandoc_realloc(sql, sz);
1.8 schwarze 445: strlcat(sql, NULL == e->substr ? regexp : substr, sz);
1.1 kristaps 446: strlcat(sql, NULL == e->next ? ");" : " OR ", sz);
447: }
448:
449: return(sql);
450: }
451:
452: /*
453: * Compile a set of string tokens into an expression.
454: * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
455: * "(", "foo=bar", etc.).
456: */
457: static struct expr *
1.5 kristaps 458: exprcomp(const struct mansearch *search, int argc, char *argv[])
1.1 kristaps 459: {
1.8 schwarze 460: int i, cs;
1.1 kristaps 461: struct expr *first, *next, *cur;
462:
463: first = cur = NULL;
464:
465: for (i = 0; i < argc; i++) {
1.8 schwarze 466: if (0 == strcmp("-i", argv[i])) {
467: if (++i >= argc)
468: return(NULL);
469: cs = 0;
470: } else
471: cs = 1;
472: next = exprterm(search, argv[i], cs);
1.1 kristaps 473: if (NULL == next) {
474: exprfree(first);
475: return(NULL);
476: }
477: if (NULL != first) {
478: cur->next = next;
479: cur = next;
480: } else
481: cur = first = next;
482: }
483:
484: return(first);
485: }
486:
487: static struct expr *
1.8 schwarze 488: exprterm(const struct mansearch *search, char *buf, int cs)
1.1 kristaps 489: {
490: struct expr *e;
491: char *key, *v;
492: size_t i;
493:
494: if ('\0' == *buf)
495: return(NULL);
496:
497: e = mandoc_calloc(1, sizeof(struct expr));
498:
1.5 kristaps 499: /*"whatis" mode uses an opaque string and default fields. */
500:
501: if (MANSEARCH_WHATIS & search->flags) {
1.8 schwarze 502: e->substr = buf;
1.5 kristaps 503: e->bits = search->deftype;
504: return(e);
505: }
506:
1.1 kristaps 507: /*
508: * If no =~ is specified, search with equality over names and
509: * descriptions.
510: * If =~ begins the phrase, use name and description fields.
511: */
512:
513: if (NULL == (v = strpbrk(buf, "=~"))) {
1.8 schwarze 514: e->substr = buf;
1.5 kristaps 515: e->bits = search->deftype;
1.1 kristaps 516: return(e);
517: } else if (v == buf)
1.5 kristaps 518: e->bits = search->deftype;
1.1 kristaps 519:
1.8 schwarze 520: if ('~' == *v++) {
521: if (regcomp(&e->regexp, v,
522: REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE))) {
523: free(e);
524: return(NULL);
525: }
526: } else
527: e->substr = v;
528: v[-1] = '\0';
1.1 kristaps 529:
530: /*
531: * Parse out all possible fields.
532: * If the field doesn't resolve, bail.
533: */
534:
535: while (NULL != (key = strsep(&buf, ","))) {
536: if ('\0' == *key)
537: continue;
538: i = 0;
539: while (types[i].bits &&
540: strcasecmp(types[i].name, key))
541: i++;
542: if (0 == types[i].bits) {
543: free(e);
544: return(NULL);
545: }
546: e->bits |= types[i].bits;
547: }
548:
549: return(e);
550: }
551:
552: static void
553: exprfree(struct expr *p)
554: {
555: struct expr *pp;
556:
557: while (NULL != p) {
558: pp = p->next;
559: free(p);
560: p = pp;
561: }
562: }
563:
/*
 * Hash table callback: hand out sz bytes of zeroed memory.
 * The argument pointer is not used.
 */
static void *
hash_halloc(size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_calloc(sz, 1);
	return(mem);
}
570:
/*
 * Hash table callback: hand out sz bytes of memory.
 * The argument pointer is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_malloc(sz);
	return(mem);
}
577:
/*
 * Hash table callback: release memory obtained from the
 * allocation callbacks.
 * The size and the argument pointer are not used.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{

	(void)sz;
	(void)arg;
	free(p);
}
CVSweb