Annotation of mandoc/apropos.c, Revision 1.5
1.5 ! kristaps 1: /* $Id: apropos.c,v 1.4 2011/10/08 12:20:09 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #ifdef HAVE_CONFIG_H
18: #include "config.h"
19: #endif
20:
21: #include <sys/types.h>
22:
23: #include <assert.h>
24: #include <errno.h>
25: #include <fcntl.h>
26: #include <getopt.h>
27: #include <limits.h>
28: #include <regex.h>
29: #include <stdarg.h>
30: #include <stdint.h>
31: #include <stdio.h>
32: #include <stdlib.h>
33: #include <string.h>
34: #include <unistd.h>
35:
36: #ifdef __linux__
37: # include <db_185.h>
38: #else
39: # include <db.h>
40: #endif
41:
42: #include "mandoc.h"
43:
#define	MAXRESULTS	 256

/*
 * Keyword type selectors, one bit each, OR'ed together into a mask.
 * See mandocdb.8.
 */

#define	TYPE_NAME	 (1 << 0)	/* 0x0001 */
#define	TYPE_FUNCTION	 (1 << 1)	/* 0x0002 */
#define	TYPE_UTILITY	 (1 << 2)	/* 0x0004 */
#define	TYPE_INCLUDES	 (1 << 3)	/* 0x0008 */
#define	TYPE_VARIABLE	 (1 << 4)	/* 0x0010 */
#define	TYPE_STANDARD	 (1 << 5)	/* 0x0020 */
#define	TYPE_AUTHOR	 (1 << 6)	/* 0x0040 */
#define	TYPE_CONFIG	 (1 << 7)	/* 0x0080 */
#define	TYPE_DESC	 (1 << 8)	/* 0x0100 */
#define	TYPE_XREF	 (1 << 9)	/* 0x0200 */
#define	TYPE_PATH	 (1 << 10)	/* 0x0400 */
#define	TYPE_ENV	 (1 << 11)	/* 0x0800 */
#define	TYPE_ERR	 (1 << 12)	/* 0x1000 */
1.1 kristaps 61:
/* How the query string is compared against a keyword. */
enum	match {
	MATCH_SUBSTR = 0,	/* query occurs anywhere in keyword */
	MATCH_REGEX = 1,	/* query is a regular expression */
	MATCH_EXACT = 2		/* query equals the keyword */
};
67:
/* Ordering applied to the result list before it is printed. */
enum	sort {
	SORT_TITLE = 0,		/* by manual title */
	SORT_CAT = 1,		/* by category, then by title */
	SORT__MAX = 2		/* number of orderings; not an ordering */
};
73:
74: struct opts {
75: enum sort sort; /* output sorting */
76: const char *arch; /* restrict to architecture */
77: const char *cat; /* restrict to category */
78: int types; /* only types in bitmask */
79: int insens; /* case-insensitive match */
80: enum match match; /* match type */
81: };
82:
/* Maps a -t argument onto the TYPE_* bits it selects. */
struct	type {
	int		 mask;		/* bits OR'ed into the type mask */
	const char	*name;		/* name given on the command line */
};
87:
88: struct rec {
1.2 kristaps 89: char *file; /* file in file-system */
90: char *cat; /* category (3p, 3, etc.) */
91: char *title; /* title (FOO, etc.) */
92: char *arch; /* arch (or empty string) */
93: char *desc; /* description (from Nd) */
94: recno_t rec; /* record in index */
1.1 kristaps 95: };
96:
97: struct res {
98: char *arch; /* architecture */
99: char *desc; /* free-form description */
100: char *keyword; /* matched keyword */
101: int types; /* bitmask of field selectors */
102: char *cat; /* manual section */
103: char *title; /* manual section */
104: char *uri; /* formatted uri of file */
105: recno_t rec; /* unique id of underlying manual */
1.5 ! kristaps 106: /*
! 107: * Maintain a binary tree for checking the uniqueness of `rec'
! 108: * when adding elements to the results array.
! 109: * Since the results array is dynamic, use offset in the array
! 110: * instead of a pointer to the structure.
! 111: */
! 112: int lhs;
! 113: int rhs;
1.1 kristaps 114: };
115:
116: struct state {
117: DB *db; /* database */
118: DB *idx; /* index */
119: const char *dbf; /* database name */
120: const char *idxf; /* index name */
121: };
122:
123: static const char * const sorts[SORT__MAX] = {
124: "cat", /* SORT_CAT */
125: "title", /* SORT_TITLE */
126: };
127:
128: static const struct type types[] = {
129: { TYPE_NAME, "name" },
130: { TYPE_FUNCTION, "func" },
131: { TYPE_UTILITY, "utility" },
132: { TYPE_INCLUDES, "incl" },
133: { TYPE_VARIABLE, "var" },
134: { TYPE_STANDARD, "stand" },
135: { TYPE_AUTHOR, "auth" },
136: { TYPE_CONFIG, "conf" },
137: { TYPE_DESC, "desc" },
138: { TYPE_XREF, "xref" },
139: { TYPE_PATH, "path" },
140: { TYPE_ENV, "env" },
141: { TYPE_ERR, "err" },
142: { INT_MAX, "all" },
143: { 0, NULL }
144: };
145:
146: static void buf_alloc(char **, size_t *, size_t);
147: static void buf_dup(struct mchars *, char **, const char *);
148: static void buf_redup(struct mchars *, char **,
149: size_t *, const char *);
150: static int sort_cat(const void *, const void *);
151: static int sort_title(const void *, const void *);
1.2 kristaps 152: static int state_getrecord(struct state *,
153: recno_t, struct rec *);
1.1 kristaps 154: static void state_output(const struct res *, int);
155: static void state_search(struct state *,
156: const struct opts *, char *);
157: static void usage(void);
158:
1.2 kristaps 159: static char *progname;
1.1 kristaps 160:
161: int
162: main(int argc, char *argv[])
163: {
1.2 kristaps 164: BTREEINFO info;
165: int ch, i, rc;
1.1 kristaps 166: const char *dbf, *idxf;
167: struct state state;
168: char *q, *v;
169: struct opts opts;
170: extern int optind;
171: extern char *optarg;
172:
173: memset(&opts, 0, sizeof(struct opts));
1.2 kristaps 174: memset(&state, 0, sizeof(struct state));
1.1 kristaps 175:
176: dbf = "mandoc.db";
177: idxf = "mandoc.index";
178: q = NULL;
1.2 kristaps 179: rc = EXIT_FAILURE;
1.1 kristaps 180:
181: progname = strrchr(argv[0], '/');
182: if (progname == NULL)
183: progname = argv[0];
184: else
185: ++progname;
186:
187: opts.match = MATCH_SUBSTR;
188:
189: while (-1 != (ch = getopt(argc, argv, "a:c:eIrs:t:")))
190: switch (ch) {
191: case ('a'):
192: opts.arch = optarg;
193: break;
194: case ('c'):
195: opts.cat = optarg;
196: break;
197: case ('e'):
198: opts.match = MATCH_EXACT;
199: break;
200: case ('I'):
201: opts.insens = 1;
202: break;
203: case ('r'):
204: opts.match = MATCH_REGEX;
205: break;
206: case ('s'):
207: for (i = 0; i < SORT__MAX; i++) {
208: if (strcmp(optarg, sorts[i]))
209: continue;
210: opts.sort = (enum sort)i;
211: break;
212: }
213:
214: if (i < SORT__MAX)
215: break;
216:
1.2 kristaps 217: fprintf(stderr, "%s: Bad sort\n", optarg);
1.1 kristaps 218: return(EXIT_FAILURE);
219: case ('t'):
220: while (NULL != (v = strsep(&optarg, ","))) {
221: if ('\0' == *v)
222: continue;
223: for (i = 0; types[i].mask; i++) {
224: if (strcmp(types[i].name, v))
225: continue;
226: break;
227: }
228: if (0 == types[i].mask)
229: break;
230: opts.types |= types[i].mask;
231: }
232: if (NULL == v)
233: break;
234:
1.2 kristaps 235: fprintf(stderr, "%s: Bad type\n", v);
1.1 kristaps 236: return(EXIT_FAILURE);
237: default:
238: usage();
239: return(EXIT_FAILURE);
240: }
241:
242: argc -= optind;
243: argv += optind;
244:
245: if (0 == argc || '\0' == **argv) {
246: usage();
1.2 kristaps 247: goto out;
1.1 kristaps 248: } else
249: q = *argv;
250:
251: if (0 == opts.types)
252: opts.types = TYPE_NAME | TYPE_DESC;
253:
1.2 kristaps 254: /*
255: * Configure databases.
256: * The keyword database is a btree that allows for duplicate
257: * entries.
258: * The index database is a recno.
259: */
260:
261: memset(&info, 0, sizeof(BTREEINFO));
262: info.flags = R_DUP;
263:
264: state.db = dbopen(dbf, O_RDONLY, 0, DB_BTREE, &info);
265: if (NULL == state.db) {
266: perror(dbf);
267: goto out;
268: }
269:
270: state.idx = dbopen(idxf, O_RDONLY, 0, DB_RECNO, NULL);
271: if (NULL == state.idx) {
272: perror(idxf);
273: goto out;
1.1 kristaps 274: }
275:
1.2 kristaps 276: /* Main search function. */
277:
1.1 kristaps 278: state_search(&state, &opts, q);
279:
1.2 kristaps 280: rc = EXIT_SUCCESS;
281: out:
282: if (state.db)
283: (*state.db->close)(state.db);
284: if (state.idx)
285: (*state.idx->close)(state.idx);
286:
287: return(rc);
1.1 kristaps 288: }
289:
290: static void
291: state_search(struct state *p, const struct opts *opts, char *q)
292: {
1.5 ! kristaps 293: int leaf, root, len, ch, rflags, dflag;
1.1 kristaps 294: struct mchars *mc;
295: char *buf;
296: size_t bufsz;
297: recno_t rec;
298: uint32_t fl;
299: DBT key, val;
1.3 kristaps 300: struct res *res;
1.1 kristaps 301: regex_t reg;
302: regex_t *regp;
303: char filebuf[10];
304: struct rec record;
305:
1.5 ! kristaps 306: root = leaf = -1;
1.3 kristaps 307: res = NULL;
1.1 kristaps 308: len = 0;
309: buf = NULL;
310: bufsz = 0;
311: ch = 0;
312: regp = NULL;
313:
1.2 kristaps 314: /*
315: * Configure how we scan through results to see if we match:
316: * whether by regexp or exact matches.
317: */
318:
1.1 kristaps 319: switch (opts->match) {
320: case (MATCH_REGEX):
321: rflags = REG_EXTENDED | REG_NOSUB |
322: (opts->insens ? REG_ICASE : 0);
323:
324: if (0 != regcomp(®, q, rflags)) {
1.2 kristaps 325: fprintf(stderr, "%s: Bad pattern\n", q);
1.1 kristaps 326: return;
327: }
328:
329: regp = ®
330: dflag = R_FIRST;
331: break;
332: case (MATCH_EXACT):
333: key.data = q;
334: key.size = strlen(q) + 1;
335: dflag = R_CURSOR;
336: break;
337: default:
338: dflag = R_FIRST;
339: break;
340: }
341:
342: if (NULL == (mc = mchars_alloc())) {
343: perror(NULL);
344: exit(EXIT_FAILURE);
345: }
346:
347: /*
348: * Iterate over the entire keyword database.
349: * For each record, we must first translate the key into UTF-8.
350: * Following that, make sure it's acceptable.
351: * Lastly, add it to the available records.
352: */
353:
1.3 kristaps 354: while (0 == (ch = (*p->db->seq)(p->db, &key, &val, dflag))) {
1.1 kristaps 355: dflag = R_NEXT;
356:
357: /*
358: * Keys must be sized as such: the keyword must be
359: * non-empty (nil terminator plus one character) and the
360: * value must be 8 (recno_t---uint32_t---index reference
361: * and a uint32_t flag field).
362: */
363:
364: if (key.size < 2 || 8 != val.size) {
1.2 kristaps 365: fprintf(stderr, "%s: Corrupt database\n", p->dbf);
1.1 kristaps 366: exit(EXIT_FAILURE);
367: }
368:
369: buf_redup(mc, &buf, &bufsz, (char *)key.data);
370:
371: fl = *(uint32_t *)val.data;
372:
373: if ( ! (fl & opts->types))
374: continue;
375:
376: switch (opts->match) {
377: case (MATCH_REGEX):
378: if (regexec(regp, buf, 0, NULL, 0))
379: continue;
380: break;
381: case (MATCH_EXACT):
382: if (opts->insens && strcasecmp(buf, q))
383: goto send;
384: if ( ! opts->insens && strcmp(buf, q))
385: goto send;
386: break;
387: default:
388: if (opts->insens && NULL == strcasestr(buf, q))
389: continue;
390: if ( ! opts->insens && NULL == strstr(buf, q))
391: continue;
392: break;
393: }
394:
395: /*
396: * Now look up the file itself in our index. The file's
397: * indexed by its recno for fast lookups.
398: */
399:
400: memcpy(&rec, val.data + 4, sizeof(recno_t));
401:
402: if ( ! state_getrecord(p, rec, &record))
403: exit(EXIT_FAILURE);
404:
405: /* If we're in a different section, skip... */
406:
407: if (opts->cat && strcasecmp(opts->cat, record.cat))
408: continue;
409: if (opts->arch && strcasecmp(opts->arch, record.arch))
410: continue;
411:
1.5 ! kristaps 412: /*
! 413: * Do a binary search to dedupe the results tree of the
! 414: * same record: we don't print the same file.
! 415: */
1.1 kristaps 416:
1.5 ! kristaps 417: for (leaf = root; leaf >= 0; )
! 418: if (rec > res[leaf].rec && res[leaf].rhs >= 0)
! 419: leaf = res[leaf].rhs;
! 420: else if (rec < res[leaf].rec && res[leaf].lhs >= 0)
! 421: leaf = res[leaf].lhs;
! 422: else
1.1 kristaps 423: break;
424:
1.5 ! kristaps 425: if (leaf >= 0 && res[leaf].rec == rec)
1.1 kristaps 426: continue;
427:
1.3 kristaps 428: res = mandoc_realloc
429: (res, (len + 1) * sizeof(struct res));
430:
1.1 kristaps 431: /*
432: * Now we have our filename, keywords, types, and all
433: * other necessary information.
434: * Process it and add it to our list of results.
435: */
436:
437: filebuf[9] = '\0';
438: snprintf(filebuf, 10, "%u", record.rec);
439: assert('\0' == filebuf[9]);
440:
441: res[len].rec = record.rec;
442: res[len].types = fl;
1.5 ! kristaps 443: res[len].lhs = res[len].rhs = -1;
1.1 kristaps 444:
445: buf_dup(mc, &res[len].keyword, buf);
446: buf_dup(mc, &res[len].uri, filebuf);
447: buf_dup(mc, &res[len].cat, record.cat);
448: buf_dup(mc, &res[len].arch, record.arch);
449: buf_dup(mc, &res[len].title, record.title);
450: buf_dup(mc, &res[len].desc, record.desc);
1.5 ! kristaps 451:
! 452: if (leaf >= 0) {
! 453: if (record.rec > res[leaf].rec)
! 454: res[leaf].rhs = len;
! 455: else
! 456: res[leaf].lhs = len;
! 457: } else
! 458: root = len;
! 459:
1.1 kristaps 460: len++;
461: }
462:
463: send:
464: if (ch < 0) {
465: perror(p->dbf);
466: exit(EXIT_FAILURE);
467: }
468:
1.2 kristaps 469: /*
470: * Sort our results.
471: * We do this post-scan (instead of an in-line sort) because
472: * it's more or less the same in terms of run-time. Assuming we
473: * sort in-line with a tree versus post:
474: *
475: * In-place: n * O(lg n)
476: * After: n + O(n lg n)
477: *
478: * Whatever. This also buys us simplicity.
479: */
480:
1.1 kristaps 481: switch (opts->sort) {
482: case (SORT_CAT):
483: qsort(res, len, sizeof(struct res), sort_cat);
484: break;
485: default:
486: qsort(res, len, sizeof(struct res), sort_title);
487: break;
488: }
489:
490: state_output(res, len);
491:
492: for (len-- ; len >= 0; len--) {
493: free(res[len].keyword);
494: free(res[len].title);
495: free(res[len].cat);
496: free(res[len].arch);
497: free(res[len].desc);
498: free(res[len].uri);
499: }
500:
1.3 kristaps 501: free(res);
1.1 kristaps 502: free(buf);
503: mchars_free(mc);
504:
505: if (regp)
506: regfree(regp);
507: }
508:
/*
 * Grow `*buf' for buf_redup() whenever the requested size `sz' is not
 * strictly below the recorded capacity `*bufsz'.
 * The new capacity is `sz' plus 1024 bytes of slack.
 */
static inline void
buf_alloc(char **buf, size_t *bufsz, size_t sz)
{

	if (sz >= *bufsz) {
		*bufsz = sz + 1024;
		*buf = mandoc_realloc(*buf, *bufsz);
	}
}
522:
/*
 * Store a freshly allocated, normalised copy of `val' in `*buf'.
 * Works like buf_redup() on an empty buffer, except that the capacity
 * of the new buffer is not reported to the caller.
 * Any previous value of `*buf' is overwritten without being freed.
 */
static void
buf_dup(struct mchars *mc, char **buf, const char *val)
{
	size_t		 discard = 0;

	*buf = NULL;
	buf_redup(mc, buf, &discard, val);
}
535:
536: /*
537: * Normalise strings from the index and database.
538: * These strings are escaped as defined by mandoc_char(7) along with
539: * other goop in mandoc.h (e.g., soft hyphens).
540: */
541: static void
542: buf_redup(struct mchars *mc, char **buf,
543: size_t *bufsz, const char *val)
544: {
545: size_t sz;
546: const char *seq, *cpp;
547: int len, pos;
548: enum mandoc_esc esc;
549: const char rsv[] = { '\\', ASCII_NBRSP, ASCII_HYPH, '\0' };
550:
551: /* Pre-allocate by the length of the input */
552:
553: buf_alloc(buf, bufsz, strlen(val) + 1);
554:
555: pos = 0;
556:
557: while ('\0' != *val) {
558: /*
559: * Halt on the first escape sequence.
560: * This also halts on the end of string, in which case
561: * we just copy, fallthrough, and exit the loop.
562: */
563: if ((sz = strcspn(val, rsv)) > 0) {
564: memcpy(&(*buf)[pos], val, sz);
565: pos += (int)sz;
566: val += (int)sz;
567: }
568:
569: if (ASCII_HYPH == *val) {
570: (*buf)[pos++] = '-';
571: val++;
572: continue;
573: } else if (ASCII_NBRSP == *val) {
574: (*buf)[pos++] = ' ';
575: val++;
576: continue;
577: } else if ('\\' != *val)
578: break;
579:
580: /* Read past the slash. */
581:
582: val++;
583:
584: /*
585: * Parse the escape sequence and see if it's a
586: * predefined character or special character.
587: */
588:
589: esc = mandoc_escape(&val, &seq, &len);
590: if (ESCAPE_ERROR == esc)
591: break;
592:
593: cpp = ESCAPE_SPECIAL == esc ?
594: mchars_spec2str(mc, seq, len, &sz) : NULL;
595:
596: if (NULL == cpp)
597: continue;
598:
599: /* Copy the rendered glyph into the stream. */
600:
601: buf_alloc(buf, bufsz, sz);
602:
603: memcpy(&(*buf)[pos], cpp, sz);
604: pos += (int)sz;
605: }
606:
607: (*buf)[pos] = '\0';
608: }
609:
610: static void
611: state_output(const struct res *res, int sz)
612: {
613: int i;
614:
615: for (i = 0; i < sz; i++)
616: printf("%s(%s%s%s) - %s\n", res[i].title,
617: res[i].cat,
618: *res[i].arch ? "/" : "",
619: *res[i].arch ? res[i].arch : "",
620: res[i].desc);
621: }
622:
623: static void
624: usage(void)
625: {
626:
627: fprintf(stderr, "usage: %s "
628: "[-eIr] "
629: "[-a arch] "
630: "[-c cat] "
631: "[-s sort] "
632: "[-t type[,...]] "
633: "key\n", progname);
634: }
635:
636: static int
637: state_getrecord(struct state *p, recno_t rec, struct rec *rp)
638: {
639: DBT key, val;
640: size_t sz;
641: int rc;
642:
643: key.data = &rec;
644: key.size = sizeof(recno_t);
645:
646: rc = (*p->idx->get)(p->idx, &key, &val, 0);
647: if (rc < 0) {
1.2 kristaps 648: perror(p->idxf);
1.1 kristaps 649: return(0);
1.2 kristaps 650: } else if (rc > 0)
651: goto err;
1.1 kristaps 652:
653: rp->file = (char *)val.data;
1.2 kristaps 654: if ((sz = strlen(rp->file) + 1) >= val.size)
655: goto err;
1.1 kristaps 656:
657: rp->cat = (char *)val.data + (int)sz;
1.2 kristaps 658: if ((sz += strlen(rp->cat) + 1) >= val.size)
659: goto err;
1.1 kristaps 660:
661: rp->title = (char *)val.data + (int)sz;
1.2 kristaps 662: if ((sz += strlen(rp->title) + 1) >= val.size)
663: goto err;
1.1 kristaps 664:
665: rp->arch = (char *)val.data + (int)sz;
1.2 kristaps 666: if ((sz += strlen(rp->arch) + 1) >= val.size)
667: goto err;
1.1 kristaps 668:
669: rp->desc = (char *)val.data + (int)sz;
670: rp->rec = rec;
671: return(1);
1.2 kristaps 672: err:
673: fprintf(stderr, "%s: Corrupt index\n", p->idxf);
674: return(0);
1.1 kristaps 675: }
676:
677: static int
678: sort_title(const void *p1, const void *p2)
679: {
680:
681: return(strcmp(((const struct res *)p1)->title,
682: ((const struct res *)p2)->title));
683: }
684:
685: static int
686: sort_cat(const void *p1, const void *p2)
687: {
688: int rc;
689:
690: rc = strcmp(((const struct res *)p1)->cat,
691: ((const struct res *)p2)->cat);
692:
693: return(0 == rc ? sort_title(p1, p2) : rc);
694: }
CVSweb