Annotation of mandoc/apropos.c, Revision 1.4
1.4 ! kristaps 1: /* $Id: apropos.c,v 1.3 2011/10/07 13:29:03 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #ifdef HAVE_CONFIG_H
18: #include "config.h"
19: #endif
20:
21: #include <sys/types.h>
22:
23: #include <assert.h>
24: #include <errno.h>
25: #include <fcntl.h>
26: #include <getopt.h>
27: #include <limits.h>
28: #include <regex.h>
29: #include <stdarg.h>
30: #include <stdint.h>
31: #include <stdio.h>
32: #include <stdlib.h>
33: #include <string.h>
34: #include <unistd.h>
35:
36: #ifdef __linux__
37: # include <db_185.h>
38: #else
39: # include <db.h>
40: #endif
41:
42: #include "mandoc.h"
43:
#define	MAXRESULTS	 256

/*
 * Keyword type bits, one per kind of indexed keyword.
 * These are or'd together into selection masks; see mandocdb.8.
 */

#define	TYPE_NAME	 0x0001
#define	TYPE_FUNCTION	 0x0002
#define	TYPE_UTILITY	 0x0004
#define	TYPE_INCLUDES	 0x0008
#define	TYPE_VARIABLE	 0x0010
#define	TYPE_STANDARD	 0x0020
#define	TYPE_AUTHOR	 0x0040
#define	TYPE_CONFIG	 0x0080
#define	TYPE_DESC	 0x0100
#define	TYPE_XREF	 0x0200
#define	TYPE_PATH	 0x0400
#define	TYPE_ENV	 0x0800
#define	TYPE_ERR	 0x1000
1.1 kristaps 61:
/*
 * How the query string is compared against each keyword.
 */
enum	match {
	MATCH_SUBSTR = 0,	/* query is a substring of the keyword */
	MATCH_REGEX = 1,	/* query is an extended regular expression */
	MATCH_EXACT = 2		/* query equals the whole keyword */
};
67:
/*
 * Ordering applied to results before they are printed.
 * SORT__MAX is the number of real orderings, not an ordering itself.
 */
enum	sort {
	SORT_TITLE = 0,		/* by manual title */
	SORT_CAT = 1,		/* by category, then by title */
	SORT__MAX = 2
};
73:
74: struct opts {
75: enum sort sort; /* output sorting */
76: const char *arch; /* restrict to architecture */
77: const char *cat; /* restrict to category */
78: int types; /* only types in bitmask */
79: int insens; /* case-insensitive match */
80: enum match match; /* match type */
81: };
82:
/*
 * Maps a keyword type name given on the command line to its bitmask.
 */
struct	type {
	int		 mask;		/* TYPE_* bit(s) selected */
	const char	*name;		/* name accepted by -t */
};
87:
88: struct rec {
1.2 kristaps 89: char *file; /* file in file-system */
90: char *cat; /* category (3p, 3, etc.) */
91: char *title; /* title (FOO, etc.) */
92: char *arch; /* arch (or empty string) */
93: char *desc; /* description (from Nd) */
94: recno_t rec; /* record in index */
1.1 kristaps 95: };
96:
97: struct res {
98: char *arch; /* architecture */
99: char *desc; /* free-form description */
100: char *keyword; /* matched keyword */
101: int types; /* bitmask of field selectors */
102: char *cat; /* manual section */
103: char *title; /* manual section */
104: char *uri; /* formatted uri of file */
105: recno_t rec; /* unique id of underlying manual */
106: };
107:
108: struct state {
109: DB *db; /* database */
110: DB *idx; /* index */
111: const char *dbf; /* database name */
112: const char *idxf; /* index name */
113: };
114:
115: static const char * const sorts[SORT__MAX] = {
116: "cat", /* SORT_CAT */
117: "title", /* SORT_TITLE */
118: };
119:
120: static const struct type types[] = {
121: { TYPE_NAME, "name" },
122: { TYPE_FUNCTION, "func" },
123: { TYPE_UTILITY, "utility" },
124: { TYPE_INCLUDES, "incl" },
125: { TYPE_VARIABLE, "var" },
126: { TYPE_STANDARD, "stand" },
127: { TYPE_AUTHOR, "auth" },
128: { TYPE_CONFIG, "conf" },
129: { TYPE_DESC, "desc" },
130: { TYPE_XREF, "xref" },
131: { TYPE_PATH, "path" },
132: { TYPE_ENV, "env" },
133: { TYPE_ERR, "err" },
134: { INT_MAX, "all" },
135: { 0, NULL }
136: };
137:
138: static void buf_alloc(char **, size_t *, size_t);
139: static void buf_dup(struct mchars *, char **, const char *);
140: static void buf_redup(struct mchars *, char **,
141: size_t *, const char *);
142: static int sort_cat(const void *, const void *);
143: static int sort_title(const void *, const void *);
1.2 kristaps 144: static int state_getrecord(struct state *,
145: recno_t, struct rec *);
1.1 kristaps 146: static void state_output(const struct res *, int);
147: static void state_search(struct state *,
148: const struct opts *, char *);
149: static void usage(void);
150:
1.2 kristaps 151: static char *progname;
1.1 kristaps 152:
153: int
154: main(int argc, char *argv[])
155: {
1.2 kristaps 156: BTREEINFO info;
157: int ch, i, rc;
1.1 kristaps 158: const char *dbf, *idxf;
159: struct state state;
160: char *q, *v;
161: struct opts opts;
162: extern int optind;
163: extern char *optarg;
164:
165: memset(&opts, 0, sizeof(struct opts));
1.2 kristaps 166: memset(&state, 0, sizeof(struct state));
1.1 kristaps 167:
168: dbf = "mandoc.db";
169: idxf = "mandoc.index";
170: q = NULL;
1.2 kristaps 171: rc = EXIT_FAILURE;
1.1 kristaps 172:
173: progname = strrchr(argv[0], '/');
174: if (progname == NULL)
175: progname = argv[0];
176: else
177: ++progname;
178:
179: opts.match = MATCH_SUBSTR;
180:
181: while (-1 != (ch = getopt(argc, argv, "a:c:eIrs:t:")))
182: switch (ch) {
183: case ('a'):
184: opts.arch = optarg;
185: break;
186: case ('c'):
187: opts.cat = optarg;
188: break;
189: case ('e'):
190: opts.match = MATCH_EXACT;
191: break;
192: case ('I'):
193: opts.insens = 1;
194: break;
195: case ('r'):
196: opts.match = MATCH_REGEX;
197: break;
198: case ('s'):
199: for (i = 0; i < SORT__MAX; i++) {
200: if (strcmp(optarg, sorts[i]))
201: continue;
202: opts.sort = (enum sort)i;
203: break;
204: }
205:
206: if (i < SORT__MAX)
207: break;
208:
1.2 kristaps 209: fprintf(stderr, "%s: Bad sort\n", optarg);
1.1 kristaps 210: return(EXIT_FAILURE);
211: case ('t'):
212: while (NULL != (v = strsep(&optarg, ","))) {
213: if ('\0' == *v)
214: continue;
215: for (i = 0; types[i].mask; i++) {
216: if (strcmp(types[i].name, v))
217: continue;
218: break;
219: }
220: if (0 == types[i].mask)
221: break;
222: opts.types |= types[i].mask;
223: }
224: if (NULL == v)
225: break;
226:
1.2 kristaps 227: fprintf(stderr, "%s: Bad type\n", v);
1.1 kristaps 228: return(EXIT_FAILURE);
229: default:
230: usage();
231: return(EXIT_FAILURE);
232: }
233:
234: argc -= optind;
235: argv += optind;
236:
237: if (0 == argc || '\0' == **argv) {
238: usage();
1.2 kristaps 239: goto out;
1.1 kristaps 240: } else
241: q = *argv;
242:
243: if (0 == opts.types)
244: opts.types = TYPE_NAME | TYPE_DESC;
245:
1.2 kristaps 246: /*
247: * Configure databases.
248: * The keyword database is a btree that allows for duplicate
249: * entries.
250: * The index database is a recno.
251: */
252:
253: memset(&info, 0, sizeof(BTREEINFO));
254: info.flags = R_DUP;
255:
256: state.db = dbopen(dbf, O_RDONLY, 0, DB_BTREE, &info);
257: if (NULL == state.db) {
258: perror(dbf);
259: goto out;
260: }
261:
262: state.idx = dbopen(idxf, O_RDONLY, 0, DB_RECNO, NULL);
263: if (NULL == state.idx) {
264: perror(idxf);
265: goto out;
1.1 kristaps 266: }
267:
1.2 kristaps 268: /* Main search function. */
269:
1.1 kristaps 270: state_search(&state, &opts, q);
271:
1.2 kristaps 272: rc = EXIT_SUCCESS;
273: out:
274: if (state.db)
275: (*state.db->close)(state.db);
276: if (state.idx)
277: (*state.idx->close)(state.idx);
278:
279: return(rc);
1.1 kristaps 280: }
281:
282: static void
283: state_search(struct state *p, const struct opts *opts, char *q)
284: {
285: int i, len, ch, rflags, dflag;
286: struct mchars *mc;
287: char *buf;
288: size_t bufsz;
289: recno_t rec;
290: uint32_t fl;
291: DBT key, val;
1.3 kristaps 292: struct res *res;
1.1 kristaps 293: regex_t reg;
294: regex_t *regp;
295: char filebuf[10];
296: struct rec record;
297:
1.3 kristaps 298: res = NULL;
1.1 kristaps 299: len = 0;
300: buf = NULL;
301: bufsz = 0;
302: ch = 0;
303: regp = NULL;
304:
1.2 kristaps 305: /*
306: * Configure how we scan through results to see if we match:
307: * whether by regexp or exact matches.
308: */
309:
1.1 kristaps 310: switch (opts->match) {
311: case (MATCH_REGEX):
312: rflags = REG_EXTENDED | REG_NOSUB |
313: (opts->insens ? REG_ICASE : 0);
314:
315: if (0 != regcomp(®, q, rflags)) {
1.2 kristaps 316: fprintf(stderr, "%s: Bad pattern\n", q);
1.1 kristaps 317: return;
318: }
319:
320: regp = ®
321: dflag = R_FIRST;
322: break;
323: case (MATCH_EXACT):
324: key.data = q;
325: key.size = strlen(q) + 1;
326: dflag = R_CURSOR;
327: break;
328: default:
329: dflag = R_FIRST;
330: break;
331: }
332:
333: if (NULL == (mc = mchars_alloc())) {
334: perror(NULL);
335: exit(EXIT_FAILURE);
336: }
337:
338: /*
339: * Iterate over the entire keyword database.
340: * For each record, we must first translate the key into UTF-8.
341: * Following that, make sure it's acceptable.
342: * Lastly, add it to the available records.
343: */
344:
1.3 kristaps 345: while (0 == (ch = (*p->db->seq)(p->db, &key, &val, dflag))) {
1.1 kristaps 346: dflag = R_NEXT;
347:
348: /*
349: * Keys must be sized as such: the keyword must be
350: * non-empty (nil terminator plus one character) and the
351: * value must be 8 (recno_t---uint32_t---index reference
352: * and a uint32_t flag field).
353: */
354:
355: if (key.size < 2 || 8 != val.size) {
1.2 kristaps 356: fprintf(stderr, "%s: Corrupt database\n", p->dbf);
1.1 kristaps 357: exit(EXIT_FAILURE);
358: }
359:
360: buf_redup(mc, &buf, &bufsz, (char *)key.data);
361:
362: fl = *(uint32_t *)val.data;
363:
364: if ( ! (fl & opts->types))
365: continue;
366:
367: switch (opts->match) {
368: case (MATCH_REGEX):
369: if (regexec(regp, buf, 0, NULL, 0))
370: continue;
371: break;
372: case (MATCH_EXACT):
373: if (opts->insens && strcasecmp(buf, q))
374: goto send;
375: if ( ! opts->insens && strcmp(buf, q))
376: goto send;
377: break;
378: default:
379: if (opts->insens && NULL == strcasestr(buf, q))
380: continue;
381: if ( ! opts->insens && NULL == strstr(buf, q))
382: continue;
383: break;
384: }
385:
386: /*
387: * Now look up the file itself in our index. The file's
388: * indexed by its recno for fast lookups.
389: */
390:
391: memcpy(&rec, val.data + 4, sizeof(recno_t));
392:
393: if ( ! state_getrecord(p, rec, &record))
394: exit(EXIT_FAILURE);
395:
396: /* If we're in a different section, skip... */
397:
398: if (opts->cat && strcasecmp(opts->cat, record.cat))
399: continue;
400: if (opts->arch && strcasecmp(opts->arch, record.arch))
401: continue;
402:
403: /* FIXME: this needs to be changed. Ugh. Linear. */
404:
405: for (i = 0; i < len; i++)
406: if (res[i].rec == record.rec)
407: break;
408:
409: if (i < len)
410: continue;
411:
1.3 kristaps 412: res = mandoc_realloc
413: (res, (len + 1) * sizeof(struct res));
414:
1.1 kristaps 415: /*
416: * Now we have our filename, keywords, types, and all
417: * other necessary information.
418: * Process it and add it to our list of results.
419: */
420:
421: filebuf[9] = '\0';
422: snprintf(filebuf, 10, "%u", record.rec);
423: assert('\0' == filebuf[9]);
424:
425: res[len].rec = record.rec;
426: res[len].types = fl;
427:
428: buf_dup(mc, &res[len].keyword, buf);
429: buf_dup(mc, &res[len].uri, filebuf);
430: buf_dup(mc, &res[len].cat, record.cat);
431: buf_dup(mc, &res[len].arch, record.arch);
432: buf_dup(mc, &res[len].title, record.title);
433: buf_dup(mc, &res[len].desc, record.desc);
434: len++;
435: }
436:
437: send:
438: if (ch < 0) {
439: perror(p->dbf);
440: exit(EXIT_FAILURE);
441: }
442:
1.2 kristaps 443: /*
444: * Sort our results.
445: * We do this post-scan (instead of an in-line sort) because
446: * it's more or less the same in terms of run-time. Assuming we
447: * sort in-line with a tree versus post:
448: *
449: * In-place: n * O(lg n)
450: * After: n + O(n lg n)
451: *
452: * Whatever. This also buys us simplicity.
453: */
454:
1.1 kristaps 455: switch (opts->sort) {
456: case (SORT_CAT):
457: qsort(res, len, sizeof(struct res), sort_cat);
458: break;
459: default:
460: qsort(res, len, sizeof(struct res), sort_title);
461: break;
462: }
463:
464: state_output(res, len);
465:
466: for (len-- ; len >= 0; len--) {
467: free(res[len].keyword);
468: free(res[len].title);
469: free(res[len].cat);
470: free(res[len].arch);
471: free(res[len].desc);
472: free(res[len].uri);
473: }
474:
1.3 kristaps 475: free(res);
1.1 kristaps 476: free(buf);
477: mchars_free(mc);
478:
479: if (regp)
480: regfree(regp);
481: }
482:
/*
 * Make sure the buffer has a capacity strictly greater than "sz".
 * When it has not, it is reallocated to "sz" plus 1024 bytes of slack
 * and the recorded capacity is updated to match.
 */
static inline void
buf_alloc(char **buf, size_t *bufsz, size_t sz)
{

	if (*bufsz <= sz) {
		*bufsz = sz + 1024;
		*buf = mandoc_realloc(*buf, *bufsz);
	}
}
496:
/*
 * Store a freshly allocated, normalised copy of "val" in "*buf".
 * Whatever "*buf" held before is overwritten without being freed.
 * This is buf_redup() starting from an empty buffer, with the resulting
 * capacity discarded.
 */
static void
buf_dup(struct mchars *mc, char **buf, const char *val)
{
	size_t		 discard = 0;

	*buf = NULL;
	buf_redup(mc, buf, &discard, val);
}
509:
510: /*
511: * Normalise strings from the index and database.
512: * These strings are escaped as defined by mandoc_char(7) along with
513: * other goop in mandoc.h (e.g., soft hyphens).
514: */
515: static void
516: buf_redup(struct mchars *mc, char **buf,
517: size_t *bufsz, const char *val)
518: {
519: size_t sz;
520: const char *seq, *cpp;
521: int len, pos;
522: enum mandoc_esc esc;
523: const char rsv[] = { '\\', ASCII_NBRSP, ASCII_HYPH, '\0' };
524:
525: /* Pre-allocate by the length of the input */
526:
527: buf_alloc(buf, bufsz, strlen(val) + 1);
528:
529: pos = 0;
530:
531: while ('\0' != *val) {
532: /*
533: * Halt on the first escape sequence.
534: * This also halts on the end of string, in which case
535: * we just copy, fallthrough, and exit the loop.
536: */
537: if ((sz = strcspn(val, rsv)) > 0) {
538: memcpy(&(*buf)[pos], val, sz);
539: pos += (int)sz;
540: val += (int)sz;
541: }
542:
543: if (ASCII_HYPH == *val) {
544: (*buf)[pos++] = '-';
545: val++;
546: continue;
547: } else if (ASCII_NBRSP == *val) {
548: (*buf)[pos++] = ' ';
549: val++;
550: continue;
551: } else if ('\\' != *val)
552: break;
553:
554: /* Read past the slash. */
555:
556: val++;
557:
558: /*
559: * Parse the escape sequence and see if it's a
560: * predefined character or special character.
561: */
562:
563: esc = mandoc_escape(&val, &seq, &len);
564: if (ESCAPE_ERROR == esc)
565: break;
566:
567: cpp = ESCAPE_SPECIAL == esc ?
568: mchars_spec2str(mc, seq, len, &sz) : NULL;
569:
570: if (NULL == cpp)
571: continue;
572:
573: /* Copy the rendered glyph into the stream. */
574:
575: buf_alloc(buf, bufsz, sz);
576:
577: memcpy(&(*buf)[pos], cpp, sz);
578: pos += (int)sz;
579: }
580:
581: (*buf)[pos] = '\0';
582: }
583:
584: static void
585: state_output(const struct res *res, int sz)
586: {
587: int i;
588:
589: for (i = 0; i < sz; i++)
590: printf("%s(%s%s%s) - %s\n", res[i].title,
591: res[i].cat,
592: *res[i].arch ? "/" : "",
593: *res[i].arch ? res[i].arch : "",
594: res[i].desc);
595: }
596:
597: static void
598: usage(void)
599: {
600:
601: fprintf(stderr, "usage: %s "
602: "[-eIr] "
603: "[-a arch] "
604: "[-c cat] "
605: "[-s sort] "
606: "[-t type[,...]] "
607: "key\n", progname);
608: }
609:
610: static int
611: state_getrecord(struct state *p, recno_t rec, struct rec *rp)
612: {
613: DBT key, val;
614: size_t sz;
615: int rc;
616:
617: key.data = &rec;
618: key.size = sizeof(recno_t);
619:
620: rc = (*p->idx->get)(p->idx, &key, &val, 0);
621: if (rc < 0) {
1.2 kristaps 622: perror(p->idxf);
1.1 kristaps 623: return(0);
1.2 kristaps 624: } else if (rc > 0)
625: goto err;
1.1 kristaps 626:
627: rp->file = (char *)val.data;
1.2 kristaps 628: if ((sz = strlen(rp->file) + 1) >= val.size)
629: goto err;
1.1 kristaps 630:
631: rp->cat = (char *)val.data + (int)sz;
1.2 kristaps 632: if ((sz += strlen(rp->cat) + 1) >= val.size)
633: goto err;
1.1 kristaps 634:
635: rp->title = (char *)val.data + (int)sz;
1.2 kristaps 636: if ((sz += strlen(rp->title) + 1) >= val.size)
637: goto err;
1.1 kristaps 638:
639: rp->arch = (char *)val.data + (int)sz;
1.2 kristaps 640: if ((sz += strlen(rp->arch) + 1) >= val.size)
641: goto err;
1.1 kristaps 642:
643: rp->desc = (char *)val.data + (int)sz;
644: rp->rec = rec;
645: return(1);
1.2 kristaps 646: err:
647: fprintf(stderr, "%s: Corrupt index\n", p->idxf);
648: return(0);
1.1 kristaps 649: }
650:
651: static int
652: sort_title(const void *p1, const void *p2)
653: {
654:
655: return(strcmp(((const struct res *)p1)->title,
656: ((const struct res *)p2)->title));
657: }
658:
659: static int
660: sort_cat(const void *p1, const void *p2)
661: {
662: int rc;
663:
664: rc = strcmp(((const struct res *)p1)->cat,
665: ((const struct res *)p2)->cat);
666:
667: return(0 == rc ? sort_title(p1, p2) : rc);
668: }
CVSweb