Annotation of mandoc/apropos.c, Revision 1.1
1.1 ! kristaps 1: /* $Id: apropos.c,v 1.18 2011/07/12 15:57:41 kristaps Exp $ */
! 2: /*
! 3: * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
! 4: *
! 5: * Permission to use, copy, modify, and distribute this software for any
! 6: * purpose with or without fee is hereby granted, provided that the above
! 7: * copyright notice and this permission notice appear in all copies.
! 8: *
! 9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
! 10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
! 11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
! 12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
! 13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
! 14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
! 15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
! 16: */
! 17: #ifdef HAVE_CONFIG_H
! 18: #include "config.h"
! 19: #endif
! 20:
! 21: #include <sys/types.h>
! 22:
! 23: #include <assert.h>
! 24: #include <errno.h>
! 25: #include <fcntl.h>
! 26: #include <getopt.h>
! 27: #include <limits.h>
! 28: #include <regex.h>
! 29: #include <stdarg.h>
! 30: #include <stdint.h>
! 31: #include <stdio.h>
! 32: #include <stdlib.h>
! 33: #include <string.h>
! 34: #include <unistd.h>
! 35:
! 36: #ifdef __linux__
! 37: # include <db_185.h>
! 38: #else
! 39: # include <db.h>
! 40: #endif
! 41:
! 42: #include "mandoc.h"
! 43:
/* Upper bound on the number of results collected by one search. */
#define	MAXRESULTS	 100

/*
 * Keyword classes, one bit each.
 * They are OR'ed together into the mask selected with -t and tested
 * against the flag word stored with every keyword.
 */
#define	TYPE_NAME	 (1 << 0)
#define	TYPE_FUNCTION	 (1 << 1)
#define	TYPE_UTILITY	 (1 << 2)
#define	TYPE_INCLUDES	 (1 << 3)
#define	TYPE_VARIABLE	 (1 << 4)
#define	TYPE_STANDARD	 (1 << 5)
#define	TYPE_AUTHOR	 (1 << 6)
#define	TYPE_CONFIG	 (1 << 7)
#define	TYPE_DESC	 (1 << 8)
#define	TYPE_XREF	 (1 << 9)
#define	TYPE_PATH	 (1 << 10)
#define	TYPE_ENV	 (1 << 11)
#define	TYPE_ERR	 (1 << 12)
! 59:
/* How the query string is compared against each keyword. */
enum	match {
	MATCH_SUBSTR = 0,	/* query occurs within the keyword (default) */
	MATCH_REGEX = 1,	/* query is a regular expression (-r) */
	MATCH_EXACT = 2		/* query equals the whole keyword (-e) */
};
! 65:
/* Output orderings selectable with -s. */
enum	sort {
	SORT_TITLE = 0,		/* by manual title (default) */
	SORT_CAT = 1,		/* by category, then by title */
	SORT__MAX = 2		/* number of orderings; not an ordering */
};
! 71:
! 72: struct opts {
! 73: enum sort sort; /* output sorting */
! 74: const char *arch; /* restrict to architecture */
! 75: const char *cat; /* restrict to category */
! 76: int types; /* only types in bitmask */
! 77: int insens; /* case-insensitive match */
! 78: enum match match; /* match type */
! 79: };
! 80:
/* Associates a name accepted by -t with its TYPE_* bitmask. */
struct	type {
	int		 mask;	/* bits OR'ed into the selection mask */
	const char	*name;	/* spelling on the command line */
};
! 85:
! 86: struct rec {
! 87: char *file;
! 88: char *cat;
! 89: char *title;
! 90: char *arch;
! 91: char *desc;
! 92: recno_t rec;
! 93: };
! 94:
! 95: struct res {
! 96: char *arch; /* architecture */
! 97: char *desc; /* free-form description */
! 98: char *keyword; /* matched keyword */
! 99: int types; /* bitmask of field selectors */
! 100: char *cat; /* manual section */
! 101: char *title; /* manual section */
! 102: char *uri; /* formatted uri of file */
! 103: recno_t rec; /* unique id of underlying manual */
! 104: };
! 105:
! 106: struct state {
! 107: DB *db; /* database */
! 108: DB *idx; /* index */
! 109: const char *dbf; /* database name */
! 110: const char *idxf; /* index name */
! 111: void (*err)(const char *);
! 112: void (*errx)(const char *, ...);
! 113: };
! 114:
! 115: static const char * const sorts[SORT__MAX] = {
! 116: "cat", /* SORT_CAT */
! 117: "title", /* SORT_TITLE */
! 118: };
! 119:
! 120: static const struct type types[] = {
! 121: { TYPE_NAME, "name" },
! 122: { TYPE_FUNCTION, "func" },
! 123: { TYPE_UTILITY, "utility" },
! 124: { TYPE_INCLUDES, "incl" },
! 125: { TYPE_VARIABLE, "var" },
! 126: { TYPE_STANDARD, "stand" },
! 127: { TYPE_AUTHOR, "auth" },
! 128: { TYPE_CONFIG, "conf" },
! 129: { TYPE_DESC, "desc" },
! 130: { TYPE_XREF, "xref" },
! 131: { TYPE_PATH, "path" },
! 132: { TYPE_ENV, "env" },
! 133: { TYPE_ERR, "err" },
! 134: { INT_MAX, "all" },
! 135: { 0, NULL }
! 136: };
! 137:
! 138: static void buf_alloc(char **, size_t *, size_t);
! 139: static void buf_dup(struct mchars *, char **, const char *);
! 140: static void buf_redup(struct mchars *, char **,
! 141: size_t *, const char *);
! 142: static void error(const char *, ...);
! 143: static int sort_cat(const void *, const void *);
! 144: static int sort_title(const void *, const void *);
! 145: static void state_destroy(struct state *);
! 146: static int state_getrecord(struct state *, recno_t, struct rec *);
! 147: static int state_init(struct state *,
! 148: const char *, const char *,
! 149: void (*err)(const char *),
! 150: void (*errx)(const char *, ...));
! 151: static void state_output(const struct res *, int);
! 152: static void state_search(struct state *,
! 153: const struct opts *, char *);
! 154:
! 155: static void usage(void);
! 156:
! 157: static const char *progname;
! 158:
! 159: int
! 160: main(int argc, char *argv[])
! 161: {
! 162: int ch, i;
! 163: const char *dbf, *idxf;
! 164: struct state state;
! 165: char *q, *v;
! 166: struct opts opts;
! 167: extern int optind;
! 168: extern char *optarg;
! 169:
! 170: memset(&opts, 0, sizeof(struct opts));
! 171:
! 172: dbf = "mandoc.db";
! 173: idxf = "mandoc.index";
! 174: q = NULL;
! 175:
! 176: progname = strrchr(argv[0], '/');
! 177: if (progname == NULL)
! 178: progname = argv[0];
! 179: else
! 180: ++progname;
! 181:
! 182: opts.match = MATCH_SUBSTR;
! 183:
! 184: while (-1 != (ch = getopt(argc, argv, "a:c:eIrs:t:")))
! 185: switch (ch) {
! 186: case ('a'):
! 187: opts.arch = optarg;
! 188: break;
! 189: case ('c'):
! 190: opts.cat = optarg;
! 191: break;
! 192: case ('e'):
! 193: opts.match = MATCH_EXACT;
! 194: break;
! 195: case ('I'):
! 196: opts.insens = 1;
! 197: break;
! 198: case ('r'):
! 199: opts.match = MATCH_REGEX;
! 200: break;
! 201: case ('s'):
! 202: for (i = 0; i < SORT__MAX; i++) {
! 203: if (strcmp(optarg, sorts[i]))
! 204: continue;
! 205: opts.sort = (enum sort)i;
! 206: break;
! 207: }
! 208:
! 209: if (i < SORT__MAX)
! 210: break;
! 211:
! 212: error("%s: Bad sort\n", optarg);
! 213: return(EXIT_FAILURE);
! 214: case ('t'):
! 215: while (NULL != (v = strsep(&optarg, ","))) {
! 216: if ('\0' == *v)
! 217: continue;
! 218: for (i = 0; types[i].mask; i++) {
! 219: if (strcmp(types[i].name, v))
! 220: continue;
! 221: break;
! 222: }
! 223: if (0 == types[i].mask)
! 224: break;
! 225: opts.types |= types[i].mask;
! 226: }
! 227: if (NULL == v)
! 228: break;
! 229:
! 230: error("%s: Bad type\n", v);
! 231: return(EXIT_FAILURE);
! 232: default:
! 233: usage();
! 234: return(EXIT_FAILURE);
! 235: }
! 236:
! 237: argc -= optind;
! 238: argv += optind;
! 239:
! 240: if (0 == argc || '\0' == **argv) {
! 241: usage();
! 242: return(EXIT_FAILURE);
! 243: } else
! 244: q = *argv;
! 245:
! 246: if (0 == opts.types)
! 247: opts.types = TYPE_NAME | TYPE_DESC;
! 248:
! 249: if ( ! state_init(&state, dbf, idxf, perror, error)) {
! 250: state_destroy(&state);
! 251: return(EXIT_FAILURE);
! 252: }
! 253:
! 254: state_search(&state, &opts, q);
! 255: state_destroy(&state);
! 256:
! 257: return(EXIT_SUCCESS);
! 258: }
! 259:
! 260: static void
! 261: state_search(struct state *p, const struct opts *opts, char *q)
! 262: {
! 263: int i, len, ch, rflags, dflag;
! 264: struct mchars *mc;
! 265: char *buf;
! 266: size_t bufsz;
! 267: recno_t rec;
! 268: uint32_t fl;
! 269: DBT key, val;
! 270: struct res res[MAXRESULTS];
! 271: regex_t reg;
! 272: regex_t *regp;
! 273: char filebuf[10];
! 274: struct rec record;
! 275:
! 276: len = 0;
! 277: buf = NULL;
! 278: bufsz = 0;
! 279: ch = 0;
! 280: regp = NULL;
! 281:
! 282: switch (opts->match) {
! 283: case (MATCH_REGEX):
! 284: rflags = REG_EXTENDED | REG_NOSUB |
! 285: (opts->insens ? REG_ICASE : 0);
! 286:
! 287: if (0 != regcomp(®, q, rflags)) {
! 288: error("%s: Bad pattern\n", q);
! 289: return;
! 290: }
! 291:
! 292: regp = ®
! 293: dflag = R_FIRST;
! 294: break;
! 295: case (MATCH_EXACT):
! 296: key.data = q;
! 297: key.size = strlen(q) + 1;
! 298: dflag = R_CURSOR;
! 299: break;
! 300: default:
! 301: dflag = R_FIRST;
! 302: break;
! 303: }
! 304:
! 305: if (NULL == (mc = mchars_alloc())) {
! 306: perror(NULL);
! 307: exit(EXIT_FAILURE);
! 308: }
! 309:
! 310: /*
! 311: * Iterate over the entire keyword database.
! 312: * For each record, we must first translate the key into UTF-8.
! 313: * Following that, make sure it's acceptable.
! 314: * Lastly, add it to the available records.
! 315: */
! 316:
! 317: while (len < MAXRESULTS) {
! 318: if ((ch = (*p->db->seq)(p->db, &key, &val, dflag)))
! 319: break;
! 320:
! 321: dflag = R_NEXT;
! 322:
! 323: /*
! 324: * Keys must be sized as such: the keyword must be
! 325: * non-empty (nil terminator plus one character) and the
! 326: * value must be 8 (recno_t---uint32_t---index reference
! 327: * and a uint32_t flag field).
! 328: */
! 329:
! 330: if (key.size < 2 || 8 != val.size) {
! 331: error("%s: Corrupt database\n", p->dbf);
! 332: exit(EXIT_FAILURE);
! 333: }
! 334:
! 335: buf_redup(mc, &buf, &bufsz, (char *)key.data);
! 336:
! 337: fl = *(uint32_t *)val.data;
! 338:
! 339: if ( ! (fl & opts->types))
! 340: continue;
! 341:
! 342: switch (opts->match) {
! 343: case (MATCH_REGEX):
! 344: if (regexec(regp, buf, 0, NULL, 0))
! 345: continue;
! 346: break;
! 347: case (MATCH_EXACT):
! 348: if (opts->insens && strcasecmp(buf, q))
! 349: goto send;
! 350: if ( ! opts->insens && strcmp(buf, q))
! 351: goto send;
! 352: break;
! 353: default:
! 354: if (opts->insens && NULL == strcasestr(buf, q))
! 355: continue;
! 356: if ( ! opts->insens && NULL == strstr(buf, q))
! 357: continue;
! 358: break;
! 359: }
! 360:
! 361: /*
! 362: * Now look up the file itself in our index. The file's
! 363: * indexed by its recno for fast lookups.
! 364: */
! 365:
! 366: memcpy(&rec, val.data + 4, sizeof(recno_t));
! 367:
! 368: if ( ! state_getrecord(p, rec, &record))
! 369: exit(EXIT_FAILURE);
! 370:
! 371: /* If we're in a different section, skip... */
! 372:
! 373: if (opts->cat && strcasecmp(opts->cat, record.cat))
! 374: continue;
! 375: if (opts->arch && strcasecmp(opts->arch, record.arch))
! 376: continue;
! 377:
! 378: /* FIXME: this needs to be changed. Ugh. Linear. */
! 379:
! 380: for (i = 0; i < len; i++)
! 381: if (res[i].rec == record.rec)
! 382: break;
! 383:
! 384: if (i < len)
! 385: continue;
! 386:
! 387: /*
! 388: * Now we have our filename, keywords, types, and all
! 389: * other necessary information.
! 390: * Process it and add it to our list of results.
! 391: */
! 392:
! 393: filebuf[9] = '\0';
! 394: snprintf(filebuf, 10, "%u", record.rec);
! 395: assert('\0' == filebuf[9]);
! 396:
! 397: res[len].rec = record.rec;
! 398: res[len].types = fl;
! 399:
! 400: buf_dup(mc, &res[len].keyword, buf);
! 401: buf_dup(mc, &res[len].uri, filebuf);
! 402: buf_dup(mc, &res[len].cat, record.cat);
! 403: buf_dup(mc, &res[len].arch, record.arch);
! 404: buf_dup(mc, &res[len].title, record.title);
! 405: buf_dup(mc, &res[len].desc, record.desc);
! 406: len++;
! 407: }
! 408:
! 409: send:
! 410: if (ch < 0) {
! 411: perror(p->dbf);
! 412: exit(EXIT_FAILURE);
! 413: }
! 414:
! 415: switch (opts->sort) {
! 416: case (SORT_CAT):
! 417: qsort(res, len, sizeof(struct res), sort_cat);
! 418: break;
! 419: default:
! 420: qsort(res, len, sizeof(struct res), sort_title);
! 421: break;
! 422: }
! 423:
! 424: state_output(res, len);
! 425:
! 426: for (len-- ; len >= 0; len--) {
! 427: free(res[len].keyword);
! 428: free(res[len].title);
! 429: free(res[len].cat);
! 430: free(res[len].arch);
! 431: free(res[len].desc);
! 432: free(res[len].uri);
! 433: }
! 434:
! 435: free(buf);
! 436: mchars_free(mc);
! 437:
! 438: if (regp)
! 439: regfree(regp);
! 440: }
! 441:
/*
 * Make sure the buffer "*buf", whose capacity is tracked in "*bufsz",
 * can hold more than "sz" bytes.
 * Nothing happens if it already can; otherwise the buffer is
 * reallocated to sz + 1024 bytes so that repeated small requests do
 * not each cause a reallocation.
 * Running out of memory, or a request so large that the new capacity
 * cannot be represented, terminates the program with EXIT_FAILURE.
 */
static inline void
buf_alloc(char **buf, size_t *bufsz, size_t sz)
{
	char		*grown;

	if (sz < *bufsz)
		return;

	/* Refuse requests for which sz + 1024 would wrap around. */

	if (sz > SIZE_MAX - 1024) {
		errno = ENOMEM;
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	if (NULL == (grown = realloc(*buf, sz + 1024))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	*buf = grown;
	*bufsz = sz + 1024;
}
! 458:
/*
 * Normalise "val" into a newly allocated string stored in "*buf".
 * Whatever "*buf" held before is overwritten without being freed.
 * This is buf_redup() starting from an empty buffer, with the
 * resulting capacity discarded.
 */
static void
buf_dup(struct mchars *mc, char **buf, const char *val)
{
	size_t		 unused = 0;

	*buf = NULL;
	buf_redup(mc, buf, &unused, val);
}
! 471:
! 472: /*
! 473: * Normalise strings from the index and database.
! 474: * These strings are escaped as defined by mandoc_char(7) along with
! 475: * other goop in mandoc.h (e.g., soft hyphens).
! 476: */
! 477: static void
! 478: buf_redup(struct mchars *mc, char **buf,
! 479: size_t *bufsz, const char *val)
! 480: {
! 481: size_t sz;
! 482: const char *seq, *cpp;
! 483: int len, pos;
! 484: enum mandoc_esc esc;
! 485: const char rsv[] = { '\\', ASCII_NBRSP, ASCII_HYPH, '\0' };
! 486:
! 487: /* Pre-allocate by the length of the input */
! 488:
! 489: buf_alloc(buf, bufsz, strlen(val) + 1);
! 490:
! 491: pos = 0;
! 492:
! 493: while ('\0' != *val) {
! 494: /*
! 495: * Halt on the first escape sequence.
! 496: * This also halts on the end of string, in which case
! 497: * we just copy, fallthrough, and exit the loop.
! 498: */
! 499: if ((sz = strcspn(val, rsv)) > 0) {
! 500: memcpy(&(*buf)[pos], val, sz);
! 501: pos += (int)sz;
! 502: val += (int)sz;
! 503: }
! 504:
! 505: if (ASCII_HYPH == *val) {
! 506: (*buf)[pos++] = '-';
! 507: val++;
! 508: continue;
! 509: } else if (ASCII_NBRSP == *val) {
! 510: (*buf)[pos++] = ' ';
! 511: val++;
! 512: continue;
! 513: } else if ('\\' != *val)
! 514: break;
! 515:
! 516: /* Read past the slash. */
! 517:
! 518: val++;
! 519:
! 520: /*
! 521: * Parse the escape sequence and see if it's a
! 522: * predefined character or special character.
! 523: */
! 524:
! 525: esc = mandoc_escape(&val, &seq, &len);
! 526: if (ESCAPE_ERROR == esc)
! 527: break;
! 528:
! 529: cpp = ESCAPE_SPECIAL == esc ?
! 530: mchars_spec2str(mc, seq, len, &sz) : NULL;
! 531:
! 532: if (NULL == cpp)
! 533: continue;
! 534:
! 535: /* Copy the rendered glyph into the stream. */
! 536:
! 537: buf_alloc(buf, bufsz, sz);
! 538:
! 539: memcpy(&(*buf)[pos], cpp, sz);
! 540: pos += (int)sz;
! 541: }
! 542:
! 543: (*buf)[pos] = '\0';
! 544: }
! 545:
/*
 * Print a printf-style formatted message to standard error.
 * Nothing is added to the message; callers supply their own newline.
 */
static void
error(const char *fmt, ...)
{
	va_list		 args;

	va_start(args, fmt);
	(void)vfprintf(stderr, fmt, args);
	va_end(args);
}
! 555:
! 556: static void
! 557: state_output(const struct res *res, int sz)
! 558: {
! 559: int i;
! 560:
! 561: for (i = 0; i < sz; i++)
! 562: printf("%s(%s%s%s) - %s\n", res[i].title,
! 563: res[i].cat,
! 564: *res[i].arch ? "/" : "",
! 565: *res[i].arch ? res[i].arch : "",
! 566: res[i].desc);
! 567: }
! 568:
! 569: static void
! 570: usage(void)
! 571: {
! 572:
! 573: fprintf(stderr, "usage: %s "
! 574: "[-eIr] "
! 575: "[-a arch] "
! 576: "[-c cat] "
! 577: "[-s sort] "
! 578: "[-t type[,...]] "
! 579: "key\n", progname);
! 580: }
! 581:
! 582: static int
! 583: state_init(struct state *p,
! 584: const char *dbf, const char *idxf,
! 585: void (*err)(const char *),
! 586: void (*errx)(const char *, ...))
! 587: {
! 588: BTREEINFO info;
! 589:
! 590: memset(p, 0, sizeof(struct state));
! 591: memset(&info, 0, sizeof(BTREEINFO));
! 592:
! 593: info.flags = R_DUP;
! 594:
! 595: p->dbf = dbf;
! 596: p->idxf = idxf;
! 597: p->err = err;
! 598:
! 599: p->db = dbopen(p->dbf, O_RDONLY, 0, DB_BTREE, &info);
! 600: if (NULL == p->db) {
! 601: (*err)(p->dbf);
! 602: return(0);
! 603: }
! 604:
! 605: p->idx = dbopen(p->idxf, O_RDONLY, 0, DB_RECNO, NULL);
! 606: if (NULL == p->idx) {
! 607: (*err)(p->idxf);
! 608: return(0);
! 609: }
! 610:
! 611: return(1);
! 612: }
! 613:
! 614: static void
! 615: state_destroy(struct state *p)
! 616: {
! 617:
! 618: if (p->db)
! 619: (*p->db->close)(p->db);
! 620: if (p->idx)
! 621: (*p->idx->close)(p->idx);
! 622: }
! 623:
! 624: static int
! 625: state_getrecord(struct state *p, recno_t rec, struct rec *rp)
! 626: {
! 627: DBT key, val;
! 628: size_t sz;
! 629: int rc;
! 630:
! 631: key.data = &rec;
! 632: key.size = sizeof(recno_t);
! 633:
! 634: rc = (*p->idx->get)(p->idx, &key, &val, 0);
! 635: if (rc < 0) {
! 636: (*p->err)(p->idxf);
! 637: return(0);
! 638: } else if (rc > 0) {
! 639: (*p->errx)("%s: Corrupt index\n", p->idxf);
! 640: return(0);
! 641: }
! 642:
! 643: rp->file = (char *)val.data;
! 644: if ((sz = strlen(rp->file) + 1) >= val.size) {
! 645: (*p->errx)("%s: Corrupt index\n", p->idxf);
! 646: return(0);
! 647: }
! 648:
! 649: rp->cat = (char *)val.data + (int)sz;
! 650: if ((sz += strlen(rp->cat) + 1) >= val.size) {
! 651: (*p->errx)("%s: Corrupt index\n", p->idxf);
! 652: return(0);
! 653: }
! 654:
! 655: rp->title = (char *)val.data + (int)sz;
! 656: if ((sz += strlen(rp->title) + 1) >= val.size) {
! 657: (*p->errx)("%s: Corrupt index\n", p->idxf);
! 658: return(0);
! 659: }
! 660:
! 661: rp->arch = (char *)val.data + (int)sz;
! 662: if ((sz += strlen(rp->arch) + 1) >= val.size) {
! 663: (*p->errx)("%s: Corrupt index\n", p->idxf);
! 664: return(0);
! 665: }
! 666:
! 667: rp->desc = (char *)val.data + (int)sz;
! 668: rp->rec = rec;
! 669: return(1);
! 670: }
! 671:
! 672: static int
! 673: sort_title(const void *p1, const void *p2)
! 674: {
! 675:
! 676: return(strcmp(((const struct res *)p1)->title,
! 677: ((const struct res *)p2)->title));
! 678: }
! 679:
! 680: static int
! 681: sort_cat(const void *p1, const void *p2)
! 682: {
! 683: int rc;
! 684:
! 685: rc = strcmp(((const struct res *)p1)->cat,
! 686: ((const struct res *)p2)->cat);
! 687:
! 688: return(0 == rc ? sort_title(p1, p2) : rc);
! 689: }
CVSweb