=================================================================== RCS file: /cvs/mandoc/Attic/apropos.c,v retrieving revision 1.4 retrieving revision 1.10 diff -u -p -r1.4 -r1.10 --- mandoc/Attic/apropos.c 2011/10/08 12:20:09 1.4 +++ mandoc/Attic/apropos.c 2011/11/09 22:05:56 1.10 @@ -1,6 +1,6 @@ -/* $Id: apropos.c,v 1.4 2011/10/08 12:20:09 kristaps Exp $ */ +/* $Id: apropos.c,v 1.10 2011/11/09 22:05:56 kristaps Exp $ */ /* -* Copyright (c) 2011 Kristaps Dzonsons + * Copyright (c) 2011 Kristaps Dzonsons * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -14,138 +14,18 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include - #include -#include -#include #include #include -#include -#include -#include #include #include #include -#include -#ifdef __linux__ -# include -#else -# include -#endif - +#include "apropos.h" #include "mandoc.h" -#define MAXRESULTS 256 - -/* Bit-fields. See mandocdb.8. 
*/ - -#define TYPE_NAME 0x01 -#define TYPE_FUNCTION 0x02 -#define TYPE_UTILITY 0x04 -#define TYPE_INCLUDES 0x08 -#define TYPE_VARIABLE 0x10 -#define TYPE_STANDARD 0x20 -#define TYPE_AUTHOR 0x40 -#define TYPE_CONFIG 0x80 -#define TYPE_DESC 0x100 -#define TYPE_XREF 0x200 -#define TYPE_PATH 0x400 -#define TYPE_ENV 0x800 -#define TYPE_ERR 0x1000 - -enum match { - MATCH_SUBSTR = 0, - MATCH_REGEX, - MATCH_EXACT -}; - -enum sort { - SORT_TITLE = 0, - SORT_CAT, - SORT__MAX -}; - -struct opts { - enum sort sort; /* output sorting */ - const char *arch; /* restrict to architecture */ - const char *cat; /* restrict to category */ - int types; /* only types in bitmask */ - int insens; /* case-insensitive match */ - enum match match; /* match type */ -}; - -struct type { - int mask; - const char *name; /* command-line type name */ -}; - -struct rec { - char *file; /* file in file-system */ - char *cat; /* category (3p, 3, etc.) */ - char *title; /* title (FOO, etc.) */ - char *arch; /* arch (or empty string) */ - char *desc; /* description (from Nd) */ - recno_t rec; /* record in index */ -}; - -struct res { - char *arch; /* architecture */ - char *desc; /* free-form description */ - char *keyword; /* matched keyword */ - int types; /* bitmask of field selectors */ - char *cat; /* manual section */ - char *title; /* manual section */ - char *uri; /* formatted uri of file */ - recno_t rec; /* unique id of underlying manual */ -}; - -struct state { - DB *db; /* database */ - DB *idx; /* index */ - const char *dbf; /* database name */ - const char *idxf; /* index name */ -}; - -static const char * const sorts[SORT__MAX] = { - "cat", /* SORT_CAT */ - "title", /* SORT_TITLE */ -}; - -static const struct type types[] = { - { TYPE_NAME, "name" }, - { TYPE_FUNCTION, "func" }, - { TYPE_UTILITY, "utility" }, - { TYPE_INCLUDES, "incl" }, - { TYPE_VARIABLE, "var" }, - { TYPE_STANDARD, "stand" }, - { TYPE_AUTHOR, "auth" }, - { TYPE_CONFIG, "conf" }, - { TYPE_DESC, "desc" }, - { TYPE_XREF, 
"xref" }, - { TYPE_PATH, "path" }, - { TYPE_ENV, "env" }, - { TYPE_ERR, "err" }, - { INT_MAX, "all" }, - { 0, NULL } -}; - -static void buf_alloc(char **, size_t *, size_t); -static void buf_dup(struct mchars *, char **, const char *); -static void buf_redup(struct mchars *, char **, - size_t *, const char *); -static int sort_cat(const void *, const void *); -static int sort_title(const void *, const void *); -static int state_getrecord(struct state *, - recno_t, struct rec *); -static void state_output(const struct res *, int); -static void state_search(struct state *, - const struct opts *, char *); +static int cmp(const void *, const void *); +static void list(struct rec *, size_t, void *); static void usage(void); static char *progname; @@ -153,79 +33,33 @@ static char *progname; int main(int argc, char *argv[]) { - BTREEINFO info; - int ch, i, rc; - const char *dbf, *idxf; - struct state state; - char *q, *v; + int ch, cs; struct opts opts; + struct expr *e; extern int optind; extern char *optarg; memset(&opts, 0, sizeof(struct opts)); - memset(&state, 0, sizeof(struct state)); - dbf = "mandoc.db"; - idxf = "mandoc.index"; - q = NULL; - rc = EXIT_FAILURE; - progname = strrchr(argv[0], '/'); if (progname == NULL) progname = argv[0]; else ++progname; - opts.match = MATCH_SUBSTR; + cs = 0; - while (-1 != (ch = getopt(argc, argv, "a:c:eIrs:t:"))) + while (-1 != (ch = getopt(argc, argv, "S:s:I"))) switch (ch) { - case ('a'): + case ('S'): opts.arch = optarg; break; - case ('c'): + case ('s'): opts.cat = optarg; break; - case ('e'): - opts.match = MATCH_EXACT; - break; case ('I'): - opts.insens = 1; + cs = 1; break; - case ('r'): - opts.match = MATCH_REGEX; - break; - case ('s'): - for (i = 0; i < SORT__MAX; i++) { - if (strcmp(optarg, sorts[i])) - continue; - opts.sort = (enum sort)i; - break; - } - - if (i < SORT__MAX) - break; - - fprintf(stderr, "%s: Bad sort\n", optarg); - return(EXIT_FAILURE); - case ('t'): - while (NULL != (v = strsep(&optarg, ","))) { - if 
('\0' == *v) - continue; - for (i = 0; types[i].mask; i++) { - if (strcmp(types[i].name, v)) - continue; - break; - } - if (0 == types[i].mask) - break; - opts.types |= types[i].mask; - } - if (NULL == v) - break; - - fprintf(stderr, "%s: Bad type\n", v); - return(EXIT_FAILURE); default: usage(); return(EXIT_FAILURE); @@ -234,14 +68,13 @@ main(int argc, char *argv[]) argc -= optind; argv += optind; - if (0 == argc || '\0' == **argv) { - usage(); - goto out; - } else - q = *argv; + if (0 == argc) + return(EXIT_SUCCESS); - if (0 == opts.types) - opts.types = TYPE_NAME | TYPE_DESC; + if (NULL == (e = exprcomp(cs, argv, argc))) { + fprintf(stderr, "Bad expression\n"); + return(EXIT_FAILURE); + } /* * Configure databases. @@ -250,343 +83,20 @@ main(int argc, char *argv[]) * The index database is a recno. */ - memset(&info, 0, sizeof(BTREEINFO)); - info.flags = R_DUP; - - state.db = dbopen(dbf, O_RDONLY, 0, DB_BTREE, &info); - if (NULL == state.db) { - perror(dbf); - goto out; - } - - state.idx = dbopen(idxf, O_RDONLY, 0, DB_RECNO, NULL); - if (NULL == state.idx) { - perror(idxf); - goto out; - } - - /* Main search function. */ - - state_search(&state, &opts, q); - - rc = EXIT_SUCCESS; -out: - if (state.db) - (*state.db->close)(state.db); - if (state.idx) - (*state.idx->close)(state.idx); - - return(rc); + apropos_search(&opts, e, NULL, list); + exprfree(e); + return(EXIT_SUCCESS); } +/* ARGSUSED */ static void -state_search(struct state *p, const struct opts *opts, char *q) +list(struct rec *res, size_t sz, void *arg) { - int i, len, ch, rflags, dflag; - struct mchars *mc; - char *buf; - size_t bufsz; - recno_t rec; - uint32_t fl; - DBT key, val; - struct res *res; - regex_t reg; - regex_t *regp; - char filebuf[10]; - struct rec record; - - res = NULL; - len = 0; - buf = NULL; - bufsz = 0; - ch = 0; - regp = NULL; - - /* - * Configure how we scan through results to see if we match: - * whether by regexp or exact matches. 
- */ - - switch (opts->match) { - case (MATCH_REGEX): - rflags = REG_EXTENDED | REG_NOSUB | - (opts->insens ? REG_ICASE : 0); - - if (0 != regcomp(&reg, q, rflags)) { - fprintf(stderr, "%s: Bad pattern\n", q); - return; - } - - regp = &reg; - dflag = R_FIRST; - break; - case (MATCH_EXACT): - key.data = q; - key.size = strlen(q) + 1; - dflag = R_CURSOR; - break; - default: - dflag = R_FIRST; - break; - } - - if (NULL == (mc = mchars_alloc())) { - perror(NULL); - exit(EXIT_FAILURE); - } - - /* - * Iterate over the entire keyword database. - * For each record, we must first translate the key into UTF-8. - * Following that, make sure it's acceptable. - * Lastly, add it to the available records. - */ - - while (0 == (ch = (*p->db->seq)(p->db, &key, &val, dflag))) { - dflag = R_NEXT; - - /* - * Keys must be sized as such: the keyword must be - * non-empty (nil terminator plus one character) and the - * value must be 8 (recno_t---uint32_t---index reference - * and a uint32_t flag field). - */ - - if (key.size < 2 || 8 != val.size) { - fprintf(stderr, "%s: Corrupt database\n", p->dbf); - exit(EXIT_FAILURE); - } - - buf_redup(mc, &buf, &bufsz, (char *)key.data); - - fl = *(uint32_t *)val.data; - - if ( ! (fl & opts->types)) - continue; - - switch (opts->match) { - case (MATCH_REGEX): - if (regexec(regp, buf, 0, NULL, 0)) - continue; - break; - case (MATCH_EXACT): - if (opts->insens && strcasecmp(buf, q)) - goto send; - if ( ! opts->insens && strcmp(buf, q)) - goto send; - break; - default: - if (opts->insens && NULL == strcasestr(buf, q)) - continue; - if ( ! opts->insens && NULL == strstr(buf, q)) - continue; - break; - } - - /* - * Now look up the file itself in our index. The file's - * indexed by its recno for fast lookups. - */ - - memcpy(&rec, val.data + 4, sizeof(recno_t)); - - if ( ! state_getrecord(p, rec, &record)) - exit(EXIT_FAILURE); - - /* If we're in a different section, skip... 
*/ - - if (opts->cat && strcasecmp(opts->cat, record.cat)) - continue; - if (opts->arch && strcasecmp(opts->arch, record.arch)) - continue; - - /* FIXME: this needs to be changed. Ugh. Linear. */ - - for (i = 0; i < len; i++) - if (res[i].rec == record.rec) - break; - - if (i < len) - continue; - - res = mandoc_realloc - (res, (len + 1) * sizeof(struct res)); - - /* - * Now we have our filename, keywords, types, and all - * other necessary information. - * Process it and add it to our list of results. - */ - - filebuf[9] = '\0'; - snprintf(filebuf, 10, "%u", record.rec); - assert('\0' == filebuf[9]); - - res[len].rec = record.rec; - res[len].types = fl; - - buf_dup(mc, &res[len].keyword, buf); - buf_dup(mc, &res[len].uri, filebuf); - buf_dup(mc, &res[len].cat, record.cat); - buf_dup(mc, &res[len].arch, record.arch); - buf_dup(mc, &res[len].title, record.title); - buf_dup(mc, &res[len].desc, record.desc); - len++; - } - -send: - if (ch < 0) { - perror(p->dbf); - exit(EXIT_FAILURE); - } - - /* - * Sort our results. - * We do this post-scan (instead of an in-line sort) because - * it's more or less the same in terms of run-time. Assuming we - * sort in-line with a tree versus post: - * - * In-place: n * O(lg n) - * After: n + O(n lg n) - * - * Whatever. This also buys us simplicity. - */ - - switch (opts->sort) { - case (SORT_CAT): - qsort(res, len, sizeof(struct res), sort_cat); - break; - default: - qsort(res, len, sizeof(struct res), sort_title); - break; - } - - state_output(res, len); - - for (len-- ; len >= 0; len--) { - free(res[len].keyword); - free(res[len].title); - free(res[len].cat); - free(res[len].arch); - free(res[len].desc); - free(res[len].uri); - } - - free(res); - free(buf); - mchars_free(mc); - - if (regp) - regfree(regp); -} - -/* - * Track allocated buffer size for buf_redup(). 
- */ -static inline void -buf_alloc(char **buf, size_t *bufsz, size_t sz) -{ - - if (sz < *bufsz) - return; - - *bufsz = sz + 1024; - *buf = mandoc_realloc(*buf, *bufsz); -} - -/* - * Like buf_redup() but throwing away the buffer size. - */ -static void -buf_dup(struct mchars *mc, char **buf, const char *val) -{ - size_t bufsz; - - bufsz = 0; - *buf = NULL; - buf_redup(mc, buf, &bufsz, val); -} - -/* - * Normalise strings from the index and database. - * These strings are escaped as defined by mandoc_char(7) along with - * other goop in mandoc.h (e.g., soft hyphens). - */ -static void -buf_redup(struct mchars *mc, char **buf, - size_t *bufsz, const char *val) -{ - size_t sz; - const char *seq, *cpp; - int len, pos; - enum mandoc_esc esc; - const char rsv[] = { '\\', ASCII_NBRSP, ASCII_HYPH, '\0' }; - - /* Pre-allocate by the length of the input */ - - buf_alloc(buf, bufsz, strlen(val) + 1); - - pos = 0; - - while ('\0' != *val) { - /* - * Halt on the first escape sequence. - * This also halts on the end of string, in which case - * we just copy, fallthrough, and exit the loop. - */ - if ((sz = strcspn(val, rsv)) > 0) { - memcpy(&(*buf)[pos], val, sz); - pos += (int)sz; - val += (int)sz; - } - - if (ASCII_HYPH == *val) { - (*buf)[pos++] = '-'; - val++; - continue; - } else if (ASCII_NBRSP == *val) { - (*buf)[pos++] = ' '; - val++; - continue; - } else if ('\\' != *val) - break; - - /* Read past the slash. */ - - val++; - - /* - * Parse the escape sequence and see if it's a - * predefined character or special character. - */ - - esc = mandoc_escape(&val, &seq, &len); - if (ESCAPE_ERROR == esc) - break; - - cpp = ESCAPE_SPECIAL == esc ? - mchars_spec2str(mc, seq, len, &sz) : NULL; - - if (NULL == cpp) - continue; - - /* Copy the rendered glyph into the stream. 
*/ - - buf_alloc(buf, bufsz, sz); - - memcpy(&(*buf)[pos], cpp, sz); - pos += (int)sz; - } - - (*buf)[pos] = '\0'; -} - -static void -state_output(const struct res *res, int sz) -{ int i; - for (i = 0; i < sz; i++) + qsort(res, sz, sizeof(struct rec), cmp); + + for (i = 0; i < (int)sz; i++) printf("%s(%s%s%s) - %s\n", res[i].title, res[i].cat, *res[i].arch ? "/" : "", @@ -594,75 +104,22 @@ state_output(const struct res *res, int sz) res[i].desc); } -static void -usage(void) -{ - - fprintf(stderr, "usage: %s " - "[-eIr] " - "[-a arch] " - "[-c cat] " - "[-s sort] " - "[-t type[,...]] " - "key\n", progname); -} - static int -state_getrecord(struct state *p, recno_t rec, struct rec *rp) +cmp(const void *p1, const void *p2) { - DBT key, val; - size_t sz; - int rc; - key.data = &rec; - key.size = sizeof(recno_t); - - rc = (*p->idx->get)(p->idx, &key, &val, 0); - if (rc < 0) { - perror(p->idxf); - return(0); - } else if (rc > 0) - goto err; - - rp->file = (char *)val.data; - if ((sz = strlen(rp->file) + 1) >= val.size) - goto err; - - rp->cat = (char *)val.data + (int)sz; - if ((sz += strlen(rp->cat) + 1) >= val.size) - goto err; - - rp->title = (char *)val.data + (int)sz; - if ((sz += strlen(rp->title) + 1) >= val.size) - goto err; - - rp->arch = (char *)val.data + (int)sz; - if ((sz += strlen(rp->arch) + 1) >= val.size) - goto err; - - rp->desc = (char *)val.data + (int)sz; - rp->rec = rec; - return(1); -err: - fprintf(stderr, "%s: Corrupt index\n", p->idxf); - return(0); + return(strcmp(((const struct rec *)p1)->title, + ((const struct rec *)p2)->title)); } -static int -sort_title(const void *p1, const void *p2) +static void +usage(void) { - return(strcmp(((const struct res *)p1)->title, - ((const struct res *)p2)->title)); -} - -static int -sort_cat(const void *p1, const void *p2) -{ - int rc; - - rc = strcmp(((const struct res *)p1)->cat, - ((const struct res *)p2)->cat); - - return(0 == rc ? 
sort_title(p1, p2) : rc); + fprintf(stderr, "usage: %s " + "[-I] " + "[-S arch] " + "[-s section] " + "EXPR\n", + progname); }