=================================================================== RCS file: /cvs/mandoc/Attic/apropos.c,v retrieving revision 1.2 retrieving revision 1.7 diff -u -p -r1.2 -r1.7 --- mandoc/Attic/apropos.c 2011/10/07 13:22:33 1.2 +++ mandoc/Attic/apropos.c 2011/10/09 10:46:38 1.7 @@ -1,4 +1,4 @@ -/* $Id: apropos.c,v 1.2 2011/10/07 13:22:33 kristaps Exp $ */ +/* $Id: apropos.c,v 1.7 2011/10/09 10:46:38 kristaps Exp $ */ /* * Copyright (c) 2011 Kristaps Dzonsons * @@ -103,6 +103,14 @@ struct res { char *title; /* manual section */ char *uri; /* formatted uri of file */ recno_t rec; /* unique id of underlying manual */ + /* + * Maintain a binary tree for checking the uniqueness of `rec' + * when adding elements to the results array. + * Since the results array is dynamic, use offset in the array + * instead of a pointer to the structure. + */ + int lhs; + int rhs; }; struct state { @@ -144,7 +152,7 @@ static int sort_title(const void *, const void *); static int state_getrecord(struct state *, recno_t, struct rec *); static void state_output(const struct res *, int); -static void state_search(struct state *, +static int state_search(struct state *, const struct opts *, char *); static void usage(void); @@ -267,9 +275,8 @@ main(int argc, char *argv[]) /* Main search function. */ - state_search(&state, &opts, q); - - rc = EXIT_SUCCESS; + rc = state_search(&state, &opts, q) ? 
+ EXIT_SUCCESS : EXIT_FAILURE; out: if (state.db) (*state.db->close)(state.db); @@ -279,26 +286,28 @@ out: return(rc); } -static void +static int state_search(struct state *p, const struct opts *opts, char *q) { - int i, len, ch, rflags, dflag; + int leaf, root, len, ch, dflag, rc; struct mchars *mc; char *buf; size_t bufsz; recno_t rec; uint32_t fl; DBT key, val; - struct res res[MAXRESULTS]; + struct res *res; regex_t reg; regex_t *regp; char filebuf[10]; struct rec record; + rc = 0; + root = leaf = -1; + res = NULL; len = 0; buf = NULL; bufsz = 0; - ch = 0; regp = NULL; /* @@ -308,12 +317,12 @@ state_search(struct state *p, const struct opts *opts, switch (opts->match) { case (MATCH_REGEX): - rflags = REG_EXTENDED | REG_NOSUB | + ch = REG_EXTENDED | REG_NOSUB | (opts->insens ? REG_ICASE : 0); - if (0 != regcomp(&reg, q, rflags)) { + if (0 != regcomp(&reg, q, ch)) { fprintf(stderr, "%s: Bad pattern\n", q); - return; + return(0); } regp = &reg; @@ -329,10 +338,7 @@ state_search(struct state *p, const struct opts *opts, break; } - if (NULL == (mc = mchars_alloc())) { - perror(NULL); - exit(EXIT_FAILURE); - } + mc = mchars_alloc(); /* * Iterate over the entire keyword database. @@ -341,10 +347,7 @@ state_search(struct state *p, const struct opts *opts, * Lastly, add it to the available records. */ - while (len < MAXRESULTS) { - if ((ch = (*p->db->seq)(p->db, &key, &val, dflag))) - break; - + while (0 == (ch = (*p->db->seq)(p->db, &key, &val, dflag))) { dflag = R_NEXT; /* @@ -355,8 +358,8 @@ state_search(struct state *p, const struct opts *opts, */ if (key.size < 2 || 8 != val.size) { - fprintf(stderr, "%s: Corrupt database\n", p->dbf); - exit(EXIT_FAILURE); + fprintf(stderr, "%s: Bad database\n", p->dbf); + goto out; } buf_redup(mc, &buf, &bufsz, (char *)key.data); @@ -393,7 +396,7 @@ state_search(struct state *p, const struct opts *opts, memcpy(&rec, val.data + 4, sizeof(recno_t)); if ( ! 
state_getrecord(p, rec, &record)) - exit(EXIT_FAILURE); + goto out; /* If we're in a different section, skip... */ @@ -402,15 +405,25 @@ state_search(struct state *p, const struct opts *opts, if (opts->arch && strcasecmp(opts->arch, record.arch)) continue; - /* FIXME: this needs to be changed. Ugh. Linear. */ + /* + * Do a binary search to dedupe the results tree of the + * same record: we don't print the same file. + */ - for (i = 0; i < len; i++) - if (res[i].rec == record.rec) + for (leaf = root; leaf >= 0; ) + if (rec > res[leaf].rec && res[leaf].rhs >= 0) + leaf = res[leaf].rhs; + else if (rec < res[leaf].rec && res[leaf].lhs >= 0) + leaf = res[leaf].lhs; + else break; - if (i < len) + if (leaf >= 0 && res[leaf].rec == rec) continue; + res = mandoc_realloc + (res, (len + 1) * sizeof(struct res)); + /* * Now we have our filename, keywords, types, and all * other necessary information. @@ -423,6 +436,7 @@ state_search(struct state *p, const struct opts *opts, res[len].rec = record.rec; res[len].types = fl; + res[len].lhs = res[len].rhs = -1; buf_dup(mc, &res[len].keyword, buf); buf_dup(mc, &res[len].uri, filebuf); @@ -430,38 +444,33 @@ state_search(struct state *p, const struct opts *opts, buf_dup(mc, &res[len].arch, record.arch); buf_dup(mc, &res[len].title, record.title); buf_dup(mc, &res[len].desc, record.desc); + + if (leaf >= 0) { + if (record.rec > res[leaf].rec) + res[leaf].rhs = len; + else + res[leaf].lhs = len; + } else + root = len; + len++; } -send: if (ch < 0) { perror(p->dbf); - exit(EXIT_FAILURE); + goto out; } +send: + /* Sort our results. */ - /* - * Sort our results. - * We do this post-scan (instead of an in-line sort) because - * it's more or less the same in terms of run-time. Assuming we - * sort in-line with a tree versus post: - * - * In-place: n * O(lg n) - * After: n + O(n lg n) - * - * Whatever. This also buys us simplicity. 
- */ - - switch (opts->sort) { - case (SORT_CAT): + if (SORT_CAT == opts->sort) qsort(res, len, sizeof(struct res), sort_cat); - break; - default: + else qsort(res, len, sizeof(struct res), sort_title); - break; - } state_output(res, len); - + rc = 1; +out: for (len-- ; len >= 0; len--) { free(res[len].keyword); free(res[len].title); @@ -471,11 +480,14 @@ send: free(res[len].uri); } + free(res); free(buf); mchars_free(mc); if (regp) regfree(regp); + + return(rc); } /* @@ -489,10 +501,7 @@ buf_alloc(char **buf, size_t *bufsz, size_t sz) return; *bufsz = sz + 1024; - if (NULL == (*buf = realloc(*buf, *bufsz))) { - perror(NULL); - exit(EXIT_FAILURE); - } + *buf = mandoc_realloc(*buf, *bufsz); } /*