=================================================================== RCS file: /cvs/mandoc/Attic/apropos.c,v retrieving revision 1.4 retrieving revision 1.6 diff -u -p -r1.4 -r1.6 --- mandoc/Attic/apropos.c 2011/10/08 12:20:09 1.4 +++ mandoc/Attic/apropos.c 2011/10/09 10:37:52 1.6 @@ -1,4 +1,4 @@ -/* $Id: apropos.c,v 1.4 2011/10/08 12:20:09 kristaps Exp $ */ +/* $Id: apropos.c,v 1.6 2011/10/09 10:37:52 kristaps Exp $ */ /* * Copyright (c) 2011 Kristaps Dzonsons * @@ -103,6 +103,14 @@ struct res { char *title; /* manual section */ char *uri; /* formatted uri of file */ recno_t rec; /* unique id of underlying manual */ + /* + * Maintain a binary tree for checking the uniqueness of `rec' + * when adding elements to the results array. + * Since the results array is dynamic, use offset in the array + * instead of a pointer to the structure. + */ + int lhs; + int rhs; }; struct state { @@ -282,7 +290,7 @@ out: static void state_search(struct state *p, const struct opts *opts, char *q) { - int i, len, ch, rflags, dflag; + int leaf, root, len, ch, dflag; struct mchars *mc; char *buf; size_t bufsz; @@ -295,11 +303,11 @@ state_search(struct state *p, const struct opts *opts, char filebuf[10]; struct rec record; + root = leaf = -1; res = NULL; len = 0; buf = NULL; bufsz = 0; - ch = 0; regp = NULL; /* @@ -309,10 +317,10 @@ state_search(struct state *p, const struct opts *opts, switch (opts->match) { case (MATCH_REGEX): - rflags = REG_EXTENDED | REG_NOSUB | + ch = REG_EXTENDED | REG_NOSUB | (opts->insens ? REG_ICASE : 0); - if (0 != regcomp(®, q, rflags)) { + if (0 != regcomp(®, q, ch)) { fprintf(stderr, "%s: Bad pattern\n", q); return; } @@ -330,10 +338,7 @@ state_search(struct state *p, const struct opts *opts, break; } - if (NULL == (mc = mchars_alloc())) { - perror(NULL); - exit(EXIT_FAILURE); - } + mc = mchars_alloc(); /* * Iterate over the entire keyword database. @@ -400,13 +405,20 @@ state_search(struct state *p, const struct opts *opts, if (opts->arch && strcasecmp(opts->arch, record.arch)) continue; - /* FIXME: this needs to be changed. Ugh. Linear. */ + /* + * Do a binary search to dedupe the results tree of the + * same record: we don't print the same file. + */ - for (i = 0; i < len; i++) - if (res[i].rec == record.rec) + for (leaf = root; leaf >= 0; ) + if (rec > res[leaf].rec && res[leaf].rhs >= 0) + leaf = res[leaf].rhs; + else if (rec < res[leaf].rec && res[leaf].lhs >= 0) + leaf = res[leaf].lhs; + else break; - if (i < len) + if (leaf >= 0 && res[leaf].rec == rec) continue; res = mandoc_realloc @@ -424,6 +436,7 @@ state_search(struct state *p, const struct opts *opts, res[len].rec = record.rec; res[len].types = fl; + res[len].lhs = res[len].rhs = -1; buf_dup(mc, &res[len].keyword, buf); buf_dup(mc, &res[len].uri, filebuf); @@ -431,6 +444,15 @@ state_search(struct state *p, const struct opts *opts, buf_dup(mc, &res[len].arch, record.arch); buf_dup(mc, &res[len].title, record.title); buf_dup(mc, &res[len].desc, record.desc); + + if (leaf >= 0) { + if (record.rec > res[leaf].rec) + res[leaf].rhs = len; + else + res[leaf].lhs = len; + } else + root = len; + len++; } @@ -440,26 +462,12 @@ send: exit(EXIT_FAILURE); } - /* - * Sort our results. - * We do this post-scan (instead of an in-line sort) because - * it's more or less the same in terms of run-time. Assuming we - * sort in-line with a tree versus post: - * - * In-place: n * O(lg n) - * After: n + O(n lg n) - * - * Whatever. This also buys us simplicity. - */ + /* Sort our results. */ - switch (opts->sort) { - case (SORT_CAT): + if (SORT_CAT == opts->sort) qsort(res, len, sizeof(struct res), sort_cat); - break; - default: + else qsort(res, len, sizeof(struct res), sort_title); - break; - } state_output(res, len);