=================================================================== RCS file: /cvs/mandoc/mandocdb.c,v retrieving revision 1.30 retrieving revision 1.37 diff -u -p -r1.30 -r1.37 --- mandoc/mandocdb.c 2011/12/09 01:21:10 1.30 +++ mandoc/mandocdb.c 2011/12/20 21:41:11 1.37 @@ -1,4 +1,4 @@ -/* $Id: mandocdb.c,v 1.30 2011/12/09 01:21:10 kristaps Exp $ */ +/* $Id: mandocdb.c,v 1.37 2011/12/20 21:41:11 schwarze Exp $ */ /* * Copyright (c) 2011 Kristaps Dzonsons * Copyright (c) 2011 Ingo Schwarze @@ -21,7 +21,6 @@ #include #include -#include #include #include @@ -110,7 +109,7 @@ static void index_prune(const struct of *, DB *, recno_t *, recno_t **, size_t *, size_t *); static void ofile_argbuild(int, char *[], struct of **); -static int ofile_dirbuild(const char *, const char *, +static void ofile_dirbuild(const char *, const char *, const char *, int, struct of **); static void ofile_free(struct of *); static void pformatted(DB *, struct buf *, struct buf *, @@ -273,6 +272,7 @@ main(int argc, char *argv[]) struct manpaths dirs; enum op op; /* current operation */ const char *dir; + char *conf_file; char *cp; char pbuf[PATH_MAX], ibuf[MAXPATHLEN], /* index fname */ @@ -312,12 +312,16 @@ main(int argc, char *argv[]) maxrec = 0; op = OP_NEW; dir = NULL; + conf_file = NULL; - while (-1 != (ch = getopt(argc, argv, "ad:u:v"))) + while (-1 != (ch = getopt(argc, argv, "aC:d:u:v"))) switch (ch) { case ('a'): use_all = 1; break; + case ('C'): + conf_file = optarg; + break; case ('d'): dir = optarg; op = OP_UPDATE; @@ -379,12 +383,8 @@ main(int argc, char *argv[]) exit((int)MANDOCLEVEL_SYSERR); } - if (verb > 2) { - printf("%s: Opened\n", fbuf); - printf("%s: Opened\n", ibuf); - } - ofile_argbuild(argc, argv, &of); + if (NULL == of) goto out; @@ -394,13 +394,16 @@ main(int argc, char *argv[]) &maxrec, &recs, &recsz, &reccur); /* - * Go to the root of the respective manual tree - * such that .so links work. In case of failure, - * just prod on, even though .so links won't work. + * Go to the root of the respective manual tree. + * This must work or no manuals may be found (they're + * indexed relative to the root). */ if (OP_UPDATE == op) { - chdir(dir); + if (-1 == chdir(dir)) { + perror(dir); + exit((int)MANDOCLEVEL_SYSERR); + } index_merge(of, mp, &dbuf, &buf, hash, db, fbuf, idx, ibuf, maxrec, recs, reccur); @@ -426,7 +429,7 @@ main(int argc, char *argv[]) dirs.paths[i] = mandoc_strdup(cp); } } else - manpath_parse(&dirs, NULL, NULL); + manpath_parse(&dirs, conf_file, NULL, NULL); for (i = 0; i < dirs.sz; i++) { ibuf[0] = fbuf[0] = '\0'; @@ -461,30 +464,32 @@ main(int argc, char *argv[]) exit((int)MANDOCLEVEL_SYSERR); } - if (verb > 2) { - printf("%s: Truncated\n", fbuf); - printf("%s: Truncated\n", ibuf); - } - ofile_free(of); of = NULL; - if ( ! ofile_dirbuild(dirs.paths[i], NULL, NULL, - 0, &of)) + if (-1 == chdir(dirs.paths[i])) { + perror(dirs.paths[i]); exit((int)MANDOCLEVEL_SYSERR); + } + ofile_dirbuild(".", NULL, NULL, 0, &of); + if (NULL == of) continue; of = of->first; /* - * Go to the root of the respective manual tree - * such that .so links work. In case of failure, - * just prod on, even though .so links won't work. + * Go to the root of the respective manual tree. + * This must work or no manuals may be found (they're + * indexed relative to the root). */ - chdir(dirs.paths[i]); + if (-1 == chdir(dirs.paths[i])) { + perror(dirs.paths[i]); + exit((int)MANDOCLEVEL_SYSERR); + } + index_merge(of, mp, &dbuf, &buf, hash, db, fbuf, idx, ibuf, maxrec, recs, reccur); } @@ -520,37 +525,25 @@ index_merge(const struct of *of, struct mparse *mp, struct mdoc *mdoc; struct man *man; const char *fn, *msec, *mtitle, *arch; + uint64_t mask; size_t sv; unsigned seq; struct db_val vbuf; + char type; for (rec = 0; of; of = of->next) { fn = of->fname; /* - * Reclaim an empty index record, if available. + * Try interpreting the file as mdoc(7) or man(7) + * source code, unless it is already known to be + * formatted. Fall back to formatted mode. */ - if (reccur > 0) { - --reccur; - rec = recs[(int)reccur]; - } else if (maxrec > 0) { - rec = maxrec; - maxrec = 0; - } else - rec++; - mparse_reset(mp); - hash_reset(&hash); mdoc = NULL; man = NULL; - /* - * Try interpreting the file as mdoc(7) or man(7) - * source code, unless it is already known to be - * formatted. Fall back to formatted mode. - */ - if ((MANDOC_SRC & of->src_form || ! (MANDOC_FORM & of->src_form)) && MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn)) @@ -579,14 +572,14 @@ index_merge(const struct of *of, struct mparse *mp, if (0 == use_all) { assert(of->sec); assert(msec); - if (strcmp(msec, of->sec)) + if (strcasecmp(msec, of->sec)) continue; if (NULL == arch) { if (NULL != of->arch) continue; } else if (NULL == of->arch || - strcmp(arch, of->arch)) + strcasecmp(arch, of->arch)) continue; } @@ -617,7 +610,8 @@ index_merge(const struct of *of, struct mparse *mp, */ dbuf->len = 0; - buf_append(dbuf, mdoc ? "mdoc" : (man ? "man" : "cat")); + type = mdoc ? 'd' : (man ? 'a' : 'c'); + buf_appendb(dbuf, &type, 1); buf_appendb(dbuf, fn, strlen(fn) + 1); buf_appendb(dbuf, msec, strlen(msec) + 1); buf_appendb(dbuf, mtitle, strlen(mtitle) + 1); @@ -625,8 +619,12 @@ index_merge(const struct of *of, struct mparse *mp, sv = dbuf->len; - /* Fix the record number in the btree value. */ + /* + * Collect keyword/mask pairs. + * Each pair will become a new btree node. + */ + hash_reset(&hash); if (mdoc) pmdoc_node(hash, buf, dbuf, mdoc_node(mdoc), mdoc_meta(mdoc)); @@ -636,15 +634,31 @@ index_merge(const struct of *of, struct mparse *mp, pformatted(hash, buf, dbuf, of); /* - * Copy from the in-memory hashtable of pending keywords - * into the database. + * Reclaim an empty index record, if available. + * Use its record number for all new btree nodes. */ + if (reccur > 0) { + --reccur; + rec = recs[(int)reccur]; + } else if (maxrec > 0) { + rec = maxrec; + maxrec = 0; + } else + rec++; vbuf.rec = htobe32(rec); + + /* + * Copy from the in-memory hashtable of pending + * keyword/mask pairs into the database. + */ + seq = R_FIRST; while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { seq = R_NEXT; - vbuf.mask = htobe64(*(uint64_t *)val.data); + assert(sizeof(uint64_t) == val.size); + memcpy(&mask, val.data, val.size); + vbuf.mask = htobe64(mask); val.size = sizeof(struct db_val); val.data = &vbuf; dbt_put(db, dbf, &key, &val); @@ -687,7 +701,7 @@ index_prune(const struct of *ofile, DB *db, const char recno_t **recs, size_t *recsz, size_t *reccur) { const struct of *of; - const char *fn, *cp; + const char *fn; struct db_val *vbuf; unsigned seq, sseq; DBT key, val; @@ -697,8 +711,8 @@ index_prune(const struct of *ofile, DB *db, const char seq = R_FIRST; while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) { seq = R_NEXT; - *maxrec = *(recno_t *)key.data; - cp = val.data; + assert(sizeof(recno_t) == key.size); + memcpy(maxrec, key.data, key.size); /* Deleted records are zero-sized. Skip them. */ @@ -712,12 +726,9 @@ index_prune(const struct of *ofile, DB *db, const char * Failing any of these, we go into our error handler. */ - if (NULL == (fn = memchr(cp, '\0', val.size))) + fn = (char *)val.data + 1; + if (NULL == memchr(fn, '\0', val.size - 1)) break; - if (++fn - cp >= (int)val.size) - break; - if (NULL == memchr(fn, '\0', val.size - (fn - cp))) - break; /* * Search for the file in those we care about. @@ -1061,6 +1072,7 @@ pmdoc_Sh(MDOC_ARGS) static void hash_put(DB *db, const struct buf *buf, uint64_t mask) { + uint64_t oldmask; DBT key, val; int rc; @@ -1073,8 +1085,11 @@ hash_put(DB *db, const struct buf *buf, uint64_t mask) if ((rc = (*db->get)(db, &key, &val, 0)) < 0) { perror("hash"); exit((int)MANDOCLEVEL_SYSERR); - } else if (0 == rc) - mask |= *(uint64_t *)val.data; + } else if (0 == rc) { + assert(sizeof(uint64_t) == val.size); + memcpy(&oldmask, val.data, val.size); + mask |= oldmask; + } val.data = &mask; val.size = sizeof(uint64_t); @@ -1288,29 +1303,30 @@ pformatted(DB *hash, struct buf *buf, struct buf *dbuf buf_append(buf, of->title); hash_put(hash, buf, TYPE_Nm); - /* Skip to first blank line. */ + /* Skip to first blank line. */ while (NULL != (line = fgetln(stream, &len))) - if (len && '\n' == *line) + if ('\n' == *line) break; - /* - * Skip to first section header. - * This happens when text is flush-left. + /* + * Assume the first line that is not indented + * is the first section header. Skip to it. */ while (NULL != (line = fgetln(stream, &len))) - if (len && '\n' != *line && ' ' != *line) + if ('\n' != *line && ' ' != *line) break; /* - * If no page content can be found or the input line is - * malformed (zer-length or has no trailing newline), reuse the - * page title as the page description. + * If no page content can be found, or the input line + * is already the next section header, or there is no + * trailing newline, reuse the page title as the page + * description. */ line = fgetln(stream, &len); - if (NULL == line || len == 0 || '\n' != line[(int)len - 1]) { + if (NULL == line || ' ' != *line || '\n' != line[(int)len - 1]) { buf_appendb(dbuf, buf->cp, buf->size); hash_put(hash, buf, TYPE_Nd); fclose(stream); @@ -1319,8 +1335,8 @@ pformatted(DB *hash, struct buf *buf, struct buf *dbuf line[(int)--len] = '\0'; - /* - * Skip to the last dash. + /* + * Skip to the first dash. * Use the remaining line as the description (no more than 70 * bytes). */ @@ -1424,8 +1440,6 @@ ofile_argbuild(int argc, char *argv[], struct of **of) * Add the structure to the list. */ - if (verb > 2) - printf("%s: Scheduling\n", argv[i]); if (NULL == *of) { *of = nof; (*of)->first = nof; @@ -1445,12 +1459,11 @@ ofile_argbuild(int argc, char *argv[], struct of **of) * everything else is a manual. * Pass in a pointer to a NULL structure for the first invocation. */ -static int +static void ofile_dirbuild(const char *dir, const char* psec, const char *parch, int p_src_form, struct of **of) { char buf[MAXPATHLEN]; - struct stat sb; size_t sz; DIR *d; const char *fn, *sec, *arch; @@ -1461,7 +1474,7 @@ ofile_dirbuild(const char *dir, const char* psec, cons if (NULL == (d = opendir(dir))) { perror(dir); - return(0); + exit((int)MANDOCLEVEL_SYSERR); } while (NULL != (dp = readdir(d))) { @@ -1506,20 +1519,16 @@ ofile_dirbuild(const char *dir, const char* psec, cons if (MAXPATHLEN <= sz) { fprintf(stderr, "%s: Path too long\n", dir); - return(0); + exit((int)MANDOCLEVEL_SYSERR); } - if (verb > 2) - printf("%s: Scanning\n", buf); - - if ( ! ofile_dirbuild(buf, sec, arch, - src_form, of)) - return(0); + ofile_dirbuild(buf, sec, arch, src_form, of); } + if (DT_REG != dp->d_type || - (NULL == psec && !use_all) || - !strcmp(MANDOC_DB, fn) || - !strcmp(MANDOC_IDX, fn)) + (NULL == psec && !use_all) || + ! strcmp(MANDOC_DB, fn) || + ! strcmp(MANDOC_IDX, fn)) continue; /* @@ -1558,6 +1567,10 @@ ofile_dirbuild(const char *dir, const char* psec, cons buf[0] = '\0'; strlcat(buf, dir, MAXPATHLEN); p = strrchr(buf, '/'); + if (NULL != parch && NULL != p) + for (p--; p > buf; p--) + if ('/' == *p) + break; if (NULL == p) p = buf; else @@ -1579,13 +1592,15 @@ ofile_dirbuild(const char *dir, const char* psec, cons "%s: Path too long\n", buf); continue; } - if (0 == stat(buf, &sb)) + if (0 == access(buf, R_OK)) continue; } } + assert('.' == dir[0]); + assert('/' == dir[1]); buf[0] = '\0'; - strlcat(buf, dir, MAXPATHLEN); + strlcat(buf, dir + 2, MAXPATHLEN); strlcat(buf, "/", MAXPATHLEN); sz = strlcat(buf, fn, MAXPATHLEN); if (sz >= MAXPATHLEN) { @@ -1614,8 +1629,6 @@ ofile_dirbuild(const char *dir, const char* psec, cons * Add the structure to the list. */ - if (verb > 2) - printf("%s: Scheduling\n", buf); if (NULL == *of) { *of = nof; (*of)->first = nof; @@ -1627,7 +1640,6 @@ ofile_dirbuild(const char *dir, const char* psec, cons } closedir(d); - return(1); } static void @@ -1651,7 +1663,8 @@ usage(void) { fprintf(stderr, "usage: %s [-v] " - "[-d dir [files...] |" - " -u dir [files...] |" - " dir...]\n", progname); + "[-C file] |" + " dir ... |" + " -d dir [file ...] |" + " -u dir [file ...]\n", progname); }