=================================================================== RCS file: /cvs/mandoc/mandocdb.c,v retrieving revision 1.25 retrieving revision 1.36 diff -u -p -r1.25 -r1.36 --- mandoc/mandocdb.c 2011/12/07 01:57:20 1.25 +++ mandoc/mandocdb.c 2011/12/16 12:06:35 1.36 @@ -1,4 +1,4 @@ -/* $Id: mandocdb.c,v 1.25 2011/12/07 01:57:20 schwarze Exp $ */ +/* $Id: mandocdb.c,v 1.36 2011/12/16 12:06:35 kristaps Exp $ */ /* * Copyright (c) 2011 Kristaps Dzonsons * Copyright (c) 2011 Ingo Schwarze @@ -21,7 +21,6 @@ #include #include -#include #include #include @@ -107,9 +106,10 @@ static void index_merge(const struct of *, struct m recno_t, const recno_t *, size_t); static void index_prune(const struct of *, DB *, const char *, DB *, const char *, - recno_t *, recno_t **, size_t *); + recno_t *, recno_t **, size_t *, + size_t *); static void ofile_argbuild(int, char *[], struct of **); -static int ofile_dirbuild(const char *, const char *, +static void ofile_dirbuild(const char *, const char *, const char *, int, struct of **); static void ofile_free(struct of *); static void pformatted(DB *, struct buf *, struct buf *, @@ -272,7 +272,10 @@ main(int argc, char *argv[]) struct manpaths dirs; enum op op; /* current operation */ const char *dir; - char ibuf[MAXPATHLEN], /* index fname */ + char *conf_file; + char *cp; + char pbuf[PATH_MAX], + ibuf[MAXPATHLEN], /* index fname */ fbuf[MAXPATHLEN]; /* btree fname */ int ch, i, flags; DB *idx, /* index database */ @@ -309,12 +312,16 @@ main(int argc, char *argv[]) maxrec = 0; op = OP_NEW; dir = NULL; + conf_file = NULL; - while (-1 != (ch = getopt(argc, argv, "ad:u:v"))) + while (-1 != (ch = getopt(argc, argv, "aC:d:u:v"))) switch (ch) { case ('a'): use_all = 1; break; + case ('C'): + conf_file = optarg; + break; case ('d'): dir = optarg; op = OP_UPDATE; @@ -376,28 +383,27 @@ main(int argc, char *argv[]) exit((int)MANDOCLEVEL_SYSERR); } - if (verb > 2) { - printf("%s: Opened\n", fbuf); - printf("%s: Opened\n", ibuf); - } - ofile_argbuild(argc, argv, &of); + if (NULL == of) goto out; of = of->first; index_prune(of, db, fbuf, idx, ibuf, - &maxrec, &recs, &recsz); + &maxrec, &recs, &recsz, &reccur); /* - * Go to the root of the respective manual tree - * such that .so links work. In case of failure, - * just prod on, even though .so links won't work. + * Go to the root of the respective manual tree. + * This must work or no manuals may be found (they're + * indexed relative to the root). */ if (OP_UPDATE == op) { - chdir(dir); + if (-1 == chdir(dir)) { + perror(dir); + exit((int)MANDOCLEVEL_SYSERR); + } index_merge(of, mp, &dbuf, &buf, hash, db, fbuf, idx, ibuf, maxrec, recs, reccur); @@ -413,12 +419,17 @@ main(int argc, char *argv[]) */ if (argc > 0) { - dirs.paths = mandoc_malloc(argc * sizeof(char *)); + dirs.paths = mandoc_calloc(argc, sizeof(char *)); dirs.sz = argc; - for (i = 0; i < argc; i++) - dirs.paths[i] = mandoc_strdup(argv[i]); + for (i = 0; i < argc; i++) { + if (NULL == (cp = realpath(argv[i], pbuf))) { + perror(argv[i]); + goto out; + } + dirs.paths[i] = mandoc_strdup(cp); + } } else - manpath_parse(&dirs, NULL, NULL); + manpath_parse(&dirs, conf_file, NULL, NULL); for (i = 0; i < dirs.sz; i++) { ibuf[0] = fbuf[0] = '\0'; @@ -453,30 +464,32 @@ main(int argc, char *argv[]) exit((int)MANDOCLEVEL_SYSERR); } - if (verb > 2) { - printf("%s: Truncated\n", fbuf); - printf("%s: Truncated\n", ibuf); - } - ofile_free(of); of = NULL; - if ( ! ofile_dirbuild(dirs.paths[i], NULL, NULL, - 0, &of)) + if (-1 == chdir(dirs.paths[i])) { + perror(dirs.paths[i]); exit((int)MANDOCLEVEL_SYSERR); + } + ofile_dirbuild(".", NULL, NULL, 0, &of); + if (NULL == of) continue; of = of->first; /* - * Go to the root of the respective manual tree - * such that .so links work. In case of failure, - * just prod on, even though .so links won't work. + * Go to the root of the respective manual tree. + * This must work or no manuals may be found (they're + * indexed relative to the root). */ - chdir(dirs.paths[i]); + if (-1 == chdir(dirs.paths[i])) { + perror(dirs.paths[i]); + exit((int)MANDOCLEVEL_SYSERR); + } + index_merge(of, mp, &dbuf, &buf, hash, db, fbuf, idx, ibuf, maxrec, recs, reccur); } @@ -515,34 +528,21 @@ index_merge(const struct of *of, struct mparse *mp, size_t sv; unsigned seq; struct db_val vbuf; + char type; for (rec = 0; of; of = of->next) { fn = of->fname; /* - * Reclaim an empty index record, if available. + * Try interpreting the file as mdoc(7) or man(7) + * source code, unless it is already known to be + * formatted. Fall back to formatted mode. */ - if (reccur > 0) { - --reccur; - rec = recs[(int)reccur]; - } else if (maxrec > 0) { - rec = maxrec; - maxrec = 0; - } else - rec++; - mparse_reset(mp); - hash_reset(&hash); mdoc = NULL; man = NULL; - /* - * Try interpreting the file as mdoc(7) or man(7) - * source code, unless it is already known to be - * formatted. Fall back to formatted mode. - */ - if ((MANDOC_SRC & of->src_form || ! (MANDOC_FORM & of->src_form)) && MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn)) @@ -571,14 +571,14 @@ index_merge(const struct of *of, struct mparse *mp, if (0 == use_all) { assert(of->sec); assert(msec); - if (strcmp(msec, of->sec)) + if (strcasecmp(msec, of->sec)) continue; if (NULL == arch) { if (NULL != of->arch) continue; } else if (NULL == of->arch || - strcmp(arch, of->arch)) + strcasecmp(arch, of->arch)) continue; } @@ -609,7 +609,8 @@ index_merge(const struct of *of, struct mparse *mp, */ dbuf->len = 0; - buf_append(dbuf, mdoc ? "mdoc" : (man ? "man" : "cat")); + type = mdoc ? 'd' : (man ? 'a' : 'c'); + buf_appendb(dbuf, &type, 1); buf_appendb(dbuf, fn, strlen(fn) + 1); buf_appendb(dbuf, msec, strlen(msec) + 1); buf_appendb(dbuf, mtitle, strlen(mtitle) + 1); @@ -617,8 +618,12 @@ index_merge(const struct of *of, struct mparse *mp, sv = dbuf->len; - /* Fix the record number in the btree value. */ + /* + * Collect keyword/mask pairs. + * Each pair will become a new btree node. + */ + hash_reset(&hash); if (mdoc) pmdoc_node(hash, buf, dbuf, mdoc_node(mdoc), mdoc_meta(mdoc)); @@ -628,11 +633,25 @@ index_merge(const struct of *of, struct mparse *mp, pformatted(hash, buf, dbuf, of); /* - * Copy from the in-memory hashtable of pending keywords - * into the database. + * Reclaim an empty index record, if available. + * Use its record number for all new btree nodes. */ + if (reccur > 0) { + --reccur; + rec = recs[(int)reccur]; + } else if (maxrec > 0) { + rec = maxrec; + maxrec = 0; + } else + rec++; vbuf.rec = htobe32(rec); + + /* + * Copy from the in-memory hashtable of pending + * keyword/mask pairs into the database. + */ + seq = R_FIRST; while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { seq = R_NEXT; @@ -675,23 +694,21 @@ index_merge(const struct of *of, struct mparse *mp, */ static void index_prune(const struct of *ofile, DB *db, const char *dbf, - DB *idx, const char *idxf, - recno_t *maxrec, recno_t **recs, size_t *recsz) + DB *idx, const char *idxf, recno_t *maxrec, + recno_t **recs, size_t *recsz, size_t *reccur) { const struct of *of; - const char *fn, *cp; + const char *fn; struct db_val *vbuf; unsigned seq, sseq; DBT key, val; - size_t reccur; int ch; - reccur = 0; + *reccur = 0; seq = R_FIRST; while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) { seq = R_NEXT; *maxrec = *(recno_t *)key.data; - cp = val.data; /* Deleted records are zero-sized. Skip them. */ @@ -705,12 +722,9 @@ index_prune(const struct of *ofile, DB *db, const char * Failing any of these, we go into our error handler. */ - if (NULL == (fn = memchr(cp, '\0', val.size))) + fn = (char *)val.data + 1; + if (NULL == memchr(fn, '\0', val.size - 1)) break; - if (++fn - cp >= (int)val.size) - break; - if (NULL == memchr(fn, '\0', val.size - (fn - cp))) - break; /* * Search for the file in those we care about. @@ -760,14 +774,14 @@ index_prune(const struct of *ofile, DB *db, const char if (ch < 0) break; cont: - if (reccur >= *recsz) { + if (*reccur >= *recsz) { *recsz += MANDOC_SLOP; *recs = mandoc_realloc (*recs, *recsz * sizeof(recno_t)); } - (*recs)[(int)reccur] = *maxrec; - reccur++; + (*recs)[(int)*reccur] = *maxrec; + (*reccur)++; } if (ch < 0) { @@ -1281,52 +1295,72 @@ pformatted(DB *hash, struct buf *buf, struct buf *dbuf buf_append(buf, of->title); hash_put(hash, buf, TYPE_Nm); - while (NULL != (line = fgetln(stream, &len)) && '\n' != *line) - /* Skip to first blank line. */ ; + /* Skip to first blank line. */ - while (NULL != (line = fgetln(stream, &len)) && - ('\n' == *line || ' ' == *line)) - /* Skip to first section header. */ ; + while (NULL != (line = fgetln(stream, &len))) + if ('\n' == *line) + break; /* - * If no page content can be found, - * reuse the page title as the page description. + * Assume the first line that is not indented + * is the first section header. Skip to it. */ - if (NULL == (line = fgetln(stream, &len))) { + while (NULL != (line = fgetln(stream, &len))) + if ('\n' != *line && ' ' != *line) + break; + + /* + * If no page content can be found, or the input line + * is already the next section header, or there is no + * trailing newline, reuse the page title as the page + * description. + */ + + line = fgetln(stream, &len); + if (NULL == line || ' ' != *line || '\n' != line[(int)len - 1]) { buf_appendb(dbuf, buf->cp, buf->size); hash_put(hash, buf, TYPE_Nd); fclose(stream); return; } - fclose(stream); + line[(int)--len] = '\0'; + /* - * If there is a dash, skip to the text following it. + * Skip to the first dash. + * Use the remaining line as the description (no more than 70 + * bytes). */ - for (p = line, plen = len; plen; p++, plen--) - if ('-' == *p) - break; - for ( ; plen; p++, plen--) - if ('-' != *p && ' ' != *p && 8 != *p) - break; - if (0 == plen) { + if (NULL != (p = strstr(line, "- "))) { + for (p += 2; ' ' == *p || '\b' == *p; p++) + /* Skip to next word. */ ; + } else p = line; - plen = len; + + if ((plen = strlen(p)) > 70) { + plen = 70; + p[plen] = '\0'; } - /* - * Copy the rest of the line, but no more than 70 bytes. - */ + /* Strip backspace-encoding from line. */ - if (70 < plen) - plen = 70; - p[plen-1] = '\0'; - buf_appendb(dbuf, p, plen); + while (NULL != (line = memchr(p, '\b', plen))) { + len = line - p; + if (0 == len) { + memmove(line, line + 1, plen--); + continue; + } + memmove(line - 1, line + 1, plen - len); + plen -= 2; + } + + buf_appendb(dbuf, p, plen + 1); buf->len = 0; - buf_appendb(buf, p, plen); + buf_appendb(buf, p, plen + 1); hash_put(hash, buf, TYPE_Nd); + fclose(stream); } static void @@ -1398,8 +1432,6 @@ ofile_argbuild(int argc, char *argv[], struct of **of) * Add the structure to the list. */ - if (verb > 2) - printf("%s: Scheduling\n", argv[i]); if (NULL == *of) { *of = nof; (*of)->first = nof; @@ -1419,12 +1451,11 @@ ofile_argbuild(int argc, char *argv[], struct of **of) * everything else is a manual. * Pass in a pointer to a NULL structure for the first invocation. */ -static int +static void ofile_dirbuild(const char *dir, const char* psec, const char *parch, int p_src_form, struct of **of) { char buf[MAXPATHLEN]; - struct stat sb; size_t sz; DIR *d; const char *fn, *sec, *arch; @@ -1435,7 +1466,7 @@ ofile_dirbuild(const char *dir, const char* psec, cons if (NULL == (d = opendir(dir))) { perror(dir); - return(0); + exit((int)MANDOCLEVEL_SYSERR); } while (NULL != (dp = readdir(d))) { @@ -1480,20 +1511,16 @@ ofile_dirbuild(const char *dir, const char* psec, cons if (MAXPATHLEN <= sz) { fprintf(stderr, "%s: Path too long\n", dir); - return(0); + exit((int)MANDOCLEVEL_SYSERR); } - if (verb > 2) - printf("%s: Scanning\n", buf); - - if ( ! ofile_dirbuild(buf, sec, arch, - src_form, of)) - return(0); + ofile_dirbuild(buf, sec, arch, src_form, of); } + if (DT_REG != dp->d_type || - (NULL == psec && !use_all) || - !strcmp(MANDOC_DB, fn) || - !strcmp(MANDOC_IDX, fn)) + (NULL == psec && !use_all) || + ! strcmp(MANDOC_DB, fn) || + ! strcmp(MANDOC_IDX, fn)) continue; /* @@ -1532,6 +1559,10 @@ ofile_dirbuild(const char *dir, const char* psec, cons buf[0] = '\0'; strlcat(buf, dir, MAXPATHLEN); p = strrchr(buf, '/'); + if (NULL != parch && NULL != p) + for (p--; p > buf; p--) + if ('/' == *p) + break; if (NULL == p) p = buf; else @@ -1553,13 +1584,15 @@ ofile_dirbuild(const char *dir, const char* psec, cons "%s: Path too long\n", buf); continue; } - if (0 == stat(buf, &sb)) + if (0 == access(buf, R_OK)) continue; } } + assert('.' == dir[0]); + assert('/' == dir[1]); buf[0] = '\0'; - strlcat(buf, dir, MAXPATHLEN); + strlcat(buf, dir + 2, MAXPATHLEN); strlcat(buf, "/", MAXPATHLEN); sz = strlcat(buf, fn, MAXPATHLEN); if (sz >= MAXPATHLEN) { @@ -1588,8 +1621,6 @@ ofile_dirbuild(const char *dir, const char* psec, cons * Add the structure to the list. */ - if (verb > 2) - printf("%s: Scheduling\n", buf); if (NULL == *of) { *of = nof; (*of)->first = nof; @@ -1601,7 +1632,6 @@ ofile_dirbuild(const char *dir, const char* psec, cons } closedir(d); - return(1); } static void @@ -1625,7 +1655,8 @@ usage(void) { fprintf(stderr, "usage: %s [-v] " - "[-d dir [files...] |" - " -u dir [files...] |" - " dir...]\n", progname); + "[-C file] |" + " dir ... |" + " -d dir [file ...] |" + " -u dir [file ...]\n", progname); }