=================================================================== RCS file: /cvs/mandoc/mandocdb.c,v retrieving revision 1.36 retrieving revision 1.48 diff -u -p -r1.36 -r1.48 --- mandoc/mandocdb.c 2011/12/16 12:06:35 1.36 +++ mandoc/mandocdb.c 2012/05/27 17:39:28 1.48 @@ -1,7 +1,7 @@ -/* $Id: mandocdb.c,v 1.36 2011/12/16 12:06:35 kristaps Exp $ */ +/* $Id: mandocdb.c,v 1.48 2012/05/27 17:39:28 schwarze Exp $ */ /* - * Copyright (c) 2011 Kristaps Dzonsons - * Copyright (c) 2011 Ingo Schwarze + * Copyright (c) 2011, 2012 Kristaps Dzonsons + * Copyright (c) 2011, 2012 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -23,7 +23,9 @@ #include #include +#include #include +#include #include #include #include @@ -54,6 +56,33 @@ #define MANDOC_SRC 0x1 #define MANDOC_FORM 0x2 +#define WARNING(_f, _b, _fmt, _args...) \ + do if (warnings) { \ + fprintf(stderr, "%s: ", (_b)); \ + fprintf(stderr, (_fmt), ##_args); \ + if ('\0' != *(_f)) \ + fprintf(stderr, ": %s", (_f)); \ + fprintf(stderr, "\n"); \ + } while (/* CONSTCOND */ 0) + +/* Access to the mandoc database on disk. */ + +struct mdb { + char idxn[MAXPATHLEN]; /* index db filename */ + char dbn[MAXPATHLEN]; /* keyword db filename */ + DB *idx; /* index recno database */ + DB *db; /* keyword btree database */ +}; + +/* Stack of temporarily unused index records. */ + +struct recs { + recno_t *stack; /* pointer to a malloc'ed array */ + size_t size; /* number of allocated slots */ + size_t cur; /* current number of empty records */ + recno_t last; /* last record number in the index */ +}; + /* Tiny list for files. No need to bring in QUEUE. */ struct of { @@ -77,9 +106,11 @@ struct buf { /* Operation we're going to perform. */ enum op { - OP_NEW = 0, /* new database */ + OP_DEFAULT = 0, /* new dbs from dir list or default config */ + OP_CONFFILE, /* new databases from custom config file */ OP_UPDATE, /* delete/add entries in existing database */ - OP_DELETE /* delete entries from existing database */ + OP_DELETE, /* delete entries from existing database */ + OP_TEST /* change no databases, report potential problems */ }; #define MAN_ARGS DB *hash, \ @@ -102,18 +133,17 @@ static void hash_put(DB *, const struct buf *, uint static void hash_reset(DB **); static void index_merge(const struct of *, struct mparse *, struct buf *, struct buf *, DB *, - DB *, const char *, DB *, const char *, - recno_t, const recno_t *, size_t); -static void index_prune(const struct of *, DB *, - const char *, DB *, const char *, - recno_t *, recno_t **, size_t *, - size_t *); -static void ofile_argbuild(int, char *[], struct of **); + struct mdb *, struct recs *, + const char *); +static void index_prune(const struct of *, struct mdb *, + struct recs *, const char *); +static void ofile_argbuild(int, char *[], + struct of **, const char *); static void ofile_dirbuild(const char *, const char *, - const char *, int, struct of **); + const char *, int, struct of **, char *); static void ofile_free(struct of *); static void pformatted(DB *, struct buf *, struct buf *, - const struct of *); + const struct of *, const char *); static int pman_node(MAN_ARGS); static void pmdoc_node(MDOC_ARGS); static int pmdoc_head(MDOC_ARGS); @@ -126,7 +156,6 @@ static int pmdoc_Nm(MDOC_ARGS); static int pmdoc_Sh(MDOC_ARGS); static int pmdoc_St(MDOC_ARGS); static int pmdoc_Xr(MDOC_ARGS); -static void usage(void); #define MDOCF_CHILD 0x01 /* Automatically index child nodes. */ @@ -264,29 +293,22 @@ static const struct mdoc_handler mdocs[MDOC_MAX] = { static const char *progname; static int use_all; /* Use all directories and files. */ static int verb; /* Output verbosity level. */ +static int warnings; /* Potential problems in manuals. */ int main(int argc, char *argv[]) { struct mparse *mp; /* parse sequence */ struct manpaths dirs; + struct mdb mdb; + struct recs recs; enum op op; /* current operation */ const char *dir; - char *conf_file; - char *cp; - char pbuf[PATH_MAX], - ibuf[MAXPATHLEN], /* index fname */ - fbuf[MAXPATHLEN]; /* btree fname */ int ch, i, flags; - DB *idx, /* index database */ - *db, /* keyword database */ - *hash; /* temporary keyword hashtable */ + char dirbuf[MAXPATHLEN]; + DB *hash; /* temporary keyword hashtable */ BTREEINFO info; /* btree configuration */ - recno_t maxrec; /* last record number in the index */ - recno_t *recs; /* the numbers of all empty records */ - size_t sz1, sz2, - recsz, /* number of allocated slots in recs */ - reccur; /* current number of empty records */ + size_t sz1, sz2; struct buf buf, /* keyword buffer */ dbuf; /* description buffer */ struct of *of; /* list of files for processing */ @@ -300,48 +322,78 @@ main(int argc, char *argv[]) ++progname; memset(&dirs, 0, sizeof(struct manpaths)); + memset(&mdb, 0, sizeof(struct mdb)); + memset(&recs, 0, sizeof(struct recs)); - verb = 0; - use_all = 0; of = NULL; - db = idx = NULL; mp = NULL; hash = NULL; - recs = NULL; - recsz = reccur = 0; - maxrec = 0; - op = OP_NEW; + op = OP_DEFAULT; dir = NULL; - conf_file = NULL; - while (-1 != (ch = getopt(argc, argv, "aC:d:u:v"))) + while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW"))) switch (ch) { case ('a'): use_all = 1; break; case ('C'): - conf_file = optarg; + if (op) { + fprintf(stderr, + "-C: conflicting options\n"); + goto usage; + } + dir = optarg; + op = OP_CONFFILE; break; case ('d'): + if (op) { + fprintf(stderr, + "-d: conflicting options\n"); + goto usage; + } dir = optarg; op = OP_UPDATE; break; + case ('t'): + dup2(STDOUT_FILENO, STDERR_FILENO); + if (op) { + fprintf(stderr, + "-t: conflicting options\n"); + goto usage; + } + op = OP_TEST; + use_all = 1; + warnings = 1; + break; case ('u'): + if (op) { + fprintf(stderr, + "-u: conflicting options\n"); + goto usage; + } dir = optarg; op = OP_DELETE; break; case ('v'): verb++; break; + case ('W'): + warnings = 1; + break; default: - usage(); - return((int)MANDOCLEVEL_BADARG); + goto usage; } argc -= optind; argv += optind; + if (OP_CONFFILE == op && argc > 0) { + fprintf(stderr, "-C: too many arguments\n"); + goto usage; + } + memset(&info, 0, sizeof(BTREEINFO)); + info.lorder = 4321; info.flags = R_DUP; mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL); @@ -354,45 +406,48 @@ main(int argc, char *argv[]) buf.cp = mandoc_malloc(buf.size); dbuf.cp = mandoc_malloc(dbuf.size); - flags = OP_NEW == op ? O_CREAT|O_TRUNC|O_RDWR : O_CREAT|O_RDWR; + if (OP_TEST == op) { + ofile_argbuild(argc, argv, &of, "."); + if (NULL == of) + goto out; + index_merge(of, mp, &dbuf, &buf, + hash, &mdb, &recs, "."); + goto out; + } if (OP_UPDATE == op || OP_DELETE == op) { - ibuf[0] = fbuf[0] = '\0'; + strlcat(mdb.dbn, dir, MAXPATHLEN); + strlcat(mdb.dbn, "/", MAXPATHLEN); + sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN); - strlcat(fbuf, dir, MAXPATHLEN); - strlcat(fbuf, "/", MAXPATHLEN); - sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN); + strlcat(mdb.idxn, dir, MAXPATHLEN); + strlcat(mdb.idxn, "/", MAXPATHLEN); + sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN); - strlcat(ibuf, dir, MAXPATHLEN); - strlcat(ibuf, "/", MAXPATHLEN); - sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN); - if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) { - fprintf(stderr, "%s: Path too long\n", dir); + fprintf(stderr, "%s: path too long\n", dir); exit((int)MANDOCLEVEL_BADARG); } - db = dbopen(fbuf, flags, 0644, DB_BTREE, &info); - idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL); + flags = O_CREAT | O_RDWR; + mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info); + mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL); - if (NULL == db) { - perror(fbuf); + if (NULL == mdb.db) { + perror(mdb.dbn); exit((int)MANDOCLEVEL_SYSERR); - } else if (NULL == idx) { - perror(ibuf); + } else if (NULL == mdb.idx) { + perror(mdb.idxn); exit((int)MANDOCLEVEL_SYSERR); } - ofile_argbuild(argc, argv, &of); + ofile_argbuild(argc, argv, &of, dir); if (NULL == of) goto out; - of = of->first; + index_prune(of, &mdb, &recs, dir); - index_prune(of, db, fbuf, idx, ibuf, - &maxrec, &recs, &recsz, &reccur); - /* * Go to the root of the respective manual tree. * This must work or no manuals may be found (they're @@ -405,8 +460,7 @@ main(int argc, char *argv[]) exit((int)MANDOCLEVEL_SYSERR); } index_merge(of, mp, &dbuf, &buf, hash, - db, fbuf, idx, ibuf, - maxrec, recs, reccur); + &mdb, &recs, dir); } goto out; @@ -421,84 +475,63 @@ main(int argc, char *argv[]) if (argc > 0) { dirs.paths = mandoc_calloc(argc, sizeof(char *)); dirs.sz = argc; - for (i = 0; i < argc; i++) { - if (NULL == (cp = realpath(argv[i], pbuf))) { - perror(argv[i]); - goto out; - } - dirs.paths[i] = mandoc_strdup(cp); - } + for (i = 0; i < argc; i++) + dirs.paths[i] = mandoc_strdup(argv[i]); } else - manpath_parse(&dirs, conf_file, NULL, NULL); + manpath_parse(&dirs, dir, NULL, NULL); for (i = 0; i < dirs.sz; i++) { - ibuf[0] = fbuf[0] = '\0'; + /* + * Go to the root of the respective manual tree. + * This must work or no manuals may be found: + * They are indexed relative to the root. + */ - strlcat(fbuf, dirs.paths[i], MAXPATHLEN); - strlcat(fbuf, "/", MAXPATHLEN); - sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN); - - strlcat(ibuf, dirs.paths[i], MAXPATHLEN); - strlcat(ibuf, "/", MAXPATHLEN); - sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN); - - if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) { - fprintf(stderr, "%s: Path too long\n", - dirs.paths[i]); - exit((int)MANDOCLEVEL_BADARG); + if (-1 == chdir(dirs.paths[i])) { + perror(dirs.paths[i]); + exit((int)MANDOCLEVEL_SYSERR); } - if (db) - (*db->close)(db); - if (idx) - (*idx->close)(idx); + strlcpy(mdb.dbn, MANDOC_DB, MAXPATHLEN); + strlcpy(mdb.idxn, MANDOC_IDX, MAXPATHLEN); - db = dbopen(fbuf, flags, 0644, DB_BTREE, &info); - idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL); + flags = O_CREAT | O_TRUNC | O_RDWR; + mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info); + mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL); - if (NULL == db) { - perror(fbuf); + if (NULL == mdb.db) { + perror(mdb.dbn); exit((int)MANDOCLEVEL_SYSERR); - } else if (NULL == idx) { - perror(ibuf); + } else if (NULL == mdb.idx) { + perror(mdb.idxn); exit((int)MANDOCLEVEL_SYSERR); } - ofile_free(of); - of = NULL; - - if (-1 == chdir(dirs.paths[i])) { - perror(dirs.paths[i]); - exit((int)MANDOCLEVEL_SYSERR); - } - - ofile_dirbuild(".", NULL, NULL, 0, &of); - - if (NULL == of) - continue; - - of = of->first; - /* - * Go to the root of the respective manual tree. - * This must work or no manuals may be found (they're - * indexed relative to the root). + * Search for manuals and fill the new database. */ - if (-1 == chdir(dirs.paths[i])) { - perror(dirs.paths[i]); - exit((int)MANDOCLEVEL_SYSERR); + strlcpy(dirbuf, dirs.paths[i], MAXPATHLEN); + ofile_dirbuild(".", "", "", 0, &of, dirbuf); + + if (NULL != of) { + index_merge(of, mp, &dbuf, &buf, hash, + &mdb, &recs, dirs.paths[i]); + ofile_free(of); + of = NULL; } - index_merge(of, mp, &dbuf, &buf, hash, db, fbuf, - idx, ibuf, maxrec, recs, reccur); + (*mdb.db->close)(mdb.db); + (*mdb.idx->close)(mdb.idx); + mdb.db = NULL; + mdb.idx = NULL; } out: - if (db) - (*db->close)(db); - if (idx) - (*idx->close)(idx); + if (mdb.db) + (*mdb.db->close)(mdb.db); + if (mdb.idx) + (*mdb.idx->close)(mdb.idx); if (hash) (*hash->close)(hash); if (mp) @@ -508,29 +541,48 @@ out: ofile_free(of); free(buf.cp); free(dbuf.cp); - free(recs); + free(recs.stack); return(MANDOCLEVEL_OK); + +usage: + fprintf(stderr, + "usage: %s [-av] [-C file] | dir ... | -t file ...\n" + " -d dir [file ...] | " + "-u dir [file ...]\n", + progname); + + return((int)MANDOCLEVEL_BADARG); } void index_merge(const struct of *of, struct mparse *mp, struct buf *dbuf, struct buf *buf, DB *hash, - DB *db, const char *dbf, DB *idx, const char *idxf, - recno_t maxrec, const recno_t *recs, size_t reccur) + struct mdb *mdb, struct recs *recs, + const char *basedir) { recno_t rec; - int ch; + int ch, skip; DBT key, val; + DB *files; /* temporary file name table */ + char emptystring[1] = {'\0'}; struct mdoc *mdoc; struct man *man; - const char *fn, *msec, *mtitle, *arch; + char *p; + const char *fn, *msec, *march, *mtitle; + uint64_t mask; size_t sv; unsigned seq; - struct db_val vbuf; + uint64_t vbuf[2]; char type; - for (rec = 0; of; of = of->next) { + if (warnings) { + files = NULL; + hash_reset(&files); + } + + rec = 0; + for (of = of->first; of; of = of->next) { fn = of->fname; /* @@ -550,71 +602,131 @@ index_merge(const struct of *of, struct mparse *mp, if (NULL != mdoc) { msec = mdoc_meta(mdoc)->msec; - arch = mdoc_meta(mdoc)->arch; + march = mdoc_meta(mdoc)->arch; + if (NULL == march) + march = ""; mtitle = mdoc_meta(mdoc)->title; } else if (NULL != man) { msec = man_meta(man)->msec; - arch = NULL; + march = ""; mtitle = man_meta(man)->title; } else { msec = of->sec; - arch = of->arch; + march = of->arch; mtitle = of->title; } /* - * By default, skip a file if the manual section - * and architecture given in the file disagree - * with the directory where the file is located. + * Check whether the manual section given in a file + * agrees with the directory where the file is located. + * Some manuals have suffixes like (3p) on their + * section number either inside the file or in the + * directory name, some are linked into more than one + * section, like encrypt(1) = makekey(8). Do not skip + * manuals for such reasons. */ - if (0 == use_all) { - assert(of->sec); - assert(msec); - if (strcasecmp(msec, of->sec)) - continue; + skip = 0; + assert(of->sec); + assert(msec); + if (strcasecmp(msec, of->sec)) + WARNING(fn, basedir, "Section \"%s\" manual " + "in \"%s\" directory", msec, of->sec); + /* + * Manual page directories exist for each kernel + * architecture as returned by machine(1). + * However, many manuals only depend on the + * application architecture as returned by arch(1). + * For example, some (2/ARM) manuals are shared + * across the "armish" and "zaurus" kernel + * architectures. + * A few manuals are even shared across completely + * different architectures, for example fdformat(1) + * on amd64, i386, sparc, and sparc64. + * Thus, warn about architecture mismatches, + * but don't skip manuals for this reason. + */ - if (NULL == arch) { - if (NULL != of->arch) - continue; - } else if (NULL == of->arch || - strcasecmp(arch, of->arch)) - continue; - } + assert(of->arch); + assert(march); + if (strcasecmp(march, of->arch)) + WARNING(fn, basedir, "Architecture \"%s\" " + "manual in \"%s\" directory", + march, of->arch); - if (NULL == arch) - arch = ""; - - /* + /* * By default, skip a file if the title given * in the file disagrees with the file name. - * If both agree, use the file name as the title, - * because the one in the file usually is all caps. + * Do not warn, this happens for all MLINKs. */ assert(of->title); assert(mtitle); + if (strcasecmp(mtitle, of->title)) + skip = 1; - if (0 == strcasecmp(mtitle, of->title)) - mtitle = of->title; - else if (0 == use_all) + /* + * Build a title string for the file. If it matches + * the location of the file, remember the title as + * found; else, remember it as missing. + */ + + if (warnings) { + buf->len = 0; + buf_appendb(buf, mtitle, strlen(mtitle)); + buf_appendb(buf, "(", 1); + buf_appendb(buf, msec, strlen(msec)); + if ('\0' != *march) { + buf_appendb(buf, "/", 1); + buf_appendb(buf, march, strlen(march)); + } + buf_appendb(buf, ")", 2); + for (p = buf->cp; '\0' != *p; p++) + *p = tolower(*p); + key.data = buf->cp; + key.size = buf->len; + val.data = NULL; + val.size = 0; + if (0 == skip) + val.data = emptystring; + else { + ch = (*files->get)(files, &key, &val, 0); + if (ch < 0) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } else if (ch > 0) { + val.data = (void *)fn; + val.size = strlen(fn) + 1; + } else + val.data = NULL; + } + if (NULL != val.data && + (*files->put)(files, &key, &val, 0) < 0) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } + } + + if (skip && !use_all) continue; - /* + /* * The index record value consists of a nil-terminated * filename, a nil-terminated manual section, and a - * nil-terminated description. Since the description - * may not be set, we set a sentinel to see if we're - * going to write a nil byte in its place. + * nil-terminated description. Use the actual + * location of the file, such that the user can find + * it with man(1). Since the description may not be + * set, we set a sentinel to see if we're going to + * write a nil byte in its place. */ dbuf->len = 0; type = mdoc ? 'd' : (man ? 'a' : 'c'); buf_appendb(dbuf, &type, 1); buf_appendb(dbuf, fn, strlen(fn) + 1); - buf_appendb(dbuf, msec, strlen(msec) + 1); - buf_appendb(dbuf, mtitle, strlen(mtitle) + 1); - buf_appendb(dbuf, arch, strlen(arch) + 1); + buf_appendb(dbuf, of->sec, strlen(of->sec) + 1); + buf_appendb(dbuf, of->title, strlen(of->title) + 1); + buf_appendb(dbuf, of->arch, strlen(of->arch) + 1); sv = dbuf->len; @@ -630,22 +742,35 @@ index_merge(const struct of *of, struct mparse *mp, else if (man) pman_node(hash, buf, dbuf, man_node(man)); else - pformatted(hash, buf, dbuf, of); + pformatted(hash, buf, dbuf, of, basedir); + /* Test mode, do not access any database. */ + + if (NULL == mdb->db || NULL == mdb->idx) + continue; + /* + * Make sure the file name is always registered + * as an .Nm search key. + */ + buf->len = 0; + buf_append(buf, of->title); + hash_put(hash, buf, TYPE_Nm); + + /* * Reclaim an empty index record, if available. * Use its record number for all new btree nodes. */ - if (reccur > 0) { - --reccur; - rec = recs[(int)reccur]; - } else if (maxrec > 0) { - rec = maxrec; - maxrec = 0; + if (recs->cur > 0) { + recs->cur--; + rec = recs->stack[(int)recs->cur]; + } else if (recs->last > 0) { + rec = recs->last; + recs->last = 0; } else rec++; - vbuf.rec = htobe32(rec); + vbuf[1] = htobe64(rec); /* * Copy from the in-memory hashtable of pending @@ -655,16 +780,18 @@ index_merge(const struct of *of, struct mparse *mp, seq = R_FIRST; while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { seq = R_NEXT; - vbuf.mask = htobe64(*(uint64_t *)val.data); - val.size = sizeof(struct db_val); + assert(sizeof(uint64_t) == val.size); + memcpy(&mask, val.data, val.size); + vbuf[0] = htobe64(mask); + val.size = sizeof(vbuf); val.data = &vbuf; - dbt_put(db, dbf, &key, &val); + dbt_put(mdb->db, mdb->dbn, &key, &val); } if (ch < 0) { perror("hash"); exit((int)MANDOCLEVEL_SYSERR); } - + /* * Apply to the index. If we haven't had a description * set, put an empty one in now. @@ -680,10 +807,27 @@ index_merge(const struct of *of, struct mparse *mp, val.size = dbuf->len; if (verb) - printf("%s: Added index\n", fn); + printf("%s: Adding to index: %s\n", basedir, fn); - dbt_put(idx, idxf, &key, &val); + dbt_put(mdb->idx, mdb->idxn, &key, &val); } + + /* + * Iterate the remembered file titles and check that + * all files can be found by their main title. + */ + + if (warnings) { + seq = R_FIRST; + while (0 == (*files->seq)(files, &key, &val, seq)) { + seq = R_NEXT; + if (val.size) + WARNING((char *)val.data, basedir, + "Probably unreachable, title " + "is %s", (char *)key.data); + } + (*files->close)(files); + } } /* @@ -693,22 +837,22 @@ index_merge(const struct of *of, struct mparse *mp, * in `idx' (zeroing its value size). */ static void -index_prune(const struct of *ofile, DB *db, const char *dbf, - DB *idx, const char *idxf, recno_t *maxrec, - recno_t **recs, size_t *recsz, size_t *reccur) +index_prune(const struct of *ofile, struct mdb *mdb, + struct recs *recs, const char *basedir) { const struct of *of; const char *fn; - struct db_val *vbuf; + uint64_t vbuf[2]; unsigned seq, sseq; DBT key, val; int ch; - *reccur = 0; + recs->cur = 0; seq = R_FIRST; - while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) { + while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) { seq = R_NEXT; - *maxrec = *(recno_t *)key.data; + assert(sizeof(recno_t) == key.size); + memcpy(&recs->last, key.data, key.size); /* Deleted records are zero-sized. Skip them. */ @@ -726,12 +870,12 @@ index_prune(const struct of *ofile, DB *db, const char if (NULL == memchr(fn, '\0', val.size - 1)) break; - /* + /* * Search for the file in those we care about. * XXX: build this into a tree. Too slow. */ - for (of = ofile; of; of = of->next) + for (of = ofile->first; of; of = of->next) if (0 == strcmp(fn, of->fname)) break; @@ -744,55 +888,59 @@ index_prune(const struct of *ofile, DB *db, const char */ sseq = R_FIRST; - while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) { + while (0 == (ch = (*mdb->db->seq)(mdb->db, + &key, &val, sseq))) { sseq = R_NEXT; - if (sizeof(struct db_val) != val.size) + if (sizeof(vbuf) != val.size) break; - vbuf = val.data; - if (*maxrec != betoh32(vbuf->rec)) + memcpy(vbuf, val.data, val.size); + if (recs->last != betoh64(vbuf[1])) continue; - if ((ch = (*db->del)(db, &key, R_CURSOR)) < 0) + if ((ch = (*mdb->db->del)(mdb->db, + &key, R_CURSOR)) < 0) break; } if (ch < 0) { - perror(dbf); + perror(mdb->dbn); exit((int)MANDOCLEVEL_SYSERR); } else if (1 != ch) { - fprintf(stderr, "%s: Corrupt database\n", dbf); + fprintf(stderr, "%s: corrupt database\n", + mdb->dbn); exit((int)MANDOCLEVEL_SYSERR); } if (verb) - printf("%s: Deleted index\n", fn); + printf("%s: Deleting from index: %s\n", + basedir, fn); val.size = 0; - ch = (*idx->put)(idx, &key, &val, R_CURSOR); + ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR); if (ch < 0) break; cont: - if (*reccur >= *recsz) { - *recsz += MANDOC_SLOP; - *recs = mandoc_realloc - (*recs, *recsz * sizeof(recno_t)); + if (recs->cur >= recs->size) { + recs->size += MANDOC_SLOP; + recs->stack = mandoc_realloc(recs->stack, + recs->size * sizeof(recno_t)); } - (*recs)[(int)*reccur] = *maxrec; - (*reccur)++; + recs->stack[(int)recs->cur] = recs->last; + recs->cur++; } if (ch < 0) { - perror(idxf); + perror(mdb->idxn); exit((int)MANDOCLEVEL_SYSERR); } else if (1 != ch) { - fprintf(stderr, "%s: Corrupt index\n", idxf); + fprintf(stderr, "%s: corrupt index\n", mdb->idxn); exit((int)MANDOCLEVEL_SYSERR); } - (*maxrec)++; + recs->last++; } /* @@ -1068,6 +1216,7 @@ pmdoc_Sh(MDOC_ARGS) static void hash_put(DB *db, const struct buf *buf, uint64_t mask) { + uint64_t oldmask; DBT key, val; int rc; @@ -1080,8 +1229,11 @@ hash_put(DB *db, const struct buf *buf, uint64_t mask) if ((rc = (*db->get)(db, &key, &val, 0)) < 0) { perror("hash"); exit((int)MANDOCLEVEL_SYSERR); - } else if (0 == rc) - mask |= *(uint64_t *)val.data; + } else if (0 == rc) { + assert(sizeof(uint64_t) == val.size); + memcpy(&oldmask, val.data, val.size); + mask |= oldmask; + } val.data = &mask; val.size = sizeof(uint64_t); @@ -1171,8 +1323,8 @@ static int pman_node(MAN_ARGS) { const struct man_node *head, *body; - const char *start, *sv; - size_t sz; + char *start, *sv, *title; + size_t sz, titlesz; if (NULL == n) return(0); @@ -1195,9 +1347,55 @@ pman_node(MAN_ARGS) NULL != (body = body->child) && MAN_TEXT == body->type) { - assert(body->string); - start = sv = body->string; + title = NULL; + titlesz = 0; + /* + * Suck the entire NAME section into memory. + * Yes, we might run away. + * But too many manuals have big, spread-out + * NAME sections over many lines. + */ + for ( ; NULL != body; body = body->next) { + if (MAN_TEXT != body->type) + break; + if (0 == (sz = strlen(body->string))) + continue; + title = mandoc_realloc + (title, titlesz + sz + 1); + memcpy(title + titlesz, body->string, sz); + titlesz += sz + 1; + title[(int)titlesz - 1] = ' '; + } + if (NULL == title) + return(0); + title = mandoc_realloc(title, titlesz + 1); + title[(int)titlesz] = '\0'; + + /* Skip leading space. */ + + sv = title; + while (isspace((unsigned char)*sv)) + sv++; + + if (0 == (sz = strlen(sv))) { + free(title); + return(0); + } + + /* Erase trailing space. */ + + start = &sv[sz - 1]; + while (start > sv && isspace((unsigned char)*start)) + *start-- = '\0'; + + if (start == sv) { + free(title); + return(0); + } + + start = sv; + /* * Go through a special heuristic dance here. * This is why -man manuals are great! @@ -1234,14 +1432,17 @@ pman_node(MAN_ARGS) if (sv == start) { buf_append(buf, start); + free(title); return(1); } - while (' ' == *start) + while (isspace((unsigned char)*start)) start++; if (0 == strncmp(start, "-", 1)) start += 1; + else if (0 == strncmp(start, "\\-\\-", 4)) + start += 4; else if (0 == strncmp(start, "\\-", 2)) start += 2; else if (0 == strncmp(start, "\\(en", 4)) @@ -1257,6 +1458,7 @@ pman_node(MAN_ARGS) buf_appendb(buf, start, sz); hash_put(hash, buf, TYPE_Nd); + free(title); } } @@ -1272,15 +1474,15 @@ pman_node(MAN_ARGS) * By necessity, this involves rather crude guesswork. */ static void -pformatted(DB *hash, struct buf *buf, struct buf *dbuf, - const struct of *of) +pformatted(DB *hash, struct buf *buf, struct buf *dbuf, + const struct of *of, const char *basedir) { FILE *stream; - char *line, *p; - size_t len, plen; + char *line, *p, *title; + size_t len, plen, titlesz; if (NULL == (stream = fopen(of->fname, "r"))) { - perror(of->fname); + WARNING(of->fname, basedir, "%s", strerror(errno)); return; } @@ -1309,7 +1511,32 @@ pformatted(DB *hash, struct buf *buf, struct buf *dbuf while (NULL != (line = fgetln(stream, &len))) if ('\n' != *line && ' ' != *line) break; + + /* + * Read up until the next section into a buffer. + * Strip the leading and trailing newline from each read line, + * appending a trailing space. + * Ignore empty (whitespace-only) lines. + */ + titlesz = 0; + title = NULL; + + while (NULL != (line = fgetln(stream, &len))) { + if (' ' != *line || '\n' != line[(int)len - 1]) + break; + while (len > 0 && isspace((unsigned char)*line)) { + line++; + len--; + } + if (1 == len) + continue; + title = mandoc_realloc(title, titlesz + len); + memcpy(title + titlesz, line, len); + titlesz += len; + title[(int)titlesz - 1] = ' '; + } + /* * If no page content can be found, or the input line * is already the next section header, or there is no @@ -1317,15 +1544,18 @@ pformatted(DB *hash, struct buf *buf, struct buf *dbuf * description. */ - line = fgetln(stream, &len); - if (NULL == line || ' ' != *line || '\n' != line[(int)len - 1]) { + if (NULL == title || '\0' == *title) { + WARNING(of->fname, basedir, + "Cannot find NAME section"); buf_appendb(dbuf, buf->cp, buf->size); hash_put(hash, buf, TYPE_Nd); fclose(stream); + free(title); return; } - line[(int)--len] = '\0'; + title = mandoc_realloc(title, titlesz + 1); + title[(int)titlesz] = '\0'; /* * Skip to the first dash. @@ -1333,17 +1563,17 @@ pformatted(DB *hash, struct buf *buf, struct buf *dbuf * bytes). */ - if (NULL != (p = strstr(line, "- "))) { + if (NULL != (p = strstr(title, "- "))) { for (p += 2; ' ' == *p || '\b' == *p; p++) /* Skip to next word. */ ; - } else - p = line; - - if ((plen = strlen(p)) > 70) { - plen = 70; - p[plen] = '\0'; + } else { + WARNING(of->fname, basedir, + "No dash in title line"); + p = title; } + plen = strlen(p); + /* Strip backspace-encoding from line. */ while (NULL != (line = memchr(p, '\b', plen))) { @@ -1361,13 +1591,16 @@ pformatted(DB *hash, struct buf *buf, struct buf *dbuf buf_appendb(buf, p, plen + 1); hash_put(hash, buf, TYPE_Nd); fclose(stream); + free(title); } static void -ofile_argbuild(int argc, char *argv[], struct of **of) +ofile_argbuild(int argc, char *argv[], + struct of **of, const char *basedir) { char buf[MAXPATHLEN]; - char *sec, *arch, *title, *p; + const char *sec, *arch, *title; + char *p; int i, src_form; struct of *nof; @@ -1384,11 +1617,11 @@ ofile_argbuild(int argc, char *argv[], struct of **of) fprintf(stderr, "%s: Path too long\n", argv[i]); continue; } - sec = arch = title = NULL; + sec = arch = title = ""; src_form = 0; p = strrchr(buf, '\0'); while (p-- > buf) { - if (NULL == sec && '.' == *p) { + if ('\0' == *sec && '.' == *p) { sec = p + 1; *p = '\0'; if ('0' == *sec) @@ -1399,7 +1632,7 @@ ofile_argbuild(int argc, char *argv[], struct of **of) } if ('/' != *p) continue; - if (NULL == title) { + if ('\0' == *title) { title = p + 1; *p = '\0'; continue; @@ -1412,8 +1645,11 @@ ofile_argbuild(int argc, char *argv[], struct of **of) arch = p + 1; break; } - if (NULL == title) + if ('\0' == *title) { + WARNING(argv[i], basedir, + "Cannot deduce title from filename"); title = buf; + } /* * Build the file structure. @@ -1421,10 +1657,8 @@ ofile_argbuild(int argc, char *argv[], struct of **of) nof = mandoc_calloc(1, sizeof(struct of)); nof->fname = mandoc_strdup(argv[i]); - if (NULL != sec) - nof->sec = mandoc_strdup(sec); - if (NULL != arch) - nof->arch = mandoc_strdup(arch); + nof->sec = mandoc_strdup(sec); + nof->arch = mandoc_strdup(arch); nof->title = mandoc_strdup(title); nof->src_form = src_form; @@ -1447,13 +1681,13 @@ ofile_argbuild(int argc, char *argv[], struct of **of) * Recursively build up a list of files to parse. * We use this instead of ftw() and so on because I don't want global * variables hanging around. - * This ignores the mandoc.db and mandoc.index files, but assumes that + * This ignores the mandocdb.db and mandocdb.index files, but assumes that * everything else is a manual. * Pass in a pointer to a NULL structure for the first invocation. */ static void ofile_dirbuild(const char *dir, const char* psec, const char *parch, - int p_src_form, struct of **of) + int p_src_form, struct of **of, char *basedir) { char buf[MAXPATHLEN]; size_t sz; @@ -1465,8 +1699,8 @@ ofile_dirbuild(const char *dir, const char* psec, cons int src_form; if (NULL == (d = opendir(dir))) { - perror(dir); - exit((int)MANDOCLEVEL_SYSERR); + WARNING("", dir, "%s", strerror(errno)); + return; } while (NULL != (dp = readdir(d))) { @@ -1487,41 +1721,64 @@ ofile_dirbuild(const char *dir, const char* psec, cons * cat
/[/] */ - if (NULL == sec) { + if ('\0' == *sec) { if(0 == strncmp("man", fn, 3)) { src_form |= MANDOC_SRC; sec = fn + 3; } else if (0 == strncmp("cat", fn, 3)) { src_form |= MANDOC_FORM; sec = fn + 3; - } else if (use_all) - sec = fn; - else - continue; - } else if (NULL == arch && (use_all || - NULL == strchr(fn, '.'))) + } else { + WARNING(fn, basedir, "Bad section"); + if (use_all) + sec = fn; + else + continue; + } + } else if ('\0' == *arch) { + if (NULL != strchr(fn, '.')) { + WARNING(fn, basedir, "Bad architecture"); + if (0 == use_all) + continue; + } arch = fn; - else if (0 == use_all) - continue; + } else { + WARNING(fn, basedir, "Excessive subdirectory"); + if (0 == use_all) + continue; + } buf[0] = '\0'; strlcat(buf, dir, MAXPATHLEN); strlcat(buf, "/", MAXPATHLEN); + strlcat(basedir, "/", MAXPATHLEN); + strlcat(basedir, fn, MAXPATHLEN); sz = strlcat(buf, fn, MAXPATHLEN); if (MAXPATHLEN <= sz) { - fprintf(stderr, "%s: Path too long\n", dir); - exit((int)MANDOCLEVEL_SYSERR); + WARNING(fn, basedir, "Path too long"); + continue; } - - ofile_dirbuild(buf, sec, arch, src_form, of); + + ofile_dirbuild(buf, sec, arch, + src_form, of, basedir); + + p = strrchr(basedir, '/'); + *p = '\0'; + continue; } - if (DT_REG != dp->d_type || - (NULL == psec && !use_all) || - ! strcmp(MANDOC_DB, fn) || - ! strcmp(MANDOC_IDX, fn)) + if (DT_REG != dp->d_type) { + WARNING(fn, basedir, "Not a regular file"); continue; + } + if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn)) + continue; + if ('\0' == *psec) { + WARNING(fn, basedir, "File outside section"); + if (0 == use_all) + continue; + } /* * By default, skip files where the file name suffix @@ -1530,23 +1787,23 @@ ofile_dirbuild(const char *dir, const char* psec, cons */ suffix = strrchr(fn, '.'); - if (0 == use_all) { - if (NULL == suffix) + if (NULL == suffix) { + WARNING(fn, basedir, "No filename suffix"); + if (0 == use_all) continue; - if ((MANDOC_SRC & src_form && - strcmp(suffix + 1, psec)) || + } else if ((MANDOC_SRC & src_form && + strcmp(suffix + 1, psec)) || (MANDOC_FORM & src_form && - strcmp(suffix + 1, "0"))) - continue; - } - if (NULL != suffix) { + strcmp(suffix + 1, "0"))) { + WARNING(fn, basedir, "Wrong filename suffix"); + if (0 == use_all) + continue; if ('0' == suffix[1]) src_form |= MANDOC_FORM; else if ('1' <= suffix[1] && '9' >= suffix[1]) src_form |= MANDOC_SRC; } - /* * Skip formatted manuals if a source version is * available. Ignore the age: it is very unlikely @@ -1555,11 +1812,11 @@ ofile_dirbuild(const char *dir, const char* psec, cons * and in ports, old manuals get removed on update. */ if (0 == use_all && MANDOC_FORM & src_form && - NULL != psec) { + '\0' != *psec) { buf[0] = '\0'; strlcat(buf, dir, MAXPATHLEN); p = strrchr(buf, '/'); - if (NULL != parch && NULL != p) + if ('\0' != *parch && NULL != p) for (p--; p > buf; p--) if ('/' == *p) break; @@ -1572,7 +1829,7 @@ ofile_dirbuild(const char *dir, const char* psec, cons strlcat(buf, "/", MAXPATHLEN); sz = strlcat(buf, fn, MAXPATHLEN); if (sz >= MAXPATHLEN) { - fprintf(stderr, "%s: Path too long\n", buf); + WARNING(fn, basedir, "Path too long"); continue; } q = strrchr(buf, '.'); @@ -1580,8 +1837,7 @@ ofile_dirbuild(const char *dir, const char* psec, cons *q = '\0'; sz = strlcat(buf, psec, MAXPATHLEN); if (sz >= MAXPATHLEN) { - fprintf(stderr, - "%s: Path too long\n", buf); + WARNING(fn, basedir, "Path too long"); continue; } if (0 == access(buf, R_OK)) @@ -1589,23 +1845,22 @@ ofile_dirbuild(const char *dir, const char* psec, cons } } - assert('.' == dir[0]); - assert('/' == dir[1]); buf[0] = '\0'; - strlcat(buf, dir + 2, MAXPATHLEN); - strlcat(buf, "/", MAXPATHLEN); + assert('.' == dir[0]); + if ('/' == dir[1]) { + strlcat(buf, dir + 2, MAXPATHLEN); + strlcat(buf, "/", MAXPATHLEN); + } sz = strlcat(buf, fn, MAXPATHLEN); if (sz >= MAXPATHLEN) { - fprintf(stderr, "%s: Path too long\n", dir); + WARNING(fn, basedir, "Path too long"); continue; } nof = mandoc_calloc(1, sizeof(struct of)); nof->fname = mandoc_strdup(buf); - if (NULL != psec) - nof->sec = mandoc_strdup(psec); - if (NULL != parch) - nof->arch = mandoc_strdup(parch); + nof->sec = mandoc_strdup(psec); + nof->arch = mandoc_strdup(parch); nof->src_form = src_form; /* @@ -1639,7 +1894,10 @@ ofile_free(struct of *of) { struct of *nof; - while (of) { + if (NULL != of) + of = of->first; + + while (NULL != of) { nof = of->next; free(of->fname); free(of->sec); @@ -1648,15 +1906,4 @@ ofile_free(struct of *of) free(of); of = nof; } -} - -static void -usage(void) -{ - - fprintf(stderr, "usage: %s [-v] " - "[-C file] |" - " dir ... |" - " -d dir [file ...] |" - " -u dir [file ...]\n", progname); }