=================================================================== RCS file: /cvs/mandoc/mandocdb.c,v retrieving revision 1.1 retrieving revision 1.23 diff -u -p -r1.1 -r1.23 --- mandoc/mandocdb.c 2011/07/14 10:57:02 1.1 +++ mandoc/mandocdb.c 2011/12/03 18:47:09 1.23 @@ -1,6 +1,7 @@ -/* $Id: mandocdb.c,v 1.1 2011/07/14 10:57:02 kristaps Exp $ */ +/* $Id: mandocdb.c,v 1.23 2011/12/03 18:47:09 kristaps Exp $ */ /* * Copyright (c) 2011 Kristaps Dzonsons + * Copyright (c) 2011 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -19,17 +20,25 @@ #endif #include +#include +#include #include +#include #include #include #include #include #include #include +#include -#ifdef __linux__ +#if defined(__linux__) +# include # include +#elif defined(__APPLE__) +# include +# include #else # include #endif @@ -37,42 +46,40 @@ #include "man.h" #include "mdoc.h" #include "mandoc.h" +#include "mandocdb.h" +#include "manpath.h" -#define MANDOC_DB "mandoc.db" -#define MANDOC_IDX "mandoc.index" #define MANDOC_BUFSZ BUFSIZ -#define MANDOC_FLAGS O_CREAT|O_TRUNC|O_RDWR #define MANDOC_SLOP 1024 -/* Bit-fields. See makewhatis.1. */ +#define MANDOC_SRC 0x1 +#define MANDOC_FORM 0x2 -#define TYPE_NAME 0x01 -#define TYPE_FUNCTION 0x02 -#define TYPE_UTILITY 0x04 -#define TYPE_INCLUDES 0x08 -#define TYPE_VARIABLE 0x10 -#define TYPE_STANDARD 0x20 -#define TYPE_AUTHOR 0x40 -#define TYPE_CONFIG 0x80 -#define TYPE_DESC 0x100 -#define TYPE_XREF 0x200 -#define TYPE_PATH 0x400 -#define TYPE_ENV 0x800 -#define TYPE_ERR 0x1000 +/* Tiny list for files. No need to bring in QUEUE. */ +struct of { + char *fname; /* heap-allocated */ + char *sec; + char *arch; + char *title; + int src_form; + struct of *next; /* NULL for last one */ + struct of *first; /* first in list */ +}; + /* Buffer for storing growable data. */ struct buf { char *cp; - size_t len; - size_t size; + size_t len; /* current length */ + size_t size; /* total buffer size */ }; /* Operation we're going to perform. */ enum op { OP_NEW = 0, /* new database */ - OP_UPDATE, /* update entries in existing database */ + OP_UPDATE, /* delete/add entries in existing database */ OP_DELETE /* delete entries from existing database */ }; @@ -92,8 +99,21 @@ static void buf_append(struct buf *, const char *); static void buf_appendb(struct buf *, const void *, size_t); static void dbt_put(DB *, const char *, DBT *, DBT *); -static void hash_put(DB *, const struct buf *, int); +static void hash_put(DB *, const struct buf *, uint64_t); static void hash_reset(DB **); +static void index_merge(const struct of *, struct mparse *, + struct buf *, struct buf *, DB *, + DB *, const char *, DB *, const char *, + recno_t, const recno_t *, size_t); +static void index_prune(const struct of *, DB *, + const char *, DB *, const char *, + recno_t *, recno_t **, size_t *); +static void ofile_argbuild(int, char *[], struct of **); +static int ofile_dirbuild(const char *, const char *, + const char *, int, struct of **); +static void ofile_free(struct of *); +static void pformatted(DB *, struct buf *, struct buf *, + const struct of *); static int pman_node(MAN_ARGS); static void pmdoc_node(MDOC_ARGS); static void pmdoc_An(MDOC_ARGS); @@ -240,37 +260,31 @@ static const pmdoc_nf mdocs[MDOC_MAX] = { }; static const char *progname; +static int use_all; /* Use all directories and files. */ +static int verb; /* Output verbosity level. */ int main(int argc, char *argv[]) { struct mparse *mp; /* parse sequence */ - struct mdoc *mdoc; /* resulting mdoc */ - struct man *man; /* resulting man */ + struct manpaths dirs; enum op op; /* current operation */ - char *fn; /* current file being parsed */ - const char *msec, /* manual section */ - *mtitle, /* manual title */ - *arch, /* manual architecture */ - *dir; /* result dir (default: cwd) */ + const char *dir; char ibuf[MAXPATHLEN], /* index fname */ - fbuf[MAXPATHLEN], /* btree fname */ - vbuf[8]; /* stringified record number */ - int ch, seq, sseq, verb, i; + fbuf[MAXPATHLEN]; /* btree fname */ + int ch, i, flags; DB *idx, /* index database */ *db, /* keyword database */ *hash; /* temporary keyword hashtable */ - DBT key, val; - enum mandoclevel ec; /* exit status */ - size_t sv; BTREEINFO info; /* btree configuration */ - recno_t rec, - maxrec; /* supremum of all records */ - recno_t *recs; /* buffer of empty records */ - size_t recsz, /* buffer size of recs */ - reccur; /* valid number of recs */ + recno_t maxrec; /* last record number in the index */ + recno_t *recs; /* the numbers of all empty records */ + size_t sz1, sz2, + recsz, /* number of allocated slots in recs */ + reccur; /* current number of empty records */ struct buf buf, /* keyword buffer */ dbuf; /* description buffer */ + struct of *of; /* list of files for processing */ extern int optind; extern char *optarg; @@ -280,8 +294,11 @@ main(int argc, char *argv[]) else ++progname; - dir = ""; + memset(&dirs, 0, sizeof(struct manpaths)); + verb = 0; + use_all = 0; + of = NULL; db = idx = NULL; mp = NULL; hash = NULL; @@ -289,22 +306,21 @@ main(int argc, char *argv[]) recsz = reccur = 0; maxrec = 0; op = OP_NEW; - ec = MANDOCLEVEL_SYSERR; + dir = NULL; - memset(&buf, 0, sizeof(struct buf)); - memset(&dbuf, 0, sizeof(struct buf)); - - while (-1 != (ch = getopt(argc, argv, "d:ruv"))) + while (-1 != (ch = getopt(argc, argv, "ad:u:v"))) switch (ch) { + case ('a'): + use_all = 1; + break; case ('d'): dir = optarg; + op = OP_UPDATE; break; - case ('r'): + case ('u'): + dir = optarg; op = OP_DELETE; break; - case ('u'): - op = OP_UPDATE; - break; case ('v'): verb++; break; @@ -316,176 +332,273 @@ main(int argc, char *argv[]) argc -= optind; argv += optind; - ibuf[0] = ibuf[MAXPATHLEN - 2] = - fbuf[0] = fbuf[MAXPATHLEN - 2] = '\0'; + memset(&info, 0, sizeof(BTREEINFO)); + info.flags = R_DUP; - strlcat(fbuf, dir, MAXPATHLEN); - strlcat(fbuf, MANDOC_DB, MAXPATHLEN); + mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL); - strlcat(ibuf, dir, MAXPATHLEN); - strlcat(ibuf, MANDOC_IDX, MAXPATHLEN); + memset(&buf, 0, sizeof(struct buf)); + memset(&dbuf, 0, sizeof(struct buf)); - if ('\0' != fbuf[MAXPATHLEN - 2] || - '\0' != ibuf[MAXPATHLEN - 2]) { - fprintf(stderr, "%s: Path too long\n", dir); - goto out; - } + buf.size = dbuf.size = MANDOC_BUFSZ; - /* - * For the keyword database, open a BTREE database that allows - * duplicates. - * For the index database, use a standard RECNO database type. - * Truncate the database if we're creating a new one. - */ + buf.cp = mandoc_malloc(buf.size); + dbuf.cp = mandoc_malloc(dbuf.size); - memset(&info, 0, sizeof(BTREEINFO)); - info.flags = R_DUP; + flags = OP_NEW == op ? O_CREAT|O_TRUNC|O_RDWR : O_CREAT|O_RDWR; - if (OP_NEW == op) { - db = dbopen(fbuf, MANDOC_FLAGS, 0644, DB_BTREE, &info); - idx = dbopen(ibuf, MANDOC_FLAGS, 0644, DB_RECNO, NULL); - } else { - db = dbopen(fbuf, O_CREAT|O_RDWR, 0644, DB_BTREE, &info); - idx = dbopen(ibuf, O_CREAT|O_RDWR, 0644, DB_RECNO, NULL); - } + if (OP_UPDATE == op || OP_DELETE == op) { + ibuf[0] = fbuf[0] = '\0'; - if (NULL == db) { - perror(fbuf); + strlcat(fbuf, dir, MAXPATHLEN); + strlcat(fbuf, "/", MAXPATHLEN); + sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN); + + strlcat(ibuf, dir, MAXPATHLEN); + strlcat(ibuf, "/", MAXPATHLEN); + sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN); + + if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) { + fprintf(stderr, "%s: Path too long\n", dir); + exit((int)MANDOCLEVEL_BADARG); + } + + db = dbopen(fbuf, flags, 0644, DB_BTREE, &info); + idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL); + + if (NULL == db) { + perror(fbuf); + exit((int)MANDOCLEVEL_SYSERR); + } else if (NULL == idx) { + perror(ibuf); + exit((int)MANDOCLEVEL_SYSERR); + } + + if (verb > 2) { + printf("%s: Opened\n", fbuf); + printf("%s: Opened\n", ibuf); + } + + ofile_argbuild(argc, argv, &of); + if (NULL == of) + goto out; + + of = of->first; + + index_prune(of, db, fbuf, idx, ibuf, + &maxrec, &recs, &recsz); + + /* + * Go to the root of the respective manual tree + * such that .so links work. In case of failure, + * just prod on, even though .so links won't work. + */ + + if (OP_UPDATE == op) { + chdir(dir); + index_merge(of, mp, &dbuf, &buf, hash, + db, fbuf, idx, ibuf, + maxrec, recs, reccur); + } + goto out; - } else if (NULL == db) { - perror(ibuf); - goto out; } /* - * If we're going to delete or update a database, remove the - * entries now (both the index and all keywords pointing to it). - * This doesn't actually remove them: it only sets their record - * value lengths to zero. - * While doing so, add the empty records to a list we'll access - * later in re-adding entries to the database. + * Configure the directories we're going to scan. + * If we have command-line arguments, use them. + * If not, we use man(1)'s method (see mandocdb.8). */ - if (OP_DELETE == op || OP_UPDATE == op) { - seq = R_FIRST; - while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) { - seq = R_NEXT; - maxrec = *(recno_t *)key.data; - if (0 == val.size && OP_UPDATE == op) { - if (reccur >= recsz) { - recsz += MANDOC_SLOP; - recs = mandoc_realloc - (recs, recsz * sizeof(recno_t)); - } - recs[(int)reccur] = maxrec; - reccur++; - continue; - } + if (argc > 0) { + dirs.paths = mandoc_malloc(argc * sizeof(char *)); + dirs.sz = argc; + for (i = 0; i < argc; i++) + dirs.paths[i] = mandoc_strdup(argv[i]); + } else + manpath_parse(&dirs, NULL, NULL); - fn = (char *)val.data; - for (i = 0; i < argc; i++) - if (0 == strcmp(fn, argv[i])) - break; + for (i = 0; i < dirs.sz; i++) { + ibuf[0] = fbuf[0] = '\0'; - if (i == argc) - continue; + strlcat(fbuf, dirs.paths[i], MAXPATHLEN); + strlcat(fbuf, "/", MAXPATHLEN); + sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN); - sseq = R_FIRST; - while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) { - sseq = R_NEXT; - assert(8 == val.size); - if (maxrec != *(recno_t *)(val.data + 4)) - continue; - if (verb > 1) - printf("%s: Deleted keyword: %s\n", - fn, (char *)key.data); - ch = (*db->del)(db, &key, R_CURSOR); - if (ch < 0) - break; - } - if (ch < 0) { - perror(fbuf); - exit((int)MANDOCLEVEL_SYSERR); - } + strlcat(ibuf, dirs.paths[i], MAXPATHLEN); + strlcat(ibuf, "/", MAXPATHLEN); + sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN); - if (verb) - printf("%s: Deleted index\n", fn); + if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) { + fprintf(stderr, "%s: Path too long\n", + dirs.paths[i]); + exit((int)MANDOCLEVEL_BADARG); + } - val.size = 0; - ch = (*idx->put)(idx, &key, &val, R_CURSOR); - if (ch < 0) { - perror(ibuf); - exit((int)MANDOCLEVEL_SYSERR); - } + if (db) + (*db->close)(db); + if (idx) + (*idx->close)(idx); - if (OP_UPDATE == op) { - if (reccur >= recsz) { - recsz += MANDOC_SLOP; - recs = mandoc_realloc - (recs, recsz * sizeof(recno_t)); - } - recs[(int)reccur] = maxrec; - reccur++; - } + db = dbopen(fbuf, flags, 0644, DB_BTREE, &info); + idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL); + + if (NULL == db) { + perror(fbuf); + exit((int)MANDOCLEVEL_SYSERR); + } else if (NULL == idx) { + perror(ibuf); + exit((int)MANDOCLEVEL_SYSERR); } - maxrec++; - } - if (OP_DELETE == op) { - ec = MANDOCLEVEL_OK; - goto out; + if (verb > 2) { + printf("%s: Truncated\n", fbuf); + printf("%s: Truncated\n", ibuf); + } + + ofile_free(of); + of = NULL; + + if ( ! ofile_dirbuild(dirs.paths[i], NULL, NULL, + 0, &of)) + exit((int)MANDOCLEVEL_SYSERR); + + if (NULL == of) + continue; + + of = of->first; + + /* + * Go to the root of the respective manual tree + * such that .so links work. In case of failure, + * just prod on, even though .so links won't work. + */ + + chdir(dirs.paths[i]); + index_merge(of, mp, &dbuf, &buf, hash, db, fbuf, + idx, ibuf, maxrec, recs, reccur); } - /* - * Add records to the database. - * Try parsing each manual given on the command line. - * If we fail, then emit an error and keep on going. - * Take resulting trees and push them down into the database code. - * Use the auto-parser and don't report any errors. - */ +out: + if (db) + (*db->close)(db); + if (idx) + (*idx->close)(idx); + if (hash) + (*hash->close)(hash); + if (mp) + mparse_free(mp); - mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL); + manpath_free(&dirs); + ofile_free(of); + free(buf.cp); + free(dbuf.cp); + free(recs); - buf.size = dbuf.size = MANDOC_BUFSZ; - buf.cp = mandoc_malloc(buf.size); - dbuf.cp = mandoc_malloc(dbuf.size); + return(MANDOCLEVEL_OK); +} - for (rec = 0, i = 0; i < argc; i++) { - fn = argv[i]; - if (OP_UPDATE == op) { - if (reccur > 0) { - --reccur; - rec = recs[(int)reccur]; - } else if (maxrec > 0) { - rec = maxrec; - maxrec = 0; - } else - rec++; +void +index_merge(const struct of *of, struct mparse *mp, + struct buf *dbuf, struct buf *buf, DB *hash, + DB *db, const char *dbf, DB *idx, const char *idxf, + recno_t maxrec, const recno_t *recs, size_t reccur) +{ + recno_t rec; + int ch; + DBT key, val; + struct mdoc *mdoc; + struct man *man; + const char *fn, *msec, *mtitle, *arch; + size_t sv; + unsigned seq; + struct db_val vbuf; + + for (rec = 0; of; of = of->next) { + fn = of->fname; + + /* + * Reclaim an empty index record, if available. + */ + + if (reccur > 0) { + --reccur; + rec = recs[(int)reccur]; + } else if (maxrec > 0) { + rec = maxrec; + maxrec = 0; } else rec++; mparse_reset(mp); hash_reset(&hash); + mdoc = NULL; + man = NULL; - if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) { - fprintf(stderr, "%s: Parse failure\n", fn); - continue; + /* + * Try interpreting the file as mdoc(7) or man(7) + * source code, unless it is already known to be + * formatted. Fall back to formatted mode. + */ + + if ((MANDOC_SRC & of->src_form || + ! (MANDOC_FORM & of->src_form)) && + MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn)) + mparse_result(mp, &mdoc, &man); + + if (NULL != mdoc) { + msec = mdoc_meta(mdoc)->msec; + arch = mdoc_meta(mdoc)->arch; + mtitle = mdoc_meta(mdoc)->title; + } else if (NULL != man) { + msec = man_meta(man)->msec; + arch = NULL; + mtitle = man_meta(man)->title; + } else { + msec = of->sec; + arch = of->arch; + mtitle = of->title; } - mparse_result(mp, &mdoc, &man); - if (NULL == mdoc && NULL == man) - continue; + /* + * By default, skip a file if the manual section + * and architecture given in the file disagree + * with the directory where the file is located. + */ - msec = NULL != mdoc ? - mdoc_meta(mdoc)->msec : man_meta(man)->msec; - mtitle = NULL != mdoc ? - mdoc_meta(mdoc)->title : man_meta(man)->title; - arch = NULL != mdoc ? mdoc_meta(mdoc)->arch : NULL; + if (0 == use_all) { + assert(of->sec); + assert(msec); + if (strcmp(msec, of->sec)) + continue; + if (NULL == arch) { + if (NULL != of->arch) + continue; + } else if (NULL == of->arch || + strcmp(arch, of->arch)) + continue; + } + if (NULL == arch) arch = ""; /* + * By default, skip a file if the title given + * in the file disagrees with the file name. + * If both agree, use the file name as the title, + * because the one in the file usually is all caps. + */ + + assert(of->title); + assert(mtitle); + + if (0 == strcasecmp(mtitle, of->title)) + mtitle = of->title; + else if (0 == use_all) + continue; + + /* * The index record value consists of a nil-terminated * filename, a nil-terminated manual section, and a * nil-terminated description. Since the description @@ -493,43 +606,38 @@ main(int argc, char *argv[]) * going to write a nil byte in its place. */ - dbuf.len = 0; - buf_appendb(&dbuf, fn, strlen(fn) + 1); - buf_appendb(&dbuf, msec, strlen(msec) + 1); - buf_appendb(&dbuf, mtitle, strlen(mtitle) + 1); - buf_appendb(&dbuf, arch, strlen(arch) + 1); + dbuf->len = 0; + buf_append(dbuf, mdoc ? "mdoc" : (man ? "man" : "cat")); + buf_appendb(dbuf, fn, strlen(fn) + 1); + buf_appendb(dbuf, msec, strlen(msec) + 1); + buf_appendb(dbuf, mtitle, strlen(mtitle) + 1); + buf_appendb(dbuf, arch, strlen(arch) + 1); - sv = dbuf.len; + sv = dbuf->len; /* Fix the record number in the btree value. */ if (mdoc) - pmdoc_node(hash, &buf, &dbuf, + pmdoc_node(hash, buf, dbuf, mdoc_node(mdoc), mdoc_meta(mdoc)); - else - pman_node(hash, &buf, &dbuf, man_node(man)); + else if (man) + pman_node(hash, buf, dbuf, man_node(man)); + else + pformatted(hash, buf, dbuf, of); /* * Copy from the in-memory hashtable of pending keywords * into the database. */ - - memset(vbuf, 0, sizeof(uint32_t)); - memcpy(vbuf + 4, &rec, sizeof(uint32_t)); + vbuf.rec = htobe32(rec); seq = R_FIRST; while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { seq = R_NEXT; - - memcpy(vbuf, val.data, sizeof(uint32_t)); - val.size = sizeof(vbuf); - val.data = vbuf; - - if (verb > 1) - printf("%s: Added keyword: %s, 0x%x\n", - fn, (char *)key.data, - *(int *)val.data); - dbt_put(db, fbuf, &key, &val); + vbuf.mask = htobe64(*(uint64_t *)val.data); + val.size = sizeof(struct db_val); + val.data = &vbuf; + dbt_put(db, dbf, &key, &val); } if (ch < 0) { perror("hash"); @@ -541,37 +649,134 @@ main(int argc, char *argv[]) * set, put an empty one in now. */ - if (dbuf.len == sv) - buf_appendb(&dbuf, "", 1); + if (dbuf->len == sv) + buf_appendb(dbuf, "", 1); key.data = &rec; key.size = sizeof(recno_t); - val.data = dbuf.cp; - val.size = dbuf.len; + val.data = dbuf->cp; + val.size = dbuf->len; - if (verb > 0) + if (verb) printf("%s: Added index\n", fn); - dbt_put(idx, ibuf, &key, &val); + dbt_put(idx, idxf, &key, &val); } +} - ec = MANDOCLEVEL_OK; -out: - if (db) - (*db->close)(db); - if (idx) - (*idx->close)(idx); - if (hash) - (*hash->close)(hash); - if (mp) - mparse_free(mp); +/* + * Scan through all entries in the index file `idx' and prune those + * entries in `ofile'. + * Pruning consists of removing from `db', then invalidating the entry + * in `idx' (zeroing its value size). + */ +static void +index_prune(const struct of *ofile, DB *db, const char *dbf, + DB *idx, const char *idxf, + recno_t *maxrec, recno_t **recs, size_t *recsz) +{ + const struct of *of; + const char *fn, *cp; + struct db_val *vbuf; + unsigned seq, sseq; + DBT key, val; + size_t reccur; + int ch; - free(buf.cp); - free(dbuf.cp); - free(recs); + reccur = 0; + seq = R_FIRST; + while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) { + seq = R_NEXT; + *maxrec = *(recno_t *)key.data; + cp = val.data; - return((int)ec); + /* Deleted records are zero-sized. Skip them. */ + + if (0 == val.size) + goto cont; + + /* + * Make sure we're sane. + * Read past our mdoc/man/cat type to the next string, + * then make sure it's bounded by a NUL. + * Failing any of these, we go into our error handler. + */ + + if (NULL == (fn = memchr(cp, '\0', val.size))) + break; + if (++fn - cp >= (int)val.size) + break; + if (NULL == memchr(fn, '\0', val.size - (fn - cp))) + break; + + /* + * Search for the file in those we care about. + * XXX: build this into a tree. Too slow. + */ + + for (of = ofile; of; of = of->next) + if (0 == strcmp(fn, of->fname)) + break; + + if (NULL == of) + continue; + + /* + * Search through the keyword database, throwing out all + * references to our file. + */ + + sseq = R_FIRST; + while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) { + sseq = R_NEXT; + if (sizeof(struct db_val) != val.size) + break; + + vbuf = val.data; + if (*maxrec != betoh32(vbuf->rec)) + continue; + + if ((ch = (*db->del)(db, &key, R_CURSOR)) < 0) + break; + } + + if (ch < 0) { + perror(dbf); + exit((int)MANDOCLEVEL_SYSERR); + } else if (1 != ch) { + fprintf(stderr, "%s: Corrupt database\n", dbf); + exit((int)MANDOCLEVEL_SYSERR); + } + + if (verb) + printf("%s: Deleted index\n", fn); + + val.size = 0; + ch = (*idx->put)(idx, &key, &val, R_CURSOR); + + if (ch < 0) + break; +cont: + if (reccur >= *recsz) { + *recsz += MANDOC_SLOP; + *recs = mandoc_realloc + (*recs, *recsz * sizeof(recno_t)); + } + + (*recs)[(int)reccur] = *maxrec; + reccur++; + } + + if (ch < 0) { + perror(idxf); + exit((int)MANDOCLEVEL_SYSERR); + } else if (1 != ch) { + fprintf(stderr, "%s: Corrupt index\n", idxf); + exit((int)MANDOCLEVEL_SYSERR); + } + + (*maxrec)++; } /* @@ -648,7 +853,7 @@ pmdoc_An(MDOC_ARGS) return; buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_AUTHOR); + hash_put(hash, buf, TYPE_An); } static void @@ -659,7 +864,7 @@ hash_reset(DB **db) if (NULL != (hash = *db)) (*hash->close)(hash); - *db = dbopen(NULL, MANDOC_FLAGS, 0644, DB_HASH, NULL); + *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL); if (NULL == *db) { perror("hash"); exit((int)MANDOCLEVEL_SYSERR); @@ -709,7 +914,7 @@ pmdoc_Fd(MDOC_ARGS) buf_appendb(buf, start, (size_t)(end - start + 1)); buf_appendb(buf, "", 1); - hash_put(hash, buf, TYPE_INCLUDES); + hash_put(hash, buf, TYPE_In); } /* ARGSUSED */ @@ -721,7 +926,7 @@ pmdoc_Cd(MDOC_ARGS) return; buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_CONFIG); + hash_put(hash, buf, TYPE_Cd); } /* ARGSUSED */ @@ -735,7 +940,7 @@ pmdoc_In(MDOC_ARGS) return; buf_append(buf, n->child->string); - hash_put(hash, buf, TYPE_INCLUDES); + hash_put(hash, buf, TYPE_In); } /* ARGSUSED */ @@ -761,7 +966,7 @@ pmdoc_Fn(MDOC_ARGS) cp++; buf_append(buf, cp); - hash_put(hash, buf, TYPE_FUNCTION); + hash_put(hash, buf, TYPE_Fn); } /* ARGSUSED */ @@ -775,7 +980,7 @@ pmdoc_St(MDOC_ARGS) return; buf_append(buf, n->child->string); - hash_put(hash, buf, TYPE_STANDARD); + hash_put(hash, buf, TYPE_St); } /* ARGSUSED */ @@ -794,7 +999,7 @@ pmdoc_Xr(MDOC_ARGS) } else buf_appendb(buf, ".", 2); - hash_put(hash, buf, TYPE_XREF); + hash_put(hash, buf, TYPE_Xr); } /* ARGSUSED */ @@ -831,7 +1036,7 @@ pmdoc_Vt(MDOC_ARGS) buf_appendb(buf, start, sz); buf_appendb(buf, "", 1); - hash_put(hash, buf, TYPE_VARIABLE); + hash_put(hash, buf, TYPE_Va); } /* ARGSUSED */ @@ -845,7 +1050,7 @@ pmdoc_Fo(MDOC_ARGS) return; buf_append(buf, n->child->string); - hash_put(hash, buf, TYPE_FUNCTION); + hash_put(hash, buf, TYPE_Fn); } @@ -860,7 +1065,7 @@ pmdoc_Nd(MDOC_ARGS) buf_appendmdoc(dbuf, n->child, 1); buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_DESC); + hash_put(hash, buf, TYPE_Nd); } /* ARGSUSED */ @@ -872,7 +1077,7 @@ pmdoc_Er(MDOC_ARGS) return; buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_ERR); + hash_put(hash, buf, TYPE_Er); } /* ARGSUSED */ @@ -884,7 +1089,7 @@ pmdoc_Ev(MDOC_ARGS) return; buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_ENV); + hash_put(hash, buf, TYPE_Ev); } /* ARGSUSED */ @@ -896,7 +1101,7 @@ pmdoc_Pa(MDOC_ARGS) return; buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_PATH); + hash_put(hash, buf, TYPE_Pa); } /* ARGSUSED */ @@ -906,7 +1111,7 @@ pmdoc_Nm(MDOC_ARGS) if (SEC_NAME == n->sec) { buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_NAME); + hash_put(hash, buf, TYPE_Nm); return; } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) return; @@ -915,11 +1120,11 @@ pmdoc_Nm(MDOC_ARGS) buf_append(buf, m->name); buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_UTILITY); + hash_put(hash, buf, TYPE_Nm); } static void -hash_put(DB *db, const struct buf *buf, int mask) +hash_put(DB *db, const struct buf *buf, uint64_t mask) { DBT key, val; int rc; @@ -934,10 +1139,10 @@ hash_put(DB *db, const struct buf *buf, int mask) perror("hash"); exit((int)MANDOCLEVEL_SYSERR); } else if (0 == rc) - mask |= *(int *)val.data; + mask |= *(uint64_t *)val.data; val.data = &mask; - val.size = sizeof(int); + val.size = sizeof(uint64_t); if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { perror("hash"); @@ -1045,7 +1250,7 @@ pman_node(MAN_ARGS) buf_appendb(buf, start, sz); buf_appendb(buf, "", 1); - hash_put(hash, buf, TYPE_NAME); + hash_put(hash, buf, TYPE_Nm); if (' ' == start[(int)sz]) { start += (int)sz + 1; @@ -1084,22 +1289,391 @@ pman_node(MAN_ARGS) buf_appendb(dbuf, start, sz); buf_appendb(buf, start, sz); - hash_put(hash, buf, TYPE_DESC); + hash_put(hash, buf, TYPE_Nd); } } - if (pman_node(hash, buf, dbuf, n->child)) - return(1); - if (pman_node(hash, buf, dbuf, n->next)) - return(1); + for (n = n->child; n; n = n->next) + if (pman_node(hash, buf, dbuf, n)) + return(1); return(0); } +/* + * Parse a formatted manual page. + * By necessity, this involves rather crude guesswork. + */ static void +pformatted(DB *hash, struct buf *buf, struct buf *dbuf, + const struct of *of) +{ + FILE *stream; + char *line, *p; + size_t len, plen; + + if (NULL == (stream = fopen(of->fname, "r"))) { + perror(of->fname); + return; + } + + /* + * Always use the title derived from the filename up front, + * do not even try to find it in the file. This also makes + * sure we don't end up with an orphan index record, even if + * the file content turns out to be completely unintelligible. + */ + + buf->len = 0; + buf_append(buf, of->title); + hash_put(hash, buf, TYPE_Nm); + + while (NULL != (line = fgetln(stream, &len)) && '\n' != *line) + /* Skip to first blank line. */ ; + + while (NULL != (line = fgetln(stream, &len)) && + ('\n' == *line || ' ' == *line)) + /* Skip to first section header. */ ; + + /* + * If no page content can be found, + * reuse the page title as the page description. + */ + + if (NULL == (line = fgetln(stream, &len))) { + buf_appendb(dbuf, buf->cp, buf->size); + hash_put(hash, buf, TYPE_Nd); + fclose(stream); + return; + } + fclose(stream); + + /* + * If there is a dash, skip to the text following it. + */ + + for (p = line, plen = len; plen; p++, plen--) + if ('-' == *p) + break; + for ( ; plen; p++, plen--) + if ('-' != *p && ' ' != *p && 8 != *p) + break; + if (0 == plen) { + p = line; + plen = len; + } + + /* + * Copy the rest of the line, but no more than 70 bytes. + */ + + if (70 < plen) + plen = 70; + p[plen-1] = '\0'; + buf_appendb(dbuf, p, plen); + buf->len = 0; + buf_appendb(buf, p, plen); + hash_put(hash, buf, TYPE_Nd); +} + +static void +ofile_argbuild(int argc, char *argv[], struct of **of) +{ + char buf[MAXPATHLEN]; + char *sec, *arch, *title, *p; + int i, src_form; + struct of *nof; + + for (i = 0; i < argc; i++) { + + /* + * Try to infer the manual section, architecture and + * page title from the path, assuming it looks like + * man*[/]/.<section> or + * cat<section>[/<arch>]/<title>.0 + */ + + if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) { + fprintf(stderr, "%s: Path too long\n", argv[i]); + continue; + } + sec = arch = title = NULL; + src_form = 0; + p = strrchr(buf, '\0'); + while (p-- > buf) { + if (NULL == sec && '.' == *p) { + sec = p + 1; + *p = '\0'; + if ('0' == *sec) + src_form |= MANDOC_FORM; + else if ('1' <= *sec && '9' >= *sec) + src_form |= MANDOC_SRC; + continue; + } + if ('/' != *p) + continue; + if (NULL == title) { + title = p + 1; + *p = '\0'; + continue; + } + if (strncmp("man", p + 1, 3)) { + src_form |= MANDOC_SRC; + arch = p + 1; + } else if (strncmp("cat", p + 1, 3)) { + src_form |= MANDOC_FORM; + arch = p + 1; + } + break; + } + if (NULL == title) + title = buf; + + /* + * Build the file structure. + */ + + nof = mandoc_calloc(1, sizeof(struct of)); + nof->fname = mandoc_strdup(argv[i]); + if (NULL != sec) + nof->sec = mandoc_strdup(sec); + if (NULL != arch) + nof->arch = mandoc_strdup(arch); + nof->title = mandoc_strdup(title); + nof->src_form = src_form; + + /* + * Add the structure to the list. + */ + + if (verb > 2) + printf("%s: Scheduling\n", argv[i]); + if (NULL == *of) { + *of = nof; + (*of)->first = nof; + } else { + nof->first = (*of)->first; + (*of)->next = nof; + *of = nof; + } + } +} + +/* + * Recursively build up a list of files to parse. + * We use this instead of ftw() and so on because I don't want global + * variables hanging around. + * This ignores the mandoc.db and mandoc.index files, but assumes that + * everything else is a manual. + * Pass in a pointer to a NULL structure for the first invocation. + */ +static int +ofile_dirbuild(const char *dir, const char* psec, const char *parch, + int p_src_form, struct of **of) +{ + char buf[MAXPATHLEN]; + struct stat sb; + size_t sz; + DIR *d; + const char *fn, *sec, *arch; + char *p, *q, *suffix; + struct of *nof; + struct dirent *dp; + int src_form; + + if (NULL == (d = opendir(dir))) { + perror(dir); + return(0); + } + + while (NULL != (dp = readdir(d))) { + fn = dp->d_name; + + if ('.' == *fn) + continue; + + src_form = p_src_form; + + if (DT_DIR == dp->d_type) { + sec = psec; + arch = parch; + + /* + * By default, only use directories called: + * man<section>/[<arch>/] or + * cat<section>/[<arch>/] + */ + + if (NULL == sec) { + if(0 == strncmp("man", fn, 3)) { + src_form |= MANDOC_SRC; + sec = fn + 3; + } else if (0 == strncmp("cat", fn, 3)) { + src_form |= MANDOC_FORM; + sec = fn + 3; + } else if (use_all) + sec = fn; + else + continue; + } else if (NULL == arch && (use_all || + NULL == strchr(fn, '.'))) + arch = fn; + else if (0 == use_all) + continue; + + buf[0] = '\0'; + strlcat(buf, dir, MAXPATHLEN); + strlcat(buf, "/", MAXPATHLEN); + sz = strlcat(buf, fn, MAXPATHLEN); + + if (MAXPATHLEN <= sz) { + fprintf(stderr, "%s: Path too long\n", dir); + return(0); + } + + if (verb > 2) + printf("%s: Scanning\n", buf); + + if ( ! ofile_dirbuild(buf, sec, arch, + src_form, of)) + return(0); + } + if (DT_REG != dp->d_type || + (NULL == psec && !use_all) || + !strcmp(MANDOC_DB, fn) || + !strcmp(MANDOC_IDX, fn)) + continue; + + /* + * By default, skip files where the file name suffix + * does not agree with the section directory + * they are located in. + */ + + suffix = strrchr(fn, '.'); + if (0 == use_all) { + if (NULL == suffix) + continue; + if ((MANDOC_SRC & src_form && + strcmp(suffix + 1, psec)) || + (MANDOC_FORM & src_form && + strcmp(suffix + 1, "0"))) + continue; + } + if (NULL != suffix) { + if ('0' == suffix[1]) + src_form |= MANDOC_FORM; + else if ('1' <= suffix[1] && '9' >= suffix[1]) + src_form |= MANDOC_SRC; + } + + + /* + * Skip formatted manuals if a source version is + * available. Ignore the age: it is very unlikely + * that people install newer formatted base manuals + * when they used to have source manuals before, + * and in ports, old manuals get removed on update. + */ + if (0 == use_all && MANDOC_FORM & src_form && + NULL != psec) { + buf[0] = '\0'; + strlcat(buf, dir, MAXPATHLEN); + p = strrchr(buf, '/'); + if (NULL == p) + p = buf; + else + p++; + if (0 == strncmp("cat", p, 3)) + memcpy(p, "man", 3); + strlcat(buf, "/", MAXPATHLEN); + sz = strlcat(buf, fn, MAXPATHLEN); + if (sz >= MAXPATHLEN) { + fprintf(stderr, "%s: Path too long\n", buf); + continue; + } + q = strrchr(buf, '.'); + if (NULL != q && p < q++) { + *q = '\0'; + sz = strlcat(buf, psec, MAXPATHLEN); + if (sz >= MAXPATHLEN) { + fprintf(stderr, + "%s: Path too long\n", buf); + continue; + } + if (0 == stat(buf, &sb)) + continue; + } + } + + buf[0] = '\0'; + strlcat(buf, dir, MAXPATHLEN); + strlcat(buf, "/", MAXPATHLEN); + sz = strlcat(buf, fn, MAXPATHLEN); + if (sz >= MAXPATHLEN) { + fprintf(stderr, "%s: Path too long\n", dir); + continue; + } + + nof = mandoc_calloc(1, sizeof(struct of)); + nof->fname = mandoc_strdup(buf); + if (NULL != psec) + nof->sec = mandoc_strdup(psec); + if (NULL != parch) + nof->arch = mandoc_strdup(parch); + nof->src_form = src_form; + + /* + * Remember the file name without the extension, + * to be used as the page title in the database. + */ + + if (NULL != suffix) + *suffix = '\0'; + nof->title = mandoc_strdup(fn); + + /* + * Add the structure to the list. + */ + + if (verb > 2) + printf("%s: Scheduling\n", buf); + if (NULL == *of) { + *of = nof; + (*of)->first = nof; + } else { + nof->first = (*of)->first; + (*of)->next = nof; + *of = nof; + } + } + + closedir(d); + return(1); +} + +static void +ofile_free(struct of *of) +{ + struct of *nof; + + while (of) { + nof = of->next; + free(of->fname); + free(of->sec); + free(of->arch); + free(of->title); + free(of); + of = nof; + } +} + +static void usage(void) { - fprintf(stderr, "usage: %s [-ruv] [-d path] [file...]\n", - progname); + fprintf(stderr, "usage: %s [-v] " + "[-d dir [files...] |" + " -u dir [files...] |" + " dir...]\n", progname); }